Merge 'release-4-6' into master
authorRoland Schulz <roland@utk.edu>
Sun, 18 Nov 2012 22:55:57 +0000 (17:55 -0500)
committerRoland Schulz <roland@utk.edu>
Mon, 19 Nov 2012 20:17:29 +0000 (15:17 -0500)
Conflicts:
CMakeLists.txt
src/config.h.cmakein
src/mdlib/CMakeLists.txt
src/mdlib/libmd.pc.cmakein (all 4 trivial)

Removed old src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single and moved
new ones.

XML not removed (51ec258 reverted as part of merge resolution), besides
HAVE_LIBXML2 (wasn't used).

Change-Id: I831320df04d49fc80fd3bf46c94af50a842b29b8

172 files changed:
1  2 
CMakeLists.txt
src/config.h.cmakein
src/gromacs/gmxlib/nonbonded/CMakeLists.txt
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomW3P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomW3W3_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomW4P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomW4W4_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomW3P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomW3W3_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomW4P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomW4W4_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomW3P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomW3W3_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomW4P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomW4W4_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwBham_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwLJ_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwNone_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwCSTab_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomW3P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomW3W3_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomW4P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomW4W4_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_template_c.pre
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/kernelutil_x86_sse2_single.h
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/make_nb_kernel_sse2_single.py
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCSTab_VdwNone_GeomP1P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCSTab_VdwNone_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCSTab_VdwNone_GeomW3W3_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCSTab_VdwNone_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCSTab_VdwNone_GeomW4W4_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCoul_VdwLJ_GeomP1P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCoul_VdwLJ_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCoul_VdwLJ_GeomW3W3_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCoul_VdwLJ_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCoul_VdwLJ_GeomW4W4_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCoul_VdwNone_GeomP1P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCoul_VdwNone_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCoul_VdwNone_GeomW3W3_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCoul_VdwNone_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCoul_VdwNone_GeomW4W4_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEwSh_VdwNone_GeomP1P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEwSh_VdwNone_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEwSh_VdwNone_GeomW3W3_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEwSh_VdwNone_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEwSh_VdwNone_GeomW4W4_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEwSw_VdwNone_GeomP1P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEwSw_VdwNone_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEwSw_VdwNone_GeomW3W3_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEwSw_VdwNone_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEwSw_VdwNone_GeomW4W4_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEw_VdwCSTab_GeomP1P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEw_VdwCSTab_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEw_VdwCSTab_GeomW3W3_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEw_VdwCSTab_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEw_VdwCSTab_GeomW4W4_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEw_VdwLJ_GeomP1P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEw_VdwLJ_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEw_VdwLJ_GeomW3W3_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEw_VdwLJ_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEw_VdwLJ_GeomW4W4_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEw_VdwNone_GeomP1P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEw_VdwNone_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEw_VdwNone_GeomW3W3_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEw_VdwNone_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEw_VdwNone_GeomW4W4_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecGB_VdwLJ_GeomP1P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecGB_VdwNone_GeomP1P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecNone_VdwCSTab_GeomP1P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecNone_VdwLJSh_GeomP1P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecNone_VdwLJSw_GeomP1P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecNone_VdwLJ_GeomP1P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRFCut_VdwNone_GeomP1P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRFCut_VdwNone_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRFCut_VdwNone_GeomW3W3_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRFCut_VdwNone_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRFCut_VdwNone_GeomW4W4_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRF_VdwCSTab_GeomP1P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRF_VdwCSTab_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRF_VdwCSTab_GeomW3W3_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRF_VdwCSTab_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRF_VdwCSTab_GeomW4W4_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRF_VdwLJ_GeomP1P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRF_VdwLJ_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRF_VdwLJ_GeomW3W3_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRF_VdwLJ_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRF_VdwLJ_GeomW4W4_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRF_VdwNone_GeomP1P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRF_VdwNone_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRF_VdwNone_GeomW3W3_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRF_VdwNone_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRF_VdwNone_GeomW4W4_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_sse2_single.h
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_template_sse2_single.pre
src/gromacs/gmxlib/nonbonded/nonbonded.c
src/gromacs/legacyheaders/ns.h
src/gromacs/legacyheaders/types/nblist.h
src/gromacs/mdlib/force.c
src/gromacs/mdlib/forcerec.c
src/gromacs/mdlib/nbnxn_atomdata.c
src/gromacs/mdlib/nbnxn_internal.h
src/gromacs/mdlib/nbnxn_search.c
src/gromacs/mdlib/ns.c
src/programs/grompp/grompp.c
src/programs/mdrun/md.c

diff --cc CMakeLists.txt
index 46a2e31c6dfbbf8231e3855fa8707f7177165110,7dd6f4540ad4ad95709be43f6be69b4dd911ec0e..411b97b155fbb4f14b963bbd5557c76a7e566d9d
@@@ -158,19 -165,18 +158,19 @@@ option(GMX_POWERPC_INVSQRT "Use PowerP
  mark_as_advanced(GMX_POWERPC_INVSQRT)
  option(GMX_FAHCORE "Build a library with mdrun functionality" OFF)
  mark_as_advanced(GMX_FAHCORE)
 +option(GMX_OPENMM "Accelerated execution on GPUs through the OpenMM library (rerun cmake after changing to see relevant options)" OFF)
  
  include(gmxDetectAcceleration)
- if(NOT DEFINED GMX_ACCELERATION)
+ if(NOT DEFINED GMX_CPU_ACCELERATION)
      if(CMAKE_CROSSCOMPILING)
-         set(GMX_SUGGESTED_ACCELERATION "None")
+         set(GMX_SUGGESTED_CPU_ACCELERATION "None")
      else(CMAKE_CROSSCOMPILING)
-         gmx_detect_acceleration(GMX_SUGGESTED_ACCELERATION)
+         gmx_detect_acceleration(GMX_SUGGESTED_CPU_ACCELERATION)
      endif(CMAKE_CROSSCOMPILING)
- endif(NOT DEFINED GMX_ACCELERATION)
+ endif(NOT DEFINED GMX_CPU_ACCELERATION)
  
- set(GMX_ACCELERATION "@GMX_SUGGESTED_ACCELERATION@"
-     CACHE STRING "Accelerated kernels. Pick one of: None, SSE2, SSE4.1, AVX_128_FMA, AVX_256, BlueGene, Power6, Fortran")
+ set(GMX_CPU_ACCELERATION "@GMX_SUGGESTED_CPU_ACCELERATION@"
+     CACHE STRING "Accelerated CPU kernels. Pick one of: None, SSE2, SSE4.1, AVX_128_FMA, AVX_256, BlueGene, Power6, Fortran")
  
  set(GMX_FFT_LIBRARY "fftw3" 
      CACHE STRING "FFT library choices: fftw3,mkl,fftpack[built-in]")
@@@ -777,16 -769,18 +779,16 @@@ elseif(${GMX_CPU_ACCELERATION} STREQUA
          message(WARNING "No C AVX flag found. Consider a newer compiler, or disable AVX for much lower performance.")
      endif (NOT GNU_AVX_CFLAG AND NOT MSVC_AVX_CFLAG)
  
 -    if (CMAKE_CXX_COMPILER_LOADED)
 -        GMX_TEST_CXXFLAG(GNU_AVX_CXXFLAG "-mavx" GROMACS_CXX_FLAGS)
 -        if (NOT GNU_AVX_CXXFLAG)
 -            GMX_TEST_CXXFLAG(MSVC_AVX_CXXFLAG "/arch:AVX" GROMACS_CXX_FLAGS)
 -        endif (NOT GNU_AVX_CXXFLAG)
 -        if (NOT GNU_AVX_CXXFLAG AND NOT MSVC_AVX_CXXFLAG)
 -            message(WARNING "No C++ AVX flag found. Consider a newer compiler, or disable AVX for much lower performance.")
 -        endif (NOT GNU_AVX_CXXFLAG AND NOT MSVC_AVX_CXXFLAG)
 -    endif()
 +    GMX_TEST_CXXFLAG(GNU_AVX_CXXFLAG "-mavx" GROMACS_CXX_FLAGS)
 +    if (NOT GNU_AVX_CXXFLAG)
 +       GMX_TEST_CXXFLAG(MSVC_AVX_CXXFLAG "/arch:AVX" GROMACS_CXX_FLAGS)
 +    endif (NOT GNU_AVX_CXXFLAG)
 +    if (NOT GNU_AVX_CXXFLAG AND NOT MSVC_AVX_CXXFLAG)
 +       message(WARNING "No C++ AVX flag found. Consider a newer compiler, or disable AVX for much lower performance.")
 +    endif (NOT GNU_AVX_CXXFLAG AND NOT MSVC_AVX_CXXFLAG)
  
      # Set the FMA4 flags (MSVC doesn't require any)
-     if(${GMX_ACCELERATION} STREQUAL "AVX_128_FMA" AND NOT MSVC)
+     if(${GMX_CPU_ACCELERATION} STREQUAL "AVX_128_FMA" AND NOT MSVC)
          GMX_TEST_CFLAG(GNU_FMA_CFLAG "-mfma4" GROMACS_C_FLAGS)
          if (NOT GNU_FMA_CFLAG)
              message(WARNING "No C FMA4 flag found. Consider a newer compiler, or disable AVX_128_FMA for much lower performance.")
Simple merge
index 0000000000000000000000000000000000000000,ad6c21a571143b0b9ecb7a0e6d09f3df432ace96..ad6c21a571143b0b9ecb7a0e6d09f3df432ace96
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,d810bde489886b2ba45cd63e6c2a7d16df3a05da..d810bde489886b2ba45cd63e6c2a7d16df3a05da
mode 000000,100755..100755
--- /dev/null
index 0000000000000000000000000000000000000000,f06c4c3c7e644a191f10097583a8d3b8f23bb05f..f06c4c3c7e644a191f10097583a8d3b8f23bb05f
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,e78f14bee1854b885a197f9cbbf69f7cba229aa6..e78f14bee1854b885a197f9cbbf69f7cba229aa6
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,dc109470c759f9694cbbb7c34485798aae9d502a..dc109470c759f9694cbbb7c34485798aae9d502a
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,b07cf7b099e924556c84440e5230ad4f9f114d54..b07cf7b099e924556c84440e5230ad4f9f114d54
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,945c2899813a58567ee7e2a98234db837d938205..945c2899813a58567ee7e2a98234db837d938205
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,d26e65e0a213df62c4349d8ada1ab0608b6dfd64..d26e65e0a213df62c4349d8ada1ab0608b6dfd64
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,df2a8120a4eabb8415ebbbed1e58052aa868a523..df2a8120a4eabb8415ebbbed1e58052aa868a523
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,ecda96999a9bcee9c060471387c42a407e62c80b..ecda96999a9bcee9c060471387c42a407e62c80b
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,0f9f0dc77cddaa9d2a410b9b19bc2ea121ea2897..0f9f0dc77cddaa9d2a410b9b19bc2ea121ea2897
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,237af11e8912d810dbfb57debfb79243aedc0348..237af11e8912d810dbfb57debfb79243aedc0348
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,99b3fa3a25e7e9eeb52dcfad2e0312816adc62a7..99b3fa3a25e7e9eeb52dcfad2e0312816adc62a7
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,bf824efebdda59ca6e938afd9917b6fee11ec151..bf824efebdda59ca6e938afd9917b6fee11ec151
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,040065d62db38a60ef2c4eb5e264f338c5aa6fbe..040065d62db38a60ef2c4eb5e264f338c5aa6fbe
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,ffb4b050bc0e03c0b7082545c4010adc8c584df1..ffb4b050bc0e03c0b7082545c4010adc8c584df1
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,a1f394cd8121743425c3488cc7771ac86dbb77f0..a1f394cd8121743425c3488cc7771ac86dbb77f0
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,9dfa80fbab5f90240add217481ccc000a01de760..9dfa80fbab5f90240add217481ccc000a01de760
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,15afb7d29b5fd809459590a15999af5cb262f27f..15afb7d29b5fd809459590a15999af5cb262f27f
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,be31d87a0bfdfc33b2d0be12e922100cfdd0ce53..be31d87a0bfdfc33b2d0be12e922100cfdd0ce53
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,b6f28fa37a7fb4211e800a7fbc678cabe682de29..b6f28fa37a7fb4211e800a7fbc678cabe682de29
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,eaef49879463ed2d84e7a609a2078099fbd84d80..eaef49879463ed2d84e7a609a2078099fbd84d80
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,ce228708aa250b4ca5d0780127f07b771d898648..ce228708aa250b4ca5d0780127f07b771d898648
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,c295b34210e2f8a1ebd39fee3529adad8f8608f5..c295b34210e2f8a1ebd39fee3529adad8f8608f5
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,a41a7f818e0e6608da9042d4ba1ae68ceb61146b..a41a7f818e0e6608da9042d4ba1ae68ceb61146b
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,a6b2a2f6d4a789ec53b9dd9c1a64fb8565debdf7..a6b2a2f6d4a789ec53b9dd9c1a64fb8565debdf7
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,dd74d08ad526f86e97ea71d19c0ba354c7c498c4..dd74d08ad526f86e97ea71d19c0ba354c7c498c4
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,b3a6e65769530ef125e963675c52b129dfd03626..b3a6e65769530ef125e963675c52b129dfd03626
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,5db012fa82626e18a36e872526ab8775d58fe561..5db012fa82626e18a36e872526ab8775d58fe561
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,fdd75234bdd522522b1bbd5a46373e9652f00649..fdd75234bdd522522b1bbd5a46373e9652f00649
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,7dfac8d49c19563715128122e30bbbc055541a48..7dfac8d49c19563715128122e30bbbc055541a48
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,51bf341399dd04aba48af4e9f3c758ace96eeea6..51bf341399dd04aba48af4e9f3c758ace96eeea6
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,246a6e3d88fa217a12a9f292ed1b7ad658a13af3..246a6e3d88fa217a12a9f292ed1b7ad658a13af3
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,79299091c99bdc84c88451252b562fd73522873e..79299091c99bdc84c88451252b562fd73522873e
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,da9e66a8d92218553b2990829ffd2f234445f7f8..da9e66a8d92218553b2990829ffd2f234445f7f8
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,539423e410983c330fa3cd685b7413c98467988e..539423e410983c330fa3cd685b7413c98467988e
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,1dda59c1b6464256df88935f6e676386b9f34ee3..1dda59c1b6464256df88935f6e676386b9f34ee3
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,c078658f52f2db453b5d5ed943d9564cd5af3f6f..c078658f52f2db453b5d5ed943d9564cd5af3f6f
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,7e652f0d126ad6963995b4153ecf72e17c28d6d9..7e652f0d126ad6963995b4153ecf72e17c28d6d9
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,c88daa19c6830fcca2dd34c95d1b7f474e99e73c..c88daa19c6830fcca2dd34c95d1b7f474e99e73c
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,2e0ce9825e2057be94b94f1a9a3853598326c765..2e0ce9825e2057be94b94f1a9a3853598326c765
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,b42e52d5f43874337377fc7426bb70ee7422931b..b42e52d5f43874337377fc7426bb70ee7422931b
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,bddd767d7e2738393389894ea12f044b676e0712..bddd767d7e2738393389894ea12f044b676e0712
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,f0d6ed0a236eb86cbf713a99e9cd043798e84009..f0d6ed0a236eb86cbf713a99e9cd043798e84009
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,3e4a4888cd37569f3b3b61d25219a18c2af4ffe6..3e4a4888cd37569f3b3b61d25219a18c2af4ffe6
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,8746ecb9fc263655f98a189989d7972cd1d700d6..8746ecb9fc263655f98a189989d7972cd1d700d6
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,1faa2926e38258628d109c84e9782491e5f59d1a..1faa2926e38258628d109c84e9782491e5f59d1a
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,0b4dfe6b74ac885a3c091009e2734c2ac09efafe..0b4dfe6b74ac885a3c091009e2734c2ac09efafe
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,ccc37cbc0aead0439176684c9f2a8717cd1efaf8..ccc37cbc0aead0439176684c9f2a8717cd1efaf8
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,3cc1f50f0e609ae3b66b736863cdd979913214f3..3cc1f50f0e609ae3b66b736863cdd979913214f3
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,fb1468dc76bd140f7fa9eb722d35be2e1a29751a..fb1468dc76bd140f7fa9eb722d35be2e1a29751a
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,c2230f2f1146595d85c0917aba3373a4ac912dbd..c2230f2f1146595d85c0917aba3373a4ac912dbd
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,ebae1c31bfb4fbdddf03e0653696104d262697c1..ebae1c31bfb4fbdddf03e0653696104d262697c1
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,fab1a8517e4804e73a4aaee5807afc456614e969..fab1a8517e4804e73a4aaee5807afc456614e969
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,fdbf8e1c9f4d0469076939acda215633153fb3a0..fdbf8e1c9f4d0469076939acda215633153fb3a0
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,143a6b0c2c53ca2321542cf000072767e839b69e..143a6b0c2c53ca2321542cf000072767e839b69e
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,1734983ce81cb8ed1b2dd940fc2312a323ae7d8b..1734983ce81cb8ed1b2dd940fc2312a323ae7d8b
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,1462dd38e995128acb2c9d93d35887c6166840aa..1462dd38e995128acb2c9d93d35887c6166840aa
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,c3c548a938b5b801eda7c460b3923af296de2779..c3c548a938b5b801eda7c460b3923af296de2779
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,fb01e1264e0d09f880ac61783c32cd5b774664ca..fb01e1264e0d09f880ac61783c32cd5b774664ca
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,53e1d8674f9a13a34084a93593396e126fb03187..53e1d8674f9a13a34084a93593396e126fb03187
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,545a0d1b9d5193df46253761b419eda592cee376..545a0d1b9d5193df46253761b419eda592cee376
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,ffd647a7fdd9ec0e333440114c85110e1cf239aa..ffd647a7fdd9ec0e333440114c85110e1cf239aa
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,fb3a721dd0fc1fde458d9def61a22284235ec561..fb3a721dd0fc1fde458d9def61a22284235ec561
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,b12cf6933f53bcdbd564b53c3d264b28b7e16d78..b12cf6933f53bcdbd564b53c3d264b28b7e16d78
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,4732af9cc72b61c90cd715496fab0a5de50915cd..4732af9cc72b61c90cd715496fab0a5de50915cd
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,0fda25ef363dda0e72fe1a141cc61977930b669a..0fda25ef363dda0e72fe1a141cc61977930b669a
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,501c5edaba442e31a1542a224cf4d6a11de0fcae..501c5edaba442e31a1542a224cf4d6a11de0fcae
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,ec00cfc621602378ffe65aa6dbf149bd1efc819b..ec00cfc621602378ffe65aa6dbf149bd1efc819b
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,e41351986d051beb03cc0ff682620943a12014df..e41351986d051beb03cc0ff682620943a12014df
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,3c1d2d8ba886b368c79359236698189a5c3cb1b7..3c1d2d8ba886b368c79359236698189a5c3cb1b7
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,6aaa31da4ef58ff11c8a49446afdc0b59d59017b..6aaa31da4ef58ff11c8a49446afdc0b59d59017b
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,f62be8bee679971aabd488a43cf2d3670a516c8e..f62be8bee679971aabd488a43cf2d3670a516c8e
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,7173bdddd8ca9defad336ab7a8e00655c0601f04..7173bdddd8ca9defad336ab7a8e00655c0601f04
mode 000000,100644..100644
--- /dev/null
Simple merge
index 4fc5f794c59c25face22fad5608d642da262d39c,0000000000000000000000000000000000000000..cdcc6e9f2dd6e776d457ffccc6a9e4ce607e9979
mode 100644,000000..100644
--- /dev/null
@@@ -1,923 -1,0 +1,923 @@@
-                                 bFillGrid,bDoLongRangeNS);
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <math.h>
 +#include <string.h>
 +#include <assert.h>
 +#include "sysstuff.h"
 +#include "typedefs.h"
 +#include "macros.h"
 +#include "smalloc.h"
 +#include "macros.h"
 +#include "physics.h"
 +#include "force.h"
 +#include "nonbonded.h"
 +#include "names.h"
 +#include "network.h"
 +#include "pbc.h"
 +#include "ns.h"
 +#include "nrnb.h"
 +#include "bondf.h"
 +#include "mshift.h"
 +#include "txtdump.h"
 +#include "coulomb.h"
 +#include "pme.h"
 +#include "mdrun.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "qmmm.h"
 +#include "gmx_omp_nthreads.h"
 +
 +
 +void ns(FILE *fp,
 +        t_forcerec *fr,
 +        rvec       x[],
 +        matrix     box,
 +        gmx_groups_t *groups,
 +        t_grpopts  *opts,
 +        gmx_localtop_t *top,
 +        t_mdatoms  *md,
 +        t_commrec  *cr,
 +        t_nrnb     *nrnb,
 +        real       *lambda,
 +        real       *dvdlambda,
 +        gmx_grppairener_t *grppener,
 +        gmx_bool       bFillGrid,
 +        gmx_bool       bDoLongRangeNS)
 +{
 +  char   *ptr;
 +  int    nsearch;
 +
 +
 +  if (!fr->ns.nblist_initialized)
 +  {
 +      init_neighbor_list(fp, fr, md->homenr);
 +  }
 +
 +  if (fr->bTwinRange)
 +    fr->nlr=0;
 +
 +    nsearch = search_neighbours(fp,fr,x,box,top,groups,cr,nrnb,md,
 +                                lambda,dvdlambda,grppener,
++                                bFillGrid,bDoLongRangeNS,TRUE);
 +  if (debug)
 +    fprintf(debug,"nsearch = %d\n",nsearch);
 +
 +  /* Check whether we have to do dynamic load balancing */
 +  /*if ((nsb->nstDlb > 0) && (mod(step,nsb->nstDlb) == 0))
 +    count_nb(cr,nsb,&(top->blocks[ebCGS]),nns,fr->nlr,
 +    &(top->idef),opts->ngener);
 +  */
 +  if (fr->ns.dump_nl > 0)
 +    dump_nblist(fp,cr,fr,fr->ns.dump_nl);
 +}
 +
 +static void reduce_thread_forces(int n,rvec *f,
 +                                 tensor vir,
 +                                 real *Vcorr,
 +                                 int efpt_ind,real *dvdl,
 +                                 int nthreads,f_thread_t *f_t)
 +{
 +    int t,i;
 +
 +    /* This reduction can run over any number of threads */
 +#pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntBonded)) private(t) schedule(static)
 +    for(i=0; i<n; i++)
 +    {
 +        for(t=1; t<nthreads; t++)
 +        {
 +            rvec_inc(f[i],f_t[t].f[i]);
 +        }
 +    }
 +    for(t=1; t<nthreads; t++)
 +    {
 +        *Vcorr += f_t[t].Vcorr;
 +        *dvdl  += f_t[t].dvdl[efpt_ind];
 +        m_add(vir,f_t[t].vir,vir);
 +    }
 +}
 +
 +void do_force_lowlevel(FILE       *fplog,   gmx_large_int_t step,
 +                       t_forcerec *fr,      t_inputrec *ir,
 +                       t_idef     *idef,    t_commrec  *cr,
 +                       t_nrnb     *nrnb,    gmx_wallcycle_t wcycle,
 +                       t_mdatoms  *md,
 +                       t_grpopts  *opts,
 +                       rvec       x[],      history_t  *hist,
 +                       rvec       f[],
 +                       rvec       f_longrange[],
 +                       gmx_enerdata_t *enerd,
 +                       t_fcdata   *fcd,
 +                       gmx_mtop_t     *mtop,
 +                       gmx_localtop_t *top,
 +                       gmx_genborn_t *born,
 +                       t_atomtypes *atype,
 +                       gmx_bool       bBornRadii,
 +                       matrix     box,
 +                       t_lambda   *fepvals,
 +                       real       *lambda,
 +                       t_graph    *graph,
 +                       t_blocka   *excl,
 +                       rvec       mu_tot[],
 +                       int        flags,
 +                       float      *cycles_pme)
 +{
 +    int     i,j,status;
 +    int     donb_flags;
 +    gmx_bool    bDoEpot,bSepDVDL,bSB;
 +    int     pme_flags;
 +    matrix  boxs;
 +    rvec    box_size;
 +    real    Vsr,Vlr,Vcorr=0;
 +    t_pbc   pbc;
 +    real    dvdgb;
 +    char    buf[22];
 +    gmx_enerdata_t ed_lam;
 +    double  clam_i,vlam_i;
 +    real    dvdl_dum[efptNR], dvdl, dvdl_nb[efptNR], lam_i[efptNR];
 +    real    dvdlsum;
 +
 +#ifdef GMX_MPI
 +    double  t0=0.0,t1,t2,t3; /* time measurement for coarse load balancing */
 +#endif
 +
 +#define PRINT_SEPDVDL(s,v,dvdlambda) if (bSepDVDL) fprintf(fplog,sepdvdlformat,s,v,dvdlambda);
 +
 +
 +    set_pbc(&pbc,fr->ePBC,box);
 +
 +    /* reset free energy components */
 +    for (i=0;i<efptNR;i++)
 +    {
 +        dvdl_nb[i]  = 0;
 +        dvdl_dum[i] = 0;
 +    }
 +
 +    /* Reset box */
 +    for(i=0; (i<DIM); i++)
 +    {
 +        box_size[i]=box[i][i];
 +    }
 +
 +    bSepDVDL=(fr->bSepDVDL && do_per_step(step,ir->nstlog));
 +    debug_gmx();
 +
 +    /* do QMMM first if requested */
 +    if(fr->bQMMM)
 +    {
 +        enerd->term[F_EQM] = calculate_QMMM(cr,x,f,fr,md);
 +    }
 +
 +    if (bSepDVDL)
 +    {
 +        fprintf(fplog,"Step %s: non-bonded V and dVdl for node %d:\n",
 +                gmx_step_str(step,buf),cr->nodeid);
 +    }
 +
 +    /* Call the short range functions all in one go. */
 +
 +#ifdef GMX_MPI
 +    /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/
 +#define TAKETIME FALSE
 +    if (TAKETIME)
 +    {
 +        MPI_Barrier(cr->mpi_comm_mygroup);
 +        t0=MPI_Wtime();
 +    }
 +#endif
 +
 +    if (ir->nwall)
 +    {
 +        /* foreign lambda component for walls */
 +        dvdl = do_walls(ir,fr,box,md,x,f,lambda[efptVDW],
 +                        enerd->grpp.ener[egLJSR],nrnb);
 +        PRINT_SEPDVDL("Walls",0.0,dvdl);
 +        enerd->dvdl_lin[efptVDW] += dvdl;
 +    }
 +
 +      /* If doing GB, reset dvda and calculate the Born radii */
 +      if (ir->implicit_solvent)
 +      {
 +        wallcycle_sub_start(wcycle, ewcsNONBONDED);
 +
 +              for(i=0;i<born->nr;i++)
 +              {
 +                      fr->dvda[i]=0;
 +              }
 +
 +              if(bBornRadii)
 +              {
 +                      calc_gb_rad(cr,fr,ir,top,atype,x,&(fr->gblist),born,md,nrnb);
 +              }
 +
 +        wallcycle_sub_stop(wcycle, ewcsNONBONDED);
 +      }
 +
 +    where();
 +    if (flags & GMX_FORCE_NONBONDED)
 +    {
 +        donb_flags = 0;
 +        /* Add short-range interactions */
 +        donb_flags |= GMX_NONBONDED_DO_SR;
 +
 +        if (flags & GMX_FORCE_FORCES)
 +        {
 +            donb_flags |= GMX_NONBONDED_DO_FORCE;
 +        }
 +        if (flags & GMX_FORCE_ENERGY)
 +        {
 +            donb_flags |= GMX_NONBONDED_DO_POTENTIAL;
 +        }
 +        if (flags & GMX_FORCE_DO_LR)
 +        {
 +            donb_flags |= GMX_NONBONDED_DO_LR;
 +        }
 +
 +        wallcycle_sub_start(wcycle, ewcsNONBONDED);
 +        do_nonbonded(cr,fr,x,f,f_longrange,md,excl,
 +                    &enerd->grpp,box_size,nrnb,
 +                    lambda,dvdl_nb,-1,-1,donb_flags);
 +        wallcycle_sub_stop(wcycle, ewcsNONBONDED);
 +    }
 +
 +    /* If we do foreign lambda and we have soft-core interactions
 +     * we have to recalculate the (non-linear) energies contributions.
 +     */
 +    if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0)
 +    {
 +        wallcycle_sub_start(wcycle, ewcsNONBONDED);
 +        init_enerdata(mtop->groups.grps[egcENER].nr,fepvals->n_lambda,&ed_lam);
 +
 +        for(i=0; i<enerd->n_lambda; i++)
 +        {
 +            for (j=0;j<efptNR;j++)
 +            {
 +                lam_i[j] = (i==0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
 +            }
 +            reset_enerdata(&ir->opts,fr,TRUE,&ed_lam,FALSE);
 +            do_nonbonded(cr,fr,x,f,f_longrange,md,excl,
 +                         &(ed_lam.grpp), box_size,nrnb,
 +                         lam_i,dvdl_dum,-1,-1,
 +                         GMX_NONBONDED_DO_FOREIGNLAMBDA | GMX_NONBONDED_DO_SR);
 +            sum_epot(&ir->opts,&ed_lam);
 +            enerd->enerpart_lambda[i] += ed_lam.term[F_EPOT];
 +        }
 +        destroy_enerdata(&ed_lam);
 +        wallcycle_sub_stop(wcycle, ewcsNONBONDED);
 +    }
 +    where();
 +
 +      /* If we are doing GB, calculate bonded forces and apply corrections
 +       * to the solvation forces */
 +    /* MRS: Eventually, many need to include free energy contribution here! */
 +      if (ir->implicit_solvent)
 +    {
 +              calc_gb_forces(cr,md,born,top,atype,x,f,fr,idef,
 +                       ir->gb_algorithm,ir->sa_algorithm,nrnb,bBornRadii,&pbc,graph,enerd);
 +        wallcycle_sub_stop(wcycle, ewcsBONDED);
 +    }
 +
 +#ifdef GMX_MPI
 +    if (TAKETIME)
 +    {
 +        t1=MPI_Wtime();
 +        fr->t_fnbf += t1-t0;
 +    }
 +#endif
 +
 +    if (fepvals->sc_alpha!=0)
 +    {
 +        enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW];
 +    }
 +    else
 +    {
 +        enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW];
 +    }
 +
 +    if (fepvals->sc_alpha!=0)
 +
 +        /* even though coulomb part is linear, we already added it, beacuse we
 +           need to go through the vdw calculation anyway */
 +    {
 +        enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL];
 +    }
 +    else
 +    {
 +        enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL];
 +    }
 +
 +    Vsr = 0;
 +    if (bSepDVDL)
 +    {
 +        for(i=0; i<enerd->grpp.nener; i++)
 +        {
 +            Vsr +=
 +                (fr->bBHAM ?
 +                 enerd->grpp.ener[egBHAMSR][i] :
 +                 enerd->grpp.ener[egLJSR][i])
 +                + enerd->grpp.ener[egCOULSR][i] + enerd->grpp.ener[egGB][i];
 +        }
 +        dvdlsum = dvdl_nb[efptVDW] + dvdl_nb[efptCOUL];
 +        PRINT_SEPDVDL("VdW and Coulomb SR particle-p.",Vsr,dvdlsum);
 +    }
 +    debug_gmx();
 +
 +
 +    if (debug)
 +    {
 +        pr_rvecs(debug,0,"fshift after SR",fr->fshift,SHIFTS);
 +    }
 +
 +    /* Shift the coordinates. Must be done before bonded forces and PPPM,
 +     * but is also necessary for SHAKE and update, therefore it can NOT
 +     * go when no bonded forces have to be evaluated.
 +     */
 +
 +    /* Here sometimes we would not need to shift with NBFonly,
 +     * but we do so anyhow for consistency of the returned coordinates.
 +     */
 +    if (graph)
 +    {
 +        shift_self(graph,box,x);
 +        if (TRICLINIC(box))
 +        {
 +            inc_nrnb(nrnb,eNR_SHIFTX,2*graph->nnodes);
 +        }
 +        else
 +        {
 +            inc_nrnb(nrnb,eNR_SHIFTX,graph->nnodes);
 +        }
 +    }
 +    /* Check whether we need to do bondeds or correct for exclusions */
 +    if (fr->bMolPBC &&
 +        ((flags & GMX_FORCE_BONDED)
 +         || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype)))
 +    {
 +        /* Since all atoms are in the rectangular or triclinic unit-cell,
 +         * only single box vector shifts (2 in x) are required.
 +         */
 +        set_pbc_dd(&pbc,fr->ePBC,cr->dd,TRUE,box);
 +    }
 +    debug_gmx();
 +
 +    if (flags & GMX_FORCE_BONDED)
 +    {
 +        wallcycle_sub_start(wcycle, ewcsBONDED);
 +        calc_bonds(fplog,cr->ms,
 +                   idef,x,hist,f,fr,&pbc,graph,enerd,nrnb,lambda,md,fcd,
 +                   DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL, atype, born,
 +                   flags,
 +                   fr->bSepDVDL && do_per_step(step,ir->nstlog),step);
 +
 +        /* Check if we have to determine energy differences
 +         * at foreign lambda's.
 +         */
 +        if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) &&
 +            idef->ilsort != ilsortNO_FE)
 +        {
 +            if (idef->ilsort != ilsortFE_SORTED)
 +            {
 +                gmx_incons("The bonded interactions are not sorted for free energy");
 +            }
 +            init_enerdata(mtop->groups.grps[egcENER].nr,fepvals->n_lambda,&ed_lam);
 +
 +            for(i=0; i<enerd->n_lambda; i++)
 +            {
 +                reset_enerdata(&ir->opts,fr,TRUE,&ed_lam,FALSE);
 +                for (j=0;j<efptNR;j++)
 +                {
 +                    lam_i[j] = (i==0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
 +                }
 +                calc_bonds_lambda(fplog,idef,x,fr,&pbc,graph,&ed_lam,nrnb,lam_i,md,
 +                                  fcd,DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
 +                sum_epot(&ir->opts,&ed_lam);
 +                enerd->enerpart_lambda[i] += ed_lam.term[F_EPOT];
 +            }
 +            destroy_enerdata(&ed_lam);
 +        }
 +        debug_gmx();
 +
 +        wallcycle_sub_stop(wcycle, ewcsBONDED);
 +    }
 +
 +    where();
 +
 +    *cycles_pme = 0;
 +    if (EEL_FULL(fr->eeltype))
 +    {
 +        bSB = (ir->nwall == 2);
 +        if (bSB)
 +        {
 +            copy_mat(box,boxs);
 +            svmul(ir->wall_ewald_zfac,boxs[ZZ],boxs[ZZ]);
 +            box_size[ZZ] *= ir->wall_ewald_zfac;
 +        }
 +
 +        clear_mat(fr->vir_el_recip);
 +
 +        if (fr->bEwald)
 +        {
 +            Vcorr = 0;
 +            dvdl  = 0;
 +
 +            /* With the Verlet scheme exclusion forces are calculated
 +             * in the non-bonded kernel.
 +             */
 +            /* The TPI molecule does not have exclusions with the rest
 +             * of the system and no intra-molecular PME grid contributions
 +             * will be calculated in gmx_pme_calc_energy.
 +             */
 +            if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) ||
 +                ir->ewald_geometry != eewg3D ||
 +                ir->epsilon_surface != 0)
 +            {
 +                int nthreads,t;
 +
 +                wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION);
 +
 +                if (fr->n_tpi > 0)
 +                {
 +                    gmx_fatal(FARGS,"TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions");
 +                }
 +
 +                nthreads = gmx_omp_nthreads_get(emntBonded);
 +#pragma omp parallel for num_threads(nthreads) schedule(static)
 +                for(t=0; t<nthreads; t++)
 +                {
 +                    int s,e,i;
 +                    rvec *fnv;
 +                    tensor *vir;
 +                    real *Vcorrt,*dvdlt;
 +                    if (t == 0)
 +                    {
 +                        fnv    = fr->f_novirsum;
 +                        vir    = &fr->vir_el_recip;
 +                        Vcorrt = &Vcorr;
 +                        dvdlt  = &dvdl;
 +                    }
 +                    else
 +                    {
 +                        fnv    = fr->f_t[t].f;
 +                        vir    = &fr->f_t[t].vir;
 +                        Vcorrt = &fr->f_t[t].Vcorr;
 +                        dvdlt  = &fr->f_t[t].dvdl[efptCOUL];
 +                        for(i=0; i<fr->natoms_force; i++)
 +                        {
 +                            clear_rvec(fnv[i]);
 +                        }
 +                        clear_mat(*vir);
 +                    }
 +                    *dvdlt = 0;
 +                    *Vcorrt =
 +                        ewald_LRcorrection(fplog,
 +                                           fr->excl_load[t],fr->excl_load[t+1],
 +                                           cr,t,fr,
 +                                           md->chargeA,
 +                                           md->nChargePerturbed ? md->chargeB : NULL,
 +                                           ir->cutoff_scheme != ecutsVERLET,
 +                                           excl,x,bSB ? boxs : box,mu_tot,
 +                                           ir->ewald_geometry,
 +                                           ir->epsilon_surface,
 +                                           fnv,*vir,
 +                                           lambda[efptCOUL],dvdlt);
 +                }
 +                if (nthreads > 1)
 +                {
 +                    reduce_thread_forces(fr->natoms_force,fr->f_novirsum,
 +                                         fr->vir_el_recip,
 +                                         &Vcorr,efptCOUL,&dvdl,
 +                                         nthreads,fr->f_t);
 +                }
 +
 +                wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
 +            }
 +
 +            if (fr->n_tpi == 0)
 +            {
 +                Vcorr += ewald_charge_correction(cr,fr,lambda[efptCOUL],box,
 +                                                 &dvdl,fr->vir_el_recip);
 +            }
 +
 +            PRINT_SEPDVDL("Ewald excl./charge/dip. corr.",Vcorr,dvdl);
 +            enerd->dvdl_lin[efptCOUL] += dvdl;
 +        }
 +
 +        status = 0;
 +        Vlr  = 0;
 +        dvdl = 0;
 +        switch (fr->eeltype)
 +        {
 +        case eelPME:
 +        case eelPMESWITCH:
 +        case eelPMEUSER:
 +        case eelPMEUSERSWITCH:
 +        case eelP3M_AD:
 +            if (cr->duty & DUTY_PME)
 +            {
 +                assert(fr->n_tpi >= 0);
 +                if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED))
 +                {
 +                    pme_flags = GMX_PME_SPREAD_Q | GMX_PME_SOLVE;
 +                    if (flags & GMX_FORCE_FORCES)
 +                    {
 +                        pme_flags |= GMX_PME_CALC_F;
 +                    }
 +                    if (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY))
 +                    {
 +                        pme_flags |= GMX_PME_CALC_ENER_VIR;
 +                    }
 +                    if (fr->n_tpi > 0)
 +                    {
 +                        /* We don't calculate f, but we do want the potential */
 +                        pme_flags |= GMX_PME_CALC_POT;
 +                    }
 +                    wallcycle_start(wcycle,ewcPMEMESH);
 +                    status = gmx_pme_do(fr->pmedata,
 +                                        md->start,md->homenr - fr->n_tpi,
 +                                        x,fr->f_novirsum,
 +                                        md->chargeA,md->chargeB,
 +                                        bSB ? boxs : box,cr,
 +                                        DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0,
 +                                        DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0,
 +                                        nrnb,wcycle,
 +                                        fr->vir_el_recip,fr->ewaldcoeff,
 +                                        &Vlr,lambda[efptCOUL],&dvdl,
 +                                        pme_flags);
 +                    *cycles_pme = wallcycle_stop(wcycle,ewcPMEMESH);
 +
 +                    /* We should try to do as little computation after
 +                     * this as possible, because parallel PME synchronizes
 +                     * the nodes, so we want all load imbalance of the rest
 +                     * of the force calculation to be before the PME call.
 +                     * DD load balancing is done on the whole time of
 +                     * the force call (without PME).
 +                     */
 +                }
 +                if (fr->n_tpi > 0)
 +                {
 +                    /* Determine the PME grid energy of the test molecule
 +                     * with the PME grid potential of the other charges.
 +                     */
 +                    gmx_pme_calc_energy(fr->pmedata,fr->n_tpi,
 +                                        x + md->homenr - fr->n_tpi,
 +                                        md->chargeA + md->homenr - fr->n_tpi,
 +                                        &Vlr);
 +                }
 +                PRINT_SEPDVDL("PME mesh",Vlr,dvdl);
 +            }
 +            break;
 +        case eelEWALD:
 +            Vlr = do_ewald(fplog,FALSE,ir,x,fr->f_novirsum,
 +                           md->chargeA,md->chargeB,
 +                           box_size,cr,md->homenr,
 +                           fr->vir_el_recip,fr->ewaldcoeff,
 +                           lambda[efptCOUL],&dvdl,fr->ewald_table);
 +            PRINT_SEPDVDL("Ewald long-range",Vlr,dvdl);
 +            break;
 +        default:
 +            gmx_fatal(FARGS,"No such electrostatics method implemented %s",
 +                      eel_names[fr->eeltype]);
 +        }
 +        if (status != 0)
 +        {
 +            gmx_fatal(FARGS,"Error %d in long range electrostatics routine %s",
 +                      status,EELTYPE(fr->eeltype));
 +              }
 +        /* Note that with separate PME nodes we get the real energies later */
 +        enerd->dvdl_lin[efptCOUL] += dvdl;
 +        enerd->term[F_COUL_RECIP] = Vlr + Vcorr;
 +        if (debug)
 +        {
 +            fprintf(debug,"Vlr = %g, Vcorr = %g, Vlr_corr = %g\n",
 +                    Vlr,Vcorr,enerd->term[F_COUL_RECIP]);
 +            pr_rvecs(debug,0,"vir_el_recip after corr",fr->vir_el_recip,DIM);
 +            pr_rvecs(debug,0,"fshift after LR Corrections",fr->fshift,SHIFTS);
 +        }
 +    }
 +    else
 +    {
 +        if (EEL_RF(fr->eeltype))
 +        {
 +            /* With the Verlet scheme exclusion forces are calculated
 +             * in the non-bonded kernel.
 +             */
 +            if (ir->cutoff_scheme != ecutsVERLET && fr->eeltype != eelRF_NEC)
 +            {
 +                dvdl = 0;
 +                enerd->term[F_RF_EXCL] =
 +                    RF_excl_correction(fplog,fr,graph,md,excl,x,f,
 +                                       fr->fshift,&pbc,lambda[efptCOUL],&dvdl);
 +            }
 +
 +            enerd->dvdl_lin[efptCOUL] += dvdl;
 +            PRINT_SEPDVDL("RF exclusion correction",
 +                          enerd->term[F_RF_EXCL],dvdl);
 +        }
 +    }
 +    where();
 +    debug_gmx();
 +
 +    if (debug)
 +    {
 +        print_nrnb(debug,nrnb);
 +    }
 +    debug_gmx();
 +
 +#ifdef GMX_MPI
 +    if (TAKETIME)
 +    {
 +        t2=MPI_Wtime();
 +        MPI_Barrier(cr->mpi_comm_mygroup);
 +        t3=MPI_Wtime();
 +        fr->t_wait += t3-t2;
 +        if (fr->timesteps == 11)
 +        {
 +            fprintf(stderr,"* PP load balancing info: node %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n",
 +                    cr->nodeid, gmx_step_str(fr->timesteps,buf),
 +                    100*fr->t_wait/(fr->t_wait+fr->t_fnbf),
 +                    (fr->t_fnbf+fr->t_wait)/fr->t_fnbf);
 +        }
 +        fr->timesteps++;
 +    }
 +#endif
 +
 +    if (debug)
 +    {
 +        pr_rvecs(debug,0,"fshift after bondeds",fr->fshift,SHIFTS);
 +    }
 +
 +}
 +
 +void init_enerdata(int ngener,int n_lambda,gmx_enerdata_t *enerd)
 +{
 +    int i,n2;
 +
 +    for(i=0; i<F_NRE; i++)
 +    {
 +        enerd->term[i] = 0;
 +    }
 +
 +
 +    for(i=0; i<efptNR; i++) {
 +        enerd->dvdl_lin[i]  = 0;
 +        enerd->dvdl_nonlin[i]  = 0;
 +    }
 +
 +    n2=ngener*ngener;
 +    if (debug)
 +    {
 +        fprintf(debug,"Creating %d sized group matrix for energies\n",n2);
 +    }
 +    enerd->grpp.nener = n2;
 +    for(i=0; (i<egNR); i++)
 +    {
 +        snew(enerd->grpp.ener[i],n2);
 +    }
 +
 +    if (n_lambda)
 +    {
 +        enerd->n_lambda = 1 + n_lambda;
 +        snew(enerd->enerpart_lambda,enerd->n_lambda);
 +    }
 +    else
 +    {
 +        enerd->n_lambda = 0;
 +    }
 +}
 +
 +void destroy_enerdata(gmx_enerdata_t *enerd)
 +{
 +    int i;
 +
 +    for(i=0; (i<egNR); i++)
 +    {
 +        sfree(enerd->grpp.ener[i]);
 +    }
 +
 +    if (enerd->n_lambda)
 +    {
 +        sfree(enerd->enerpart_lambda);
 +    }
 +}
 +
 +static real sum_v(int n,real v[])
 +{
 +  real t;
 +  int  i;
 +
 +  t = 0.0;
 +  for(i=0; (i<n); i++)
 +    t = t + v[i];
 +
 +  return t;
 +}
 +
 +void sum_epot(t_grpopts *opts,gmx_enerdata_t *enerd)
 +{
 +  gmx_grppairener_t *grpp;
 +  real *epot;
 +  int i;
 +
 +  grpp = &enerd->grpp;
 +  epot = enerd->term;
 +
 +  /* Accumulate energies */
 +  epot[F_COUL_SR]  = sum_v(grpp->nener,grpp->ener[egCOULSR]);
 +  epot[F_LJ]       = sum_v(grpp->nener,grpp->ener[egLJSR]);
 +  epot[F_LJ14]     = sum_v(grpp->nener,grpp->ener[egLJ14]);
 +  epot[F_COUL14]   = sum_v(grpp->nener,grpp->ener[egCOUL14]);
 +  epot[F_COUL_LR]  = sum_v(grpp->nener,grpp->ener[egCOULLR]);
 +  epot[F_LJ_LR]    = sum_v(grpp->nener,grpp->ener[egLJLR]);
 +  /* We have already added 1-2,1-3, and 1-4 terms to F_GBPOL */
 +  epot[F_GBPOL]   += sum_v(grpp->nener,grpp->ener[egGB]);
 +
 +/* lattice part of LR doesnt belong to any group
 + * and has been added earlier
 + */
 +  epot[F_BHAM]     = sum_v(grpp->nener,grpp->ener[egBHAMSR]);
 +  epot[F_BHAM_LR]  = sum_v(grpp->nener,grpp->ener[egBHAMLR]);
 +
 +  epot[F_EPOT] = 0;
 +  for(i=0; (i<F_EPOT); i++)
 +  {
 +      if (i != F_DISRESVIOL && i != F_ORIRESDEV)
 +      {
 +          epot[F_EPOT] += epot[i];
 +      }
 +  }
 +}
 +
 +void sum_dhdl(gmx_enerdata_t *enerd, real *lambda, t_lambda *fepvals)
 +{
 +    int i,j,index;
 +    double dlam;
 +
 +    enerd->dvdl_lin[efptVDW] += enerd->term[F_DVDL_VDW];  /* include dispersion correction */
 +    enerd->term[F_DVDL] = 0.0;
 +    for (i=0;i<efptNR;i++)
 +    {
 +        if (fepvals->separate_dvdl[i])
 +        {
 +            /* could this be done more readably/compactly? */
 +            switch (i) {
 +            case (efptCOUL):
 +                index = F_DVDL_COUL;
 +                break;
 +            case (efptVDW):
 +                index = F_DVDL_VDW;
 +                break;
 +            case (efptBONDED):
 +                index = F_DVDL_BONDED;
 +                break;
 +            case (efptRESTRAINT):
 +                index = F_DVDL_RESTRAINT;
 +                break;
 +            case (efptMASS):
 +                index = F_DKDL;
 +                break;
 +            default:
 +                index = F_DVDL;
 +                break;
 +            }
 +            enerd->term[index] = enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i];
 +            if (debug)
 +            {
 +                fprintf(debug,"dvdl-%s[%2d]: %f: non-linear %f + linear %f\n",
 +                        efpt_names[i],i,enerd->term[index],enerd->dvdl_nonlin[i],enerd->dvdl_lin[i]);
 +            }
 +        }
 +        else
 +        {
 +            enerd->term[F_DVDL] += enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i];
 +            if (debug)
 +            {
 +                fprintf(debug,"dvd-%sl[%2d]: %f: non-linear %f + linear %f\n",
 +                        efpt_names[0],i,enerd->term[F_DVDL],enerd->dvdl_nonlin[i],enerd->dvdl_lin[i]);
 +            }
 +        }
 +    }
 +
 +    /* Notes on the foreign lambda free energy difference evaluation:
 +     * Adding the potential and ekin terms that depend linearly on lambda
 +     * as delta lam * dvdl to the energy differences is exact.
 +     * For the constraints this is not exact, but we have no other option
 +     * without literally changing the lengths and reevaluating the energies at each step.
 +     * (try to remedy this post 4.6 - MRS)
 +     * For the non-bonded LR term we assume that the soft-core (if present)
 +     * no longer affects the energy beyond the short-range cut-off,
 +     * which is a very good approximation (except for exotic settings).
 +     * (investigate how to overcome this post 4.6 - MRS)
 +     */
 +
 +    for(i=0; i<fepvals->n_lambda; i++)
 +    {                                         /* note we are iterating over fepvals here!
 +                                                 For the current lam, dlam = 0 automatically,
 +                                                 so we don't need to add anything to the
 +                                                 enerd->enerpart_lambda[0] */
 +
 +        /* we don't need to worry about dvdl contributions to the current lambda, because
 +           it's automatically zero */
 +
 +        /* first kinetic energy term */
 +        dlam = (fepvals->all_lambda[efptMASS][i] - lambda[efptMASS]);
 +
 +        enerd->enerpart_lambda[i+1] += enerd->term[F_DKDL]*dlam;
 +
 +        for (j=0;j<efptNR;j++)
 +        {
 +            if (j==efptMASS) {continue;} /* no other mass term to worry about */
 +
 +            dlam = (fepvals->all_lambda[j][i]-lambda[j]);
 +            enerd->enerpart_lambda[i+1] += dlam*enerd->dvdl_lin[j];
 +            if (debug)
 +            {
 +                fprintf(debug,"enerdiff lam %g: (%15s), non-linear %f linear %f*%f\n",
 +                        fepvals->all_lambda[j][i],efpt_names[j],
 +                        (enerd->enerpart_lambda[i+1] - enerd->enerpart_lambda[0]),
 +                        dlam,enerd->dvdl_lin[j]);
 +            }
 +        }
 +    }
 +}
 +
 +void reset_enerdata(t_grpopts *opts,
 +                    t_forcerec *fr,gmx_bool bNS,
 +                    gmx_enerdata_t *enerd,
 +                    gmx_bool bMaster)
 +{
 +    gmx_bool bKeepLR;
 +    int  i,j;
 +
 +    /* First reset all energy components, except for the long range terms
 +     * on the master at non neighbor search steps, since the long range
 +     * terms have already been summed at the last neighbor search step.
 +     */
 +    bKeepLR = (fr->bTwinRange && !bNS);
 +    for(i=0; (i<egNR); i++) {
 +        if (!(bKeepLR && bMaster && (i == egCOULLR || i == egLJLR))) {
 +            for(j=0; (j<enerd->grpp.nener); j++)
 +                enerd->grpp.ener[i][j] = 0.0;
 +        }
 +    }
 +    for (i=0;i<efptNR;i++)
 +    {
 +        enerd->dvdl_lin[i]    = 0.0;
 +        enerd->dvdl_nonlin[i] = 0.0;
 +    }
 +
 +    /* Normal potential energy components */
 +    for(i=0; (i<=F_EPOT); i++) {
 +        enerd->term[i] = 0.0;
 +    }
 +    /* Initialize the dVdlambda term with the long range contribution */
 +    /* Initialize the dvdl term with the long range contribution */
 +    enerd->term[F_DVDL]            = 0.0;
 +    enerd->term[F_DVDL_COUL]       = 0.0;
 +    enerd->term[F_DVDL_VDW]        = 0.0;
 +    enerd->term[F_DVDL_BONDED]     = 0.0;
 +    enerd->term[F_DVDL_RESTRAINT]  = 0.0;
 +    enerd->term[F_DKDL]            = 0.0;
 +    if (enerd->n_lambda > 0)
 +    {
 +        for(i=0; i<enerd->n_lambda; i++)
 +        {
 +            enerd->enerpart_lambda[i] = 0.0;
 +        }
 +    }
 +}
index 97eb67675e5dce653504ed6ada54f9ce234abc95,0000000000000000000000000000000000000000..6690a5bf62be762ca9ec66309ede056bbb9f0715
mode 100644,000000..100644
--- /dev/null
@@@ -1,2692 -1,0 +1,2692 @@@
- static void init_forcerec_f_threads(t_forcerec *fr,int grpp_nener)
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <math.h>
 +#include <string.h>
 +#include <assert.h>
 +#include "sysstuff.h"
 +#include "typedefs.h"
 +#include "vec.h"
 +#include "maths.h"
 +#include "macros.h"
 +#include "smalloc.h"
 +#include "macros.h"
 +#include "gmx_fatal.h"
 +#include "gmx_fatal_collective.h"
 +#include "physics.h"
 +#include "force.h"
 +#include "tables.h"
 +#include "nonbonded.h"
 +#include "invblock.h"
 +#include "names.h"
 +#include "network.h"
 +#include "pbc.h"
 +#include "ns.h"
 +#include "mshift.h"
 +#include "txtdump.h"
 +#include "coulomb.h"
 +#include "md_support.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "qmmm.h"
 +#include "copyrite.h"
 +#include "mtop_util.h"
 +#include "nbnxn_search.h"
 +#include "nbnxn_atomdata.h"
 +#include "nbnxn_consts.h"
 +#include "statutil.h"
 +#include "gmx_omp_nthreads.h"
 +
 +#ifdef _MSC_VER
 +/* MSVC definition for __cpuid() */
 +#include <intrin.h>
 +#endif
 +
 +#include "types/nbnxn_cuda_types_ext.h"
 +#include "gpu_utils.h"
 +#include "nbnxn_cuda_data_mgmt.h"
 +#include "pmalloc_cuda.h"
 +
 +t_forcerec *mk_forcerec(void)
 +{
 +  t_forcerec *fr;
 +  
 +  snew(fr,1);
 +  
 +  return fr;
 +}
 +
 +#ifdef DEBUG
 +static void pr_nbfp(FILE *fp,real *nbfp,gmx_bool bBHAM,int atnr)
 +{
 +  int i,j;
 +  
 +  for(i=0; (i<atnr); i++) {
 +    for(j=0; (j<atnr); j++) {
 +      fprintf(fp,"%2d - %2d",i,j);
 +      if (bBHAM)
 +      fprintf(fp,"  a=%10g, b=%10g, c=%10g\n",BHAMA(nbfp,atnr,i,j),
 +              BHAMB(nbfp,atnr,i,j),BHAMC(nbfp,atnr,i,j)/6.0);
 +      else
 +      fprintf(fp,"  c6=%10g, c12=%10g\n",C6(nbfp,atnr,i,j)/6.0,
 +            C12(nbfp,atnr,i,j)/12.0);
 +    }
 +  }
 +}
 +#endif
 +
 +static real *mk_nbfp(const gmx_ffparams_t *idef,gmx_bool bBHAM)
 +{
 +  real *nbfp;
 +  int  i,j,k,atnr;
 +  
 +  atnr=idef->atnr;
 +  if (bBHAM) {
 +    snew(nbfp,3*atnr*atnr);
 +    for(i=k=0; (i<atnr); i++) {
 +      for(j=0; (j<atnr); j++,k++) {
 +          BHAMA(nbfp,atnr,i,j) = idef->iparams[k].bham.a;
 +          BHAMB(nbfp,atnr,i,j) = idef->iparams[k].bham.b;
 +          /* nbfp now includes the 6.0 derivative prefactor */
 +          BHAMC(nbfp,atnr,i,j) = idef->iparams[k].bham.c*6.0;
 +      }
 +    }
 +  }
 +  else {
 +    snew(nbfp,2*atnr*atnr);
 +    for(i=k=0; (i<atnr); i++) {
 +      for(j=0; (j<atnr); j++,k++) {
 +          /* nbfp now includes the 6.0/12.0 derivative prefactors */
 +          C6(nbfp,atnr,i,j)   = idef->iparams[k].lj.c6*6.0;
 +          C12(nbfp,atnr,i,j)  = idef->iparams[k].lj.c12*12.0;
 +      }
 +    }
 +  }
 +
 +  return nbfp;
 +}
 +
 +/* This routine sets fr->solvent_opt to the most common solvent in the 
 + * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in 
 + * the fr->solvent_type array with the correct type (or esolNO).
 + *
 + * Charge groups that fulfill the conditions but are not identical to the
 + * most common one will be marked as esolNO in the solvent_type array. 
 + *
 + * TIP3p is identical to SPC for these purposes, so we call it
 + * SPC in the arrays (Apologies to Bill Jorgensen ;-)
 + * 
 + * NOTE: QM particle should not
 + * become an optimized solvent. Not even if there is only one charge
 + * group in the Qm 
 + */
 +
 +typedef struct 
 +{
 +    int    model;          
 +    int    count;
 +    int    vdwtype[4];
 +    real   charge[4];
 +} solvent_parameters_t;
 +
 +static void
 +check_solvent_cg(const gmx_moltype_t   *molt,
 +                 int                   cg0,
 +                 int                   nmol,
 +                 const unsigned char   *qm_grpnr,
 +                 const t_grps          *qm_grps,
 +                 t_forcerec *          fr,
 +                 int                   *n_solvent_parameters,
 +                 solvent_parameters_t  **solvent_parameters_p,
 +                 int                   cginfo,
 +                 int                   *cg_sp)
 +{
 +    const t_blocka *  excl;
 +    t_atom            *atom;
 +    int               j,k;
 +    int               j0,j1,nj;
 +    gmx_bool              perturbed;
 +    gmx_bool              has_vdw[4];
 +    gmx_bool              match;
 +    real              tmp_charge[4];
 +    int               tmp_vdwtype[4];
 +    int               tjA;
 +    gmx_bool              qm;
 +    solvent_parameters_t *solvent_parameters;
 +
 +    /* We use a list with parameters for each solvent type. 
 +     * Every time we discover a new molecule that fulfills the basic 
 +     * conditions for a solvent we compare with the previous entries
 +     * in these lists. If the parameters are the same we just increment
 +     * the counter for that type, and otherwise we create a new type
 +     * based on the current molecule.
 +     *
 +     * Once we've finished going through all molecules we check which
 +     * solvent is most common, and mark all those molecules while we
 +     * clear the flag on all others.
 +     */   
 +
 +    solvent_parameters = *solvent_parameters_p;
 +
 +    /* Mark the cg first as non optimized */
 +    *cg_sp = -1;
 +    
 +    /* Check if this cg has no exclusions with atoms in other charge groups
 +     * and all atoms inside the charge group excluded.
 +     * We only have 3 or 4 atom solvent loops.
 +     */
 +    if (GET_CGINFO_EXCL_INTER(cginfo) ||
 +        !GET_CGINFO_EXCL_INTRA(cginfo))
 +    {
 +        return;
 +    }
 +
 +    /* Get the indices of the first atom in this charge group */
 +    j0     = molt->cgs.index[cg0];
 +    j1     = molt->cgs.index[cg0+1];
 +    
 +    /* Number of atoms in our molecule */
 +    nj     = j1 - j0;
 +
 +    if (debug) {
 +        fprintf(debug,
 +                "Moltype '%s': there are %d atoms in this charge group\n",
 +                *molt->name,nj);
 +    }
 +    
 +    /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
 +     * otherwise skip it.
 +     */
 +    if (nj<3 || nj>4)
 +    {
 +        return;
 +    }
 +    
 +    /* Check if we are doing QM on this group */
 +    qm = FALSE; 
 +    if (qm_grpnr != NULL)
 +    {
 +        for(j=j0 ; j<j1 && !qm; j++)
 +        {
 +            qm = (qm_grpnr[j] < qm_grps->nr - 1);
 +        }
 +    }
 +    /* Cannot use solvent optimization with QM */
 +    if (qm)
 +    {
 +        return;
 +    }
 +    
 +    atom = molt->atoms.atom;
 +
 +    /* Still looks like a solvent, time to check parameters */
 +    
 +    /* If it is perturbed (free energy) we can't use the solvent loops,
 +     * so then we just skip to the next molecule.
 +     */   
 +    perturbed = FALSE; 
 +    
 +    for(j=j0; j<j1 && !perturbed; j++)
 +    {
 +        perturbed = PERTURBED(atom[j]);
 +    }
 +    
 +    if (perturbed)
 +    {
 +        return;
 +    }
 +    
 +    /* Now it's only a question if the VdW and charge parameters 
 +     * are OK. Before doing the check we compare and see if they are 
 +     * identical to a possible previous solvent type.
 +     * First we assign the current types and charges.    
 +     */
 +    for(j=0; j<nj; j++)
 +    {
 +        tmp_vdwtype[j] = atom[j0+j].type;
 +        tmp_charge[j]  = atom[j0+j].q;
 +    } 
 +    
 +    /* Does it match any previous solvent type? */
 +    for(k=0 ; k<*n_solvent_parameters; k++)
 +    {
 +        match = TRUE;
 +        
 +        
 +        /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
 +        if( (solvent_parameters[k].model==esolSPC   && nj!=3)  ||
 +            (solvent_parameters[k].model==esolTIP4P && nj!=4) )
 +            match = FALSE;
 +        
 +        /* Check that types & charges match for all atoms in molecule */
 +        for(j=0 ; j<nj && match==TRUE; j++)
 +        {                     
 +            if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
 +            {
 +                match = FALSE;
 +            }
 +            if(tmp_charge[j] != solvent_parameters[k].charge[j])
 +            {
 +                match = FALSE;
 +            }
 +        }
 +        if (match == TRUE)
 +        {
 +            /* Congratulations! We have a matched solvent.
 +             * Flag it with this type for later processing.
 +             */
 +            *cg_sp = k;
 +            solvent_parameters[k].count += nmol;
 +
 +            /* We are done with this charge group */
 +            return;
 +        }
 +    }
 +    
 +    /* If we get here, we have a tentative new solvent type.
 +     * Before we add it we must check that it fulfills the requirements
 +     * of the solvent optimized loops. First determine which atoms have
 +     * VdW interactions.   
 +     */
 +    for(j=0; j<nj; j++) 
 +    {
 +        has_vdw[j] = FALSE;
 +        tjA        = tmp_vdwtype[j];
 +        
 +        /* Go through all other tpes and see if any have non-zero
 +         * VdW parameters when combined with this one.
 +         */   
 +        for(k=0; k<fr->ntype && (has_vdw[j]==FALSE); k++)
 +        {
 +            /* We already checked that the atoms weren't perturbed,
 +             * so we only need to check state A now.
 +             */ 
 +            if (fr->bBHAM) 
 +            {
 +                has_vdw[j] = (has_vdw[j] || 
 +                              (BHAMA(fr->nbfp,fr->ntype,tjA,k) != 0.0) ||
 +                              (BHAMB(fr->nbfp,fr->ntype,tjA,k) != 0.0) ||
 +                              (BHAMC(fr->nbfp,fr->ntype,tjA,k) != 0.0));
 +            }
 +            else
 +            {
 +                /* Standard LJ */
 +                has_vdw[j] = (has_vdw[j] || 
 +                              (C6(fr->nbfp,fr->ntype,tjA,k)  != 0.0) ||
 +                              (C12(fr->nbfp,fr->ntype,tjA,k) != 0.0));
 +            }
 +        }
 +    }
 +    
 +    /* Now we know all we need to make the final check and assignment. */
 +    if (nj == 3)
 +    {
 +        /* So, is it an SPC?
 +         * For this we require thatn all atoms have charge, 
 +         * the charges on atom 2 & 3 should be the same, and only
 +         * atom 1 might have VdW.
 +         */
 +        if (has_vdw[1] == FALSE &&
 +            has_vdw[2] == FALSE &&
 +            tmp_charge[0]  != 0 &&
 +            tmp_charge[1]  != 0 &&
 +            tmp_charge[2]  == tmp_charge[1])
 +        {
 +            srenew(solvent_parameters,*n_solvent_parameters+1);
 +            solvent_parameters[*n_solvent_parameters].model = esolSPC;
 +            solvent_parameters[*n_solvent_parameters].count = nmol;
 +            for(k=0;k<3;k++)
 +            {
 +                solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
 +                solvent_parameters[*n_solvent_parameters].charge[k]  = tmp_charge[k];
 +            }
 +
 +            *cg_sp = *n_solvent_parameters;
 +            (*n_solvent_parameters)++;
 +        }
 +    }
 +    else if (nj==4)
 +    {
 +        /* Or could it be a TIP4P?
 +         * For this we require thatn atoms 2,3,4 have charge, but not atom 1. 
 +         * Only atom 1 mght have VdW.
 +         */
 +        if(has_vdw[1] == FALSE &&
 +           has_vdw[2] == FALSE &&
 +           has_vdw[3] == FALSE &&
 +           tmp_charge[0]  == 0 &&
 +           tmp_charge[1]  != 0 &&
 +           tmp_charge[2]  == tmp_charge[1] &&
 +           tmp_charge[3]  != 0)
 +        {
 +            srenew(solvent_parameters,*n_solvent_parameters+1);
 +            solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
 +            solvent_parameters[*n_solvent_parameters].count = nmol;
 +            for(k=0;k<4;k++)
 +            {
 +                solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
 +                solvent_parameters[*n_solvent_parameters].charge[k]  = tmp_charge[k];
 +            }
 +            
 +            *cg_sp = *n_solvent_parameters;
 +            (*n_solvent_parameters)++;
 +        }
 +    }
 +
 +    *solvent_parameters_p = solvent_parameters;
 +}
 +
 +static void
 +check_solvent(FILE *                fp,
 +              const gmx_mtop_t *    mtop,
 +              t_forcerec *          fr,
 +              cginfo_mb_t           *cginfo_mb)
 +{
 +    const t_block *   cgs;
 +    const t_block *   mols;
 +    const gmx_moltype_t *molt;
 +    int               mb,mol,cg_mol,at_offset,cg_offset,am,cgm,i,nmol_ch,nmol;
 +    int               n_solvent_parameters;
 +    solvent_parameters_t *solvent_parameters;
 +    int               **cg_sp;
 +    int               bestsp,bestsol;
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"Going to determine what solvent types we have.\n");
 +    }
 +
 +    mols = &mtop->mols;
 +
 +    n_solvent_parameters = 0;
 +    solvent_parameters = NULL;
 +    /* Allocate temporary array for solvent type */
 +    snew(cg_sp,mtop->nmolblock);
 +
 +    cg_offset = 0;
 +    at_offset = 0;
 +    for(mb=0; mb<mtop->nmolblock; mb++)
 +    {
 +        molt = &mtop->moltype[mtop->molblock[mb].type];
 +        cgs  = &molt->cgs;
 +        /* Here we have to loop over all individual molecules
 +         * because we need to check for QMMM particles.
 +         */
 +        snew(cg_sp[mb],cginfo_mb[mb].cg_mod);
 +        nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
 +        nmol    = mtop->molblock[mb].nmol/nmol_ch;
 +        for(mol=0; mol<nmol_ch; mol++)
 +        {
 +            cgm = mol*cgs->nr;
 +            am  = mol*cgs->index[cgs->nr];
 +            for(cg_mol=0; cg_mol<cgs->nr; cg_mol++)
 +            {
 +                check_solvent_cg(molt,cg_mol,nmol,
 +                                 mtop->groups.grpnr[egcQMMM] ?
 +                                 mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
 +                                 &mtop->groups.grps[egcQMMM],
 +                                 fr,
 +                                 &n_solvent_parameters,&solvent_parameters,
 +                                 cginfo_mb[mb].cginfo[cgm+cg_mol],
 +                                 &cg_sp[mb][cgm+cg_mol]);
 +            }
 +        }
 +        cg_offset += cgs->nr;
 +        at_offset += cgs->index[cgs->nr];
 +    }
 +
 +    /* Puh! We finished going through all charge groups.
 +     * Now find the most common solvent model.
 +     */   
 +    
 +    /* Most common solvent this far */
 +    bestsp = -2;
 +    for(i=0;i<n_solvent_parameters;i++)
 +    {
 +        if (bestsp == -2 ||
 +            solvent_parameters[i].count > solvent_parameters[bestsp].count)
 +        {
 +            bestsp = i;
 +        }
 +    }
 +    
 +    if (bestsp >= 0)
 +    {
 +        bestsol = solvent_parameters[bestsp].model;
 +    }
 +    else
 +    {
 +        bestsol = esolNO;
 +    }
 +    
 +#ifdef DISABLE_WATER_NLIST
 +      bestsol = esolNO;
 +#endif
 +
 +    fr->nWatMol = 0;
 +    for(mb=0; mb<mtop->nmolblock; mb++)
 +    {
 +        cgs = &mtop->moltype[mtop->molblock[mb].type].cgs;
 +        nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
 +        for(i=0; i<cginfo_mb[mb].cg_mod; i++)
 +        {
 +            if (cg_sp[mb][i] == bestsp)
 +            {
 +                SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i],bestsol);
 +                fr->nWatMol += nmol;
 +            }
 +            else
 +            {
 +                SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i],esolNO);
 +            }
 +        }
 +        sfree(cg_sp[mb]);
 +    }
 +    sfree(cg_sp);
 +    
 +    if (bestsol != esolNO && fp!=NULL)
 +    {
 +        fprintf(fp,"\nEnabling %s-like water optimization for %d molecules.\n\n",
 +                esol_names[bestsol],
 +                solvent_parameters[bestsp].count);
 +    }
 +
 +    sfree(solvent_parameters);
 +    fr->solvent_opt = bestsol;
 +}
 +
 +enum { acNONE=0, acCONSTRAINT, acSETTLE };
 +
 +static cginfo_mb_t *init_cginfo_mb(FILE *fplog,const gmx_mtop_t *mtop,
 +                                   t_forcerec *fr,gmx_bool bNoSolvOpt,
 +                                   gmx_bool *bExcl_IntraCGAll_InterCGNone)
 +{
 +    const t_block *cgs;
 +    const t_blocka *excl;
 +    const gmx_moltype_t *molt;
 +    const gmx_molblock_t *molb;
 +    cginfo_mb_t *cginfo_mb;
 +    gmx_bool *type_VDW;
 +    int  *cginfo;
 +    int  cg_offset,a_offset,cgm,am;
 +    int  mb,m,ncg_tot,cg,a0,a1,gid,ai,j,aj,excl_nalloc;
 +    int  *a_con;
 +    int  ftype;
 +    int  ia;
 +    gmx_bool bId,*bExcl,bExclIntraAll,bExclInter,bHaveVDW,bHaveQ;
 +
 +    ncg_tot = ncg_mtop(mtop);
 +    snew(cginfo_mb,mtop->nmolblock);
 +
 +    snew(type_VDW,fr->ntype);
 +    for(ai=0; ai<fr->ntype; ai++)
 +    {
 +        type_VDW[ai] = FALSE;
 +        for(j=0; j<fr->ntype; j++)
 +        {
 +            type_VDW[ai] = type_VDW[ai] ||
 +                fr->bBHAM ||
 +                C6(fr->nbfp,fr->ntype,ai,j) != 0 ||
 +                C12(fr->nbfp,fr->ntype,ai,j) != 0;
 +        }
 +    }
 +
 +    *bExcl_IntraCGAll_InterCGNone = TRUE;
 +
 +    excl_nalloc = 10;
 +    snew(bExcl,excl_nalloc);
 +    cg_offset = 0;
 +    a_offset  = 0;
 +    for(mb=0; mb<mtop->nmolblock; mb++)
 +    {
 +        molb = &mtop->molblock[mb];
 +        molt = &mtop->moltype[molb->type];
 +        cgs  = &molt->cgs;
 +        excl = &molt->excls;
 +
 +        /* Check if the cginfo is identical for all molecules in this block.
 +         * If so, we only need an array of the size of one molecule.
 +         * Otherwise we make an array of #mol times #cgs per molecule.
 +         */
 +        bId = TRUE;
 +        am = 0;
 +        for(m=0; m<molb->nmol; m++)
 +        {
 +            am = m*cgs->index[cgs->nr];
 +            for(cg=0; cg<cgs->nr; cg++)
 +            {
 +                a0 = cgs->index[cg];
 +                a1 = cgs->index[cg+1];
 +                if (ggrpnr(&mtop->groups,egcENER,a_offset+am+a0) !=
 +                    ggrpnr(&mtop->groups,egcENER,a_offset   +a0))
 +                {
 +                    bId = FALSE;
 +                }
 +                if (mtop->groups.grpnr[egcQMMM] != NULL)
 +                {
 +                    for(ai=a0; ai<a1; ai++)
 +                    {
 +                        if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
 +                            mtop->groups.grpnr[egcQMMM][a_offset   +ai])
 +                        {
 +                            bId = FALSE;
 +                        }
 +                    }
 +                }
 +            }
 +        }
 +
 +        cginfo_mb[mb].cg_start = cg_offset;
 +        cginfo_mb[mb].cg_end   = cg_offset + molb->nmol*cgs->nr;
 +        cginfo_mb[mb].cg_mod   = (bId ? 1 : molb->nmol)*cgs->nr;
 +        snew(cginfo_mb[mb].cginfo,cginfo_mb[mb].cg_mod);
 +        cginfo = cginfo_mb[mb].cginfo;
 +
 +        /* Set constraints flags for constrained atoms */
 +        snew(a_con,molt->atoms.nr);
 +        for(ftype=0; ftype<F_NRE; ftype++)
 +        {
 +            if (interaction_function[ftype].flags & IF_CONSTRAINT)
 +            {
 +                int nral;
 +
 +                nral = NRAL(ftype);
 +                for(ia=0; ia<molt->ilist[ftype].nr; ia+=1+nral)
 +                {
 +                    int a;
 +
 +                    for(a=0; a<nral; a++)
 +                    {
 +                        a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
 +                            (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
 +                    }
 +                }
 +            }
 +        }
 +
 +        for(m=0; m<(bId ? 1 : molb->nmol); m++)
 +        {
 +            cgm = m*cgs->nr;
 +            am  = m*cgs->index[cgs->nr];
 +            for(cg=0; cg<cgs->nr; cg++)
 +            {
 +                a0 = cgs->index[cg];
 +                a1 = cgs->index[cg+1];
 +
 +                /* Store the energy group in cginfo */
 +                gid = ggrpnr(&mtop->groups,egcENER,a_offset+am+a0);
 +                SET_CGINFO_GID(cginfo[cgm+cg],gid);
 +                
 +                /* Check the intra/inter charge group exclusions */
 +                if (a1-a0 > excl_nalloc) {
 +                    excl_nalloc = a1 - a0;
 +                    srenew(bExcl,excl_nalloc);
 +                }
 +                /* bExclIntraAll: all intra cg interactions excluded
 +                 * bExclInter:    any inter cg interactions excluded
 +                 */
 +                bExclIntraAll = TRUE;
 +                bExclInter    = FALSE;
 +                bHaveVDW      = FALSE;
 +                bHaveQ        = FALSE;
 +                for(ai=a0; ai<a1; ai++)
 +                {
 +                    /* Check VDW and electrostatic interactions */
 +                    bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
 +                                            type_VDW[molt->atoms.atom[ai].typeB]);
 +                    bHaveQ  = bHaveQ    || (molt->atoms.atom[ai].q != 0 ||
 +                                            molt->atoms.atom[ai].qB != 0);
 +
 +                    /* Clear the exclusion list for atom ai */
 +                    for(aj=a0; aj<a1; aj++)
 +                    {
 +                        bExcl[aj-a0] = FALSE;
 +                    }
 +                    /* Loop over all the exclusions of atom ai */
 +                    for(j=excl->index[ai]; j<excl->index[ai+1]; j++)
 +                    {
 +                        aj = excl->a[j];
 +                        if (aj < a0 || aj >= a1)
 +                        {
 +                            bExclInter = TRUE;
 +                        }
 +                        else
 +                        {
 +                            bExcl[aj-a0] = TRUE;
 +                        }
 +                    }
 +                    /* Check if ai excludes a0 to a1 */
 +                    for(aj=a0; aj<a1; aj++)
 +                    {
 +                        if (!bExcl[aj-a0])
 +                        {
 +                            bExclIntraAll = FALSE;
 +                        }
 +                    }
 +
 +                    switch (a_con[ai])
 +                    {
 +                    case acCONSTRAINT:
 +                        SET_CGINFO_CONSTR(cginfo[cgm+cg]);
 +                        break;
 +                    case acSETTLE:
 +                        SET_CGINFO_SETTLE(cginfo[cgm+cg]);
 +                        break;
 +                    default:
 +                        break;
 +                    }
 +                }
 +                if (bExclIntraAll)
 +                {
 +                    SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
 +                }
 +                if (bExclInter)
 +                {
 +                    SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
 +                }
 +                if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
 +                {
 +                    /* The size in cginfo is currently only read with DD */
 +                    gmx_fatal(FARGS,"A charge group has size %d which is larger than the limit of %d atoms",a1-a0,MAX_CHARGEGROUP_SIZE);
 +                }
 +                if (bHaveVDW)
 +                {
 +                    SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
 +                }
 +                if (bHaveQ)
 +                {
 +                    SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
 +                }
 +                /* Store the charge group size */
 +                SET_CGINFO_NATOMS(cginfo[cgm+cg],a1-a0);
 +
 +                if (!bExclIntraAll || bExclInter)
 +                {
 +                    *bExcl_IntraCGAll_InterCGNone = FALSE;
 +                }
 +            }
 +        }
 +
 +        sfree(a_con);
 +
 +        cg_offset += molb->nmol*cgs->nr;
 +        a_offset  += molb->nmol*cgs->index[cgs->nr];
 +    }
 +    sfree(bExcl);
 +    
 +    /* the solvent optimizer is called after the QM is initialized,
 +     * because we don't want to have the QM subsystemto become an
 +     * optimized solvent
 +     */
 +
 +    check_solvent(fplog,mtop,fr,cginfo_mb);
 +    
 +    if (getenv("GMX_NO_SOLV_OPT"))
 +    {
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Found environment variable GMX_NO_SOLV_OPT.\n"
 +                    "Disabling all solvent optimization\n");
 +        }
 +        fr->solvent_opt = esolNO;
 +    }
 +    if (bNoSolvOpt)
 +    {
 +        fr->solvent_opt = esolNO;
 +    }
 +    if (!fr->solvent_opt)
 +    {
 +        for(mb=0; mb<mtop->nmolblock; mb++)
 +        {
 +            for(cg=0; cg<cginfo_mb[mb].cg_mod; cg++)
 +            {
 +                SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg],esolNO);
 +            }
 +        }
 +    }
 +    
 +    return cginfo_mb;
 +}
 +
 +static int *cginfo_expand(int nmb,cginfo_mb_t *cgi_mb)
 +{
 +    int ncg,mb,cg;
 +    int *cginfo;
 +
 +    ncg = cgi_mb[nmb-1].cg_end;
 +    snew(cginfo,ncg);
 +    mb = 0;
 +    for(cg=0; cg<ncg; cg++)
 +    {
 +        while (cg >= cgi_mb[mb].cg_end)
 +        {
 +            mb++;
 +        }
 +        cginfo[cg] =
 +            cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
 +    }
 +
 +    return cginfo;
 +}
 +
 +static void set_chargesum(FILE *log,t_forcerec *fr,const gmx_mtop_t *mtop)
 +{
 +    double qsum,q2sum,q;
 +    int    mb,nmol,i;
 +    const t_atoms *atoms;
 +    
 +    qsum  = 0;
 +    q2sum = 0;
 +    for(mb=0; mb<mtop->nmolblock; mb++)
 +    {
 +        nmol  = mtop->molblock[mb].nmol;
 +        atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 +        for(i=0; i<atoms->nr; i++)
 +        {
 +            q = atoms->atom[i].q;
 +            qsum  += nmol*q;
 +            q2sum += nmol*q*q;
 +        }
 +    }
 +    fr->qsum[0]  = qsum;
 +    fr->q2sum[0] = q2sum;
 +    if (fr->efep != efepNO)
 +    {
 +        qsum  = 0;
 +        q2sum = 0;
 +        for(mb=0; mb<mtop->nmolblock; mb++)
 +        {
 +            nmol  = mtop->molblock[mb].nmol;
 +            atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 +            for(i=0; i<atoms->nr; i++)
 +            {
 +                q = atoms->atom[i].qB;
 +                qsum  += nmol*q;
 +                q2sum += nmol*q*q;
 +            }
 +            fr->qsum[1]  = qsum;
 +            fr->q2sum[1] = q2sum;
 +        }
 +    }
 +    else
 +    {
 +        fr->qsum[1]  = fr->qsum[0];
 +        fr->q2sum[1] = fr->q2sum[0];
 +    }
 +    if (log) {
 +        if (fr->efep == efepNO)
 +            fprintf(log,"System total charge: %.3f\n",fr->qsum[0]);
 +        else
 +            fprintf(log,"System total charge, top. A: %.3f top. B: %.3f\n",
 +                    fr->qsum[0],fr->qsum[1]);
 +    }
 +}
 +
 +void update_forcerec(FILE *log,t_forcerec *fr,matrix box)
 +{
 +    if (fr->eeltype == eelGRF)
 +    {
 +        calc_rffac(NULL,fr->eeltype,fr->epsilon_r,fr->epsilon_rf,
 +                   fr->rcoulomb,fr->temp,fr->zsquare,box,
 +                   &fr->kappa,&fr->k_rf,&fr->c_rf);
 +    }
 +}
 +
 +void set_avcsixtwelve(FILE *fplog,t_forcerec *fr,const gmx_mtop_t *mtop)
 +{
 +    const t_atoms *atoms,*atoms_tpi;
 +    const t_blocka *excl;
 +    int    mb,nmol,nmolc,i,j,tpi,tpj,j1,j2,k,n,nexcl,q;
 +#if (defined SIZEOF_LONG_LONG_INT) && (SIZEOF_LONG_LONG_INT >= 8)    
 +    long long int  npair,npair_ij,tmpi,tmpj;
 +#else
 +    double npair, npair_ij,tmpi,tmpj;
 +#endif
 +    double csix,ctwelve;
 +    int    ntp,*typecount;
 +    gmx_bool   bBHAM;
 +    real   *nbfp;
 +
 +    ntp = fr->ntype;
 +    bBHAM = fr->bBHAM;
 +    nbfp = fr->nbfp;
 +    
 +    for(q=0; q<(fr->efep==efepNO ? 1 : 2); q++) {
 +        csix = 0;
 +        ctwelve = 0;
 +        npair = 0;
 +        nexcl = 0;
 +        if (!fr->n_tpi) {
 +            /* Count the types so we avoid natoms^2 operations */
 +            snew(typecount,ntp);
 +            for(mb=0; mb<mtop->nmolblock; mb++) {
 +                nmol  = mtop->molblock[mb].nmol;
 +                atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 +                for(i=0; i<atoms->nr; i++) {
 +                    if (q == 0)
 +                    {
 +                        tpi = atoms->atom[i].type;
 +                    }
 +                    else
 +                    {
 +                        tpi = atoms->atom[i].typeB;
 +                    }
 +                    typecount[tpi] += nmol;
 +                }
 +            }
 +            for(tpi=0; tpi<ntp; tpi++) {
 +                for(tpj=tpi; tpj<ntp; tpj++) {
 +                    tmpi = typecount[tpi];
 +                    tmpj = typecount[tpj];
 +                    if (tpi != tpj)
 +                    {
 +                        npair_ij = tmpi*tmpj;
 +                    }
 +                    else
 +                    {
 +                        npair_ij = tmpi*(tmpi - 1)/2;
 +                    }
 +                    if (bBHAM) {
 +                        /* nbfp now includes the 6.0 derivative prefactor */
 +                        csix    += npair_ij*BHAMC(nbfp,ntp,tpi,tpj)/6.0;
 +                    } else {
 +                        /* nbfp now includes the 6.0/12.0 derivative prefactors */
 +                        csix    += npair_ij*   C6(nbfp,ntp,tpi,tpj)/6.0;
 +                        ctwelve += npair_ij*  C12(nbfp,ntp,tpi,tpj)/12.0;
 +                    }
 +                    npair += npair_ij;
 +                }
 +            }
 +            sfree(typecount);
 +            /* Subtract the excluded pairs.
 +             * The main reason for substracting exclusions is that in some cases
 +             * some combinations might never occur and the parameters could have
 +             * any value. These unused values should not influence the dispersion
 +             * correction.
 +             */
 +            for(mb=0; mb<mtop->nmolblock; mb++) {
 +                nmol  = mtop->molblock[mb].nmol;
 +                atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 +                excl  = &mtop->moltype[mtop->molblock[mb].type].excls;
 +                for(i=0; (i<atoms->nr); i++) {
 +                    if (q == 0)
 +                    {
 +                        tpi = atoms->atom[i].type;
 +                    }
 +                    else
 +                    {
 +                        tpi = atoms->atom[i].typeB;
 +                    }
 +                    j1  = excl->index[i];
 +                    j2  = excl->index[i+1];
 +                    for(j=j1; j<j2; j++) {
 +                        k = excl->a[j];
 +                        if (k > i)
 +                        {
 +                            if (q == 0)
 +                            {
 +                                tpj = atoms->atom[k].type;
 +                            }
 +                            else
 +                            {
 +                                tpj = atoms->atom[k].typeB;
 +                            }
 +                            if (bBHAM) {
 +                                /* nbfp now includes the 6.0 derivative prefactor */
 +                               csix -= nmol*BHAMC(nbfp,ntp,tpi,tpj)/6.0;
 +                            } else {
 +                                /* nbfp now includes the 6.0/12.0 derivative prefactors */
 +                                csix    -= nmol*C6 (nbfp,ntp,tpi,tpj)/6.0;
 +                                ctwelve -= nmol*C12(nbfp,ntp,tpi,tpj)/12.0;
 +                            }
 +                            nexcl += nmol;
 +                        }
 +                    }
 +                }
 +            }
 +        } else {
 +            /* Only correct for the interaction of the test particle
 +             * with the rest of the system.
 +             */
 +            atoms_tpi =
 +                &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
 +
 +            npair = 0;
 +            for(mb=0; mb<mtop->nmolblock; mb++) {
 +                nmol  = mtop->molblock[mb].nmol;
 +                atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 +                for(j=0; j<atoms->nr; j++) {
 +                    nmolc = nmol;
 +                    /* Remove the interaction of the test charge group
 +                     * with itself.
 +                     */
 +                    if (mb == mtop->nmolblock-1)
 +                    {
 +                        nmolc--;
 +                        
 +                        if (mb == 0 && nmol == 1)
 +                        {
 +                            gmx_fatal(FARGS,"Old format tpr with TPI, please generate a new tpr file");
 +                        }
 +                    }
 +                    if (q == 0)
 +                    {
 +                        tpj = atoms->atom[j].type;
 +                    }
 +                    else
 +                    {
 +                        tpj = atoms->atom[j].typeB;
 +                    }
 +                    for(i=0; i<fr->n_tpi; i++)
 +                    {
 +                        if (q == 0)
 +                        {
 +                            tpi = atoms_tpi->atom[i].type;
 +                        }
 +                        else
 +                        {
 +                            tpi = atoms_tpi->atom[i].typeB;
 +                        }
 +                        if (bBHAM)
 +                        {
 +                            /* nbfp now includes the 6.0 derivative prefactor */
 +                            csix    += nmolc*BHAMC(nbfp,ntp,tpi,tpj)/6.0;
 +                        }
 +                        else
 +                        {
 +                            /* nbfp now includes the 6.0/12.0 derivative prefactors */
 +                            csix    += nmolc*C6 (nbfp,ntp,tpi,tpj)/6.0;
 +                            ctwelve += nmolc*C12(nbfp,ntp,tpi,tpj)/12.0;
 +                        }
 +                        npair += nmolc;
 +                    }
 +                }
 +            }
 +        }
 +        if (npair - nexcl <= 0 && fplog) {
 +            fprintf(fplog,"\nWARNING: There are no atom pairs for dispersion correction\n\n");
 +            csix     = 0;
 +            ctwelve  = 0;
 +        } else {
 +            csix    /= npair - nexcl;
 +            ctwelve /= npair - nexcl;
 +        }
 +        if (debug) {
 +            fprintf(debug,"Counted %d exclusions\n",nexcl);
 +            fprintf(debug,"Average C6 parameter is: %10g\n",(double)csix);
 +            fprintf(debug,"Average C12 parameter is: %10g\n",(double)ctwelve);
 +        }
 +        fr->avcsix[q]    = csix;
 +        fr->avctwelve[q] = ctwelve;
 +    }
 +    if (fplog != NULL)
 +    {
 +        if (fr->eDispCorr == edispcAllEner ||
 +            fr->eDispCorr == edispcAllEnerPres)
 +        {
 +            fprintf(fplog,"Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
 +                    fr->avcsix[0],fr->avctwelve[0]);
 +        }
 +        else
 +        {
 +            fprintf(fplog,"Long Range LJ corr.: <C6> %10.4e\n",fr->avcsix[0]);
 +        }
 +    }
 +}
 +
 +
 +static void set_bham_b_max(FILE *fplog,t_forcerec *fr,
 +                           const gmx_mtop_t *mtop)
 +{
 +    const t_atoms *at1,*at2;
 +    int  mt1,mt2,i,j,tpi,tpj,ntypes;
 +    real b,bmin;
 +    real *nbfp;
 +
 +    if (fplog)
 +    {
 +        fprintf(fplog,"Determining largest Buckingham b parameter for table\n");
 +    }
 +    nbfp   = fr->nbfp;
 +    ntypes = fr->ntype;
 +    
 +    bmin           = -1;
 +    fr->bham_b_max = 0;
 +    for(mt1=0; mt1<mtop->nmoltype; mt1++)
 +    {
 +        at1 = &mtop->moltype[mt1].atoms;
 +        for(i=0; (i<at1->nr); i++)
 +        {
 +            tpi = at1->atom[i].type;
 +            if (tpi >= ntypes)
 +                gmx_fatal(FARGS,"Atomtype[%d] = %d, maximum = %d",i,tpi,ntypes);
 +            
 +            for(mt2=mt1; mt2<mtop->nmoltype; mt2++)
 +            {
 +                at2 = &mtop->moltype[mt2].atoms;
 +                for(j=0; (j<at2->nr); j++) {
 +                    tpj = at2->atom[j].type;
 +                    if (tpj >= ntypes)
 +                    {
 +                        gmx_fatal(FARGS,"Atomtype[%d] = %d, maximum = %d",j,tpj,ntypes);
 +                    }
 +                    b = BHAMB(nbfp,ntypes,tpi,tpj);
 +                    if (b > fr->bham_b_max)
 +                    {
 +                        fr->bham_b_max = b;
 +                    }
 +                    if ((b < bmin) || (bmin==-1))
 +                    {
 +                        bmin = b;
 +                    }
 +                }
 +            }
 +        }
 +    }
 +    if (fplog)
 +    {
 +        fprintf(fplog,"Buckingham b parameters, min: %g, max: %g\n",
 +                bmin,fr->bham_b_max);
 +    }
 +}
 +
 +static void make_nbf_tables(FILE *fp,const output_env_t oenv,
 +                            t_forcerec *fr,real rtab,
 +                            const t_commrec *cr,
 +                            const char *tabfn,char *eg1,char *eg2,
 +                            t_nblists *nbl)
 +{
 +    char buf[STRLEN];
 +    int i,j;
 +
 +    if (tabfn == NULL) {
 +        if (debug)
 +            fprintf(debug,"No table file name passed, can not read table, can not do non-bonded interactions\n");
 +        return;
 +    }
 +
 +    sprintf(buf,"%s",tabfn);
 +    if (eg1 && eg2)
 +    /* Append the two energy group names */
 +        sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1,"_%s_%s.%s",
 +                eg1,eg2,ftp2ext(efXVG));
 +    nbl->table_elec_vdw = make_tables(fp,oenv,fr,MASTER(cr),buf,rtab,0);
 +    /* Copy the contents of the table to separate coulomb and LJ tables too,
 +     * to improve cache performance.
 +     */
 +    /* For performance reasons we want
 +     * the table data to be aligned to 16-byte. The pointers could be freed
 +     * but currently aren't.
 +     */
 +    nbl->table_elec.interaction = GMX_TABLE_INTERACTION_ELEC;
 +    nbl->table_elec.format = nbl->table_elec_vdw.format;
 +    nbl->table_elec.r = nbl->table_elec_vdw.r;
 +    nbl->table_elec.n = nbl->table_elec_vdw.n;
 +    nbl->table_elec.scale = nbl->table_elec_vdw.scale;
 +    nbl->table_elec.scale_exp = nbl->table_elec_vdw.scale_exp;
 +    nbl->table_elec.formatsize = nbl->table_elec_vdw.formatsize;
 +    nbl->table_elec.ninteractions = 1;
 +    nbl->table_elec.stride = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
 +    snew_aligned(nbl->table_elec.data,nbl->table_elec.stride*(nbl->table_elec.n+1),16);
 +
 +    nbl->table_vdw.interaction = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
 +    nbl->table_vdw.format = nbl->table_elec_vdw.format;
 +    nbl->table_vdw.r = nbl->table_elec_vdw.r;
 +    nbl->table_vdw.n = nbl->table_elec_vdw.n;
 +    nbl->table_vdw.scale = nbl->table_elec_vdw.scale;
 +    nbl->table_vdw.scale_exp = nbl->table_elec_vdw.scale_exp;
 +    nbl->table_vdw.formatsize = nbl->table_elec_vdw.formatsize;
 +    nbl->table_vdw.ninteractions = 2;
 +    nbl->table_vdw.stride = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
 +    snew_aligned(nbl->table_vdw.data,nbl->table_vdw.stride*(nbl->table_vdw.n+1),16);
 +
 +    for(i=0; i<=nbl->table_elec_vdw.n; i++)
 +    {
 +        for(j=0; j<4; j++)
 +            nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
 +        for(j=0; j<8; j++)
 +            nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
 +    }
 +}
 +
 +static void count_tables(int ftype1,int ftype2,const gmx_mtop_t *mtop,
 +                         int *ncount,int **count)
 +{
 +    const gmx_moltype_t *molt;
 +    const t_ilist *il;
 +    int mt,ftype,stride,i,j,tabnr;
 +    
 +    for(mt=0; mt<mtop->nmoltype; mt++)
 +    {
 +        molt = &mtop->moltype[mt];
 +        for(ftype=0; ftype<F_NRE; ftype++)
 +        {
 +            if (ftype == ftype1 || ftype == ftype2) {
 +                il = &molt->ilist[ftype];
 +                stride = 1 + NRAL(ftype);
 +                for(i=0; i<il->nr; i+=stride) {
 +                    tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
 +                    if (tabnr < 0)
 +                        gmx_fatal(FARGS,"A bonded table number is smaller than 0: %d\n",tabnr);
 +                    if (tabnr >= *ncount) {
 +                        srenew(*count,tabnr+1);
 +                        for(j=*ncount; j<tabnr+1; j++)
 +                            (*count)[j] = 0;
 +                        *ncount = tabnr+1;
 +                    }
 +                    (*count)[tabnr]++;
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +static bondedtable_t *make_bonded_tables(FILE *fplog,
 +                                         int ftype1,int ftype2,
 +                                         const gmx_mtop_t *mtop,
 +                                         const char *basefn,const char *tabext)
 +{
 +    int  i,ncount,*count;
 +    char tabfn[STRLEN];
 +    bondedtable_t *tab;
 +    
 +    tab = NULL;
 +    
 +    ncount = 0;
 +    count = NULL;
 +    count_tables(ftype1,ftype2,mtop,&ncount,&count);
 +    
 +    if (ncount > 0) {
 +        snew(tab,ncount);
 +        for(i=0; i<ncount; i++) {
 +            if (count[i] > 0) {
 +                sprintf(tabfn,"%s",basefn);
 +                sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1,"_%s%d.%s",
 +                        tabext,i,ftp2ext(efXVG));
 +                tab[i] = make_bonded_table(fplog,tabfn,NRAL(ftype1)-2);
 +            }
 +        }
 +        sfree(count);
 +    }
 +  
 +    return tab;
 +}
 +
 +void forcerec_set_ranges(t_forcerec *fr,
 +                         int ncg_home,int ncg_force,
 +                         int natoms_force,
 +                         int natoms_force_constr,int natoms_f_novirsum)
 +{
 +    fr->cg0 = 0;
 +    fr->hcg = ncg_home;
 +
 +    /* fr->ncg_force is unused in the standard code,
 +     * but it can be useful for modified code dealing with charge groups.
 +     */
 +    fr->ncg_force           = ncg_force;
 +    fr->natoms_force        = natoms_force;
 +    fr->natoms_force_constr = natoms_force_constr;
 +
 +    if (fr->natoms_force_constr > fr->nalloc_force)
 +    {
 +        fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
 +
 +        if (fr->bTwinRange)
 +        {
 +            srenew(fr->f_twin,fr->nalloc_force);
 +        }
 +    }
 +
 +    if (fr->bF_NoVirSum)
 +    {
 +        fr->f_novirsum_n = natoms_f_novirsum;
 +        if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
 +        {
 +            fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
 +            srenew(fr->f_novirsum_alloc,fr->f_novirsum_nalloc);
 +        }
 +    }
 +    else
 +    {
 +        fr->f_novirsum_n = 0;
 +    }
 +}
 +
 +static real cutoff_inf(real cutoff)
 +{
 +    if (cutoff == 0)
 +    {
 +        cutoff = GMX_CUTOFF_INF;
 +    }
 +
 +    return cutoff;
 +}
 +
 +static void make_adress_tf_tables(FILE *fp,const output_env_t oenv,
 +                            t_forcerec *fr,const t_inputrec *ir,
 +                          const char *tabfn, const gmx_mtop_t *mtop,
 +                            matrix     box)
 +{
 +  char buf[STRLEN];
 +  int i,j;
 +
 +  if (tabfn == NULL) {
 +        gmx_fatal(FARGS,"No thermoforce table file given. Use -tabletf to specify a file\n");
 +    return;
 +  }
 +
 +  snew(fr->atf_tabs, ir->adress->n_tf_grps);
 +
 +  for (i=0; i<ir->adress->n_tf_grps; i++){
 +    j = ir->adress->tf_table_index[i]; /* get energy group index */
 +    sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1,"tf_%s.%s",
 +        *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]) ,ftp2ext(efXVG));
 +    printf("loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[j], buf);
 +    fr->atf_tabs[i] = make_atf_table(fp,oenv,fr,buf, box);
 +  }
 +
 +}
 +
 +gmx_bool can_use_allvsall(const t_inputrec *ir, const gmx_mtop_t *mtop,
 +                      gmx_bool bPrintNote,t_commrec *cr,FILE *fp)
 +{
 +    gmx_bool bAllvsAll;
 +
 +    bAllvsAll =
 +        (
 +         ir->rlist==0            &&
 +         ir->rcoulomb==0         &&
 +         ir->rvdw==0             &&
 +         ir->ePBC==epbcNONE      &&
 +         ir->vdwtype==evdwCUT    &&
 +         ir->coulombtype==eelCUT &&
 +         ir->efep==efepNO        &&
 +         (ir->implicit_solvent == eisNO || 
 +          (ir->implicit_solvent==eisGBSA && (ir->gb_algorithm==egbSTILL || 
 +                                             ir->gb_algorithm==egbHCT   || 
 +                                             ir->gb_algorithm==egbOBC))) &&
 +         getenv("GMX_NO_ALLVSALL") == NULL
 +            );
 +    
 +    if (bAllvsAll && ir->opts.ngener > 1)
 +    {
 +        const char *note="NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
 +
 +        if (bPrintNote)
 +        {
 +            if (MASTER(cr))
 +            {
 +                fprintf(stderr,"\n%s\n",note);
 +            }
 +            if (fp != NULL)
 +            {
 +                fprintf(fp,"\n%s\n",note);
 +            }
 +        }
 +        bAllvsAll = FALSE;
 +    }
 +
 +    if(bAllvsAll && fp && MASTER(cr))
 +    {
 +        fprintf(fp,"\nUsing accelerated all-vs-all kernels.\n\n");
 +    }
 +    
 +    return bAllvsAll;
 +}
 +
 +
-             /* snew(fr->f_t[t].ener,F_NRE); */
-             fr->f_t[t].grpp.nener = grpp_nener;
++static void init_forcerec_f_threads(t_forcerec *fr,int nenergrp)
 +{
 +    int t,i;
 +
++    /* These thread local data structures are used for bondeds only */
 +    fr->nthreads = gmx_omp_nthreads_get(emntBonded);
 +
 +    if (fr->nthreads > 1)
 +    {
 +        snew(fr->f_t,fr->nthreads);
 +        /* Thread 0 uses the global force and energy arrays */
 +        for(t=1; t<fr->nthreads; t++)
 +        {
 +            fr->f_t[t].f = NULL;
 +            fr->f_t[t].f_nalloc = 0;
 +            snew(fr->f_t[t].fshift,SHIFTS);
-                 snew(fr->f_t[t].grpp.ener[i],grpp_nener);
++            fr->f_t[t].grpp.nener = nenergrp*nenergrp;
 +            for(i=0; i<egNR; i++)
 +            {
++                snew(fr->f_t[t].grpp.ener[i],fr->f_t[t].grpp.nener);
 +            }
 +        }
 +    }
 +}
 +
 +
 +static void pick_nbnxn_kernel_cpu(FILE *fp,
 +                                  const t_commrec *cr,
 +                                  const gmx_cpuid_t cpuid_info,
 +                                  int *kernel_type)
 +{
 +    *kernel_type = nbk4x4_PlainC;
 +
 +#ifdef GMX_X86_SSE2
 +    {
 +        /* On Intel Sandy-Bridge AVX-256 kernels are always faster.
 +         * On AMD Bulldozer AVX-256 is much slower than AVX-128.
 +         */
 +        if(gmx_cpuid_feature(cpuid_info, GMX_CPUID_FEATURE_X86_AVX) == 1 &&
 +           gmx_cpuid_vendor(cpuid_info) != GMX_CPUID_VENDOR_AMD)
 +        {
 +#ifdef GMX_X86_AVX_256
 +            *kernel_type = nbk4xN_X86_SIMD256;
 +#else
 +            *kernel_type = nbk4xN_X86_SIMD128;
 +#endif
 +        }
 +        else
 +        {
 +            *kernel_type = nbk4xN_X86_SIMD128;
 +        }
 +
 +        if (getenv("GMX_NBNXN_AVX128") != NULL)
 +        {
 +            *kernel_type = nbk4xN_X86_SIMD128;
 +        }
 +        if (getenv("GMX_NBNXN_AVX256") != NULL)
 +        {
 +#ifdef GMX_X86_AVX_256
 +            *kernel_type = nbk4xN_X86_SIMD256;
 +#else
 +            gmx_fatal(FARGS,"You requested AVX-256 nbnxn kernels, but GROMACS was built without AVX support");
 +#endif
 +        }
 +    }
 +#endif /* GMX_X86_SSE2 */
 +}
 +
 +
 +/* Note that _mm_... intrinsics can be converted to either SSE or AVX
 + * depending on compiler flags.
 + * For gcc we check for __AVX__
 + * At least a check for icc should be added (if there is a macro)
 + */
 +static const char *nbk_name[] =
 +  { "not set", "plain C 4x4",
 +#if !(defined GMX_X86_AVX_256 || defined GMX_X86_AVX128_FMA || defined __AVX__)
 +#ifndef GMX_X86_SSE4_1
 +#ifndef GMX_DOUBLE
 +    "SSE2 4x4",
 +#else
 +    "SSE2 4x2",
 +#endif
 +#else
 +#ifndef GMX_DOUBLE
 +    "SSE4.1 4x4",
 +#else
 +    "SSE4.1 4x2",
 +#endif
 +#endif
 +#else
 +#ifndef GMX_DOUBLE
 +    "AVX-128 4x4",
 +#else
 +    "AVX-128 4x2",
 +#endif
 +#endif
 +#ifndef GMX_DOUBLE
 +    "AVX-256 4x8",
 +#else
 +    "AVX-256 4x4",
 +#endif
 +    "CUDA 8x8x8", "plain C 8x8x8" };
 +
 +static void pick_nbnxn_kernel(FILE *fp,
 +                              const t_commrec *cr,
 +                              const gmx_hw_info_t *hwinfo,
 +                              gmx_bool use_cpu_acceleration,
 +                              gmx_bool *bUseGPU,
 +                              int *kernel_type)
 +{
 +    gmx_bool bEmulateGPU, bGPU;
 +    char gpu_err_str[STRLEN];
 +
 +    assert(kernel_type);
 +
 +    *kernel_type = nbkNotSet;
 +    /* if bUseGPU == NULL we don't want a GPU (e.g. hybrid mode kernel selection) */
 +    bGPU = (bUseGPU != NULL) && hwinfo->bCanUseGPU;
 +
 +    /* Run GPU emulation mode if GMX_EMULATE_GPU is defined or in case if nobonded
 +       calculations are turned off via GMX_NO_NONBONDED -- this is the simple way
 +       to turn off GPU/CUDA initializations as well.. */
 +    bEmulateGPU = ((getenv("GMX_EMULATE_GPU") != NULL) ||
 +                   (getenv("GMX_NO_NONBONDED") != NULL));
 +
 +    if (bGPU)
 +    {
 +        if (bEmulateGPU)
 +        {
 +            bGPU = FALSE;
 +        }
 +        else
 +        {
 +            /* Each PP node will use the intra-node id-th device from the
 +             * list of detected/selected GPUs. */ 
 +            if (!init_gpu(cr->nodeid_group_intra, gpu_err_str, &hwinfo->gpu_info))
 +            {
 +                /* At this point the init should never fail as we made sure that 
 +                 * we have all the GPUs we need. If it still does, we'll bail. */
 +                gmx_fatal(FARGS, "On node %d failed to initialize GPU #%d: %s",
 +                          cr->nodeid,
 +                          get_gpu_device_id(&hwinfo->gpu_info, cr->nodeid_group_intra),
 +                          gpu_err_str);
 +            }
 +        }
 +        *bUseGPU = bGPU;
 +    }
 +
 +    if (bEmulateGPU)
 +    {
 +        *kernel_type = nbk8x8x8_PlainC;
 +
 +        md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
 +    }
 +    else if (bGPU)
 +    {
 +        *kernel_type = nbk8x8x8_CUDA;
 +    }
 +
 +    if (*kernel_type == nbkNotSet)
 +    {
 +        if (use_cpu_acceleration)
 +        {
 +            pick_nbnxn_kernel_cpu(fp,cr,hwinfo->cpuid_info,kernel_type);
 +        }
 +        else
 +        {
 +            *kernel_type = nbk4x4_PlainC;
 +        }
 +    }
 +
 +    if (fp != NULL)
 +    {
 +        if (MASTER(cr))
 +        {
 +            fprintf(stderr,"Using %s non-bonded kernels\n",
 +                    nbk_name[*kernel_type]);
 +        }
 +        fprintf(fp,"\nUsing %s non-bonded kernels\n\n",
 +                nbk_name[*kernel_type]);
 +    }
 +}
 +
 +gmx_bool uses_simple_tables(int cutoff_scheme,
 +                            nonbonded_verlet_t *nbv,
 +                            int group)
 +{
 +    gmx_bool bUsesSimpleTables = TRUE;
 +    int grp_index;
 +
 +    switch(cutoff_scheme)
 +    {
 +    case ecutsGROUP:
 +        bUsesSimpleTables = TRUE;
 +        break;
 +    case ecutsVERLET:
 +        assert(NULL != nbv && NULL != nbv->grp);
 +        grp_index = (group < 0) ? 0 : (nbv->ngrp - 1);
 +        bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
 +        break;
 +    default:
 +        gmx_incons("unimplemented");
 +    }
 +    return bUsesSimpleTables;
 +}
 +
 +static void init_ewald_f_table(interaction_const_t *ic,
 +                               gmx_bool bUsesSimpleTables,
 +                               real rtab)
 +{
 +    real maxr;
 +
 +    if (bUsesSimpleTables)
 +    {
 +        /* With a spacing of 0.0005 we are at the force summation accuracy
 +         * for the SSE kernels for "normal" atomistic simulations.
 +         */
 +        ic->tabq_scale = ewald_spline3_table_scale(ic->ewaldcoeff,
 +                                                   ic->rcoulomb);
 +        
 +        maxr = (rtab>ic->rcoulomb) ? rtab : ic->rcoulomb;
 +        ic->tabq_size  = (int)(maxr*ic->tabq_scale) + 2;
 +    }
 +    else
 +    {
 +        ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
 +        /* Subtract 2 iso 1 to avoid access out of range due to rounding */
 +        ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
 +    }
 +
 +    sfree_aligned(ic->tabq_coul_FDV0);
 +    sfree_aligned(ic->tabq_coul_F);
 +    sfree_aligned(ic->tabq_coul_V);
 +
 +    /* Create the original table data in FDV0 */
 +    snew_aligned(ic->tabq_coul_FDV0,ic->tabq_size*4,16);
 +    snew_aligned(ic->tabq_coul_F,ic->tabq_size,16);
 +    snew_aligned(ic->tabq_coul_V,ic->tabq_size,16);
 +    table_spline3_fill_ewald_lr(ic->tabq_coul_F,ic->tabq_coul_V,ic->tabq_coul_FDV0,
 +                                ic->tabq_size,1/ic->tabq_scale,ic->ewaldcoeff);
 +}
 +
 +void init_interaction_const_tables(FILE *fp, 
 +                                   interaction_const_t *ic,
 +                                   gmx_bool bUsesSimpleTables,
 +                                   real rtab)
 +{
 +    real spacing;
 +
 +    if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
 +    {
 +        init_ewald_f_table(ic,bUsesSimpleTables,rtab);
 +
 +        if (fp != NULL)
 +        {
 +            fprintf(fp,"Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
 +                    1/ic->tabq_scale,ic->tabq_size);
 +        }
 +    }
 +}
 +
 +void init_interaction_const(FILE *fp, 
 +                            interaction_const_t **interaction_const,
 +                            const t_forcerec *fr,
 +                            real  rtab)
 +{
 +    interaction_const_t *ic;
 +    gmx_bool bUsesSimpleTables = TRUE;
 +
 +    snew(ic, 1);
 +
 +    /* Just allocate something so we can free it */
 +    snew_aligned(ic->tabq_coul_FDV0,16,16);
 +    snew_aligned(ic->tabq_coul_F,16,16);
 +    snew_aligned(ic->tabq_coul_V,16,16);
 +
 +    ic->rlist       = fr->rlist;
 +    ic->rlistlong   = fr->rlistlong;
 +    
 +    /* Lennard-Jones */
 +    ic->rvdw        = fr->rvdw;
 +    if (fr->vdw_modifier==eintmodPOTSHIFT)
 +    {
 +        ic->sh_invrc6 = pow(ic->rvdw,-6.0);
 +    }
 +    else
 +    {
 +        ic->sh_invrc6 = 0;
 +    }
 +
 +    /* Electrostatics */
 +    ic->eeltype     = fr->eeltype;
 +    ic->rcoulomb    = fr->rcoulomb;
 +    ic->epsilon_r   = fr->epsilon_r;
 +    ic->epsfac      = fr->epsfac;
 +
 +    /* Ewald */
 +    ic->ewaldcoeff  = fr->ewaldcoeff;
 +    if (fr->coulomb_modifier==eintmodPOTSHIFT)
 +    {
 +        ic->sh_ewald = gmx_erfc(ic->ewaldcoeff*ic->rcoulomb);
 +    }
 +    else
 +    {
 +        ic->sh_ewald = 0;
 +    }
 +
 +    /* Reaction-field */
 +    if (EEL_RF(ic->eeltype))
 +    {
 +        ic->epsilon_rf = fr->epsilon_rf;
 +        ic->k_rf       = fr->k_rf;
 +        ic->c_rf       = fr->c_rf;
 +    }
 +    else
 +    {
 +        /* For plain cut-off we might use the reaction-field kernels */
 +        ic->epsilon_rf = ic->epsilon_r;
 +        ic->k_rf       = 0;
 +        if (fr->coulomb_modifier==eintmodPOTSHIFT)
 +        {
 +            ic->c_rf   = 1/ic->rcoulomb;
 +        }
 +        else
 +        {
 +            ic->c_rf   = 0;
 +        }
 +    }
 +
 +    if (fp != NULL)
 +    {
 +        fprintf(fp,"Potential shift: LJ r^-12: %.3f r^-6 %.3f",
 +                sqr(ic->sh_invrc6),ic->sh_invrc6);
 +        if (ic->eeltype == eelCUT)
 +        {
 +            fprintf(fp,", Coulomb %.3f",ic->c_rf);
 +        }
 +        else if (EEL_PME(ic->eeltype))
 +        {
 +            fprintf(fp,", Ewald %.3e",ic->sh_ewald);
 +        }
 +        fprintf(fp,"\n");
 +    }
 +
 +    *interaction_const = ic;
 +
 +    if (fr->nbv != NULL && fr->nbv->bUseGPU)
 +    {
 +        nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv);
 +    }
 +
 +    bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
 +    init_interaction_const_tables(fp,ic,bUsesSimpleTables,rtab);
 +}
 +
 +static void init_nb_verlet(FILE *fp,
 +                           nonbonded_verlet_t **nb_verlet,
 +                           const t_inputrec *ir,
 +                           const t_forcerec *fr,
 +                           const t_commrec *cr,
 +                           const char *nbpu_opt)
 +{
 +    nonbonded_verlet_t *nbv;
 +    int  i;
 +    char *env;
 +    gmx_bool bHybridGPURun = FALSE;
 +
 +    nbnxn_alloc_t *nb_alloc;
 +    nbnxn_free_t  *nb_free;
 +
 +    snew(nbv, 1);
 +
 +    nbv->nbs = NULL;
 +
 +    nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
 +    for(i=0; i<nbv->ngrp; i++)
 +    {
 +        nbv->grp[i].nbl_lists.nnbl = 0;
 +        nbv->grp[i].nbat           = NULL;
 +        nbv->grp[i].kernel_type    = nbkNotSet;
 +
 +        if (i == 0) /* local */
 +        {
 +            pick_nbnxn_kernel(fp, cr, fr->hwinfo, fr->use_cpu_acceleration,
 +                              &nbv->bUseGPU,
 +                              &nbv->grp[i].kernel_type);
 +        }
 +        else /* non-local */
 +        {
 +            if (nbpu_opt != NULL && strcmp(nbpu_opt,"gpu_cpu") == 0)
 +            {
 +                /* Use GPU for local, select a CPU kernel for non-local */
 +                pick_nbnxn_kernel(fp, cr, fr->hwinfo, fr->use_cpu_acceleration,
 +                                  NULL,
 +                                  &nbv->grp[i].kernel_type);
 +
 +                bHybridGPURun = TRUE;
 +            }
 +            else
 +            {
 +                /* Use the same kernel for local and non-local interactions */
 +                nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
 +            }
 +        }
 +    }
 +
 +    if (nbv->bUseGPU)
 +    {
 +        /* init the NxN GPU data; the last argument tells whether we'll have
 +         * both local and non-local NB calculation on GPU */
 +        nbnxn_cuda_init(fp, &nbv->cu_nbv,
 +                        &fr->hwinfo->gpu_info, cr->nodeid_group_intra,
 +                        (nbv->ngrp > 1) && !bHybridGPURun);
 +
 +        if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
 +        {
 +            char *end;
 +
 +            nbv->min_ci_balanced = strtol(env, &end, 10);
 +            if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
 +            {
 +                gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
 +            }
 +
 +            if (debug)
 +            {
 +                fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n", 
 +                        nbv->min_ci_balanced);
 +            }
 +        }
 +        else
 +        {
 +            nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
 +            if (debug)
 +            {
 +                fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
 +                        nbv->min_ci_balanced);
 +            }
 +        }
 +    }
 +    else
 +    {
 +        nbv->min_ci_balanced = 0;
 +    }
 +
 +    *nb_verlet = nbv;
 +
 +    nbnxn_init_search(&nbv->nbs,
 +                      DOMAINDECOMP(cr) ? & cr->dd->nc : NULL,
 +                      DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
 +                      gmx_omp_nthreads_get(emntNonbonded));
 +
 +    for(i=0; i<nbv->ngrp; i++)
 +    {
 +        if (nbv->grp[0].kernel_type == nbk8x8x8_CUDA)
 +        {
 +            nb_alloc = &pmalloc;
 +            nb_free  = &pfree;
 +        }
 +        else
 +        {
 +            nb_alloc = NULL;
 +            nb_free  = NULL;
 +        }
 +
 +        nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
 +                                nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
 +                                /* 8x8x8 "non-simple" lists are ATM always combined */
 +                                !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
 +                                nb_alloc, nb_free);
 +
 +        if (i == 0 ||
 +            nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
 +        {
 +            snew(nbv->grp[i].nbat,1);
 +            nbnxn_atomdata_init(fp,
 +                                nbv->grp[i].nbat,
 +                                nbv->grp[i].kernel_type,
 +                                fr->ntype,fr->nbfp,
 +                                ir->opts.ngener,
 +                                nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type) ? gmx_omp_nthreads_get(emntNonbonded) : 1,
 +                                nb_alloc, nb_free);
 +        }
 +        else
 +        {
 +            nbv->grp[i].nbat = nbv->grp[0].nbat;
 +        }
 +    }
 +}
 +
 +void init_forcerec(FILE *fp,
 +                   const output_env_t oenv,
 +                   t_forcerec *fr,
 +                   t_fcdata   *fcd,
 +                   const t_inputrec *ir,
 +                   const gmx_mtop_t *mtop,
 +                   const t_commrec  *cr,
 +                   matrix     box,
 +                   gmx_bool       bMolEpot,
 +                   const char *tabfn,
 +                   const char *tabafn,
 +                   const char *tabpfn,
 +                   const char *tabbfn,
 +                   const char *nbpu_opt,
 +                   gmx_bool   bNoSolvOpt,
 +                   real       print_force)
 +{
 +    int     i,j,m,natoms,ngrp,negp_pp,negptable,egi,egj;
 +    real    rtab;
 +    char    *env;
 +    double  dbl;
 +    rvec    box_size;
 +    const t_block *cgs;
 +    gmx_bool    bGenericKernelOnly;
 +    gmx_bool    bTab,bSep14tab,bNormalnblists;
 +    t_nblists *nbl;
 +    int     *nm_ind,egp_flags;
 +    
 +    /* By default we turn acceleration on, but it might be turned off further down... */
 +    fr->use_cpu_acceleration = TRUE;
 +
 +    fr->bDomDec = DOMAINDECOMP(cr);
 +
 +    natoms = mtop->natoms;
 +
 +    if (check_box(ir->ePBC,box))
 +    {
 +        gmx_fatal(FARGS,check_box(ir->ePBC,box));
 +    }
 +    
 +    /* Test particle insertion ? */
 +    if (EI_TPI(ir->eI)) {
 +        /* Set to the size of the molecule to be inserted (the last one) */
 +        /* Because of old style topologies, we have to use the last cg
 +         * instead of the last molecule type.
 +         */
 +        cgs = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
 +        fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
 +        if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1]) {
 +            gmx_fatal(FARGS,"The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
 +        }
 +    } else {
 +        fr->n_tpi = 0;
 +    }
 +    
 +    /* Copy AdResS parameters */
 +    if (ir->bAdress) {
 +      fr->adress_type     = ir->adress->type;
 +      fr->adress_const_wf = ir->adress->const_wf;
 +      fr->adress_ex_width = ir->adress->ex_width;
 +      fr->adress_hy_width = ir->adress->hy_width;
 +      fr->adress_icor     = ir->adress->icor;
 +      fr->adress_site     = ir->adress->site;
 +      fr->adress_ex_forcecap = ir->adress->ex_forcecap;
 +      fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
 +
 +
 +      snew(fr->adress_group_explicit , ir->adress->n_energy_grps);
 +      for (i=0; i< ir->adress->n_energy_grps; i++){
 +          fr->adress_group_explicit[i]= ir->adress->group_explicit[i];
 +      }
 +
 +      fr->n_adress_tf_grps = ir->adress->n_tf_grps;
 +      snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
 +      for (i=0; i< fr->n_adress_tf_grps; i++){
 +          fr->adress_tf_table_index[i]= ir->adress->tf_table_index[i];
 +      }
 +      copy_rvec(ir->adress->refs,fr->adress_refs);
 +    } else {
 +      fr->adress_type = eAdressOff;
 +      fr->adress_do_hybridpairs = FALSE;
 +    }
 +    
 +    /* Copy the user determined parameters */
 +    fr->userint1 = ir->userint1;
 +    fr->userint2 = ir->userint2;
 +    fr->userint3 = ir->userint3;
 +    fr->userint4 = ir->userint4;
 +    fr->userreal1 = ir->userreal1;
 +    fr->userreal2 = ir->userreal2;
 +    fr->userreal3 = ir->userreal3;
 +    fr->userreal4 = ir->userreal4;
 +    
 +    /* Shell stuff */
 +    fr->fc_stepsize = ir->fc_stepsize;
 +    
 +    /* Free energy */
 +    fr->efep       = ir->efep;
 +    fr->sc_alphavdw = ir->fepvals->sc_alpha;
 +    if (ir->fepvals->bScCoul)
 +    {
 +        fr->sc_alphacoul = ir->fepvals->sc_alpha;
 +        fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min,6);
 +    }
 +    else
 +    {
 +        fr->sc_alphacoul = 0;
 +        fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
 +    }
 +    fr->sc_power   = ir->fepvals->sc_power;
 +    fr->sc_r_power   = ir->fepvals->sc_r_power;
 +    fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma,6);
 +
 +    env = getenv("GMX_SCSIGMA_MIN");
 +    if (env != NULL)
 +    {
 +        dbl = 0;
 +        sscanf(env,"%lf",&dbl);
 +        fr->sc_sigma6_min = pow(dbl,6);
 +        if (fp)
 +        {
 +            fprintf(fp,"Setting the minimum soft core sigma to %g nm\n",dbl);
 +        }
 +    }
 +
 +    fr->bNonbonded = TRUE;
 +    if (getenv("GMX_NO_NONBONDED") != NULL)
 +    {
 +        /* turn off non-bonded calculations */
 +        fr->bNonbonded = FALSE;
 +        md_print_warn(cr,fp,
 +                      "Found environment variable GMX_NO_NONBONDED.\n"
 +                      "Disabling nonbonded calculations.\n");
 +    }
 +
 +    bGenericKernelOnly = FALSE;
 +
 +    /* We now check in the NS code whether a particular combination of interactions
 +     * can be used with water optimization, and disable it if that is not the case.
 +     */
 +
 +    if (getenv("GMX_NB_GENERIC") != NULL)
 +    {
 +        if (fp != NULL)
 +        {
 +            fprintf(fp,
 +                    "Found environment variable GMX_NB_GENERIC.\n"
 +                    "Disabling all interaction-specific nonbonded kernels, will only\n"
 +                    "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
 +        }
 +        bGenericKernelOnly = TRUE;
 +    }
 +
 +    if (bGenericKernelOnly==TRUE)
 +    {
 +        bNoSolvOpt         = TRUE;
 +    }
 +
 +    if( (getenv("GMX_DISABLE_CPU_ACCELERATION") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
 +    {
 +        fr->use_cpu_acceleration = FALSE;
 +        if (fp != NULL)
 +        {
 +            fprintf(fp,
 +                    "\nFound environment variable GMX_DISABLE_CPU_ACCELERATION.\n"
 +                    "Disabling all CPU architecture-specific (e.g. SSE2/SSE4/AVX) routines.\n\n");
 +        }
 +    }
 +
 +    fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
 +
 +    /* Check if we can/should do all-vs-all kernels */
 +    fr->bAllvsAll       = can_use_allvsall(ir,mtop,FALSE,NULL,NULL);
 +    fr->AllvsAll_work   = NULL;
 +    fr->AllvsAll_workgb = NULL;
 +
 +
 +    /* Neighbour searching stuff */
 +    fr->cutoff_scheme = ir->cutoff_scheme;
 +    fr->bGrid         = (ir->ns_type == ensGRID);
 +    fr->ePBC          = ir->ePBC;
 +
 +    /* Determine if we will do PBC for distances in bonded interactions */
 +    if (fr->ePBC == epbcNONE)
 +    {
 +        fr->bMolPBC = FALSE;
 +    }
 +    else
 +    {
 +        if (!DOMAINDECOMP(cr))
 +        {
 +            /* The group cut-off scheme and SHAKE assume charge groups
 +             * are whole, but not using molpbc is faster in most cases.
 +             */
 +            if (fr->cutoff_scheme == ecutsGROUP ||
 +                (ir->eConstrAlg == econtSHAKE &&
 +                 (gmx_mtop_ftype_count(mtop,F_CONSTR) > 0 ||
 +                  gmx_mtop_ftype_count(mtop,F_CONSTRNC) > 0)))
 +            {
 +                fr->bMolPBC = ir->bPeriodicMols;
 +            }
 +            else
 +            {
 +                fr->bMolPBC = TRUE;
 +                if (getenv("GMX_USE_GRAPH") != NULL)
 +                {
 +                    fr->bMolPBC = FALSE;
 +                    if (fp)
 +                    {
 +                        fprintf(fp,"\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
 +                    }
 +                }
 +            }
 +        }
 +        else
 +        {
 +            fr->bMolPBC = dd_bonded_molpbc(cr->dd,fr->ePBC);
 +        }
 +    }
 +
 +    fr->rc_scaling = ir->refcoord_scaling;
 +    copy_rvec(ir->posres_com,fr->posres_com);
 +    copy_rvec(ir->posres_comB,fr->posres_comB);
 +    fr->rlist      = cutoff_inf(ir->rlist);
 +    fr->rlistlong  = cutoff_inf(ir->rlistlong);
 +    fr->eeltype    = ir->coulombtype;
 +    fr->vdwtype    = ir->vdwtype;
 +
 +    fr->coulomb_modifier = ir->coulomb_modifier;
 +    fr->vdw_modifier     = ir->vdw_modifier;
 +
 +    /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
 +    switch(fr->eeltype)
 +    {
 +        case eelCUT:
 +            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_COULOMB;
 +            break;
 +
 +        case eelRF:
 +        case eelGRF:
 +        case eelRF_NEC:
 +            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
 +            break;
 +
 +        case eelRF_ZERO:
 +            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
 +            fr->coulomb_modifier          = eintmodEXACTCUTOFF;
 +            break;
 +
 +        case eelSWITCH:
 +        case eelSHIFT:
 +        case eelUSER:
 +        case eelENCADSHIFT:
 +        case eelPMESWITCH:
 +        case eelPMEUSER:
 +        case eelPMEUSERSWITCH:
 +            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
 +            break;
 +
 +        case eelPME:
 +        case eelEWALD:
 +            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
 +            break;
 +
 +        default:
 +            gmx_fatal(FARGS,"Unsupported electrostatic interaction: %s",eel_names[fr->eeltype]);
 +            break;
 +    }
 +
 +    /* Vdw: Translate from mdp settings to kernel format */
 +    switch(fr->vdwtype)
 +    {
 +        case evdwCUT:
 +            if(fr->bBHAM)
 +            {
 +                fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
 +            }
 +            else
 +            {
 +                fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
 +            }
 +            break;
 +
 +        case evdwSWITCH:
 +        case evdwSHIFT:
 +        case evdwUSER:
 +        case evdwENCADSHIFT:
 +            fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
 +            break;
 +
 +        default:
 +            gmx_fatal(FARGS,"Unsupported vdw interaction: %s",evdw_names[fr->vdwtype]);
 +            break;
 +    }
 +
 +    /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
 +    fr->nbkernel_elec_modifier    = fr->coulomb_modifier;
 +    fr->nbkernel_vdw_modifier     = fr->vdw_modifier;
 +
 +    fr->bTwinRange = fr->rlistlong > fr->rlist;
 +    fr->bEwald     = (EEL_PME(fr->eeltype) || fr->eeltype==eelEWALD);
 +    
 +    fr->reppow     = mtop->ffparams.reppow;
 +
 +    if (ir->cutoff_scheme == ecutsGROUP)
 +    {
 +        fr->bvdwtab    = (fr->vdwtype != evdwCUT ||
 +                          !gmx_within_tol(fr->reppow,12.0,10*GMX_DOUBLE_EPS));
 +        /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
 +        fr->bcoultab   = !(fr->eeltype == eelCUT ||
 +                           fr->eeltype == eelEWALD ||
 +                           fr->eeltype == eelPME ||
 +                           fr->eeltype == eelRF ||
 +                           fr->eeltype == eelRF_ZERO);
 +
 +        /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
 +         * going to be faster to tabulate the interaction than calling the generic kernel.
 +         */
 +        if(fr->nbkernel_elec_modifier==eintmodPOTSWITCH && fr->nbkernel_vdw_modifier==eintmodPOTSWITCH)
 +        {
 +            if((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
 +            {
 +                fr->bcoultab = TRUE;
 +            }
 +        }
 +        else if((fr->nbkernel_elec_modifier==eintmodPOTSHIFT && fr->nbkernel_vdw_modifier==eintmodPOTSHIFT) ||
 +                ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
 +                  fr->nbkernel_elec_modifier==eintmodEXACTCUTOFF &&
 +                  (fr->nbkernel_vdw_modifier==eintmodPOTSWITCH || fr->nbkernel_vdw_modifier==eintmodPOTSHIFT))))
 +        {
 +            if(fr->rcoulomb != fr->rvdw)
 +            {
 +                fr->bcoultab = TRUE;
 +            }
 +        }
 +
 +        if (getenv("GMX_REQUIRE_TABLES"))
 +        {
 +            fr->bvdwtab  = TRUE;
 +            fr->bcoultab = TRUE;
 +        }
 +
 +        if (fp)
 +        {
 +            fprintf(fp,"Table routines are used for coulomb: %s\n",bool_names[fr->bcoultab]);
 +            fprintf(fp,"Table routines are used for vdw:     %s\n",bool_names[fr->bvdwtab ]);
 +        }
 +
 +        if(fr->bvdwtab==TRUE)
 +        {
 +            fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
 +            fr->nbkernel_vdw_modifier    = eintmodNONE;
 +        }
 +        if(fr->bcoultab==TRUE)
 +        {
 +            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
 +            fr->nbkernel_elec_modifier    = eintmodNONE;
 +        }
 +    }
 +
 +    if (ir->cutoff_scheme == ecutsVERLET)
 +    {
 +        if (!gmx_within_tol(fr->reppow,12.0,10*GMX_DOUBLE_EPS))
 +        {
 +            gmx_fatal(FARGS,"Cut-off scheme %S only supports LJ repulsion power 12",ecutscheme_names[ir->cutoff_scheme]);
 +        }
 +        fr->bvdwtab  = FALSE;
 +        fr->bcoultab = FALSE;
 +    }
 +    
 +    /* Tables are used for direct ewald sum */
 +    if(fr->bEwald)
 +    {
 +        if (EEL_PME(ir->coulombtype))
 +        {
 +            if (fp)
 +                fprintf(fp,"Will do PME sum in reciprocal space.\n");
 +            if (ir->coulombtype == eelP3M_AD)
 +            {
 +                please_cite(fp,"Hockney1988");
 +                please_cite(fp,"Ballenegger2012");
 +            }
 +            else
 +            {
 +                please_cite(fp,"Essmann95a");
 +            }
 +            
 +            if (ir->ewald_geometry == eewg3DC)
 +            {
 +                if (fp)
 +                {
 +                    fprintf(fp,"Using the Ewald3DC correction for systems with a slab geometry.\n");
 +                }
 +                please_cite(fp,"In-Chul99a");
 +            }
 +        }
 +        fr->ewaldcoeff=calc_ewaldcoeff(ir->rcoulomb, ir->ewald_rtol);
 +        init_ewald_tab(&(fr->ewald_table), cr, ir, fp);
 +        if (fp)
 +        {
 +            fprintf(fp,"Using a Gaussian width (1/beta) of %g nm for Ewald\n",
 +                    1/fr->ewaldcoeff);
 +        }
 +    }
 +    
 +    /* Electrostatics */
 +    fr->epsilon_r  = ir->epsilon_r;
 +    fr->epsilon_rf = ir->epsilon_rf;
 +    fr->fudgeQQ    = mtop->ffparams.fudgeQQ;
 +    fr->rcoulomb_switch = ir->rcoulomb_switch;
 +    fr->rcoulomb        = cutoff_inf(ir->rcoulomb);
 +    
 +    /* Parameters for generalized RF */
 +    fr->zsquare = 0.0;
 +    fr->temp    = 0.0;
 +    
 +    if (fr->eeltype == eelGRF)
 +    {
 +        init_generalized_rf(fp,mtop,ir,fr);
 +    }
 +    else if (fr->eeltype == eelSHIFT)
 +    {
 +        for(m=0; (m<DIM); m++)
 +            box_size[m]=box[m][m];
 +        
 +        if ((fr->eeltype == eelSHIFT && fr->rcoulomb > fr->rcoulomb_switch))
 +            set_shift_consts(fp,fr->rcoulomb_switch,fr->rcoulomb,box_size,fr);
 +    }
 +    
 +    fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) ||
 +                       gmx_mtop_ftype_count(mtop,F_POSRES) > 0 ||
 +                       gmx_mtop_ftype_count(mtop,F_FBPOSRES) > 0 ||
 +                       IR_ELEC_FIELD(*ir) ||
 +                       (fr->adress_icor != eAdressICOff)
 +                      );
 +    
 +    if (fr->cutoff_scheme == ecutsGROUP &&
 +        ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr)) {
 +        /* Count the total number of charge groups */
 +        fr->cg_nalloc = ncg_mtop(mtop);
 +        srenew(fr->cg_cm,fr->cg_nalloc);
 +    }
 +    if (fr->shift_vec == NULL)
 +        snew(fr->shift_vec,SHIFTS);
 +    
 +    if (fr->fshift == NULL)
 +        snew(fr->fshift,SHIFTS);
 +    
 +    if (fr->nbfp == NULL) {
 +        fr->ntype = mtop->ffparams.atnr;
 +        fr->nbfp  = mk_nbfp(&mtop->ffparams,fr->bBHAM);
 +    }
 +    
 +    /* Copy the energy group exclusions */
 +    fr->egp_flags = ir->opts.egp_flags;
 +    
 +    /* Van der Waals stuff */
 +    fr->rvdw        = cutoff_inf(ir->rvdw);
 +    fr->rvdw_switch = ir->rvdw_switch;
 +    if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM) {
 +        if (fr->rvdw_switch >= fr->rvdw)
 +            gmx_fatal(FARGS,"rvdw_switch (%f) must be < rvdw (%f)",
 +                      fr->rvdw_switch,fr->rvdw);
 +        if (fp)
 +            fprintf(fp,"Using %s Lennard-Jones, switch between %g and %g nm\n",
 +                    (fr->eeltype==eelSWITCH) ? "switched":"shifted",
 +                    fr->rvdw_switch,fr->rvdw);
 +    } 
 +    
 +    if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
 +        gmx_fatal(FARGS,"Switch/shift interaction not supported with Buckingham");
 +    
 +    if (fp)
 +        fprintf(fp,"Cut-off's:   NS: %g   Coulomb: %g   %s: %g\n",
 +                fr->rlist,fr->rcoulomb,fr->bBHAM ? "BHAM":"LJ",fr->rvdw);
 +    
 +    fr->eDispCorr = ir->eDispCorr;
 +    if (ir->eDispCorr != edispcNO)
 +    {
 +        set_avcsixtwelve(fp,fr,mtop);
 +    }
 +    
 +    if (fr->bBHAM)
 +    {
 +        set_bham_b_max(fp,fr,mtop);
 +    }
 +
 +    fr->bGB = (ir->implicit_solvent == eisGBSA);
 +      fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
 +
 +    /* Copy the GBSA data (radius, volume and surftens for each
 +     * atomtype) from the topology atomtype section to forcerec.
 +     */
 +    snew(fr->atype_radius,fr->ntype);
 +    snew(fr->atype_vol,fr->ntype);
 +    snew(fr->atype_surftens,fr->ntype);
 +    snew(fr->atype_gb_radius,fr->ntype);
 +    snew(fr->atype_S_hct,fr->ntype);
 +
 +    if (mtop->atomtypes.nr > 0)
 +    {
 +        for(i=0;i<fr->ntype;i++)
 +            fr->atype_radius[i] =mtop->atomtypes.radius[i];
 +        for(i=0;i<fr->ntype;i++)
 +            fr->atype_vol[i] = mtop->atomtypes.vol[i];
 +        for(i=0;i<fr->ntype;i++)
 +            fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
 +        for(i=0;i<fr->ntype;i++)
 +            fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
 +        for(i=0;i<fr->ntype;i++)
 +            fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
 +    }  
 +      
 +      /* Generate the GB table if needed */
 +      if(fr->bGB)
 +      {
 +#ifdef GMX_DOUBLE
 +              fr->gbtabscale=2000;
 +#else
 +              fr->gbtabscale=500;
 +#endif
 +              
 +              fr->gbtabr=100;
 +              fr->gbtab=make_gb_table(fp,oenv,fr,tabpfn,fr->gbtabscale);
 +
 +        init_gb(&fr->born,cr,fr,ir,mtop,ir->rgbradii,ir->gb_algorithm);
 +
 +        /* Copy local gb data (for dd, this is done in dd_partition_system) */
 +        if (!DOMAINDECOMP(cr))
 +        {
 +            make_local_gb(cr,fr->born,ir->gb_algorithm);
 +        }
 +    }
 +
 +    /* Set the charge scaling */
 +    if (fr->epsilon_r != 0)
 +        fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
 +    else
 +        /* eps = 0 is infinite dieletric: no coulomb interactions */
 +        fr->epsfac = 0;
 +    
 +    /* Reaction field constants */
 +    if (EEL_RF(fr->eeltype))
 +        calc_rffac(fp,fr->eeltype,fr->epsilon_r,fr->epsilon_rf,
 +                   fr->rcoulomb,fr->temp,fr->zsquare,box,
 +                   &fr->kappa,&fr->k_rf,&fr->c_rf);
 +    
 +    set_chargesum(fp,fr,mtop);
 +    
 +    /* if we are using LR electrostatics, and they are tabulated,
 +     * the tables will contain modified coulomb interactions.
 +     * Since we want to use the non-shifted ones for 1-4
 +     * coulombic interactions, we must have an extra set of tables.
 +     */
 +    
 +    /* Construct tables.
 +     * A little unnecessary to make both vdw and coul tables sometimes,
 +     * but what the heck... */
 +    
 +    bTab = fr->bcoultab || fr->bvdwtab || fr->bEwald;
 +
 +    bSep14tab = ((!bTab || fr->eeltype!=eelCUT || fr->vdwtype!=evdwCUT ||
 +                  fr->bBHAM || fr->bEwald) &&
 +                 (gmx_mtop_ftype_count(mtop,F_LJ14) > 0 ||
 +                  gmx_mtop_ftype_count(mtop,F_LJC14_Q) > 0 ||
 +                  gmx_mtop_ftype_count(mtop,F_LJC_PAIRS_NB) > 0));
 +
 +    negp_pp = ir->opts.ngener - ir->nwall;
 +    negptable = 0;
 +    if (!bTab) {
 +        bNormalnblists = TRUE;
 +        fr->nnblists = 1;
 +    } else {
 +        bNormalnblists = (ir->eDispCorr != edispcNO);
 +        for(egi=0; egi<negp_pp; egi++) {
 +            for(egj=egi;  egj<negp_pp; egj++) {
 +                egp_flags = ir->opts.egp_flags[GID(egi,egj,ir->opts.ngener)];
 +                if (!(egp_flags & EGP_EXCL)) {
 +                    if (egp_flags & EGP_TABLE) {
 +                        negptable++;
 +                    } else {
 +                        bNormalnblists = TRUE;
 +                    }
 +                }
 +            }
 +        }
 +        if (bNormalnblists) {
 +            fr->nnblists = negptable + 1;
 +        } else {
 +            fr->nnblists = negptable;
 +        }
 +        if (fr->nnblists > 1)
 +            snew(fr->gid2nblists,ir->opts.ngener*ir->opts.ngener);
 +    }
 +    snew(fr->nblists,fr->nnblists);
 +    
 +    /* This code automatically gives table length tabext without cut-off's,
 +     * in that case grompp should already have checked that we do not need
 +     * normal tables and we only generate tables for 1-4 interactions.
 +     */
 +    rtab = ir->rlistlong + ir->tabext;
 +
 +    if (bTab) {
 +        /* make tables for ordinary interactions */
 +        if (bNormalnblists) {
 +            make_nbf_tables(fp,oenv,fr,rtab,cr,tabfn,NULL,NULL,&fr->nblists[0]);
 +            if (!bSep14tab)
 +                fr->tab14 = fr->nblists[0].table_elec_vdw;
 +            m = 1;
 +        } else {
 +            m = 0;
 +        }
 +        if (negptable > 0) {
 +            /* Read the special tables for certain energy group pairs */
 +            nm_ind = mtop->groups.grps[egcENER].nm_ind;
 +            for(egi=0; egi<negp_pp; egi++) {
 +                for(egj=egi;  egj<negp_pp; egj++) {
 +                    egp_flags = ir->opts.egp_flags[GID(egi,egj,ir->opts.ngener)];
 +                    if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL)) {
 +                        nbl = &(fr->nblists[m]);
 +                        if (fr->nnblists > 1) {
 +                            fr->gid2nblists[GID(egi,egj,ir->opts.ngener)] = m;
 +                        }
 +                        /* Read the table file with the two energy groups names appended */
 +                        make_nbf_tables(fp,oenv,fr,rtab,cr,tabfn,
 +                                        *mtop->groups.grpname[nm_ind[egi]],
 +                                        *mtop->groups.grpname[nm_ind[egj]],
 +                                        &fr->nblists[m]);
 +                        m++;
 +                    } else if (fr->nnblists > 1) {
 +                        fr->gid2nblists[GID(egi,egj,ir->opts.ngener)] = 0;
 +                    }
 +                }
 +            }
 +        }
 +    }
 +    if (bSep14tab)
 +    {
 +        /* generate extra tables with plain Coulomb for 1-4 interactions only */
 +        fr->tab14 = make_tables(fp,oenv,fr,MASTER(cr),tabpfn,rtab,
 +                                GMX_MAKETABLES_14ONLY);
 +    }
 +
 +    /* Read AdResS Thermo Force table if needed */
 +    if(fr->adress_icor == eAdressICThermoForce)
 +    {
 +        /* old todo replace */ 
 +        
 +        if (ir->adress->n_tf_grps > 0){
 +            make_adress_tf_tables(fp,oenv,fr,ir,tabfn, mtop, box);
 +
 +        }else{
 +            /* load the default table */
 +            snew(fr->atf_tabs, 1);
 +            fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp,oenv,fr,tabafn, box);
 +        }
 +    }
 +    
 +    /* Wall stuff */
 +    fr->nwall = ir->nwall;
 +    if (ir->nwall && ir->wall_type==ewtTABLE)
 +    {
 +        make_wall_tables(fp,oenv,ir,tabfn,&mtop->groups,fr);
 +    }
 +    
 +    if (fcd && tabbfn) {
 +        fcd->bondtab  = make_bonded_tables(fp,
 +                                           F_TABBONDS,F_TABBONDSNC,
 +                                           mtop,tabbfn,"b");
 +        fcd->angletab = make_bonded_tables(fp,
 +                                           F_TABANGLES,-1,
 +                                           mtop,tabbfn,"a");
 +        fcd->dihtab   = make_bonded_tables(fp,
 +                                           F_TABDIHS,-1,
 +                                           mtop,tabbfn,"d");
 +    } else {
 +        if (debug)
 +            fprintf(debug,"No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
 +    }
 +    
 +    /* QM/MM initialization if requested
 +     */
 +    if (ir->bQMMM)
 +    {
 +        fprintf(stderr,"QM/MM calculation requested.\n");
 +    }
 +    
 +    fr->bQMMM      = ir->bQMMM;   
 +    fr->qr         = mk_QMMMrec();
 +    
 +    /* Set all the static charge group info */
 +    fr->cginfo_mb = init_cginfo_mb(fp,mtop,fr,bNoSolvOpt,
 +                                   &fr->bExcl_IntraCGAll_InterCGNone);
 +    if (DOMAINDECOMP(cr)) {
 +        fr->cginfo = NULL;
 +    } else {
 +        fr->cginfo = cginfo_expand(mtop->nmolblock,fr->cginfo_mb);
 +    }
 +    
 +    if (!DOMAINDECOMP(cr))
 +    {
 +        /* When using particle decomposition, the effect of the second argument,
 +         * which sets fr->hcg, is corrected later in do_md and init_em.
 +         */
 +        forcerec_set_ranges(fr,ncg_mtop(mtop),ncg_mtop(mtop),
 +                            mtop->natoms,mtop->natoms,mtop->natoms);
 +    }
 +    
 +    fr->print_force = print_force;
 +
 +
 +    /* coarse load balancing vars */
 +    fr->t_fnbf=0.;
 +    fr->t_wait=0.;
 +    fr->timesteps=0;
 +    
 +    /* Initialize neighbor search */
 +    init_ns(fp,cr,&fr->ns,fr,mtop,box);
 +
 +    if (cr->duty & DUTY_PP)
 +    {
 +        gmx_nonbonded_setup(fp,fr,bGenericKernelOnly);
 +    /*
 +     if (ir->bAdress)
 +        {
 +            gmx_setup_adress_kernels(fp,bGenericKernelOnly);
 +        }
 +     */
 +    }
 +
 +    /* Initialize the thread working data for bonded interactions */
 +    init_forcerec_f_threads(fr,mtop->groups.grps[egcENER].nr);
 +    
 +    snew(fr->excl_load,fr->nthreads+1);
 +
 +    if (fr->cutoff_scheme == ecutsVERLET)
 +    {
 +        if (ir->rcoulomb != ir->rvdw)
 +        {
 +            gmx_fatal(FARGS,"With Verlet lists rcoulomb and rvdw should be identical");
 +        }
 +
 +        init_nb_verlet(fp, &fr->nbv, ir, fr, cr, nbpu_opt);
 +    }
 +
 +    /* fr->ic is used both by verlet and group kernels (to some extent) now */
 +    init_interaction_const(fp, &fr->ic, fr, rtab);
 +    if (ir->eDispCorr != edispcNO)
 +    {
 +        calc_enervirdiff(fp,ir->eDispCorr,fr);
 +    }
 +}
 +
 +#define pr_real(fp,r) fprintf(fp,"%s: %e\n",#r,r)
 +#define pr_int(fp,i)  fprintf((fp),"%s: %d\n",#i,i)
 +#define pr_bool(fp,b) fprintf((fp),"%s: %s\n",#b,bool_names[b])
 +
 +void pr_forcerec(FILE *fp,t_forcerec *fr,t_commrec *cr)
 +{
 +  int i;
 +
 +  pr_real(fp,fr->rlist);
 +  pr_real(fp,fr->rcoulomb);
 +  pr_real(fp,fr->fudgeQQ);
 +  pr_bool(fp,fr->bGrid);
 +  pr_bool(fp,fr->bTwinRange);
 +  /*pr_int(fp,fr->cg0);
 +    pr_int(fp,fr->hcg);*/
 +  for(i=0; i<fr->nnblists; i++)
 +    pr_int(fp,fr->nblists[i].table_elec_vdw.n);
 +  pr_real(fp,fr->rcoulomb_switch);
 +  pr_real(fp,fr->rcoulomb);
 +  
 +  fflush(fp);
 +}
 +
 +void forcerec_set_excl_load(t_forcerec *fr,
 +                            const gmx_localtop_t *top,const t_commrec *cr)
 +{
 +    const int *ind,*a;
 +    int t,i,j,ntot,n,ntarget;
 +
 +    if (cr != NULL && PARTDECOMP(cr))
 +    {
 +        /* No OpenMP with particle decomposition */
 +        pd_at_range(cr,
 +                    &fr->excl_load[0],
 +                    &fr->excl_load[1]);
 +
 +        return;
 +    }
 +
 +    ind = top->excls.index;
 +    a   = top->excls.a;
 +
 +    ntot = 0;
 +    for(i=0; i<top->excls.nr; i++)
 +    {
 +        for(j=ind[i]; j<ind[i+1]; j++)
 +        {
 +            if (a[j] > i)
 +            {
 +                ntot++;
 +            }
 +        }
 +    }
 +
 +    fr->excl_load[0] = 0;
 +    n = 0;
 +    i = 0;
 +    for(t=1; t<=fr->nthreads; t++)
 +    {
 +        ntarget = (ntot*t)/fr->nthreads;
 +        while(i < top->excls.nr && n < ntarget)
 +        {
 +            for(j=ind[i]; j<ind[i+1]; j++)
 +            {
 +                if (a[j] > i)
 +                {
 +                    n++;
 +                }
 +            }
 +            i++;
 +        }
 +        fr->excl_load[t] = i;
 +    }
 +}
 +
Simple merge
Simple merge
Simple merge
index 46a283f2f55f93e4ee38d6302ff6307305d0132a,0000000000000000000000000000000000000000..b0dd488e5a55798017a6deeabb0a7351f42cfb22
mode 100644,000000..100644
--- /dev/null
@@@ -1,2639 -1,0 +1,2660 @@@
-         nlist->maxnrj = over_alloc_small(nlist->nrj + 1);
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <math.h>
 +#include <string.h>
 +#include "sysstuff.h"
 +#include "smalloc.h"
 +#include "macros.h"
 +#include "maths.h"
 +#include "vec.h"
 +#include "network.h"
 +#include "nsgrid.h"
 +#include "force.h"
 +#include "nonbonded.h"
 +#include "ns.h"
 +#include "pbc.h"
 +#include "names.h"
 +#include "gmx_fatal.h"
 +#include "nrnb.h"
 +#include "txtdump.h"
 +#include "mtop_util.h"
 +
 +#include "domdec.h"
 +#include "adress.h"
 +
 +
 +/* 
 + *    E X C L U S I O N   H A N D L I N G
 + */
 +
 +#ifdef DEBUG
 +static void SETEXCL_(t_excl e[],atom_id i,atom_id j)
 +{   e[j] = e[j] | (1<<i); }
 +static void RMEXCL_(t_excl e[],atom_id i,atom_id j) 
 +{ e[j]=e[j] & ~(1<<i); }
 +static gmx_bool ISEXCL_(t_excl e[],atom_id i,atom_id j) 
 +{ return (gmx_bool)(e[j] & (1<<i)); }
 +static gmx_bool NOTEXCL_(t_excl e[],atom_id i,atom_id j)
 +{  return !(ISEXCL(e,i,j)); }
 +#else
 +#define SETEXCL(e,i,j) (e)[((atom_id) (j))] |= (1<<((atom_id) (i)))
 +#define RMEXCL(e,i,j)  (e)[((atom_id) (j))] &= (~(1<<((atom_id) (i))))
 +#define ISEXCL(e,i,j)  (gmx_bool) ((e)[((atom_id) (j))] & (1<<((atom_id) (i))))
 +#define NOTEXCL(e,i,j) !(ISEXCL(e,i,j))
 +#endif
 +
++static int
++round_up_to_simd_width(int length, int simd_width)
++{
++    int offset,newlength;
++    
++    offset = (simd_width>0) ? length % simd_width : 0;
++
++    return (offset==0) ? length : length-offset+simd_width;
++}
 +/************************************************
 + *
 + *  U T I L I T I E S    F O R    N S
 + *
 + ************************************************/
 +
 +static void reallocate_nblist(t_nblist *nl)
 +{
 +    if (gmx_debug_at)
 +    {
 +        fprintf(debug,"reallocating neigborlist (ielec=%d, ivdw=%d, igeometry=%d, free_energy=%d), maxnri=%d\n",
 +                nl->ielec,nl->ivdw,nl->igeometry,nl->free_energy,nl->maxnri);
 +    }
 +    srenew(nl->iinr,   nl->maxnri);
 +    if (nl->igeometry == GMX_NBLIST_GEOMETRY_CG_CG)
 +    {
 +        srenew(nl->iinr_end,nl->maxnri);
 +    }
 +    srenew(nl->gid,    nl->maxnri);
 +    srenew(nl->shift,  nl->maxnri);
 +    srenew(nl->jindex, nl->maxnri+1);
 +}
 +
 +
 +static void init_nblist(FILE *log, t_nblist *nl_sr,t_nblist *nl_lr,
 +                        int maxsr,int maxlr,
 +                        int ivdw, int ivdwmod,
 +                        int ielec, int ielecmod,
 +                        gmx_bool bfree, int igeometry)
 +{
 +    t_nblist *nl;
 +    int      homenr;
 +    int      i,nn;
 +    
 +    for(i=0; (i<2); i++)
 +    {
 +        nl     = (i == 0) ? nl_sr : nl_lr;
 +        homenr = (i == 0) ? maxsr : maxlr;
 +
 +        if (nl == NULL)
 +        {
 +            continue;
 +        }
 +
 +
 +        /* Set coul/vdw in neighborlist, and for the normal loops we determine
 +         * an index of which one to call.
 +         */
 +        nl->ivdw        = ivdw;
 +        nl->ivdwmod     = ivdwmod;
 +        nl->ielec       = ielec;
 +        nl->ielecmod    = ielecmod;
 +        nl->free_energy = bfree;
 +        nl->igeometry   = igeometry;
 +
 +        if (bfree)
 +        {
 +            nl->igeometry  = GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE;
 +        }
 +        
++        /* This will also set the simd_padding_width field */
 +        gmx_nonbonded_set_kernel_pointers( (i==0) ? log : NULL,nl);
 +        
 +        /* maxnri is influenced by the number of shifts (maximum is 8)
 +         * and the number of energy groups.
 +         * If it is not enough, nl memory will be reallocated during the run.
 +         * 4 seems to be a reasonable factor, which only causes reallocation
 +         * during runs with tiny and many energygroups.
 +         */
 +        nl->maxnri      = homenr*4;
 +        nl->maxnrj      = 0;
 +        nl->maxlen      = 0;
 +        nl->nri         = -1;
 +        nl->nrj         = 0;
 +        nl->iinr        = NULL;
 +        nl->gid         = NULL;
 +        nl->shift       = NULL;
 +        nl->jindex      = NULL;
 +        reallocate_nblist(nl);
 +        nl->jindex[0] = 0;
 +
 +        if(debug)
 +        {
 +            fprintf(debug,"Initiating neighbourlist (ielec=%d, ivdw=%d, free=%d) for %s interactions,\nwith %d SR, %d LR atoms.\n",
 +                    nl->ielec,nl->ivdw,nl->free_energy,gmx_nblist_geometry_names[nl->igeometry],maxsr,maxlr);
 +        }
 +    }
 +}
 +
 +void init_neighbor_list(FILE *log,t_forcerec *fr,int homenr)
 +{
 +   /* Make maxlr tunable! (does not seem to be a big difference though) 
 +    * This parameter determines the number of i particles in a long range 
 +    * neighbourlist. Too few means many function calls, too many means
 +    * cache trashing.
 +    */
 +   int maxsr,maxsr_wat,maxlr,maxlr_wat;
 +   int ielec,ielecf,ivdw,ielecmod,ielecmodf,ivdwmod;
 +   int solvent;
 +   int igeometry_def,igeometry_w,igeometry_ww;
 +   int i;
 +   t_nblists *nbl;
 +
 +   /* maxsr     = homenr-fr->nWatMol*3; */
 +   maxsr     = homenr;
 +
 +   if (maxsr < 0)
 +   {
 +     gmx_fatal(FARGS,"%s, %d: Negative number of short range atoms.\n"
 +               "Call your Gromacs dealer for assistance.",__FILE__,__LINE__);
 +   }
 +   /* This is just for initial allocation, so we do not reallocate
 +    * all the nlist arrays many times in a row.
 +    * The numbers seem very accurate, but they are uncritical.
 +    */
 +   maxsr_wat = min(fr->nWatMol,(homenr+2)/3); 
 +   if (fr->bTwinRange) 
 +   {
 +       maxlr     = 50;
 +       maxlr_wat = min(maxsr_wat,maxlr);
 +   }
 +   else
 +   {
 +     maxlr = maxlr_wat = 0;
 +   }  
 +
 +   /* Determine the values for ielec/ivdw. */
 +   ielec = fr->nbkernel_elec_interaction;
 +   ivdw  = fr->nbkernel_vdw_interaction;
 +   ielecmod = fr->nbkernel_elec_modifier;
 +   ivdwmod  = fr->nbkernel_vdw_modifier;
 +
 +   fr->ns.bCGlist = (getenv("GMX_NBLISTCG") != 0);
 +   if (!fr->ns.bCGlist)
 +   {
 +       igeometry_def = GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE;
 +   }
 +   else
 +   {
 +       igeometry_def = GMX_NBLIST_GEOMETRY_CG_CG;
 +       if (log != NULL)
 +       {
 +           fprintf(log,"\nUsing charge-group - charge-group neighbor lists and kernels\n\n");
 +       }
 +   }
 +   
 +   if (fr->solvent_opt == esolTIP4P) {
 +       igeometry_w  = GMX_NBLIST_GEOMETRY_WATER4_PARTICLE;
 +       igeometry_ww = GMX_NBLIST_GEOMETRY_WATER4_WATER4;
 +   } else {
 +       igeometry_w  = GMX_NBLIST_GEOMETRY_WATER3_PARTICLE;
 +       igeometry_ww = GMX_NBLIST_GEOMETRY_WATER3_WATER3;
 +   }
 +
 +   for(i=0; i<fr->nnblists; i++) 
 +   {
 +       nbl = &(fr->nblists[i]);
 +       init_nblist(log,&nbl->nlist_sr[eNL_VDWQQ],&nbl->nlist_lr[eNL_VDWQQ],
 +                   maxsr,maxlr,ivdw,ivdwmod,ielec,ielecmod,FALSE,igeometry_def);
 +       init_nblist(log,&nbl->nlist_sr[eNL_VDW],&nbl->nlist_lr[eNL_VDW],
 +                   maxsr,maxlr,ivdw,ivdwmod,GMX_NBKERNEL_ELEC_NONE,eintmodNONE,FALSE,igeometry_def);
 +       init_nblist(log,&nbl->nlist_sr[eNL_QQ],&nbl->nlist_lr[eNL_QQ],
 +                   maxsr,maxlr,GMX_NBKERNEL_VDW_NONE,eintmodNONE,ielec,ielecmod,FALSE,igeometry_def);
 +       init_nblist(log,&nbl->nlist_sr[eNL_VDWQQ_WATER],&nbl->nlist_lr[eNL_VDWQQ_WATER],
 +                   maxsr_wat,maxlr_wat,ivdw,ivdwmod,ielec,ielecmod, FALSE,igeometry_w);
 +       init_nblist(log,&nbl->nlist_sr[eNL_QQ_WATER],&nbl->nlist_lr[eNL_QQ_WATER],
 +                   maxsr_wat,maxlr_wat,GMX_NBKERNEL_VDW_NONE,eintmodNONE,ielec,ielecmod, FALSE,igeometry_w);
 +       init_nblist(log,&nbl->nlist_sr[eNL_VDWQQ_WATERWATER],&nbl->nlist_lr[eNL_VDWQQ_WATERWATER],
 +                   maxsr_wat,maxlr_wat,ivdw,ivdwmod,ielec,ielecmod, FALSE,igeometry_ww);
 +       init_nblist(log,&nbl->nlist_sr[eNL_QQ_WATERWATER],&nbl->nlist_lr[eNL_QQ_WATERWATER],
 +                   maxsr_wat,maxlr_wat,GMX_NBKERNEL_VDW_NONE,eintmodNONE,ielec,ielecmod, FALSE,igeometry_ww);
 +
 +       /* Did we get the solvent loops so we can use optimized water kernels? */
 +       if(nbl->nlist_sr[eNL_VDWQQ_WATER].kernelptr_vf==NULL
 +          || nbl->nlist_sr[eNL_QQ_WATER].kernelptr_vf==NULL
 +#ifndef DISABLE_WATERWATER_NLIST
 +          || nbl->nlist_sr[eNL_VDWQQ_WATERWATER].kernelptr_vf==NULL
 +          || nbl->nlist_sr[eNL_QQ_WATERWATER].kernelptr_vf==NULL
 +#endif
 +          )
 +       {
 +           fr->solvent_opt = esolNO;
 +           fprintf(log,"Note: The available nonbonded kernels do not support water optimization - disabling.\n");
 +       }
 +       
 +       if (fr->efep != efepNO) 
 +       {
 +           if ((fr->bEwald) && (fr->sc_alphacoul > 0)) /* need to handle long range differently if using softcore */
 +           {
 +               ielecf = GMX_NBKERNEL_ELEC_EWALD;
 +               ielecmodf = eintmodNONE;
 +           }
 +           else
 +           {
 +               ielecf = ielec;
 +               ielecmodf = ielecmod;
 +           }
 +
 +           init_nblist(log,&nbl->nlist_sr[eNL_VDWQQ_FREE],&nbl->nlist_lr[eNL_VDWQQ_FREE],
 +                       maxsr,maxlr,ivdw,ivdwmod,ielecf,ielecmod,TRUE,GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE);
 +           init_nblist(log,&nbl->nlist_sr[eNL_VDW_FREE],&nbl->nlist_lr[eNL_VDW_FREE],
 +                       maxsr,maxlr,ivdw,ivdwmod,GMX_NBKERNEL_ELEC_NONE,eintmodNONE,TRUE,GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE);
 +           init_nblist(log,&nbl->nlist_sr[eNL_QQ_FREE],&nbl->nlist_lr[eNL_QQ_FREE],
 +                       maxsr,maxlr,GMX_NBKERNEL_VDW_NONE,eintmodNONE,ielecf,ielecmod,TRUE,GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE);
 +       }  
 +   }
 +   /* QMMM MM list */
 +   if (fr->bQMMM && fr->qr->QMMMscheme != eQMMMschemeoniom)
 +   {
 +       init_nblist(log,&fr->QMMMlist,NULL,
 +                   maxsr,maxlr,0,0,ielec,ielecmod,FALSE,GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE);
 +   }
 +
 +   if(log!=NULL)
 +   {
 +       fprintf(log,"\n");
 +   }
 +
 +   fr->ns.nblist_initialized=TRUE;
 +}
 +
 +static void reset_nblist(t_nblist *nl)
 +{
 +     nl->nri       = -1;
 +     nl->nrj       = 0;
 +     nl->maxlen    = 0;
 +     if (nl->jindex)
 +     {
 +         nl->jindex[0] = 0;
 +     }
 +}
 +
 +static void reset_neighbor_lists(t_forcerec *fr,gmx_bool bResetSR, gmx_bool bResetLR)
 +{
 +    int n,i;
 +  
 +    if (fr->bQMMM)
 +    {
 +        /* only reset the short-range nblist */
 +        reset_nblist(&(fr->QMMMlist));
 +    }
 +
 +    for(n=0; n<fr->nnblists; n++)
 +    {
 +        for(i=0; i<eNL_NR; i++)
 +        {
 +            if(bResetSR)
 +            {
 +                reset_nblist( &(fr->nblists[n].nlist_sr[i]) );
 +            }
 +            if(bResetLR)
 +            {
 +                reset_nblist( &(fr->nblists[n].nlist_lr[i]) );
 +            }
 +        }
 +    }
 +}
 +
 +
 +
 +
 +static inline void new_i_nblist(t_nblist *nlist,
 +                                gmx_bool bLR,atom_id i_atom,int shift,int gid)
 +{
 +    int    i,k,nri,nshift;
 +    
 +    nri = nlist->nri;
 +    
 +    /* Check whether we have to increase the i counter */
 +    if ((nri == -1) ||
 +        (nlist->iinr[nri]  != i_atom) || 
 +        (nlist->shift[nri] != shift) || 
 +        (nlist->gid[nri]   != gid))
 +    {
 +        /* This is something else. Now see if any entries have 
 +         * been added in the list of the previous atom.
 +         */
 +        if ((nri == -1) ||
 +            ((nlist->jindex[nri+1] > nlist->jindex[nri]) && 
 +             (nlist->gid[nri] != -1)))
 +        {
 +            /* If so increase the counter */
 +            nlist->nri++;
 +            nri++;
 +            if (nlist->nri >= nlist->maxnri)
 +            {
 +                nlist->maxnri += over_alloc_large(nlist->nri);
 +                reallocate_nblist(nlist);
 +            }
 +        }
 +        /* Set the number of neighbours and the atom number */
 +        nlist->jindex[nri+1] = nlist->jindex[nri];
 +        nlist->iinr[nri]     = i_atom;
 +        nlist->gid[nri]      = gid;
 +        nlist->shift[nri]    = shift;
 +    }
 +}
 +
 +static inline void close_i_nblist(t_nblist *nlist) 
 +{
 +    int nri = nlist->nri;
 +    int len;
 +    
 +    if (nri >= 0)
 +    {
++        /* Add elements up to padding. Since we allocate memory in units
++         * of the simd_padding width, we do not have to check for possible
++         * list reallocation here.
++         */
++        while((nlist->nrj % nlist->simd_padding_width)!=0)
++        {
++            /* Use -4 here, so we can write forces for 4 atoms before real data */
++            nlist->jjnr[nlist->nrj++]=-4;
++        }
 +        nlist->jindex[nri+1] = nlist->nrj;
 +        
 +        len=nlist->nrj -  nlist->jindex[nri];
 +        
 +        /* nlist length for water i molecules is treated statically 
 +         * in the innerloops 
 +         */
 +        if (len > nlist->maxlen)
 +        {
 +            nlist->maxlen = len;
 +        }
 +    }
 +}
 +
 +static inline void close_nblist(t_nblist *nlist)
 +{
 +    /* Only close this nblist when it has been initialized.
 +     * Avoid the creation of i-lists with no j-particles.
 +     */
 +    if (nlist->nrj == 0)
 +    {
 +        /* Some assembly kernels do not support empty lists,
 +         * make sure here that we don't generate any empty lists.
 +         * With the current ns code this branch is taken in two cases:
 +         * No i-particles at all: nri=-1 here
 +         * There are i-particles, but no j-particles; nri=0 here
 +         */
 +        nlist->nri = 0;
 +    }
 +    else
 +    {
 +        /* Close list number nri by incrementing the count */
 +        nlist->nri++;
 +    }
 +}
 +
 +static inline void close_neighbor_lists(t_forcerec *fr,gmx_bool bMakeQMMMnblist)
 +{
 +    int n,i;
 +    
 +    if (bMakeQMMMnblist)
 +    {
 +            close_nblist(&(fr->QMMMlist));
 +    }
 +
 +    for(n=0; n<fr->nnblists; n++)
 +    {
 +        for(i=0; (i<eNL_NR); i++)
 +        {
 +            close_nblist(&(fr->nblists[n].nlist_sr[i]));
 +            close_nblist(&(fr->nblists[n].nlist_lr[i]));
 +        }
 +    }
 +}
 +
 +
 +static inline void add_j_to_nblist(t_nblist *nlist,atom_id j_atom,gmx_bool bLR)
 +{
 +    int nrj=nlist->nrj;
 +    
 +    if (nlist->nrj >= nlist->maxnrj)
 +    {
-                       gmx_bool bDoLongRangeNS)
++        nlist->maxnrj = round_up_to_simd_width(over_alloc_small(nlist->nrj + 1),nlist->simd_padding_width);
++        
 +        if (gmx_debug_at)
 +            fprintf(debug,"Increasing %s nblist (ielec=%d,ivdw=%d,free=%d,igeometry=%d) j size to %d\n",
 +                    bLR ? "LR" : "SR",nlist->ielec,nlist->ivdw,nlist->free_energy,nlist->igeometry,nlist->maxnrj);
 +        
 +        srenew(nlist->jjnr,nlist->maxnrj);
 +    }
 +
 +    nlist->jjnr[nrj] = j_atom;
 +    nlist->nrj ++;
 +}
 +
 +static inline void add_j_to_nblist_cg(t_nblist *nlist,
 +                                      atom_id j_start,int j_end,
 +                                      t_excl *bexcl,gmx_bool i_is_j,
 +                                      gmx_bool bLR)
 +{
 +    int nrj=nlist->nrj;
 +    int j;
 +
 +    if (nlist->nrj >= nlist->maxnrj)
 +    {
 +        nlist->maxnrj = over_alloc_small(nlist->nrj + 1);
 +        if (gmx_debug_at)
 +            fprintf(debug,"Increasing %s nblist (ielec=%d,ivdw=%d,free=%d,igeometry=%d) j size to %d\n",
 +                    bLR ? "LR" : "SR",nlist->ielec,nlist->ivdw,nlist->free_energy,nlist->igeometry,nlist->maxnrj);
 +        
 +        srenew(nlist->jjnr    ,nlist->maxnrj);
 +        srenew(nlist->jjnr_end,nlist->maxnrj);
 +        srenew(nlist->excl    ,nlist->maxnrj*MAX_CGCGSIZE);
 +    }
 +
 +    nlist->jjnr[nrj]     = j_start;
 +    nlist->jjnr_end[nrj] = j_end;
 +
 +    if (j_end - j_start > MAX_CGCGSIZE)
 +    {
 +        gmx_fatal(FARGS,"The charge-group - charge-group neighborlist do not support charge groups larger than %d, found a charge group of size %d",MAX_CGCGSIZE,j_end-j_start);
 +    }
 +
 +    /* Set the exclusions */
 +    for(j=j_start; j<j_end; j++)
 +    {
 +        nlist->excl[nrj*MAX_CGCGSIZE + j - j_start] = bexcl[j];
 +    }
 +    if (i_is_j)
 +    {
 +        /* Avoid double counting of intra-cg interactions */
 +        for(j=1; j<j_end-j_start; j++)
 +        {
 +            nlist->excl[nrj*MAX_CGCGSIZE + j] |= (1<<j) - 1;
 +        }
 +    }
 +
 +    nlist->nrj ++;
 +}
 +
 +typedef void
 +put_in_list_t(gmx_bool              bHaveVdW[],
 +              int               ngid,
 +              t_mdatoms *       md,
 +              int               icg,
 +              int               jgid,
 +              int               nj,
 +              atom_id           jjcg[],
 +              atom_id           index[],
 +              t_excl            bExcl[],
 +              int               shift,
 +              t_forcerec *      fr,
 +              gmx_bool          bLR,
 +              gmx_bool          bDoVdW,
 +              gmx_bool          bDoCoul,
 +              int               solvent_opt);
 +
 +static void 
 +put_in_list_at(gmx_bool              bHaveVdW[],
 +               int               ngid,
 +               t_mdatoms *       md,
 +               int               icg,
 +               int               jgid,
 +               int               nj,
 +               atom_id           jjcg[],
 +               atom_id           index[],
 +               t_excl            bExcl[],
 +               int               shift,
 +               t_forcerec *      fr,
 +               gmx_bool          bLR,
 +               gmx_bool          bDoVdW,
 +               gmx_bool          bDoCoul,
 +               int               solvent_opt)
 +{
 +    /* The a[] index has been removed,
 +     * to put it back in i_atom should be a[i0] and jj should be a[jj].
 +     */
 +    t_nblist *   vdwc;
 +    t_nblist *   vdw;
 +    t_nblist *   coul;
 +    t_nblist *   vdwc_free  = NULL;
 +    t_nblist *   vdw_free   = NULL;
 +    t_nblist *   coul_free  = NULL;
 +    t_nblist *   vdwc_ww    = NULL;
 +    t_nblist *   coul_ww    = NULL;
 +    
 +    int           i,j,jcg,igid,gid,nbl_ind,ind_ij;
 +    atom_id   jj,jj0,jj1,i_atom;
 +    int       i0,nicg,len;
 +    
 +    int       *cginfo;
 +    int       *type,*typeB;
 +    real      *charge,*chargeB;
 +    real      qi,qiB,qq,rlj;
 +    gmx_bool      bFreeEnergy,bFree,bFreeJ,bNotEx,*bPert;
 +    gmx_bool      bDoVdW_i,bDoCoul_i,bDoCoul_i_sol;
 +    int       iwater,jwater;
 +    t_nblist  *nlist;
 +    
 +    /* Copy some pointers */
 +    cginfo  = fr->cginfo;
 +    charge  = md->chargeA;
 +    chargeB = md->chargeB;
 +    type    = md->typeA;
 +    typeB   = md->typeB;
 +    bPert   = md->bPerturbed;
 +    
 +    /* Get atom range */
 +    i0     = index[icg];
 +    nicg   = index[icg+1]-i0;
 +    
 +    /* Get the i charge group info */
 +    igid   = GET_CGINFO_GID(cginfo[icg]);
 +
 +    iwater = (solvent_opt!=esolNO) ? GET_CGINFO_SOLOPT(cginfo[icg]) : esolNO;
 +    
 +    bFreeEnergy = FALSE;
 +    if (md->nPerturbed) 
 +    {
 +        /* Check if any of the particles involved are perturbed. 
 +         * If not we can do the cheaper normal put_in_list
 +         * and use more solvent optimization.
 +         */
 +        for(i=0; i<nicg; i++)
 +        {
 +            bFreeEnergy |= bPert[i0+i];
 +        }
 +        /* Loop over the j charge groups */
 +        for(j=0; (j<nj && !bFreeEnergy); j++) 
 +        {
 +            jcg = jjcg[j];
 +            jj0 = index[jcg];
 +            jj1 = index[jcg+1];
 +            /* Finally loop over the atoms in the j-charge group */   
 +            for(jj=jj0; jj<jj1; jj++)
 +            {
 +                bFreeEnergy |= bPert[jj];
 +            }
 +        }
 +    }
 +    
 +    /* Unpack pointers to neighbourlist structs */
 +    if (fr->nnblists == 1)
 +    {
 +        nbl_ind = 0;
 +    }
 +    else
 +    {
 +        nbl_ind = fr->gid2nblists[GID(igid,jgid,ngid)];
 +    }
 +    if (bLR)
 +    {
 +        nlist = fr->nblists[nbl_ind].nlist_lr;
 +    }
 +    else
 +    {
 +        nlist = fr->nblists[nbl_ind].nlist_sr;
 +    }
 +    
 +    if (iwater != esolNO)
 +    {
 +        vdwc = &nlist[eNL_VDWQQ_WATER];
 +        vdw  = &nlist[eNL_VDW];
 +        coul = &nlist[eNL_QQ_WATER];
 +#ifndef DISABLE_WATERWATER_NLIST
 +        vdwc_ww = &nlist[eNL_VDWQQ_WATERWATER];
 +        coul_ww = &nlist[eNL_QQ_WATERWATER];
 +#endif
 +    } 
 +    else 
 +    {
 +        vdwc = &nlist[eNL_VDWQQ];
 +        vdw  = &nlist[eNL_VDW];
 +        coul = &nlist[eNL_QQ];
 +    }
 +    
 +    if (!bFreeEnergy) 
 +    {
 +        if (iwater != esolNO) 
 +        {
 +            /* Loop over the atoms in the i charge group */    
 +            i_atom  = i0;
 +            gid     = GID(igid,jgid,ngid);
 +            /* Create new i_atom for each energy group */
 +            if (bDoCoul && bDoVdW)
 +            {
 +                new_i_nblist(vdwc,bLR,i_atom,shift,gid);
 +#ifndef DISABLE_WATERWATER_NLIST
 +                new_i_nblist(vdwc_ww,bLR,i_atom,shift,gid);
 +#endif
 +            }
 +            if (bDoVdW)
 +            {
 +                new_i_nblist(vdw,bLR,i_atom,shift,gid);
 +            }
 +            if (bDoCoul) 
 +            {
 +                new_i_nblist(coul,bLR,i_atom,shift,gid);
 +#ifndef DISABLE_WATERWATER_NLIST
 +                new_i_nblist(coul_ww,bLR,i_atom,shift,gid);
 +#endif
 +            }      
 +        /* Loop over the j charge groups */
 +            for(j=0; (j<nj); j++) 
 +            {
 +                jcg=jjcg[j];
 +                
 +                if (jcg == icg)
 +                {
 +                    continue;
 +                }
 +                
 +                jj0 = index[jcg];
 +                jwater = GET_CGINFO_SOLOPT(cginfo[jcg]);
 +                
 +                if (iwater == esolSPC && jwater == esolSPC)
 +                {
 +                    /* Interaction between two SPC molecules */
 +                    if (!bDoCoul)
 +                    {
 +                        /* VdW only - only first atoms in each water interact */
 +                        add_j_to_nblist(vdw,jj0,bLR);
 +                    }
 +                    else 
 +                    {
 +#ifdef DISABLE_WATERWATER_NLIST       
 +                        /* Add entries for the three atoms - only do VdW if we need to */
 +                        if (!bDoVdW)
 +                        {
 +                            add_j_to_nblist(coul,jj0,bLR);
 +                        }
 +                        else
 +                        {
 +                            add_j_to_nblist(vdwc,jj0,bLR);
 +                        }
 +                        add_j_to_nblist(coul,jj0+1,bLR);
 +                        add_j_to_nblist(coul,jj0+2,bLR);          
 +#else
 +                        /* One entry for the entire water-water interaction */
 +                        if (!bDoVdW)
 +                        {
 +                            add_j_to_nblist(coul_ww,jj0,bLR);
 +                        }
 +                        else
 +                        {
 +                            add_j_to_nblist(vdwc_ww,jj0,bLR);
 +                        }
 +#endif
 +                    }  
 +                } 
 +                else if (iwater == esolTIP4P && jwater == esolTIP4P) 
 +                {
 +                    /* Interaction between two TIP4p molecules */
 +                    if (!bDoCoul)
 +                    {
 +                        /* VdW only - only first atoms in each water interact */
 +                        add_j_to_nblist(vdw,jj0,bLR);
 +                    }
 +                    else 
 +                    {
 +#ifdef DISABLE_WATERWATER_NLIST       
 +                        /* Add entries for the four atoms - only do VdW if we need to */
 +                        if (bDoVdW)
 +                        {
 +                            add_j_to_nblist(vdw,jj0,bLR);
 +                        }
 +                        add_j_to_nblist(coul,jj0+1,bLR);
 +                        add_j_to_nblist(coul,jj0+2,bLR);          
 +                        add_j_to_nblist(coul,jj0+3,bLR);          
 +#else
 +                        /* One entry for the entire water-water interaction */
 +                        if (!bDoVdW)
 +                        {
 +                            add_j_to_nblist(coul_ww,jj0,bLR);
 +                        }
 +                        else
 +                        {
 +                            add_j_to_nblist(vdwc_ww,jj0,bLR);
 +                        }
 +#endif
 +                    }                                         
 +                }
 +                else 
 +                {
 +                    /* j charge group is not water, but i is.
 +                     * Add entries to the water-other_atom lists; the geometry of the water
 +                     * molecule doesn't matter - that is taken care of in the nonbonded kernel,
 +                     * so we don't care if it is SPC or TIP4P...
 +                     */
 +                    
 +                    jj1 = index[jcg+1];
 +                    
 +                    if (!bDoVdW) 
 +                    {
 +                        for(jj=jj0; (jj<jj1); jj++) 
 +                        {
 +                            if (charge[jj] != 0)
 +                            {
 +                                add_j_to_nblist(coul,jj,bLR);
 +                            }
 +                        }
 +                    }
 +                    else if (!bDoCoul)
 +                    {
 +                        for(jj=jj0; (jj<jj1); jj++)
 +                        {
 +                            if (bHaveVdW[type[jj]])
 +                            {
 +                                add_j_to_nblist(vdw,jj,bLR);
 +                            }
 +                        }
 +                    }
 +                    else 
 +                    {
 +                        /* _charge_ _groups_ interact with both coulomb and LJ */
 +                        /* Check which atoms we should add to the lists!       */
 +                        for(jj=jj0; (jj<jj1); jj++) 
 +                        {
 +                            if (bHaveVdW[type[jj]]) 
 +                            {
 +                                if (charge[jj] != 0)
 +                                {
 +                                    add_j_to_nblist(vdwc,jj,bLR);
 +                                }
 +                                else
 +                                {
 +                                    add_j_to_nblist(vdw,jj,bLR);
 +                                }
 +                            }
 +                            else if (charge[jj] != 0)
 +                            {
 +                                add_j_to_nblist(coul,jj,bLR);
 +                            }
 +                        }
 +                    }
 +                }
 +            }
 +            close_i_nblist(vdw); 
 +            close_i_nblist(coul); 
 +            close_i_nblist(vdwc);  
 +#ifndef DISABLE_WATERWATER_NLIST
 +            close_i_nblist(coul_ww);
 +            close_i_nblist(vdwc_ww); 
 +#endif
 +        } 
 +        else
 +        { 
 +            /* no solvent as i charge group */
 +            /* Loop over the atoms in the i charge group */    
 +            for(i=0; i<nicg; i++) 
 +            {
 +                i_atom  = i0+i;
 +                gid     = GID(igid,jgid,ngid);
 +                qi      = charge[i_atom];
 +                
 +                /* Create new i_atom for each energy group */
 +                if (bDoVdW && bDoCoul)
 +                {
 +                    new_i_nblist(vdwc,bLR,i_atom,shift,gid);
 +                }
 +                if (bDoVdW)
 +                {
 +                    new_i_nblist(vdw,bLR,i_atom,shift,gid);
 +                }
 +                if (bDoCoul)
 +                {
 +                    new_i_nblist(coul,bLR,i_atom,shift,gid);
 +                }
 +                bDoVdW_i  = (bDoVdW  && bHaveVdW[type[i_atom]]);
 +                bDoCoul_i = (bDoCoul && qi!=0);
 +                
 +                if (bDoVdW_i || bDoCoul_i) 
 +                {
 +                    /* Loop over the j charge groups */
 +                    for(j=0; (j<nj); j++) 
 +                    {
 +                        jcg=jjcg[j];
 +                        
 +                        /* Check for large charge groups */
 +                        if (jcg == icg)
 +                        {
 +                            jj0 = i0 + i + 1;
 +                        }
 +                        else
 +                        {
 +                            jj0 = index[jcg];
 +                        }
 +                        
 +                        jj1=index[jcg+1];
 +                        /* Finally loop over the atoms in the j-charge group */       
 +                        for(jj=jj0; jj<jj1; jj++) 
 +                        {
 +                            bNotEx = NOTEXCL(bExcl,i,jj);
 +                            
 +                            if (bNotEx) 
 +                            {
 +                                if (!bDoVdW_i) 
 +                                { 
 +                                    if (charge[jj] != 0)
 +                                    {
 +                                        add_j_to_nblist(coul,jj,bLR);
 +                                    }
 +                                }
 +                                else if (!bDoCoul_i) 
 +                                {
 +                                    if (bHaveVdW[type[jj]])
 +                                    {
 +                                        add_j_to_nblist(vdw,jj,bLR);
 +                                    }
 +                                }
 +                                else 
 +                                {
 +                                    if (bHaveVdW[type[jj]]) 
 +                                    {
 +                                        if (charge[jj] != 0)
 +                                        {
 +                                            add_j_to_nblist(vdwc,jj,bLR);
 +                                        }
 +                                        else
 +                                        {
 +                                            add_j_to_nblist(vdw,jj,bLR);
 +                                        }
 +                                    } 
 +                                    else if (charge[jj] != 0)
 +                                    {
 +                                        add_j_to_nblist(coul,jj,bLR);
 +                                    }
 +                                }
 +                            }
 +                        }
 +                    }
 +                }
 +                close_i_nblist(vdw);
 +                close_i_nblist(coul);
 +                close_i_nblist(vdwc);
 +            }
 +        }
 +    }
 +    else
 +    {
 +        /* we are doing free energy */
 +        vdwc_free = &nlist[eNL_VDWQQ_FREE];
 +        vdw_free  = &nlist[eNL_VDW_FREE];
 +        coul_free = &nlist[eNL_QQ_FREE];
 +        /* Loop over the atoms in the i charge group */    
 +        for(i=0; i<nicg; i++) 
 +        {
 +            i_atom  = i0+i;
 +            gid     = GID(igid,jgid,ngid);
 +            qi      = charge[i_atom];
 +            qiB     = chargeB[i_atom];
 +            
 +            /* Create new i_atom for each energy group */
 +            if (bDoVdW && bDoCoul) 
 +                new_i_nblist(vdwc,bLR,i_atom,shift,gid);
 +            if (bDoVdW)   
 +                new_i_nblist(vdw,bLR,i_atom,shift,gid);
 +            if (bDoCoul) 
 +                new_i_nblist(coul,bLR,i_atom,shift,gid);
 +            
 +            new_i_nblist(vdw_free,bLR,i_atom,shift,gid);
 +            new_i_nblist(coul_free,bLR,i_atom,shift,gid);
 +            new_i_nblist(vdwc_free,bLR,i_atom,shift,gid);
 +            
 +            bDoVdW_i  = (bDoVdW  &&
 +                         (bHaveVdW[type[i_atom]] || bHaveVdW[typeB[i_atom]]));
 +            bDoCoul_i = (bDoCoul && (qi!=0 || qiB!=0));
 +            /* For TIP4P the first atom does not have a charge,
 +             * but the last three do. So we should still put an atom
 +             * without LJ but with charge in the water-atom neighborlist
 +             * for a TIP4p i charge group.
 +             * For SPC type water the first atom has LJ and charge,
 +             * so there is no such problem.
 +             */
 +            if (iwater == esolNO)
 +            {
 +                bDoCoul_i_sol = bDoCoul_i;
 +            }
 +            else
 +            {
 +                bDoCoul_i_sol = bDoCoul;
 +            }
 +            
 +            if (bDoVdW_i || bDoCoul_i_sol) 
 +            {
 +                /* Loop over the j charge groups */
 +                for(j=0; (j<nj); j++)
 +                {
 +                    jcg=jjcg[j];
 +                    
 +                    /* Check for large charge groups */
 +                    if (jcg == icg)
 +                    {
 +                        jj0 = i0 + i + 1;
 +                    }
 +                    else
 +                    {
 +                        jj0 = index[jcg];
 +                    }
 +                    
 +                    jj1=index[jcg+1];
 +                    /* Finally loop over the atoms in the j-charge group */   
 +                    bFree = bPert[i_atom];
 +                    for(jj=jj0; (jj<jj1); jj++) 
 +                    {
 +                        bFreeJ = bFree || bPert[jj];
 +                        /* Complicated if, because the water H's should also
 +                         * see perturbed j-particles
 +                         */
 +                        if (iwater==esolNO || i==0 || bFreeJ) 
 +                        {
 +                            bNotEx = NOTEXCL(bExcl,i,jj);
 +                            
 +                            if (bNotEx) 
 +                            {
 +                                if (bFreeJ)
 +                                {
 +                                    if (!bDoVdW_i) 
 +                                    {
 +                                        if (charge[jj]!=0 || chargeB[jj]!=0)
 +                                        {
 +                                            add_j_to_nblist(coul_free,jj,bLR);
 +                                        }
 +                                    }
 +                                    else if (!bDoCoul_i) 
 +                                    {
 +                                        if (bHaveVdW[type[jj]] || bHaveVdW[typeB[jj]])
 +                                        {
 +                                            add_j_to_nblist(vdw_free,jj,bLR);
 +                                        }
 +                                    }
 +                                    else 
 +                                    {
 +                                        if (bHaveVdW[type[jj]] || bHaveVdW[typeB[jj]]) 
 +                                        {
 +                                            if (charge[jj]!=0 || chargeB[jj]!=0)
 +                                            {
 +                                                add_j_to_nblist(vdwc_free,jj,bLR);
 +                                            }
 +                                            else
 +                                            {
 +                                                add_j_to_nblist(vdw_free,jj,bLR);
 +                                            }
 +                                        }
 +                                        else if (charge[jj]!=0 || chargeB[jj]!=0)
 +                                            add_j_to_nblist(coul_free,jj,bLR);
 +                                    }
 +                                }
 +                                else if (!bDoVdW_i) 
 +                                { 
 +                                    /* This is done whether or not bWater is set */
 +                                    if (charge[jj] != 0)
 +                                    {
 +                                        add_j_to_nblist(coul,jj,bLR);
 +                                    }
 +                                }
 +                                else if (!bDoCoul_i_sol) 
 +                                { 
 +                                    if (bHaveVdW[type[jj]])
 +                                    {
 +                                        add_j_to_nblist(vdw,jj,bLR);
 +                                    }
 +                                }
 +                                else 
 +                                {
 +                                    if (bHaveVdW[type[jj]]) 
 +                                    {
 +                                        if (charge[jj] != 0)
 +                                        {
 +                                            add_j_to_nblist(vdwc,jj,bLR);
 +                                        }
 +                                        else
 +                                        {
 +                                            add_j_to_nblist(vdw,jj,bLR);
 +                                        }
 +                                    } 
 +                                    else if (charge[jj] != 0)
 +                                    {
 +                                        add_j_to_nblist(coul,jj,bLR);
 +                                    }
 +                                }
 +                            }
 +                        }
 +                    }
 +                }
 +            }
 +            close_i_nblist(vdw);
 +            close_i_nblist(coul);
 +            close_i_nblist(vdwc);
 +            close_i_nblist(vdw_free);
 +            close_i_nblist(coul_free);
 +            close_i_nblist(vdwc_free);
 +        }
 +    }
 +}
 +
 +static void 
 +put_in_list_qmmm(gmx_bool              bHaveVdW[],
 +                 int               ngid,
 +                 t_mdatoms *       md,
 +                 int               icg,
 +                 int               jgid,
 +                 int               nj,
 +                 atom_id           jjcg[],
 +                 atom_id           index[],
 +                 t_excl            bExcl[],
 +                 int               shift,
 +                 t_forcerec *      fr,
 +                 gmx_bool          bLR,
 +                 gmx_bool          bDoVdW,
 +                 gmx_bool          bDoCoul,
 +                 int               solvent_opt)
 +{
 +    t_nblist *   coul;
 +    int         i,j,jcg,igid,gid;
 +    atom_id   jj,jj0,jj1,i_atom;
 +    int       i0,nicg;
 +    gmx_bool      bNotEx;
 +    
 +    /* Get atom range */
 +    i0     = index[icg];
 +    nicg   = index[icg+1]-i0;
 +    
 +    /* Get the i charge group info */
 +    igid   = GET_CGINFO_GID(fr->cginfo[icg]);
 +    
 +    coul = &fr->QMMMlist;
 +    
 +    /* Loop over atoms in the ith charge group */
 +    for (i=0;i<nicg;i++)
 +    {
 +        i_atom = i0+i;
 +        gid    = GID(igid,jgid,ngid);
 +        /* Create new i_atom for each energy group */
 +        new_i_nblist(coul,bLR,i_atom,shift,gid);
 +        
 +        /* Loop over the j charge groups */
 +        for (j=0;j<nj;j++)
 +        {
 +            jcg=jjcg[j];
 +            
 +            /* Charge groups cannot have QM and MM atoms simultaneously */
 +            if (jcg!=icg)
 +            {
 +                jj0 = index[jcg];
 +                jj1 = index[jcg+1];
 +                /* Finally loop over the atoms in the j-charge group */
 +                for(jj=jj0; jj<jj1; jj++)
 +                {
 +                    bNotEx = NOTEXCL(bExcl,i,jj);
 +                    if(bNotEx)
 +                        add_j_to_nblist(coul,jj,bLR);
 +                }
 +            }
 +        }
 +        close_i_nblist(coul);
 +    }
 +}
 +
 +static void 
 +put_in_list_cg(gmx_bool              bHaveVdW[],
 +               int               ngid,
 +               t_mdatoms *       md,
 +               int               icg,
 +               int               jgid,
 +               int               nj,
 +               atom_id           jjcg[],
 +               atom_id           index[],
 +               t_excl            bExcl[],
 +               int               shift,
 +               t_forcerec *      fr,
 +               gmx_bool          bLR,
 +               gmx_bool          bDoVdW,
 +               gmx_bool          bDoCoul,
 +               int               solvent_opt)
 +{
 +    int          cginfo;
 +    int          igid,gid,nbl_ind;
 +    t_nblist *   vdwc;
 +    int          j,jcg;
 +
 +    cginfo = fr->cginfo[icg];
 +
 +    igid = GET_CGINFO_GID(cginfo);
 +    gid  = GID(igid,jgid,ngid);
 +
 +    /* Unpack pointers to neighbourlist structs */
 +    if (fr->nnblists == 1)
 +    {
 +        nbl_ind = 0;
 +    }
 +    else
 +    {
 +        nbl_ind = fr->gid2nblists[gid];
 +    }
 +    if (bLR)
 +    {
 +        vdwc = &fr->nblists[nbl_ind].nlist_lr[eNL_VDWQQ];
 +    }
 +    else
 +    {
 +        vdwc = &fr->nblists[nbl_ind].nlist_sr[eNL_VDWQQ];
 +    }
 +
 +    /* Make a new neighbor list for charge group icg.
 +     * Currently simply one neighbor list is made with LJ and Coulomb.
 +     * If required, zero interactions could be removed here
 +     * or in the force loop.
 +     */
 +    new_i_nblist(vdwc,bLR,index[icg],shift,gid);
 +    vdwc->iinr_end[vdwc->nri] = index[icg+1];
 +
 +    for(j=0; (j<nj); j++) 
 +    {
 +        jcg = jjcg[j];
 +        /* Skip the icg-icg pairs if all self interactions are excluded */
 +        if (!(jcg == icg && GET_CGINFO_EXCL_INTRA(cginfo)))
 +        {
 +            /* Here we add the j charge group jcg to the list,
 +             * exclusions are also added to the list.
 +             */
 +            add_j_to_nblist_cg(vdwc,index[jcg],index[jcg+1],bExcl,icg==jcg,bLR);
 +        }
 +    }
 +
 +    close_i_nblist(vdwc);  
 +}
 +
 +static void setexcl(atom_id start,atom_id end,t_blocka *excl,gmx_bool b,
 +                    t_excl bexcl[])
 +{
 +    atom_id i,k;
 +    
 +    if (b)
 +    {
 +        for(i=start; i<end; i++)
 +        {
 +            for(k=excl->index[i]; k<excl->index[i+1]; k++)
 +            {
 +                SETEXCL(bexcl,i-start,excl->a[k]);
 +            }
 +        }
 +    }
 +    else
 +    {
 +        for(i=start; i<end; i++)
 +        {
 +            for(k=excl->index[i]; k<excl->index[i+1]; k++)
 +            {
 +                RMEXCL(bexcl,i-start,excl->a[k]);
 +            }
 +        }
 +    }
 +}
 +
 +int calc_naaj(int icg,int cgtot)
 +{
 +    int naaj;
 +    
 +    if ((cgtot % 2) == 1)
 +    {
 +        /* Odd number of charge groups, easy */
 +        naaj = 1 + (cgtot/2);
 +    }
 +    else if ((cgtot % 4) == 0)
 +    {
 +    /* Multiple of four is hard */
 +        if (icg < cgtot/2)
 +        {
 +            if ((icg % 2) == 0)
 +            {
 +                naaj=1+(cgtot/2);
 +            }
 +            else
 +            {
 +                naaj=cgtot/2;
 +            }
 +        }
 +        else
 +        {
 +            if ((icg % 2) == 1)
 +            {
 +                naaj=1+(cgtot/2);
 +            }
 +            else
 +            {
 +                naaj=cgtot/2;
 +            }
 +        }
 +    }
 +    else
 +    {
 +        /* cgtot/2 = odd */
 +        if ((icg % 2) == 0)
 +        {
 +            naaj=1+(cgtot/2);
 +        }
 +        else
 +        {
 +            naaj=cgtot/2;
 +        }
 +    }
 +#ifdef DEBUG
 +    fprintf(log,"naaj=%d\n",naaj);
 +#endif
 +
 +    return naaj;
 +}
 +
 +/************************************************
 + *
 + *  S I M P L E      C O R E     S T U F F
 + *
 + ************************************************/
 +
 +static real calc_image_tric(rvec xi,rvec xj,matrix box,
 +                            rvec b_inv,int *shift)
 +{
 +    /* This code assumes that the cut-off is smaller than
 +     * a half times the smallest diagonal element of the box.
 +     */
 +    const real h25=2.5;
 +    real dx,dy,dz;
 +    real r2;
 +    int  tx,ty,tz;
 +    
 +    /* Compute diff vector */
 +    dz = xj[ZZ] - xi[ZZ];
 +    dy = xj[YY] - xi[YY];
 +    dx = xj[XX] - xi[XX];
 +    
 +  /* Perform NINT operation, using trunc operation, therefore
 +   * we first add 2.5 then subtract 2 again
 +   */
 +    tz = dz*b_inv[ZZ] + h25;
 +    tz -= 2;
 +    dz -= tz*box[ZZ][ZZ];
 +    dy -= tz*box[ZZ][YY];
 +    dx -= tz*box[ZZ][XX];
 +
 +    ty = dy*b_inv[YY] + h25;
 +    ty -= 2;
 +    dy -= ty*box[YY][YY];
 +    dx -= ty*box[YY][XX];
 +    
 +    tx = dx*b_inv[XX]+h25;
 +    tx -= 2;
 +    dx -= tx*box[XX][XX];
 +  
 +    /* Distance squared */
 +    r2 = (dx*dx) + (dy*dy) + (dz*dz);
 +
 +    *shift = XYZ2IS(tx,ty,tz);
 +
 +    return r2;
 +}
 +
 +static real calc_image_rect(rvec xi,rvec xj,rvec box_size,
 +                            rvec b_inv,int *shift)
 +{
 +    const real h15=1.5;
 +    real ddx,ddy,ddz;
 +    real dx,dy,dz;
 +    real r2;
 +    int  tx,ty,tz;
 +    
 +    /* Compute diff vector */
 +    dx = xj[XX] - xi[XX];
 +    dy = xj[YY] - xi[YY];
 +    dz = xj[ZZ] - xi[ZZ];
 +  
 +    /* Perform NINT operation, using trunc operation, therefore
 +     * we first add 1.5 then subtract 1 again
 +     */
 +    tx = dx*b_inv[XX] + h15;
 +    ty = dy*b_inv[YY] + h15;
 +    tz = dz*b_inv[ZZ] + h15;
 +    tx--;
 +    ty--;
 +    tz--;
 +    
 +    /* Correct diff vector for translation */
 +    ddx = tx*box_size[XX] - dx;
 +    ddy = ty*box_size[YY] - dy;
 +    ddz = tz*box_size[ZZ] - dz;
 +    
 +    /* Distance squared */
 +    r2 = (ddx*ddx) + (ddy*ddy) + (ddz*ddz);
 +    
 +    *shift = XYZ2IS(tx,ty,tz);
 +    
 +    return r2;
 +}
 +
 +static void add_simple(t_ns_buf *nsbuf,int nrj,atom_id cg_j,
 +                       gmx_bool bHaveVdW[],int ngid,t_mdatoms *md,
 +                       int icg,int jgid,t_block *cgs,t_excl bexcl[],
 +                       int shift,t_forcerec *fr,put_in_list_t *put_in_list)
 +{
 +    if (nsbuf->nj + nrj > MAX_CG)
 +    {
 +        put_in_list(bHaveVdW,ngid,md,icg,jgid,nsbuf->ncg,nsbuf->jcg,
 +                    cgs->index,bexcl,shift,fr,FALSE,TRUE,TRUE,fr->solvent_opt);
 +        /* Reset buffer contents */
 +        nsbuf->ncg = nsbuf->nj = 0;
 +    }
 +    nsbuf->jcg[nsbuf->ncg++] = cg_j;
 +    nsbuf->nj += nrj;
 +}
 +
 +static void ns_inner_tric(rvec x[],int icg,int *i_egp_flags,
 +                          int njcg,atom_id jcg[],
 +                          matrix box,rvec b_inv,real rcut2,
 +                          t_block *cgs,t_ns_buf **ns_buf,
 +                          gmx_bool bHaveVdW[],int ngid,t_mdatoms *md,
 +                          t_excl bexcl[],t_forcerec *fr,
 +                          put_in_list_t *put_in_list)
 +{
 +    int      shift;
 +    int      j,nrj,jgid;
 +    int      *cginfo=fr->cginfo;
 +    atom_id  cg_j,*cgindex;
 +    t_ns_buf *nsbuf;
 +    
 +    cgindex = cgs->index;
 +    shift   = CENTRAL;
 +    for(j=0; (j<njcg); j++)
 +    {
 +        cg_j   = jcg[j];
 +        nrj    = cgindex[cg_j+1]-cgindex[cg_j];
 +        if (calc_image_tric(x[icg],x[cg_j],box,b_inv,&shift) < rcut2)
 +        {
 +            jgid  = GET_CGINFO_GID(cginfo[cg_j]);
 +            if (!(i_egp_flags[jgid] & EGP_EXCL))
 +            {
 +                add_simple(&ns_buf[jgid][shift],nrj,cg_j,
 +                           bHaveVdW,ngid,md,icg,jgid,cgs,bexcl,shift,fr,
 +                           put_in_list);
 +            }
 +        }
 +    }
 +}
 +
 +static void ns_inner_rect(rvec x[],int icg,int *i_egp_flags,
 +                          int njcg,atom_id jcg[],
 +                          gmx_bool bBox,rvec box_size,rvec b_inv,real rcut2,
 +                          t_block *cgs,t_ns_buf **ns_buf,
 +                          gmx_bool bHaveVdW[],int ngid,t_mdatoms *md,
 +                          t_excl bexcl[],t_forcerec *fr,
 +                          put_in_list_t *put_in_list)
 +{
 +    int      shift;
 +    int      j,nrj,jgid;
 +    int      *cginfo=fr->cginfo;
 +    atom_id  cg_j,*cgindex;
 +    t_ns_buf *nsbuf;
 +
 +    cgindex = cgs->index;
 +    if (bBox)
 +    {
 +        shift = CENTRAL;
 +        for(j=0; (j<njcg); j++)
 +        {
 +            cg_j   = jcg[j];
 +            nrj    = cgindex[cg_j+1]-cgindex[cg_j];
 +            if (calc_image_rect(x[icg],x[cg_j],box_size,b_inv,&shift) < rcut2)
 +            {
 +                jgid  = GET_CGINFO_GID(cginfo[cg_j]);
 +                if (!(i_egp_flags[jgid] & EGP_EXCL))
 +                {
 +                    add_simple(&ns_buf[jgid][shift],nrj,cg_j,
 +                               bHaveVdW,ngid,md,icg,jgid,cgs,bexcl,shift,fr,
 +                               put_in_list);
 +                }
 +            }
 +        }
 +    }
 +    else
 +    {
 +        for(j=0; (j<njcg); j++)
 +        {
 +            cg_j   = jcg[j];
 +            nrj    = cgindex[cg_j+1]-cgindex[cg_j];
 +            if ((rcut2 == 0) || (distance2(x[icg],x[cg_j]) < rcut2)) {
 +                jgid  = GET_CGINFO_GID(cginfo[cg_j]);
 +                if (!(i_egp_flags[jgid] & EGP_EXCL))
 +                {
 +                    add_simple(&ns_buf[jgid][CENTRAL],nrj,cg_j,
 +                               bHaveVdW,ngid,md,icg,jgid,cgs,bexcl,CENTRAL,fr,
 +                               put_in_list);
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +/* ns_simple_core needs to be adapted for QMMM still 2005 */
 +
 +static int ns_simple_core(t_forcerec *fr,
 +                          gmx_localtop_t *top,
 +                          t_mdatoms *md,
 +                          matrix box,rvec box_size,
 +                          t_excl bexcl[],atom_id *aaj,
 +                          int ngid,t_ns_buf **ns_buf,
 +                          put_in_list_t *put_in_list,gmx_bool bHaveVdW[])
 +{
 +    int      naaj,k;
 +    real     rlist2;
 +    int      nsearch,icg,jcg,igid,i0,nri,nn;
 +    int      *cginfo;
 +    t_ns_buf *nsbuf;
 +    /* atom_id  *i_atoms; */
 +    t_block  *cgs=&(top->cgs);
 +    t_blocka *excl=&(top->excls);
 +    rvec     b_inv;
 +    int      m;
 +    gmx_bool     bBox,bTriclinic;
 +    int      *i_egp_flags;
 +    
 +    rlist2 = sqr(fr->rlist);
 +    
 +    bBox = (fr->ePBC != epbcNONE);
 +    if (bBox)
 +    {
 +        for(m=0; (m<DIM); m++)
 +        {
 +            b_inv[m] = divide_err(1.0,box_size[m]);
 +        }
 +        bTriclinic = TRICLINIC(box);
 +    }
 +    else
 +    {
 +        bTriclinic = FALSE;
 +    }
 +    
 +    cginfo = fr->cginfo;
 +    
 +    nsearch=0;
 +    for (icg=fr->cg0; (icg<fr->hcg); icg++)
 +    {
 +        /*
 +          i0        = cgs->index[icg];
 +          nri       = cgs->index[icg+1]-i0;
 +          i_atoms   = &(cgs->a[i0]);
 +          i_eg_excl = fr->eg_excl + ngid*md->cENER[*i_atoms];
 +          setexcl(nri,i_atoms,excl,TRUE,bexcl);
 +        */
 +        igid = GET_CGINFO_GID(cginfo[icg]);
 +        i_egp_flags = fr->egp_flags + ngid*igid;
 +        setexcl(cgs->index[icg],cgs->index[icg+1],excl,TRUE,bexcl);
 +        
 +        naaj=calc_naaj(icg,cgs->nr);
 +        if (bTriclinic)
 +        {
 +            ns_inner_tric(fr->cg_cm,icg,i_egp_flags,naaj,&(aaj[icg]),
 +                          box,b_inv,rlist2,cgs,ns_buf,
 +                          bHaveVdW,ngid,md,bexcl,fr,put_in_list);
 +        }
 +        else
 +        {
 +            ns_inner_rect(fr->cg_cm,icg,i_egp_flags,naaj,&(aaj[icg]),
 +                          bBox,box_size,b_inv,rlist2,cgs,ns_buf,
 +                          bHaveVdW,ngid,md,bexcl,fr,put_in_list);
 +        }
 +        nsearch += naaj;
 +        
 +        for(nn=0; (nn<ngid); nn++)
 +        {
 +            for(k=0; (k<SHIFTS); k++)
 +            {
 +                nsbuf = &(ns_buf[nn][k]);
 +                if (nsbuf->ncg > 0)
 +                {
 +                    put_in_list(bHaveVdW,ngid,md,icg,nn,nsbuf->ncg,nsbuf->jcg,
 +                                cgs->index,bexcl,k,fr,FALSE,TRUE,TRUE,fr->solvent_opt);
 +                    nsbuf->ncg=nsbuf->nj=0;
 +                }
 +            }
 +        }
 +        /* setexcl(nri,i_atoms,excl,FALSE,bexcl); */
 +        setexcl(cgs->index[icg],cgs->index[icg+1],excl,FALSE,bexcl);
 +    }
 +    close_neighbor_lists(fr,FALSE);
 +    
 +    return nsearch;
 +}
 +
 +/************************************************
 + *
 + *    N S 5     G R I D     S T U F F
 + *
 + ************************************************/
 +
 +static inline void get_dx(int Nx,real gridx,real rc2,int xgi,real x,
 +                          int *dx0,int *dx1,real *dcx2)
 +{
 +    real dcx,tmp;
 +    int  xgi0,xgi1,i;
 +    
 +    if (xgi < 0)
 +    {
 +        *dx0 = 0;
 +        xgi0 = -1;
 +        *dx1 = -1;
 +        xgi1 = 0;
 +    }
 +    else if (xgi >= Nx)
 +    {
 +        *dx0 = Nx;
 +        xgi0 = Nx-1;
 +        *dx1 = Nx-1;
 +        xgi1 = Nx;
 +    }
 +    else
 +    {
 +        dcx2[xgi] = 0;
 +        *dx0 = xgi;
 +        xgi0 = xgi-1;
 +        *dx1 = xgi;
 +        xgi1 = xgi+1;
 +    }
 +    
 +    for(i=xgi0; i>=0; i--)
 +    {
 +        dcx = (i+1)*gridx-x;
 +        tmp = dcx*dcx;
 +        if (tmp >= rc2)
 +            break;
 +        *dx0 = i;
 +        dcx2[i] = tmp;
 +    }
 +    for(i=xgi1; i<Nx; i++)
 +    {
 +        dcx = i*gridx-x;
 +        tmp = dcx*dcx;
 +        if (tmp >= rc2)
 +        {
 +            break;
 +        }
 +        *dx1 = i;
 +        dcx2[i] = tmp;
 +    }
 +}
 +
 +static inline void get_dx_dd(int Nx,real gridx,real rc2,int xgi,real x,
 +                             int ncpddc,int shift_min,int shift_max,
 +                             int *g0,int *g1,real *dcx2)
 +{
 +    real dcx,tmp;
 +    int  g_min,g_max,shift_home;
 +    
 +    if (xgi < 0)
 +    {
 +        g_min = 0;
 +        g_max = Nx - 1;
 +        *g0   = 0;
 +        *g1   = -1;
 +    }
 +    else if (xgi >= Nx)
 +    {
 +        g_min = 0;
 +        g_max = Nx - 1;
 +        *g0   = Nx;
 +        *g1   = Nx - 1;
 +    }
 +    else
 +    {
 +        if (ncpddc == 0)
 +        {
 +            g_min = 0;
 +            g_max = Nx - 1;
 +        }
 +        else
 +        {
 +            if (xgi < ncpddc)
 +            {
 +                shift_home = 0;
 +            }
 +            else
 +            {
 +                shift_home = -1;
 +            }
 +            g_min = (shift_min == shift_home ? 0          : ncpddc);
 +            g_max = (shift_max == shift_home ? ncpddc - 1 : Nx - 1);
 +        }
 +        if (shift_min > 0)
 +        {
 +            *g0 = g_min;
 +            *g1 = g_min - 1;
 +        }
 +        else if (shift_max < 0)
 +        {
 +            *g0 = g_max + 1;
 +            *g1 = g_max;
 +        }
 +        else
 +        {
 +            *g0 = xgi;
 +            *g1 = xgi;
 +            dcx2[xgi] = 0;
 +        }
 +    }
 +    
 +    while (*g0 > g_min)
 +    {
 +        /* Check one grid cell down */
 +        dcx = ((*g0 - 1) + 1)*gridx - x;
 +        tmp = dcx*dcx;
 +        if (tmp >= rc2)
 +        {
 +            break;
 +        }
 +        (*g0)--;
 +        dcx2[*g0] = tmp;
 +    }
 +    
 +    while (*g1 < g_max)
 +    {
 +        /* Check one grid cell up */
 +        dcx = (*g1 + 1)*gridx - x;
 +        tmp = dcx*dcx;
 +        if (tmp >= rc2)
 +        {
 +            break;
 +        }
 +        (*g1)++;
 +        dcx2[*g1] = tmp;
 +    }
 +}
 +
 +
 +#define sqr(x) ((x)*(x))
 +#define calc_dx2(XI,YI,ZI,y) (sqr(XI-y[XX]) + sqr(YI-y[YY]) + sqr(ZI-y[ZZ]))
 +#define calc_cyl_dx2(XI,YI,y) (sqr(XI-y[XX]) + sqr(YI-y[YY]))
 +/****************************************************
 + *
 + *    F A S T   N E I G H B O R  S E A R C H I N G
 + *
 + *    Optimized neighboursearching routine using grid 
 + *    at least 1x1x1, see GROMACS manual
 + *
 + ****************************************************/
 +
 +
 +static void get_cutoff2(t_forcerec *fr,gmx_bool bDoLongRange,
 +                        real *rvdw2,real *rcoul2,
 +                        real *rs2,real *rm2,real *rl2)
 +{
 +    *rs2 = sqr(fr->rlist);
 +
 +    if (bDoLongRange && fr->bTwinRange)
 +    {
 +        /* The VdW and elec. LR cut-off's could be different,
 +         * so we can not simply set them to rlistlong.
 +         */
 +        if (EVDW_MIGHT_BE_ZERO_AT_CUTOFF(fr->vdwtype) &&
 +            fr->rvdw > fr->rlist)
 +        {
 +            *rvdw2  = sqr(fr->rlistlong);
 +        }
 +        else
 +        {
 +            *rvdw2  = sqr(fr->rvdw);
 +        }
 +        if (EEL_MIGHT_BE_ZERO_AT_CUTOFF(fr->eeltype) &&
 +            fr->rcoulomb > fr->rlist)
 +        {
 +            *rcoul2 = sqr(fr->rlistlong);
 +        }
 +        else
 +        {
 +            *rcoul2 = sqr(fr->rcoulomb);
 +        }
 +    }
 +    else
 +    {
 +        /* Workaround for a gcc -O3 or -ffast-math problem */
 +        *rvdw2  = *rs2;
 +        *rcoul2 = *rs2;
 +    }
 +    *rm2 = min(*rvdw2,*rcoul2);
 +    *rl2 = max(*rvdw2,*rcoul2);
 +}
 +
 +static void init_nsgrid_lists(t_forcerec *fr,int ngid,gmx_ns_t *ns)
 +{
 +    real rvdw2,rcoul2,rs2,rm2,rl2;
 +    int j;
 +
 +    get_cutoff2(fr,TRUE,&rvdw2,&rcoul2,&rs2,&rm2,&rl2);
 +
 +    /* Short range buffers */
 +    snew(ns->nl_sr,ngid);
 +    /* Counters */
 +    snew(ns->nsr,ngid);
 +    snew(ns->nlr_ljc,ngid);
 +    snew(ns->nlr_one,ngid);
 +    
 +    /* Always allocate both list types, since rcoulomb might now change with PME load balancing */
 +    /* Long range VdW and Coul buffers */
 +    snew(ns->nl_lr_ljc,ngid);
 +    /* Long range VdW or Coul only buffers */
 +    snew(ns->nl_lr_one,ngid);
 +
 +    for(j=0; (j<ngid); j++) {
 +        snew(ns->nl_sr[j],MAX_CG);
 +        snew(ns->nl_lr_ljc[j],MAX_CG);
 +        snew(ns->nl_lr_one[j],MAX_CG);
 +    }
 +    if (debug)
 +    {
 +        fprintf(debug,
 +                "ns5_core: rs2 = %g, rm2 = %g, rl2 = %g (nm^2)\n",
 +                rs2,rm2,rl2);
 +    }
 +}
 +
 +static int nsgrid_core(FILE *log,t_commrec *cr,t_forcerec *fr,
 +                       matrix box,rvec box_size,int ngid,
 +                       gmx_localtop_t *top,
 +                       t_grid *grid,rvec x[],
 +                       t_excl bexcl[],gmx_bool *bExcludeAlleg,
 +                       t_nrnb *nrnb,t_mdatoms *md,
 +                       real *lambda,real *dvdlambda,
 +                       gmx_grppairener_t *grppener,
 +                       put_in_list_t *put_in_list,
 +                       gmx_bool bHaveVdW[],
 +                       gmx_bool bDoLongRange,gmx_bool bMakeQMMMnblist)
 +{
 +    gmx_ns_t *ns;
 +    atom_id **nl_lr_ljc,**nl_lr_one,**nl_sr;
 +    int     *nlr_ljc,*nlr_one,*nsr;
 +    gmx_domdec_t *dd=NULL;
 +    t_block *cgs=&(top->cgs);
 +    int     *cginfo=fr->cginfo;
 +    /* atom_id *i_atoms,*cgsindex=cgs->index; */
 +    ivec    sh0,sh1,shp;
 +    int     cell_x,cell_y,cell_z;
 +    int     d,tx,ty,tz,dx,dy,dz,cj;
 +#ifdef ALLOW_OFFDIAG_LT_HALFDIAG
 +    int     zsh_ty,zsh_tx,ysh_tx;
 +#endif
 +    int     dx0,dx1,dy0,dy1,dz0,dz1;
 +    int     Nx,Ny,Nz,shift=-1,j,nrj,nns,nn=-1;
 +    real    gridx,gridy,gridz,grid_x,grid_y,grid_z;
 +    real    *dcx2,*dcy2,*dcz2;
 +    int     zgi,ygi,xgi;
 +    int     cg0,cg1,icg=-1,cgsnr,i0,igid,nri,naaj,max_jcg;
 +    int     jcg0,jcg1,jjcg,cgj0,jgid;
 +    int     *grida,*gridnra,*gridind;
 +    gmx_bool    rvdw_lt_rcoul,rcoul_lt_rvdw;
 +    rvec    xi,*cgcm,grid_offset;
 +    real    r2,rs2,rvdw2,rcoul2,rm2,rl2,XI,YI,ZI,dcx,dcy,dcz,tmp1,tmp2;
 +    int     *i_egp_flags;
 +    gmx_bool    bDomDec,bTriclinicX,bTriclinicY;
 +    ivec    ncpddc;
 +    
 +    ns = &fr->ns;
 +    
 +    bDomDec = DOMAINDECOMP(cr);
 +    if (bDomDec)
 +    {
 +        dd = cr->dd;
 +    }
 +    
 +    bTriclinicX = ((YY < grid->npbcdim &&
 +                    (!bDomDec || dd->nc[YY]==1) && box[YY][XX] != 0) ||
 +                   (ZZ < grid->npbcdim &&
 +                    (!bDomDec || dd->nc[ZZ]==1) && box[ZZ][XX] != 0));
 +    bTriclinicY =  (ZZ < grid->npbcdim &&
 +                    (!bDomDec || dd->nc[ZZ]==1) && box[ZZ][YY] != 0);
 +    
 +    cgsnr    = cgs->nr;
 +
 +    get_cutoff2(fr,bDoLongRange,&rvdw2,&rcoul2,&rs2,&rm2,&rl2);
 +
 +    rvdw_lt_rcoul = (rvdw2 >= rcoul2);
 +    rcoul_lt_rvdw = (rcoul2 >= rvdw2);
 +    
 +    if (bMakeQMMMnblist)
 +    {
 +        rm2 = rl2;
 +        rs2 = rl2;
 +    }
 +
 +    nl_sr     = ns->nl_sr;
 +    nsr       = ns->nsr;
 +    nl_lr_ljc = ns->nl_lr_ljc;
 +    nl_lr_one = ns->nl_lr_one;
 +    nlr_ljc   = ns->nlr_ljc;
 +    nlr_one   = ns->nlr_one;
 +    
 +    /* Unpack arrays */
 +    cgcm    = fr->cg_cm;
 +    Nx      = grid->n[XX];
 +    Ny      = grid->n[YY];
 +    Nz      = grid->n[ZZ];
 +    grida   = grid->a;
 +    gridind = grid->index;
 +    gridnra = grid->nra;
 +    nns     = 0;
 +    
 +    gridx      = grid->cell_size[XX];
 +    gridy      = grid->cell_size[YY];
 +    gridz      = grid->cell_size[ZZ];
 +    grid_x     = 1/gridx;
 +    grid_y     = 1/gridy;
 +    grid_z     = 1/gridz;
 +    copy_rvec(grid->cell_offset,grid_offset);
 +    copy_ivec(grid->ncpddc,ncpddc);
 +    dcx2       = grid->dcx2;
 +    dcy2       = grid->dcy2;
 +    dcz2       = grid->dcz2;
 +    
 +#ifdef ALLOW_OFFDIAG_LT_HALFDIAG
 +    zsh_ty = floor(-box[ZZ][YY]/box[YY][YY]+0.5);
 +    zsh_tx = floor(-box[ZZ][XX]/box[XX][XX]+0.5);
 +    ysh_tx = floor(-box[YY][XX]/box[XX][XX]+0.5);
 +    if (zsh_tx!=0 && ysh_tx!=0)
 +    {
 +        /* This could happen due to rounding, when both ratios are 0.5 */
 +        ysh_tx = 0;
 +    }
 +#endif
 +    
 +    debug_gmx();
 +
 +    if (fr->n_tpi)
 +    {
 +        /* We only want a list for the test particle */
 +        cg0 = cgsnr - 1;
 +    }
 +    else
 +    {
 +        cg0 = grid->icg0;
 +    }
 +    cg1 = grid->icg1;
 +
 +    /* Set the shift range */
 +    for(d=0; d<DIM; d++)
 +    {
 +        sh0[d] = -1;
 +        sh1[d] = 1;
 +        /* Check if we need periodicity shifts.
 +         * Without PBC or with domain decomposition we don't need them.
 +         */
 +        if (d >= ePBC2npbcdim(fr->ePBC) || (bDomDec && dd->nc[d] > 1))
 +        {
 +            shp[d] = 0;
 +        }
 +        else
 +        {
 +            if (d == XX &&
 +                box[XX][XX] - fabs(box[YY][XX]) - fabs(box[ZZ][XX]) < sqrt(rl2))
 +            {
 +                shp[d] = 2;
 +            }
 +            else
 +            {
 +                shp[d] = 1;
 +            }
 +        }
 +    }
 +    
 +    /* Loop over charge groups */
 +    for(icg=cg0; (icg < cg1); icg++)
 +    {
 +        igid = GET_CGINFO_GID(cginfo[icg]);
 +        /* Skip this charge group if all energy groups are excluded! */
 +        if (bExcludeAlleg[igid])
 +        {
 +            continue;
 +        }
 +        
 +        i0   = cgs->index[icg];
 +        
 +        if (bMakeQMMMnblist)
 +        { 
 +            /* Skip this charge group if it is not a QM atom while making a
 +             * QM/MM neighbourlist
 +             */
 +            if (md->bQM[i0]==FALSE)
 +            {
 +                continue; /* MM particle, go to next particle */ 
 +            }
 +            
 +            /* Compute the number of charge groups that fall within the control
 +             * of this one (icg)
 +             */
 +            naaj    = calc_naaj(icg,cgsnr);
 +            jcg0    = icg;
 +            jcg1    = icg + naaj;
 +            max_jcg = cgsnr;       
 +        } 
 +        else
 +        { 
 +            /* make a normal neighbourlist */
 +            
 +            if (bDomDec)
 +            {
 +                /* Get the j charge-group and dd cell shift ranges */
 +                dd_get_ns_ranges(cr->dd,icg,&jcg0,&jcg1,sh0,sh1);
 +                max_jcg = 0;
 +            }
 +            else
 +            {
 +                /* Compute the number of charge groups that fall within the control
 +                 * of this one (icg)
 +                 */
 +                naaj = calc_naaj(icg,cgsnr);
 +                jcg0 = icg;
 +                jcg1 = icg + naaj;
 +                
 +                if (fr->n_tpi)
 +                {
 +                    /* The i-particle is awlways the test particle,
 +                     * so we want all j-particles
 +                     */
 +                    max_jcg = cgsnr - 1;
 +                }
 +                else
 +                {
 +                    max_jcg  = jcg1 - cgsnr;
 +                }
 +            }
 +        }
 +        
 +        i_egp_flags = fr->egp_flags + igid*ngid;
 +        
 +        /* Set the exclusions for the atoms in charge group icg using a bitmask */
 +        setexcl(i0,cgs->index[icg+1],&top->excls,TRUE,bexcl);
 +        
 +        ci2xyz(grid,icg,&cell_x,&cell_y,&cell_z);
 +        
 +        /* Changed iicg to icg, DvdS 990115 
 +         * (but see consistency check above, DvdS 990330) 
 +         */
 +#ifdef NS5DB
 +        fprintf(log,"icg=%5d, naaj=%5d, cell %d %d %d\n",
 +                icg,naaj,cell_x,cell_y,cell_z);
 +#endif
 +        /* Loop over shift vectors in three dimensions */
 +        for (tz=-shp[ZZ]; tz<=shp[ZZ]; tz++)
 +        {
 +            ZI = cgcm[icg][ZZ]+tz*box[ZZ][ZZ];
 +            /* Calculate range of cells in Z direction that have the shift tz */
 +            zgi = cell_z + tz*Nz;
 +#define FAST_DD_NS
 +#ifndef FAST_DD_NS
 +            get_dx(Nz,gridz,rl2,zgi,ZI,&dz0,&dz1,dcz2);
 +#else
 +            get_dx_dd(Nz,gridz,rl2,zgi,ZI-grid_offset[ZZ],
 +                      ncpddc[ZZ],sh0[ZZ],sh1[ZZ],&dz0,&dz1,dcz2);
 +#endif
 +            if (dz0 > dz1)
 +            {
 +                continue;
 +            }
 +            for (ty=-shp[YY]; ty<=shp[YY]; ty++)
 +            {
 +                YI = cgcm[icg][YY]+ty*box[YY][YY]+tz*box[ZZ][YY];
 +                /* Calculate range of cells in Y direction that have the shift ty */
 +                if (bTriclinicY)
 +                {
 +                    ygi = (int)(Ny + (YI - grid_offset[YY])*grid_y) - Ny;
 +                }
 +                else
 +                {
 +                    ygi = cell_y + ty*Ny;
 +                }
 +#ifndef FAST_DD_NS
 +                get_dx(Ny,gridy,rl2,ygi,YI,&dy0,&dy1,dcy2);
 +#else
 +                get_dx_dd(Ny,gridy,rl2,ygi,YI-grid_offset[YY],
 +                          ncpddc[YY],sh0[YY],sh1[YY],&dy0,&dy1,dcy2);
 +#endif
 +                if (dy0 > dy1)
 +                {
 +                    continue;
 +                }
 +                for (tx=-shp[XX]; tx<=shp[XX]; tx++)
 +                {
 +                    XI = cgcm[icg][XX]+tx*box[XX][XX]+ty*box[YY][XX]+tz*box[ZZ][XX];
 +                    /* Calculate range of cells in X direction that have the shift tx */
 +                    if (bTriclinicX)
 +                    {
 +                        xgi = (int)(Nx + (XI - grid_offset[XX])*grid_x) - Nx;
 +                    }
 +                    else
 +                    {
 +                        xgi = cell_x + tx*Nx;
 +                    }
 +#ifndef FAST_DD_NS
 +                    get_dx(Nx,gridx,rl2,xgi*Nx,XI,&dx0,&dx1,dcx2);
 +#else
 +                    get_dx_dd(Nx,gridx,rl2,xgi,XI-grid_offset[XX],
 +                              ncpddc[XX],sh0[XX],sh1[XX],&dx0,&dx1,dcx2);
 +#endif
 +                    if (dx0 > dx1)
 +                    {
 +                        continue;
 +                    }
 +                    /* Adress: an explicit cg that has a weigthing function of 0 is excluded
 +                     *  from the neigbour list as it will not interact  */
 +                    if (fr->adress_type != eAdressOff){
 +                        if (md->wf[cgs->index[icg]]==0 && egp_explicit(fr, igid)){
 +                            continue;
 +                        }
 +                    }
 +                    /* Get shift vector */      
 +                    shift=XYZ2IS(tx,ty,tz);
 +#ifdef NS5DB
 +                    range_check(shift,0,SHIFTS);
 +#endif
 +                    for(nn=0; (nn<ngid); nn++)
 +                    {
 +                        nsr[nn]      = 0;
 +                        nlr_ljc[nn]  = 0;
 +                        nlr_one[nn] = 0;
 +                    }
 +#ifdef NS5DB
 +                    fprintf(log,"shift: %2d, dx0,1: %2d,%2d, dy0,1: %2d,%2d, dz0,1: %2d,%2d\n",
 +                            shift,dx0,dx1,dy0,dy1,dz0,dz1);
 +                    fprintf(log,"cgcm: %8.3f  %8.3f  %8.3f\n",cgcm[icg][XX],
 +                            cgcm[icg][YY],cgcm[icg][ZZ]);
 +                    fprintf(log,"xi:   %8.3f  %8.3f  %8.3f\n",XI,YI,ZI);
 +#endif
 +                    for (dx=dx0; (dx<=dx1); dx++)
 +                    {
 +                        tmp1 = rl2 - dcx2[dx];
 +                        for (dy=dy0; (dy<=dy1); dy++)
 +                        {
 +                            tmp2 = tmp1 - dcy2[dy];
 +                            if (tmp2 > 0)
 +                            {
 +                                for (dz=dz0; (dz<=dz1); dz++) {
 +                                    if (tmp2 > dcz2[dz]) {
 +                                        /* Find grid-cell cj in which possible neighbours are */
 +                                        cj   = xyz2ci(Ny,Nz,dx,dy,dz);
 +                                        
 +                                        /* Check out how many cgs (nrj) there in this cell */
 +                                        nrj  = gridnra[cj];
 +                                        
 +                                        /* Find the offset in the cg list */
 +                                        cgj0 = gridind[cj];
 +                                        
 +                                        /* Check if all j's are out of range so we
 +                                         * can skip the whole cell.
 +                                         * Should save some time, especially with DD.
 +                                         */
 +                                        if (nrj == 0 ||
 +                                            (grida[cgj0] >= max_jcg &&
 +                                             (grida[cgj0] >= jcg1 || grida[cgj0+nrj-1] < jcg0)))
 +                                        {
 +                                            continue;
 +                                        }
 +                                        
 +                                        /* Loop over cgs */
 +                                        for (j=0; (j<nrj); j++)
 +                                        {
 +                                            jjcg = grida[cgj0+j];
 +                                            
 +                                            /* check whether this guy is in range! */
 +                                            if ((jjcg >= jcg0 && jjcg < jcg1) ||
 +                                                (jjcg < max_jcg))
 +                                            {
 +                                                r2=calc_dx2(XI,YI,ZI,cgcm[jjcg]);
 +                                                if (r2 < rl2) {
 +                                                    /* jgid = gid[cgsatoms[cgsindex[jjcg]]]; */
 +                                                    jgid = GET_CGINFO_GID(cginfo[jjcg]);
 +                                                    /* check energy group exclusions */
 +                                                    if (!(i_egp_flags[jgid] & EGP_EXCL))
 +                                                    {
 +                                                        if (r2 < rs2)
 +                                                        {
 +                                                            if (nsr[jgid] >= MAX_CG)
 +                                                            {
 +                                                                /* Add to short-range list */
 +                                                                put_in_list(bHaveVdW,ngid,md,icg,jgid,
 +                                                                            nsr[jgid],nl_sr[jgid],
 +                                                                            cgs->index,/* cgsatoms, */ bexcl,
 +                                                                            shift,fr,FALSE,TRUE,TRUE,fr->solvent_opt);
 +                                                                nsr[jgid]=0;
 +                                                            }
 +                                                            nl_sr[jgid][nsr[jgid]++]=jjcg;
 +                                                        } 
 +                                                        else if (r2 < rm2)
 +                                                        {
 +                                                            if (nlr_ljc[jgid] >= MAX_CG)
 +                                                            {
 +                                                                /* Add to LJ+coulomb long-range list */
 +                                                                put_in_list(bHaveVdW,ngid,md,icg,jgid,
 +                                                                            nlr_ljc[jgid],nl_lr_ljc[jgid],top->cgs.index,
 +                                                                            bexcl,shift,fr,TRUE,TRUE,TRUE,fr->solvent_opt);
 +                                                                nlr_ljc[jgid]=0;
 +                                                            }
 +                                                            nl_lr_ljc[jgid][nlr_ljc[jgid]++]=jjcg;
 +                                                        }
 +                                                        else
 +                                                        {
 +                                                            if (nlr_one[jgid] >= MAX_CG)
 +                                                            {
 +                                                                /* Add to long-range list with only coul, or only LJ */
 +                                                                put_in_list(bHaveVdW,ngid,md,icg,jgid,
 +                                                                            nlr_one[jgid],nl_lr_one[jgid],top->cgs.index,
 +                                                                            bexcl,shift,fr,TRUE,rvdw_lt_rcoul,rcoul_lt_rvdw,fr->solvent_opt);
 +                                                                nlr_one[jgid]=0;
 +                                                            }
 +                                                            nl_lr_one[jgid][nlr_one[jgid]++]=jjcg;
 +                                                        }
 +                                                    }
 +                                                }
 +                                                nns++;
 +                                            }
 +                                        }
 +                                    }
 +                                }
 +                            }
 +                        }
 +                    }
 +                    /* CHECK whether there is anything left in the buffers */
 +                    for(nn=0; (nn<ngid); nn++)
 +                    {
 +                        if (nsr[nn] > 0)
 +                        {
 +                            put_in_list(bHaveVdW,ngid,md,icg,nn,nsr[nn],nl_sr[nn],
 +                                        cgs->index, /* cgsatoms, */ bexcl,
 +                                        shift,fr,FALSE,TRUE,TRUE,fr->solvent_opt);
 +                        }
 +                        
 +                        if (nlr_ljc[nn] > 0)
 +                        {
 +                            put_in_list(bHaveVdW,ngid,md,icg,nn,nlr_ljc[nn],
 +                                        nl_lr_ljc[nn],top->cgs.index,
 +                                        bexcl,shift,fr,TRUE,TRUE,TRUE,fr->solvent_opt);
 +                        }
 +                        
 +                        if (nlr_one[nn] > 0)
 +                        {
 +                            put_in_list(bHaveVdW,ngid,md,icg,nn,nlr_one[nn],
 +                                        nl_lr_one[nn],top->cgs.index,
 +                                        bexcl,shift,fr,TRUE,rvdw_lt_rcoul,rcoul_lt_rvdw,fr->solvent_opt);
 +                        }
 +                    }
 +                }
 +            }
 +        }
 +        /* setexcl(nri,i_atoms,&top->atoms.excl,FALSE,bexcl); */
 +        setexcl(cgs->index[icg],cgs->index[icg+1],&top->excls,FALSE,bexcl);
 +    }
 +    /* No need to perform any left-over force calculations anymore (as we used to do here)
 +     * since we now save the proper long-range lists for later evaluation.
 +     */
 +
 +    debug_gmx();
 +     
 +    /* Close neighbourlists */
 +    close_neighbor_lists(fr,bMakeQMMMnblist);
 +    
 +    return nns;
 +}
 +
 +void ns_realloc_natoms(gmx_ns_t *ns,int natoms)
 +{
 +    int i;
 +    
 +    if (natoms > ns->nra_alloc)
 +    {
 +        ns->nra_alloc = over_alloc_dd(natoms);
 +        srenew(ns->bexcl,ns->nra_alloc);
 +        for(i=0; i<ns->nra_alloc; i++)
 +        {
 +            ns->bexcl[i] = 0;
 +        }
 +    }
 +}
 +
 +void init_ns(FILE *fplog,const t_commrec *cr,
 +             gmx_ns_t *ns,t_forcerec *fr,
 +             const gmx_mtop_t *mtop,
 +             matrix box)
 +{
 +    int  mt,icg,nr_in_cg,maxcg,i,j,jcg,ngid,ncg;
 +    t_block *cgs;
 +    char *ptr;
 +    
 +    /* Compute largest charge groups size (# atoms) */
 +    nr_in_cg=1;
 +    for(mt=0; mt<mtop->nmoltype; mt++) {
 +        cgs = &mtop->moltype[mt].cgs;
 +        for (icg=0; (icg < cgs->nr); icg++)
 +        {
 +            nr_in_cg=max(nr_in_cg,(int)(cgs->index[icg+1]-cgs->index[icg]));
 +        }
 +    }
 +
 +    /* Verify whether largest charge group is <= max cg.
 +     * This is determined by the type of the local exclusion type 
 +     * Exclusions are stored in bits. (If the type is not large
 +     * enough, enlarge it, unsigned char -> unsigned short -> unsigned long)
 +     */
 +    maxcg = sizeof(t_excl)*8;
 +    if (nr_in_cg > maxcg)
 +    {
 +        gmx_fatal(FARGS,"Max #atoms in a charge group: %d > %d\n",
 +                  nr_in_cg,maxcg);
 +    }
 +    
 +    ngid = mtop->groups.grps[egcENER].nr;
 +    snew(ns->bExcludeAlleg,ngid);
 +    for(i=0; i<ngid; i++) {
 +        ns->bExcludeAlleg[i] = TRUE;
 +        for(j=0; j<ngid; j++)
 +        {
 +            if (!(fr->egp_flags[i*ngid+j] & EGP_EXCL))
 +            {
 +                ns->bExcludeAlleg[i] = FALSE;
 +            }
 +        }
 +    }
 +    
 +    if (fr->bGrid) {
 +        /* Grid search */
 +        ns->grid = init_grid(fplog,fr);
 +        init_nsgrid_lists(fr,ngid,ns);
 +    }
 +    else
 +    {
 +        /* Simple search */
 +        snew(ns->ns_buf,ngid);
 +        for(i=0; (i<ngid); i++)
 +        {
 +            snew(ns->ns_buf[i],SHIFTS);
 +        }
 +        ncg = ncg_mtop(mtop);
 +        snew(ns->simple_aaj,2*ncg);
 +        for(jcg=0; (jcg<ncg); jcg++)
 +        {
 +            ns->simple_aaj[jcg]     = jcg;
 +            ns->simple_aaj[jcg+ncg] = jcg;
 +        }
 +    }
 +    
 +    /* Create array that determines whether or not atoms have VdW */
 +    snew(ns->bHaveVdW,fr->ntype);
 +    for(i=0; (i<fr->ntype); i++)
 +    {
 +        for(j=0; (j<fr->ntype); j++)
 +        {
 +            ns->bHaveVdW[i] = (ns->bHaveVdW[i] || 
 +                               (fr->bBHAM ? 
 +                                ((BHAMA(fr->nbfp,fr->ntype,i,j) != 0) ||
 +                                 (BHAMB(fr->nbfp,fr->ntype,i,j) != 0) ||
 +                                 (BHAMC(fr->nbfp,fr->ntype,i,j) != 0)) :
 +                                ((C6(fr->nbfp,fr->ntype,i,j) != 0) ||
 +                                 (C12(fr->nbfp,fr->ntype,i,j) != 0))));
 +        }
 +    }
 +    if (debug) 
 +        pr_bvec(debug,0,"bHaveVdW",ns->bHaveVdW,fr->ntype,TRUE);
 +    
 +    ns->nra_alloc = 0;
 +    ns->bexcl = NULL;
 +    if (!DOMAINDECOMP(cr))
 +    {
 +        /* This could be reduced with particle decomposition */
 +        ns_realloc_natoms(ns,mtop->natoms);
 +    }
 +
 +    ns->nblist_initialized=FALSE;
 +
 +    /* nbr list debug dump */
 +    {
 +        char *ptr=getenv("GMX_DUMP_NL");
 +        if (ptr)
 +        {
 +            ns->dump_nl=strtol(ptr,NULL,10);
 +            if (fplog)
 +            {
 +                fprintf(fplog, "GMX_DUMP_NL = %d", ns->dump_nl);
 +            }
 +        }
 +        else
 +        {
 +            ns->dump_nl=0;
 +        }
 +    }
 +}
 +
 +                       
 +int search_neighbours(FILE *log,t_forcerec *fr,
 +                      rvec x[],matrix box,
 +                      gmx_localtop_t *top,
 +                      gmx_groups_t *groups,
 +                      t_commrec *cr,
 +                      t_nrnb *nrnb,t_mdatoms *md,
 +                      real *lambda,real *dvdlambda,
 +                      gmx_grppairener_t *grppener,
 +                      gmx_bool bFillGrid,
++                      gmx_bool bDoLongRangeNS,
++                      gmx_bool bPadListsForKernels)
 +{
 +    t_block  *cgs=&(top->cgs);
 +    rvec     box_size,grid_x0,grid_x1;
 +    int      i,j,m,ngid;
 +    real     min_size,grid_dens;
 +    int      nsearch;
 +    gmx_bool     bGrid;
 +    char     *ptr;
 +    gmx_bool     *i_egp_flags;
 +    int      cg_start,cg_end,start,end;
 +    gmx_ns_t *ns;
 +    t_grid   *grid;
 +    gmx_domdec_zones_t *dd_zones;
 +    put_in_list_t *put_in_list;
 +
 +    ns = &fr->ns;
 +
 +    /* Set some local variables */
 +    bGrid = fr->bGrid;
 +    ngid = groups->grps[egcENER].nr;
 +    
 +    for(m=0; (m<DIM); m++)
 +    {
 +        box_size[m] = box[m][m];
 +    }
 +  
 +    if (fr->ePBC != epbcNONE)
 +    {
 +        if (sqr(fr->rlistlong) >= max_cutoff2(fr->ePBC,box))
 +        {
 +            gmx_fatal(FARGS,"One of the box vectors has become shorter than twice the cut-off length or box_yy-|box_zy| or box_zz has become smaller than the cut-off.");
 +        }
 +        if (!bGrid)
 +        {
 +            min_size = min(box_size[XX],min(box_size[YY],box_size[ZZ]));
 +            if (2*fr->rlistlong >= min_size)
 +                gmx_fatal(FARGS,"One of the box diagonal elements has become smaller than twice the cut-off length.");
 +        }
 +    }
 +    
 +    if (DOMAINDECOMP(cr))
 +    {
 +        ns_realloc_natoms(ns,cgs->index[cgs->nr]);
 +    }
 +    debug_gmx();
 +    
 +    /* Reset the neighbourlists */
 +    reset_neighbor_lists(fr,TRUE,TRUE);
 +    
 +    if (bGrid && bFillGrid)
 +    {
 +              
 +        grid = ns->grid;
 +        if (DOMAINDECOMP(cr))
 +        {
 +            dd_zones = domdec_zones(cr->dd);
 +        }
 +        else
 +        {
 +            dd_zones = NULL;
 +
 +            get_nsgrid_boundaries(grid->nboundeddim,box,NULL,NULL,NULL,NULL,
 +                                  cgs->nr,fr->cg_cm,grid_x0,grid_x1,&grid_dens);
 +
 +            grid_first(log,grid,NULL,NULL,fr->ePBC,box,grid_x0,grid_x1,
 +                       fr->rlistlong,grid_dens);
 +        }
 +        debug_gmx();
 +        
 +        /* Don't know why this all is... (DvdS 3/99) */
 +#ifndef SEGV
 +        start = 0;
 +        end   = cgs->nr;
 +#else
 +        start = fr->cg0;
 +        end   = (cgs->nr+1)/2;
 +#endif
 +        
 +        if (DOMAINDECOMP(cr))
 +        {
 +            end = cgs->nr;
 +            fill_grid(log,dd_zones,grid,end,-1,end,fr->cg_cm);
 +            grid->icg0 = 0;
 +            grid->icg1 = dd_zones->izone[dd_zones->nizone-1].cg1;
 +        }
 +        else
 +        {
 +            fill_grid(log,NULL,grid,cgs->nr,fr->cg0,fr->hcg,fr->cg_cm);
 +            grid->icg0 = fr->cg0;
 +            grid->icg1 = fr->hcg;
 +            debug_gmx();
 +            
 +            if (PARTDECOMP(cr))
 +                mv_grid(cr,grid);
 +            debug_gmx();
 +        }
 +        
 +        calc_elemnr(log,grid,start,end,cgs->nr);
 +        calc_ptrs(grid);
 +        grid_last(log,grid,start,end,cgs->nr);
 +        
 +        if (gmx_debug_at)
 +        {
 +            check_grid(debug,grid);
 +            print_grid(debug,grid);
 +        }
 +    }
 +    else if (fr->n_tpi)
 +    {
 +        /* Set the grid cell index for the test particle only.
 +         * The cell to cg index is not corrected, but that does not matter.
 +         */
 +        fill_grid(log,NULL,ns->grid,fr->hcg,fr->hcg-1,fr->hcg,fr->cg_cm);
 +    }
 +    debug_gmx();
 +    
 +    if (!fr->ns.bCGlist)
 +    {
 +        put_in_list = put_in_list_at;
 +    }
 +    else
 +    {
 +        put_in_list = put_in_list_cg;
 +    }
 +
 +    /* Do the core! */
 +    if (bGrid)
 +    {
 +        grid = ns->grid;
 +        nsearch = nsgrid_core(log,cr,fr,box,box_size,ngid,top,
 +                              grid,x,ns->bexcl,ns->bExcludeAlleg,
 +                              nrnb,md,lambda,dvdlambda,grppener,
 +                              put_in_list,ns->bHaveVdW,
 +                              bDoLongRangeNS,FALSE);
 +        
 +        /* neighbour searching withouth QMMM! QM atoms have zero charge in
 +         * the classical calculation. The charge-charge interaction
 +         * between QM and MM atoms is handled in the QMMM core calculation
 +         * (see QMMM.c). The VDW however, we'd like to compute classically
 +         * and the QM MM atom pairs have just been put in the
 +         * corresponding neighbourlists. in case of QMMM we still need to
 +         * fill a special QMMM neighbourlist that contains all neighbours
 +         * of the QM atoms. If bQMMM is true, this list will now be made: 
 +         */
 +        if (fr->bQMMM && fr->qr->QMMMscheme!=eQMMMschemeoniom)
 +        {
 +            nsearch += nsgrid_core(log,cr,fr,box,box_size,ngid,top,
 +                                   grid,x,ns->bexcl,ns->bExcludeAlleg,
 +                                   nrnb,md,lambda,dvdlambda,grppener,
 +                                   put_in_list_qmmm,ns->bHaveVdW,
 +                                   bDoLongRangeNS,TRUE);
 +        }
 +    }
 +    else 
 +    {
 +        nsearch = ns_simple_core(fr,top,md,box,box_size,
 +                                 ns->bexcl,ns->simple_aaj,
 +                                 ngid,ns->ns_buf,put_in_list,ns->bHaveVdW);
 +    }
 +    debug_gmx();
 +
 +#ifdef DEBUG
 +    pr_nsblock(log);
 +#endif
 +    
 +    inc_nrnb(nrnb,eNR_NS,nsearch);
 +    /* inc_nrnb(nrnb,eNR_LR,fr->nlr); */
 +    
 +    return nsearch;
 +}
 +
 +int natoms_beyond_ns_buffer(t_inputrec *ir,t_forcerec *fr,t_block *cgs,
 +                            matrix scale_tot,rvec *x)
 +{
 +    int  cg0,cg1,cg,a0,a1,a,i,j;
 +    real rint,hbuf2,scale;
 +    rvec *cg_cm,cgsc;
 +    gmx_bool bIsotropic;
 +    int  nBeyond;
 +    
 +    nBeyond = 0;
 +    
 +    rint = max(ir->rcoulomb,ir->rvdw);
 +    if (ir->rlist < rint)
 +    {
 +        gmx_fatal(FARGS,"The neighbor search buffer has negative size: %f nm",
 +                  ir->rlist - rint);
 +    }
 +    cg_cm = fr->cg_cm;
 +    
 +    cg0 = fr->cg0;
 +    cg1 = fr->hcg;
 +    
 +    if (!EI_DYNAMICS(ir->eI) || !DYNAMIC_BOX(*ir))
 +    {
 +        hbuf2 = sqr(0.5*(ir->rlist - rint));
 +        for(cg=cg0; cg<cg1; cg++)
 +        {
 +            a0 = cgs->index[cg];
 +            a1 = cgs->index[cg+1];
 +            for(a=a0; a<a1; a++)
 +            {
 +                if (distance2(cg_cm[cg],x[a]) > hbuf2)
 +                {
 +                    nBeyond++;
 +                }
 +            }
 +        }
 +    }
 +    else
 +    {
 +        bIsotropic = TRUE;
 +        scale = scale_tot[0][0];
 +        for(i=1; i<DIM; i++)
 +        {
 +            /* With anisotropic scaling, the original spherical ns volumes become
 +             * ellipsoids. To avoid costly transformations we use the minimum
 +             * eigenvalue of the scaling matrix for determining the buffer size.
 +             * Since the lower half is 0, the eigenvalues are the diagonal elements.
 +             */
 +            scale = min(scale,scale_tot[i][i]);
 +            if (scale_tot[i][i] != scale_tot[i-1][i-1])
 +            {
 +                bIsotropic = FALSE;
 +            }
 +            for(j=0; j<i; j++)
 +            {
 +                if (scale_tot[i][j] != 0)
 +                {
 +                    bIsotropic = FALSE;
 +                }
 +            }
 +        }
 +        hbuf2 = sqr(0.5*(scale*ir->rlist - rint));
 +        if (bIsotropic)
 +        {
 +            for(cg=cg0; cg<cg1; cg++)
 +            {
 +                svmul(scale,cg_cm[cg],cgsc);
 +                a0 = cgs->index[cg];
 +                a1 = cgs->index[cg+1];
 +                for(a=a0; a<a1; a++)
 +                {
 +                    if (distance2(cgsc,x[a]) > hbuf2)
 +                    {                    
 +                        nBeyond++;
 +                    }
 +                }
 +            }
 +        }
 +        else
 +        {
 +            /* Anistropic scaling */
 +            for(cg=cg0; cg<cg1; cg++)
 +            {
 +                /* Since scale_tot contains the transpose of the scaling matrix,
 +                 * we need to multiply with the transpose.
 +                 */
 +                tmvmul_ur0(scale_tot,cg_cm[cg],cgsc);
 +                a0 = cgs->index[cg];
 +                a1 = cgs->index[cg+1];
 +                for(a=a0; a<a1; a++)
 +                {
 +                    if (distance2(cgsc,x[a]) > hbuf2)
 +                    {
 +                        nBeyond++;
 +                    }
 +                }
 +            }
 +        }
 +    }
 +    
 +    return nBeyond;
 +}
index 7f4ea24b76ea283a69be08f48103b5e9bb869fd6,0000000000000000000000000000000000000000..1be9796a3c2c24f5a4b4f895cbccd0a877b12e2e
mode 100644,000000..100644
--- /dev/null
@@@ -1,1765 -1,0 +1,1773 @@@
 +/*  -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.03
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <sys/types.h>
 +#include <math.h>
 +#include <string.h>
 +#include <errno.h>
 +#include <limits.h>
 +
 +#include "sysstuff.h"
 +#include "smalloc.h"
 +#include "macros.h"
 +#include "string2.h"
 +#include "readir.h"
 +#include "toputil.h"
 +#include "topio.h"
 +#include "confio.h"
 +#include "copyrite.h"
 +#include "readir.h"
 +#include "symtab.h"
 +#include "names.h"
 +#include "grompp.h"
 +#include "random.h"
 +#include "vec.h"
 +#include "futil.h"
 +#include "statutil.h"
 +#include "splitter.h"
 +#include "sortwater.h"
 +#include "convparm.h"
 +#include "gmx_fatal.h"
 +#include "warninp.h"
 +#include "index.h"
 +#include "gmxfio.h"
 +#include "trnio.h"
 +#include "tpxio.h"
 +#include "vsite_parm.h"
 +#include "txtdump.h"
 +#include "calcgrid.h"
 +#include "add_par.h"
 +#include "enxio.h"
 +#include "perf_est.h"
 +#include "compute_io.h"
 +#include "gpp_atomtype.h"
 +#include "gpp_tomorse.h"
 +#include "mtop_util.h"
 +#include "genborn.h"
 +#include "calc_verletbuf.h"
 +
 +static int rm_interactions(int ifunc,int nrmols,t_molinfo mols[])
 +{
 +  int  i,n;
 +  
 +  n=0;
 +  /* For all the molecule types */
 +  for(i=0; i<nrmols; i++) {
 +    n += mols[i].plist[ifunc].nr;
 +    mols[i].plist[ifunc].nr=0;
 +  }
 +  return n;
 +}
 +
 +static int check_atom_names(const char *fn1, const char *fn2, 
 +                          gmx_mtop_t *mtop, t_atoms *at)
 +{
 +  int mb,m,i,j,nmismatch;
 +  t_atoms *tat;
 +#define MAXMISMATCH 20
 +
 +  if (mtop->natoms != at->nr)
 +    gmx_incons("comparing atom names");
 +  
 +  nmismatch=0;
 +  i = 0;
 +  for(mb=0; mb<mtop->nmolblock; mb++) {
 +    tat = &mtop->moltype[mtop->molblock[mb].type].atoms;
 +    for(m=0; m<mtop->molblock[mb].nmol; m++) {
 +      for(j=0; j < tat->nr; j++) {
 +      if (strcmp( *(tat->atomname[j]) , *(at->atomname[i]) ) != 0) {
 +        if (nmismatch < MAXMISMATCH) {
 +          fprintf(stderr,
 +                  "Warning: atom name %d in %s and %s does not match (%s - %s)\n",
 +                  i+1, fn1, fn2, *(tat->atomname[j]), *(at->atomname[i]));
 +        } else if (nmismatch == MAXMISMATCH) {
 +          fprintf(stderr,"(more than %d non-matching atom names)\n",MAXMISMATCH);
 +        }
 +        nmismatch++;
 +      }
 +      i++;
 +      }
 +    }
 +  }
 +
 +  return nmismatch;
 +}
 +
 +static void check_eg_vs_cg(gmx_mtop_t *mtop)
 +{
 +  int astart,mb,m,cg,j,firstj;
 +  unsigned char firsteg,eg;
 +  gmx_moltype_t *molt;
 +  
 +  /* Go through all the charge groups and make sure all their
 +   * atoms are in the same energy group.
 +   */
 +  
 +  astart = 0;
 +  for(mb=0; mb<mtop->nmolblock; mb++) {
 +    molt = &mtop->moltype[mtop->molblock[mb].type];
 +    for(m=0; m<mtop->molblock[mb].nmol; m++) {
 +      for(cg=0; cg<molt->cgs.nr;cg++) {
 +      /* Get the energy group of the first atom in this charge group */
 +      firstj = astart + molt->cgs.index[cg];
 +      firsteg = ggrpnr(&mtop->groups,egcENER,firstj);
 +      for(j=molt->cgs.index[cg]+1;j<molt->cgs.index[cg+1];j++) {
 +        eg = ggrpnr(&mtop->groups,egcENER,astart+j);
 +        if(eg != firsteg) {
 +          gmx_fatal(FARGS,"atoms %d and %d in charge group %d of molecule type '%s' are in different energy groups",
 +                    firstj+1,astart+j+1,cg+1,*molt->name);
 +        }
 +      }
 +      }
 +      astart += molt->atoms.nr;
 +    }
 +  }  
 +}
 +
 +static void check_cg_sizes(const char *topfn,t_block *cgs,warninp_t wi)
 +{
 +    int  maxsize,cg;
 +    char warn_buf[STRLEN];
 +
 +    maxsize = 0;
 +    for(cg=0; cg<cgs->nr; cg++)
 +    {
 +        maxsize = max(maxsize,cgs->index[cg+1]-cgs->index[cg]);
 +    }
 +    
 +    if (maxsize > MAX_CHARGEGROUP_SIZE)
 +    {
 +        gmx_fatal(FARGS,"The largest charge group contains %d atoms. The maximum is %d.",maxsize,MAX_CHARGEGROUP_SIZE);
 +    }
 +    else if (maxsize > 10)
 +    {
 +        set_warning_line(wi,topfn,-1);
 +        sprintf(warn_buf,
 +                "The largest charge group contains %d atoms.\n"
 +                "Since atoms only see each other when the centers of geometry of the charge groups they belong to are within the cut-off distance, too large charge groups can lead to serious cut-off artifacts.\n"
 +                "For efficiency and accuracy, charge group should consist of a few atoms.\n"
 +                "For all-atom force fields use: CH3, CH2, CH, NH2, NH, OH, CO2, CO, etc.",
 +                maxsize);
 +        warning_note(wi,warn_buf);
 +    }
 +}
 +
 +static void check_bonds_timestep(gmx_mtop_t *mtop,double dt,warninp_t wi)
 +{
 +    /* This check is not intended to ensure accurate integration,
 +     * rather it is to signal mistakes in the mdp settings.
 +     * A common mistake is to forget to turn on constraints
 +     * for MD after energy minimization with flexible bonds.
 +     * This check can also detect too large time steps for flexible water
 +     * models, but such errors will often be masked by the constraints
 +     * mdp options, which turns flexible water into water with bond constraints,
 +     * but without an angle constraint. Unfortunately such incorrect use
 +     * of water models can not easily be detected without checking
 +     * for specific model names.
 +     *
 +     * The stability limit of leap-frog or velocity verlet is 4.44 steps
 +     * per oscillational period.
 +     * But accurate bonds distributions are lost far before that limit.
 +     * To allow relatively common schemes (although not common with Gromacs)
 +     * of dt=1 fs without constraints and dt=2 fs with only H-bond constraints
 +     * we set the note limit to 10.
 +     */
 +    int       min_steps_warn=5;
 +    int       min_steps_note=10;
 +    t_iparams *ip;
 +    int       molt;
 +    gmx_moltype_t *moltype,*w_moltype;
 +    t_atom    *atom;
 +    t_ilist   *ilist,*ilb,*ilc,*ils;
 +    int       ftype;
 +    int       i,a1,a2,w_a1,w_a2,j;
 +    real      twopi2,limit2,fc,re,m1,m2,period2,w_period2;
 +    gmx_bool  bFound,bWater,bWarn;
 +    char      warn_buf[STRLEN];
 +
 +    ip = mtop->ffparams.iparams;
 +
 +    twopi2 = sqr(2*M_PI);
 +
 +    limit2 = sqr(min_steps_note*dt);
 +
 +    w_a1 = w_a2 = -1;
 +    w_period2 = -1.0;
 +    
 +    w_moltype = NULL;
 +    for(molt=0; molt<mtop->nmoltype; molt++)
 +    {
 +        moltype = &mtop->moltype[molt];
 +        atom  = moltype->atoms.atom;
 +        ilist = moltype->ilist;
 +        ilc = &ilist[F_CONSTR];
 +        ils = &ilist[F_SETTLE];
 +        for(ftype=0; ftype<F_NRE; ftype++)
 +        {
 +            if (!(ftype == F_BONDS || ftype == F_G96BONDS || ftype == F_HARMONIC))
 +            {
 +                continue;
 +            }
 +            
 +            ilb = &ilist[ftype];
 +            for(i=0; i<ilb->nr; i+=3)
 +            {
 +                fc = ip[ilb->iatoms[i]].harmonic.krA;
 +                re = ip[ilb->iatoms[i]].harmonic.rA;
 +                if (ftype == F_G96BONDS)
 +                {
 +                    /* Convert squared sqaure fc to harmonic fc */
 +                    fc = 2*fc*re;
 +                }
 +                a1 = ilb->iatoms[i+1];
 +                a2 = ilb->iatoms[i+2];
 +                m1 = atom[a1].m;
 +                m2 = atom[a2].m;
 +                if (fc > 0 && m1 > 0 && m2 > 0)
 +                {
 +                    period2 = twopi2*m1*m2/((m1 + m2)*fc);
 +                }
 +                else
 +                {
 +                    period2 = GMX_FLOAT_MAX;
 +                }
 +                if (debug)
 +                {
 +                    fprintf(debug,"fc %g m1 %g m2 %g period %g\n",
 +                            fc,m1,m2,sqrt(period2));
 +                }
 +                if (period2 < limit2)
 +                {
 +                    bFound = FALSE;
 +                    for(j=0; j<ilc->nr; j+=3)
 +                    {
 +                        if ((ilc->iatoms[j+1] == a1 && ilc->iatoms[j+2] == a2) ||
 +                            (ilc->iatoms[j+1] == a2 && ilc->iatoms[j+2] == a1))
 +                            {
 +                                bFound = TRUE;
 +                            }
 +                        }
 +                    for(j=0; j<ils->nr; j+=4)
 +                    {
 +                        if ((a1 == ils->iatoms[j+1] || a1 == ils->iatoms[j+2] || a1 == ils->iatoms[j+3]) &&
 +                            (a2 == ils->iatoms[j+1] || a2 == ils->iatoms[j+2] || a2 == ils->iatoms[j+3]))
 +                        {
 +                            bFound = TRUE;
 +                        }
 +                    }
 +                    if (!bFound &&
 +                        (w_moltype == NULL || period2 < w_period2))
 +                    {
 +                        w_moltype = moltype;
 +                        w_a1      = a1;
 +                        w_a2      = a2;
 +                        w_period2 = period2;
 +                    }
 +                }
 +            }
 +        }
 +    }
 +    
 +    if (w_moltype != NULL)
 +    {
 +        bWarn = (w_period2 < sqr(min_steps_warn*dt));
 +        /* A check that would recognize most water models */
 +        bWater = ((*w_moltype->atoms.atomname[0])[0] == 'O' &&
 +                  w_moltype->atoms.nr <= 5);
 +        sprintf(warn_buf,"The bond in molecule-type %s between atoms %d %s and %d %s has an estimated oscillational period of %.1e ps, which is less than %d times the time step of %.1e ps.\n"
 +                "%s",
 +                *w_moltype->name,
 +                w_a1+1,*w_moltype->atoms.atomname[w_a1],
 +                w_a2+1,*w_moltype->atoms.atomname[w_a2],
 +                sqrt(w_period2),bWarn ? min_steps_warn : min_steps_note,dt,
 +                bWater ?
 +                "Maybe you asked for fexible water." :
 +                "Maybe you forgot to change the constraints mdp option.");
 +        if (bWarn)
 +        {
 +            warning(wi,warn_buf);
 +        }
 +        else
 +        {
 +            warning_note(wi,warn_buf);
 +        }
 +    }
 +}
 +
 +static void check_vel(gmx_mtop_t *mtop,rvec v[])
 +{
 +  gmx_mtop_atomloop_all_t aloop;
 +  t_atom *atom;
 +  int a;
 +
 +  aloop = gmx_mtop_atomloop_all_init(mtop);
 +  while (gmx_mtop_atomloop_all_next(aloop,&a,&atom)) {
 +    if (atom->ptype == eptShell ||
 +      atom->ptype == eptBond  ||
 +      atom->ptype == eptVSite) {
 +      clear_rvec(v[a]);
 +    }
 +  }
 +}
 +
 +static gmx_bool nint_ftype(gmx_mtop_t *mtop,t_molinfo *mi,int ftype)
 +{
 +  int nint,mb;
 +
 +  nint = 0;
 +  for(mb=0; mb<mtop->nmolblock; mb++) {
 +    nint += mtop->molblock[mb].nmol*mi[mtop->molblock[mb].type].plist[ftype].nr;
 +  }
 +
 +  return nint;
 +}
 +
 +/* This routine reorders the molecule type array
 + * in the order of use in the molblocks,
 + * unused molecule types are deleted.
 + */
 +static void renumber_moltypes(gmx_mtop_t *sys,
 +                            int *nmolinfo,t_molinfo **molinfo)
 +{
 +  int *order,norder,i;
 +  int mb,mi;
 +  t_molinfo *minew;
 +
 +  snew(order,*nmolinfo);
 +  norder = 0;
 +  for(mb=0; mb<sys->nmolblock; mb++) {
 +    for(i=0; i<norder; i++) {
 +      if (order[i] == sys->molblock[mb].type) {
 +      break;
 +      }
 +    }
 +    if (i == norder) {
 +      /* This type did not occur yet, add it */
 +      order[norder] = sys->molblock[mb].type;
 +      /* Renumber the moltype in the topology */
 +      norder++;
 +    }
 +    sys->molblock[mb].type = i;
 +  }
 +  
 +  /* We still need to reorder the molinfo structs */
 +  snew(minew,norder);
 +  for(mi=0; mi<*nmolinfo; mi++) {
 +    for(i=0; i<norder; i++) {
 +      if (order[i] == mi) {
 +      break;
 +      }
 +    }
 +    if (i == norder) {
 +      done_mi(&(*molinfo)[mi]);
 +    } else {
 +      minew[i] = (*molinfo)[mi];
 +    }
 +  }
 +  sfree(*molinfo);
 +
 +  *nmolinfo = norder;
 +  *molinfo  = minew;
 +}
 +
 +static void molinfo2mtop(int nmi,t_molinfo *mi,gmx_mtop_t *mtop)
 +{
 +  int m;
 +  gmx_moltype_t *molt;
 +
 +  mtop->nmoltype = nmi;
 +  snew(mtop->moltype,nmi);
 +  for(m=0; m<nmi; m++) {
 +    molt = &mtop->moltype[m];
 +    molt->name  = mi[m].name;
 +    molt->atoms = mi[m].atoms;
 +    /* ilists are copied later */
 +    molt->cgs   = mi[m].cgs;
 +    molt->excls = mi[m].excls;
 +  }
 +}
 +
 +static void
 +new_status(const char *topfile,const char *topppfile,const char *confin,
 +           t_gromppopts *opts,t_inputrec *ir,gmx_bool bZero,
 +           gmx_bool bGenVel,gmx_bool bVerbose,t_state *state,
 +           gpp_atomtype_t atype,gmx_mtop_t *sys,
 +           int *nmi,t_molinfo **mi,t_params plist[],
 +           int *comb,double *reppow,real *fudgeQQ,
 +           gmx_bool bMorse,
 +           warninp_t wi)
 +{
 +  t_molinfo   *molinfo=NULL;
 +  int         nmolblock;
 +  gmx_molblock_t *molblock,*molbs;
 +  t_atoms     *confat;
 +  int         mb,i,nrmols,nmismatch;
 +  char        buf[STRLEN];
 +  gmx_bool        bGB=FALSE;
 +  char        warn_buf[STRLEN];
 +
 +  init_mtop(sys);
 +
 +  /* Set gmx_boolean for GB */
 +  if(ir->implicit_solvent)
 +    bGB=TRUE;
 +  
 +  /* TOPOLOGY processing */
 +  sys->name = do_top(bVerbose,topfile,topppfile,opts,bZero,&(sys->symtab),
 +                     plist,comb,reppow,fudgeQQ,
 +                     atype,&nrmols,&molinfo,ir,
 +                     &nmolblock,&molblock,bGB,
 +                     wi);
 +  
 +  sys->nmolblock = 0;
 +  snew(sys->molblock,nmolblock);
 +  
 +  sys->natoms = 0;
 +  for(mb=0; mb<nmolblock; mb++) {
 +    if (sys->nmolblock > 0 &&
 +      molblock[mb].type == sys->molblock[sys->nmolblock-1].type) {
 +      /* Merge consecutive blocks with the same molecule type */
 +      sys->molblock[sys->nmolblock-1].nmol += molblock[mb].nmol;
 +      sys->natoms += molblock[mb].nmol*sys->molblock[sys->nmolblock-1].natoms_mol;
 +    } else if (molblock[mb].nmol > 0) {
 +      /* Add a new molblock to the topology */
 +      molbs = &sys->molblock[sys->nmolblock];
 +      *molbs = molblock[mb];
 +      molbs->natoms_mol = molinfo[molbs->type].atoms.nr;
 +      molbs->nposres_xA = 0;
 +      molbs->nposres_xB = 0;
 +      sys->natoms += molbs->nmol*molbs->natoms_mol;
 +      sys->nmolblock++;
 +    }
 +  }
 +  if (sys->nmolblock == 0) {
 +    gmx_fatal(FARGS,"No molecules were defined in the system");
 +  }
 +
 +  renumber_moltypes(sys,&nrmols,&molinfo);
 +
 +  if (bMorse)
 +    convert_harmonics(nrmols,molinfo,atype);
 +
 +  if (ir->eDisre == edrNone) {
 +    i = rm_interactions(F_DISRES,nrmols,molinfo);
 +    if (i > 0) {
 +      set_warning_line(wi,"unknown",-1);
 +      sprintf(warn_buf,"disre = no, removed %d distance restraints",i);
 +      warning_note(wi,warn_buf);
 +    }
 +  }
 +  if (opts->bOrire == FALSE) {
 +    i = rm_interactions(F_ORIRES,nrmols,molinfo);
 +    if (i > 0) {
 +      set_warning_line(wi,"unknown",-1);
 +      sprintf(warn_buf,"orire = no, removed %d orientation restraints",i);
 +      warning_note(wi,warn_buf);
 +    }
 +  }
 +  
 +  /* Copy structures from msys to sys */
 +  molinfo2mtop(nrmols,molinfo,sys);
 +
 +  gmx_mtop_finalize(sys);
 + 
 +  /* COORDINATE file processing */
 +  if (bVerbose) 
 +    fprintf(stderr,"processing coordinates...\n");
 +
 +  get_stx_coordnum(confin,&state->natoms);
 +  if (state->natoms != sys->natoms)
 +    gmx_fatal(FARGS,"number of coordinates in coordinate file (%s, %d)\n"
 +              "             does not match topology (%s, %d)",
 +            confin,state->natoms,topfile,sys->natoms);
 +  else {
 +    /* make space for coordinates and velocities */
 +    char title[STRLEN];
 +    snew(confat,1);
 +    init_t_atoms(confat,state->natoms,FALSE);
 +    init_state(state,state->natoms,0,0,0,0);
 +    read_stx_conf(confin,title,confat,state->x,state->v,NULL,state->box);
 +    /* This call fixes the box shape for runs with pressure scaling */
 +    set_box_rel(ir,state);
 +
 +    nmismatch = check_atom_names(topfile, confin, sys, confat);
 +    free_t_atoms(confat,TRUE);
 +    sfree(confat);
 +    
 +    if (nmismatch) {
 +      sprintf(buf,"%d non-matching atom name%s\n"
 +            "atom names from %s will be used\n"
 +            "atom names from %s will be ignored\n",
 +            nmismatch,(nmismatch == 1) ? "" : "s",topfile,confin);
 +      warning(wi,buf);
 +    }    
 +    if (bVerbose) 
 +      fprintf(stderr,"double-checking input for internal consistency...\n");
 +    double_check(ir,state->box,nint_ftype(sys,molinfo,F_CONSTR),wi);
 +  }
 +
 +  if (bGenVel) {
 +    real *mass;
 +    gmx_mtop_atomloop_all_t aloop;
 +    t_atom *atom;
 +
 +    snew(mass,state->natoms);
 +    aloop = gmx_mtop_atomloop_all_init(sys);
 +    while (gmx_mtop_atomloop_all_next(aloop,&i,&atom)) {
 +      mass[i] = atom->m;
 +    }
 +
 +    if (opts->seed == -1) {
 +      opts->seed = make_seed();
 +      fprintf(stderr,"Setting gen_seed to %d\n",opts->seed);
 +    }
 +    maxwell_speed(opts->tempi,opts->seed,sys,state->v);
 +
 +    stop_cm(stdout,state->natoms,mass,state->x,state->v);
 +    sfree(mass);
 +  }
 +
 +  *nmi = nrmols;
 +  *mi  = molinfo;
 +}
 +
 +static void copy_state(const char *slog,t_trxframe *fr,
 +                       gmx_bool bReadVel,t_state *state,
 +                       double *use_time)
 +{
 +    int i;
 +
 +    if (fr->not_ok & FRAME_NOT_OK)
 +    {
 +        gmx_fatal(FARGS,"Can not start from an incomplete frame");
 +    }
 +    if (!fr->bX)
 +    {
 +        gmx_fatal(FARGS,"Did not find a frame with coordinates in file %s",
 +                  slog);
 +    }
 +
 +    for(i=0; i<state->natoms; i++)
 +    {
 +        copy_rvec(fr->x[i],state->x[i]);
 +    }
 +    if (bReadVel)
 +    {
 +        if (!fr->bV)
 +        {
 +            gmx_incons("Trajecory frame unexpectedly does not contain velocities");
 +        }
 +        for(i=0; i<state->natoms; i++)
 +        {
 +            copy_rvec(fr->v[i],state->v[i]);
 +        }
 +    }
 +    if (fr->bBox)
 +    {
 +        copy_mat(fr->box,state->box);
 +    }
 +
 +    *use_time = fr->time;
 +}
 +
 +static void cont_status(const char *slog,const char *ener,
 +                      gmx_bool bNeedVel,gmx_bool bGenVel, real fr_time,
 +                      t_inputrec *ir,t_state *state,
 +                      gmx_mtop_t *sys,
 +                        const output_env_t oenv)
 +     /* If fr_time == -1 read the last frame available which is complete */
 +{
 +    gmx_bool bReadVel;
 +    t_trxframe  fr;
 +    t_trxstatus *fp;
 +    int i;
 +    double use_time;
 +
 +    bReadVel = (bNeedVel && !bGenVel);
 +
 +    fprintf(stderr,
 +            "Reading Coordinates%s and Box size from old trajectory\n",
 +            bReadVel ? ", Velocities" : "");
 +    if (fr_time == -1)
 +    {
 +        fprintf(stderr,"Will read whole trajectory\n");
 +    }
 +    else
 +    {
 +        fprintf(stderr,"Will read till time %g\n",fr_time);
 +    }
 +    if (!bReadVel)
 +    {
 +        if (bGenVel)
 +        {
 +            fprintf(stderr,"Velocities generated: "
 +                    "ignoring velocities in input trajectory\n");
 +        }
 +        read_first_frame(oenv,&fp,slog,&fr,TRX_NEED_X);
 +    }
 +    else
 +    {
 +        read_first_frame(oenv,&fp,slog,&fr,TRX_NEED_X | TRX_NEED_V);
 +        
 +        if (!fr.bV)
 +        {
 +            fprintf(stderr,
 +                    "\n"
 +                    "WARNING: Did not find a frame with velocities in file %s,\n"
 +                    "         all velocities will be set to zero!\n\n",slog);
 +            for(i=0; i<sys->natoms; i++)
 +            {
 +                clear_rvec(state->v[i]);
 +            }
 +            close_trj(fp);
 +            /* Search for a frame without velocities */
 +            bReadVel = FALSE;
 +            read_first_frame(oenv,&fp,slog,&fr,TRX_NEED_X);
 +        }
 +    }
 +
 +    state->natoms = fr.natoms;
 +
 +    if (sys->natoms != state->natoms)
 +    {
 +        gmx_fatal(FARGS,"Number of atoms in Topology "
 +                  "is not the same as in Trajectory");
 +    }
 +    copy_state(slog,&fr,bReadVel,state,&use_time);
 +
 +    /* Find the appropriate frame */
 +    while ((fr_time == -1 || fr.time < fr_time) &&
 +           read_next_frame(oenv,fp,&fr))
 +    {
 +        copy_state(slog,&fr,bReadVel,state,&use_time);
 +    }
 +  
 +    close_trj(fp);
 +
 +    /* Set the relative box lengths for preserving the box shape.
 +     * Note that this call can lead to differences in the last bit
 +     * with respect to using tpbconv to create a [TT].tpx[tt] file.
 +     */
 +    set_box_rel(ir,state);
 +
 +    fprintf(stderr,"Using frame at t = %g ps\n",use_time);
 +    fprintf(stderr,"Starting time for run is %g ps\n",ir->init_t); 
 +  
 +    if ((ir->epc != epcNO  || ir->etc ==etcNOSEHOOVER) && ener)
 +    {
 +        get_enx_state(ener,use_time,&sys->groups,ir,state);
 +        preserve_box_shape(ir,state->box_rel,state->boxv);
 +    }
 +}
 +
 +static void read_posres(gmx_mtop_t *mtop,t_molinfo *molinfo,gmx_bool bTopB,
 +                        char *fn,
 +                        int rc_scaling, int ePBC, 
 +                        rvec com,
 +                        warninp_t wi)
 +{
 +  gmx_bool   bFirst = TRUE, *hadAtom;
 +  rvec   *x,*v,*xp;
 +  dvec   sum;
 +  double totmass;
 +  t_atoms dumat;
 +  matrix box,invbox;
 +  int    natoms,npbcdim=0;
 +  char   warn_buf[STRLEN],title[STRLEN];
 +  int    a,i,ai,j,k,mb,nat_molb;
 +  gmx_molblock_t *molb;
 +  t_params *pr,*prfb;
 +  t_atom *atom;
 +
 +  get_stx_coordnum(fn,&natoms);
 +  if (natoms != mtop->natoms) {
 +    sprintf(warn_buf,"The number of atoms in %s (%d) does not match the number of atoms in the topology (%d). Will assume that the first %d atoms in the topology and %s match.",fn,natoms,mtop->natoms,min(mtop->natoms,natoms),fn);
 +    warning(wi,warn_buf);
 +  }
 +  snew(x,natoms);
 +  snew(v,natoms);
 +  init_t_atoms(&dumat,natoms,FALSE);
 +  read_stx_conf(fn,title,&dumat,x,v,NULL,box);
 +  
 +  npbcdim = ePBC2npbcdim(ePBC);
 +  clear_rvec(com);
 +  if (rc_scaling != erscNO) {
 +    copy_mat(box,invbox);
 +    for(j=npbcdim; j<DIM; j++) {
 +      clear_rvec(invbox[j]);
 +      invbox[j][j] = 1;
 +    }
 +    m_inv_ur0(invbox,invbox);
 +  }
 +
 +  /* Copy the reference coordinates to mtop */
 +  clear_dvec(sum);
 +  totmass = 0;
 +  a = 0;
 +  snew(hadAtom,natoms);
 +  for(mb=0; mb<mtop->nmolblock; mb++) {
 +    molb = &mtop->molblock[mb];
 +    nat_molb = molb->nmol*mtop->moltype[molb->type].atoms.nr;
 +    pr = &(molinfo[molb->type].plist[F_POSRES]);
 +    prfb = &(molinfo[molb->type].plist[F_FBPOSRES]);
 +    if (pr->nr > 0 || prfb->nr > 0) {
 +      atom = mtop->moltype[molb->type].atoms.atom;
 +      for(i=0; (i<pr->nr); i++) {
 +      ai=pr->param[i].AI;
 +      if (ai >= natoms) {
 +        gmx_fatal(FARGS,"Position restraint atom index (%d) in moltype '%s' is larger than number of atoms in %s (%d).\n",
 +                  ai+1,*molinfo[molb->type].name,fn,natoms);
 +      }
 +    hadAtom[ai]=TRUE;
 +      if (rc_scaling == erscCOM) {
 +        /* Determine the center of mass of the posres reference coordinates */
 +        for(j=0; j<npbcdim; j++) {
 +          sum[j] += atom[ai].m*x[a+ai][j];
 +        }
 +        totmass  += atom[ai].m;
 +      }
 +      }
 +      /* Same for flat-bottomed posres, but do not count an atom twice for COM */
 +      for(i=0; (i<prfb->nr); i++) {
 +          ai=prfb->param[i].AI;
 +          if (ai >= natoms) {
 +              gmx_fatal(FARGS,"Position restraint atom index (%d) in moltype '%s' is larger than number of atoms in %s (%d).\n",
 +                        ai+1,*molinfo[molb->type].name,fn,natoms);
 +          }
 +          if (rc_scaling == erscCOM && hadAtom[ai] == FALSE) {
 +              /* Determine the center of mass of the posres reference coordinates */
 +              for(j=0; j<npbcdim; j++) {
 +                  sum[j] += atom[ai].m*x[a+ai][j];
 +              }
 +              totmass  += atom[ai].m;
 +          }
 +      }
 +      if (!bTopB) {
 +      molb->nposres_xA = nat_molb;
 +      snew(molb->posres_xA,molb->nposres_xA);
 +      for(i=0; i<nat_molb; i++) {
 +        copy_rvec(x[a+i],molb->posres_xA[i]);
 +      }
 +      } else {
 +      molb->nposres_xB = nat_molb;
 +      snew(molb->posres_xB,molb->nposres_xB);
 +      for(i=0; i<nat_molb; i++) {
 +        copy_rvec(x[a+i],molb->posres_xB[i]);
 +      }
 +      }
 +    }
 +    a += nat_molb;
 +  }
 +  if (rc_scaling == erscCOM) {
 +    if (totmass == 0)
 +      gmx_fatal(FARGS,"The total mass of the position restraint atoms is 0");
 +    for(j=0; j<npbcdim; j++)
 +      com[j] = sum[j]/totmass;
 +    fprintf(stderr,"The center of mass of the position restraint coord's is %6.3f %6.3f %6.3f\n",com[XX],com[YY],com[ZZ]);
 +  }
 +
 +  if (rc_scaling != erscNO) {
 +    for(mb=0; mb<mtop->nmolblock; mb++) {
 +      molb = &mtop->molblock[mb];
 +      nat_molb = molb->nmol*mtop->moltype[molb->type].atoms.nr;
 +      if (molb->nposres_xA > 0 || molb->nposres_xB > 0) {
 +      xp = (!bTopB ? molb->posres_xA : molb->posres_xB);
 +      for(i=0; i<nat_molb; i++) {
 +        for(j=0; j<npbcdim; j++) {
 +          if (rc_scaling == erscALL) {
 +            /* Convert from Cartesian to crystal coordinates */
 +            xp[i][j] *= invbox[j][j];
 +            for(k=j+1; k<npbcdim; k++) {
 +              xp[i][j] += invbox[k][j]*xp[i][k];
 +            }
 +          } else if (rc_scaling == erscCOM) {
 +            /* Subtract the center of mass */
 +            xp[i][j] -= com[j];
 +          }
 +        }
 +      }
 +      }
 +    }
 +
 +    if (rc_scaling == erscCOM) {
 +      /* Convert the COM from Cartesian to crystal coordinates */
 +      for(j=0; j<npbcdim; j++) {
 +      com[j] *= invbox[j][j];
 +      for(k=j+1; k<npbcdim; k++) {
 +        com[j] += invbox[k][j]*com[k];
 +      }
 +      }
 +    }
 +  }
 +  
 +  free_t_atoms(&dumat,TRUE);
 +  sfree(x);
 +  sfree(v);
 +  sfree(hadAtom);
 +}
 +
 +static void gen_posres(gmx_mtop_t *mtop,t_molinfo *mi,
 +                       char *fnA, char *fnB,
 +                       int rc_scaling, int ePBC,
 +                       rvec com, rvec comB,
 +                       warninp_t wi)
 +{
 +  int i,j;
 +
 +  read_posres  (mtop,mi,FALSE,fnA,rc_scaling,ePBC,com,wi);
 +  if (strcmp(fnA,fnB) != 0) {
 +      read_posres(mtop,mi,TRUE ,fnB,rc_scaling,ePBC,comB,wi);
 +  }
 +}
 +
 +static void set_wall_atomtype(gpp_atomtype_t at,t_gromppopts *opts,
 +                              t_inputrec *ir,warninp_t wi)
 +{
 +  int i;
 +  char warn_buf[STRLEN];
 +
 +  if (ir->nwall > 0)
 +  {
 +      fprintf(stderr,"Searching the wall atom type(s)\n");
 +  }
 +  for(i=0; i<ir->nwall; i++)
 +  {
 +      ir->wall_atomtype[i] = get_atomtype_type(opts->wall_atomtype[i],at);
 +      if (ir->wall_atomtype[i] == NOTSET)
 +      {
 +          sprintf(warn_buf,"Specified wall atom type %s is not defined",opts->wall_atomtype[i]);
 +          warning_error(wi,warn_buf);
 +      }
 +  }
 +}
 +
 +static int nrdf_internal(t_atoms *atoms)
 +{
 +  int i,nmass,nrdf;
 +
 +  nmass = 0;
 +  for(i=0; i<atoms->nr; i++) {
 +    /* Vsite ptype might not be set here yet, so also check the mass */
 +    if ((atoms->atom[i].ptype == eptAtom ||
 +       atoms->atom[i].ptype == eptNucleus)
 +      && atoms->atom[i].m > 0) {
 +      nmass++;
 +    }
 +  }
 +  switch (nmass) {
 +  case 0:  nrdf = 0; break;
 +  case 1:  nrdf = 0; break;
 +  case 2:  nrdf = 1; break;
 +  default: nrdf = nmass*3 - 6; break;
 +  }
 +  
 +  return nrdf;
 +}
 +
 +void
 +spline1d( double        dx,
 +               double *      y,
 +               int           n,
 +               double *      u,
 +               double *      y2 )
 +{
 +    int i;
 +    double p,q;
 +      
 +    y2[0] = 0.0;
 +    u[0]  = 0.0;
 +      
 +    for(i=1;i<n-1;i++)
 +    {
 +              p = 0.5*y2[i-1]+2.0;
 +        y2[i] = -0.5/p;
 +        q = (y[i+1]-2.0*y[i]+y[i-1])/dx;
 +              u[i] = (3.0*q/dx-0.5*u[i-1])/p;
 +    }
 +      
 +    y2[n-1] = 0.0;
 +      
 +    for(i=n-2;i>=0;i--)
 +    {
 +        y2[i] = y2[i]*y2[i+1]+u[i];
 +    }
 +}
 +
 +
 +void
 +interpolate1d( double     xmin,
 +                        double     dx,
 +                        double *   ya,
 +                        double *   y2a,
 +                        double     x,
 +                        double *   y,
 +                        double *   y1)
 +{
 +    int ix;
 +    double a,b;
 +      
 +    ix = (x-xmin)/dx;
 +      
 +    a = (xmin+(ix+1)*dx-x)/dx;
 +    b = (x-xmin-ix*dx)/dx;
 +      
 +    *y  = a*ya[ix]+b*ya[ix+1]+((a*a*a-a)*y2a[ix]+(b*b*b-b)*y2a[ix+1])*(dx*dx)/6.0;
 +    *y1 = (ya[ix+1]-ya[ix])/dx-(3.0*a*a-1.0)/6.0*dx*y2a[ix]+(3.0*b*b-1.0)/6.0*dx*y2a[ix+1];
 +}
 +
 +
 +void
 +setup_cmap (int              grid_spacing,
 +                      int              nc,
 +                      real *           grid ,
 +                      gmx_cmap_t *     cmap_grid)
 +{
 +      double *tmp_u,*tmp_u2,*tmp_yy,*tmp_y1,*tmp_t2,*tmp_grid;
 +      
 +    int    i,j,k,ii,jj,kk,idx;
 +      int    offset;
 +    double dx,xmin,v,v1,v2,v12;
 +    double phi,psi;
 +      
 +      snew(tmp_u,2*grid_spacing);
 +      snew(tmp_u2,2*grid_spacing);
 +      snew(tmp_yy,2*grid_spacing);
 +      snew(tmp_y1,2*grid_spacing);
 +      snew(tmp_t2,2*grid_spacing*2*grid_spacing);
 +      snew(tmp_grid,2*grid_spacing*2*grid_spacing);
 +      
 +    dx = 360.0/grid_spacing;
 +    xmin = -180.0-dx*grid_spacing/2;
 +      
 +      for(kk=0;kk<nc;kk++)
 +      {
 +              /* Compute an offset depending on which cmap we are using 
 +               * Offset will be the map number multiplied with the 
 +                 * grid_spacing * grid_spacing * 2
 +               */
 +              offset = kk * grid_spacing * grid_spacing * 2;
 +              
 +              for(i=0;i<2*grid_spacing;i++)
 +              {
 +                      ii=(i+grid_spacing-grid_spacing/2)%grid_spacing;
 +                      
 +                      for(j=0;j<2*grid_spacing;j++)
 +                      {
 +                              jj=(j+grid_spacing-grid_spacing/2)%grid_spacing;
 +                              tmp_grid[i*grid_spacing*2+j] = grid[offset+ii*grid_spacing+jj];
 +                      }
 +              }
 +              
 +              for(i=0;i<2*grid_spacing;i++)
 +              {
 +                      spline1d(dx,&(tmp_grid[2*grid_spacing*i]),2*grid_spacing,tmp_u,&(tmp_t2[2*grid_spacing*i]));
 +              }
 +              
 +              for(i=grid_spacing/2;i<grid_spacing+grid_spacing/2;i++)
 +              {
 +                      ii = i-grid_spacing/2;
 +                      phi = ii*dx-180.0;
 +                      
 +                      for(j=grid_spacing/2;j<grid_spacing+grid_spacing/2;j++)
 +                      {
 +                              jj = j-grid_spacing/2;
 +                              psi = jj*dx-180.0;
 +                              
 +                              for(k=0;k<2*grid_spacing;k++)
 +                              {
 +                                      interpolate1d(xmin,dx,&(tmp_grid[2*grid_spacing*k]),
 +                                                                &(tmp_t2[2*grid_spacing*k]),psi,&tmp_yy[k],&tmp_y1[k]);
 +                              }
 +                              
 +                              spline1d(dx,tmp_yy,2*grid_spacing,tmp_u,tmp_u2);
 +                              interpolate1d(xmin,dx,tmp_yy,tmp_u2,phi,&v,&v1);
 +                              spline1d(dx,tmp_y1,2*grid_spacing,tmp_u,tmp_u2);
 +                              interpolate1d(xmin,dx,tmp_y1,tmp_u2,phi,&v2,&v12);
 +                              
 +                              idx = ii*grid_spacing+jj;
 +                              cmap_grid->cmapdata[kk].cmap[idx*4] = grid[offset+ii*grid_spacing+jj];
 +                              cmap_grid->cmapdata[kk].cmap[idx*4+1] = v1;
 +                              cmap_grid->cmapdata[kk].cmap[idx*4+2] = v2;
 +                              cmap_grid->cmapdata[kk].cmap[idx*4+3] = v12;
 +                      }
 +              }
 +      }
 +}                             
 +                              
 +void init_cmap_grid(gmx_cmap_t *cmap_grid, int ngrid, int grid_spacing)
 +{
 +      int i,k,nelem;
 +      
 +      cmap_grid->ngrid        = ngrid;
 +      cmap_grid->grid_spacing = grid_spacing;
 +      nelem                   = cmap_grid->grid_spacing*cmap_grid->grid_spacing;
 +      
 +      snew(cmap_grid->cmapdata,ngrid);
 +      
 +      for(i=0;i<cmap_grid->ngrid;i++)
 +      {
 +              snew(cmap_grid->cmapdata[i].cmap,4*nelem);
 +      }
 +}
 +
 +
 +static int count_constraints(gmx_mtop_t *mtop,t_molinfo *mi,warninp_t wi)
 +{
 +  int count,count_mol,i,mb;
 +  gmx_molblock_t *molb;
 +  t_params *plist;
 +  char buf[STRLEN];
 +
 +  count = 0;
 +  for(mb=0; mb<mtop->nmolblock; mb++) {
 +    count_mol = 0;
 +    molb  = &mtop->molblock[mb];
 +    plist = mi[molb->type].plist;
 +      
 +    for(i=0; i<F_NRE; i++) {
 +      if (i == F_SETTLE)
 +      count_mol += 3*plist[i].nr;
 +      else if (interaction_function[i].flags & IF_CONSTRAINT)
 +      count_mol += plist[i].nr;
 +    }
 +      
 +    if (count_mol > nrdf_internal(&mi[molb->type].atoms)) {
 +      sprintf(buf,
 +            "Molecule type '%s' has %d constraints.\n"
 +            "For stability and efficiency there should not be more constraints than internal number of degrees of freedom: %d.\n",
 +            *mi[molb->type].name,count_mol,
 +            nrdf_internal(&mi[molb->type].atoms));
 +      warning(wi,buf);
 +    }
 +    count += molb->nmol*count_mol;
 +  }
 +
 +  return count;
 +}
 +
 +static void check_gbsa_params_charged(gmx_mtop_t *sys, gpp_atomtype_t atype)
 +{
 +    int i,nmiss,natoms,mt;
 +    real q;
 +    const t_atoms *atoms;
 +  
 +    nmiss = 0;
 +    for(mt=0;mt<sys->nmoltype;mt++)
 +    {
 +        atoms  = &sys->moltype[mt].atoms;
 +        natoms = atoms->nr;
 +
 +        for(i=0;i<natoms;i++)
 +        {
 +            q = atoms->atom[i].q;
 +            if ((get_atomtype_radius(atoms->atom[i].type,atype)    == 0  ||
 +                 get_atomtype_vol(atoms->atom[i].type,atype)       == 0  ||
 +                 get_atomtype_surftens(atoms->atom[i].type,atype)  == 0  ||
 +                 get_atomtype_gb_radius(atoms->atom[i].type,atype) == 0  ||
 +                 get_atomtype_S_hct(atoms->atom[i].type,atype)     == 0) &&
 +                q != 0)
 +            {
 +                fprintf(stderr,"\nGB parameter(s) zero for atom type '%s' while charge is %g\n",
 +                        get_atomtype_name(atoms->atom[i].type,atype),q);
 +                nmiss++;
 +            }
 +        }
 +    }
 +
 +    if (nmiss > 0)
 +    {
 +        gmx_fatal(FARGS,"Can't do GB electrostatics; the implicit_genborn_params section of the forcefield has parameters with value zero for %d atomtypes that occur as charged atoms.",nmiss);
 +    }
 +}
 +
 +
 +static void check_gbsa_params(t_inputrec *ir,gpp_atomtype_t atype)
 +{
 +    int  nmiss,i;
 +
 +    /* If we are doing GBSA, check that we got the parameters we need
 +     * This checking is to see if there are GBSA paratmeters for all
 +     * atoms in the force field. To go around this for testing purposes
 +     * comment out the nerror++ counter temporarily
 +     */
 +    nmiss = 0;
 +    for(i=0;i<get_atomtype_ntypes(atype);i++)
 +    {
 +        if (get_atomtype_radius(i,atype)    < 0 ||
 +            get_atomtype_vol(i,atype)       < 0 ||
 +            get_atomtype_surftens(i,atype)  < 0 ||
 +            get_atomtype_gb_radius(i,atype) < 0 ||
 +            get_atomtype_S_hct(i,atype)     < 0)
 +        {
 +            fprintf(stderr,"\nGB parameter(s) missing or negative for atom type '%s'\n",
 +                    get_atomtype_name(i,atype));
 +            nmiss++;
 +        }
 +    }
 +    
 +    if (nmiss > 0)
 +    {
 +        gmx_fatal(FARGS,"Can't do GB electrostatics; the implicit_genborn_params section of the forcefield is missing parameters for %d atomtypes or they might be negative.",nmiss);
 +    }
 +  
 +}
 +
 +static void set_verlet_buffer(const gmx_mtop_t *mtop,
 +                              t_inputrec *ir,
 +                              matrix box,
 +                              real verletbuf_drift,
 +                              warninp_t wi)
 +{
 +    real ref_T;
 +    int i;
 +    verletbuf_list_setup_t ls;
 +    real rlist_1x1;
 +    int n_nonlin_vsite;
 +    char warn_buf[STRLEN];
 +
 +    ref_T = 0;
 +    for(i=0; i<ir->opts.ngtc; i++)
 +    {
 +        if (ir->opts.ref_t[i] < 0)
 +        {
 +            warning(wi,"Some atom groups do not use temperature coupling. This cannot be accounted for in the energy drift estimation for the Verlet buffer size. The energy drift and the Verlet buffer might be underestimated.");
 +        }
 +        else
 +        {
 +            ref_T = max(ref_T,ir->opts.ref_t[i]);
 +        }
 +    }
 +
 +    printf("Determining Verlet buffer for an energy drift of %g kJ/mol/ps at %g K\n",verletbuf_drift,ref_T);
 +
 +    for(i=0; i<ir->opts.ngtc; i++)
 +    {
 +        if (ir->opts.ref_t[i] >= 0 && ir->opts.ref_t[i] != ref_T)
 +        {
 +            sprintf(warn_buf,"ref_T for group of %.1f DOFs is %g K, which is smaller than the maximum of %g K used for the buffer size calculation. The buffer size might be on the conservative (large) side.",
 +                    ir->opts.nrdf[i],ir->opts.ref_t[i],ref_T);
 +            warning_note(wi,warn_buf);
 +        }
 +    }
 +
 +    /* Calculate the buffer size for simple atom vs atoms list */
 +    ls.cluster_size_i = 1;
 +    ls.cluster_size_j = 1;
 +    calc_verlet_buffer_size(mtop,det(box),ir,verletbuf_drift,
 +                            &ls,&n_nonlin_vsite,&rlist_1x1);
 +
 +    /* Set the pair-list buffer size in ir */
 +    verletbuf_get_list_setup(FALSE,&ls);
 +    calc_verlet_buffer_size(mtop,det(box),ir,verletbuf_drift,
 +                            &ls,&n_nonlin_vsite,&ir->rlist);
 +
 +    if (n_nonlin_vsite > 0)
 +    {
 +        sprintf(warn_buf,"There are %d non-linear virtual site constructions. Their contribution to the energy drift is approximated. In most cases this does not affect the energy drift significantly.",n_nonlin_vsite);
 +        warning_note(wi,warn_buf);
 +    }
 +
 +    printf("Calculated rlist for %dx%d atom pair-list as %.3f nm, buffer size %.3f nm\n",
 +           1,1,rlist_1x1,rlist_1x1-max(ir->rvdw,ir->rcoulomb));
 +
 +    ir->rlistlong = ir->rlist;
 +    printf("Set rlist, assuming %dx%d atom pair-list, to %.3f nm, buffer size %.3f nm\n",
 +           ls.cluster_size_i,ls.cluster_size_j,
 +           ir->rlist,ir->rlist-max(ir->rvdw,ir->rcoulomb));
 +            
 +    if (sqr(ir->rlistlong) >= max_cutoff2(ir->ePBC,box))
 +    {
 +        gmx_fatal(FARGS,"The pair-list cut-off (%g nm) is longer than half the shortest box vector or longer than the smallest box diagonal element (%g nm). Increase the box size or decrease nstlist or increase verlet-buffer-drift.",ir->rlistlong,sqrt(max_cutoff2(ir->ePBC,box)));
 +    }
 +}
 +
 +int main (int argc, char *argv[])
 +{
 +  static const char *desc[] = {
 +    "The gromacs preprocessor",
 +    "reads a molecular topology file, checks the validity of the",
 +    "file, expands the topology from a molecular description to an atomic",
 +    "description. The topology file contains information about",
 +    "molecule types and the number of molecules, the preprocessor",
 +    "copies each molecule as needed. ",
 +    "There is no limitation on the number of molecule types. ",
 +    "Bonds and bond-angles can be converted into constraints, separately",
 +    "for hydrogens and heavy atoms.",
 +    "Then a coordinate file is read and velocities can be generated",
 +    "from a Maxwellian distribution if requested.",
 +    "[TT]grompp[tt] also reads parameters for the [TT]mdrun[tt] ",
 +    "(eg. number of MD steps, time step, cut-off), and others such as",
 +    "NEMD parameters, which are corrected so that the net acceleration",
 +    "is zero.",
 +    "Eventually a binary file is produced that can serve as the sole input",
 +    "file for the MD program.[PAR]",
 +    
 +    "[TT]grompp[tt] uses the atom names from the topology file. The atom names",
 +    "in the coordinate file (option [TT]-c[tt]) are only read to generate",
 +    "warnings when they do not match the atom names in the topology.",
 +    "Note that the atom names are irrelevant for the simulation as",
 +    "only the atom types are used for generating interaction parameters.[PAR]",
 +
 +    "[TT]grompp[tt] uses a built-in preprocessor to resolve includes, macros, ",
 +    "etc. The preprocessor supports the following keywords:[PAR]",
 +    "#ifdef VARIABLE[BR]",
 +    "#ifndef VARIABLE[BR]",
 +    "#else[BR]",
 +    "#endif[BR]",
 +    "#define VARIABLE[BR]",
 +    "#undef VARIABLE[BR]"
 +    "#include \"filename\"[BR]",
 +    "#include <filename>[PAR]",
 +    "The functioning of these statements in your topology may be modulated by",
 +    "using the following two flags in your [TT].mdp[tt] file:[PAR]",
 +    "[TT]define = -DVARIABLE1 -DVARIABLE2[BR]",
 +    "include = -I/home/john/doe[tt][BR]",
 +    "For further information a C-programming textbook may help you out.",
 +    "Specifying the [TT]-pp[tt] flag will get the pre-processed",
 +    "topology file written out so that you can verify its contents.[PAR]",
 +   
 +    /* cpp has been unnecessary for some time, hasn't it?
 +        "If your system does not have a C-preprocessor, you can still",
 +        "use [TT]grompp[tt], but you do not have access to the features ",
 +        "from the cpp. Command line options to the C-preprocessor can be given",
 +        "in the [TT].mdp[tt] file. See your local manual (man cpp).[PAR]",
 +    */
 +    
 +    "When using position restraints a file with restraint coordinates",
 +    "can be supplied with [TT]-r[tt], otherwise restraining will be done",
 +    "with respect to the conformation from the [TT]-c[tt] option.",
 +    "For free energy calculation the the coordinates for the B topology",
 +    "can be supplied with [TT]-rb[tt], otherwise they will be equal to",
 +    "those of the A topology.[PAR]",
 +    
 +    "Starting coordinates can be read from trajectory with [TT]-t[tt].",
 +    "The last frame with coordinates and velocities will be read,",
 +    "unless the [TT]-time[tt] option is used. Only if this information",
 +    "is absent will the coordinates in the [TT]-c[tt] file be used.",
 +    "Note that these velocities will not be used when [TT]gen_vel = yes[tt]",
 +    "in your [TT].mdp[tt] file. An energy file can be supplied with",
 +    "[TT]-e[tt] to read Nose-Hoover and/or Parrinello-Rahman coupling",
 +    "variables.[PAR]",
 +
 +    "[TT]grompp[tt] can be used to restart simulations (preserving",
 +    "continuity) by supplying just a checkpoint file with [TT]-t[tt].",
 +    "However, for simply changing the number of run steps to extend",
 +    "a run, using [TT]tpbconv[tt] is more convenient than [TT]grompp[tt].",
 +    "You then supply the old checkpoint file directly to [TT]mdrun[tt]",
 +    "with [TT]-cpi[tt]. If you wish to change the ensemble or things",
 +    "like output frequency, then supplying the checkpoint file to",
 +    "[TT]grompp[tt] with [TT]-t[tt] along with a new [TT].mdp[tt] file",
 +    "with [TT]-f[tt] is the recommended procedure.[PAR]",
 +
 +    "By default, all bonded interactions which have constant energy due to",
 +    "virtual site constructions will be removed. If this constant energy is",
 +    "not zero, this will result in a shift in the total energy. All bonded",
 +    "interactions can be kept by turning off [TT]-rmvsbds[tt]. Additionally,",
 +    "all constraints for distances which will be constant anyway because",
 +    "of virtual site constructions will be removed. If any constraints remain",
 +    "which involve virtual sites, a fatal error will result.[PAR]"
 +    
 +    "To verify your run input file, please take note of all warnings",
 +    "on the screen, and correct where necessary. Do also look at the contents",
 +    "of the [TT]mdout.mdp[tt] file; this contains comment lines, as well as",
 +    "the input that [TT]grompp[tt] has read. If in doubt, you can start [TT]grompp[tt]",
 +    "with the [TT]-debug[tt] option which will give you more information",
 +    "in a file called [TT]grompp.log[tt] (along with real debug info). You",
 +    "can see the contents of the run input file with the [TT]gmxdump[tt]",
 +    "program. [TT]gmxcheck[tt] can be used to compare the contents of two",
 +    "run input files.[PAR]"
 +
 +    "The [TT]-maxwarn[tt] option can be used to override warnings printed",
 +    "by [TT]grompp[tt] that otherwise halt output. In some cases, warnings are",
 +    "harmless, but usually they are not. The user is advised to carefully",
 +    "interpret the output messages before attempting to bypass them with",
 +    "this option."
 +  };
 +  t_gromppopts *opts;
 +  gmx_mtop_t   *sys;
 +  int          nmi;
 +  t_molinfo    *mi;
 +  gpp_atomtype_t atype;
 +  t_inputrec   *ir;
 +  int          natoms,nvsite,comb,mt;
 +  t_params     *plist;
 +  t_state      state;
 +  matrix       box;
 +  real         max_spacing,fudgeQQ;
 +  double       reppow;
 +  char         fn[STRLEN],fnB[STRLEN];
 +  const char   *mdparin;
 +  int          ntype;
 +  gmx_bool         bNeedVel,bGenVel;
 +  gmx_bool         have_atomnumber;
 +  int            n12,n13,n14;
 +  t_params     *gb_plist = NULL;
 +  gmx_genborn_t *born = NULL;
 +  output_env_t oenv;
 +  gmx_bool         bVerbose = FALSE;
 +  warninp_t    wi;
 +  char         warn_buf[STRLEN];
 +
 +  t_filenm fnm[] = {
 +    { efMDP, NULL,  NULL,        ffREAD  },
 +    { efMDP, "-po", "mdout",     ffWRITE },
 +    { efSTX, "-c",  NULL,        ffREAD  },
 +    { efSTX, "-r",  NULL,        ffOPTRD },
 +    { efSTX, "-rb", NULL,        ffOPTRD },
 +    { efNDX, NULL,  NULL,        ffOPTRD },
 +    { efTOP, NULL,  NULL,        ffREAD  },
 +    { efTOP, "-pp", "processed", ffOPTWR },
 +    { efTPX, "-o",  NULL,        ffWRITE },
 +    { efTRN, "-t",  NULL,        ffOPTRD },
 +    { efEDR, "-e",  NULL,        ffOPTRD },
 +    { efTRN, "-ref","rotref",    ffOPTRW }
 +  };
 +#define NFILE asize(fnm)
 +
 +  /* Command line options */
 +  static gmx_bool bRenum=TRUE;
 +  static gmx_bool bRmVSBds=TRUE,bZero=FALSE;
 +  static int  i,maxwarn=0;
 +  static real fr_time=-1;
 +  t_pargs pa[] = {
 +    { "-v",       FALSE, etBOOL,{&bVerbose},  
 +      "Be loud and noisy" },
 +    { "-time",    FALSE, etREAL, {&fr_time},
 +      "Take frame at or first after this time." },
 +    { "-rmvsbds",FALSE, etBOOL, {&bRmVSBds},
 +      "Remove constant bonded interactions with virtual sites" },
 +    { "-maxwarn", FALSE, etINT,  {&maxwarn},
 +      "Number of allowed warnings during input processing. Not for normal use and may generate unstable systems" },
 +    { "-zero",    FALSE, etBOOL, {&bZero},
 +      "Set parameters for bonded interactions without defaults to zero instead of generating an error" },
 +    { "-renum",   FALSE, etBOOL, {&bRenum},
 +      "Renumber atomtypes and minimize number of atomtypes" }
 +  };
 +  
 +  CopyRight(stderr,argv[0]);
 +  
 +  /* Initiate some variables */
 +  snew(ir,1);
 +  snew(opts,1);
 +  init_ir(ir,opts);
 +  
 +  /* Parse the command line */
 +  parse_common_args(&argc,argv,0,NFILE,fnm,asize(pa),pa,
 +                    asize(desc),desc,0,NULL,&oenv);
 +  
 +  wi = init_warning(TRUE,maxwarn);
 +  
 +  /* PARAMETER file processing */
 +  mdparin = opt2fn("-f",NFILE,fnm);
 +  set_warning_line(wi,mdparin,-1);    
 +  get_ir(mdparin,opt2fn("-po",NFILE,fnm),ir,opts,wi);
 +  
 +  if (bVerbose) 
 +    fprintf(stderr,"checking input for internal consistency...\n");
 +  check_ir(mdparin,ir,opts,wi);
 +
 +  if (ir->ld_seed == -1) {
 +    ir->ld_seed = make_seed();
 +    fprintf(stderr,"Setting the LD random seed to %d\n",ir->ld_seed);
 +  }
 +
 +  if (ir->expandedvals->lmc_seed == -1) {
 +    ir->expandedvals->lmc_seed = make_seed();
 +    fprintf(stderr,"Setting the lambda MC random seed to %d\n",ir->expandedvals->lmc_seed);
 +  }
 +
 +  bNeedVel = EI_STATE_VELOCITY(ir->eI);
 +  bGenVel  = (bNeedVel && opts->bGenVel);
++  if (bGenVel && ir->bContinuation)
++  {
++      sprintf(warn_buf,
++              "Generating velocities is inconsistent with attempting "
++              "to continue a previous run. Choose only one of "
++              "gen-vel = yes and continuation = yes.");
++      warning_error(wi, warn_buf);
++  }
 +
 +  snew(plist,F_NRE);
 +  init_plist(plist);
 +  snew(sys,1);
 +  atype = init_atomtype();
 +  if (debug)
 +    pr_symtab(debug,0,"Just opened",&sys->symtab);
 +    
 +  strcpy(fn,ftp2fn(efTOP,NFILE,fnm));
 +  if (!gmx_fexist(fn)) 
 +    gmx_fatal(FARGS,"%s does not exist",fn);
 +  new_status(fn,opt2fn_null("-pp",NFILE,fnm),opt2fn("-c",NFILE,fnm),
 +           opts,ir,bZero,bGenVel,bVerbose,&state,
 +           atype,sys,&nmi,&mi,plist,&comb,&reppow,&fudgeQQ,
 +           opts->bMorse,
 +           wi);
 +  
 +  if (debug)
 +    pr_symtab(debug,0,"After new_status",&sys->symtab);
 +
 +    if (ir->cutoff_scheme == ecutsVERLET)
 +    {
 +        fprintf(stderr,"Removing all charge groups because cutoff-scheme=%s\n",
 +                ecutscheme_names[ir->cutoff_scheme]);
 +
 +        /* Remove all charge groups */
 +        gmx_mtop_remove_chargegroups(sys);
 +    }
 +  
 +  if (count_constraints(sys,mi,wi) && (ir->eConstrAlg == econtSHAKE)) {
 +    if (ir->eI == eiCG || ir->eI == eiLBFGS) {
 +        sprintf(warn_buf,"Can not do %s with %s, use %s",
 +                EI(ir->eI),econstr_names[econtSHAKE],econstr_names[econtLINCS]);
 +        warning_error(wi,warn_buf);
 +    }
 +    if (ir->bPeriodicMols) {
 +        sprintf(warn_buf,"Can not do periodic molecules with %s, use %s",
 +                econstr_names[econtSHAKE],econstr_names[econtLINCS]);
 +        warning_error(wi,warn_buf);
 +    }
 +  }
 +
 +  if ( EI_SD (ir->eI) &&  ir->etc != etcNO ) {
 +      warning_note(wi,"Temperature coupling is ignored with SD integrators.");
 +  }
 +
 +  /* If we are doing QM/MM, check that we got the atom numbers */
 +  have_atomnumber = TRUE;
 +  for (i=0; i<get_atomtype_ntypes(atype); i++) {
 +    have_atomnumber = have_atomnumber && (get_atomtype_atomnumber(i,atype) >= 0);
 +  }
 +  if (!have_atomnumber && ir->bQMMM)
 +  {
 +      warning_error(wi,
 +                    "\n"
 +                    "It appears as if you are trying to run a QM/MM calculation, but the force\n"
 +                    "field you are using does not contain atom numbers fields. This is an\n"
 +                    "optional field (introduced in Gromacs 3.3) for general runs, but mandatory\n"
 +                    "for QM/MM. The good news is that it is easy to add - put the atom number as\n"
 +                    "an integer just before the mass column in ffXXXnb.itp.\n"
 +                    "NB: United atoms have the same atom numbers as normal ones.\n\n"); 
 +  }
 +
 +  if (ir->bAdress) {
 +    if ((ir->adress->const_wf>1) || (ir->adress->const_wf<0)) {
 +      warning_error(wi,"AdResS contant weighting function should be between 0 and 1\n\n");
 +    }
 +    /** \TODO check size of ex+hy width against box size */
 +  }
 + 
 +  /* Check for errors in the input now, since they might cause problems
 +   * during processing further down.
 +   */
 +  check_warning_error(wi,FARGS);
 +
 +  if (opt2bSet("-r",NFILE,fnm))
 +    sprintf(fn,"%s",opt2fn("-r",NFILE,fnm));
 +  else
 +    sprintf(fn,"%s",opt2fn("-c",NFILE,fnm));
 +  if (opt2bSet("-rb",NFILE,fnm))
 +    sprintf(fnB,"%s",opt2fn("-rb",NFILE,fnm));
 +  else
 +    strcpy(fnB,fn);
 +
 +    if (nint_ftype(sys,mi,F_POSRES) > 0 || nint_ftype(sys,mi,F_FBPOSRES) > 0)
 +    {
 +        if (bVerbose)
 +        {
 +            fprintf(stderr,"Reading position restraint coords from %s",fn);
 +            if (strcmp(fn,fnB) == 0)
 +            {
 +                fprintf(stderr,"\n");
 +            }
 +            else
 +            {
 +                fprintf(stderr," and %s\n",fnB);
 +            }
 +        }
 +        gen_posres(sys,mi,fn,fnB,
 +                   ir->refcoord_scaling,ir->ePBC,
 +                   ir->posres_com,ir->posres_comB,
 +                   wi);
 +    }
 +              
 +  nvsite = 0;
 +  /* set parameters for virtual site construction (not for vsiten) */
 +  for(mt=0; mt<sys->nmoltype; mt++) {
 +    nvsite +=
 +      set_vsites(bVerbose, &sys->moltype[mt].atoms, atype, mi[mt].plist);
 +  }
 +  /* now throw away all obsolete bonds, angles and dihedrals: */
 +  /* note: constraints are ALWAYS removed */
 +  if (nvsite) {
 +    for(mt=0; mt<sys->nmoltype; mt++) {
 +      clean_vsite_bondeds(mi[mt].plist,sys->moltype[mt].atoms.nr,bRmVSBds);
 +    }
 +  }
 +  
 +      /* If we are using CMAP, setup the pre-interpolation grid */
 +      if(plist->ncmap>0)
 +      {
 +              init_cmap_grid(&sys->ffparams.cmap_grid, plist->nc, plist->grid_spacing);
 +              setup_cmap(plist->grid_spacing, plist->nc, plist->cmap,&sys->ffparams.cmap_grid);
 +      }
 +      
 +    set_wall_atomtype(atype,opts,ir,wi);
 +  if (bRenum) {
 +    renum_atype(plist, sys, ir->wall_atomtype, atype, bVerbose);
 +    ntype = get_atomtype_ntypes(atype);
 +  }
 +
 +    if (ir->implicit_solvent != eisNO)
 +    {
 +        /* Now we have renumbered the atom types, we can check the GBSA params */
 +        check_gbsa_params(ir,atype);
 +      
 +      /* Check that all atoms that have charge and/or LJ-parameters also have 
 +       * sensible GB-parameters
 +       */
 +      check_gbsa_params_charged(sys,atype);
 +    }
 +
 +      /* PELA: Copy the atomtype data to the topology atomtype list */
 +      copy_atomtype_atomtypes(atype,&(sys->atomtypes));
 +
 +      if (debug)
 +    pr_symtab(debug,0,"After renum_atype",&sys->symtab);
 +
 +  if (bVerbose) 
 +    fprintf(stderr,"converting bonded parameters...\n");
 +      
 +  ntype = get_atomtype_ntypes(atype);
 +  convert_params(ntype, plist, mi, comb, reppow, fudgeQQ, sys);
 +      
 +  if (debug)
 +    pr_symtab(debug,0,"After convert_params",&sys->symtab);
 +
 +  /* set ptype to VSite for virtual sites */
 +  for(mt=0; mt<sys->nmoltype; mt++) {
 +    set_vsites_ptype(FALSE,&sys->moltype[mt]);
 +  }
 +  if (debug) {
 +    pr_symtab(debug,0,"After virtual sites",&sys->symtab);
 +  }
 +  /* Check velocity for virtual sites and shells */
 +  if (bGenVel) {
 +    check_vel(sys,state.v);
 +  }
 +    
 +  /* check masses */
 +  check_mol(sys,wi);
 +  
 +  for(i=0; i<sys->nmoltype; i++) {
 +      check_cg_sizes(ftp2fn(efTOP,NFILE,fnm),&sys->moltype[i].cgs,wi);
 +  }
 +
 +  if (EI_DYNAMICS(ir->eI) && ir->eI != eiBD)
 +  {
 +      check_bonds_timestep(sys,ir->delta_t,wi);
 +  }
 +
 +  if (EI_ENERGY_MINIMIZATION(ir->eI) && 0 == ir->nsteps)
 +  {
 +      warning_note(wi,"Zero-step energy minimization will alter the coordinates before calculating the energy. If you just want the energy of a single point, try zero-step MD (with unconstrained_start = yes). To do multiple single-point energy evaluations of different configurations of the same topology, use mdrun -rerun.");
 +  }
 +
 +  check_warning_error(wi,FARGS);
 +      
 +  if (bVerbose) 
 +    fprintf(stderr,"initialising group options...\n");
 +  do_index(mdparin,ftp2fn_null(efNDX,NFILE,fnm),
 +           sys,bVerbose,ir,
 +           bGenVel ? state.v : NULL,
 +           wi);
 +  
 +    if (ir->cutoff_scheme == ecutsVERLET && ir->verletbuf_drift > 0 &&
 +        ir->nstlist > 1)
 +    {
 +        if (EI_DYNAMICS(ir->eI) &&
 +            !(EI_MD(ir->eI) && ir->etc==etcNO) &&
 +            inputrec2nboundeddim(ir) == 3)
 +        {
 +            set_verlet_buffer(sys,ir,state.box,ir->verletbuf_drift,wi);
 +        }
 +    }
 +
 +  /* Init the temperature coupling state */
 +  init_gtc_state(&state,ir->opts.ngtc,0,ir->opts.nhchainlength); /* need to add nnhpres here? */
 +
 +  if (bVerbose)
 +    fprintf(stderr,"Checking consistency between energy and charge groups...\n");
 +  check_eg_vs_cg(sys);
 +  
 +  if (debug)
 +    pr_symtab(debug,0,"After index",&sys->symtab);
 +  triple_check(mdparin,ir,sys,wi);
 +  close_symtab(&sys->symtab);
 +  if (debug)
 +    pr_symtab(debug,0,"After close",&sys->symtab);
 +
 +  /* make exclusions between QM atoms */
 +  if (ir->bQMMM) {
 +    if (ir->QMMMscheme==eQMMMschemenormal && ir->ns_type == ensSIMPLE ){
 +      gmx_fatal(FARGS,"electrostatic embedding only works with grid neighboursearching, use ns-type=grid instead\n");
 +    }
 +    else {
 +     generate_qmexcl(sys,ir,wi);
 +    }
 +  }
 +
 +  if (ftp2bSet(efTRN,NFILE,fnm)) {
 +    if (bVerbose)
 +      fprintf(stderr,"getting data from old trajectory ...\n");
 +    cont_status(ftp2fn(efTRN,NFILE,fnm),ftp2fn_null(efEDR,NFILE,fnm),
 +              bNeedVel,bGenVel,fr_time,ir,&state,sys,oenv);
 +  }
 +
 +    if (ir->ePBC==epbcXY && ir->nwall!=2)
 +    {
 +        clear_rvec(state.box[ZZ]);
 +    }
 +  
 +    if (ir->cutoff_scheme != ecutsVERLET && ir->rlist > 0)
 +    {
 +        set_warning_line(wi,mdparin,-1);
 +        check_chargegroup_radii(sys,ir,state.x,wi);
 +    }
 +
 +  if (EEL_FULL(ir->coulombtype)) {
 +    /* Calculate the optimal grid dimensions */
 +    copy_mat(state.box,box);
 +    if (ir->ePBC==epbcXY && ir->nwall==2)
 +      svmul(ir->wall_ewald_zfac,box[ZZ],box[ZZ]);
 +    if (ir->nkx > 0 && ir->nky > 0 && ir->nkz > 0)
 +    {
 +        /* Mark fourier_spacing as not used */
 +        ir->fourier_spacing = 0;
 +    }
 +    else if (ir->nkx != 0 && ir->nky != 0 && ir->nkz != 0)
 +    {
 +        set_warning_line(wi,mdparin,-1);
 +        warning_error(wi,"Some of the Fourier grid sizes are set, but all of them need to be set.");
 +    }
 +    max_spacing = calc_grid(stdout,box,ir->fourier_spacing,
 +                            &(ir->nkx),&(ir->nky),&(ir->nkz));
 +  }
 +
 +  if (ir->ePull != epullNO)
 +    set_pull_init(ir,sys,state.x,state.box,oenv,opts->pull_start);
 +  
 +  if (ir->bRot)
 +  {
 +      set_reference_positions(ir->rot,sys,state.x,state.box,
 +                              opt2fn("-ref",NFILE,fnm),opt2bSet("-ref",NFILE,fnm),
 +                              wi);
 +  }
 +
 +  /*  reset_multinr(sys); */
 +  
 +  if (EEL_PME(ir->coulombtype)) {
 +      float ratio = pme_load_estimate(sys,ir,state.box);
 +      fprintf(stderr,"Estimate for the relative computational load of the PME mesh part: %.2f\n",ratio);
 +      /* With free energy we might need to do PME both for the A and B state
 +       * charges. This will double the cost, but the optimal performance will
 +       * then probably be at a slightly larger cut-off and grid spacing.
 +       */
 +      if ((ir->efep == efepNO && ratio > 1.0/2.0) ||
 +          (ir->efep != efepNO && ratio > 2.0/3.0)) {
 +          warning_note(wi,
 +                       "The optimal PME mesh load for parallel simulations is below 0.5\n"
 +                       "and for highly parallel simulations between 0.25 and 0.33,\n"
 +                       "for higher performance, increase the cut-off and the PME grid spacing.\n");
 +          if (ir->efep != efepNO) {
 +              warning_note(wi,
 +                           "For free energy simulations, the optimal load limit increases from 0.5 to 0.667\n");
 +          }
 +      }
 +  }
 +  
 +  {
 +        char warn_buf[STRLEN];
 +        double cio = compute_io(ir,sys->natoms,&sys->groups,F_NRE,1);
 +        sprintf(warn_buf,"This run will generate roughly %.0f Mb of data",cio);
 +        if (cio > 2000) {
 +            set_warning_line(wi,mdparin,-1);
 +            warning_note(wi,warn_buf);
 +        } else {
 +            printf("%s\n",warn_buf);
 +        }
 +    }
 +      
 +  /* MRS: eventually figure out better logic for initializing the fep
 +   values that makes declaring the lambda and declaring the state not
 +   potentially conflict if not handled correctly. */
 +  if (ir->efep != efepNO)
 +  {
 +      state.fep_state = ir->fepvals->init_fep_state;
 +      for (i=0;i<efptNR;i++)
 +      {
 +          /* init_lambda trumps state definitions*/
 +          if (ir->fepvals->init_lambda >= 0)
 +          {
 +              state.lambda[i] = ir->fepvals->init_lambda;
 +          }
 +          else
 +          {
 +              if (ir->fepvals->all_lambda[i] == NULL)
 +              {
 +                  gmx_fatal(FARGS,"Values of lambda not set for a free energy calculation!");
 +              }
 +              else
 +              {
 +                  state.lambda[i] = ir->fepvals->all_lambda[i][state.fep_state];
 +              }
 +          }
 +      }
 +  }
 +
 +  if (bVerbose) 
 +    fprintf(stderr,"writing run input file...\n");
 +
 +  done_warning(wi,FARGS);
 +
 +  write_tpx_state(ftp2fn(efTPX,NFILE,fnm),ir,&state,sys);
 +  
 +  thanx(stderr);
 +  
 +  return 0;
 +}
index e7704beb89732cb4062257f14992e3b0978eb816,0000000000000000000000000000000000000000..0c8a77d6104f836ee74c24bdec19c7b23fa859ea
mode 100644,000000..100644
--- /dev/null
@@@ -1,2152 -1,0 +1,2146 @@@
-     if(MASTER(cr))
-     {
-         gmx_warning("New C kernels (and force-only) kernels are now enabled,\n"
-                     "but it will be another couple of days for SSE/AVX kernels.\n\n");
-     }
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "sysstuff.h"
 +#include "vec.h"
 +#include "statutil.h"
 +#include "vcm.h"
 +#include "mdebin.h"
 +#include "nrnb.h"
 +#include "calcmu.h"
 +#include "index.h"
 +#include "vsite.h"
 +#include "update.h"
 +#include "ns.h"
 +#include "trnio.h"
 +#include "xtcio.h"
 +#include "mdrun.h"
 +#include "md_support.h"
 +#include "confio.h"
 +#include "network.h"
 +#include "pull.h"
 +#include "xvgr.h"
 +#include "physics.h"
 +#include "names.h"
 +#include "xmdrun.h"
 +#include "ionize.h"
 +#include "disre.h"
 +#include "orires.h"
 +#include "pme.h"
 +#include "mdatoms.h"
 +#include "repl_ex.h"
 +#include "qmmm.h"
 +#include "domdec.h"
 +#include "domdec_network.h"
 +#include "partdec.h"
 +#include "topsort.h"
 +#include "coulomb.h"
 +#include "constr.h"
 +#include "shellfc.h"
 +#include "compute_io.h"
 +#include "mvdata.h"
 +#include "checkpoint.h"
 +#include "mtop_util.h"
 +#include "sighandler.h"
 +#include "txtdump.h"
 +#include "string2.h"
 +#include "pme_loadbal.h"
 +#include "bondf.h"
 +#include "membed.h"
 +#include "types/nlistheuristics.h"
 +#include "types/iteratedconstraints.h"
 +#include "nbnxn_cuda_data_mgmt.h"
 +
 +#ifdef GMX_LIB_MPI
 +#include <mpi.h>
 +#endif
 +#ifdef GMX_THREAD_MPI
 +#include "tmpi.h"
 +#endif
 +
 +#ifdef GMX_FAHCORE
 +#include "corewrap.h"
 +#endif
 +
 +static void reset_all_counters(FILE *fplog,t_commrec *cr,
 +                               gmx_large_int_t step,
 +                               gmx_large_int_t *step_rel,t_inputrec *ir,
 +                               gmx_wallcycle_t wcycle,t_nrnb *nrnb,
 +                               gmx_runtime_t *runtime,
 +                               nbnxn_cuda_ptr_t cu_nbv)
 +{
 +    char sbuf[STEPSTRSIZE];
 +
 +    /* Reset all the counters related to performance over the run */
 +    md_print_warn(cr,fplog,"step %s: resetting all time and cycle counters\n",
 +                  gmx_step_str(step,sbuf));
 +
 +    if (cu_nbv)
 +    {
 +        nbnxn_cuda_reset_timings(cu_nbv);
 +    }
 +
 +    wallcycle_stop(wcycle,ewcRUN);
 +    wallcycle_reset_all(wcycle);
 +    if (DOMAINDECOMP(cr))
 +    {
 +        reset_dd_statistics_counters(cr->dd);
 +    }
 +    init_nrnb(nrnb);
 +    ir->init_step += *step_rel;
 +    ir->nsteps    -= *step_rel;
 +    *step_rel = 0;
 +    wallcycle_start(wcycle,ewcRUN);
 +    runtime_start(runtime);
 +    print_date_and_time(fplog,cr->nodeid,"Restarted time",runtime);
 +}
 +
 +double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
 +             const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
 +             int nstglobalcomm,
 +             gmx_vsite_t *vsite,gmx_constr_t constr,
 +             int stepout,t_inputrec *ir,
 +             gmx_mtop_t *top_global,
 +             t_fcdata *fcd,
 +             t_state *state_global,
 +             t_mdatoms *mdatoms,
 +             t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +             gmx_edsam_t ed,t_forcerec *fr,
 +             int repl_ex_nst,int repl_ex_nex,int repl_ex_seed,gmx_membed_t membed,
 +             real cpt_period,real max_hours,
 +             const char *deviceOptions,
 +             unsigned long Flags,
 +             gmx_runtime_t *runtime)
 +{
 +    gmx_mdoutf_t *outf;
 +    gmx_large_int_t step,step_rel;
 +    double     run_time;
 +    double     t,t0,lam0[efptNR];
 +    gmx_bool       bGStatEveryStep,bGStat,bCalcVir,bCalcEner;
 +    gmx_bool       bNS,bNStList,bSimAnn,bStopCM,bRerunMD,bNotLastFrame=FALSE,
 +               bFirstStep,bStateFromCP,bStateFromTPX,bInitStep,bLastStep,
 +               bBornRadii,bStartingFromCpt;
 +    gmx_bool   bDoDHDL=FALSE,bDoFEP=FALSE,bDoExpanded=FALSE;
 +    gmx_bool       do_ene,do_log,do_verbose,bRerunWarnNoV=TRUE,
 +               bForceUpdate=FALSE,bCPT;
 +    int        mdof_flags;
 +    gmx_bool       bMasterState;
 +    int        force_flags,cglo_flags;
 +    tensor     force_vir,shake_vir,total_vir,tmp_vir,pres;
 +    int        i,m;
 +    t_trxstatus *status;
 +    rvec       mu_tot;
 +    t_vcm      *vcm;
 +    t_state    *bufstate=NULL;   
 +    matrix     *scale_tot,pcoupl_mu,M,ebox;
 +    gmx_nlheur_t nlh;
 +    t_trxframe rerun_fr;
 +    gmx_repl_ex_t repl_ex=NULL;
 +    int        nchkpt=1;
 +    gmx_localtop_t *top;      
 +    t_mdebin *mdebin=NULL;
 +    df_history_t df_history;
 +    t_state    *state=NULL;
 +    rvec       *f_global=NULL;
 +    int        n_xtc=-1;
 +    rvec       *x_xtc=NULL;
 +    gmx_enerdata_t *enerd;
 +    rvec       *f=NULL;
 +    gmx_global_stat_t gstat;
 +    gmx_update_t upd=NULL;
 +    t_graph    *graph=NULL;
 +    globsig_t   gs;
 +    gmx_rng_t mcrng=NULL;
 +    gmx_bool        bFFscan;
 +    gmx_groups_t *groups;
 +    gmx_ekindata_t *ekind, *ekind_save;
 +    gmx_shellfc_t shellfc;
 +    int         count,nconverged=0;
 +    real        timestep=0;
 +    double      tcount=0;
 +    gmx_bool        bIonize=FALSE;
 +    gmx_bool        bTCR=FALSE,bConverged=TRUE,bOK,bSumEkinhOld,bExchanged;
 +    gmx_bool        bAppend;
 +    gmx_bool        bResetCountersHalfMaxH=FALSE;
 +    gmx_bool        bVV,bIterations,bFirstIterate,bTemp,bPres,bTrotter;
 +    gmx_bool        bUpdateDoLR;
 +    real        mu_aver=0,dvdl;
 +    int         a0,a1,gnx=0,ii;
 +    atom_id     *grpindex=NULL;
 +    char        *grpname;
 +    t_coupl_rec *tcr=NULL;
 +    rvec        *xcopy=NULL,*vcopy=NULL,*cbuf=NULL;
 +    matrix      boxcopy={{0}},lastbox;
 +      tensor      tmpvir;
 +      real        fom,oldfom,veta_save,pcurr,scalevir,tracevir;
 +      real        vetanew = 0;
 +    int         lamnew=0;
 +    /* for FEP */
 +    int         fep_state=0;
 +    int         nstfep;
 +    real        rate;
 +    double      cycles;
 +      real        saved_conserved_quantity = 0;
 +    real        last_ekin = 0;
 +      int         iter_i;
 +      t_extmass   MassQ;
 +    int         **trotter_seq; 
 +    char        sbuf[STEPSTRSIZE],sbuf2[STEPSTRSIZE];
 +    int         handled_stop_condition=gmx_stop_cond_none; /* compare to get_stop_condition*/
 +    gmx_iterate_t iterate;
 +    gmx_large_int_t multisim_nsteps=-1; /* number of steps to do  before first multisim 
 +                                          simulation stops. If equal to zero, don't
 +                                          communicate any more between multisims.*/
 +    /* PME load balancing data for GPU kernels */
 +    pme_load_balancing_t pme_loadbal=NULL;
 +    double          cycles_pmes;
 +    gmx_bool        bPMETuneTry=FALSE,bPMETuneRunning=FALSE;
 +
 +#ifdef GMX_FAHCORE
 +    /* Temporary addition for FAHCORE checkpointing */
 +    int chkpt_ret;
 +#endif
 +    
 +    /* Check for special mdrun options */
 +    bRerunMD = (Flags & MD_RERUN);
 +    bIonize  = (Flags & MD_IONIZE);
 +    bFFscan  = (Flags & MD_FFSCAN);
 +    bAppend  = (Flags & MD_APPENDFILES);
 +    if (Flags & MD_RESETCOUNTERSHALFWAY)
 +    {
 +        if (ir->nsteps > 0)
 +        {
 +            /* Signal to reset the counters half the simulation steps. */
 +            wcycle_set_reset_counters(wcycle,ir->nsteps/2);
 +        }
 +        /* Signal to reset the counters halfway the simulation time. */
 +        bResetCountersHalfMaxH = (max_hours > 0);
 +    }
 +
 +    /* md-vv uses averaged full step velocities for T-control 
 +       md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
 +       md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
 +    bVV = EI_VV(ir->eI);
 +    if (bVV) /* to store the initial velocities while computing virial */
 +    {
 +        snew(cbuf,top_global->natoms);
 +    }
 +    /* all the iteratative cases - only if there are constraints */ 
 +    bIterations = ((IR_NPH_TROTTER(ir) || IR_NPT_TROTTER(ir)) && (constr) && (!bRerunMD));
 +    bTrotter = (bVV && (IR_NPT_TROTTER(ir) || IR_NPH_TROTTER(ir) || IR_NVT_TROTTER(ir)));
 +    
 +    if (bRerunMD)
 +    {
 +        /* Since we don't know if the frames read are related in any way,
 +         * rebuild the neighborlist at every step.
 +         */
 +        ir->nstlist       = 1;
 +        ir->nstcalcenergy = 1;
 +        nstglobalcomm     = 1;
 +    }
 +
 +    check_ir_old_tpx_versions(cr,fplog,ir,top_global);
 +
 +    nstglobalcomm = check_nstglobalcomm(fplog,cr,nstglobalcomm,ir);
 +    bGStatEveryStep = (nstglobalcomm == 1);
 +
 +    if (!bGStatEveryStep && ir->nstlist == -1 && fplog != NULL)
 +    {
 +        fprintf(fplog,
 +                "To reduce the energy communication with nstlist = -1\n"
 +                "the neighbor list validity should not be checked at every step,\n"
 +                "this means that exact integration is not guaranteed.\n"
 +                "The neighbor list validity is checked after:\n"
 +                "  <n.list life time> - 2*std.dev.(n.list life time)  steps.\n"
 +                "In most cases this will result in exact integration.\n"
 +                "This reduces the energy communication by a factor of 2 to 3.\n"
 +                "If you want less energy communication, set nstlist > 3.\n\n");
 +    }
 +
 +    if (bRerunMD || bFFscan)
 +    {
 +        ir->nstxtcout = 0;
 +    }
 +    groups = &top_global->groups;
 +
 +    /* Initial values */
 +    init_md(fplog,cr,ir,oenv,&t,&t0,state_global->lambda,
 +            &(state_global->fep_state),lam0,
 +            nrnb,top_global,&upd,
 +            nfile,fnm,&outf,&mdebin,
 +            force_vir,shake_vir,mu_tot,&bSimAnn,&vcm,state_global,Flags);
 +
 +    clear_mat(total_vir);
 +    clear_mat(pres);
 +    /* Energy terms and groups */
 +    snew(enerd,1);
 +    init_enerdata(top_global->groups.grps[egcENER].nr,ir->fepvals->n_lambda,
 +                  enerd);
 +    if (DOMAINDECOMP(cr))
 +    {
 +        f = NULL;
 +    }
 +    else
 +    {
 +        snew(f,top_global->natoms);
 +    }
 +
 +    /* lambda Monte carlo random number generator  */
 +    if (ir->bExpanded)
 +    {
 +        mcrng = gmx_rng_init(ir->expandedvals->lmc_seed);
 +    }
 +    /* copy the state into df_history */
 +    copy_df_history(&df_history,&state_global->dfhist);
 +
 +    /* Kinetic energy data */
 +    snew(ekind,1);
 +    init_ekindata(fplog,top_global,&(ir->opts),ekind);
 +    /* needed for iteration of constraints */
 +    snew(ekind_save,1);
 +    init_ekindata(fplog,top_global,&(ir->opts),ekind_save);
 +    /* Copy the cos acceleration to the groups struct */    
 +    ekind->cosacc.cos_accel = ir->cos_accel;
 +
 +    gstat = global_stat_init(ir);
 +    debug_gmx();
 +
 +    /* Check for polarizable models and flexible constraints */
 +    shellfc = init_shell_flexcon(fplog,
 +                                 top_global,n_flexible_constraints(constr),
 +                                 (ir->bContinuation || 
 +                                  (DOMAINDECOMP(cr) && !MASTER(cr))) ?
 +                                 NULL : state_global->x);
 +
 +    if (DEFORM(*ir))
 +    {
 +#ifdef GMX_THREAD_MPI
 +        tMPI_Thread_mutex_lock(&deform_init_box_mutex);
 +#endif
 +        set_deform_reference_box(upd,
 +                                 deform_init_init_step_tpx,
 +                                 deform_init_box_tpx);
 +#ifdef GMX_THREAD_MPI
 +        tMPI_Thread_mutex_unlock(&deform_init_box_mutex);
 +#endif
 +    }
 +
 +    {
 +        double io = compute_io(ir,top_global->natoms,groups,mdebin->ebin->nener,1);
 +        if ((io > 2000) && MASTER(cr))
 +            fprintf(stderr,
 +                    "\nWARNING: This run will generate roughly %.0f Mb of data\n\n",
 +                    io);
 +    }
 +
 +    if (DOMAINDECOMP(cr)) {
 +        top = dd_init_local_top(top_global);
 +
 +        snew(state,1);
 +        dd_init_local_state(cr->dd,state_global,state);
 +
 +        if (DDMASTER(cr->dd) && ir->nstfout) {
 +            snew(f_global,state_global->natoms);
 +        }
 +    } else {
 +        if (PAR(cr)) {
 +            /* Initialize the particle decomposition and split the topology */
 +            top = split_system(fplog,top_global,ir,cr);
 +
 +            pd_cg_range(cr,&fr->cg0,&fr->hcg);
 +            pd_at_range(cr,&a0,&a1);
 +        } else {
 +            top = gmx_mtop_generate_local_top(top_global,ir);
 +
 +            a0 = 0;
 +            a1 = top_global->natoms;
 +        }
 +
 +        forcerec_set_excl_load(fr,top,cr);
 +
 +        state = partdec_init_local_state(cr,state_global);
 +        f_global = f;
 +
 +        atoms2md(top_global,ir,0,NULL,a0,a1-a0,mdatoms);
 +
 +        if (vsite) {
 +            set_vsite_top(vsite,top,mdatoms,cr);
 +        }
 +
 +        if (ir->ePBC != epbcNONE && !fr->bMolPBC) {
 +            graph = mk_graph(fplog,&(top->idef),0,top_global->natoms,FALSE,FALSE);
 +        }
 +
 +        if (shellfc) {
 +            make_local_shells(cr,mdatoms,shellfc);
 +        }
 +
 +        init_bonded_thread_force_reduction(fr,&top->idef);
 +
 +        if (ir->pull && PAR(cr)) {
 +            dd_make_local_pull_groups(NULL,ir->pull,mdatoms);
 +        }
 +    }
 +
 +    if (DOMAINDECOMP(cr))
 +    {
 +        /* Distribute the charge groups over the nodes from the master node */
 +        dd_partition_system(fplog,ir->init_step,cr,TRUE,1,
 +                            state_global,top_global,ir,
 +                            state,&f,mdatoms,top,fr,
 +                            vsite,shellfc,constr,
 +                            nrnb,wcycle,FALSE);
 +
 +    }
 +
 +    update_mdatoms(mdatoms,state->lambda[efptMASS]);
 +
 +    if (opt2bSet("-cpi",nfile,fnm))
 +    {
 +        bStateFromCP = gmx_fexist_master(opt2fn_master("-cpi",nfile,fnm,cr),cr);
 +    }
 +    else
 +    {
 +        bStateFromCP = FALSE;
 +    }
 +
 +    if (MASTER(cr))
 +    {
 +        if (bStateFromCP)
 +        {
 +            /* Update mdebin with energy history if appending to output files */
 +            if ( Flags & MD_APPENDFILES )
 +            {
 +                restore_energyhistory_from_state(mdebin,&state_global->enerhist);
 +            }
 +            else
 +            {
 +                /* We might have read an energy history from checkpoint,
 +                 * free the allocated memory and reset the counts.
 +                 */
 +                done_energyhistory(&state_global->enerhist);
 +                init_energyhistory(&state_global->enerhist);
 +            }
 +        }
 +        /* Set the initial energy history in state by updating once */
 +        update_energyhistory(&state_global->enerhist,mdebin);
 +    } 
 +
 +    if ((state->flags & (1<<estLD_RNG)) && (Flags & MD_READ_RNG)) 
 +    {
 +        /* Set the random state if we read a checkpoint file */
 +        set_stochd_state(upd,state);
 +    }
 +
 +    if (state->flags & (1<<estMC_RNG))
 +    {
 +        set_mc_state(mcrng,state);
 +    }
 +
 +    /* Initialize constraints */
 +    if (constr) {
 +        if (!DOMAINDECOMP(cr))
 +            set_constraints(constr,top,ir,mdatoms,cr);
 +    }
 +
 +    /* Check whether we have to GCT stuff */
 +    bTCR = ftp2bSet(efGCT,nfile,fnm);
 +    if (bTCR) {
 +        if (MASTER(cr)) {
 +            fprintf(stderr,"Will do General Coupling Theory!\n");
 +        }
 +        gnx = top_global->mols.nr;
 +        snew(grpindex,gnx);
 +        for(i=0; (i<gnx); i++) {
 +            grpindex[i] = i;
 +        }
 +    }
 +
 +    if (repl_ex_nst > 0)
 +    {
 +        /* We need to be sure replica exchange can only occur
 +         * when the energies are current */
 +        check_nst_param(fplog,cr,"nstcalcenergy",ir->nstcalcenergy,
 +                        "repl_ex_nst",&repl_ex_nst);
 +        /* This check needs to happen before inter-simulation
 +         * signals are initialized, too */
 +    }
 +    if (repl_ex_nst > 0 && MASTER(cr))
 +    {
 +        repl_ex = init_replica_exchange(fplog,cr->ms,state_global,ir,
 +                                        repl_ex_nst,repl_ex_nex,repl_ex_seed);
 +    }
 +
 +    /* PME tuning is only supported with GPUs or PME nodes and not with rerun */
 +    if ((Flags & MD_TUNEPME) &&
 +        EEL_PME(fr->eeltype) &&
 +        ( (fr->cutoff_scheme == ecutsVERLET && fr->nbv->bUseGPU) || !(cr->duty & DUTY_PME)) &&
 +        !bRerunMD)
 +    {
 +        pme_loadbal_init(&pme_loadbal,ir,state->box,fr->ic,fr->pmedata);
 +        cycles_pmes = 0;
 +        if (cr->duty & DUTY_PME)
 +        {
 +            /* Start tuning right away, as we can't measure the load */
 +            bPMETuneRunning = TRUE;
 +        }
 +        else
 +        {
 +            /* Separate PME nodes, we can measure the PP/PME load balance */
 +            bPMETuneTry = TRUE;
 +        }
 +    }
 +
 +    if (!ir->bContinuation && !bRerunMD)
 +    {
 +        if (mdatoms->cFREEZE && (state->flags & (1<<estV)))
 +        {
 +            /* Set the velocities of frozen particles to zero */
 +            for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++)
 +            {
 +                for(m=0; m<DIM; m++)
 +                {
 +                    if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
 +                    {
 +                        state->v[i][m] = 0;
 +                    }
 +                }
 +            }
 +        }
 +
 +        if (constr)
 +        {
 +            /* Constrain the initial coordinates and velocities */
 +            do_constrain_first(fplog,constr,ir,mdatoms,state,f,
 +                               graph,cr,nrnb,fr,top,shake_vir);
 +        }
 +        if (vsite)
 +        {
 +            /* Construct the virtual sites for the initial configuration */
 +            construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,NULL,
 +                             top->idef.iparams,top->idef.il,
 +                             fr->ePBC,fr->bMolPBC,graph,cr,state->box);
 +        }
 +    }
 +
 +    debug_gmx();
 +  
 +    /* set free energy calculation frequency as the minimum of nstdhdl, nstexpanded, and nstrepl_ex_nst*/
 +    nstfep = ir->fepvals->nstdhdl;
 +    if (ir->bExpanded && (nstfep > ir->expandedvals->nstexpanded))
 +    {
 +        nstfep = ir->expandedvals->nstexpanded;
 +    }
 +    if (repl_ex_nst > 0 && repl_ex_nst > nstfep)
 +    {
 +        nstfep = repl_ex_nst;
 +    }
 +
 +    /* I'm assuming we need global communication the first time! MRS */
 +    cglo_flags = (CGLO_TEMPERATURE | CGLO_GSTAT
 +                  | ((ir->comm_mode != ecmNO) ? CGLO_STOPCM:0)
 +                  | (bVV ? CGLO_PRESSURE:0)
 +                  | (bVV ? CGLO_CONSTRAINT:0)
 +                  | (bRerunMD ? CGLO_RERUNMD:0)
 +                  | ((Flags & MD_READ_EKIN) ? CGLO_READEKIN:0));
 +    
 +    bSumEkinhOld = FALSE;
 +    compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                    NULL,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
 +                    constr,NULL,FALSE,state->box,
 +                    top_global,&pcurr,top_global->natoms,&bSumEkinhOld,cglo_flags);
 +    if (ir->eI == eiVVAK) {
 +        /* a second call to get the half step temperature initialized as well */ 
 +        /* we do the same call as above, but turn the pressure off -- internally to 
 +           compute_globals, this is recognized as a velocity verlet half-step 
 +           kinetic energy calculation.  This minimized excess variables, but 
 +           perhaps loses some logic?*/
 +        
 +        compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                        NULL,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
 +                        constr,NULL,FALSE,state->box,
 +                        top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
 +                        cglo_flags &~ (CGLO_STOPCM | CGLO_PRESSURE));
 +    }
 +    
 +    /* Calculate the initial half step temperature, and save the ekinh_old */
 +    if (!(Flags & MD_STARTFROMCPT)) 
 +    {
 +        for(i=0; (i<ir->opts.ngtc); i++) 
 +        {
 +            copy_mat(ekind->tcstat[i].ekinh,ekind->tcstat[i].ekinh_old);
 +        } 
 +    }
 +    if (ir->eI != eiVV) 
 +    {
 +        enerd->term[F_TEMP] *= 2; /* result of averages being done over previous and current step,
 +                                     and there is no previous step */
 +    }
 +    
 +    /* if using an iterative algorithm, we need to create a working directory for the state. */
 +    if (bIterations) 
 +    {
 +            bufstate = init_bufstate(state);
 +    }
 +    if (bFFscan) 
 +    {
 +        snew(xcopy,state->natoms);
 +        snew(vcopy,state->natoms);
 +        copy_rvecn(state->x,xcopy,0,state->natoms);
 +        copy_rvecn(state->v,vcopy,0,state->natoms);
 +        copy_mat(state->box,boxcopy);
 +    } 
 +    
 +    /* need to make an initiation call to get the Trotter variables set, as well as other constants for non-trotter
 +       temperature control */
 +    trotter_seq = init_npt_vars(ir,state,&MassQ,bTrotter);
 +    
 +    if (MASTER(cr))
 +    {
 +        if (constr && !ir->bContinuation && ir->eConstrAlg == econtLINCS)
 +        {
 +            fprintf(fplog,
 +                    "RMS relative constraint deviation after constraining: %.2e\n",
 +                    constr_rmsd(constr,FALSE));
 +        }
 +        if (EI_STATE_VELOCITY(ir->eI))
 +        {
 +            fprintf(fplog,"Initial temperature: %g K\n",enerd->term[F_TEMP]);
 +        }
 +        if (bRerunMD)
 +        {
 +            fprintf(stderr,"starting md rerun '%s', reading coordinates from"
 +                    " input trajectory '%s'\n\n",
 +                    *(top_global->name),opt2fn("-rerun",nfile,fnm));
 +            if (bVerbose)
 +            {
 +                fprintf(stderr,"Calculated time to finish depends on nsteps from "
 +                        "run input file,\nwhich may not correspond to the time "
 +                        "needed to process input trajectory.\n\n");
 +            }
 +        }
 +        else
 +        {
 +            char tbuf[20];
 +            fprintf(stderr,"starting mdrun '%s'\n",
 +                    *(top_global->name));
 +            if (ir->nsteps >= 0)
 +            {
 +                sprintf(tbuf,"%8.1f",(ir->init_step+ir->nsteps)*ir->delta_t);
 +            }
 +            else
 +            {
 +                sprintf(tbuf,"%s","infinite");
 +            }
 +            if (ir->init_step > 0)
 +            {
 +                fprintf(stderr,"%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
 +                        gmx_step_str(ir->init_step+ir->nsteps,sbuf),tbuf,
 +                        gmx_step_str(ir->init_step,sbuf2),
 +                        ir->init_step*ir->delta_t);
 +            }
 +            else
 +            {
 +                fprintf(stderr,"%s steps, %s ps.\n",
 +                        gmx_step_str(ir->nsteps,sbuf),tbuf);
 +            }
 +        }
 +        fprintf(fplog,"\n");
 +    }
 +
 +    /* Set and write start time */
 +    runtime_start(runtime);
 +    print_date_and_time(fplog,cr->nodeid,"Started mdrun",runtime);
 +    wallcycle_start(wcycle,ewcRUN);
 +    if (fplog)
 +    {
 +        fprintf(fplog,"\n");
 +    }
 +
 +    /* safest point to do file checkpointing is here.  More general point would be immediately before integrator call */
 +#ifdef GMX_FAHCORE
 +    chkpt_ret=fcCheckPointParallel( cr->nodeid,
 +                                    NULL,0);
 +    if ( chkpt_ret == 0 ) 
 +        gmx_fatal( 3,__FILE__,__LINE__, "Checkpoint error on step %d\n", 0 );
 +#endif
 +
 +    debug_gmx();
 +    /***********************************************************
 +     *
 +     *             Loop over MD steps 
 +     *
 +     ************************************************************/
 +
 +    /* if rerunMD then read coordinates and velocities from input trajectory */
 +    if (bRerunMD)
 +    {
 +        if (getenv("GMX_FORCE_UPDATE"))
 +        {
 +            bForceUpdate = TRUE;
 +        }
 +
 +        rerun_fr.natoms = 0;
 +        if (MASTER(cr))
 +        {
 +            bNotLastFrame = read_first_frame(oenv,&status,
 +                                             opt2fn("-rerun",nfile,fnm),
 +                                             &rerun_fr,TRX_NEED_X | TRX_READ_V);
 +            if (rerun_fr.natoms != top_global->natoms)
 +            {
 +                gmx_fatal(FARGS,
 +                          "Number of atoms in trajectory (%d) does not match the "
 +                          "run input file (%d)\n",
 +                          rerun_fr.natoms,top_global->natoms);
 +            }
 +            if (ir->ePBC != epbcNONE)
 +            {
 +                if (!rerun_fr.bBox)
 +                {
 +                    gmx_fatal(FARGS,"Rerun trajectory frame step %d time %f does not contain a box, while pbc is used",rerun_fr.step,rerun_fr.time);
 +                }
 +                if (max_cutoff2(ir->ePBC,rerun_fr.box) < sqr(fr->rlistlong))
 +                {
 +                    gmx_fatal(FARGS,"Rerun trajectory frame step %d time %f has too small box dimensions",rerun_fr.step,rerun_fr.time);
 +                }
 +            }
 +        }
 +
 +        if (PAR(cr))
 +        {
 +            rerun_parallel_comm(cr,&rerun_fr,&bNotLastFrame);
 +        }
 +
 +        if (ir->ePBC != epbcNONE)
 +        {
 +            /* Set the shift vectors.
 +             * Necessary here when have a static box different from the tpr box.
 +             */
 +            calc_shifts(rerun_fr.box,fr->shift_vec);
 +        }
 +    }
 +
 +    /* loop over MD steps or if rerunMD to end of input trajectory */
 +    bFirstStep = TRUE;
 +    /* Skip the first Nose-Hoover integration when we get the state from tpx */
 +    bStateFromTPX = !bStateFromCP;
 +    bInitStep = bFirstStep && (bStateFromTPX || bVV);
 +    bStartingFromCpt = (Flags & MD_STARTFROMCPT) && bInitStep;
 +    bLastStep    = FALSE;
 +    bSumEkinhOld = FALSE;
 +    bExchanged   = FALSE;
 +
 +    init_global_signals(&gs,cr,ir,repl_ex_nst);
 +
 +    step = ir->init_step;
 +    step_rel = 0;
 +
 +    if (ir->nstlist == -1)
 +    {
 +        init_nlistheuristics(&nlh,bGStatEveryStep,step);
 +    }
 +
 +    if (MULTISIM(cr) && (repl_ex_nst <=0 ))
 +    {
 +        /* check how many steps are left in other sims */
 +        multisim_nsteps=get_multisim_nsteps(cr, ir->nsteps);
 +    }
 +
 +
 +    /* and stop now if we should */
 +    bLastStep = (bRerunMD || (ir->nsteps >= 0 && step_rel > ir->nsteps) ||
 +                 ((multisim_nsteps >= 0) && (step_rel >= multisim_nsteps )));
 +    while (!bLastStep || (bRerunMD && bNotLastFrame)) {
 +
 +        wallcycle_start(wcycle,ewcSTEP);
 +
 +        if (bRerunMD) {
 +            if (rerun_fr.bStep) {
 +                step = rerun_fr.step;
 +                step_rel = step - ir->init_step;
 +            }
 +            if (rerun_fr.bTime) {
 +                t = rerun_fr.time;
 +            }
 +            else
 +            {
 +                t = step;
 +            }
 +        } 
 +        else 
 +        {
 +            bLastStep = (step_rel == ir->nsteps);
 +            t = t0 + step*ir->delta_t;
 +        }
 +
 +        if (ir->efep != efepNO || ir->bSimTemp)
 +            {
 +            /* find and set the current lambdas.  If rerunning, we either read in a state, or a lambda value,
 +               requiring different logic. */
 +            
 +            set_current_lambdas(step,ir->fepvals,bRerunMD,&rerun_fr,state_global,state,lam0);
 +            bDoDHDL = do_per_step(step,ir->fepvals->nstdhdl);
 +            bDoFEP  = (do_per_step(step,nstfep) && (ir->efep != efepNO));
 +            bDoExpanded  = (do_per_step(step,ir->expandedvals->nstexpanded) && (ir->bExpanded) && (step > 0));
 +        }
 +
 +        if (bSimAnn) 
 +        {
 +            update_annealing_target_temp(&(ir->opts),t);
 +        }
 +
 +        if (bRerunMD)
 +        {
 +            if (!(DOMAINDECOMP(cr) && !MASTER(cr)))
 +            {
 +                for(i=0; i<state_global->natoms; i++)
 +                {
 +                    copy_rvec(rerun_fr.x[i],state_global->x[i]);
 +                }
 +                if (rerun_fr.bV)
 +                {
 +                    for(i=0; i<state_global->natoms; i++)
 +                    {
 +                        copy_rvec(rerun_fr.v[i],state_global->v[i]);
 +                    }
 +                }
 +                else
 +                {
 +                    for(i=0; i<state_global->natoms; i++)
 +                    {
 +                        clear_rvec(state_global->v[i]);
 +                    }
 +                    if (bRerunWarnNoV)
 +                    {
 +                        fprintf(stderr,"\nWARNING: Some frames do not contain velocities.\n"
 +                                "         Ekin, temperature and pressure are incorrect,\n"
 +                                "         the virial will be incorrect when constraints are present.\n"
 +                                "\n");
 +                        bRerunWarnNoV = FALSE;
 +                    }
 +                }
 +            }
 +            copy_mat(rerun_fr.box,state_global->box);
 +            copy_mat(state_global->box,state->box);
 +
 +            if (vsite && (Flags & MD_RERUN_VSITE))
 +            {
 +                if (DOMAINDECOMP(cr))
 +                {
 +                    gmx_fatal(FARGS,"Vsite recalculation with -rerun is not implemented for domain decomposition, use particle decomposition");
 +                }
 +                if (graph)
 +                {
 +                    /* Following is necessary because the graph may get out of sync
 +                     * with the coordinates if we only have every N'th coordinate set
 +                     */
 +                    mk_mshift(fplog,graph,fr->ePBC,state->box,state->x);
 +                    shift_self(graph,state->box,state->x);
 +                }
 +                construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,state->v,
 +                                 top->idef.iparams,top->idef.il,
 +                                 fr->ePBC,fr->bMolPBC,graph,cr,state->box);
 +                if (graph)
 +                {
 +                    unshift_self(graph,state->box,state->x);
 +                }
 +            }
 +        }
 +
 +        /* Stop Center of Mass motion */
 +        bStopCM = (ir->comm_mode != ecmNO && do_per_step(step,ir->nstcomm));
 +
 +        /* Copy back starting coordinates in case we're doing a forcefield scan */
 +        if (bFFscan)
 +        {
 +            for(ii=0; (ii<state->natoms); ii++)
 +            {
 +                copy_rvec(xcopy[ii],state->x[ii]);
 +                copy_rvec(vcopy[ii],state->v[ii]);
 +            }
 +            copy_mat(boxcopy,state->box);
 +        }
 +
 +        if (bRerunMD)
 +        {
 +            /* for rerun MD always do Neighbour Searching */
 +            bNS = (bFirstStep || ir->nstlist != 0);
 +            bNStList = bNS;
 +        }
 +        else
 +        {
 +            /* Determine whether or not to do Neighbour Searching and LR */
 +            bNStList = (ir->nstlist > 0  && step % ir->nstlist == 0);
 +            
 +            bNS = (bFirstStep || bExchanged || bNStList || bDoFEP ||
 +                   (ir->nstlist == -1 && nlh.nabnsb > 0));
 +
 +            if (bNS && ir->nstlist == -1)
 +            {
 +                set_nlistheuristics(&nlh,bFirstStep || bExchanged || bDoFEP, step);
 +            }
 +        } 
 +
 +        /* check whether we should stop because another simulation has 
 +           stopped. */
 +        if (MULTISIM(cr))
 +        {
 +            if ( (multisim_nsteps >= 0) &&  (step_rel >= multisim_nsteps)  &&  
 +                 (multisim_nsteps != ir->nsteps) )  
 +            {
 +                if (bNS)
 +                {
 +                    if (MASTER(cr))
 +                    {
 +                        fprintf(stderr, 
 +                                "Stopping simulation %d because another one has finished\n",
 +                                cr->ms->sim);
 +                    }
 +                    bLastStep=TRUE;
 +                    gs.sig[eglsCHKPT] = 1;
 +                }
 +            }
 +        }
 +
 +        /* < 0 means stop at next step, > 0 means stop at next NS step */
 +        if ( (gs.set[eglsSTOPCOND] < 0 ) ||
 +             ( (gs.set[eglsSTOPCOND] > 0 ) && ( bNS || ir->nstlist==0)) )
 +        {
 +            bLastStep = TRUE;
 +        }
 +
 +        /* Determine whether or not to update the Born radii if doing GB */
 +        bBornRadii=bFirstStep;
 +        if (ir->implicit_solvent && (step % ir->nstgbradii==0))
 +        {
 +            bBornRadii=TRUE;
 +        }
 +        
 +        do_log = do_per_step(step,ir->nstlog) || bFirstStep || bLastStep;
 +        do_verbose = bVerbose &&
 +                  (step % stepout == 0 || bFirstStep || bLastStep);
 +
 +        if (bNS && !(bFirstStep && ir->bContinuation && !bRerunMD))
 +        {
 +            if (bRerunMD)
 +            {
 +                bMasterState = TRUE;
 +            }
 +            else
 +            {
 +                bMasterState = FALSE;
 +                /* Correct the new box if it is too skewed */
 +                if (DYNAMIC_BOX(*ir))
 +                {
 +                    if (correct_box(fplog,step,state->box,graph))
 +                    {
 +                        bMasterState = TRUE;
 +                    }
 +                }
 +                if (DOMAINDECOMP(cr) && bMasterState)
 +                {
 +                    dd_collect_state(cr->dd,state,state_global);
 +                }
 +            }
 +
 +            if (DOMAINDECOMP(cr))
 +            {
 +                /* Repartition the domain decomposition */
 +                wallcycle_start(wcycle,ewcDOMDEC);
 +                dd_partition_system(fplog,step,cr,
 +                                    bMasterState,nstglobalcomm,
 +                                    state_global,top_global,ir,
 +                                    state,&f,mdatoms,top,fr,
 +                                    vsite,shellfc,constr,
 +                                    nrnb,wcycle,
 +                                    do_verbose && !bPMETuneRunning);
 +                wallcycle_stop(wcycle,ewcDOMDEC);
 +                /* If using an iterative integrator, reallocate space to match the decomposition */
 +            }
 +        }
 +
 +        if (MASTER(cr) && do_log && !bFFscan)
 +        {
 +            print_ebin_header(fplog,step,t,state->lambda[efptFEP]); /* can we improve the information printed here? */
 +        }
 +
 +        if (ir->efep != efepNO)
 +        {
 +            update_mdatoms(mdatoms,state->lambda[efptMASS]);
 +        }
 +
 +        if ((bRerunMD && rerun_fr.bV) || bExchanged)
 +        {
 +            
 +            /* We need the kinetic energy at minus the half step for determining
 +             * the full step kinetic energy and possibly for T-coupling.*/
 +            /* This may not be quite working correctly yet . . . . */
 +            compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                            wcycle,enerd,NULL,NULL,NULL,NULL,mu_tot,
 +                            constr,NULL,FALSE,state->box,
 +                            top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
 +                            CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE);
 +        }
 +        clear_mat(force_vir);
 +        
 +        /* Ionize the atoms if necessary */
 +        if (bIonize)
 +        {
 +            ionize(fplog,oenv,mdatoms,top_global,t,ir,state->x,state->v,
 +                   mdatoms->start,mdatoms->start+mdatoms->homenr,state->box,cr);
 +        }
 +        
 +        /* Update force field in ffscan program */
 +        if (bFFscan)
 +        {
 +            if (update_forcefield(fplog,
 +                                  nfile,fnm,fr,
 +                                  mdatoms->nr,state->x,state->box))
 +            {
 +                gmx_finalize_par();
 +
 +                exit(0);
 +            }
 +        }
 +
 +        /* We write a checkpoint at this MD step when:
 +         * either at an NS step when we signalled through gs,
 +         * or at the last step (but not when we do not want confout),
 +         * but never at the first step or with rerun.
 +         */
 +        bCPT = (((gs.set[eglsCHKPT] && (bNS || ir->nstlist == 0)) ||
 +                 (bLastStep && (Flags & MD_CONFOUT))) &&
 +                step > ir->init_step && !bRerunMD);
 +        if (bCPT)
 +        {
 +            gs.set[eglsCHKPT] = 0;
 +        }
 +
 +        /* Determine the energy and pressure:
 +         * at nstcalcenergy steps and at energy output steps (set below).
 +         */
 +        if (EI_VV(ir->eI) && (!bInitStep))
 +        {
 +            /* for vv, the first half actually corresponds to the last step */
 +            bCalcEner = do_per_step(step-1,ir->nstcalcenergy);
 +        }
 +        else
 +        {
 +            bCalcEner = do_per_step(step,ir->nstcalcenergy);
 +        }
 +        bCalcVir = bCalcEner ||
 +            (ir->epc != epcNO && do_per_step(step,ir->nstpcouple));
 +
 +        /* Do we need global communication ? */
 +        bGStat = (bCalcVir || bCalcEner || bStopCM ||
 +                  do_per_step(step,nstglobalcomm) ||
 +                  (ir->nstlist == -1 && !bRerunMD && step >= nlh.step_nscheck));
 +
 +        do_ene = (do_per_step(step,ir->nstenergy) || bLastStep);
 +
 +        if (do_ene || do_log)
 +        {
 +            bCalcVir  = TRUE;
 +            bCalcEner = TRUE;
 +            bGStat    = TRUE;
 +        }
 +        
 +        /* these CGLO_ options remain the same throughout the iteration */
 +        cglo_flags = ((bRerunMD ? CGLO_RERUNMD : 0) |
 +                      (bGStat ? CGLO_GSTAT : 0)
 +            );
 +        
 +        force_flags = (GMX_FORCE_STATECHANGED |
 +                       ((DYNAMIC_BOX(*ir) || bRerunMD) ? GMX_FORCE_DYNAMICBOX : 0) |
 +                       GMX_FORCE_ALLFORCES |
 +                       GMX_FORCE_SEPLRF |
 +                       (bCalcVir ? GMX_FORCE_VIRIAL : 0) |
 +                       (bCalcEner ? GMX_FORCE_ENERGY : 0) |
 +                       (bDoFEP ? GMX_FORCE_DHDL : 0)
 +            );
 +
 +        if(fr->bTwinRange)
 +        {
 +            if(do_per_step(step,ir->nstcalclr))
 +            {
 +                force_flags |= GMX_FORCE_DO_LR;
 +            }
 +        }
 +        
 +        if (shellfc)
 +        {
 +            /* Now is the time to relax the shells */
 +            count=relax_shell_flexcon(fplog,cr,bVerbose,bFFscan ? step+1 : step,
 +                                      ir,bNS,force_flags,
 +                                      bStopCM,top,top_global,
 +                                      constr,enerd,fcd,
 +                                      state,f,force_vir,mdatoms,
 +                                      nrnb,wcycle,graph,groups,
 +                                      shellfc,fr,bBornRadii,t,mu_tot,
 +                                      state->natoms,&bConverged,vsite,
 +                                      outf->fp_field);
 +            tcount+=count;
 +
 +            if (bConverged)
 +            {
 +                nconverged++;
 +            }
 +        }
 +        else
 +        {
 +            /* The coordinates (x) are shifted (to get whole molecules)
 +             * in do_force.
 +             * This is parallellized as well, and does communication too. 
 +             * Check comments in sim_util.c
 +             */
 +             do_force(fplog,cr,ir,step,nrnb,wcycle,top,top_global,groups,
 +                     state->box,state->x,&state->hist,
 +                     f,force_vir,mdatoms,enerd,fcd,
 +                     state->lambda,graph,
 +                     fr,vsite,mu_tot,t,outf->fp_field,ed,bBornRadii,
 +                     (bNS ? GMX_FORCE_NS : 0) | force_flags);
 +        }
 +        
 +        if (bTCR)
 +        {
 +            mu_aver = calc_mu_aver(cr,state->x,mdatoms->chargeA,
 +                                   mu_tot,&top_global->mols,mdatoms,gnx,grpindex);
 +        }
 +        
 +        if (bTCR && bFirstStep)
 +        {
 +            tcr=init_coupling(fplog,nfile,fnm,cr,fr,mdatoms,&(top->idef));
 +            fprintf(fplog,"Done init_coupling\n"); 
 +            fflush(fplog);
 +        }
 +        
 +        if (bVV && !bStartingFromCpt && !bRerunMD)
 +        /*  ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */
 +        {
 +            if (ir->eI==eiVV && bInitStep) 
 +            {
 +                /* if using velocity verlet with full time step Ekin,
 +                 * take the first half step only to compute the 
 +                 * virial for the first step. From there,
 +                 * revert back to the initial coordinates
 +                 * so that the input is actually the initial step.
 +                 */
 +                copy_rvecn(state->v,cbuf,0,state->natoms); /* should make this better for parallelizing? */
 +            } else {
 +                /* this is for NHC in the Ekin(t+dt/2) version of vv */
 +                trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ1);            
 +            }
 +
 +            /* If we are using twin-range interactions where the long-range component
 +             * is only evaluated every nstcalclr>1 steps, we should do a special update
 +             * step to combine the long-range forces on these steps.
 +             * For nstcalclr=1 this is not done, since the forces would have been added
 +             * directly to the short-range forces already.
 +             */
 +            bUpdateDoLR = (fr->bTwinRange && do_per_step(step,ir->nstcalclr));
 +            
 +            update_coords(fplog,step,ir,mdatoms,state,fr->bMolPBC,
 +                          f,bUpdateDoLR,fr->f_twin,fcd,
 +                          ekind,M,wcycle,upd,bInitStep,etrtVELOCITY1,
 +                          cr,nrnb,constr,&top->idef);
 +            
 +            if (bIterations)
 +            {
 +                gmx_iterate_init(&iterate,bIterations && !bInitStep);
 +            }
 +            /* for iterations, we save these vectors, as we will be self-consistently iterating
 +               the calculations */
 +
 +            /*#### UPDATE EXTENDED VARIABLES IN TROTTER FORMULATION */
 +            
 +            /* save the state */
 +            if (bIterations && iterate.bIterate) { 
 +                copy_coupling_state(state,bufstate,ekind,ekind_save,&(ir->opts));
 +            }
 +            
 +            bFirstIterate = TRUE;
 +            while (bFirstIterate || (bIterations && iterate.bIterate))
 +            {
 +                if (bIterations && iterate.bIterate) 
 +                {
 +                    copy_coupling_state(bufstate,state,ekind_save,ekind,&(ir->opts));
 +                    if (bFirstIterate && bTrotter) 
 +                    {
 +                        /* The first time through, we need a decent first estimate
 +                           of veta(t+dt) to compute the constraints.  Do
 +                           this by computing the box volume part of the
 +                           trotter integration at this time. Nothing else
 +                           should be changed by this routine here.  If
 +                           !(first time), we start with the previous value
 +                           of veta.  */
 +                        
 +                        veta_save = state->veta;
 +                        trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ0);
 +                        vetanew = state->veta;
 +                        state->veta = veta_save;
 +                    } 
 +                } 
 +                
 +                bOK = TRUE;
 +                if ( !bRerunMD || rerun_fr.bV || bForceUpdate) {  /* Why is rerun_fr.bV here?  Unclear. */
 +                    dvdl = 0;
 +                    
 +                    update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,
 +                                       state,fr->bMolPBC,graph,f,
 +                                       &top->idef,shake_vir,NULL,
 +                                       cr,nrnb,wcycle,upd,constr,
 +                                       bInitStep,TRUE,bCalcVir,vetanew);
 +                    
 +                    if (!bOK && !bFFscan)
 +                    {
 +                        gmx_fatal(FARGS,"Constraint error: Shake, Lincs or Settle could not solve the constrains");
 +                    }
 +                    
 +                } 
 +                else if (graph)
 +                { /* Need to unshift here if a do_force has been
 +                     called in the previous step */
 +                    unshift_self(graph,state->box,state->x);
 +                }
 +                
 +                
 +                /* if VV, compute the pressure and constraints */
 +                /* For VV2, we strictly only need this if using pressure
 +                 * control, but we really would like to have accurate pressures
 +                 * printed out.
 +                 * Think about ways around this in the future?
 +                 * For now, keep this choice in comments.
 +                 */
 +                /*bPres = (ir->eI==eiVV || IR_NPT_TROTTER(ir)); */
 +                    /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && IR_NPT_TROTTER(ir)));*/
 +                bPres = TRUE;
 +                bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK));
 +                if (bCalcEner && ir->eI==eiVVAK)  /*MRS:  7/9/2010 -- this still doesn't fix it?*/
 +                {
 +                    bSumEkinhOld = TRUE;
 +                }
 +                compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                                wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
 +                                constr,NULL,FALSE,state->box,
 +                                top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
 +                                cglo_flags 
 +                                | CGLO_ENERGY 
 +                                | (bStopCM ? CGLO_STOPCM : 0)
 +                                | (bTemp ? CGLO_TEMPERATURE:0) 
 +                                | (bPres ? CGLO_PRESSURE : 0) 
 +                                | (bPres ? CGLO_CONSTRAINT : 0)
 +                                | ((bIterations && iterate.bIterate) ? CGLO_ITERATE : 0)  
 +                                | (bFirstIterate ? CGLO_FIRSTITERATE : 0)
 +                                | CGLO_SCALEEKIN 
 +                    );
 +                /* explanation of above: 
 +                   a) We compute Ekin at the full time step
 +                   if 1) we are using the AveVel Ekin, and it's not the
 +                   initial step, or 2) if we are using AveEkin, but need the full
 +                   time step kinetic energy for the pressure (always true now, since we want accurate statistics).
 +                   b) If we are using EkinAveEkin for the kinetic energy for the temperture control, we still feed in 
 +                   EkinAveVel because it's needed for the pressure */
 +                
 +                /* temperature scaling and pressure scaling to produce the extended variables at t+dt */
 +                if (!bInitStep) 
 +                {
 +                    if (bTrotter)
 +                    {
 +                        trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ2);
 +                    } 
 +                    else 
 +                    {
 +                        if (bExchanged)
 +                        {
 +            
 +                            /* We need the kinetic energy at minus the half step for determining
 +                             * the full step kinetic energy and possibly for T-coupling.*/
 +                            /* This may not be quite working correctly yet . . . . */
 +                            compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                                            wcycle,enerd,NULL,NULL,NULL,NULL,mu_tot,
 +                                            constr,NULL,FALSE,state->box,
 +                                            top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
 +                                            CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE);
 +                        }
 +
 +
 +                        update_tcouple(fplog,step,ir,state,ekind,wcycle,upd,&MassQ,mdatoms);
 +                    }
 +                }
 +                
 +                if (bIterations &&
 +                    done_iterating(cr,fplog,step,&iterate,bFirstIterate,
 +                                   state->veta,&vetanew)) 
 +                {
 +                    break;
 +                }
 +                bFirstIterate = FALSE;
 +            }
 +
 +            if (bTrotter && !bInitStep) {
 +                enerd->term[F_DVDL_BONDED] += dvdl;        /* only add after iterations */
 +                copy_mat(shake_vir,state->svir_prev);
 +                copy_mat(force_vir,state->fvir_prev);
 +                if (IR_NVT_TROTTER(ir) && ir->eI==eiVV) {
 +                    /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */
 +                    enerd->term[F_TEMP] = sum_ekin(&(ir->opts),ekind,NULL,(ir->eI==eiVV),FALSE,FALSE);
 +                    enerd->term[F_EKIN] = trace(ekind->ekin);
 +                }
 +            }
 +            /* if it's the initial step, we performed this first step just to get the constraint virial */
 +            if (bInitStep && ir->eI==eiVV) {
 +                copy_rvecn(cbuf,state->v,0,state->natoms);
 +            }
 +            
 +            if (fr->bSepDVDL && fplog && do_log) 
 +            {
 +                fprintf(fplog,sepdvdlformat,"Constraint",0.0,dvdl);
 +            }
 +            enerd->term[F_DVDL_BONDED] += dvdl;
 +        }
 +    
 +        /* MRS -- now done iterating -- compute the conserved quantity */
 +        if (bVV) {
 +            saved_conserved_quantity = compute_conserved_from_auxiliary(ir,state,&MassQ);
 +            if (ir->eI==eiVV) 
 +            {
 +                last_ekin = enerd->term[F_EKIN];
 +            }
 +            if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres)) 
 +            {
 +                saved_conserved_quantity -= enerd->term[F_DISPCORR];
 +            }
 +            /* sum up the foreign energy and dhdl terms for vv.  currently done every step so that dhdl is correct in the .edr */
 +            sum_dhdl(enerd,state->lambda,ir->fepvals);
 +        }
 +        
 +        /* ########  END FIRST UPDATE STEP  ############## */
 +        /* ########  If doing VV, we now have v(dt) ###### */
 +        if (bDoExpanded) {
 +            /* perform extended ensemble sampling in lambda - we don't
 +               actually move to the new state before outputting
 +               statistics, but if performing simulated tempering, we
 +               do update the velocities and the tau_t. */
 +        
 +            lamnew = ExpandedEnsembleDynamics(fplog,ir,enerd,state,&MassQ,&df_history,step,mcrng,state->v,mdatoms);
 +        }
 +        /* ################## START TRAJECTORY OUTPUT ################# */
 +        
 +        /* Now we have the energies and forces corresponding to the 
 +         * coordinates at time t. We must output all of this before
 +         * the update.
 +         * for RerunMD t is read from input trajectory
 +         */
 +        mdof_flags = 0;
 +        if (do_per_step(step,ir->nstxout)) { mdof_flags |= MDOF_X; }
 +        if (do_per_step(step,ir->nstvout)) { mdof_flags |= MDOF_V; }
 +        if (do_per_step(step,ir->nstfout)) { mdof_flags |= MDOF_F; }
 +        if (do_per_step(step,ir->nstxtcout)) { mdof_flags |= MDOF_XTC; }
 +        if (bCPT) { mdof_flags |= MDOF_CPT; };
 +
 +#if defined(GMX_FAHCORE) || defined(GMX_WRITELASTSTEP)
 +        if (bLastStep)
 +        {
 +            /* Enforce writing positions and velocities at end of run */
 +            mdof_flags |= (MDOF_X | MDOF_V);
 +        }
 +#endif
 +#ifdef GMX_FAHCORE
 +        if (MASTER(cr))
 +            fcReportProgress( ir->nsteps, step );
 +
 +        /* sync bCPT and fc record-keeping */
 +        if (bCPT && MASTER(cr))
 +            fcRequestCheckPoint();
 +#endif
 +        
 +        if (mdof_flags != 0)
 +        {
 +            wallcycle_start(wcycle,ewcTRAJ);
 +            if (bCPT)
 +            {
 +                if (state->flags & (1<<estLD_RNG))
 +                {
 +                    get_stochd_state(upd,state);
 +                }
 +                if (state->flags  & (1<<estMC_RNG))
 +                {
 +                    get_mc_state(mcrng,state);
 +                }
 +                if (MASTER(cr))
 +                {
 +                    if (bSumEkinhOld)
 +                    {
 +                        state_global->ekinstate.bUpToDate = FALSE;
 +                    }
 +                    else
 +                    {
 +                        update_ekinstate(&state_global->ekinstate,ekind);
 +                        state_global->ekinstate.bUpToDate = TRUE;
 +                    }
 +                    update_energyhistory(&state_global->enerhist,mdebin);
 +                    if (ir->efep!=efepNO || ir->bSimTemp) 
 +                    {
 +                        state_global->fep_state = state->fep_state; /* MRS: seems kludgy. The code should be
 +                                                                       structured so this isn't necessary.
 +                                                                       Note this reassignment is only necessary
 +                                                                       for single threads.*/
 +                        copy_df_history(&state_global->dfhist,&df_history);
 +                    }
 +                }
 +            }
 +            write_traj(fplog,cr,outf,mdof_flags,top_global,
 +                       step,t,state,state_global,f,f_global,&n_xtc,&x_xtc);
 +            if (bCPT)
 +            {
 +                nchkpt++;
 +                bCPT = FALSE;
 +            }
 +            debug_gmx();
 +            if (bLastStep && step_rel == ir->nsteps &&
 +                (Flags & MD_CONFOUT) && MASTER(cr) &&
 +                !bRerunMD && !bFFscan)
 +            {
 +                /* x and v have been collected in write_traj,
 +                 * because a checkpoint file will always be written
 +                 * at the last step.
 +                 */
 +                fprintf(stderr,"\nWriting final coordinates.\n");
 +                if (fr->bMolPBC)
 +                {
 +                    /* Make molecules whole only for confout writing */
 +                    do_pbc_mtop(fplog,ir->ePBC,state->box,top_global,state_global->x);
 +                }
 +                write_sto_conf_mtop(ftp2fn(efSTO,nfile,fnm),
 +                                    *top_global->name,top_global,
 +                                    state_global->x,state_global->v,
 +                                    ir->ePBC,state->box);
 +                debug_gmx();
 +            }
 +            wallcycle_stop(wcycle,ewcTRAJ);
 +        }
 +        
 +        /* kludge -- virial is lost with restart for NPT control. Must restart */
 +        if (bStartingFromCpt && bVV) 
 +        {
 +            copy_mat(state->svir_prev,shake_vir);
 +            copy_mat(state->fvir_prev,force_vir);
 +        }
 +        /*  ################## END TRAJECTORY OUTPUT ################ */
 +        
 +        /* Determine the wallclock run time up till now */
 +        run_time = gmx_gettime() - (double)runtime->real;
 +
 +        /* Check whether everything is still allright */    
 +        if (((int)gmx_get_stop_condition() > handled_stop_condition)
 +#ifdef GMX_THREAD_MPI
 +            && MASTER(cr)
 +#endif
 +            )
 +        {
 +            /* this is just make gs.sig compatible with the hack 
 +               of sending signals around by MPI_Reduce with together with
 +               other floats */
 +            if ( gmx_get_stop_condition() == gmx_stop_cond_next_ns )
 +                gs.sig[eglsSTOPCOND]=1;
 +            if ( gmx_get_stop_condition() == gmx_stop_cond_next )
 +                gs.sig[eglsSTOPCOND]=-1;
 +            /* < 0 means stop at next step, > 0 means stop at next NS step */
 +            if (fplog)
 +            {
 +                fprintf(fplog,
 +                        "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
 +                        gmx_get_signal_name(),
 +                        gs.sig[eglsSTOPCOND]==1 ? "NS " : "");
 +                fflush(fplog);
 +            }
 +            fprintf(stderr,
 +                    "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
 +                    gmx_get_signal_name(),
 +                    gs.sig[eglsSTOPCOND]==1 ? "NS " : "");
 +            fflush(stderr);
 +            handled_stop_condition=(int)gmx_get_stop_condition();
 +        }
 +        else if (MASTER(cr) && (bNS || ir->nstlist <= 0) &&
 +                 (max_hours > 0 && run_time > max_hours*60.0*60.0*0.99) &&
 +                 gs.sig[eglsSTOPCOND] == 0 && gs.set[eglsSTOPCOND] == 0)
 +        {
 +            /* Signal to terminate the run */
 +            gs.sig[eglsSTOPCOND] = 1;
 +            if (fplog)
 +            {
 +                fprintf(fplog,"\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99);
 +            }
 +            fprintf(stderr, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99);
 +        }
 +
 +        if (bResetCountersHalfMaxH && MASTER(cr) &&
 +            run_time > max_hours*60.0*60.0*0.495)
 +        {
 +            gs.sig[eglsRESETCOUNTERS] = 1;
 +        }
 +
 +        if (ir->nstlist == -1 && !bRerunMD)
 +        {
 +            /* When bGStatEveryStep=FALSE, global_stat is only called
 +             * when we check the atom displacements, not at NS steps.
 +             * This means that also the bonded interaction count check is not
 +             * performed immediately after NS. Therefore a few MD steps could
 +             * be performed with missing interactions.
 +             * But wrong energies are never written to file,
 +             * since energies are only written after global_stat
 +             * has been called.
 +             */
 +            if (step >= nlh.step_nscheck)
 +            {
 +                nlh.nabnsb = natoms_beyond_ns_buffer(ir,fr,&top->cgs,
 +                                                     nlh.scale_tot,state->x);
 +            }
 +            else
 +            {
 +                /* This is not necessarily true,
 +                 * but step_nscheck is determined quite conservatively.
 +                 */
 +                nlh.nabnsb = 0;
 +            }
 +        }
 +
 +        /* In parallel we only have to check for checkpointing in steps
 +         * where we do global communication,
 +         *  otherwise the other nodes don't know.
 +         */
 +        if (MASTER(cr) && ((bGStat || !PAR(cr)) &&
 +                           cpt_period >= 0 &&
 +                           (cpt_period == 0 || 
 +                            run_time >= nchkpt*cpt_period*60.0)) &&
 +            gs.set[eglsCHKPT] == 0)
 +        {
 +            gs.sig[eglsCHKPT] = 1;
 +        }
 +  
 +
 +        /* at the start of step, randomize the velocities */
 +        if (ETC_ANDERSEN(ir->etc) && EI_VV(ir->eI))
 +        {
 +            gmx_bool bDoAndersenConstr;
 +            bDoAndersenConstr = (constr && update_randomize_velocities(ir,step,mdatoms,state,upd,&top->idef,constr));
 +            /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */
 +            if (bDoAndersenConstr)
 +            {
 +                update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,
 +                                   state,fr->bMolPBC,graph,f,
 +                                   &top->idef,tmp_vir,NULL,
 +                                   cr,nrnb,wcycle,upd,constr,
 +                                   bInitStep,TRUE,bCalcVir,vetanew);
 +            }
 +        }
 +
 +        if (bIterations)
 +        {
 +            gmx_iterate_init(&iterate,bIterations);
 +        }
 +    
 +        /* for iterations, we save these vectors, as we will be redoing the calculations */
 +        if (bIterations && iterate.bIterate) 
 +        {
 +            copy_coupling_state(state,bufstate,ekind,ekind_save,&(ir->opts));
 +        }
 +        bFirstIterate = TRUE;
 +        while (bFirstIterate || (bIterations && iterate.bIterate))
 +        {
 +            /* We now restore these vectors to redo the calculation with improved extended variables */    
 +            if (bIterations) 
 +            { 
 +                copy_coupling_state(bufstate,state,ekind_save,ekind,&(ir->opts));
 +            }
 +
 +            /* We make the decision to break or not -after- the calculation of Ekin and Pressure,
 +               so scroll down for that logic */
 +            
 +            /* #########   START SECOND UPDATE STEP ################# */
 +            /* Box is changed in update() when we do pressure coupling,
 +             * but we should still use the old box for energy corrections and when
 +             * writing it to the energy file, so it matches the trajectory files for
 +             * the same timestep above. Make a copy in a separate array.
 +             */
 +            copy_mat(state->box,lastbox);
 +
 +            bOK = TRUE;
 +            if (!(bRerunMD && !rerun_fr.bV && !bForceUpdate))
 +            {
 +                wallcycle_start(wcycle,ewcUPDATE);
 +                dvdl = 0;
 +                /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
 +                if (bTrotter) 
 +                {
 +                    if (bIterations && iterate.bIterate) 
 +                    {
 +                        if (bFirstIterate) 
 +                        {
 +                            scalevir = 1;
 +                        }
 +                        else 
 +                        {
 +                            /* we use a new value of scalevir to converge the iterations faster */
 +                            scalevir = tracevir/trace(shake_vir);
 +                        }
 +                        msmul(shake_vir,scalevir,shake_vir); 
 +                        m_add(force_vir,shake_vir,total_vir);
 +                        clear_mat(shake_vir);
 +                    }
 +                    trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ3);
 +                /* We can only do Berendsen coupling after we have summed
 +                 * the kinetic energy or virial. Since the happens
 +                 * in global_state after update, we should only do it at
 +                 * step % nstlist = 1 with bGStatEveryStep=FALSE.
 +                 */
 +                }
 +                else 
 +                {
 +                    update_tcouple(fplog,step,ir,state,ekind,wcycle,upd,&MassQ,mdatoms);
 +                    update_pcouple(fplog,step,ir,state,pcoupl_mu,M,wcycle,
 +                                   upd,bInitStep);
 +                }
 +
 +                if (bVV)
 +                {
 +                    bUpdateDoLR = (fr->bTwinRange && do_per_step(step,ir->nstcalclr));
 +
 +                    /* velocity half-step update */
 +                    update_coords(fplog,step,ir,mdatoms,state,fr->bMolPBC,f,
 +                                  bUpdateDoLR,fr->f_twin,fcd,
 +                                  ekind,M,wcycle,upd,FALSE,etrtVELOCITY2,
 +                                  cr,nrnb,constr,&top->idef);
 +                }
 +
 +                /* Above, initialize just copies ekinh into ekin,
 +                 * it doesn't copy position (for VV),
 +                 * and entire integrator for MD.
 +                 */
 +                
 +                if (ir->eI==eiVVAK) 
 +                {
 +                    copy_rvecn(state->x,cbuf,0,state->natoms);
 +                }
 +                bUpdateDoLR = (fr->bTwinRange && do_per_step(step,ir->nstcalclr));
 +
 +                update_coords(fplog,step,ir,mdatoms,state,fr->bMolPBC,f,
 +                              bUpdateDoLR,fr->f_twin,fcd,
 +                              ekind,M,wcycle,upd,bInitStep,etrtPOSITION,cr,nrnb,constr,&top->idef);
 +                wallcycle_stop(wcycle,ewcUPDATE);
 +
 +                update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,state,
 +                                   fr->bMolPBC,graph,f,
 +                                   &top->idef,shake_vir,force_vir,
 +                                   cr,nrnb,wcycle,upd,constr,
 +                                   bInitStep,FALSE,bCalcVir,state->veta);  
 +                
 +                if (ir->eI==eiVVAK) 
 +                {
 +                    /* erase F_EKIN and F_TEMP here? */
 +                    /* just compute the kinetic energy at the half step to perform a trotter step */
 +                    compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                                    wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
 +                                    constr,NULL,FALSE,lastbox,
 +                                    top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
 +                                    cglo_flags | CGLO_TEMPERATURE    
 +                        );
 +                    wallcycle_start(wcycle,ewcUPDATE);
 +                    trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ4);            
 +                    /* now we know the scaling, we can compute the positions again again */
 +                    copy_rvecn(cbuf,state->x,0,state->natoms);
 +
 +                    bUpdateDoLR = (fr->bTwinRange && do_per_step(step,ir->nstcalclr));
 +
 +                    update_coords(fplog,step,ir,mdatoms,state,fr->bMolPBC,f,
 +                                  bUpdateDoLR,fr->f_twin,fcd,
 +                                  ekind,M,wcycle,upd,bInitStep,etrtPOSITION,cr,nrnb,constr,&top->idef);
 +                    wallcycle_stop(wcycle,ewcUPDATE);
 +
 +                    /* do we need an extra constraint here? just need to copy out of state->v to upd->xp? */
 +                    /* are the small terms in the shake_vir here due
 +                     * to numerical errors, or are they important
 +                     * physically? I'm thinking they are just errors, but not completely sure. 
 +                     * For now, will call without actually constraining, constr=NULL*/
 +                    update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,
 +                                       state,fr->bMolPBC,graph,f,
 +                                       &top->idef,tmp_vir,force_vir,
 +                                       cr,nrnb,wcycle,upd,NULL,
 +                                       bInitStep,FALSE,bCalcVir,
 +                                       state->veta);  
 +                }
 +                if (!bOK && !bFFscan) 
 +                {
 +                    gmx_fatal(FARGS,"Constraint error: Shake, Lincs or Settle could not solve the constrains");
 +                }
 +                
 +                if (fr->bSepDVDL && fplog && do_log) 
 +                {
 +                    fprintf(fplog,sepdvdlformat,"Constraint dV/dl",0.0,dvdl);
 +                }
 +                enerd->term[F_DVDL_BONDED] += dvdl;
 +            } 
 +            else if (graph) 
 +            {
 +                /* Need to unshift here */
 +                unshift_self(graph,state->box,state->x);
 +            }
 +
 +            if (vsite != NULL) 
 +            {
 +                wallcycle_start(wcycle,ewcVSITECONSTR);
 +                if (graph != NULL) 
 +                {
 +                    shift_self(graph,state->box,state->x);
 +                }
 +                construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,state->v,
 +                                 top->idef.iparams,top->idef.il,
 +                                 fr->ePBC,fr->bMolPBC,graph,cr,state->box);
 +                
 +                if (graph != NULL) 
 +                {
 +                    unshift_self(graph,state->box,state->x);
 +                }
 +                wallcycle_stop(wcycle,ewcVSITECONSTR);
 +            }
 +            
 +            /* ############## IF NOT VV, Calculate globals HERE, also iterate constraints ############ */
 +            /* With Leap-Frog we can skip compute_globals at
 +             * non-communication steps, but we need to calculate
 +             * the kinetic energy one step before communication.
 +             */
 +            if (bGStat || do_per_step(step+1,nstglobalcomm) ||
 +                EI_VV(ir->eI))
 +            {
 +                if (ir->nstlist == -1 && bFirstIterate)
 +                {
 +                    gs.sig[eglsNABNSB] = nlh.nabnsb;
 +                }
 +                compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                                wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
 +                                constr,
 +                                bFirstIterate ? &gs : NULL, 
 +                                (step_rel % gs.nstms == 0) && 
 +                                (multisim_nsteps<0 || (step_rel<multisim_nsteps)),
 +                                lastbox,
 +                                top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
 +                                cglo_flags 
 +                                | (!EI_VV(ir->eI) ? CGLO_ENERGY : 0)
 +                                | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
 +                                | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0) 
 +                                | (!EI_VV(ir->eI) || bRerunMD ? CGLO_PRESSURE : 0) 
 +                                | (bIterations && iterate.bIterate ? CGLO_ITERATE : 0) 
 +                                | (bFirstIterate ? CGLO_FIRSTITERATE : 0)
 +                                | CGLO_CONSTRAINT 
 +                    );
 +                if (ir->nstlist == -1 && bFirstIterate)
 +                {
 +                    nlh.nabnsb = gs.set[eglsNABNSB];
 +                    gs.set[eglsNABNSB] = 0;
 +                }
 +            }
 +            /* bIterate is set to keep it from eliminating the old ekin kinetic energy terms */
 +            /* #############  END CALC EKIN AND PRESSURE ################# */
 +        
 +            /* Note: this is OK, but there are some numerical precision issues with using the convergence of
 +               the virial that should probably be addressed eventually. state->veta has better properies,
 +               but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
 +               generate the new shake_vir, but test the veta value for convergence.  This will take some thought. */
 +
 +            if (bIterations && 
 +                done_iterating(cr,fplog,step,&iterate,bFirstIterate,
 +                               trace(shake_vir),&tracevir)) 
 +            {
 +                break;
 +            }
 +            bFirstIterate = FALSE;
 +        }
 +
 +        /* only add constraint dvdl after constraints */
 +        enerd->term[F_DVDL_BONDED] += dvdl;
 +        if (!bVV)
 +        {
 +            /* sum up the foreign energy and dhdl terms for md and sd. currently done every step so that dhdl is correct in the .edr */
 +            sum_dhdl(enerd,state->lambda,ir->fepvals);
 +        }
 +        update_box(fplog,step,ir,mdatoms,state,graph,f,
 +                   ir->nstlist==-1 ? &nlh.scale_tot : NULL,pcoupl_mu,nrnb,wcycle,upd,bInitStep,FALSE);
 +        
 +        /* ################# END UPDATE STEP 2 ################# */
 +        /* #### We now have r(t+dt) and v(t+dt/2)  ############# */
 +    
 +        /* The coordinates (x) were unshifted in update */
 +        if (bFFscan && (shellfc==NULL || bConverged))
 +        {
 +            if (print_forcefield(fplog,enerd->term,mdatoms->homenr,
 +                                 f,NULL,xcopy,
 +                                 &(top_global->mols),mdatoms->massT,pres))
 +            {
 +                gmx_finalize_par();
 +
 +                fprintf(stderr,"\n");
 +                exit(0);
 +            }
 +        }
 +        if (!bGStat)
 +        {
 +            /* We will not sum ekinh_old,                                                            
 +             * so signal that we still have to do it.                                                
 +             */
 +            bSumEkinhOld = TRUE;
 +        }
 +        
 +        if (bTCR)
 +        {
 +            /* Only do GCT when the relaxation of shells (minimization) has converged,
 +             * otherwise we might be coupling to bogus energies. 
 +             * In parallel we must always do this, because the other sims might
 +             * update the FF.
 +             */
 +
 +            /* Since this is called with the new coordinates state->x, I assume
 +             * we want the new box state->box too. / EL 20040121
 +             */
 +            do_coupling(fplog,oenv,nfile,fnm,tcr,t,step,enerd->term,fr,
 +                        ir,MASTER(cr),
 +                        mdatoms,&(top->idef),mu_aver,
 +                        top_global->mols.nr,cr,
 +                        state->box,total_vir,pres,
 +                        mu_tot,state->x,f,bConverged);
 +            debug_gmx();
 +        }
 +
 +        /* #########  BEGIN PREPARING EDR OUTPUT  ###########  */
 +        
 +        /* use the directly determined last velocity, not actually the averaged half steps */
 +        if (bTrotter && ir->eI==eiVV) 
 +        {
 +            enerd->term[F_EKIN] = last_ekin;
 +        }
 +        enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
 +        
 +        if (bVV)
 +        {
 +            enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
 +        }
 +        else 
 +        {
 +            enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + compute_conserved_from_auxiliary(ir,state,&MassQ);
 +        }
 +        /* Check for excessively large energies */
 +        if (bIonize) 
 +        {
 +#ifdef GMX_DOUBLE
 +            real etot_max = 1e200;
 +#else
 +            real etot_max = 1e30;
 +#endif
 +            if (fabs(enerd->term[F_ETOT]) > etot_max) 
 +            {
 +                fprintf(stderr,"Energy too large (%g), giving up\n",
 +                        enerd->term[F_ETOT]);
 +            }
 +        }
 +        /* #########  END PREPARING EDR OUTPUT  ###########  */
 +        
 +        /* Time for performance */
 +        if (((step % stepout) == 0) || bLastStep) 
 +        {
 +            runtime_upd_proc(runtime);
 +        }
 +        
 +        /* Output stuff */
 +        if (MASTER(cr))
 +        {
 +            gmx_bool do_dr,do_or;
 +            
 +            if (fplog && do_log && bDoExpanded)
 +            {
 +                /* only needed if doing expanded ensemble */
 +                PrintFreeEnergyInfoToFile(fplog,ir->fepvals,ir->expandedvals,ir->bSimTemp?ir->simtempvals:NULL,
 +                                          &df_history,state->fep_state,ir->nstlog,step);
 +            }
 +            if (!(bStartingFromCpt && (EI_VV(ir->eI)))) 
 +            {
 +                if (bCalcEner)
 +                {
 +                    upd_mdebin(mdebin,bDoDHDL, TRUE,
 +                               t,mdatoms->tmass,enerd,state,
 +                               ir->fepvals,ir->expandedvals,lastbox,
 +                               shake_vir,force_vir,total_vir,pres,
 +                               ekind,mu_tot,constr);
 +                }
 +                else
 +                {
 +                    upd_mdebin_step(mdebin);
 +                }
 +                
 +                do_dr  = do_per_step(step,ir->nstdisreout);
 +                do_or  = do_per_step(step,ir->nstorireout);
 +                
 +                print_ebin(outf->fp_ene,do_ene,do_dr,do_or,do_log?fplog:NULL,
 +                           step,t,
 +                           eprNORMAL,bCompact,mdebin,fcd,groups,&(ir->opts));
 +            }
 +            if (ir->ePull != epullNO)
 +            {
 +                pull_print_output(ir->pull,step,t);
 +            }
 +            
 +            if (do_per_step(step,ir->nstlog))
 +            {
 +                if(fflush(fplog) != 0)
 +                {
 +                    gmx_fatal(FARGS,"Cannot flush logfile - maybe you are out of disk space?");
 +                }
 +            }
 +        }
 +        if (bDoExpanded)
 +        {
 +            /* Have to do this part after outputting the logfile and the edr file */
 +            state->fep_state = lamnew;
 +            for (i=0;i<efptNR;i++)
 +            {
 +                state->lambda[i] = ir->fepvals->all_lambda[i][lamnew];
 +            }
 +        }
 +        /* Remaining runtime */
 +        if (MULTIMASTER(cr) && (do_verbose || gmx_got_usr_signal()) && !bPMETuneRunning)
 +        {
 +            if (shellfc) 
 +            {
 +                fprintf(stderr,"\n");
 +            }
 +            print_time(stderr,runtime,step,ir,cr);
 +        }
 +
 +        /* Replica exchange */
 +        bExchanged = FALSE;
 +        if ((repl_ex_nst > 0) && (step > 0) && !bLastStep &&
 +            do_per_step(step,repl_ex_nst)) 
 +        {
 +            bExchanged = replica_exchange(fplog,cr,repl_ex,
 +                                          state_global,enerd,
 +                                          state,step,t);
 +
 +            if (bExchanged && DOMAINDECOMP(cr)) 
 +            {
 +                dd_partition_system(fplog,step,cr,TRUE,1,
 +                                    state_global,top_global,ir,
 +                                    state,&f,mdatoms,top,fr,
 +                                    vsite,shellfc,constr,
 +                                    nrnb,wcycle,FALSE);
 +            }
 +        }
 +        
 +        bFirstStep = FALSE;
 +        bInitStep = FALSE;
 +        bStartingFromCpt = FALSE;
 +
 +        /* #######  SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
 +        /* With all integrators, except VV, we need to retain the pressure
 +         * at the current step for coupling at the next step.
 +         */
 +        if ((state->flags & (1<<estPRES_PREV)) &&
 +            (bGStatEveryStep ||
 +             (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
 +        {
 +            /* Store the pressure in t_state for pressure coupling
 +             * at the next MD step.
 +             */
 +            copy_mat(pres,state->pres_prev);
 +        }
 +        
 +        /* #######  END SET VARIABLES FOR NEXT ITERATION ###### */
 +
 +        if ( (membed!=NULL) && (!bLastStep) )
 +        {
 +            rescale_membed(step_rel,membed,state_global->x);
 +        }
 +
 +        if (bRerunMD) 
 +        {
 +            if (MASTER(cr))
 +            {
 +                /* read next frame from input trajectory */
 +                bNotLastFrame = read_next_frame(oenv,status,&rerun_fr);
 +            }
 +
 +            if (PAR(cr))
 +            {
 +                rerun_parallel_comm(cr,&rerun_fr,&bNotLastFrame);
 +            }
 +        }
 +        
 +        if (!bRerunMD || !rerun_fr.bStep)
 +        {
 +            /* increase the MD step number */
 +            step++;
 +            step_rel++;
 +        }
 +        
 +        cycles = wallcycle_stop(wcycle,ewcSTEP);
 +        if (DOMAINDECOMP(cr) && wcycle)
 +        {
 +            dd_cycles_add(cr->dd,cycles,ddCyclStep);
 +        }
 +
 +        if (bPMETuneRunning || bPMETuneTry)
 +        {
 +            /* PME grid + cut-off optimization with GPUs or PME nodes */
 +
 +            /* Count the total cycles over the last steps */
 +            cycles_pmes += cycles;
 +
 +            /* We can only switch cut-off at NS steps */
 +            if (step % ir->nstlist == 0)
 +            {
 +                /* PME grid + cut-off optimization with GPUs or PME nodes */
 +                if (bPMETuneTry)
 +                {
 +                    if (DDMASTER(cr->dd))
 +                    {
 +                        /* PME node load is too high, start tuning */
 +                        bPMETuneRunning = (dd_pme_f_ratio(cr->dd) >= 1.05);
 +                    }
 +                    dd_bcast(cr->dd,sizeof(gmx_bool),&bPMETuneRunning);
 +
 +                    if (bPMETuneRunning || step_rel > ir->nstlist*50)
 +                    {
 +                        bPMETuneTry     = FALSE;
 +                    }
 +                }
 +                if (bPMETuneRunning)
 +                {
 +                    /* init_step might not be a multiple of nstlist,
 +                     * but the first cycle is always skipped anyhow.
 +                     */
 +                    bPMETuneRunning =
 +                        pme_load_balance(pme_loadbal,cr,
 +                                         (bVerbose && MASTER(cr)) ? stderr : NULL,
 +                                         fplog,
 +                                         ir,state,cycles_pmes,
 +                                         fr->ic,fr->nbv,&fr->pmedata,
 +                                         step);
 +
 +                    /* Update constants in forcerec/inputrec to keep them in sync with fr->ic */
 +                    fr->ewaldcoeff = fr->ic->ewaldcoeff;
 +                    fr->rlist      = fr->ic->rlist;
 +                    fr->rlistlong  = fr->ic->rlistlong;
 +                    fr->rcoulomb   = fr->ic->rcoulomb;
 +                    fr->rvdw       = fr->ic->rvdw;
 +                }
 +                cycles_pmes = 0;
 +            }
 +        }
 +
 +        if (step_rel == wcycle_get_reset_counters(wcycle) ||
 +            gs.set[eglsRESETCOUNTERS] != 0)
 +        {
 +            /* Reset all the counters related to performance over the run */
 +            reset_all_counters(fplog,cr,step,&step_rel,ir,wcycle,nrnb,runtime,
 +                               fr->nbv != NULL && fr->nbv->bUseGPU ? fr->nbv->cu_nbv : NULL);
 +            wcycle_set_reset_counters(wcycle,-1);
 +            /* Correct max_hours for the elapsed time */
 +            max_hours -= run_time/(60.0*60.0);
 +            bResetCountersHalfMaxH = FALSE;
 +            gs.set[eglsRESETCOUNTERS] = 0;
 +        }
 +
 +    }
 +    /* End of main MD loop */
 +    debug_gmx();
 +    
 +    /* Stop the time */
 +    runtime_end(runtime);
 +    
 +    if (bRerunMD && MASTER(cr))
 +    {
 +        close_trj(status);
 +    }
 +    
 +    if (!(cr->duty & DUTY_PME))
 +    {
 +        /* Tell the PME only node to finish */
 +        gmx_pme_send_finish(cr);
 +    }
 +    
 +    if (MASTER(cr))
 +    {
 +        if (ir->nstcalcenergy > 0 && !bRerunMD) 
 +        {
 +            print_ebin(outf->fp_ene,FALSE,FALSE,FALSE,fplog,step,t,
 +                       eprAVER,FALSE,mdebin,fcd,groups,&(ir->opts));
 +        }
 +    }
 +
 +    done_mdoutf(outf);
 +
 +    debug_gmx();
 +
 +    if (ir->nstlist == -1 && nlh.nns > 0 && fplog)
 +    {
 +        fprintf(fplog,"Average neighborlist lifetime: %.1f steps, std.dev.: %.1f steps\n",nlh.s1/nlh.nns,sqrt(nlh.s2/nlh.nns - sqr(nlh.s1/nlh.nns)));
 +        fprintf(fplog,"Average number of atoms that crossed the half buffer length: %.1f\n\n",nlh.ab/nlh.nns);
 +    }
 +
 +    if (pme_loadbal != NULL)
 +    {
 +        pme_loadbal_done(pme_loadbal,fplog);
 +    }
 +
 +    if (shellfc && fplog)
 +    {
 +        fprintf(fplog,"Fraction of iterations that converged:           %.2f %%\n",
 +                (nconverged*100.0)/step_rel);
 +        fprintf(fplog,"Average number of force evaluations per MD step: %.2f\n\n",
 +                tcount/step_rel);
 +    }
 +    
 +    if (repl_ex_nst > 0 && MASTER(cr))
 +    {
 +        print_replica_exchange_statistics(fplog,repl_ex);
 +    }
 +    
 +    runtime->nsteps_done = step_rel;
 +
 +   return 0;
 +}