Merge origin/release-4-6 into master
author Roland Schulz <roland@utk.edu>
Fri, 9 Nov 2012 21:18:27 +0000 (16:18 -0500)
committer Roland Schulz <roland@utk.edu>
Fri, 9 Nov 2012 21:21:26 +0000 (16:21 -0500)
Conflicts:
CMakeLists.txt
cmake/gmxTestRestrict.cmake (recreated)
cmake/TestRestrict.c (recreated)
src/config.h.cmakein
src/gromacs/gmxlib/nrnb.c
src/gromacs/gmxlib/string2.c
src/gromacs/gmxlib/tpxio.c
src/gromacs/legacyheaders/string2.h
src/gromacs/libgromacs.pc.cmakein
src/kernel/CMakeLists.txt (change added to src/gromacs/CMakeLists.txt)

Deleted:
src/gromacs/gmxlib/nonbonded/nb_kernel_bluegene/*
src/gromacs/gmxlib/nonbonded/nb_kernel_c/*
src/gromacs/gmxlib/nonbonded/nb_kernel_f77_*
src/gromacs/gmxlib/nonbonded/nb_kernel_power6/*
src/gromacs/gmxlib/nonbonded/nb_kerneltype.h
src/gmxlib/nonbonded/mknb_generator/mknb_metacode.h

nstlist has been added to the tpx format in 4.6 (tpx version 82), and
is now in master from version 91. Versions 83-89 are reserved for 4.6
(and master will read nstlist from them), while version 90 (previous
master) files are read correctly without it.
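
A minimal, self-contained sketch of the resulting version gate (the helper
name tpx_has_nstlist is hypothetical; the real logic lives in
src/gromacs/gmxlib/tpxio.c and uses the gmx_fio I/O layer):

    #include <stdio.h>

    /* Sketch only: which tpx file versions carry nstlist.
     *   82-89: 4.6 branch formats (nstlist present)
     *   90:    previous master format (nstlist absent)
     *   91+:   current master formats (nstlist present)
     */
    static int tpx_has_nstlist(int file_version)
    {
        return file_version >= 82 && file_version != 90;
    }

    int main(void)
    {
        int v;
        for (v = 80; v <= 92; v++)
        {
            printf("tpx version %d: nstlist %s\n", v,
                   tpx_has_nstlist(v) ? "present" : "absent");
        }
        return 0;
    }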

Change-Id: I9a953d213fedcc3d885ee20bff08cbdf916c86c2

235 files changed:
CMakeLists.txt
src/config.h.cmakein
src/gromacs/CMakeLists.txt
src/gromacs/gmxlib/bondfree.c
src/gromacs/gmxlib/names.c
src/gromacs/gmxlib/nonbonded/CMakeLists.txt
src/gromacs/gmxlib/nonbonded/nb_free_energy.c
src/gromacs/gmxlib/nonbonded/nb_free_energy.h
src/gromacs/gmxlib/nonbonded/nb_generic.c
src/gromacs/gmxlib/nonbonded/nb_generic.h
src/gromacs/gmxlib/nonbonded/nb_generic_adress.c
src/gromacs/gmxlib/nonbonded/nb_generic_cg.c
src/gromacs/gmxlib/nonbonded/nb_generic_cg.h
src/gromacs/gmxlib/nonbonded/nb_kernel.c
src/gromacs/gmxlib/nonbonded/nb_kernel.h
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel_c_adress.h
src/gromacs/gmxlib/nonbonded/nb_kernel_c/make_nb_kernel_c.py
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomW3P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomW3W3_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomW4P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomW4W4_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomW3P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomW3W3_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomW4P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomW4W4_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwBham_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwBham_GeomW3P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwBham_GeomW3W3_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwBham_GeomW4P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwBham_GeomW4W4_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwLJ_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwLJ_GeomW3P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwLJ_GeomW3W3_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwLJ_GeomW4P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwLJ_GeomW4W4_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwNone_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwNone_GeomW3P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwNone_GeomW3W3_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwNone_GeomW4P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwNone_GeomW4W4_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwBhamSh_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwBhamSh_GeomW3P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwBhamSh_GeomW3W3_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwBhamSh_GeomW4P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwBhamSh_GeomW4W4_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwNone_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwNone_GeomW3P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwNone_GeomW3W3_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwNone_GeomW4P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwNone_GeomW4W4_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwBhamSw_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwBhamSw_GeomW3P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwBhamSw_GeomW3W3_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwNone_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwNone_GeomW3P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwNone_GeomW3W3_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwNone_GeomW4P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwNone_GeomW4W4_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwBham_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwBham_GeomW3P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwBham_GeomW3W3_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwBham_GeomW4P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwBham_GeomW4W4_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomW3P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomW3W3_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomW4P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomW4W4_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomW3P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomW3W3_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomW4P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomW4W4_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwNone_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwNone_GeomW3P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwNone_GeomW3W3_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwNone_GeomW4P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwNone_GeomW4W4_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwBham_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwLJ_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwNone_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwBham_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwCSTab_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwLJSh_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwLJSw_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwLJ_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSh_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSh_GeomW3P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSh_GeomW3W3_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSh_GeomW4P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSh_GeomW4W4_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSw_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSw_GeomW3P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSw_GeomW3W3_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSw_GeomW4P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSw_GeomW4W4_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwNone_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwNone_GeomW3P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwNone_GeomW3W3_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwNone_GeomW4P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwNone_GeomW4W4_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwBham_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwBham_GeomW3P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwBham_GeomW3W3_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwBham_GeomW4P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwBham_GeomW4W4_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomW3P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomW3W3_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomW4P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomW4W4_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwLJ_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwLJ_GeomW3P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwLJ_GeomW3W3_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwLJ_GeomW4P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwLJ_GeomW4W4_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwNone_GeomP1P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwNone_GeomW3P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwNone_GeomW3W3_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwNone_GeomW4P1_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwNone_GeomW4W4_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsall.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsall.h
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsallgb.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsallgb.h
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_c.c
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_c.h
src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_template_c.pre
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel400_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel410_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel430_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_sse2_double.h
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel400_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel410_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel430_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_sse2_single.h
src/gromacs/gmxlib/nonbonded/nonbonded.c
src/gromacs/gmxlib/nonbonded/preprocessor/gmxpreprocess.py
src/gromacs/gmxlib/nrnb.c
src/gromacs/gmxlib/string2.c
src/gromacs/gmxlib/tpxio.c
src/gromacs/gmxlib/txtdump.c
src/gromacs/gmxpreprocess/readir.c
src/gromacs/legacyheaders/force.h
src/gromacs/legacyheaders/names.h
src/gromacs/legacyheaders/nonbonded.h
src/gromacs/legacyheaders/ns.h
src/gromacs/legacyheaders/string2.h
src/gromacs/legacyheaders/tables.h
src/gromacs/legacyheaders/types/enums.h
src/gromacs/legacyheaders/types/force_flags.h
src/gromacs/legacyheaders/types/forcerec.h
src/gromacs/legacyheaders/types/idef.h
src/gromacs/legacyheaders/types/inputrec.h
src/gromacs/legacyheaders/types/interaction_const.h
src/gromacs/legacyheaders/types/nblist.h
src/gromacs/legacyheaders/types/nrnb.h
src/gromacs/legacyheaders/types/simple.h
src/gromacs/legacyheaders/vec.h
src/gromacs/libgromacs.pc.cmakein
src/gromacs/mdlib/adress.c
src/gromacs/mdlib/domdec.c
src/gromacs/mdlib/force.c
src/gromacs/mdlib/forcerec.c
src/gromacs/mdlib/genborn.c
src/gromacs/mdlib/minimize.c
src/gromacs/mdlib/nbnxn_atomdata.c
src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu
src/gromacs/mdlib/ns.c
src/gromacs/mdlib/qmmm.c
src/gromacs/mdlib/sim_util.c
src/gromacs/mdlib/tables.c
src/gromacs/mdlib/tpi.c
src/gromacs/mdlib/update.c
src/gromacs/mdlib/wall.c
src/gromacs/mdlib/wnblist.c
src/programs/gmxcheck/tpbcmp.c
src/programs/mdrun/do_gct.c
src/programs/mdrun/gctio.c
src/programs/mdrun/md.c
src/programs/mdrun/mdrun.c
src/programs/mdrun/openmm_wrapper.cpp
src/programs/mdrun/pme_loadbal.c
src/programs/mdrun/runner.c
src/tools/gmx_h2order.c

diff --cc CMakeLists.txt
Simple merge
diff --cc src/config.h.cmakein
index e5895c753ac095de939a01bebf7ac21b9a1bbc85,bd115207deeb5140a3c149874d9afc5da8f2c579..6217bda9fa1ef25a22ec7019fbfb712341848c06
  /* Define to 1 if you have the sigaction() function. */
  #cmakedefine HAVE_SIGACTION
  
 -/* Define to 1 if you have the <string.h> header file. */
 -#cmakedefine HAVE_STRING_H
 -
 -/* Define to 1 if you have the <math.h> header file. */
 -#cmakedefine HAVE_MATH_H
 -
 -/* Define to 1 if you have the <limits.h> header file. */
 -#cmakedefine HAVE_LIMITS_H
 -
 -/* Define to 1 if you have the <memory.h> header file. */
 -#cmakedefine HAVE_MEMORY_H
 -
+ /* Define to 1 if you have the rsqrt() function. */
+ #cmakedefine HAVE_RSQRT
+ /* Define to 1 if you have the rsqrtf() function. */
+ #cmakedefine HAVE_RSQRTF
+ /* Define to 1 if you have the sqrtf() function. */
+ #cmakedefine HAVE_SQRTF
  /* Define to 1 if you have the <unistd.h> header file. */
  #cmakedefine HAVE_UNISTD_H
  
  /* Some systems requires this to be set to 64 for large file support */
  #cmakedefine _FILE_OFFSET_BITS @_FILE_OFFSET_BITS@
  
 -/* Gromacs shortcut define for fseeko & ftello being present with 64-bit support */
 -#cmakedefine GMX_LARGEFILES
 -
 -/* Define to int if <sys/types.h> does not define. */
 -#cmakedefine gid_t int
 -
  /* Define to __inline__ or __inline if that is what the C compiler
-    calls it, or to nothing if inline is not supported under any name.  */
+    calls it, or to nothing if inline is not supported under any name.
+    Please do NOT remove the gmx_inline keyword from here. The classical
+    C++ inline keyword is merely a recommendation to the compiler, and
+    many compilers support stronger alternatives (e.g. __forceinline)
+    that we might want to use. */
+ #define gmx_inline ${INLINE_KEYWORD}
  #ifndef __cplusplus
  #define inline ${INLINE_KEYWORD}
  #endif
  
 -
 -#ifndef CPLUSPLUS
 -#ifdef __cplusplus
 -#define CPLUSPLUS
 -#endif
 -#endif  
 -
 -/* Define to long int if <sys/types.h> does not define. */                    
 -#cmakedefine off_t int
 -
 -/* Define to unsigned int if <sys/types.h> does not define. */
 -#cmakedefine size_t int
 -
 -/* Define to int if <sys/types.h> does not define. */
 -#cmakedefine uid_t int
 -
+ /* Define to __restrict__ or __restrict if that is what the C compiler
+    calls it, unless we are on C99 when it is simply called restrict.
+    Since restrict is a reserved keyword in C99 we are not allowed to
+    redefine the word itself, so call this gmx_restrict to avoid having
+    to identify the language standard level. If it is not supported, it
+    is still defined to an empty string here. */
+ #define gmx_restrict ${RESTRICT_KEYWORD}
  /* Build special-purpose mdrun library */
  #cmakedefine GMX_FAHCORE   
  
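For context, a minimal sketch of how the two keywords defined above are meant
to be used in C sources (the helper add_scaled is illustrative only and not
part of this change; the fallback defines just keep the sketch self-contained):

    /* In the real tree gmx_inline and gmx_restrict come from the generated
     * config.h shown above. */
    #ifdef HAVE_CONFIG_H
    #include <config.h>
    #else
    #define gmx_inline inline
    #define gmx_restrict
    #endif

    /* The gmx_restrict qualifiers promise the compiler that y and x do not
     * alias, and gmx_inline expands to whatever inline keyword the configure
     * checks detected (possibly a stronger vendor variant). */
    static gmx_inline void add_scaled(int n, float * gmx_restrict y,
                                      const float * gmx_restrict x, float a)
    {
        int i;
        for (i = 0; i < n; i++)
        {
            y[i] += a * x[i];
        }
    }
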
diff --cc src/gromacs/CMakeLists.txt
index 06171b854d32e414ce8944841ff71cfa4bc67b93,0000000000000000000000000000000000000000..cb0111d4d7f30f0eab766f0328adab7a43537bb8
mode 100644,000000..100644
--- /dev/null
@@@ -1,79 -1,0 +1,95 @@@
 +set(LIBGROMACS_SOURCES)
 +
 +add_subdirectory(legacyheaders)
 +add_subdirectory(gmxlib)
 +add_subdirectory(mdlib)
 +add_subdirectory(gmxpreprocess)
 +add_subdirectory(analysisdata)
 +add_subdirectory(commandline)
 +add_subdirectory(linearalgebra)
 +add_subdirectory(onlinehelp)
 +add_subdirectory(options)
 +add_subdirectory(selection)
 +add_subdirectory(trajectoryanalysis)
 +add_subdirectory(utility)
 +
 +file(GLOB LIBGROMACS_HEADERS *.h)
 +install(FILES ${LIBGROMACS_HEADERS} DESTINATION ${INCL_INSTALL_DIR}/gromacs
 +        COMPONENT development)
 +
 +list(APPEND LIBGROMACS_SOURCES ${GMXLIB_SOURCES} ${MDLIB_SOURCES})
 +
 +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/version.h.cmakein ${CMAKE_CURRENT_BINARY_DIR}/version.h)
 +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/version.h
 +    DESTINATION ${INCL_INSTALL_DIR}/gromacs
 +    COMPONENT development)
 +
 +# Add target that generates gitversion.c every time make is run
 +# if git version info is requested
 +# This code is here instead of utility/CMakeLists.txt because CMake
 +# ignores set_source_file_properties from subdirectories.
 +if (GMX_GIT_VERSION_INFO)
 +    set(GENERATED_VERSION_FILE ${CMAKE_CURRENT_BINARY_DIR}/utility/gitversion.c)
 +    add_custom_target(gmx_version ALL
 +            COMMAND ${CMAKE_COMMAND}
 +                -D GIT_EXECUTABLE="${GIT_EXECUTABLE}"
 +                -D GIT_VERSION="${GIT_VERSION}"
 +                -D PROJECT_VERSION="${PROJECT_VERSION}"
 +                -D PROJECT_SOURCE_DIR="${PROJECT_SOURCE_DIR}"
 +                -D VERSION_C_CMAKEIN="${CMAKE_CURRENT_SOURCE_DIR}/utility/gitversion.c.cmakein"
 +                -D VERSION_C_OUT=${GENERATED_VERSION_FILE}
 +                -P ${CMAKE_SOURCE_DIR}/cmake/gmxGenerateVersionInfo.cmake
 +            WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
 +            DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/utility/gitversion.c.cmakein
 +            COMMENT "Generating git version information")
 +    set_source_files_properties(${GENERATED_VERSION_FILE}
 +                                PROPERTIES GENERATED true)
 +    list(APPEND LIBGROMACS_SOURCES ${GENERATED_VERSION_FILE})
 +endif()
 +
 +# apply gcc 4.4.x bug workaround
 +if(GMX_USE_GCC44_BUG_WORKAROUND)
 +   include(gmxGCC44O3BugWorkaround)
 +   gmx_apply_gcc44_bug_workaround("gmxlib/bondfree.c")
 +   gmx_apply_gcc44_bug_workaround("mdlib/force.c")
 +   gmx_apply_gcc44_bug_workaround("mdlib/constr.c")
 +endif()
 +
 +add_library(libgromacs ${LIBGROMACS_SOURCES})
 +if (GMX_GIT_VERSION_INFO)
 +    add_dependencies(libgromacs gmx_version)
 +endif ()
 +
 +target_link_libraries(libgromacs ${GMX_GPU_LIBRARIES}
 +                      ${GMX_EXTRA_LIBRARIES} ${FFT_LIBRARIES} ${XML_LIBRARIES}
 +                      ${THREAD_LIB} ${OpenMP_SHARED_LINKER_FLAGS})
 +set_target_properties(libgromacs PROPERTIES
 +                      OUTPUT_NAME "gromacs${GMX_LIBS_SUFFIX}"
 +                      SOVERSION ${SOVERSION}
 +                      INSTALL_NAME_DIR "${LIB_INSTALL_DIR}"
 +                      COMPILE_FLAGS "${OpenMP_C_FLAGS}")
 +
 +install(TARGETS libgromacs DESTINATION ${LIB_INSTALL_DIR} COMPONENT libraries)
 +
 +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/libgromacs.pc.cmakein
 +               ${CMAKE_CURRENT_BINARY_DIR}/libgromacs.pc @ONLY)
 +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libgromacs.pc
 +        DESTINATION ${LIB_INSTALL_DIR}/pkgconfig
 +        RENAME "libgromacs${GMX_LIBS_SUFFIX}.pc"
 +        COMPONENT development)
++
++if (INSTALL_CUDART_LIB) #can be set manually by the user
++    if (GMX_OPENMM OR GMX_GPU)
++        foreach(CUDA_LIB ${CUDA_LIBRARIES})
++            string(REGEX MATCH "cudart" IS_CUDART ${CUDA_LIB})
++            if(IS_CUDART) #libcuda should not be installed
++                #also install the name-links (the linker uses those)
++                file(GLOB CUDA_LIBS ${CUDA_LIB}*)
++                install(FILES ${CUDA_LIBS} DESTINATION
++                    ${LIB_INSTALL_DIR} COMPONENT libraries)
++            endif()
++        endforeach()
++    else()
++        message(WARNING "INSTALL_CUDART_LIB only makes sense with GMX_OPENMM or GMX_GPU")
++    endif()
++endif ()
diff --cc src/gromacs/gmxlib/bondfree.c
index 57377b5d1afdc8c936db48444c695280276421a9,0000000000000000000000000000000000000000..2b7a8f7b5e1156393a42da6408cab59370570297
mode 100644,000000..100644
--- /dev/null
@@@ -1,4008 -1,0 +1,4008 @@@
-   VFtab    = table->tab;
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <math.h>
 +#include "physics.h"
 +#include "vec.h"
 +#include "maths.h"
 +#include "txtdump.h"
 +#include "bondf.h"
 +#include "smalloc.h"
 +#include "pbc.h"
 +#include "ns.h"
 +#include "macros.h"
 +#include "names.h"
 +#include "gmx_fatal.h"
 +#include "mshift.h"
 +#include "main.h"
 +#include "disre.h"
 +#include "orires.h"
 +#include "force.h"
 +#include "nonbonded.h"
 +
 +#if !defined GMX_DOUBLE && defined GMX_X86_SSE2
 +#include "gmx_x86_simd_single.h"
 +#define SSE_PROPER_DIHEDRALS
 +#endif
 +
 +/* Find a better place for this? */
 +const int cmap_coeff_matrix[] = {
 +1, 0, -3,  2, 0, 0,  0,  0, -3,  0,  9, -6,  2,  0, -6,  4 ,
 +0, 0,  0,  0, 0, 0,  0,  0,  3,  0, -9,  6, -2,  0,  6, -4,
 +0, 0,  0,  0, 0, 0,  0,  0,  0,  0,  9, -6,  0,  0, -6,  4 ,
 +0, 0,  3, -2, 0, 0,  0,  0,  0,  0, -9,  6,  0,  0,  6, -4,
 +0, 0,  0,  0, 1, 0, -3,  2, -2,  0,  6, -4,  1,  0, -3,  2 ,
 +0, 0,  0,  0, 0, 0,  0,  0, -1,  0,  3, -2,  1,  0, -3,  2 ,
 +0, 0,  0,  0, 0, 0,  0,  0,  0,  0, -3,  2,  0,  0,  3, -2,
 +0, 0,  0,  0, 0, 0,  3, -2,  0,  0, -6,  4,  0,  0,  3, -2,
 +0, 1, -2,  1, 0, 0,  0,  0,  0, -3,  6, -3,  0,  2, -4,  2 ,
 +0, 0,  0,  0, 0, 0,  0,  0,  0,  3, -6,  3,  0, -2,  4, -2,
 +0, 0,  0,  0, 0, 0,  0,  0,  0,  0, -3,  3,  0,  0,  2, -2,
 +0, 0, -1,  1, 0, 0,  0,  0,  0,  0,  3, -3,  0,  0, -2,  2 ,
 +0, 0,  0,  0, 0, 1, -2,  1,  0, -2,  4, -2,  0,  1, -2,  1,
 +0, 0,  0,  0, 0, 0,  0,  0,  0, -1,  2, -1,  0,  1, -2,  1,
 +0, 0,  0,  0, 0, 0,  0,  0,  0,  0,  1, -1,  0,  0, -1,  1,
 +0, 0,  0,  0, 0, 0, -1,  1,  0,  0,  2, -2,  0,  0, -1,  1
 +};
 +
 +
 +
 +int glatnr(int *global_atom_index,int i)
 +{
 +    int atnr;
 +
 +    if (global_atom_index == NULL) {
 +        atnr = i + 1;
 +    } else {
 +        atnr = global_atom_index[i] + 1;
 +    }
 +
 +    return atnr;
 +}
 +
 +static int pbc_rvec_sub(const t_pbc *pbc,const rvec xi,const rvec xj,rvec dx)
 +{
 +  if (pbc) {
 +    return pbc_dx_aiuc(pbc,xi,xj,dx);
 +  }
 +  else {
 +    rvec_sub(xi,xj,dx);
 +    return CENTRAL;
 +  }
 +}
 +
 +/*
 + * Morse potential bond by Frank Everdij
 + *
 + * Three parameters needed:
 + *
 + * b0 = equilibrium distance in nm
 + * be = beta in nm^-1 (actually, it's nu_e*Sqrt(2*pi*pi*mu/D_e))
 + * cb = well depth in kJ/mol
 + *
 + * Note: the potential is referenced to be +cb at infinite separation
 + *       and zero at the equilibrium distance!
 + */
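 +/* Explicitly, the potential evaluated below is
 + *   V(r) = cb*(1 - exp(-be*(r - b0)))^2,
 + * i.e. zero at r = b0 and approaching +cb at infinite separation,
 + * as stated in the note above. */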
 +
 +real morse_bonds(int nbonds,
 +               const t_iatom forceatoms[],const t_iparams forceparams[],
 +               const rvec x[],rvec f[],rvec fshift[],
 +               const t_pbc *pbc,const t_graph *g,
 +               real lambda,real *dvdlambda,
 +               const t_mdatoms *md,t_fcdata *fcd,
 +               int *global_atom_index)
 +{
 +  const real one=1.0;
 +  const real two=2.0;
 +  real  dr,dr2,temp,omtemp,cbomtemp,fbond,vbond,fij,vtot;
 +  real  b0,be,cb,b0A,beA,cbA,b0B,beB,cbB,L1;
 +  rvec  dx;
 +  int   i,m,ki,type,ai,aj;
 +  ivec  dt;
 +
 +  vtot = 0.0;
 +  for(i=0; (i<nbonds); ) {
 +    type = forceatoms[i++];
 +    ai   = forceatoms[i++];
 +    aj   = forceatoms[i++];
 +    
 +    b0A   = forceparams[type].morse.b0A;
 +    beA   = forceparams[type].morse.betaA;
 +    cbA   = forceparams[type].morse.cbA;
 +
 +    b0B   = forceparams[type].morse.b0B;
 +    beB   = forceparams[type].morse.betaB;
 +    cbB   = forceparams[type].morse.cbB;
 +
 +    L1 = one-lambda;                      /* 1 */
 +    b0 = L1*b0A + lambda*b0B;             /* 3 */
 +    be = L1*beA + lambda*beB;             /* 3 */
 +    cb = L1*cbA + lambda*cbB;             /* 3 */
 +
 +    ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);            /*   3          */
 +    dr2  = iprod(dx,dx);                            /*   5          */
 +    dr   = dr2*gmx_invsqrt(dr2);                        /*  10          */
 +    temp = exp(-be*(dr-b0));                        /*  12          */
 +    
 +    if (temp == one)
 +    {
 +        /* bonds are constrained. This may _not_ include bond constraints if they are lambda dependent */
 +        *dvdlambda += cbB-cbA;
 +        continue;
 +    }
 +
 +    omtemp   = one-temp;                               /*   1          */
 +    cbomtemp = cb*omtemp;                              /*   1          */
 +    vbond    = cbomtemp*omtemp;                        /*   1          */
 +    fbond    = -two*be*temp*cbomtemp*gmx_invsqrt(dr2); /*   9          */
 +    vtot     += vbond;                                 /*   1          */
 +
 +    *dvdlambda += (cbB - cbA) * omtemp * omtemp - (2-2*omtemp)*omtemp * cb * ((b0B-b0A)*be - (beB-beA)*(dr-b0)); /* 15 */
 +    
 +    if (g) {
 +      ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
 +      ki = IVEC2IS(dt);
 +    }
 +
 +    for (m=0; (m<DIM); m++) {                          /*  15          */
 +      fij=fbond*dx[m];
 +      f[ai][m]+=fij;
 +      f[aj][m]-=fij;
 +      fshift[ki][m]+=fij;
 +      fshift[CENTRAL][m]-=fij;
 +    }
 +  }                                           /*  83 TOTAL    */
 +  return vtot;
 +}
 +
 +real cubic_bonds(int nbonds,
 +               const t_iatom forceatoms[],const t_iparams forceparams[],
 +               const rvec x[],rvec f[],rvec fshift[],
 +               const t_pbc *pbc,const t_graph *g,
 +               real lambda,real *dvdlambda,
 +               const t_mdatoms *md,t_fcdata *fcd,
 +               int *global_atom_index)
 +{
 +  const real three = 3.0;
 +  const real two   = 2.0;
 +  real  kb,b0,kcub;
 +  real  dr,dr2,dist,kdist,kdist2,fbond,vbond,fij,vtot;
 +  rvec  dx;
 +  int   i,m,ki,type,ai,aj;
 +  ivec  dt;
 +
 +  vtot = 0.0;
 +  for(i=0; (i<nbonds); ) {
 +    type = forceatoms[i++];
 +    ai   = forceatoms[i++];
 +    aj   = forceatoms[i++];
 +    
 +    b0   = forceparams[type].cubic.b0;
 +    kb   = forceparams[type].cubic.kb;
 +    kcub = forceparams[type].cubic.kcub;
 +
 +    ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);                /*   3          */
 +    dr2  = iprod(dx,dx);                                /*   5          */
 +    
 +    if (dr2 == 0.0)
 +      continue;
 +      
 +    dr         = dr2*gmx_invsqrt(dr2);                      /*  10          */
 +    dist       = dr-b0;
 +    kdist      = kb*dist;
 +    kdist2     = kdist*dist;
 +    
 +    vbond      = kdist2 + kcub*kdist2*dist;
 +    fbond      = -(two*kdist + three*kdist2*kcub)/dr;
 +
 +    vtot      += vbond;       /* 21 */
 +    
 +    if (g) {
 +      ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
 +      ki=IVEC2IS(dt);
 +    }
 +    for (m=0; (m<DIM); m++) {                          /*  15          */
 +      fij=fbond*dx[m];
 +      f[ai][m]+=fij;
 +      f[aj][m]-=fij;
 +      fshift[ki][m]+=fij;
 +      fshift[CENTRAL][m]-=fij;
 +    }
 +  }                                           /*  54 TOTAL    */
 +  return vtot;
 +}
 +
 +real FENE_bonds(int nbonds,
 +              const t_iatom forceatoms[],const t_iparams forceparams[],
 +              const rvec x[],rvec f[],rvec fshift[],
 +              const t_pbc *pbc,const t_graph *g,
 +              real lambda,real *dvdlambda,
 +              const t_mdatoms *md,t_fcdata *fcd,
 +              int *global_atom_index)
 +{
 +  const real half=0.5;
 +  const real one=1.0;
 +  real  bm,kb;
 +  real  dr,dr2,bm2,omdr2obm2,fbond,vbond,fij,vtot;
 +  rvec  dx;
 +  int   i,m,ki,type,ai,aj;
 +  ivec  dt;
 +
 +  vtot = 0.0;
 +  for(i=0; (i<nbonds); ) {
 +    type = forceatoms[i++];
 +    ai   = forceatoms[i++];
 +    aj   = forceatoms[i++];
 +    
 +    bm   = forceparams[type].fene.bm;
 +    kb   = forceparams[type].fene.kb;
 +
 +    ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);            /*   3          */
 +    dr2  = iprod(dx,dx);                                /*   5          */
 +    
 +    if (dr2 == 0.0)
 +      continue;
 +
 +    bm2 = bm*bm;
 +
 +    if (dr2 >= bm2)
 +      gmx_fatal(FARGS,
 +              "r^2 (%f) >= bm^2 (%f) in FENE bond between atoms %d and %d",
 +              dr2,bm2,
 +              glatnr(global_atom_index,ai),
 +              glatnr(global_atom_index,aj));
 +      
 +    omdr2obm2  = one - dr2/bm2;
 +    
 +    vbond      = -half*kb*bm2*log(omdr2obm2);
 +    fbond      = -kb/omdr2obm2;
 +
 +    vtot      += vbond;       /* 35 */
 +    
 +    if (g) {
 +      ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
 +      ki=IVEC2IS(dt);
 +    }
 +    for (m=0; (m<DIM); m++) {                          /*  15          */
 +      fij=fbond*dx[m];
 +      f[ai][m]+=fij;
 +      f[aj][m]-=fij;
 +      fshift[ki][m]+=fij;
 +      fshift[CENTRAL][m]-=fij;
 +    }
 +  }                                           /*  58 TOTAL    */
 +  return vtot;
 +}
 +
 +real harmonic(real kA,real kB,real xA,real xB,real x,real lambda,
 +            real *V,real *F)
 +{
 +  const real half=0.5;
 +  real  L1,kk,x0,dx,dx2;
 +  real  v,f,dvdlambda;
 +  
 +  L1    = 1.0-lambda;
 +  kk    = L1*kA+lambda*kB;
 +  x0    = L1*xA+lambda*xB;
 +
 +  dx    = x-x0;
 +  dx2   = dx*dx;
 +
 +  f     = -kk*dx;
 +  v     = half*kk*dx2;
 +  dvdlambda  = half*(kB-kA)*dx2 + (xA-xB)*kk*dx;
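 +  /* dV/dlambda follows from kk = (1-lambda)*kA + lambda*kB and
 +   * x0 = (1-lambda)*xA + lambda*xB: d(kk)/dlambda = kB-kA gives the first
 +   * term, and -kk*dx*d(x0)/dlambda = (xA-xB)*kk*dx gives the second. */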
 +
 +  *F    = f;
 +  *V    = v;
 +
 +  return dvdlambda;
 +
 +  /* That was 19 flops */
 +}
 +
 +
 +real bonds(int nbonds,
 +         const t_iatom forceatoms[],const t_iparams forceparams[],
 +         const rvec x[],rvec f[],rvec fshift[],
 +         const t_pbc *pbc,const t_graph *g,
 +         real lambda,real *dvdlambda,
 +         const t_mdatoms *md,t_fcdata *fcd,
 +         int *global_atom_index)
 +{
 +  int  i,m,ki,ai,aj,type;
 +  real dr,dr2,fbond,vbond,fij,vtot;
 +  rvec dx;
 +  ivec dt;
 +
 +  vtot = 0.0;
 +  for(i=0; (i<nbonds); ) {
 +    type = forceatoms[i++];
 +    ai   = forceatoms[i++];
 +    aj   = forceatoms[i++];
 +  
 +    ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);  /*   3          */
 +    dr2  = iprod(dx,dx);                      /*   5          */
 +    dr   = dr2*gmx_invsqrt(dr2);                      /*  10          */
 +
 +    *dvdlambda += harmonic(forceparams[type].harmonic.krA,
 +                           forceparams[type].harmonic.krB,
 +                           forceparams[type].harmonic.rA,
 +                           forceparams[type].harmonic.rB,
 +                           dr,lambda,&vbond,&fbond);  /*  19  */
 +
 +    if (dr2 == 0.0)
 +      continue;
 +
 +    
 +    vtot  += vbond;/* 1*/
 +    fbond *= gmx_invsqrt(dr2);                        /*   6          */
 +#ifdef DEBUG
 +    if (debug)
 +      fprintf(debug,"BONDS: dr = %10g  vbond = %10g  fbond = %10g\n",
 +            dr,vbond,fbond);
 +#endif
 +    if (g) {
 +      ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
 +      ki=IVEC2IS(dt);
 +    }
 +    for (m=0; (m<DIM); m++) {                 /*  15          */
 +      fij=fbond*dx[m];
 +      f[ai][m]+=fij;
 +      f[aj][m]-=fij;
 +      fshift[ki][m]+=fij;
 +      fshift[CENTRAL][m]-=fij;
 +    }
 +  }                                   /* 59 TOTAL     */
 +  return vtot;
 +}
 +
 +real restraint_bonds(int nbonds,
 +                     const t_iatom forceatoms[],const t_iparams forceparams[],
 +                     const rvec x[],rvec f[],rvec fshift[],
 +                     const t_pbc *pbc,const t_graph *g,
 +                     real lambda,real *dvdlambda,
 +                     const t_mdatoms *md,t_fcdata *fcd,
 +                     int *global_atom_index)
 +{
 +    int  i,m,ki,ai,aj,type;
 +    real dr,dr2,fbond,vbond,fij,vtot;
 +    real L1;
 +    real low,dlow,up1,dup1,up2,dup2,k,dk;
 +    real drh,drh2;
 +    rvec dx;
 +    ivec dt;
 +
 +    L1   = 1.0 - lambda;
 +
 +    vtot = 0.0;
 +    for(i=0; (i<nbonds); )
 +    {
 +        type = forceatoms[i++];
 +        ai   = forceatoms[i++];
 +        aj   = forceatoms[i++];
 +        
 +        ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);      /*   3          */
 +        dr2  = iprod(dx,dx);                          /*   5          */
 +        dr   = dr2*gmx_invsqrt(dr2);                  /*  10          */
 +
 +        low  = L1*forceparams[type].restraint.lowA + lambda*forceparams[type].restraint.lowB;
 +        dlow =   -forceparams[type].restraint.lowA +        forceparams[type].restraint.lowB;
 +        up1  = L1*forceparams[type].restraint.up1A + lambda*forceparams[type].restraint.up1B;
 +        dup1 =   -forceparams[type].restraint.up1A +        forceparams[type].restraint.up1B;
 +        up2  = L1*forceparams[type].restraint.up2A + lambda*forceparams[type].restraint.up2B;
 +        dup2 =   -forceparams[type].restraint.up2A +        forceparams[type].restraint.up2B;
 +        k    = L1*forceparams[type].restraint.kA   + lambda*forceparams[type].restraint.kB;
 +        dk   =   -forceparams[type].restraint.kA   +        forceparams[type].restraint.kB;
 +        /* 24 */
 +
 +        if (dr < low)
 +        {
 +            drh   = dr - low;
 +            drh2  = drh*drh;
 +            vbond = 0.5*k*drh2;
 +            fbond = -k*drh;
 +            *dvdlambda += 0.5*dk*drh2 - k*dlow*drh;
 +        } /* 11 */
 +        else if (dr <= up1)
 +        {
 +            vbond = 0;
 +            fbond = 0;
 +        }
 +        else if (dr <= up2)
 +        {
 +            drh   = dr - up1;
 +            drh2  = drh*drh;
 +            vbond = 0.5*k*drh2;
 +            fbond = -k*drh;
 +            *dvdlambda += 0.5*dk*drh2 - k*dup1*drh;
 +        } /* 11       */
 +        else
 +        {
 +            drh   = dr - up2;
 +            vbond = k*(up2 - up1)*(0.5*(up2 - up1) + drh);
 +            fbond = -k*(up2 - up1);
 +            *dvdlambda += dk*(up2 - up1)*(0.5*(up2 - up1) + drh)
 +                + k*(dup2 - dup1)*(up2 - up1 + drh)
 +                - k*(up2 - up1)*dup2;
 +        }
 +   
 +        if (dr2 == 0.0)
 +            continue;
 +        
 +        vtot  += vbond;/* 1*/
 +        fbond *= gmx_invsqrt(dr2);                    /*   6          */
 +#ifdef DEBUG
 +        if (debug)
 +            fprintf(debug,"BONDS: dr = %10g  vbond = %10g  fbond = %10g\n",
 +                    dr,vbond,fbond);
 +#endif
 +        if (g) {
 +            ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
 +            ki=IVEC2IS(dt);
 +        }
 +        for (m=0; (m<DIM); m++) {                     /*  15          */
 +            fij=fbond*dx[m];
 +            f[ai][m]+=fij;
 +            f[aj][m]-=fij;
 +            fshift[ki][m]+=fij;
 +            fshift[CENTRAL][m]-=fij;
 +        }
 +    }                                 /* 59 TOTAL     */
 +
 +    return vtot;
 +}
 +
 +real polarize(int nbonds,
 +            const t_iatom forceatoms[],const t_iparams forceparams[],
 +            const rvec x[],rvec f[],rvec fshift[],
 +            const t_pbc *pbc,const t_graph *g,
 +            real lambda,real *dvdlambda,
 +            const t_mdatoms *md,t_fcdata *fcd,
 +            int *global_atom_index)
 +{
 +  int  i,m,ki,ai,aj,type;
 +  real dr,dr2,fbond,vbond,fij,vtot,ksh;
 +  rvec dx;
 +  ivec dt;
 +
 +  vtot = 0.0;
 +  for(i=0; (i<nbonds); ) {
 +    type = forceatoms[i++];
 +    ai   = forceatoms[i++];
 +    aj   = forceatoms[i++];
 +    ksh  = sqr(md->chargeA[aj])*ONE_4PI_EPS0/forceparams[type].polarize.alpha;
 +    if (debug)
 +      fprintf(debug,"POL: local ai = %d aj = %d ksh = %.3f\n",ai,aj,ksh);
 +  
 +    ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);  /*   3          */
 +    dr2  = iprod(dx,dx);                      /*   5          */
 +    dr   = dr2*gmx_invsqrt(dr2);                      /*  10          */
 +
 +    *dvdlambda += harmonic(ksh,ksh,0,0,dr,lambda,&vbond,&fbond);  /*  19  */
 +
 +    if (dr2 == 0.0)
 +      continue;
 +    
 +    vtot  += vbond;/* 1*/
 +    fbond *= gmx_invsqrt(dr2);                        /*   6          */
 +
 +    if (g) {
 +      ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
 +      ki=IVEC2IS(dt);
 +    }
 +    for (m=0; (m<DIM); m++) {                 /*  15          */
 +      fij=fbond*dx[m];
 +      f[ai][m]+=fij;
 +      f[aj][m]-=fij;
 +      fshift[ki][m]+=fij;
 +      fshift[CENTRAL][m]-=fij;
 +    }
 +  }                                   /* 59 TOTAL     */
 +  return vtot;
 +}
 +
 +real anharm_polarize(int nbonds,
 +                     const t_iatom forceatoms[],const t_iparams forceparams[],
 +                     const rvec x[],rvec f[],rvec fshift[],
 +                     const t_pbc *pbc,const t_graph *g,
 +                     real lambda,real *dvdlambda,
 +                     const t_mdatoms *md,t_fcdata *fcd,
 +                     int *global_atom_index)
 +{
 +  int  i,m,ki,ai,aj,type;
 +  real dr,dr2,fbond,vbond,fij,vtot,ksh,khyp,drcut,ddr,ddr3;
 +  rvec dx;
 +  ivec dt;
 +
 +  vtot = 0.0;
 +  for(i=0; (i<nbonds); ) {
 +    type  = forceatoms[i++];
 +    ai    = forceatoms[i++];
 +    aj    = forceatoms[i++];
 +    ksh   = sqr(md->chargeA[aj])*ONE_4PI_EPS0/forceparams[type].anharm_polarize.alpha; /* 7*/
 +    khyp  = forceparams[type].anharm_polarize.khyp;
 +    drcut = forceparams[type].anharm_polarize.drcut;
 +    if (debug)
 +      fprintf(debug,"POL: local ai = %d aj = %d ksh = %.3f\n",ai,aj,ksh);
 +  
 +    ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);  /*   3          */
 +    dr2  = iprod(dx,dx);                      /*   5          */
 +    dr   = dr2*gmx_invsqrt(dr2);                      /*  10          */
 +
 +    *dvdlambda += harmonic(ksh,ksh,0,0,dr,lambda,&vbond,&fbond);  /*  19  */
 +
 +    if (dr2 == 0.0)
 +      continue;
 +    
 +    if (dr > drcut) {
 +        ddr    = dr-drcut;
 +        ddr3   = ddr*ddr*ddr;
 +        vbond += khyp*ddr*ddr3;
 +        fbond -= 4*khyp*ddr3;
 +    }
 +    fbond *= gmx_invsqrt(dr2);                        /*   6          */
 +    vtot  += vbond;/* 1*/
 +
 +    if (g) {
 +      ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
 +      ki=IVEC2IS(dt);
 +    }
 +    for (m=0; (m<DIM); m++) {                 /*  15          */
 +      fij=fbond*dx[m];
 +      f[ai][m]+=fij;
 +      f[aj][m]-=fij;
 +      fshift[ki][m]+=fij;
 +      fshift[CENTRAL][m]-=fij;
 +    }
 +  }                                   /* 72 TOTAL     */
 +  return vtot;
 +}
 +
 +real water_pol(int nbonds,
 +             const t_iatom forceatoms[],const t_iparams forceparams[],
 +             const rvec x[],rvec f[],rvec fshift[],
 +             const t_pbc *pbc,const t_graph *g,
 +             real lambda,real *dvdlambda,
 +             const t_mdatoms *md,t_fcdata *fcd,
 +             int *global_atom_index)
 +{
 +  /* This routine implements anisotropic polarizability for water, through
 +   * a shell connected to a dummy with spring constants that differ in the
 +   * three spatial dimensions in the molecular frame.
 +   */
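 +  /* The energy accumulated below is V = 0.5*(kx*dx_x^2 + ky*dx_y^2 + kz*dx_z^2),
 +   * with k_m = qS^2 * ONE_4PI_EPS0 / alpha_m and dx the shell displacement
 +   * expressed in the molecular frame (nW, dHH, dOD). */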
 +  int  i,m,aO,aH1,aH2,aD,aS,type,type0;
 +  rvec dOH1,dOH2,dHH,dOD,dDS,nW,kk,dx,kdx,proj;
 +#ifdef DEBUG
 +  rvec df;
 +#endif
 +  real vtot,fij,r_HH,r_OD,r_nW,tx,ty,tz,qS;
 +
 +  vtot = 0.0;
 +  if (nbonds > 0) {
 +    type0  = forceatoms[0];
 +    aS     = forceatoms[5];
 +    qS     = md->chargeA[aS];
 +    kk[XX] = sqr(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_x;
 +    kk[YY] = sqr(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_y;
 +    kk[ZZ] = sqr(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_z;
 +    r_HH   = 1.0/forceparams[type0].wpol.rHH;
 +    r_OD   = 1.0/forceparams[type0].wpol.rOD;
 +    if (debug) {
 +      fprintf(debug,"WPOL: qS  = %10.5f aS = %5d\n",qS,aS);
 +      fprintf(debug,"WPOL: kk  = %10.3f        %10.3f        %10.3f\n",
 +            kk[XX],kk[YY],kk[ZZ]);
 +      fprintf(debug,"WPOL: rOH = %10.3f  rHH = %10.3f  rOD = %10.3f\n",
 +            forceparams[type0].wpol.rOH,
 +            forceparams[type0].wpol.rHH,
 +            forceparams[type0].wpol.rOD);
 +    }
 +    for(i=0; (i<nbonds); i+=6) {
 +      type = forceatoms[i];
 +      if (type != type0)
 +      gmx_fatal(FARGS,"Sorry, type = %d, type0 = %d, file = %s, line = %d",
 +                  type,type0,__FILE__,__LINE__);
 +      aO   = forceatoms[i+1];
 +      aH1  = forceatoms[i+2];
 +      aH2  = forceatoms[i+3];
 +      aD   = forceatoms[i+4];
 +      aS   = forceatoms[i+5];
 +      
 +      /* Compute vectors describing the water frame */
 +      rvec_sub(x[aH1],x[aO], dOH1);
 +      rvec_sub(x[aH2],x[aO], dOH2);
 +      rvec_sub(x[aH2],x[aH1],dHH);
 +      rvec_sub(x[aD], x[aO], dOD);
 +      rvec_sub(x[aS], x[aD], dDS);
 +      cprod(dOH1,dOH2,nW);
 +      
 +      /* Compute inverse length of normal vector 
 +       * (this one could be precomputed, but I'm too lazy now)
 +       */
 +      r_nW = gmx_invsqrt(iprod(nW,nW));
 +      /* This is for precision, but does not make a big difference,
 +       * it can go later.
 +       */
 +      r_OD = gmx_invsqrt(iprod(dOD,dOD)); 
 +      
 +      /* Normalize the vectors in the water frame */
 +      svmul(r_nW,nW,nW);
 +      svmul(r_HH,dHH,dHH);
 +      svmul(r_OD,dOD,dOD);
 +      
 +      /* Compute displacement of shell along components of the vector */
 +      dx[ZZ] = iprod(dDS,dOD);
 +      /* Compute projection on the XY plane: dDS - dx[ZZ]*dOD */
 +      for(m=0; (m<DIM); m++)
 +      proj[m] = dDS[m]-dx[ZZ]*dOD[m];
 +      
 +      /*dx[XX] = iprod(dDS,nW);
 +      dx[YY] = iprod(dDS,dHH);*/
 +      dx[XX] = iprod(proj,nW);
 +      for(m=0; (m<DIM); m++)
 +      proj[m] -= dx[XX]*nW[m];
 +      dx[YY] = iprod(proj,dHH);
 +      /*#define DEBUG*/
 +#ifdef DEBUG
 +      if (debug) {
 +      fprintf(debug,"WPOL: dx2=%10g  dy2=%10g  dz2=%10g  sum=%10g  dDS^2=%10g\n",
 +              sqr(dx[XX]),sqr(dx[YY]),sqr(dx[ZZ]),iprod(dx,dx),iprod(dDS,dDS));
 +      fprintf(debug,"WPOL: dHH=(%10g,%10g,%10g)\n",dHH[XX],dHH[YY],dHH[ZZ]);
 +      fprintf(debug,"WPOL: dOD=(%10g,%10g,%10g), 1/r_OD = %10g\n",
 +              dOD[XX],dOD[YY],dOD[ZZ],1/r_OD);
 +      fprintf(debug,"WPOL: nW =(%10g,%10g,%10g), 1/r_nW = %10g\n",
 +              nW[XX],nW[YY],nW[ZZ],1/r_nW);
 +      fprintf(debug,"WPOL: dx  =%10g, dy  =%10g, dz  =%10g\n",
 +              dx[XX],dx[YY],dx[ZZ]);
 +      fprintf(debug,"WPOL: dDSx=%10g, dDSy=%10g, dDSz=%10g\n",
 +              dDS[XX],dDS[YY],dDS[ZZ]);
 +      }
 +#endif
 +      /* Now compute the forces and energy */
 +      kdx[XX] = kk[XX]*dx[XX];
 +      kdx[YY] = kk[YY]*dx[YY];
 +      kdx[ZZ] = kk[ZZ]*dx[ZZ];
 +      vtot   += iprod(dx,kdx);
 +      for(m=0; (m<DIM); m++) {
 +      /* This is a tensor operation but written out for speed */
 +      tx        =  nW[m]*kdx[XX];
 +      ty        = dHH[m]*kdx[YY];
 +      tz        = dOD[m]*kdx[ZZ];
 +      fij       = -tx-ty-tz;
 +#ifdef DEBUG
 +      df[m] = fij;
 +#endif
 +      f[aS][m] += fij;
 +      f[aD][m] -= fij;
 +      }
 +#ifdef DEBUG
 +      if (debug) {
 +      fprintf(debug,"WPOL: vwpol=%g\n",0.5*iprod(dx,kdx));
 +      fprintf(debug,"WPOL: df = (%10g, %10g, %10g)\n",df[XX],df[YY],df[ZZ]);
 +      }
 +#endif
 +    } 
 +  }
 +  return 0.5*vtot;
 +}
 +
 +static real do_1_thole(const rvec xi,const rvec xj,rvec fi,rvec fj,
 +                     const t_pbc *pbc,real qq,
 +                     rvec fshift[],real afac)
 +{
 +  rvec r12;
 +  real r12sq,r12_1,r12n,r12bar,v0,v1,fscal,ebar,fff;
 +  int  m,t;
 +    
 +  t      = pbc_rvec_sub(pbc,xi,xj,r12); /*  3 */
 +  
 +  r12sq  = iprod(r12,r12);              /*  5 */
 +  r12_1  = gmx_invsqrt(r12sq);              /*  5 */
 +  r12bar = afac/r12_1;                  /*  5 */
 +  v0     = qq*ONE_4PI_EPS0*r12_1;       /*  2 */
 +  ebar   = exp(-r12bar);                /*  5 */
 +  v1     = (1-(1+0.5*r12bar)*ebar);     /*  4 */
 +  fscal  = ((v0*r12_1)*v1 - v0*0.5*afac*ebar*(r12bar+1))*r12_1; /* 9 */
 +  if (debug)
 +    fprintf(debug,"THOLE: v0 = %.3f v1 = %.3f r12= % .3f r12bar = %.3f fscal = %.3f  ebar = %.3f\n",v0,v1,1/r12_1,r12bar,fscal,ebar);
 +  
 +  for(m=0; (m<DIM); m++) {
 +    fff    = fscal*r12[m];
 +    fi[m] += fff;
 +    fj[m] -= fff;             
 +    fshift[t][m]       += fff;
 +    fshift[CENTRAL][m] -= fff;
 +  } /* 15 */
 +  
 +  return v0*v1; /* 1 */
 +  /* 54 */
 +}
 +
 +real thole_pol(int nbonds,
 +             const t_iatom forceatoms[],const t_iparams forceparams[],
 +             const rvec x[],rvec f[],rvec fshift[],
 +             const t_pbc *pbc,const t_graph *g,
 +             real lambda,real *dvdlambda,
 +             const t_mdatoms *md,t_fcdata *fcd,
 +             int *global_atom_index)
 +{
 +  /* Interaction between two pairs of particles with opposite charge */
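 +  /* Each call to do_1_thole() below returns
 +   *   V = qq*ONE_4PI_EPS0/r * (1 - (1 + rbar/2)*exp(-rbar))
 +   * for the charge product qq it is passed, with
 +   * rbar = a*r/(alpha1*alpha2)^(1/6). */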
 +  int i,type,a1,da1,a2,da2;
 +  real q1,q2,qq,a,al1,al2,afac;
 +  real V=0;
 +  
 +  for(i=0; (i<nbonds); ) {
 +    type  = forceatoms[i++];
 +    a1    = forceatoms[i++];
 +    da1   = forceatoms[i++];
 +    a2    = forceatoms[i++];
 +    da2   = forceatoms[i++];
 +    q1    = md->chargeA[da1];
 +    q2    = md->chargeA[da2];
 +    a     = forceparams[type].thole.a;
 +    al1   = forceparams[type].thole.alpha1;
 +    al2   = forceparams[type].thole.alpha2;
 +    qq    = q1*q2;
 +    afac  = a*pow(al1*al2,-1.0/6.0);
 +    V += do_1_thole(x[a1], x[a2], f[a1], f[a2], pbc, qq,fshift,afac);
 +    V += do_1_thole(x[da1],x[a2], f[da1],f[a2], pbc,-qq,fshift,afac);
 +    V += do_1_thole(x[a1], x[da2],f[a1], f[da2],pbc,-qq,fshift,afac);
 +    V += do_1_thole(x[da1],x[da2],f[da1],f[da2],pbc, qq,fshift,afac);
 +  }
 +  /* 290 flops */
 +  return V;
 +}
 +
 +real bond_angle(const rvec xi,const rvec xj,const rvec xk,const t_pbc *pbc,
 +              rvec r_ij,rvec r_kj,real *costh,
 +              int *t1,int *t2)
 +/* Return value is the angle between the bonds i-j and j-k */
 +{
 +  /* 41 FLOPS */
 +  real th;
 +  
 +  *t1 = pbc_rvec_sub(pbc,xi,xj,r_ij);                 /*  3           */
 +  *t2 = pbc_rvec_sub(pbc,xk,xj,r_kj);                 /*  3           */
 +
 +  *costh=cos_angle(r_ij,r_kj);                /* 25           */
 +  th=acos(*costh);                    /* 10           */
 +                                      /* 41 TOTAL     */
 +  return th;
 +}
 +
 +real angles(int nbonds,
 +            const t_iatom forceatoms[],const t_iparams forceparams[],
 +            const rvec x[],rvec f[],rvec fshift[],
 +            const t_pbc *pbc,const t_graph *g,
 +            real lambda,real *dvdlambda,
 +            const t_mdatoms *md,t_fcdata *fcd,
 +            int *global_atom_index)
 +{
 +    int  i,ai,aj,ak,t1,t2,type;
 +    rvec r_ij,r_kj;
 +    real cos_theta,cos_theta2,theta,dVdt,va,vtot;
 +    ivec jt,dt_ij,dt_kj;
 +
 +    vtot = 0.0;
 +    for(i=0; i<nbonds; )
 +    {
 +        type = forceatoms[i++];
 +        ai   = forceatoms[i++];
 +        aj   = forceatoms[i++];
 +        ak   = forceatoms[i++];
 +
 +        theta  = bond_angle(x[ai],x[aj],x[ak],pbc,
 +                            r_ij,r_kj,&cos_theta,&t1,&t2);    /*  41          */
 +  
 +        *dvdlambda += harmonic(forceparams[type].harmonic.krA,
 +                               forceparams[type].harmonic.krB,
 +                               forceparams[type].harmonic.rA*DEG2RAD,
 +                               forceparams[type].harmonic.rB*DEG2RAD,
 +                               theta,lambda,&va,&dVdt);  /*  21  */
 +        vtot += va;
 +
 +        cos_theta2 = sqr(cos_theta);
 +        if (cos_theta2 < 1)
 +        {
 +            int  m;
 +            real st,sth;
 +            real cik,cii,ckk;
 +            real nrkj2,nrij2;
 +            real nrkj_1,nrij_1;
 +            rvec f_i,f_j,f_k;
 +
 +            st  = dVdt*gmx_invsqrt(1 - cos_theta2);   /*  12          */
 +            sth = st*cos_theta;                       /*   1          */
 +#ifdef DEBUG
 +            if (debug)
 +                fprintf(debug,"ANGLES: theta = %10g  vth = %10g  dV/dtheta = %10g\n",
 +                        theta*RAD2DEG,va,dVdt);
 +#endif
 +            nrij2 = iprod(r_ij,r_ij);                 /*   5          */
 +            nrkj2 = iprod(r_kj,r_kj);                 /*   5          */
 +
 +            nrij_1 = gmx_invsqrt(nrij2);              /*  10          */
 +            nrkj_1 = gmx_invsqrt(nrkj2);              /*  10          */
 +
 +            cik = st*nrij_1*nrkj_1;                   /*   2          */
 +            cii = sth*nrij_1*nrij_1;                  /*   2          */
 +            ckk = sth*nrkj_1*nrkj_1;                  /*   2          */
 +      
 +            for (m=0; m<DIM; m++)
 +            {                 /*  39          */
 +                f_i[m]    = -(cik*r_kj[m] - cii*r_ij[m]);
 +                f_k[m]    = -(cik*r_ij[m] - ckk*r_kj[m]);
 +                f_j[m]    = -f_i[m] - f_k[m];
 +                f[ai][m] += f_i[m];
 +                f[aj][m] += f_j[m];
 +                f[ak][m] += f_k[m];
 +            }
 +            if (g != NULL)
 +            {
 +                copy_ivec(SHIFT_IVEC(g,aj),jt);
 +
 +                ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
 +                ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
 +                t1 = IVEC2IS(dt_ij);
 +                t2 = IVEC2IS(dt_kj);
 +            }
 +            rvec_inc(fshift[t1],f_i);
 +            rvec_inc(fshift[CENTRAL],f_j);
 +            rvec_inc(fshift[t2],f_k);
 +        }                                           /* 161 TOTAL      */
 +    }
 +
 +    return vtot;
 +}
 +
 +real linear_angles(int nbonds,
 +                   const t_iatom forceatoms[],const t_iparams forceparams[],
 +                   const rvec x[],rvec f[],rvec fshift[],
 +                   const t_pbc *pbc,const t_graph *g,
 +                   real lambda,real *dvdlambda,
 +                   const t_mdatoms *md,t_fcdata *fcd,
 +                   int *global_atom_index)
 +{
 +  int  i,m,ai,aj,ak,t1,t2,type;
 +  rvec f_i,f_j,f_k;
 +  real L1,kA,kB,aA,aB,dr,dr2,va,vtot,a,b,klin;
 +  ivec jt,dt_ij,dt_kj;
 +  rvec r_ij,r_kj,r_ik,dx;
 +    
 +  L1   = 1-lambda;
 +  vtot = 0.0;
 +  for(i=0; (i<nbonds); ) {
 +    type = forceatoms[i++];
 +    ai   = forceatoms[i++];
 +    aj   = forceatoms[i++];
 +    ak   = forceatoms[i++];
 +    
 +    kA = forceparams[type].linangle.klinA;
 +    kB = forceparams[type].linangle.klinB;
 +    klin = L1*kA + lambda*kB;
 +    
 +    aA   = forceparams[type].linangle.aA;
 +    aB   = forceparams[type].linangle.aB;
 +    a    = L1*aA+lambda*aB;
 +    b    = 1-a;
 +    
 +    t1 = pbc_rvec_sub(pbc,x[ai],x[aj],r_ij);
 +    t2 = pbc_rvec_sub(pbc,x[ak],x[aj],r_kj);
 +    rvec_sub(r_ij,r_kj,r_ik);
 +    
 +    dr2 = 0;
 +    for(m=0; (m<DIM); m++) 
 +    {
 +        dr     = - a * r_ij[m] - b * r_kj[m];
 +        dr2   += dr*dr;
 +        dx[m]  = dr;
 +        f_i[m] = a*klin*dr;
 +        f_k[m] = b*klin*dr;
 +        f_j[m] = -(f_i[m]+f_k[m]);
 +        f[ai][m] += f_i[m];
 +        f[aj][m] += f_j[m];
 +        f[ak][m] += f_k[m];
 +    }
 +    va    = 0.5*klin*dr2;
 +    *dvdlambda += 0.5*(kB-kA)*dr2 + klin*(aB-aA)*iprod(dx,r_ik); 
 +            
 +    vtot += va;
 +    
 +    if (g) {
 +        copy_ivec(SHIFT_IVEC(g,aj),jt);
 +      
 +        ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
 +        ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
 +        t1=IVEC2IS(dt_ij);
 +        t2=IVEC2IS(dt_kj);
 +    }
 +    rvec_inc(fshift[t1],f_i);
 +    rvec_inc(fshift[CENTRAL],f_j);
 +    rvec_inc(fshift[t2],f_k);
 +  }                                           /* 57 TOTAL     */
 +  return vtot;
 +}
 +
 +real urey_bradley(int nbonds,
 +                const t_iatom forceatoms[],const t_iparams forceparams[],
 +                const rvec x[],rvec f[],rvec fshift[],
 +                const t_pbc *pbc,const t_graph *g,
 +                real lambda,real *dvdlambda,
 +                const t_mdatoms *md,t_fcdata *fcd,
 +                int *global_atom_index)
 +{
 +  int  i,m,ai,aj,ak,t1,t2,type,ki;
 +  rvec r_ij,r_kj,r_ik;
 +  real cos_theta,cos_theta2,theta;
 +  real dVdt,va,vtot,dr,dr2,vbond,fbond,fik;
 +  real kthA,th0A,kUBA,r13A,kthB,th0B,kUBB,r13B;
 +  ivec jt,dt_ij,dt_kj,dt_ik;
 +  
 +  vtot = 0.0;
 +  for(i=0; (i<nbonds); ) {
 +    type = forceatoms[i++];
 +    ai   = forceatoms[i++];
 +    aj   = forceatoms[i++];
 +    ak   = forceatoms[i++];
 +    th0A  = forceparams[type].u_b.thetaA*DEG2RAD;
 +    kthA  = forceparams[type].u_b.kthetaA;
 +    r13A  = forceparams[type].u_b.r13A;
 +    kUBA  = forceparams[type].u_b.kUBA;
 +    th0B  = forceparams[type].u_b.thetaB*DEG2RAD;
 +    kthB  = forceparams[type].u_b.kthetaB;
 +    r13B  = forceparams[type].u_b.r13B;
 +    kUBB  = forceparams[type].u_b.kUBB;
 +    
 +    theta  = bond_angle(x[ai],x[aj],x[ak],pbc,
 +                      r_ij,r_kj,&cos_theta,&t1,&t2);  /*  41          */
 +  
 +    *dvdlambda += harmonic(kthA,kthB,th0A,th0B,theta,lambda,&va,&dVdt);  /*  21  */
 +    vtot += va;
 +    
 +    ki   = pbc_rvec_sub(pbc,x[ai],x[ak],r_ik);        /*   3          */
 +    dr2  = iprod(r_ik,r_ik);                  /*   5          */
 +    dr   = dr2*gmx_invsqrt(dr2);                      /*  10          */
 +
 +    *dvdlambda += harmonic(kUBA,kUBB,r13A,r13B,dr,lambda,&vbond,&fbond); /*  19  */
 +
 +    cos_theta2 = sqr(cos_theta);                /*   1                */
 +    if (cos_theta2 < 1) {
 +      real st,sth;
 +      real cik,cii,ckk;
 +      real nrkj2,nrij2;
 +      rvec f_i,f_j,f_k;
 +      
 +      st  = dVdt*gmx_invsqrt(1 - cos_theta2); /*  12          */
 +      sth = st*cos_theta;                     /*   1          */
 +#ifdef DEBUG
 +      if (debug)
 +          fprintf(debug,"ANGLES: theta = %10g  vth = %10g  dV/dtheta = %10g\n",
 +                  theta*RAD2DEG,va,dVdt);
 +#endif
 +      nrkj2=iprod(r_kj,r_kj);                 /*   5          */
 +      nrij2=iprod(r_ij,r_ij);
 +      
 +      cik=st*gmx_invsqrt(nrkj2*nrij2);                /*  12          */ 
 +      cii=sth/nrij2;                          /*  10          */
 +      ckk=sth/nrkj2;                          /*  10          */
 +      
 +      for (m=0; (m<DIM); m++) {                       /*  39          */
 +          f_i[m]=-(cik*r_kj[m]-cii*r_ij[m]);
 +          f_k[m]=-(cik*r_ij[m]-ckk*r_kj[m]);
 +          f_j[m]=-f_i[m]-f_k[m];
 +          f[ai][m]+=f_i[m];
 +          f[aj][m]+=f_j[m];
 +          f[ak][m]+=f_k[m];
 +      }
 +      if (g) {
 +          copy_ivec(SHIFT_IVEC(g,aj),jt);
 +
 +          ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
 +          ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
 +          t1=IVEC2IS(dt_ij);
 +          t2=IVEC2IS(dt_kj);
 +      }
 +      rvec_inc(fshift[t1],f_i);
 +      rvec_inc(fshift[CENTRAL],f_j);
 +      rvec_inc(fshift[t2],f_k);
 +    }                                           /* 161 TOTAL  */
 +    /* Time for the bond calculations */
 +    if (dr2 == 0.0)
 +      continue;
 +
 +    vtot  += vbond;  /* 1*/
 +    fbond *= gmx_invsqrt(dr2);                        /*   6          */
 +    
 +    if (g) {
 +      ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,ak),dt_ik);
 +      ki=IVEC2IS(dt_ik);
 +    }
 +    for (m=0; (m<DIM); m++) {                 /*  15          */
 +      fik=fbond*r_ik[m];
 +      f[ai][m]+=fik;
 +      f[ak][m]-=fik;
 +      fshift[ki][m]+=fik;
 +      fshift[CENTRAL][m]-=fik;
 +    }
 +  }
 +  return vtot;
 +}
 +
 +real quartic_angles(int nbonds,
 +                  const t_iatom forceatoms[],const t_iparams forceparams[],
 +                  const rvec x[],rvec f[],rvec fshift[],
 +                  const t_pbc *pbc,const t_graph *g,
 +                  real lambda,real *dvdlambda,
 +                  const t_mdatoms *md,t_fcdata *fcd,
 +                  int *global_atom_index)
 +{
 +  int  i,j,ai,aj,ak,t1,t2,type;
 +  rvec r_ij,r_kj;
 +  real cos_theta,cos_theta2,theta,dt,dVdt,va,dtp,c,vtot;
 +  ivec jt,dt_ij,dt_kj;
 +  
 +  vtot = 0.0;
 +  for(i=0; (i<nbonds); ) {
 +    type = forceatoms[i++];
 +    ai   = forceatoms[i++];
 +    aj   = forceatoms[i++];
 +    ak   = forceatoms[i++];
 +
 +    theta  = bond_angle(x[ai],x[aj],x[ak],pbc,
 +                      r_ij,r_kj,&cos_theta,&t1,&t2);  /*  41          */
 +
 +    dt = theta - forceparams[type].qangle.theta*DEG2RAD; /* 2          */
 +
 +    dVdt = 0;
 +    va = forceparams[type].qangle.c[0];
 +    dtp = 1.0;
 +    for(j=1; j<=4; j++) {
 +      c = forceparams[type].qangle.c[j];
 +      dVdt -= j*c*dtp;
 +      dtp *= dt;
 +      va += c*dtp;
 +    }
 +    /* 20 */
 +
 +    vtot += va;
 +    
 +    cos_theta2 = sqr(cos_theta);                /*   1                */
 +    if (cos_theta2 < 1) {
 +      int  m;
 +      real st,sth;
 +      real cik,cii,ckk;
 +      real nrkj2,nrij2;
 +      rvec f_i,f_j,f_k;
 +      
 +      st  = dVdt*gmx_invsqrt(1 - cos_theta2);         /*  12          */
 +      sth = st*cos_theta;                     /*   1          */
 +#ifdef DEBUG
 +      if (debug)
 +          fprintf(debug,"ANGLES: theta = %10g  vth = %10g  dV/dtheta = %10g\n",
 +                  theta*RAD2DEG,va,dVdt);
 +#endif
 +      nrkj2=iprod(r_kj,r_kj);                 /*   5          */
 +      nrij2=iprod(r_ij,r_ij);
 +      
 +      cik=st*gmx_invsqrt(nrkj2*nrij2);                /*  12          */ 
 +      cii=sth/nrij2;                          /*  10          */
 +      ckk=sth/nrkj2;                          /*  10          */
 +      
 +      for (m=0; (m<DIM); m++) {                       /*  39          */
 +          f_i[m]=-(cik*r_kj[m]-cii*r_ij[m]);
 +          f_k[m]=-(cik*r_ij[m]-ckk*r_kj[m]);
 +          f_j[m]=-f_i[m]-f_k[m];
 +          f[ai][m]+=f_i[m];
 +          f[aj][m]+=f_j[m];
 +          f[ak][m]+=f_k[m];
 +      }
 +      if (g) {
 +          copy_ivec(SHIFT_IVEC(g,aj),jt);
 +
 +          ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
 +          ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
 +          t1=IVEC2IS(dt_ij);
 +          t2=IVEC2IS(dt_kj);
 +      }
 +      rvec_inc(fshift[t1],f_i);
 +      rvec_inc(fshift[CENTRAL],f_j);
 +      rvec_inc(fshift[t2],f_k);
 +    }                                           /* 153 TOTAL  */
 +  }
 +  return vtot;
 +}
 +
 +real dih_angle(const rvec xi,const rvec xj,const rvec xk,const rvec xl,
 +               const t_pbc *pbc,
 +               rvec r_ij,rvec r_kj,rvec r_kl,rvec m,rvec n,
 +               real *sign,int *t1,int *t2,int *t3)
 +{
 +  real ipr,phi;
 +
 +  *t1 = pbc_rvec_sub(pbc,xi,xj,r_ij);                 /*  3           */
 +  *t2 = pbc_rvec_sub(pbc,xk,xj,r_kj);                 /*  3           */
 +  *t3 = pbc_rvec_sub(pbc,xk,xl,r_kl);                 /*  3           */
 +
 +  cprod(r_ij,r_kj,m);                         /*  9           */
 +  cprod(r_kj,r_kl,n);                 /*  9           */
 +  phi=gmx_angle(m,n);                         /* 49 (assuming 25 for atan2) */
 +  ipr=iprod(r_ij,n);                  /*  5           */
 +  (*sign)=(ipr<0.0)?-1.0:1.0;
 +  phi=(*sign)*phi;                    /*  1           */
 +                                      /* 82 TOTAL     */
 +  return phi;
 +}
 +
 +
 +#ifdef SSE_PROPER_DIHEDRALS
 +
 +/* x86 SIMD inner-product of 4 float vectors */
 +#define GMX_MM_IPROD_PS(ax,ay,az,bx,by,bz)                 \
 +    _mm_add_ps(_mm_add_ps(_mm_mul_ps(ax,bx),_mm_mul_ps(ay,by)),_mm_mul_ps(az,bz))
 +
 +/* x86 SIMD norm^2 of 4 float vectors */
 +#define GMX_MM_NORM2_PS(ax,ay,az) GMX_MM_IPROD_PS(ax,ay,az,ax,ay,az)
 +
 +/* x86 SIMD cross-product of 4 float vectors */
 +#define GMX_MM_CPROD_PS(ax,ay,az,bx,by,bz,cx,cy,cz)        \
 +{                                                          \
 +    cx = _mm_sub_ps(_mm_mul_ps(ay,bz),_mm_mul_ps(az,by));  \
 +    cy = _mm_sub_ps(_mm_mul_ps(az,bx),_mm_mul_ps(ax,bz));  \
 +    cz = _mm_sub_ps(_mm_mul_ps(ax,by),_mm_mul_ps(ay,bx));  \
 +}
 +
 +/* load 4 rvec's into 3 x86 SIMD float registers */
 +#define load_rvec4(r0,r1,r2,r3,rx_SSE,ry_SSE,rz_SSE)          \
 +{                                                             \
 +    __m128 tmp;                                               \
 +    rx_SSE = _mm_load_ps(r0);                                 \
 +    ry_SSE = _mm_load_ps(r1);                                 \
 +    rz_SSE = _mm_load_ps(r2);                                 \
 +    tmp    = _mm_load_ps(r3);                                 \
 +    _MM_TRANSPOSE4_PS(rx_SSE,ry_SSE,rz_SSE,tmp);              \
 +}
 +
 +#define store_rvec4(rx_SSE,ry_SSE,rz_SSE,r0,r1,r2,r3)         \
 +{                                                             \
 +    __m128 tmp=_mm_setzero_ps();                              \
 +    _MM_TRANSPOSE4_PS(rx_SSE,ry_SSE,rz_SSE,tmp);              \
 +    _mm_store_ps(r0,rx_SSE);                                  \
 +    _mm_store_ps(r1,ry_SSE);                                  \
 +    _mm_store_ps(r2,rz_SSE);                                  \
 +    _mm_store_ps(r3,tmp   );                                  \
 +}
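 +
 +/* load_rvec4 and store_rvec4 transpose between four rvecs in memory and
 + * three SSE registers holding the x, y and z components of all four
 + * vectors, which is the layout the macros above operate on.
 + */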
 +
 +/* An rvec in a structure which can be allocated 16-byte aligned */
 +typedef struct {
 +    rvec  v;
 +    float f;
 +} rvec_sse_t;
 +
 +/* As dih_angle above, but calculates 4 dihedral angles at once using SSE,
 + * also calculates the pre-factor required for the dihedral force update.
 + * Note that bv and buf should be 16-byte aligned.
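 + * On return bv[0..3] and bv[4..7] hold the m and n vectors of the four
 + * dihedrals, while buf holds nrkj/|m|^2 (0..3), nrkj/|n|^2 (4..7), the
 + * projections p (8..11) and q (12..15) and the signed angles (16..19),
 + * i.e. exactly the arguments of do_dih_fup_noshiftf_precalc().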
 + */
 +static void
 +dih_angle_sse(const rvec *x,
 +              int ai[4],int aj[4],int ak[4],int al[4],
 +              const t_pbc *pbc,
 +              int t1[4],int t2[4],int t3[4],
 +              rvec_sse_t *bv,
 +              real *buf)
 +{
 +    int s;
 +    __m128 rijx_SSE,rijy_SSE,rijz_SSE;
 +    __m128 rkjx_SSE,rkjy_SSE,rkjz_SSE;
 +    __m128 rklx_SSE,rkly_SSE,rklz_SSE;
 +    __m128 mx_SSE,my_SSE,mz_SSE;
 +    __m128 nx_SSE,ny_SSE,nz_SSE;
 +    __m128 cx_SSE,cy_SSE,cz_SSE;
 +    __m128 cn_SSE;
 +    __m128 s_SSE;
 +    __m128 phi_SSE;
 +    __m128 ipr_SSE;
 +    int signs;
 +    __m128 iprm_SSE,iprn_SSE;
 +    __m128 nrkj2_SSE,nrkj_1_SSE,nrkj_2_SSE,nrkj_SSE;
 +    __m128 nrkj_m2_SSE,nrkj_n2_SSE;
 +    __m128 p_SSE,q_SSE;
 +    __m128 fmin_SSE=_mm_set1_ps(GMX_FLOAT_MIN);
 +
 +    for(s=0; s<4; s++)
 +    {
 +        t1[s] = pbc_rvec_sub(pbc,x[ai[s]],x[aj[s]],bv[0+s].v);
 +        t2[s] = pbc_rvec_sub(pbc,x[ak[s]],x[aj[s]],bv[4+s].v);
 +        t3[s] = pbc_rvec_sub(pbc,x[ak[s]],x[al[s]],bv[8+s].v);
 +    }
 +
 +    load_rvec4(bv[0].v,bv[1].v,bv[2].v,bv[3].v,rijx_SSE,rijy_SSE,rijz_SSE);
 +    load_rvec4(bv[4].v,bv[5].v,bv[6].v,bv[7].v,rkjx_SSE,rkjy_SSE,rkjz_SSE);
 +    load_rvec4(bv[8].v,bv[9].v,bv[10].v,bv[11].v,rklx_SSE,rkly_SSE,rklz_SSE);
 +
 +    GMX_MM_CPROD_PS(rijx_SSE,rijy_SSE,rijz_SSE,
 +                    rkjx_SSE,rkjy_SSE,rkjz_SSE,
 +                    mx_SSE,my_SSE,mz_SSE);
 +
 +    GMX_MM_CPROD_PS(rkjx_SSE,rkjy_SSE,rkjz_SSE,
 +                    rklx_SSE,rkly_SSE,rklz_SSE,
 +                    nx_SSE,ny_SSE,nz_SSE);
 +
 +    GMX_MM_CPROD_PS(mx_SSE,my_SSE,mz_SSE,
 +                    nx_SSE,ny_SSE,nz_SSE,
 +                    cx_SSE,cy_SSE,cz_SSE);
 +
 +    cn_SSE = gmx_mm_sqrt_ps(GMX_MM_NORM2_PS(cx_SSE,cy_SSE,cz_SSE));
 +    
 +    s_SSE = GMX_MM_IPROD_PS(mx_SSE,my_SSE,mz_SSE,nx_SSE,ny_SSE,nz_SSE);
 +
 +    phi_SSE = gmx_mm_atan2_ps(cn_SSE,s_SSE);
 +    _mm_store_ps(buf+16,phi_SSE);
 +
 +    ipr_SSE = GMX_MM_IPROD_PS(rijx_SSE,rijy_SSE,rijz_SSE,
 +                              nx_SSE,ny_SSE,nz_SSE);
 +
 +    signs = _mm_movemask_ps(ipr_SSE);
 +    
 +    for(s=0; s<4; s++)
 +    {
 +        if (signs & (1<<s))
 +        {
 +            buf[16+s] = -buf[16+s];
 +        }
 +    }
 +
 +    iprm_SSE    = GMX_MM_NORM2_PS(mx_SSE,my_SSE,mz_SSE);
 +    iprn_SSE    = GMX_MM_NORM2_PS(nx_SSE,ny_SSE,nz_SSE);
 +
 +    /* store_rvec4 messes with the input, don't use it after this! */
 +    store_rvec4(mx_SSE,my_SSE,mz_SSE,bv[0].v,bv[1].v,bv[2].v,bv[3].v);
 +    store_rvec4(nx_SSE,ny_SSE,nz_SSE,bv[4].v,bv[5].v,bv[6].v,bv[7].v);
 +
 +    nrkj2_SSE   = GMX_MM_NORM2_PS(rkjx_SSE,rkjy_SSE,rkjz_SSE);
 +
 +    /* Avoid division by zero. When zero, the result is multiplied by 0
 +     * anyhow, so the 3 max below do not affect the final result.
 +     */
 +    nrkj2_SSE   = _mm_max_ps(nrkj2_SSE,fmin_SSE);
 +    nrkj_1_SSE  = gmx_mm_invsqrt_ps(nrkj2_SSE);
 +    nrkj_2_SSE  = _mm_mul_ps(nrkj_1_SSE,nrkj_1_SSE);
 +    nrkj_SSE    = _mm_mul_ps(nrkj2_SSE,nrkj_1_SSE);
 +
 +    iprm_SSE    = _mm_max_ps(iprm_SSE,fmin_SSE);
 +    iprn_SSE    = _mm_max_ps(iprn_SSE,fmin_SSE);
 +    nrkj_m2_SSE = _mm_mul_ps(nrkj_SSE,gmx_mm_inv_ps(iprm_SSE));
 +    nrkj_n2_SSE = _mm_mul_ps(nrkj_SSE,gmx_mm_inv_ps(iprn_SSE));
 +
 +    _mm_store_ps(buf+0,nrkj_m2_SSE);
 +    _mm_store_ps(buf+4,nrkj_n2_SSE);
 +
 +    p_SSE       = GMX_MM_IPROD_PS(rijx_SSE,rijy_SSE,rijz_SSE,
 +                                  rkjx_SSE,rkjy_SSE,rkjz_SSE);
 +    p_SSE       = _mm_mul_ps(p_SSE,nrkj_2_SSE);
 +
 +    q_SSE       = GMX_MM_IPROD_PS(rklx_SSE,rkly_SSE,rklz_SSE,
 +                                  rkjx_SSE,rkjy_SSE,rkjz_SSE);
 +    q_SSE       = _mm_mul_ps(q_SSE,nrkj_2_SSE);
 +
 +    _mm_store_ps(buf+8 ,p_SSE);
 +    _mm_store_ps(buf+12,q_SSE);
 +}
 +
 +#endif /* SSE_PROPER_DIHEDRALS */
 +
 +
 +void do_dih_fup(int i,int j,int k,int l,real ddphi,
 +              rvec r_ij,rvec r_kj,rvec r_kl,
 +              rvec m,rvec n,rvec f[],rvec fshift[],
 +              const t_pbc *pbc,const t_graph *g,
 +              const rvec x[],int t1,int t2,int t3)
 +{
 +  /* 143 FLOPS */
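 +  /* Standard dihedral force distribution: with ddphi = dV/dphi,
 +   *   f_i = -ddphi*|r_kj|/|m|^2 * m   and   f_l = ddphi*|r_kj|/|n|^2 * n,
 +   * while f_j and f_k follow from the projections p = r_ij.r_kj/|r_kj|^2
 +   * and q = r_kl.r_kj/|r_kj|^2 such that the net force on the four atoms
 +   * is zero.
 +   */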
 +  rvec f_i,f_j,f_k,f_l;
 +  rvec uvec,vvec,svec,dx_jl;
 +  real iprm,iprn,nrkj,nrkj2,nrkj_1,nrkj_2;
 +  real a,b,p,q,toler;
 +  ivec jt,dt_ij,dt_kj,dt_lj;  
 +  
 +  iprm  = iprod(m,m);         /*  5   */
 +  iprn  = iprod(n,n);         /*  5   */
 +  nrkj2 = iprod(r_kj,r_kj);   /*  5   */
 +  toler = nrkj2*GMX_REAL_EPS;
 +  if ((iprm > toler) && (iprn > toler)) {
 +    nrkj_1 = gmx_invsqrt(nrkj2);      /* 10   */
 +    nrkj_2 = nrkj_1*nrkj_1;   /*  1   */
 +    nrkj  = nrkj2*nrkj_1;     /*  1   */
 +    a     = -ddphi*nrkj/iprm; /* 11   */
 +    svmul(a,m,f_i);           /*  3   */
 +    b     = ddphi*nrkj/iprn;  /* 11   */
 +    svmul(b,n,f_l);           /*  3   */
 +    p     = iprod(r_ij,r_kj); /*  5   */
 +    p    *= nrkj_2;           /*  1   */
 +    q     = iprod(r_kl,r_kj); /*  5   */
 +    q    *= nrkj_2;           /*  1   */
 +    svmul(p,f_i,uvec);                /*  3   */
 +    svmul(q,f_l,vvec);                /*  3   */
 +    rvec_sub(uvec,vvec,svec); /*  3   */
 +    rvec_sub(f_i,svec,f_j);   /*  3   */
 +    rvec_add(f_l,svec,f_k);   /*  3   */
 +    rvec_inc(f[i],f_i);       /*  3   */
 +    rvec_dec(f[j],f_j);       /*  3   */
 +    rvec_dec(f[k],f_k);       /*  3   */
 +    rvec_inc(f[l],f_l);       /*  3   */
 +    
 +    if (g) {
 +      copy_ivec(SHIFT_IVEC(g,j),jt);
 +      ivec_sub(SHIFT_IVEC(g,i),jt,dt_ij);
 +      ivec_sub(SHIFT_IVEC(g,k),jt,dt_kj);
 +      ivec_sub(SHIFT_IVEC(g,l),jt,dt_lj);
 +      t1=IVEC2IS(dt_ij);
 +      t2=IVEC2IS(dt_kj);
 +      t3=IVEC2IS(dt_lj);
 +    } else if (pbc) {
 +      t3 = pbc_rvec_sub(pbc,x[l],x[j],dx_jl);
 +    } else {
 +      t3 = CENTRAL;
 +    }
 +    
 +    rvec_inc(fshift[t1],f_i);
 +    rvec_dec(fshift[CENTRAL],f_j);
 +    rvec_dec(fshift[t2],f_k);
 +    rvec_inc(fshift[t3],f_l);
 +  }
 +  /* 112 TOTAL        */
 +}
 +
 +/* As do_dih_fup above, but without shift forces */
 +static void
 +do_dih_fup_noshiftf(int i,int j,int k,int l,real ddphi,
 +                    rvec r_ij,rvec r_kj,rvec r_kl,
 +                    rvec m,rvec n,rvec f[])
 +{
 +  rvec f_i,f_j,f_k,f_l;
 +  rvec uvec,vvec,svec,dx_jl;
 +  real iprm,iprn,nrkj,nrkj2,nrkj_1,nrkj_2;
 +  real a,b,p,q,toler;
 +  ivec jt,dt_ij,dt_kj,dt_lj;  
 +  
 +  iprm  = iprod(m,m);         /*  5   */
 +  iprn  = iprod(n,n);         /*  5   */
 +  nrkj2 = iprod(r_kj,r_kj);   /*  5   */
 +  toler = nrkj2*GMX_REAL_EPS;
 +  if ((iprm > toler) && (iprn > toler)) {
 +    nrkj_1 = gmx_invsqrt(nrkj2);      /* 10   */
 +    nrkj_2 = nrkj_1*nrkj_1;   /*  1   */
 +    nrkj  = nrkj2*nrkj_1;     /*  1   */
 +    a     = -ddphi*nrkj/iprm; /* 11   */
 +    svmul(a,m,f_i);           /*  3   */
 +    b     = ddphi*nrkj/iprn;  /* 11   */
 +    svmul(b,n,f_l);           /*  3   */
 +    p     = iprod(r_ij,r_kj); /*  5   */
 +    p    *= nrkj_2;           /*  1   */
 +    q     = iprod(r_kl,r_kj); /*  5   */
 +    q    *= nrkj_2;           /*  1   */
 +    svmul(p,f_i,uvec);                /*  3   */
 +    svmul(q,f_l,vvec);                /*  3   */
 +    rvec_sub(uvec,vvec,svec); /*  3   */
 +    rvec_sub(f_i,svec,f_j);   /*  3   */
 +    rvec_add(f_l,svec,f_k);   /*  3   */
 +    rvec_inc(f[i],f_i);       /*  3   */
 +    rvec_dec(f[j],f_j);       /*  3   */
 +    rvec_dec(f[k],f_k);       /*  3   */
 +    rvec_inc(f[l],f_l);       /*  3   */
 +  }
 +}
 +
 +/* As do_dih_fup_noshiftf above, but with pre-calculated pre-factors */
 +static void
 +do_dih_fup_noshiftf_precalc(int i,int j,int k,int l,real ddphi,
 +                            real nrkj_m2,real nrkj_n2,
 +                            real p,real q,
 +                            rvec m,rvec n,rvec f[])
 +{
 +    rvec f_i,f_j,f_k,f_l;
 +    rvec uvec,vvec,svec,dx_jl;
 +    real a,b,toler;
 +    ivec jt,dt_ij,dt_kj,dt_lj;  
 +  
 +    a = -ddphi*nrkj_m2;
 +    svmul(a,m,f_i);
 +    b =  ddphi*nrkj_n2;
 +    svmul(b,n,f_l);
 +    svmul(p,f_i,uvec);
 +    svmul(q,f_l,vvec);
 +    rvec_sub(uvec,vvec,svec);
 +    rvec_sub(f_i,svec,f_j);
 +    rvec_add(f_l,svec,f_k);
 +    rvec_inc(f[i],f_i);
 +    rvec_dec(f[j],f_j);
 +    rvec_dec(f[k],f_k);
 +    rvec_inc(f[l],f_l);
 +}
 +
 +
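 +/* Proper dihedral V(phi) = cp(lambda)*(1 + cos(mult*phi - phi0(lambda))).
 + * Returns dV/dlambda and outputs the energy in *V and dV/dphi in *F.
 + */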
 +real dopdihs(real cpA,real cpB,real phiA,real phiB,int mult,
 +           real phi,real lambda,real *V,real *F)
 +{
 +  real v,dvdlambda,mdphi,v1,sdphi,ddphi;
 +  real L1   = 1.0 - lambda;
 +  real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
 +  real dph0 = (phiB - phiA)*DEG2RAD;
 +  real cp   = L1*cpA + lambda*cpB;
 +  
 +  mdphi =  mult*phi - ph0;
 +  sdphi = sin(mdphi);
 +  ddphi = -cp*mult*sdphi;
 +  v1    = 1.0 + cos(mdphi);
 +  v     = cp*v1;
 +  
 +  dvdlambda  = (cpB - cpA)*v1 + cp*dph0*sdphi;
 +  
 +  *V = v;
 +  *F = ddphi;
 +  
 +  return dvdlambda;
 +  
 +  /* That was 40 flops */
 +}
 +
 +static void
 +dopdihs_noener(real cpA,real cpB,real phiA,real phiB,int mult,
 +               real phi,real lambda,real *F)
 +{
 +  real mdphi,sdphi,ddphi;
 +  real L1   = 1.0 - lambda;
 +  real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
 +  real cp   = L1*cpA + lambda*cpB;
 +  
 +  mdphi = mult*phi - ph0;
 +  sdphi = sin(mdphi);
 +  ddphi = -cp*mult*sdphi;
 +  
 +  *F = ddphi;
 +  
 +  /* That was 20 flops */
 +}
 +
 +static void
 +dopdihs_mdphi(real cpA,real cpB,real phiA,real phiB,int mult,
 +              real phi,real lambda,real *cp,real *mdphi)
 +{
 +    real L1   = 1.0 - lambda;
 +    real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
 +
 +    *cp    = L1*cpA + lambda*cpB;
 +
 +    *mdphi = mult*phi - ph0;
 +}
 +
 +static real dopdihs_min(real cpA,real cpB,real phiA,real phiB,int mult,
 +                      real phi,real lambda,real *V,real *F)
 +     /* similar to dopdihs, except for a minus sign  *
 +      * and a different treatment of mult/phi0       */
 +{
 +  real v,dvdlambda,mdphi,v1,sdphi,ddphi;
 +  real L1   = 1.0 - lambda;
 +  real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
 +  real dph0 = (phiB - phiA)*DEG2RAD;
 +  real cp   = L1*cpA + lambda*cpB;
 +  
 +  mdphi = mult*(phi-ph0);
 +  sdphi = sin(mdphi);
 +  ddphi = cp*mult*sdphi;
 +  v1    = 1.0-cos(mdphi);
 +  v     = cp*v1;
 +  
 +  dvdlambda  = (cpB-cpA)*v1 + cp*dph0*sdphi;
 +  
 +  *V = v;
 +  *F = ddphi;
 +  
 +  return dvdlambda;
 +  
 +  /* That was 40 flops */
 +}
 +
 +real pdihs(int nbonds,
 +         const t_iatom forceatoms[],const t_iparams forceparams[],
 +         const rvec x[],rvec f[],rvec fshift[],
 +         const t_pbc *pbc,const t_graph *g,
 +         real lambda,real *dvdlambda,
 +         const t_mdatoms *md,t_fcdata *fcd,
 +         int *global_atom_index)
 +{
 +  int  i,type,ai,aj,ak,al;
 +  int  t1,t2,t3;
 +  rvec r_ij,r_kj,r_kl,m,n;
 +  real phi,sign,ddphi,vpd,vtot;
 +
 +  vtot = 0.0;
 +
 +  for(i=0; (i<nbonds); ) {
 +    type = forceatoms[i++];
 +    ai   = forceatoms[i++];
 +    aj   = forceatoms[i++];
 +    ak   = forceatoms[i++];
 +    al   = forceatoms[i++];
 +    
 +    phi=dih_angle(x[ai],x[aj],x[ak],x[al],pbc,r_ij,r_kj,r_kl,m,n,
 +                  &sign,&t1,&t2,&t3);                 /*  84          */
 +    *dvdlambda += dopdihs(forceparams[type].pdihs.cpA,
 +                          forceparams[type].pdihs.cpB,
 +                          forceparams[type].pdihs.phiA,
 +                          forceparams[type].pdihs.phiB,
 +                          forceparams[type].pdihs.mult,
 +                          phi,lambda,&vpd,&ddphi);
 +
 +    vtot += vpd;
 +    do_dih_fup(ai,aj,ak,al,ddphi,r_ij,r_kj,r_kl,m,n,
 +             f,fshift,pbc,g,x,t1,t2,t3);                      /* 112          */
 +
 +#ifdef DEBUG
 +    fprintf(debug,"pdih: (%d,%d,%d,%d) phi=%g\n",
 +          ai,aj,ak,al,phi);
 +#endif
 +  } /* 223 TOTAL      */
 +
 +  return vtot;
 +}
 +
 +void make_dp_periodic(real *dp)  /* 1 flop? */
 +{
 +    /* Fold dp back into the interval [-pi,pi) */
 +    if (*dp >= M_PI)
 +    {
 +        *dp -= 2*M_PI;
 +    }
 +    else if (*dp < -M_PI)
 +    {
 +        *dp += 2*M_PI;
 +    }
 +    return;
 +}
 +
 +/* As pdihs above, but without calculating energies and shift forces */
 +static void
 +pdihs_noener(int nbonds,
 +             const t_iatom forceatoms[],const t_iparams forceparams[],
 +             const rvec x[],rvec f[],
 +             const t_pbc *pbc,const t_graph *g,
 +             real lambda,
 +             const t_mdatoms *md,t_fcdata *fcd,
 +             int *global_atom_index)
 +{
 +    int  i,type,ai,aj,ak,al;
 +    int  t1,t2,t3;
 +    rvec r_ij,r_kj,r_kl,m,n;
 +    real phi,sign,ddphi_tot,ddphi;
 +
 +    for(i=0; (i<nbonds); )
 +    {
 +        ai   = forceatoms[i+1];
 +        aj   = forceatoms[i+2];
 +        ak   = forceatoms[i+3];
 +        al   = forceatoms[i+4];
 +
 +        phi = dih_angle(x[ai],x[aj],x[ak],x[al],pbc,r_ij,r_kj,r_kl,m,n,
 +                        &sign,&t1,&t2,&t3);
 +
 +        ddphi_tot = 0;
 +
 +        /* Loop over dihedrals working on the same atoms,
 +         * so we avoid recalculating angles and force distributions.
 +         */
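 +        /* Typically these are Fourier terms with different multiplicities
 +         * acting on the same i-j-k-l quadruplet; only ddphi differs per
 +         * term, so the contributions are summed and applied in a single
 +         * do_dih_fup_noshiftf() call below.
 +         */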
 +        do
 +        {
 +            type = forceatoms[i];
 +            dopdihs_noener(forceparams[type].pdihs.cpA,
 +                           forceparams[type].pdihs.cpB,
 +                           forceparams[type].pdihs.phiA,
 +                           forceparams[type].pdihs.phiB,
 +                           forceparams[type].pdihs.mult,
 +                           phi,lambda,&ddphi);
 +            ddphi_tot += ddphi;
 +
 +            i += 5;
 +        }
 +        while(i < nbonds &&
 +              forceatoms[i+1] == ai &&
 +              forceatoms[i+2] == aj &&
 +              forceatoms[i+3] == ak &&
 +              forceatoms[i+4] == al);
 +
 +        do_dih_fup_noshiftf(ai,aj,ak,al,ddphi_tot,r_ij,r_kj,r_kl,m,n,f);
 +    }
 +}
 +
 +
 +#ifdef SSE_PROPER_DIHEDRALS
 +
 +/* As pdihs_noener above, but using SSE to calculate 4 dihedrals at once */
 +static void
 +pdihs_noener_sse(int nbonds,
 +                 const t_iatom forceatoms[],const t_iparams forceparams[],
 +                 const rvec x[],rvec f[],
 +                 const t_pbc *pbc,const t_graph *g,
 +                 real lambda,
 +                 const t_mdatoms *md,t_fcdata *fcd,
 +                 int *global_atom_index)
 +{
 +    int  i,i4,s;
 +    int  type,ai[4],aj[4],ak[4],al[4];
 +    int  t1[4],t2[4],t3[4];
 +    int  mult[4];
 +    real cp[4],mdphi[4];
 +    real ddphi;
 +    rvec_sse_t rs_array[13],*rs;
 +    real buf_array[24],*buf;
 +    __m128 mdphi_SSE,sin_SSE,cos_SSE;
 +
 +    /* Ensure 16-byte alignment */
 +    rs  = (rvec_sse_t *)(((size_t)(rs_array +1)) & (~((size_t)15)));
 +    buf =      (float *)(((size_t)(buf_array+3)) & (~((size_t)15)));
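 +    /* The stack arrays are over-sized (13 vs 12 rvec_sse_t, 24 vs 20
 +     * floats), so masking the offset addresses down to a 16-byte boundary
 +     * still leaves room for all elements that are used below.
 +     */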
 +
 +    for(i=0; (i<nbonds); i+=20)
 +    {
 +        /* Collect atoms quadruplets for 4 dihedrals */
 +        i4 = i;
 +        for(s=0; s<4; s++)
 +        {
 +            ai[s] = forceatoms[i4+1];
 +            aj[s] = forceatoms[i4+2];
 +            ak[s] = forceatoms[i4+3];
 +            al[s] = forceatoms[i4+4];
 +            /* At the end fill the arrays with identical entries */
 +            if (i4 + 5 < nbonds)
 +            {
 +                i4 += 5;
 +            }
 +        }
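 +        /* For a trailing, incomplete set of 4 dihedrals i4 stops advancing
 +         * above, so the last quadruplet is duplicated into the unused SIMD
 +         * lanes; those lanes get mdphi = 0 below and are skipped in the
 +         * force loop at the end.
 +         */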
 +
 +        /* Calculate 4 dihedral angles at once */
 +        dih_angle_sse(x,ai,aj,ak,al,pbc,t1,t2,t3,rs,buf);
 +
 +        i4 = i;
 +        for(s=0; s<4; s++)
 +        {
 +            if (i4 < nbonds)
 +            {
 +                /* Calculate the coefficient and angle deviation */
 +                type = forceatoms[i4];
 +                dopdihs_mdphi(forceparams[type].pdihs.cpA,
 +                              forceparams[type].pdihs.cpB,
 +                              forceparams[type].pdihs.phiA,
 +                              forceparams[type].pdihs.phiB,
 +                              forceparams[type].pdihs.mult,
 +                              buf[16+s],lambda,&cp[s],&buf[16+s]);
 +                mult[s] = forceparams[type].pdihs.mult;
 +            }
 +            else
 +            {
 +                buf[16+s] = 0;
 +            }
 +            i4 += 5;
 +        }
 +
 +        /* Calculate 4 sines at once */
 +        mdphi_SSE = _mm_load_ps(buf+16);
 +        gmx_mm_sincos_ps(mdphi_SSE,&sin_SSE,&cos_SSE);
 +        _mm_store_ps(buf+16,sin_SSE);
 +
 +        i4 = i;
 +        s = 0;
 +        do
 +        {
 +            ddphi = -cp[s]*mult[s]*buf[16+s];
 +
 +            do_dih_fup_noshiftf_precalc(ai[s],aj[s],ak[s],al[s],ddphi,
 +                                        buf[ 0+s],buf[ 4+s],
 +                                        buf[ 8+s],buf[12+s],
 +                                        rs[0+s].v,rs[4+s].v,
 +                                        f);
 +            s++;
 +            i4 += 5;
 +        }
 +        while (s < 4 && i4 < nbonds);
 +    }
 +}
 +
 +#endif /* SSE_PROPER_DIHEDRALS */
 +
 +
 +real idihs(int nbonds,
 +         const t_iatom forceatoms[],const t_iparams forceparams[],
 +         const rvec x[],rvec f[],rvec fshift[],
 +         const t_pbc *pbc,const t_graph *g,
 +         real lambda,real *dvdlambda,
 +         const t_mdatoms *md,t_fcdata *fcd,
 +         int *global_atom_index)
 +{
 +  int  i,type,ai,aj,ak,al;
 +  int  t1,t2,t3;
 +  real phi,phi0,dphi0,ddphi,sign,vtot;
 +  rvec r_ij,r_kj,r_kl,m,n;
 +  real L1,kk,dp,dp2,kA,kB,pA,pB,dvdl_term;
 +
 +  L1 = 1.0-lambda;
 +  dvdl_term = 0;
 +  vtot = 0.0;
 +  for(i=0; (i<nbonds); ) {
 +    type = forceatoms[i++];
 +    ai   = forceatoms[i++];
 +    aj   = forceatoms[i++];
 +    ak   = forceatoms[i++];
 +    al   = forceatoms[i++];
 +    
 +    phi=dih_angle(x[ai],x[aj],x[ak],x[al],pbc,r_ij,r_kj,r_kl,m,n,
 +                  &sign,&t1,&t2,&t3);                 /*  84          */
 +    
 +    /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
 +     * force changes if we just apply a normal harmonic.
 +     * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
 +     * This means we will never have the periodicity problem, unless
 +     * the dihedral is Pi away from phi0, which is very unlikely due to
 +     * the potential.
 +     */
 +    kA = forceparams[type].harmonic.krA;
 +    kB = forceparams[type].harmonic.krB;
 +    pA = forceparams[type].harmonic.rA;
 +    pB = forceparams[type].harmonic.rB;
 +
 +    kk    = L1*kA + lambda*kB;
 +    phi0  = (L1*pA + lambda*pB)*DEG2RAD;
 +    dphi0 = (pB - pA)*DEG2RAD;
 +
 +    dp = phi-phi0;  
 +
 +    make_dp_periodic(&dp);
 +    
 +    dp2 = dp*dp;
 +
 +    vtot += 0.5*kk*dp2;
 +    ddphi = -kk*dp;
 +    
 +    dvdl_term += 0.5*(kB - kA)*dp2 - kk*dphi0*dp;
 +
 +    do_dih_fup(ai,aj,ak,al,(real)(-ddphi),r_ij,r_kj,r_kl,m,n,
 +             f,fshift,pbc,g,x,t1,t2,t3);                      /* 112          */
 +    /* 218 TOTAL      */
 +#ifdef DEBUG
 +    if (debug)
 +      fprintf(debug,"idih: (%d,%d,%d,%d) phi=%g\n",
 +            ai,aj,ak,al,phi);
 +#endif
 +  }
 +  
 +  *dvdlambda += dvdl_term;
 +  return vtot;
 +}
 +
 +
 +/*! \brief returns dx, rdist, and dpdl for functions posres() and fbposres()        
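 + *
 + * dx is the PBC-corrected vector from the lambda-interpolated reference
 + * position to x, rdist the non-box-scaled part of that reference (used by
 + * the callers for the virial correction), and dpdl the derivative of the
 + * reference position with respect to lambda.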
 + */
 +static void posres_dx(const rvec x, const rvec pos0A, const rvec pos0B,
 +                      const rvec comA_sc, const rvec comB_sc,
 +                      real lambda,
 +                      t_pbc *pbc, int refcoord_scaling,int npbcdim,
 +                      rvec dx, rvec rdist, rvec dpdl)
 +{
 +    int m,d;
 +    real posA, posB, L1, ref=0.;
 +    rvec pos;
 +
 +    L1=1.0-lambda;
 +
 +    for(m=0; m<DIM; m++)
 +    {
 +        posA = pos0A[m];
 +        posB = pos0B[m];
 +        if (m < npbcdim)
 +        {
 +            switch (refcoord_scaling)
 +            {
 +            case erscNO:
 +                ref      = 0;
 +                rdist[m] = L1*posA + lambda*posB;
 +                dpdl[m]  = posB - posA;
 +                    break;
 +            case erscALL:
 +                /* Box relative coordinates are stored for dimensions with pbc */
 +                posA *= pbc->box[m][m];
 +                posB *= pbc->box[m][m];
 +                for(d=m+1; d<npbcdim; d++)
 +                {
 +                    posA += pos0A[d]*pbc->box[d][m];
 +                    posB += pos0B[d]*pbc->box[d][m];
 +                }
 +                ref      = L1*posA + lambda*posB;
 +                rdist[m] = 0;
 +                dpdl[m]  = posB - posA;
 +                break;
 +            case erscCOM:
 +                ref      = L1*comA_sc[m] + lambda*comB_sc[m];
 +                rdist[m] = L1*posA       + lambda*posB;
 +                dpdl[m]  = comB_sc[m] - comA_sc[m] + posB - posA;
 +                break;
 +            default:
 +                gmx_fatal(FARGS, "No such scaling method implemented");
 +            }
 +        }
 +        else
 +        {
 +            ref      = L1*posA + lambda*posB;
 +            rdist[m] = 0;
 +            dpdl[m]  = posB - posA;
 +        }
 +
 +        /* We do pbc_dx with ref+rdist,
 +         * since with only ref we can be up to half a box vector wrong.
 +         */
 +        pos[m] = ref + rdist[m];
 +    }
 +
 +    if (pbc)
 +    {
 +        pbc_dx(pbc,x,pos,dx);
 +    }
 +    else
 +    {
 +        rvec_sub(x,pos,dx);
 +    }
 +}
 +
 +/*! \brief Adds the forces of flat-bottomed position restraints to f[]
 + *         and updates vir_diag. Returns the flat-bottomed potential. */
 +real fbposres(int nbonds,
 +              const t_iatom forceatoms[],const t_iparams forceparams[],
 +              const rvec x[],rvec f[],rvec vir_diag,
 +              t_pbc *pbc,
 +              int refcoord_scaling,int ePBC,rvec com)
 +/* compute flat-bottomed position restraints */
 +{
 +    int  i,ai,m,d,type,npbcdim=0,fbdim;
 +    const t_iparams *pr;
 +    real vtot,kk,v;
 +    real ref=0,dr,dr2,rpot,rfb,rfb2,fact,invdr;
 +    rvec com_sc,rdist,pos,dx,dpdl,fm;
 +    gmx_bool bInvert;
 +
 +    npbcdim = ePBC2npbcdim(ePBC);
 +
 +    if (refcoord_scaling == erscCOM)
 +    {
 +        clear_rvec(com_sc);
 +        for(m=0; m<npbcdim; m++)
 +        {
 +            for(d=m; d<npbcdim; d++)
 +            {
 +                com_sc[m] += com[d]*pbc->box[d][m];
 +            }
 +        }
 +    }
 +
 +    vtot = 0.0;
 +    for(i=0; (i<nbonds); )
 +    {
 +        type = forceatoms[i++];
 +        ai   = forceatoms[i++];
 +        pr   = &forceparams[type];
 +
 +        /* same calculation as for normal posres, but with identical A and B states, and lambda==0 */
 +        posres_dx(x[ai],forceparams[type].fbposres.pos0, forceparams[type].fbposres.pos0,
 +                  com_sc, com_sc, 0.0,
 +                  pbc, refcoord_scaling, npbcdim,
 +                  dx, rdist, dpdl);
 +
 +        clear_rvec(fm);
 +        v=0.0;
 +
 +        kk=pr->fbposres.k;
 +        rfb=pr->fbposres.r;
 +        rfb2=sqr(rfb);
 +
 +        /* with rfb<0, push particle out of the sphere/cylinder/layer */
 +        bInvert=FALSE;
 +        if (rfb<0.){
 +            bInvert=TRUE;
 +            rfb=-rfb;
 +        }
 +
 +        switch (pr->fbposres.geom)
 +        {
 +        case efbposresSPHERE:
 +            /* spherical flat-bottom posres */
 +            dr2=norm2(dx);
 +            if ( dr2 > 0.0 &&
 +                 ( (dr2 > rfb2 && bInvert==FALSE ) || (dr2 < rfb2 && bInvert==TRUE ) )
 +                )
 +            {
 +                dr=sqrt(dr2);
 +                v = 0.5*kk*sqr(dr - rfb);
 +                fact = -kk*(dr-rfb)/dr;  /* Force pointing to the center pos0 */
 +                svmul(fact,dx,fm);
 +            }
 +            break;
 +        case efbposresCYLINDER:
 +            /* cylindrical flat-bottom posres in the x-y plane. fm[ZZ] = 0. */
 +            dr2=sqr(dx[XX])+sqr(dx[YY]);
 +            if  ( dr2 > 0.0 &&
 +                  ( (dr2 > rfb2 && bInvert==FALSE ) || (dr2 < rfb2 && bInvert==TRUE ) )
 +                )
 +            {
 +                dr=sqrt(dr2);
 +                invdr=1./dr;
 +                v = 0.5*kk*sqr(dr - rfb);
 +                fm[XX] = -kk*(dr-rfb)*dx[XX]*invdr;  /* Force pointing to the center */
 +                fm[YY] = -kk*(dr-rfb)*dx[YY]*invdr;
 +            }
 +            break;
 +        case efbposresX: /* fbdim=XX */
 +        case efbposresY: /* fbdim=YY */
 +        case efbposresZ: /* fbdim=ZZ */
 +            /* 1D flat-bottom potential */
 +            fbdim = pr->fbposres.geom - efbposresX;
 +            dr=dx[fbdim];
 +            if ( ( dr>rfb && bInvert==FALSE ) || ( 0<dr && dr<rfb && bInvert==TRUE )  )
 +            {
 +                v = 0.5*kk*sqr(dr - rfb);
 +                fm[fbdim] = -kk*(dr - rfb);
 +            }
 +            else if ( (dr < (-rfb) && bInvert==FALSE ) || ( (-rfb)<dr && dr<0 && bInvert==TRUE ))
 +            {
 +                v = 0.5*kk*sqr(dr + rfb);
 +                fm[fbdim] = -kk*(dr + rfb);
 +            }
 +            break;
 +        }
 +
 +        vtot += v;
 +
 +        for (m=0; (m<DIM); m++)
 +        {
 +            f[ai][m]   += fm[m];
 +            /* Here we correct for the pbc_dx which included rdist */
 +            vir_diag[m] -= 0.5*(dx[m] + rdist[m])*fm[m];
 +        }
 +    }
 +
 +    return vtot;
 +}
 +
 +
 +real posres(int nbonds,
 +            const t_iatom forceatoms[],const t_iparams forceparams[],
 +            const rvec x[],rvec f[],rvec vir_diag,
 +            t_pbc *pbc,
 +            real lambda,real *dvdlambda,
 +            int refcoord_scaling,int ePBC,rvec comA,rvec comB)
 +{
 +    int  i,ai,m,d,type,ki,npbcdim=0;
 +    const t_iparams *pr;
 +    real L1;
 +    real vtot,kk,fm;
 +    real posA,posB,ref=0;
 +    rvec comA_sc,comB_sc,rdist,dpdl,pos,dx;
 +    gmx_bool bForceValid = TRUE;
 +
 +    if ((f==NULL) || (vir_diag==NULL)) {  /* f and vir_diag should both be NULL, or both non-NULL */
 +        bForceValid = FALSE;
 +    }
 +
 +    npbcdim = ePBC2npbcdim(ePBC);
 +
 +    if (refcoord_scaling == erscCOM)
 +    {
 +        clear_rvec(comA_sc);
 +        clear_rvec(comB_sc);
 +        for(m=0; m<npbcdim; m++)
 +        {
 +            for(d=m; d<npbcdim; d++)
 +            {
 +                comA_sc[m] += comA[d]*pbc->box[d][m];
 +                comB_sc[m] += comB[d]*pbc->box[d][m];
 +            }
 +        }
 +    }
 +
 +    L1 = 1.0 - lambda;
 +
 +    vtot = 0.0;
 +    for(i=0; (i<nbonds); )
 +    {
 +        type = forceatoms[i++];
 +        ai   = forceatoms[i++];
 +        pr   = &forceparams[type];
 +        
 +        /* return dx, rdist, and dpdl */
 +        posres_dx(x[ai],forceparams[type].posres.pos0A, forceparams[type].posres.pos0B,
 +                  comA_sc, comB_sc, lambda,
 +                  pbc, refcoord_scaling, npbcdim,
 +                  dx, rdist, dpdl);
 +
 +        for (m=0; (m<DIM); m++)
 +        {
 +            kk          = L1*pr->posres.fcA[m] + lambda*pr->posres.fcB[m];
 +            fm          = -kk*dx[m];
 +            vtot       += 0.5*kk*dx[m]*dx[m];
 +            *dvdlambda +=
 +                0.5*(pr->posres.fcB[m] - pr->posres.fcA[m])*dx[m]*dx[m]
 +                -fm*dpdl[m];
 +
 +            /* Here we correct for the pbc_dx which included rdist */
 +            if (bForceValid) {
 +                f[ai][m]   += fm;
 +                vir_diag[m] -= 0.5*(dx[m] + rdist[m])*fm;
 +            }
 +        }
 +    }
 +
 +    return vtot;
 +}
 +
 +static real low_angres(int nbonds,
 +                     const t_iatom forceatoms[],const t_iparams forceparams[],
 +                     const rvec x[],rvec f[],rvec fshift[],
 +                     const t_pbc *pbc,const t_graph *g,
 +                     real lambda,real *dvdlambda,
 +                     gmx_bool bZAxis)
 +{
 +  int  i,m,type,ai,aj,ak,al;
 +  int  t1,t2;
 +  real phi,cos_phi,cos_phi2,vid,vtot,dVdphi;
 +  rvec r_ij,r_kl,f_i,f_k={0,0,0};
 +  real st,sth,nrij2,nrkl2,c,cij,ckl;
 +
 +  ivec dt;  
 +  t2 = 0; /* avoid warning with gcc-3.3. It is never used uninitialized */
 +
 +  vtot = 0.0;
 +  ak=al=0; /* to avoid warnings */
 +  for(i=0; i<nbonds; ) {
 +    type = forceatoms[i++];
 +    ai   = forceatoms[i++];
 +    aj   = forceatoms[i++];
 +    t1   = pbc_rvec_sub(pbc,x[aj],x[ai],r_ij);                /*  3           */
 +    if (!bZAxis) {      
 +      ak   = forceatoms[i++];
 +      al   = forceatoms[i++];
 +      t2   = pbc_rvec_sub(pbc,x[al],x[ak],r_kl);           /*  3              */
 +    } else {
 +      r_kl[XX] = 0;
 +      r_kl[YY] = 0;
 +      r_kl[ZZ] = 1;
 +    }
 +
 +    cos_phi = cos_angle(r_ij,r_kl);           /* 25           */
 +    phi     = acos(cos_phi);                    /* 10           */
 +
 +    *dvdlambda += dopdihs_min(forceparams[type].pdihs.cpA,
 +                              forceparams[type].pdihs.cpB,
 +                              forceparams[type].pdihs.phiA,
 +                              forceparams[type].pdihs.phiB,
 +                              forceparams[type].pdihs.mult,
 +                              phi,lambda,&vid,&dVdphi); /*  40  */
 +    
 +    vtot += vid;
 +
 +    cos_phi2 = sqr(cos_phi);                    /*   1                */
 +    if (cos_phi2 < 1) {
 +      st  = -dVdphi*gmx_invsqrt(1 - cos_phi2);      /*  12            */
 +      sth = st*cos_phi;                               /*   1          */
 +      nrij2 = iprod(r_ij,r_ij);                       /*   5          */
 +      nrkl2 = iprod(r_kl,r_kl);                 /*   5          */
 +      
 +      c   = st*gmx_invsqrt(nrij2*nrkl2);              /*  11          */ 
 +      cij = sth/nrij2;                                /*  10          */
 +      ckl = sth/nrkl2;                                /*  10          */
 +      
 +      for (m=0; m<DIM; m++) {                 /*  18+18       */
 +          f_i[m] = (c*r_kl[m]-cij*r_ij[m]);
 +          f[ai][m] += f_i[m];
 +          f[aj][m] -= f_i[m];
 +          if (!bZAxis) {
 +              f_k[m] = (c*r_ij[m]-ckl*r_kl[m]);
 +              f[ak][m] += f_k[m];
 +              f[al][m] -= f_k[m];
 +          }
 +      }
 +
 +      if (g) {
 +          ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
 +          t1=IVEC2IS(dt);
 +      }
 +      rvec_inc(fshift[t1],f_i);
 +      rvec_dec(fshift[CENTRAL],f_i);
 +      if (!bZAxis) {
 +          if (g) {
 +              ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,al),dt);
 +              t2=IVEC2IS(dt);
 +          }
 +          rvec_inc(fshift[t2],f_k);
 +          rvec_dec(fshift[CENTRAL],f_k);
 +      }
 +    }
 +  }
 +
 +  return vtot;  /*  184 / 157 (bZAxis)  total  */
 +}
 +
 +real angres(int nbonds,
 +          const t_iatom forceatoms[],const t_iparams forceparams[],
 +          const rvec x[],rvec f[],rvec fshift[],
 +          const t_pbc *pbc,const t_graph *g,
 +          real lambda,real *dvdlambda,
 +          const t_mdatoms *md,t_fcdata *fcd,
 +          int *global_atom_index)
 +{
 +  return low_angres(nbonds,forceatoms,forceparams,x,f,fshift,pbc,g,
 +                  lambda,dvdlambda,FALSE);
 +}
 +
 +real angresz(int nbonds,
 +           const t_iatom forceatoms[],const t_iparams forceparams[],
 +           const rvec x[],rvec f[],rvec fshift[],
 +           const t_pbc *pbc,const t_graph *g,
 +           real lambda,real *dvdlambda,
 +           const t_mdatoms *md,t_fcdata *fcd,
 +           int *global_atom_index)
 +{
 +  return low_angres(nbonds,forceatoms,forceparams,x,f,fshift,pbc,g,
 +                    lambda,dvdlambda,TRUE);
 +}
 +
 +real dihres(int nbonds,
 +            const t_iatom forceatoms[],const t_iparams forceparams[],
 +            const rvec x[],rvec f[],rvec fshift[],
 +            const t_pbc *pbc,const t_graph *g,
 +            real lambda,real *dvdlambda,
 +            const t_mdatoms *md,t_fcdata *fcd,
 +            int *global_atom_index)
 +{
 +    real vtot = 0;
 +    int  ai,aj,ak,al,i,k,type,t1,t2,t3;
 +    real phi0A,phi0B,dphiA,dphiB,kfacA,kfacB,phi0,dphi,kfac;
 +    real phi,ddphi,ddp,ddp2,dp,sign,d2r,fc,L1;
 +    rvec r_ij,r_kj,r_kl,m,n;
 +
 +    L1 = 1.0-lambda;
 +
 +    d2r = DEG2RAD;
 +    k   = 0;
 +
 +    for (i=0; (i<nbonds); )
 +    {
 +        type = forceatoms[i++];
 +        ai   = forceatoms[i++];
 +        aj   = forceatoms[i++];
 +        ak   = forceatoms[i++];
 +        al   = forceatoms[i++];
 +
 +        phi0A  = forceparams[type].dihres.phiA*d2r;
 +        dphiA  = forceparams[type].dihres.dphiA*d2r;
 +        kfacA  = forceparams[type].dihres.kfacA;
 +
 +        phi0B  = forceparams[type].dihres.phiB*d2r;
 +        dphiB  = forceparams[type].dihres.dphiB*d2r;
 +        kfacB  = forceparams[type].dihres.kfacB;
 +
 +        phi0  = L1*phi0A + lambda*phi0B;
 +        dphi  = L1*dphiA + lambda*dphiB;
 +        kfac = L1*kfacA + lambda*kfacB;
 +
 +        phi = dih_angle(x[ai],x[aj],x[ak],x[al],pbc,r_ij,r_kj,r_kl,m,n,
 +                        &sign,&t1,&t2,&t3);
 +        /* 84 flops */
 +
 +        if (debug)
 +        {
 +            fprintf(debug,"dihres[%d]: %d %d %d %d : phi=%f, dphi=%f, kfac=%f\n",
 +                    k++,ai,aj,ak,al,phi0,dphi,kfac);
 +        }
 +        /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
 +         * force changes if we just apply a normal harmonic.
 +         * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
 +         * This means we will never have the periodicity problem, unless
 +         * the dihedral is Pi away from phi0, which is very unlikely due to
 +         * the potential.
 +         */
 +        dp = phi-phi0;
 +        make_dp_periodic(&dp);
 +
 +        if (dp > dphi)
 +        {
 +            ddp = dp-dphi;
 +        }
 +        else if (dp < -dphi)
 +        {
 +            ddp = dp+dphi;
 +        }
 +        else
 +        {
 +            ddp = 0;
 +        }
 +
 +        if (ddp != 0.0)
 +        {
 +            ddp2 = ddp*ddp;
 +            vtot += 0.5*kfac*ddp2;
 +            ddphi = kfac*ddp;
 +
 +            *dvdlambda += 0.5*(kfacB - kfacA)*ddp2;
 +            /* lambda dependence from changing restraint distances */
 +            if (ddp > 0)
 +            {
 +                *dvdlambda -= kfac*ddp*((dphiB - dphiA)+(phi0B - phi0A));
 +            }
 +            else if (ddp < 0)
 +            {
 +                *dvdlambda += kfac*ddp*((dphiB - dphiA)-(phi0B - phi0A));
 +            }
 +            do_dih_fup(ai,aj,ak,al,ddphi,r_ij,r_kj,r_kl,m,n,
 +                       f,fshift,pbc,g,x,t1,t2,t3);            /* 112          */
 +        }
 +    }
 +    return vtot;
 +}
 +
 +
 +real unimplemented(int nbonds,
 +                 const t_iatom forceatoms[],const t_iparams forceparams[],
 +                 const rvec x[],rvec f[],rvec fshift[],
 +                 const t_pbc *pbc,const t_graph *g,
 +                 real lambda,real *dvdlambda,
 +                 const t_mdatoms *md,t_fcdata *fcd,
 +                 int *global_atom_index)
 +{
 +  gmx_impl("*** You are using a function that is not implemented");
 +
 +  return 0.0; /* To make the compiler happy */
 +}
 +
 +real rbdihs(int nbonds,
 +          const t_iatom forceatoms[],const t_iparams forceparams[],
 +          const rvec x[],rvec f[],rvec fshift[],
 +          const t_pbc *pbc,const t_graph *g,
 +          real lambda,real *dvdlambda,
 +          const t_mdatoms *md,t_fcdata *fcd,
 +          int *global_atom_index)
 +{
 +  const real c0=0.0,c1=1.0,c2=2.0,c3=3.0,c4=4.0,c5=5.0;
 +  int  type,ai,aj,ak,al,i,j;
 +  int  t1,t2,t3;
 +  rvec r_ij,r_kj,r_kl,m,n;
 +  real parmA[NR_RBDIHS];
 +  real parmB[NR_RBDIHS];
 +  real parm[NR_RBDIHS];
 +  real cos_phi,phi,rbp,rbpBA;
 +  real v,sign,ddphi,sin_phi;
 +  real cosfac,vtot;
 +  real L1   = 1.0-lambda;
 +  real dvdl_term=0;
 +
 +  vtot = 0.0;
 +  for(i=0; (i<nbonds); ) {
 +    type = forceatoms[i++];
 +    ai   = forceatoms[i++];
 +    aj   = forceatoms[i++];
 +    ak   = forceatoms[i++];
 +    al   = forceatoms[i++];
 +
 +    phi=dih_angle(x[ai],x[aj],x[ak],x[al],pbc,r_ij,r_kj,r_kl,m,n,
 +                  &sign,&t1,&t2,&t3);                       /*  84          */
 +
 +    /* Change to polymer convention */
 +    if (phi < c0)
 +      phi += M_PI;
 +    else
 +      phi -= M_PI;                    /*   1          */
 +      
 +    cos_phi = cos(phi);               
 +    /* Beware of accuracy loss, cannot use 1-sqrt(cos^2) ! */
 +    sin_phi = sin(phi);
 +
 +    for(j=0; (j<NR_RBDIHS); j++) {
 +      parmA[j] = forceparams[type].rbdihs.rbcA[j];
 +      parmB[j] = forceparams[type].rbdihs.rbcB[j];
 +      parm[j]  = L1*parmA[j]+lambda*parmB[j];
 +    }
 +    /* Calculate cosine powers */
 +    /* Calculate the energy */
 +    /* Calculate the derivative */
 +
 +    v       = parm[0];
 +    dvdl_term   += (parmB[0]-parmA[0]);
 +    ddphi   = c0;
 +    cosfac  = c1;
 +    
 +    rbp     = parm[1];
 +    rbpBA   = parmB[1]-parmA[1];
 +    ddphi  += rbp*cosfac;
 +    cosfac *= cos_phi;
 +    v      += cosfac*rbp;
 +    dvdl_term   += cosfac*rbpBA;
 +    rbp     = parm[2];
 +    rbpBA   = parmB[2]-parmA[2];    
 +    ddphi  += c2*rbp*cosfac;
 +    cosfac *= cos_phi;
 +    v      += cosfac*rbp;
 +    dvdl_term   += cosfac*rbpBA;
 +    rbp     = parm[3];
 +    rbpBA   = parmB[3]-parmA[3];
 +    ddphi  += c3*rbp*cosfac;
 +    cosfac *= cos_phi;
 +    v      += cosfac*rbp;
 +    dvdl_term   += cosfac*rbpBA;
 +    rbp     = parm[4];
 +    rbpBA   = parmB[4]-parmA[4];
 +    ddphi  += c4*rbp*cosfac;
 +    cosfac *= cos_phi;
 +    v      += cosfac*rbp;
 +    dvdl_term   += cosfac*rbpBA;
 +    rbp     = parm[5];
 +    rbpBA   = parmB[5]-parmA[5];
 +    ddphi  += c5*rbp*cosfac;
 +    cosfac *= cos_phi;
 +    v      += cosfac*rbp;
 +    dvdl_term   += cosfac*rbpBA;
 +   
 +    ddphi = -ddphi*sin_phi;                           /*  11          */
 +    
 +    do_dih_fup(ai,aj,ak,al,ddphi,r_ij,r_kj,r_kl,m,n,
 +             f,fshift,pbc,g,x,t1,t2,t3);              /* 112          */
 +    vtot += v;
 +  }  
 +  *dvdlambda += dvdl_term;
 +
 +  return vtot;
 +}
 +
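 +/* Wrap the grid index ip periodically and return it, together with the
 + * periodic indices of the neighbouring grid points ip-1, ip+1 and ip+2
 + * in *ipm1, *ipp1 and *ipp2.
 + */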
 +int cmap_setup_grid_index(int ip, int grid_spacing, int *ipm1, int *ipp1, int *ipp2)
 +{
 +      int im1, ip1, ip2;
 +      
 +      if(ip<0)
 +      {
 +              ip = ip + grid_spacing - 1;
 +      }
 +      else if(ip > grid_spacing)
 +      {
 +              ip = ip - grid_spacing - 1;
 +      }
 +      
 +      im1 = ip - 1;
 +      ip1 = ip + 1;
 +      ip2 = ip + 2;
 +      
 +      if(ip == 0)
 +      {
 +              im1 = grid_spacing - 1;
 +      }
 +      else if(ip == grid_spacing-2)
 +      {
 +              ip2 = 0;
 +      }
 +      else if(ip == grid_spacing-1)
 +      {
 +              ip1 = 0;
 +              ip2 = 1;
 +      }
 +      
 +      *ipm1 = im1;
 +      *ipp1 = ip1;
 +      *ipp2 = ip2;
 +      
 +      return ip;
 +      
 +}
 +
 +real cmap_dihs(int nbonds,
 +                         const t_iatom forceatoms[],const t_iparams forceparams[],
 +               const gmx_cmap_t *cmap_grid,
 +                         const rvec x[],rvec f[],rvec fshift[],
 +                         const t_pbc *pbc,const t_graph *g,
 +                         real lambda,real *dvdlambda,
 +                         const t_mdatoms *md,t_fcdata *fcd,
 +                         int *global_atom_index)
 +{
 +      int i,j,k,n,idx;
 +      int ai,aj,ak,al,am;
 +      int a1i,a1j,a1k,a1l,a2i,a2j,a2k,a2l;
 +      int type,cmapA;
 +      int t11,t21,t31,t12,t22,t32;
 +      int iphi1,ip1m1,ip1p1,ip1p2;
 +      int iphi2,ip2m1,ip2p1,ip2p2;
 +      int l1,l2,l3,l4;
 +      int pos1,pos2,pos3,pos4,tmp;
 +      
 +      real ty[4],ty1[4],ty2[4],ty12[4],tc[16],tx[16];
 +      real phi1,psi1,cos_phi1,sin_phi1,sign1,xphi1;
 +      real phi2,psi2,cos_phi2,sin_phi2,sign2,xphi2;
 +      real dx,xx,tt,tu,e,df1,df2,ddf1,ddf2,ddf12,vtot;
 +      real ra21,rb21,rg21,rg1,rgr1,ra2r1,rb2r1,rabr1;
 +      real ra22,rb22,rg22,rg2,rgr2,ra2r2,rb2r2,rabr2;
 +      real fg1,hg1,fga1,hgb1,gaa1,gbb1;
 +      real fg2,hg2,fga2,hgb2,gaa2,gbb2;
 +      real fac;
 +      
 +      rvec r1_ij, r1_kj, r1_kl,m1,n1;
 +      rvec r2_ij, r2_kj, r2_kl,m2,n2;
 +      rvec f1_i,f1_j,f1_k,f1_l;
 +      rvec f2_i,f2_j,f2_k,f2_l;
 +      rvec a1,b1,a2,b2;
 +      rvec f1,g1,h1,f2,g2,h2;
 +      rvec dtf1,dtg1,dth1,dtf2,dtg2,dth2;
 +      ivec jt1,dt1_ij,dt1_kj,dt1_lj;
 +      ivec jt2,dt2_ij,dt2_kj,dt2_lj;
 +
 +      const real *cmapd;
 +
 +      int loop_index[4][4] = {
 +              {0,4,8,12},
 +              {1,5,9,13},
 +              {2,6,10,14},
 +              {3,7,11,15}
 +      };
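 +      /* loop_index[i][j] = 4*j + i: transposed access into the 4x4
 +       * coefficient table tc[], used below when differentiating the
 +       * bicubic polynomial with respect to the first angle (tt).
 +       */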
 +      
 +      /* Total CMAP energy */
 +      vtot = 0;
 +      
 +      for(n=0;n<nbonds; )
 +      {
 +              /* Five atoms are involved in the two torsions */
 +              type   = forceatoms[n++];
 +              ai     = forceatoms[n++];
 +              aj     = forceatoms[n++];
 +              ak     = forceatoms[n++];
 +              al     = forceatoms[n++];
 +              am     = forceatoms[n++];
 +              
 +              /* Which CMAP type is this */
 +              cmapA = forceparams[type].cmap.cmapA;
 +              cmapd = cmap_grid->cmapdata[cmapA].cmap;
 +
 +              /* First torsion */
 +              a1i   = ai;
 +              a1j   = aj;
 +              a1k   = ak;
 +              a1l   = al;
 +              
 +              phi1  = dih_angle(x[a1i], x[a1j], x[a1k], x[a1l], pbc, r1_ij, r1_kj, r1_kl, m1, n1,
 +                                                 &sign1, &t11, &t21, &t31); /* 84 */
 +              
 +              cos_phi1 = cos(phi1);
 +
 +              a1[0] = r1_ij[1]*r1_kj[2]-r1_ij[2]*r1_kj[1];
 +              a1[1] = r1_ij[2]*r1_kj[0]-r1_ij[0]*r1_kj[2];
 +              a1[2] = r1_ij[0]*r1_kj[1]-r1_ij[1]*r1_kj[0]; /* 9 */
 +              
 +              b1[0] = r1_kl[1]*r1_kj[2]-r1_kl[2]*r1_kj[1];
 +              b1[1] = r1_kl[2]*r1_kj[0]-r1_kl[0]*r1_kj[2];
 +              b1[2] = r1_kl[0]*r1_kj[1]-r1_kl[1]*r1_kj[0]; /* 9 */
 +              
 +              tmp = pbc_rvec_sub(pbc,x[a1l],x[a1k],h1);
 +              
 +              ra21  = iprod(a1,a1);       /* 5 */
 +              rb21  = iprod(b1,b1);       /* 5 */
 +              rg21  = iprod(r1_kj,r1_kj); /* 5 */
 +              rg1   = sqrt(rg21);
 +              
 +              rgr1  = 1.0/rg1;
 +              ra2r1 = 1.0/ra21;
 +              rb2r1 = 1.0/rb21;
 +              rabr1 = sqrt(ra2r1*rb2r1);
 +              
 +              sin_phi1 = rg1 * rabr1 * iprod(a1,h1) * (-1);
 +              
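 +              /* Recompute phi1 from asin(sin_phi1) or acos(cos_phi1),
 +               * whichever is better conditioned (|cos_phi1| > 0.5 uses
 +               * asin), and restore the correct quadrant.
 +               */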
 +              if(cos_phi1 < -0.5 || cos_phi1 > 0.5)
 +              {
 +                      phi1 = asin(sin_phi1);
 +                      
 +                      if(cos_phi1 < 0)
 +                      {
 +                              if(phi1 > 0)
 +                              {
 +                                      phi1 = M_PI - phi1;
 +                              }
 +                              else
 +                              {
 +                                      phi1 = -M_PI - phi1;
 +                              }
 +                      }
 +              }
 +              else
 +              {
 +                      phi1 = acos(cos_phi1);
 +                      
 +                      if(sin_phi1 < 0)
 +                      {
 +                              phi1 = -phi1;
 +                      }
 +              }
 +              
 +              xphi1 = phi1 + M_PI; /* 1 */
 +              
 +              /* Second torsion */
 +              a2i   = aj;
 +              a2j   = ak;
 +              a2k   = al;
 +              a2l   = am;
 +              
 +              phi2  = dih_angle(x[a2i], x[a2j], x[a2k], x[a2l], pbc, r2_ij, r2_kj, r2_kl, m2, n2,
 +                                                &sign2, &t12, &t22, &t32); /* 84 */
 +              
 +              cos_phi2 = cos(phi2);
 +
 +              a2[0] = r2_ij[1]*r2_kj[2]-r2_ij[2]*r2_kj[1];
 +              a2[1] = r2_ij[2]*r2_kj[0]-r2_ij[0]*r2_kj[2];
 +              a2[2] = r2_ij[0]*r2_kj[1]-r2_ij[1]*r2_kj[0]; /* 9 */
 +              
 +              b2[0] = r2_kl[1]*r2_kj[2]-r2_kl[2]*r2_kj[1];
 +              b2[1] = r2_kl[2]*r2_kj[0]-r2_kl[0]*r2_kj[2];
 +              b2[2] = r2_kl[0]*r2_kj[1]-r2_kl[1]*r2_kj[0]; /* 9 */
 +              
 +              tmp = pbc_rvec_sub(pbc,x[a2l],x[a2k],h2);
 +              
 +              ra22  = iprod(a2,a2);         /* 5 */
 +              rb22  = iprod(b2,b2);         /* 5 */
 +              rg22  = iprod(r2_kj,r2_kj);   /* 5 */
 +              rg2   = sqrt(rg22);
 +              
 +              rgr2  = 1.0/rg2;
 +              ra2r2 = 1.0/ra22;
 +              rb2r2 = 1.0/rb22;
 +              rabr2 = sqrt(ra2r2*rb2r2);
 +              
 +              sin_phi2 = rg2 * rabr2 * iprod(a2,h2) * (-1);
 +              
 +              if(cos_phi2 < -0.5 || cos_phi2 > 0.5)
 +              {
 +                      phi2 = asin(sin_phi2);
 +                      
 +                      if(cos_phi2 < 0)
 +                      {
 +                              if(phi2 > 0)
 +                              {
 +                                      phi2 = M_PI - phi2;
 +                              }
 +                              else
 +                              {
 +                                      phi2 = -M_PI - phi2;
 +                              }
 +                      }
 +              }
 +              else
 +              {
 +                      phi2 = acos(cos_phi2);
 +                      
 +                      if(sin_phi2 < 0)
 +                      {
 +                              phi2 = -phi2;
 +                      }
 +              }
 +              
 +              xphi2 = phi2 + M_PI; /* 1 */
 +              
 +              /* Wrap the angles into the range [0, 2*pi) */
 +              if(xphi1<0)
 +              {
 +                      xphi1 = xphi1 + 2*M_PI;
 +              }
 +              else if(xphi1>=2*M_PI)
 +              {
 +                      xphi1 = xphi1 - 2*M_PI;
 +              }
 +              
 +              if(xphi2<0)
 +              {
 +                      xphi2 = xphi2 + 2*M_PI;
 +              }
 +              else if(xphi2>=2*M_PI)
 +              {
 +                      xphi2 = xphi2 - 2*M_PI;
 +              }
 +              
 +              /* Grid spacing in radians */
 +              dx = 2*M_PI / cmap_grid->grid_spacing;
 +              
 +              /* Where on the grid are we */
 +              iphi1 = (int)(xphi1/dx);
 +              iphi2 = (int)(xphi2/dx);
 +              
 +              iphi1 = cmap_setup_grid_index(iphi1, cmap_grid->grid_spacing, &ip1m1,&ip1p1,&ip1p2);
 +              iphi2 = cmap_setup_grid_index(iphi2, cmap_grid->grid_spacing, &ip2m1,&ip2p1,&ip2p2);
 +              
 +              pos1    = iphi1*cmap_grid->grid_spacing+iphi2;
 +              pos2    = ip1p1*cmap_grid->grid_spacing+iphi2;
 +              pos3    = ip1p1*cmap_grid->grid_spacing+ip2p1;
 +              pos4    = iphi1*cmap_grid->grid_spacing+ip2p1;
 +
 +              ty[0]   = cmapd[pos1*4];
 +              ty[1]   = cmapd[pos2*4];
 +              ty[2]   = cmapd[pos3*4];
 +              ty[3]   = cmapd[pos4*4];
 +              
 +              ty1[0]   = cmapd[pos1*4+1];
 +              ty1[1]   = cmapd[pos2*4+1];
 +              ty1[2]   = cmapd[pos3*4+1];
 +              ty1[3]   = cmapd[pos4*4+1];
 +              
 +              ty2[0]   = cmapd[pos1*4+2];
 +              ty2[1]   = cmapd[pos2*4+2];
 +              ty2[2]   = cmapd[pos3*4+2];
 +              ty2[3]   = cmapd[pos4*4+2];
 +              
 +              ty12[0]   = cmapd[pos1*4+3];
 +              ty12[1]   = cmapd[pos2*4+3];
 +              ty12[2]   = cmapd[pos3*4+3];
 +              ty12[3]   = cmapd[pos4*4+3];
 +              
 +              /* Switch to degrees */
 +              dx = 360.0 / cmap_grid->grid_spacing;
 +              xphi1 = xphi1 * RAD2DEG;
 +              xphi2 = xphi2 * RAD2DEG; 
 +              
 +              for(i=0;i<4;i++) /* 16 */
 +              {
 +                      tx[i] = ty[i];
 +                      tx[i+4] = ty1[i]*dx;
 +                      tx[i+8] = ty2[i]*dx;
 +                      tx[i+12] = ty12[i]*dx*dx;
 +              }
 +              
 +              idx=0;
 +              for(i=0;i<4;i++) /* 1056 */
 +              {
 +                      for(j=0;j<4;j++)
 +                      {
 +                              xx = 0;
 +                              for(k=0;k<16;k++)
 +                              {
 +                                      xx = xx + cmap_coeff_matrix[k*16+idx]*tx[k];
 +                              }
 +                              
 +                              idx++;
 +                              tc[i*4+j]=xx;
 +                      }
 +              }
 +              
 +              tt    = (xphi1-iphi1*dx)/dx;
 +              tu    = (xphi2-iphi2*dx)/dx;
 +              
 +              e     = 0;
 +              df1   = 0;
 +              df2   = 0;
 +              ddf1  = 0;
 +              ddf2  = 0;
 +              ddf12 = 0;
 +              
 +              for(i=3;i>=0;i--)
 +              {
 +                      l1 = loop_index[i][3];
 +                      l2 = loop_index[i][2];
 +                      l3 = loop_index[i][1];
 +                      
 +                      e     = tt * e    + ((tc[i*4+3]*tu+tc[i*4+2])*tu + tc[i*4+1])*tu+tc[i*4];
 +                      df1   = tu * df1  + (3.0*tc[l1]*tt+2.0*tc[l2])*tt+tc[l3];
 +                      df2   = tt * df2  + (3.0*tc[i*4+3]*tu+2.0*tc[i*4+2])*tu+tc[i*4+1];
 +                      ddf1  = tu * ddf1 + 2.0*3.0*tc[l1]*tt+2.0*tc[l2];
 +                      ddf2  = tt * ddf2 + 2.0*3.0*tc[4*i+3]*tu+2.0*tc[4*i+2];
 +              }
 +              
 +              ddf12 = tc[5] + 2.0*tc[9]*tt + 3.0*tc[13]*tt*tt + 2.0*tu*(tc[6]+2.0*tc[10]*tt+3.0*tc[14]*tt*tt) +
 +              3.0*tu*tu*(tc[7]+2.0*tc[11]*tt+3.0*tc[15]*tt*tt);
 +              
 +              fac     = RAD2DEG/dx;
 +              df1     = df1   * fac;
 +              df2     = df2   * fac;
 +              ddf1    = ddf1  * fac * fac;
 +              ddf2    = ddf2  * fac * fac;
 +              ddf12   = ddf12 * fac * fac;
 +              
 +              /* CMAP energy */
 +              vtot += e;
 +              
 +              /* Do forces - first torsion */
 +              fg1       = iprod(r1_ij,r1_kj);
 +              hg1       = iprod(r1_kl,r1_kj);
 +              fga1      = fg1*ra2r1*rgr1;
 +              hgb1      = hg1*rb2r1*rgr1;
 +              gaa1      = -ra2r1*rg1;
 +              gbb1      = rb2r1*rg1;
 +              
 +              for(i=0;i<DIM;i++)
 +              {
 +                      dtf1[i]   = gaa1 * a1[i];
 +                      dtg1[i]   = fga1 * a1[i] - hgb1 * b1[i];
 +                      dth1[i]   = gbb1 * b1[i];
 +                      
 +                      f1[i]     = df1  * dtf1[i];
 +                      g1[i]     = df1  * dtg1[i];
 +                      h1[i]     = df1  * dth1[i];
 +                      
 +                      f1_i[i]   =  f1[i];
 +                      f1_j[i]   = -f1[i] - g1[i];
 +                      f1_k[i]   =  h1[i] + g1[i];
 +                      f1_l[i]   = -h1[i];
 +                      
 +                      f[a1i][i] = f[a1i][i] + f1_i[i];
 +                      f[a1j][i] = f[a1j][i] + f1_j[i]; /* - f1[i] - g1[i] */                                                            
 +                      f[a1k][i] = f[a1k][i] + f1_k[i]; /* h1[i] + g1[i] */                                                            
 +                      f[a1l][i] = f[a1l][i] + f1_l[i]; /* - h1[i] */
 +              }
 +              
 +              /* Do forces - second torsion */
 +              fg2       = iprod(r2_ij,r2_kj);
 +              hg2       = iprod(r2_kl,r2_kj);
 +              fga2      = fg2*ra2r2*rgr2;
 +              hgb2      = hg2*rb2r2*rgr2;
 +              gaa2      = -ra2r2*rg2;
 +              gbb2      = rb2r2*rg2;
 +              
 +              for(i=0;i<DIM;i++)
 +              {
 +                      dtf2[i]   = gaa2 * a2[i];
 +                      dtg2[i]   = fga2 * a2[i] - hgb2 * b2[i];
 +                      dth2[i]   = gbb2 * b2[i];
 +                      
 +                      f2[i]     = df2  * dtf2[i];
 +                      g2[i]     = df2  * dtg2[i];
 +                      h2[i]     = df2  * dth2[i];
 +                      
 +                      f2_i[i]   =  f2[i];
 +                      f2_j[i]   = -f2[i] - g2[i];
 +                      f2_k[i]   =  h2[i] + g2[i];
 +                      f2_l[i]   = -h2[i];
 +                      
 +                      f[a2i][i] = f[a2i][i] + f2_i[i]; /* f2[i] */                                                                        
 +                      f[a2j][i] = f[a2j][i] + f2_j[i]; /* - f2[i] - g2[i] */                                                              
 +                      f[a2k][i] = f[a2k][i] + f2_k[i]; /* h2[i] + g2[i] */                            
 +                      f[a2l][i] = f[a2l][i] + f2_l[i]; /* - h2[i] */                                                                      
 +              }
 +              
 +              /* Shift forces */
 +              if(g)
 +              {
 +                      copy_ivec(SHIFT_IVEC(g,a1j), jt1);
 +                      ivec_sub(SHIFT_IVEC(g,a1i),  jt1,dt1_ij);
 +                      ivec_sub(SHIFT_IVEC(g,a1k),  jt1,dt1_kj);
 +                      ivec_sub(SHIFT_IVEC(g,a1l),  jt1,dt1_lj);
 +                      t11 = IVEC2IS(dt1_ij);
 +                      t21 = IVEC2IS(dt1_kj);
 +                      t31 = IVEC2IS(dt1_lj);
 +                      
 +                      copy_ivec(SHIFT_IVEC(g,a2j), jt2);
 +                      ivec_sub(SHIFT_IVEC(g,a2i),  jt2,dt2_ij);
 +                      ivec_sub(SHIFT_IVEC(g,a2k),  jt2,dt2_kj);
 +                      ivec_sub(SHIFT_IVEC(g,a2l),  jt2,dt2_lj);
 +                      t12 = IVEC2IS(dt2_ij);
 +                      t22 = IVEC2IS(dt2_kj);
 +                      t32 = IVEC2IS(dt2_lj);
 +              }
 +              else if(pbc)
 +              {
 +                      t31 = pbc_rvec_sub(pbc,x[a1l],x[a1j],h1);
 +                      t32 = pbc_rvec_sub(pbc,x[a2l],x[a2j],h2);
 +              }
 +              else
 +              {
 +                      t31 = CENTRAL;
 +                      t32 = CENTRAL;
 +              }
 +              
 +              rvec_inc(fshift[t11],f1_i);
 +              rvec_inc(fshift[CENTRAL],f1_j);
 +              rvec_inc(fshift[t21],f1_k);
 +              rvec_inc(fshift[t31],f1_l);
 +              
 +              rvec_inc(fshift[t12],f2_i);
 +              rvec_inc(fshift[CENTRAL],f2_j);
 +              rvec_inc(fshift[t22],f2_k);
 +              rvec_inc(fshift[t32],f2_l);
 +      }       
 +      return vtot;
 +}
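/*
 * Illustrative standalone sketch (plain C; none of these names are GROMACS
 * API) of the quadrant handling used for phi1/phi2 above: recover a torsion
 * angle phi in (-pi, pi] from cos(phi) and sin(phi), using asin() where it
 * is well conditioned (|cos phi| > 0.5) and acos() otherwise, then fixing
 * the quadrant/sign explicitly.
 */
#include <math.h>

static double angle_from_cos_sin(double cos_phi, double sin_phi)
{
    double phi;

    if (cos_phi < -0.5 || cos_phi > 0.5)
    {
        /* asin is accurate here; pick the correct quadrant from cos */
        phi = asin(sin_phi);
        if (cos_phi < 0)
        {
            phi = (phi > 0) ? M_PI - phi : -M_PI - phi;
        }
    }
    else
    {
        /* acos is accurate here; pick the correct sign from sin */
        phi = acos(cos_phi);
        if (sin_phi < 0)
        {
            phi = -phi;
        }
    }
    /* Same result as atan2(sin_phi, cos_phi), up to the branch at phi = +/-pi */
    return phi;
}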
 +
 +
 +
 +/***********************************************************
 + *
 + *   G R O M O S  9 6   F U N C T I O N S
 + *
 + ***********************************************************/
 +real g96harmonic(real kA,real kB,real xA,real xB,real x,real lambda,
 +               real *V,real *F)
 +{
 +  const real half=0.5;
 +  real  L1,kk,x0,dx,dx2;
 +  real  v,f,dvdlambda;
 +  
 +  L1    = 1.0-lambda;
 +  kk    = L1*kA+lambda*kB;
 +  x0    = L1*xA+lambda*xB;
 +  
 +  dx    = x-x0;
 +  dx2   = dx*dx;
 +  
 +  f     = -kk*dx;
 +  v     = half*kk*dx2;
 +  dvdlambda  = half*(kB-kA)*dx2 + (xA-xB)*kk*dx;
 +  
 +  *F    = f;
 +  *V    = v;
 +  
 +  return dvdlambda;
 +  
 +  /* That was 21 flops */
 +}
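/*
 * Note on how g96harmonic() above is used by g96bonds() below: it is called
 * with x = r^2 and x0 equal to the squared reference bond length, and
 * g96bonds then adds 0.5*vbond, so the bond energy is the GROMOS-96 quartic
 * form V(r) = 1/4 k (r^2 - b0^2)^2 and fbond = -k (r^2 - b0^2) can multiply
 * the displacement vector directly (no 1/r needed). The standalone check
 * below is illustrative only and uses no GROMACS types or functions.
 */
#include <math.h>
#include <stdio.h>

static double g96_bond_v(double k, double b0, double r)
{
    double d = r*r - b0*b0;

    return 0.25*k*d*d;
}

int main(void)
{
    const double k = 1000.0, b0 = 0.15, r = 0.16, h = 1e-6;
    /* Force along the bond as applied in g96bonds: fbond * r */
    double f_code    = -k*(r*r - b0*b0)*r;
    /* Reference: central finite difference of the quartic potential */
    double f_numeric = -(g96_bond_v(k, b0, r + h) - g96_bond_v(k, b0, r - h))/(2.0*h);

    printf("force from code path %g, finite difference %g\n", f_code, f_numeric);
    return fabs(f_code - f_numeric) > 1e-4;
}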
 +
 +real g96bonds(int nbonds,
 +            const t_iatom forceatoms[],const t_iparams forceparams[],
 +            const rvec x[],rvec f[],rvec fshift[],
 +            const t_pbc *pbc,const t_graph *g,
 +            real lambda,real *dvdlambda,
 +            const t_mdatoms *md,t_fcdata *fcd,
 +            int *global_atom_index)
 +{
 +  int  i,m,ki,ai,aj,type;
 +  real dr2,fbond,vbond,fij,vtot;
 +  rvec dx;
 +  ivec dt;
 +  
 +  vtot = 0.0;
 +  for(i=0; (i<nbonds); ) {
 +    type = forceatoms[i++];
 +    ai   = forceatoms[i++];
 +    aj   = forceatoms[i++];
 +  
 +    ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);          /*   3          */
 +    dr2  = iprod(dx,dx);                              /*   5          */
 +      
 +    *dvdlambda += g96harmonic(forceparams[type].harmonic.krA,
 +                              forceparams[type].harmonic.krB,
 +                              forceparams[type].harmonic.rA,
 +                              forceparams[type].harmonic.rB,
 +                              dr2,lambda,&vbond,&fbond);
 +
 +    vtot  += 0.5*vbond;                             /* 1*/
 +#ifdef DEBUG
 +    if (debug)
 +      fprintf(debug,"G96-BONDS: dr = %10g  vbond = %10g  fbond = %10g\n",
 +            sqrt(dr2),vbond,fbond);
 +#endif
 +   
 +    if (g) {
 +      ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
 +      ki=IVEC2IS(dt);
 +    }
 +    for (m=0; (m<DIM); m++) {                 /*  15          */
 +      fij=fbond*dx[m];
 +      f[ai][m]+=fij;
 +      f[aj][m]-=fij;
 +      fshift[ki][m]+=fij;
 +      fshift[CENTRAL][m]-=fij;
 +    }
 +  }                                   /* 44 TOTAL     */
 +  return vtot;
 +}
 +
 +real g96bond_angle(const rvec xi,const rvec xj,const rvec xk,const t_pbc *pbc,
 +                 rvec r_ij,rvec r_kj,
 +                 int *t1,int *t2)
 +/* Return value is the angle between the bonds i-j and j-k */
 +{
 +  real costh;
 +  
 +  *t1 = pbc_rvec_sub(pbc,xi,xj,r_ij);                 /*  3           */
 +  *t2 = pbc_rvec_sub(pbc,xk,xj,r_kj);                 /*  3           */
 +
 +  costh=cos_angle(r_ij,r_kj);                 /* 25           */
 +                                      /* 41 TOTAL     */
 +  return costh;
 +}
 +
 +real g96angles(int nbonds,
 +             const t_iatom forceatoms[],const t_iparams forceparams[],
 +             const rvec x[],rvec f[],rvec fshift[],
 +             const t_pbc *pbc,const t_graph *g,
 +             real lambda,real *dvdlambda,
 +             const t_mdatoms *md,t_fcdata *fcd,
 +             int *global_atom_index)
 +{
 +  int  i,ai,aj,ak,type,m,t1,t2;
 +  rvec r_ij,r_kj;
 +  real cos_theta,dVdt,va,vtot;
 +  real rij_1,rij_2,rkj_1,rkj_2,rijrkj_1;
 +  rvec f_i,f_j,f_k;
 +  ivec jt,dt_ij,dt_kj;
 +  
 +  vtot = 0.0;
 +  for(i=0; (i<nbonds); ) {
 +    type = forceatoms[i++];
 +    ai   = forceatoms[i++];
 +    aj   = forceatoms[i++];
 +    ak   = forceatoms[i++];
 +    
 +    cos_theta  = g96bond_angle(x[ai],x[aj],x[ak],pbc,r_ij,r_kj,&t1,&t2);
 +
 +    *dvdlambda += g96harmonic(forceparams[type].harmonic.krA,
 +                              forceparams[type].harmonic.krB,
 +                              forceparams[type].harmonic.rA,
 +                              forceparams[type].harmonic.rB,
 +                              cos_theta,lambda,&va,&dVdt);
 +    vtot    += va;
 +    
 +    rij_1    = gmx_invsqrt(iprod(r_ij,r_ij));
 +    rkj_1    = gmx_invsqrt(iprod(r_kj,r_kj));
 +    rij_2    = rij_1*rij_1;
 +    rkj_2    = rkj_1*rkj_1;
 +    rijrkj_1 = rij_1*rkj_1;                     /* 23 */
 +    
 +#ifdef DEBUG
 +    if (debug)
 +      fprintf(debug,"G96ANGLES: costheta = %10g  vth = %10g  dV/dct = %10g\n",
 +            cos_theta,va,dVdt);
 +#endif
 +    for (m=0; (m<DIM); m++) {                 /*  42  */
 +      f_i[m]=dVdt*(r_kj[m]*rijrkj_1 - r_ij[m]*rij_2*cos_theta);
 +      f_k[m]=dVdt*(r_ij[m]*rijrkj_1 - r_kj[m]*rkj_2*cos_theta);
 +      f_j[m]=-f_i[m]-f_k[m];
 +      f[ai][m]+=f_i[m];
 +      f[aj][m]+=f_j[m];
 +      f[ak][m]+=f_k[m];
 +    }
 +    
 +    if (g) {
 +      copy_ivec(SHIFT_IVEC(g,aj),jt);
 +      
 +      ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
 +      ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
 +      t1=IVEC2IS(dt_ij);
 +      t2=IVEC2IS(dt_kj);
 +    }      
 +    rvec_inc(fshift[t1],f_i);
 +    rvec_inc(fshift[CENTRAL],f_j);
 +    rvec_inc(fshift[t2],f_k);               /* 9 */
 +    /* 163 TOTAL      */
 +  }
 +  return vtot;
 +}
 +
 +real cross_bond_bond(int nbonds,
 +                   const t_iatom forceatoms[],const t_iparams forceparams[],
 +                   const rvec x[],rvec f[],rvec fshift[],
 +                   const t_pbc *pbc,const t_graph *g,
 +                   real lambda,real *dvdlambda,
 +                   const t_mdatoms *md,t_fcdata *fcd,
 +                   int *global_atom_index)
 +{
 +  /* Potential from Lawrence and Skinner, Chem. Phys. Lett. 372 (2003)
 +   * pp. 842-847
 +   */
 +  int  i,ai,aj,ak,type,m,t1,t2;
 +  rvec r_ij,r_kj;
 +  real vtot,vrr,s1,s2,r1,r2,r1e,r2e,krr;
 +  rvec f_i,f_j,f_k;
 +  ivec jt,dt_ij,dt_kj;
 +  
 +  vtot = 0.0;
 +  for(i=0; (i<nbonds); ) {
 +    type = forceatoms[i++];
 +    ai   = forceatoms[i++];
 +    aj   = forceatoms[i++];
 +    ak   = forceatoms[i++];
 +    r1e  = forceparams[type].cross_bb.r1e;
 +    r2e  = forceparams[type].cross_bb.r2e;
 +    krr  = forceparams[type].cross_bb.krr;
 +    
 +    /* Compute distance vectors ... */
 +    t1 = pbc_rvec_sub(pbc,x[ai],x[aj],r_ij);
 +    t2 = pbc_rvec_sub(pbc,x[ak],x[aj],r_kj);
 +    
 +    /* ... and their lengths */
 +    r1 = norm(r_ij);
 +    r2 = norm(r_kj);
 +    
 +    /* Deviations from ideality */
 +    s1 = r1-r1e;
 +    s2 = r2-r2e;
 +    
 +    /* Energy (can be negative!) */
 +    vrr   = krr*s1*s2;
 +    vtot += vrr;
 +    
 +    /* Forces */
 +    svmul(-krr*s2/r1,r_ij,f_i);
 +    svmul(-krr*s1/r2,r_kj,f_k);
 +    
 +    for (m=0; (m<DIM); m++) {                 /*  12  */
 +      f_j[m]    = -f_i[m] - f_k[m];
 +      f[ai][m] += f_i[m];
 +      f[aj][m] += f_j[m];
 +      f[ak][m] += f_k[m];
 +    }
 +    
 +    /* Virial stuff */
 +    if (g) {
 +      copy_ivec(SHIFT_IVEC(g,aj),jt);
 +      
 +      ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
 +      ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
 +      t1=IVEC2IS(dt_ij);
 +      t2=IVEC2IS(dt_kj);
 +    }      
 +    rvec_inc(fshift[t1],f_i);
 +    rvec_inc(fshift[CENTRAL],f_j);
 +    rvec_inc(fshift[t2],f_k);               /* 9 */
 +    /* 163 TOTAL      */
 +  }
 +  return vtot;
 +}
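/*
 * Derivation behind the svmul() calls in cross_bond_bond() above (a note,
 * not GROMACS code): with s1 = r1 - r1e and s2 = r2 - r2e the energy is
 * V = krr*s1*s2, and since d r1 / d x_i = r_ij/r1 the forces are
 *   F_i = -dV/dx_i = -krr*s2*r_ij/r1,   F_k = -krr*s1*r_kj/r2,
 *   F_j = -(F_i + F_k)  (translational invariance),
 * which is exactly what the loop above accumulates into f[] and fshift[].
 */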
 +
 +real cross_bond_angle(int nbonds,
 +                    const t_iatom forceatoms[],const t_iparams forceparams[],
 +                    const rvec x[],rvec f[],rvec fshift[],
 +                    const t_pbc *pbc,const t_graph *g,
 +                    real lambda,real *dvdlambda,
 +                    const t_mdatoms *md,t_fcdata *fcd,
 +                    int *global_atom_index)
 +{
 +  /* Potential from Lawrence and Skinner, Chem. Phys. Lett. 372 (2003)
 +   * pp. 842-847
 +   */
 +  int  i,ai,aj,ak,type,m,t1,t2,t3;
 +  rvec r_ij,r_kj,r_ik;
 +  real vtot,vrt,s1,s2,s3,r1,r2,r3,r1e,r2e,r3e,krt,k1,k2,k3;
 +  rvec f_i,f_j,f_k;
 +  ivec jt,dt_ij,dt_kj;
 +  
 +  vtot = 0.0;
 +  for(i=0; (i<nbonds); ) {
 +    type = forceatoms[i++];
 +    ai   = forceatoms[i++];
 +    aj   = forceatoms[i++];
 +    ak   = forceatoms[i++];
 +    r1e  = forceparams[type].cross_ba.r1e;
 +    r2e  = forceparams[type].cross_ba.r2e;
 +    r3e  = forceparams[type].cross_ba.r3e;
 +    krt  = forceparams[type].cross_ba.krt;
 +    
 +    /* Compute distance vectors ... */
 +    t1 = pbc_rvec_sub(pbc,x[ai],x[aj],r_ij);
 +    t2 = pbc_rvec_sub(pbc,x[ak],x[aj],r_kj);
 +    t3 = pbc_rvec_sub(pbc,x[ai],x[ak],r_ik);
 +    
 +    /* ... and their lengths */
 +    r1 = norm(r_ij);
 +    r2 = norm(r_kj);
 +    r3 = norm(r_ik);
 +    
 +    /* Deviations from ideality */
 +    s1 = r1-r1e;
 +    s2 = r2-r2e;
 +    s3 = r3-r3e;
 +    
 +    /* Energy (can be negative!) */
 +    vrt   = krt*s3*(s1+s2);
 +    vtot += vrt;
 +    
 +    /* Forces */
 +    k1 = -krt*(s3/r1);
 +    k2 = -krt*(s3/r2);
 +    k3 = -krt*(s1+s2)/r3;
 +    for(m=0; (m<DIM); m++) {
 +      f_i[m] = k1*r_ij[m] + k3*r_ik[m];
 +      f_k[m] = k2*r_kj[m] - k3*r_ik[m];
 +      f_j[m] = -f_i[m] - f_k[m];
 +    }
 +    
 +    for (m=0; (m<DIM); m++) {                 /*  12  */
 +      f[ai][m] += f_i[m];
 +      f[aj][m] += f_j[m];
 +      f[ak][m] += f_k[m];
 +    }
 +    
 +    /* Virial stuff */
 +    if (g) {
 +      copy_ivec(SHIFT_IVEC(g,aj),jt);
 +      
 +      ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
 +      ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
 +      t1=IVEC2IS(dt_ij);
 +      t2=IVEC2IS(dt_kj);
 +    }      
 +    rvec_inc(fshift[t1],f_i);
 +    rvec_inc(fshift[CENTRAL],f_j);
 +    rvec_inc(fshift[t2],f_k);               /* 9 */
 +    /* 163 TOTAL      */
 +  }
 +  return vtot;
 +}
 +
 +static real bonded_tab(const char *type,int table_nr,
 +                     const bondedtable_t *table,real kA,real kB,real r,
 +                     real lambda,real *V,real *F)
 +{
 +  real k,tabscale,*VFtab,rt,eps,eps2,Yt,Ft,Geps,Heps2,Fp,VV,FF;
 +  int  n0,nnn;
 +  real v,f,dvdlambda;
 +
 +  k = (1.0 - lambda)*kA + lambda*kB;
 +
 +  tabscale = table->scale;
-             v = do_listed_vdw_q(ftype,nbn,iatoms+nb0,
-                                 idef->iparams,
-                                 (const rvec*)x,f,fshift,
-                                 pbc,g,lambda,dvdl,
-                                 md,fr,grpp,global_atom_index);
++  VFtab    = table->data;
 +  
 +  rt    = r*tabscale;
 +  n0    = rt;
 +  if (n0 >= table->n) {
 +    gmx_fatal(FARGS,"The distance for a tabulated %s interaction (table %d) is outside the table range: r %f, between table indices %d and %d, table length %d",
 +            type,table_nr,r,n0,n0+1,table->n);
 +  }
 +  eps   = rt - n0;
 +  eps2  = eps*eps;
 +  nnn   = 4*n0;
 +  Yt    = VFtab[nnn];
 +  Ft    = VFtab[nnn+1];
 +  Geps  = VFtab[nnn+2]*eps;
 +  Heps2 = VFtab[nnn+3]*eps2;
 +  Fp    = Ft + Geps + Heps2;
 +  VV    = Yt + Fp*eps;
 +  FF    = Fp + Geps + 2.0*Heps2;
 +  
 +  *F    = -k*FF*tabscale;
 +  *V    = k*VV;
 +  dvdlambda  = (kB - kA)*VV;
 +  
 +  return dvdlambda;
 +  
 +  /* That was 22 flops */
 +}
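/*
 * Standalone sketch of the table lookup in bonded_tab() above (illustrative
 * layout and names, not the GROMACS table API). Per grid point n the table
 * stores the quadruplet (Y, F, G, H) so that, within bin n and with
 * eps = r*tabscale - n (0 <= eps < 1),
 *   V(eps)  = Y + F*eps + G*eps^2 + H*eps^3
 *   dV/deps = F + 2*G*eps + 3*H*eps^2
 * which is what VV and FF compute via Fp = F + G*eps + H*eps^2.
 */
static void table_interp(const double *VFtab, double tabscale, double r,
                         double *V, double *minus_dVdr)
{
    double        rt   = r*tabscale;
    int           n0   = (int)rt;
    double        eps  = rt - n0;
    double        eps2 = eps*eps;
    const double *p    = VFtab + 4*n0;   /* (Y, F, G, H) for this bin */
    double        Fp   = p[1] + p[2]*eps + p[3]*eps2;

    *V          = p[0] + Fp*eps;                               /* cubic value */
    *minus_dVdr = -(Fp + p[2]*eps + 2.0*p[3]*eps2)*tabscale;   /* -dV/dr */
}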
 +
 +real tab_bonds(int nbonds,
 +             const t_iatom forceatoms[],const t_iparams forceparams[],
 +             const rvec x[],rvec f[],rvec fshift[],
 +             const t_pbc *pbc,const t_graph *g,
 +             real lambda,real *dvdlambda,
 +             const t_mdatoms *md,t_fcdata *fcd,
 +             int *global_atom_index)
 +{
 +  int  i,m,ki,ai,aj,type,table;
 +  real dr,dr2,fbond,vbond,fij,vtot;
 +  rvec dx;
 +  ivec dt;
 +
 +  vtot = 0.0;
 +  for(i=0; (i<nbonds); ) {
 +    type = forceatoms[i++];
 +    ai   = forceatoms[i++];
 +    aj   = forceatoms[i++];
 +  
 +    ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);  /*   3          */
 +    dr2  = iprod(dx,dx);                      /*   5          */
 +    dr   = dr2*gmx_invsqrt(dr2);                      /*  10          */
 +
 +    table = forceparams[type].tab.table;
 +
 +    *dvdlambda += bonded_tab("bond",table,
 +                             &fcd->bondtab[table],
 +                             forceparams[type].tab.kA,
 +                             forceparams[type].tab.kB,
 +                             dr,lambda,&vbond,&fbond);  /*  22 */
 +
 +    if (dr2 == 0.0)
 +      continue;
 +
 +    
 +    vtot  += vbond;/* 1*/
 +    fbond *= gmx_invsqrt(dr2);                        /*   6          */
 +#ifdef DEBUG
 +    if (debug)
 +      fprintf(debug,"TABBONDS: dr = %10g  vbond = %10g  fbond = %10g\n",
 +            dr,vbond,fbond);
 +#endif
 +    if (g) {
 +      ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
 +      ki=IVEC2IS(dt);
 +    }
 +    for (m=0; (m<DIM); m++) {                 /*  15          */
 +      fij=fbond*dx[m];
 +      f[ai][m]+=fij;
 +      f[aj][m]-=fij;
 +      fshift[ki][m]+=fij;
 +      fshift[CENTRAL][m]-=fij;
 +    }
 +  }                                   /* 62 TOTAL     */
 +  return vtot;
 +}
 +
 +real tab_angles(int nbonds,
 +              const t_iatom forceatoms[],const t_iparams forceparams[],
 +              const rvec x[],rvec f[],rvec fshift[],
 +              const t_pbc *pbc,const t_graph *g,
 +              real lambda,real *dvdlambda,
 +              const t_mdatoms *md,t_fcdata *fcd,
 +              int *global_atom_index)
 +{
 +  int  i,ai,aj,ak,t1,t2,type,table;
 +  rvec r_ij,r_kj;
 +  real cos_theta,cos_theta2,theta,dVdt,va,vtot;
 +  ivec jt,dt_ij,dt_kj;
 +  
 +  vtot = 0.0;
 +  for(i=0; (i<nbonds); ) {
 +    type = forceatoms[i++];
 +    ai   = forceatoms[i++];
 +    aj   = forceatoms[i++];
 +    ak   = forceatoms[i++];
 +    
 +    theta  = bond_angle(x[ai],x[aj],x[ak],pbc,
 +                      r_ij,r_kj,&cos_theta,&t1,&t2);  /*  41          */
 +
 +    table = forceparams[type].tab.table;
 +  
 +    *dvdlambda += bonded_tab("angle",table,
 +                             &fcd->angletab[table],
 +                             forceparams[type].tab.kA,
 +                             forceparams[type].tab.kB,
 +                             theta,lambda,&va,&dVdt);  /*  22  */
 +    vtot += va;
 +    
 +    cos_theta2 = sqr(cos_theta);                /*   1                */
 +    if (cos_theta2 < 1) {
 +      int  m;
 +      real snt,st,sth;
 +      real cik,cii,ckk;
 +      real nrkj2,nrij2;
 +      rvec f_i,f_j,f_k;
 +      
 +      st  = dVdt*gmx_invsqrt(1 - cos_theta2); /*  12          */
 +      sth = st*cos_theta;                     /*   1          */
 +#ifdef DEBUG
 +      if (debug)
 +      fprintf(debug,"ANGLES: theta = %10g  vth = %10g  dV/dtheta = %10g\n",
 +              theta*RAD2DEG,va,dVdt);
 +#endif
 +      nrkj2=iprod(r_kj,r_kj);                 /*   5          */
 +      nrij2=iprod(r_ij,r_ij);
 +      
 +      cik=st*gmx_invsqrt(nrkj2*nrij2);                /*  12          */ 
 +      cii=sth/nrij2;                          /*  10          */
 +      ckk=sth/nrkj2;                          /*  10          */
 +      
 +      for (m=0; (m<DIM); m++) {                       /*  39          */
 +        f_i[m]=-(cik*r_kj[m]-cii*r_ij[m]);
 +        f_k[m]=-(cik*r_ij[m]-ckk*r_kj[m]);
 +        f_j[m]=-f_i[m]-f_k[m];
 +        f[ai][m]+=f_i[m];
 +        f[aj][m]+=f_j[m];
 +        f[ak][m]+=f_k[m];
 +      }
 +      if (g) {
 +        copy_ivec(SHIFT_IVEC(g,aj),jt);
 +
 +        ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
 +        ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
 +        t1=IVEC2IS(dt_ij);
 +        t2=IVEC2IS(dt_kj);
 +      }
 +      rvec_inc(fshift[t1],f_i);
 +      rvec_inc(fshift[CENTRAL],f_j);
 +      rvec_inc(fshift[t2],f_k);
 +    }                                           /* 169 TOTAL  */
 +  }
 +  return vtot;
 +}
 +
 +real tab_dihs(int nbonds,
 +            const t_iatom forceatoms[],const t_iparams forceparams[],
 +            const rvec x[],rvec f[],rvec fshift[],
 +            const t_pbc *pbc,const t_graph *g,
 +            real lambda,real *dvdlambda,
 +            const t_mdatoms *md,t_fcdata *fcd,
 +            int *global_atom_index)
 +{
 +  int  i,type,ai,aj,ak,al,table;
 +  int  t1,t2,t3;
 +  rvec r_ij,r_kj,r_kl,m,n;
 +  real phi,sign,ddphi,vpd,vtot;
 +
 +  vtot = 0.0;
 +  for(i=0; (i<nbonds); ) {
 +    type = forceatoms[i++];
 +    ai   = forceatoms[i++];
 +    aj   = forceatoms[i++];
 +    ak   = forceatoms[i++];
 +    al   = forceatoms[i++];
 +    
 +    phi=dih_angle(x[ai],x[aj],x[ak],x[al],pbc,r_ij,r_kj,r_kl,m,n,
 +                  &sign,&t1,&t2,&t3);                 /*  84  */
 +
 +    table = forceparams[type].tab.table;
 +
 +    /* Hopefully phi+M_PI never results in values < 0 */
 +    *dvdlambda += bonded_tab("dihedral",table,
 +                             &fcd->dihtab[table],
 +                             forceparams[type].tab.kA,
 +                             forceparams[type].tab.kB,
 +                             phi+M_PI,lambda,&vpd,&ddphi);
 +                     
 +    vtot += vpd;
 +    do_dih_fup(ai,aj,ak,al,-ddphi,r_ij,r_kj,r_kl,m,n,
 +             f,fshift,pbc,g,x,t1,t2,t3);                      /* 112  */
 +
 +#ifdef DEBUG
 +    fprintf(debug,"pdih: (%d,%d,%d,%d) phi=%g\n",
 +          ai,aj,ak,al,phi);
 +#endif
 +  } /* 227 TOTAL      */
 +
 +  return vtot;
 +}
 +
 +static unsigned
 +calc_bonded_reduction_mask(const t_idef *idef,
 +                           int shift,
 +                           int t,int nt)
 +{
 +    unsigned mask;
 +    int ftype,nb,nat1,nb0,nb1,i,a;
 +
 +    mask = 0;
 +
 +    for(ftype=0; ftype<F_NRE; ftype++)
 +    {
 +        if (interaction_function[ftype].flags & IF_BOND &&
 +            !(ftype == F_CONNBONDS || ftype == F_POSRES) &&
 +            (ftype<F_GB12 || ftype>F_GB14))
 +        {
 +            nb = idef->il[ftype].nr;
 +            if (nb > 0)
 +            {
 +                nat1 = interaction_function[ftype].nratoms + 1;
 +
 +                /* Divide the interactions of this type equally over the threads.
 +                 * This is not stored: it must match the division done in calc_one_bond.
 +                 */
 +                nb0 = (((nb/nat1)* t   )/nt)*nat1;
 +                nb1 = (((nb/nat1)*(t+1))/nt)*nat1;
 +
 +                for(i=nb0; i<nb1; i+=nat1)
 +                {
 +                    for(a=1; a<nat1; a++)
 +                    {
 +                        mask |= (1U << (idef->il[ftype].iatoms[i+a]>>shift));
 +                    }
 +                }
 +            }
 +        }
 +    }
 +
 +    return mask;
 +}
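/*
 * Sketch of the block mask built by calc_bonded_reduction_mask() above, for
 * a hypothetical thread: with shift = 6 each block covers 2^6 = 64 atoms,
 * so atom a sets bit (a >> shift) in the thread's mask, and only blocks
 * with a set bit need to be zeroed and reduced for that thread later on.
 * Plain ints, no GROMACS data structures.
 */
#include <stdio.h>

int main(void)
{
    const int shift   = 6;                   /* block size 1<<6 = 64 atoms */
    const int atoms[] = { 3, 70, 71, 200 };  /* atoms touched by this thread */
    unsigned  mask    = 0;
    int       i;

    for (i = 0; i < 4; i++)
    {
        mask |= 1U << (atoms[i] >> shift);
    }
    /* atom 3 -> block 0, atoms 70,71 -> block 1, atom 200 -> block 3 => 0xb */
    printf("reduction mask = 0x%x\n", mask);
    return 0;
}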
 +
 +void init_bonded_thread_force_reduction(t_forcerec *fr,
 +                                        const t_idef *idef)
 +{
 +#define MAX_BLOCK_BITS 32
 +    int t;
 +    int ctot,c,b;
 +
 +    if (fr->nthreads <= 1)
 +    {
 +        fr->red_nblock = 0;
 +
 +        return;
 +    }
 +
 +    /* We divide the force array into a maximum of 32 blocks.
 +     * The minimum force block reduction size is 2^6=64 atoms.
 +     */
 +    fr->red_ashift = 6;
 +    while (fr->natoms_force > (int)(MAX_BLOCK_BITS*(1U<<fr->red_ashift)))
 +    {
 +        fr->red_ashift++;
 +    }
 +    if (debug)
 +    {
 +        fprintf(debug,"bonded force buffer block atom shift %d bits\n",
 +                fr->red_ashift);
 +    }
 +
 +    /* Determine to which blocks each thread's bonded force calculation
 +     * contributes. Store this as a mask for each thread.
 +     */
 +#pragma omp parallel for num_threads(fr->nthreads) schedule(static)
 +    for(t=1; t<fr->nthreads; t++)
 +    {
 +        fr->f_t[t].red_mask =
 +            calc_bonded_reduction_mask(idef,fr->red_ashift,t,fr->nthreads);
 +    }
 +
 +    /* Determine the maximum number of blocks we need to reduce over */
 +    fr->red_nblock = 0;
 +    ctot = 0;
 +    for(t=0; t<fr->nthreads; t++)
 +    {
 +        c = 0;
 +        for(b=0; b<MAX_BLOCK_BITS; b++)
 +        {
 +            if (fr->f_t[t].red_mask & (1U<<b))
 +            {
 +                fr->red_nblock = max(fr->red_nblock,b+1);
 +                c++;
 +            }
 +        }
 +        if (debug)
 +        {
 +            fprintf(debug,"thread %d flags %x count %d\n",
 +                    t,fr->f_t[t].red_mask,c);
 +        }
 +        ctot += c;
 +    }
 +    if (debug)
 +    {
 +        fprintf(debug,"Number of blocks to reduce: %d of size %d\n",
 +                fr->red_nblock,1<<fr->red_ashift);
 +        fprintf(debug,"Reduction density %.2f density/#thread %.2f\n",
 +                ctot*(1<<fr->red_ashift)/(double)fr->natoms_force,
 +                ctot*(1<<fr->red_ashift)/(double)(fr->natoms_force*fr->nthreads));
 +    }
 +}
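/*
 * Sketch of the block-size choice made in init_bonded_thread_force_reduction()
 * above: the atom shift starts at 6 (64-atom blocks) and grows until the whole
 * force array fits in at most 32 blocks (the width of the reduction bit mask).
 * For a hypothetical natoms_force = 100000 this ends at shift = 12, i.e.
 * 4096-atom blocks, since 32*4096 = 131072 >= 100000. Standalone helper,
 * not GROMACS API.
 */
static int block_atom_shift(int natoms_force)
{
    const int max_block_bits = 32;   /* mirrors MAX_BLOCK_BITS above */
    int       shift          = 6;    /* minimum block size 2^6 = 64 atoms */

    while (natoms_force > max_block_bits*(1 << shift))
    {
        shift++;
    }
    return shift;
}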
 +
 +static void zero_thread_forces(f_thread_t *f_t,int n,
 +                               int nblock,int blocksize)
 +{
 +    int b,a0,a1,a,i,j;
 +
 +    if (n > f_t->f_nalloc)
 +    {
 +        f_t->f_nalloc = over_alloc_large(n);
 +        srenew(f_t->f,f_t->f_nalloc);
 +    }
 +
 +    if (f_t->red_mask != 0)
 +    {
 +        for(b=0; b<nblock; b++)
 +        {
 +            if (f_t->red_mask & (1U<<b))
 +            {
 +                a0 = b*blocksize;
 +                a1 = min((b+1)*blocksize,n);
 +                for(a=a0; a<a1; a++)
 +                {
 +                    clear_rvec(f_t->f[a]);
 +                }
 +            }
 +        }
 +    }
 +    for(i=0; i<SHIFTS; i++)
 +    {
 +        clear_rvec(f_t->fshift[i]);
 +    }
 +    for(i=0; i<F_NRE; i++)
 +    {
 +        f_t->ener[i] = 0;
 +    }
 +    for(i=0; i<egNR; i++)
 +    {
 +        for(j=0; j<f_t->grpp.nener; j++)
 +        {
 +            f_t->grpp.ener[i][j] = 0;
 +        }
 +    }
 +    for(i=0; i<efptNR; i++)
 +    {
 +        f_t->dvdl[i] = 0;
 +    }
 +}
 +
 +static void reduce_thread_force_buffer(int n,rvec *f,
 +                                       int nthreads,f_thread_t *f_t,
 +                                       int nblock,int block_size)
 +{
 +    /* The maximum thread count is arbitrary;
 +     * we use a fixed number to avoid dynamic memory management.
 +     * More threads than this are unlikely to ever help the bonded reduction.
 +     */
 +#define MAX_BONDED_THREADS 256
 +    int b;
 +
 +    if (nthreads > MAX_BONDED_THREADS)
 +    {
 +        gmx_fatal(FARGS,"Can not reduce bonded forces on more than %d threads",
 +                  MAX_BONDED_THREADS);
 +    }
 +
 +    /* This reduction can run on any number of threads,
 +     * independently of nthreads.
 +     */
 +#pragma omp parallel for num_threads(nthreads) schedule(static)
 +    for(b=0; b<nblock; b++)
 +    {
 +        rvec *fp[MAX_BONDED_THREADS];
 +        int nfb,ft,fb;
 +        int a0,a1,a;
 +
 +        /* Determine which threads contribute to this block */
 +        nfb = 0;
 +        for(ft=1; ft<nthreads; ft++)
 +        {
 +            if (f_t[ft].red_mask & (1U<<b))
 +            {
 +                fp[nfb++] = f_t[ft].f;
 +            }
 +        }
 +        if (nfb > 0)
 +        {
 +            /* Reduce force buffers for threads that contribute */
 +            a0 =  b   *block_size;
 +            a1 = (b+1)*block_size;
 +            a1 = min(a1,n);
 +            for(a=a0; a<a1; a++)
 +            {
 +                for(fb=0; fb<nfb; fb++)
 +                {
 +                    rvec_inc(f[a],fp[fb][a]);
 +                }
 +            }
 +        }
 +    }
 +}
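/*
 * Sketch of the per-block reduction done in reduce_thread_force_buffer()
 * above: each (OpenMP) iteration owns one block of the master force array
 * and adds in only those thread buffers whose mask bit for that block is
 * set, so no two iterations ever write the same f[a]. To keep the sketch
 * minimal, "rvec" is replaced by one double per atom; none of these names
 * are GROMACS API.
 */
static void reduce_block(double *f, int n, int block, int block_size,
                         int nthreads, double **f_thread,
                         const unsigned *red_mask)
{
    int a0 = block*block_size;
    int a1 = (block + 1)*block_size;
    int t, a;

    if (a1 > n)
    {
        a1 = n;
    }
    for (t = 1; t < nthreads; t++)   /* thread 0 wrote into f directly */
    {
        if (red_mask[t] & (1U << block))
        {
            for (a = a0; a < a1; a++)
            {
                f[a] += f_thread[t][a];
            }
        }
    }
}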
 +
 +static void reduce_thread_forces(int n,rvec *f,rvec *fshift,
 +                                 real *ener,gmx_grppairener_t *grpp,real *dvdl,
 +                                 int nthreads,f_thread_t *f_t,
 +                                 int nblock,int block_size,
 +                                 gmx_bool bCalcEnerVir,
 +                                 gmx_bool bDHDL)
 +{
 +    if (nblock > 0)
 +    {
 +        /* Reduce the bonded force buffer */
 +        reduce_thread_force_buffer(n,f,nthreads,f_t,nblock,block_size);
 +    }
 +
 +    /* When necessary, reduce energy and virial using one thread only */
 +    if (bCalcEnerVir)
 +    {
 +        int t,i,j;
 +
 +        for(i=0; i<SHIFTS; i++)
 +        {
 +            for(t=1; t<nthreads; t++)
 +            {
 +                rvec_inc(fshift[i],f_t[t].fshift[i]);
 +            }
 +        }
 +        for(i=0; i<F_NRE; i++)
 +        {
 +            for(t=1; t<nthreads; t++)
 +            {
 +                ener[i] += f_t[t].ener[i];
 +            }
 +        }
 +        for(i=0; i<egNR; i++)
 +        {
 +            for(j=0; j<f_t[1].grpp.nener; j++)
 +            {
 +                for(t=1; t<nthreads; t++)
 +                {
 +                    
 +                    grpp->ener[i][j] += f_t[t].grpp.ener[i][j];
 +                }
 +            }
 +        }
 +        if (bDHDL)
 +        {
 +            for(i=0; i<efptNR; i++)
 +            {
 +                
 +                for(t=1; t<nthreads; t++)
 +                {
 +                    dvdl[i] += f_t[t].dvdl[i];
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +static real calc_one_bond(FILE *fplog,int thread,
 +                          int ftype,const t_idef *idef,
 +                          rvec x[], rvec f[], rvec fshift[],
 +                          t_forcerec *fr,
 +                          const t_pbc *pbc,const t_graph *g,
 +                          gmx_enerdata_t *enerd, gmx_grppairener_t *grpp,
 +                          t_nrnb *nrnb,
 +                          real *lambda, real *dvdl,
 +                          const t_mdatoms *md,t_fcdata *fcd,
 +                          gmx_bool bCalcEnerVir,
 +                          int *global_atom_index, gmx_bool bPrintSepPot)
 +{
 +    int ind,nat1,nbonds,efptFTYPE;
 +    real v=0;
 +    t_iatom *iatoms;
 +    int nb0,nbn;
 +
 +    if (IS_RESTRAINT_TYPE(ftype))
 +    {
 +        efptFTYPE = efptRESTRAINT;
 +    }
 +    else
 +    {
 +        efptFTYPE = efptBONDED;
 +    }
 +
 +    if (interaction_function[ftype].flags & IF_BOND &&
 +        !(ftype == F_CONNBONDS || ftype == F_POSRES))
 +    {
 +        ind  = interaction_function[ftype].nrnb_ind;
 +        nat1 = interaction_function[ftype].nratoms + 1;
 +        nbonds    = idef->il[ftype].nr/nat1;
 +        iatoms    = idef->il[ftype].iatoms;
 +
 +        nb0 = ((nbonds* thread   )/(fr->nthreads))*nat1;
 +        nbn = ((nbonds*(thread+1))/(fr->nthreads))*nat1 - nb0;
 +
 +        if (!IS_LISTED_LJ_C(ftype))
 +        {
 +            if(ftype==F_CMAP)
 +            {
 +                v = cmap_dihs(nbn,iatoms+nb0,
 +                              idef->iparams,&idef->cmap_grid,
 +                              (const rvec*)x,f,fshift,
 +                              pbc,g,lambda[efptFTYPE],&(dvdl[efptFTYPE]),
 +                              md,fcd,global_atom_index);
 +            }
 +            else if (ftype == F_PDIHS &&
 +                     !bCalcEnerVir && fr->efep==efepNO)
 +            {
 +                /* No energies, shift forces or dvdl contributions are needed */
 +#ifndef SSE_PROPER_DIHEDRALS
 +                pdihs_noener
 +#else
 +                pdihs_noener_sse
 +#endif
 +                    (nbn,idef->il[ftype].iatoms+nb0,
 +                     idef->iparams,
 +                     (const rvec*)x,f,
 +                     pbc,g,lambda[efptFTYPE],md,fcd,
 +                     global_atom_index);
 +                v = 0;
 +            }
 +            else
 +            {
 +                v = interaction_function[ftype].ifunc(nbn,iatoms+nb0,
 +                                                      idef->iparams,
 +                                                      (const rvec*)x,f,fshift,
 +                                                      pbc,g,lambda[efptFTYPE],&(dvdl[efptFTYPE]),
 +                                                      md,fcd,global_atom_index);
 +            }
 +            if (bPrintSepPot)
 +            {
 +                fprintf(fplog,"  %-23s #%4d  V %12.5e  dVdl %12.5e\n",
 +                        interaction_function[ftype].longname,
 +                        nbonds/nat1,v,dvdl[efptFTYPE]);
 +            }
 +        }
 +        else
 +        {
-                     v = do_listed_vdw_q(ftype,nbonds,iatoms,
-                                         idef->iparams,
-                                         (const rvec*)x,f,fr->fshift,
-                                         pbc,g,lambda,dvdl,
-                                         md,fr,&enerd->grpp,global_atom_index);
++            v = do_nonbonded_listed(ftype,nbn,iatoms+nb0,idef->iparams,(const rvec*)x,f,fshift,
++                                    pbc,g,lambda,dvdl,md,fr,grpp,global_atom_index);
++
++            enerd->dvdl_nonlin[efptCOUL] += dvdl[efptCOUL];
++            enerd->dvdl_nonlin[efptVDW] += dvdl[efptVDW];
 +            
 +            if (bPrintSepPot)
 +            {
 +                fprintf(fplog,"  %-5s + %-15s #%4d                  dVdl %12.5e\n",
 +                        interaction_function[ftype].longname,
 +                        interaction_function[F_LJ14].longname,nbonds/nat1,dvdl[efptVDW]);
 +                fprintf(fplog,"  %-5s + %-15s #%4d                  dVdl %12.5e\n",
 +                        interaction_function[ftype].longname,
 +                        interaction_function[F_COUL14].longname,nbonds/nat1,dvdl[efptCOUL]);
 +            }
 +        }
 +        if (ind != -1 && thread == 0)
 +        {
 +            inc_nrnb(nrnb,ind,nbonds);
 +        }
 +    }
 +
 +    return v;
 +}
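/*
 * Sketch of the thread work division used in calc_one_bond() above (a
 * hypothetical standalone helper, not GROMACS API): for an interaction type
 * with nat1 = nratoms+1 integers per entry, thread t gets the contiguous
 * iatoms range [nb0, nb0+nbn), always a whole number of entries.
 * calc_bonded_reduction_mask() performs the same split, which is why the
 * two must stay consistent.
 */
static void thread_bonded_range(int nentries, int nat1,
                                int thread, int nthreads,
                                int *nb0, int *nbn)
{
    *nb0 = ((nentries* thread   )/nthreads)*nat1;
    *nbn = ((nentries*(thread+1))/nthreads)*nat1 - *nb0;
}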
 +
 +/* WARNING!  This function must exactly track calc_one_bond(), or
 +   horrible things will happen when doing free energy calculations!
 +   In a good coding world this would not be a separate function, but
 +   for speed reasons it needs to be.  TODO for 5.0 - figure out a way
 +   to reorganize to reduce duplication.
 +*/
 +
 +static real calc_one_bond_foreign(FILE *fplog,int ftype, const t_idef *idef,
 +                                  rvec x[], rvec f[], t_forcerec *fr,
 +                                  const t_pbc *pbc,const t_graph *g,
 +                                  gmx_enerdata_t *enerd, t_nrnb *nrnb,
 +                                  real *lambda, real *dvdl,
 +                                  const t_mdatoms *md,t_fcdata *fcd,
 +                                  int *global_atom_index, gmx_bool bPrintSepPot)
 +{
 +    int ind,nat1,nbonds,efptFTYPE,nbonds_np;
 +    real v=0;
 +    t_iatom *iatoms;
 +
 +    if (IS_RESTRAINT_TYPE(ftype))
 +    {
 +        efptFTYPE = efptRESTRAINT;
 +    }
 +    else
 +    {
 +        efptFTYPE = efptBONDED;
 +    }
 +
 +    if (ftype<F_GB12 || ftype>F_GB14)
 +    {
 +        if (interaction_function[ftype].flags & IF_BOND &&
 +            !(ftype == F_CONNBONDS || ftype == F_POSRES || ftype == F_FBPOSRES))
 +        {
 +            ind  = interaction_function[ftype].nrnb_ind;
 +            nat1 = interaction_function[ftype].nratoms+1;
 +            nbonds_np = idef->il[ftype].nr_nonperturbed;
 +            nbonds    = idef->il[ftype].nr - nbonds_np;
 +            iatoms    = idef->il[ftype].iatoms + nbonds_np;
 +            if (nbonds > 0)
 +            {
 +                if (!IS_LISTED_LJ_C(ftype))
 +                {
 +                    if(ftype==F_CMAP)
 +                    {
 +                        v = cmap_dihs(nbonds,iatoms,
 +                                      idef->iparams,&idef->cmap_grid,
 +                                      (const rvec*)x,f,fr->fshift,
 +                                      pbc,g,lambda[efptFTYPE],&(dvdl[efptFTYPE]),md,fcd,
 +                                      global_atom_index);
 +                    }
 +                    else
 +                    {
 +                        v =       interaction_function[ftype].ifunc(nbonds,iatoms,
 +                                                                  idef->iparams,
 +                                                                  (const rvec*)x,f,fr->fshift,
 +                                                                  pbc,g,lambda[efptFTYPE],&dvdl[efptFTYPE],
 +                                                                  md,fcd,global_atom_index);
 +                    }
 +                }
 +                else
 +                {
++                    v = do_nonbonded_listed(ftype,nbonds,iatoms,
++                                            idef->iparams,
++                                            (const rvec*)x,f,fr->fshift,
++                                            pbc,g,lambda,dvdl,
++                                            md,fr,&enerd->grpp,global_atom_index);
 +                }
 +                if (ind != -1)
 +                {
 +                    inc_nrnb(nrnb,ind,nbonds/nat1);
 +                }
 +            }
 +        }
 +    }
 +    return v;
 +}
 +
 +void calc_bonds(FILE *fplog,const gmx_multisim_t *ms,
 +                const t_idef *idef,
 +                rvec x[],history_t *hist,
 +                rvec f[],t_forcerec *fr,
 +                const t_pbc *pbc,const t_graph *g,
 +                gmx_enerdata_t *enerd,t_nrnb *nrnb,
 +                real *lambda,
 +                const t_mdatoms *md,
 +                t_fcdata *fcd,int *global_atom_index,
 +                t_atomtypes *atype, gmx_genborn_t *born,
 +                int force_flags,
 +                gmx_bool bPrintSepPot,gmx_large_int_t step)
 +{
 +    gmx_bool bCalcEnerVir;
 +    int    i;
 +    real   v,dvdl[efptNR],dvdl_dum[efptNR]; /* The dummy array is to have a place to store the dhdl at other values
 +                                               of lambda, which will be thrown away in the end*/
 +    const  t_pbc *pbc_null;
 +    char   buf[22];
 +    int    thread;
 +
 +    bCalcEnerVir = (force_flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY));
 +
 +    for (i=0;i<efptNR;i++)
 +    {
 +        dvdl[i] = 0.0;
 +    }
 +    if (fr->bMolPBC)
 +    {
 +        pbc_null = pbc;
 +    }
 +    else
 +    {
 +        pbc_null = NULL;
 +    }
 +    if (bPrintSepPot)
 +    {
 +        fprintf(fplog,"Step %s: bonded V and dVdl for this node\n",
 +                gmx_step_str(step,buf));
 +    }
 +
 +#ifdef DEBUG
 +    if (g && debug)
 +    {
 +        p_graph(debug,"Bondage is fun",g);
 +    }
 +#endif
 +
 +    /* Do pre force calculation stuff which might require communication */
 +    if (idef->il[F_ORIRES].nr)
 +    {
 +        enerd->term[F_ORIRESDEV] =
 +            calc_orires_dev(ms,idef->il[F_ORIRES].nr,
 +                            idef->il[F_ORIRES].iatoms,
 +                            idef->iparams,md,(const rvec*)x,
 +                            pbc_null,fcd,hist);
 +    }
 +    if (idef->il[F_DISRES].nr)
 +    {
 +        calc_disres_R_6(ms,idef->il[F_DISRES].nr,
 +                        idef->il[F_DISRES].iatoms,
 +                        idef->iparams,(const rvec*)x,pbc_null,
 +                        fcd,hist);
 +    }
 +
 +#pragma omp parallel for num_threads(fr->nthreads) schedule(static)
 +    for(thread=0; thread<fr->nthreads; thread++)
 +    {
 +        int    ftype,nbonds,ind,nat1;
 +        real   *epot,v;
 +        /* thread stuff */
 +        rvec   *ft,*fshift;
 +        real   *dvdlt;
 +        gmx_grppairener_t *grpp;
 +        int    nb0,nbn;
 +
 +        if (thread == 0)
 +        {
 +            ft     = f;
 +            fshift = fr->fshift;
 +            epot   = enerd->term;
 +            grpp   = &enerd->grpp;
 +            dvdlt  = dvdl;
 +        }
 +        else
 +        {
 +            zero_thread_forces(&fr->f_t[thread],fr->natoms_force,
 +                               fr->red_nblock,1<<fr->red_ashift);
 +
 +            ft     = fr->f_t[thread].f;
 +            fshift = fr->f_t[thread].fshift;
 +            epot   = fr->f_t[thread].ener;
 +            grpp   = &fr->f_t[thread].grpp;
 +            dvdlt  = fr->f_t[thread].dvdl;
 +        }
 +        /* Loop over all bonded force types to calculate the bonded forces */
 +        for(ftype=0; (ftype<F_NRE); ftype++)
 +        {
 +            if (idef->il[ftype].nr > 0 &&
 +                (interaction_function[ftype].flags & IF_BOND) &&
 +                (ftype < F_GB12 || ftype > F_GB14) &&
 +                !(ftype == F_CONNBONDS || ftype == F_POSRES))
 +            {
 +                v = calc_one_bond(fplog,thread,ftype,idef,x, 
 +                                  ft,fshift,fr,pbc_null,g,enerd,grpp,
 +                                  nrnb,lambda,dvdlt,
 +                                  md,fcd,bCalcEnerVir,
 +                                  global_atom_index,bPrintSepPot);
 +                epot[ftype]        += v;
 +            }
 +        }
 +    }
 +    if (fr->nthreads > 1)
 +    {
 +        reduce_thread_forces(fr->natoms_force,f,fr->fshift,
 +                             enerd->term,&enerd->grpp,dvdl,
 +                             fr->nthreads,fr->f_t,
 +                             fr->red_nblock,1<<fr->red_ashift,
 +                             bCalcEnerVir,
 +                             force_flags & GMX_FORCE_DHDL);
 +    }
 +    if (force_flags & GMX_FORCE_DHDL)
 +    {
 +        for(i=0; i<efptNR; i++)
 +        {
 +            enerd->dvdl_nonlin[i] += dvdl[i];
 +        }
 +    }
 +
 +    /* Copy the sum of violations for the distance restraints from fcd */
 +    if (fcd)
 +    {
 +        enerd->term[F_DISRESVIOL] = fcd->disres.sumviol;
 +
 +    }
 +}
 +
 +void calc_bonds_lambda(FILE *fplog,
 +                       const t_idef *idef,
 +                       rvec x[],
 +                       t_forcerec *fr,
 +                       const t_pbc *pbc,const t_graph *g,
 +                       gmx_enerdata_t *enerd,t_nrnb *nrnb,
 +                       real *lambda,
 +                       const t_mdatoms *md,
 +                       t_fcdata *fcd,
 +                       int *global_atom_index)
 +{
 +    int    i,ftype,nbonds_np,nbonds,ind,nat;
 +    real   v,dr,dr2,*epot;
 +    real   dvdl_dum[efptNR];
 +    rvec   *f,*fshift_orig;
 +    const  t_pbc *pbc_null;
 +    t_iatom *iatom_fe;
 +
 +    if (fr->bMolPBC)
 +    {
 +        pbc_null = pbc;
 +    }
 +    else
 +    {
 +        pbc_null = NULL;
 +    }
 +
 +    epot = enerd->term;
 +
 +    snew(f,fr->natoms_force);
 +    /* We want to preserve the fshift array in forcerec */
 +    fshift_orig = fr->fshift;
 +    snew(fr->fshift,SHIFTS);
 +
 +    /* Loop over all bonded force types to calculate the bonded forces */
 +    for(ftype=0; (ftype<F_NRE); ftype++) 
 +    {
 +        v = calc_one_bond_foreign(fplog,ftype,idef,x, 
 +                                  f,fr,pbc_null,g,enerd,nrnb,lambda,dvdl_dum,
 +                                  md,fcd,global_atom_index,FALSE);
 +        epot[ftype] += v;
 +    }
 +
 +    sfree(fr->fshift);
 +    fr->fshift = fshift_orig;
 +    sfree(f);
 +}
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,dcb565351ef0dfd9ab93777def0158bd7dda9478..dcb565351ef0dfd9ab93777def0158bd7dda9478
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,161a3352cfc319963b8b08b803200e6843067e96..161a3352cfc319963b8b08b803200e6843067e96
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,3e9e4ecde55b8cfd89ebf27b501e6721d1e6ba23..3e9e4ecde55b8cfd89ebf27b501e6721d1e6ba23
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,c6904776e031f6257faf4935854e5b0e7e4b75c2..c6904776e031f6257faf4935854e5b0e7e4b75c2
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,035931c6232060a737bcd4e339ed0a66d178c201..035931c6232060a737bcd4e339ed0a66d178c201
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,92b5b6ba4e47dcf7607ae6d7f0c22029a17ba348..92b5b6ba4e47dcf7607ae6d7f0c22029a17ba348
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,e436b3e0db4bf17d6b6db113ffe0ec370dd0717b..e436b3e0db4bf17d6b6db113ffe0ec370dd0717b
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,1d61f4f663383e04c75228df49ea33e8446b7897..1d61f4f663383e04c75228df49ea33e8446b7897
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,0ef6e30584ec01a0a5edcf1e4469799de3688892..0ef6e30584ec01a0a5edcf1e4469799de3688892
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,97ce5781f2ebbf9c599844f2082c5b2d9e508642..97ce5781f2ebbf9c599844f2082c5b2d9e508642
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,e3c51cf0a01d677a47b0f0597a569b7f35762729..e3c51cf0a01d677a47b0f0597a569b7f35762729
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,434429e51930bd68e02a6374fbd025f7c97938b5..434429e51930bd68e02a6374fbd025f7c97938b5
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,b841a2be86e02211b52b6b9b3e817d5fefd34626..b841a2be86e02211b52b6b9b3e817d5fefd34626
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,d5b5b1d61ff9ca8562ed50d78cb9c5c9c8968d18..d5b5b1d61ff9ca8562ed50d78cb9c5c9c8968d18
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,752e34d113495daa3ee454044e5214354f1d589c..752e34d113495daa3ee454044e5214354f1d589c
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,2e7594012632e3dbcf0ef21d402a047c3fe6efce..2e7594012632e3dbcf0ef21d402a047c3fe6efce
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,742e1230c758448a753e14ad9f619e7c8cc697d1..742e1230c758448a753e14ad9f619e7c8cc697d1
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,90d703229876379b542b4fa0bb75af39a8aff52d..90d703229876379b542b4fa0bb75af39a8aff52d
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,83d23be9971336a7db4e50316570e5afe74faa40..83d23be9971336a7db4e50316570e5afe74faa40
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,82357426f937354a78299562f053494d3086fad7..82357426f937354a78299562f053494d3086fad7
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,b93632a8a251ddb4d2178dab83eef65e63ffbb1f..b93632a8a251ddb4d2178dab83eef65e63ffbb1f
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,f2e61892c74550ae30e8746d7fdf05b1266c7686..f2e61892c74550ae30e8746d7fdf05b1266c7686
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,dc0fddcfa5e900b46c46b0adef18226f66cb53dc..dc0fddcfa5e900b46c46b0adef18226f66cb53dc
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,eb8408153bf987521778c14b573bc2d19df201e6..eb8408153bf987521778c14b573bc2d19df201e6
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,6857a6d5988ad910ce5d21fc19663c092c9240e1..6857a6d5988ad910ce5d21fc19663c092c9240e1
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,f96242365720731cfa052a463221a74ec84716e2..f96242365720731cfa052a463221a74ec84716e2
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,8d00b31da4e7fc288cd1b9ab428b454afee7c445..8d00b31da4e7fc288cd1b9ab428b454afee7c445
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,1e55e56e5aada503ff11b64abb7501bebb510dfa..1e55e56e5aada503ff11b64abb7501bebb510dfa
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,1a1ef76e6d088b354227c87dbe947f47dccbc3e1..1a1ef76e6d088b354227c87dbe947f47dccbc3e1
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,967a00384154c77c39abf674232c8a8dd899d46c..967a00384154c77c39abf674232c8a8dd899d46c
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,3c6532a63f41be89c48ec76130aeaf364b8d4e0c..3c6532a63f41be89c48ec76130aeaf364b8d4e0c
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,396f264b55c6528c80a68092dc64ac582e418355..396f264b55c6528c80a68092dc64ac582e418355
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,41e87fd5e05df5efff0c0c9e74529f0ad558745c..41e87fd5e05df5efff0c0c9e74529f0ad558745c
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,c7d255b55a769edbdff1735d20e8533dcf3ffc56..c7d255b55a769edbdff1735d20e8533dcf3ffc56
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,3a290c409c41c51ef8077f525c62ffe93c44c3de..3a290c409c41c51ef8077f525c62ffe93c44c3de
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,7d23b9f99cec9bf9d3e291644eb0d2d341fbaffe..7d23b9f99cec9bf9d3e291644eb0d2d341fbaffe
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,23d17b41ada0562b6500b150d1e3989d18693c1f..23d17b41ada0562b6500b150d1e3989d18693c1f
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,ba5456d9e07c815917c34c7c92d362af87a7aeed..ba5456d9e07c815917c34c7c92d362af87a7aeed
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,8900cbc2b71718c158deba94a92feb0e3402ca86..8900cbc2b71718c158deba94a92feb0e3402ca86
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,c8d861e7d8267e31ad25f3a087987de9c80c0af5..c8d861e7d8267e31ad25f3a087987de9c80c0af5
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,8aed7696310e20333a8ba199dd86fe47cbbc2df3..8aed7696310e20333a8ba199dd86fe47cbbc2df3
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,b200f3ae732955a89e157811c237adce4dee7ab7..b200f3ae732955a89e157811c237adce4dee7ab7
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,c53d9648a5e869268ada25db9aeec535de07ecb6..c53d9648a5e869268ada25db9aeec535de07ecb6
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,8c8af559733ac1db9204dfce1b1d5b79f4cd678f..8c8af559733ac1db9204dfce1b1d5b79f4cd678f
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,e8a4b2c54da976139da8098dc381cd55e3bdae08..e8a4b2c54da976139da8098dc381cd55e3bdae08
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,4f6c8d7e8723fa7dbb116261842c7c1a2c507a5a..4f6c8d7e8723fa7dbb116261842c7c1a2c507a5a
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,3af1e9de4d78f9c78e35bf7401dcbfbfb9105f9f..3af1e9de4d78f9c78e35bf7401dcbfbfb9105f9f
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,e59218cdbf79aba128409eede7a181da6e5fcdcf..e59218cdbf79aba128409eede7a181da6e5fcdcf
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,848ef5c8c761d3c7806ec8579a3551329b682be7..848ef5c8c761d3c7806ec8579a3551329b682be7
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,c45c40862c220eb9d0bc6e575930aecd61b43672..c45c40862c220eb9d0bc6e575930aecd61b43672
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,4ec58cccc219582bed9f7e11462543698f283ec8..4ec58cccc219582bed9f7e11462543698f283ec8
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,6c071ee1605b18166624c5ffbba6ee0c49170ab0..6c071ee1605b18166624c5ffbba6ee0c49170ab0
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,24a3000268778c260588836b58de3184f7962319..24a3000268778c260588836b58de3184f7962319
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,84a90f5b9877b9518cfa4e51a62d0eeef2e709b3..84a90f5b9877b9518cfa4e51a62d0eeef2e709b3
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,2184f12159f061ea4e224741a68a7865c6f92291..2184f12159f061ea4e224741a68a7865c6f92291
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,ca01432e16f4c4bdf16f96a0d8c1099a14454069..ca01432e16f4c4bdf16f96a0d8c1099a14454069
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,ca7e9b87cf6a91cfc5854763981e85a039432537..ca7e9b87cf6a91cfc5854763981e85a039432537
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,90fb09b8e322fdafe31e3eae0ea21dbdf4925824..90fb09b8e322fdafe31e3eae0ea21dbdf4925824
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,0e3f3d1ee37b5644171cc77eda35c5bb94f73d35..0e3f3d1ee37b5644171cc77eda35c5bb94f73d35
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,6cd471918f95da78c33f3d87a4b1e0d427fc71c7..6cd471918f95da78c33f3d87a4b1e0d427fc71c7
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,5c90acb0081fd7baa36f225241fb7d1ddbc60523..5c90acb0081fd7baa36f225241fb7d1ddbc60523
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,2f74aea88ad3a18fb28824aec1ca6eb9b4b5c283..2f74aea88ad3a18fb28824aec1ca6eb9b4b5c283
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,ea3ce8bd05593cf2edcfbf970da3d3888b267a40..ea3ce8bd05593cf2edcfbf970da3d3888b267a40
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,d8ae0cc2a5bc1fdc0482ce2c23a8f8f8f877f7ec..d8ae0cc2a5bc1fdc0482ce2c23a8f8f8f877f7ec
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,9208f9f3c3c1f0f1a70628dca4930c494c35dada..9208f9f3c3c1f0f1a70628dca4930c494c35dada
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,0a198f8e1982d2c3c4bb0ff8f9606f606c34e199..0a198f8e1982d2c3c4bb0ff8f9606f606c34e199
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,b7cf50763db6e7ffb6b7745d0cbf70585d932cc3..b7cf50763db6e7ffb6b7745d0cbf70585d932cc3
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,0000000000000000000000000000000000000000..8bf709048f185b238ac5d165c4285f71f9a88283
new file mode 100755 (executable)
--- /dev/null
--- /dev/null
@@@ -1,0 -1,0 +1,971 @@@
++#!/usr/bin/env python
++# Copyright (c) 2002-2008 ActiveState Software Inc.
++# License: MIT License (http://www.opensource.org/licenses/mit-license.php)
++# Original filename preprocess.py, see http://code.google.com/p/preprocess/
++#
++# Modified by Erik Lindahl 2009-2012 <lindahl@gromacs.org>
++# to enable advanced preprocessing for Gromacs kernels, including
++# preprocessor for-loops and substitution into preprocessor directives
++# as well as program strings.
++#
++# Please feel free to redistribute under same license as original (MIT),
++# but don't blame the original authors for mistakes in this version.
++#
++
++"""
++    Preprocess a file.
++
++    Command Line Usage:
++        gmxpreprocess [<options>...] <infile>
++
++    Options:
++        -h, --help      Print this help and exit.
++        -V, --version   Print the version info and exit.
++        -v, --verbose   Give verbose output for errors.
++
++        -o <outfile>    Write output to the given file instead of to stdout.
++        -f, --force     Overwrite the given output file. (Otherwise an IOError
++                        will be raised if <outfile> already exists.)
++        -D <define>     Define a variable for preprocessing. <define>
++                        can simply be a variable name (in which case it
++                        will be true) or it can be of the form
++                        <var>=<val>. An attempt will be made to convert
++                        <val> to an integer so "-D FOO=0" will create a
++                        false value.
++        -I <dir>        Add a directory to the include path for
++                        #include directives.
++
++        -k, --keep-lines    Emit empty lines for preprocessor statement
++                        lines and skipped output lines. This allows line
++                        numbers to stay constant.
++        -s, --no-substitute  Do NOT substitute defines into emitted lines.
++        -c, --content-types-path <path>
++                        Specify a path to a content.types file to assist
++                        with filetype determination. See the
++                        `_gDefaultContentTypes` string in this file for
++                        details on its format.
++
++    Module Usage:
++        from gmxpreprocess import gmxpreprocess
++        gmxpreprocess(infile, outfile=sys.stdout, defines={}, force=0,
++                      keepLines=0, includePath=[], substitute=1,
++                      contentType=None)
++
++    The <infile> can be marked up with special preprocessor statement lines
++    of the form:
++        <comment-prefix> <preprocessor-statement> <comment-suffix>
++    where the <comment-prefix/suffix> are the native comment delimiters for
++    that file type.
++
++
++    Examples
++    --------
++
++    HTML (*.htm, *.html) or XML (*.xml, *.kpf, *.xul) files:
++
++        <!-- #if FOO -->
++        ...
++        <!-- #endif -->
++
++    Python (*.py), Perl (*.pl), Tcl (*.tcl), Ruby (*.rb), Bash (*.sh),
++    or make ([Mm]akefile*) files:
++
++        # #if defined('FAV_COLOR') and FAV_COLOR == "blue"
++        ...
++        # #elif FAV_COLOR == "red"
++        ...
++        # #else
++        ...
++        # #endif
++
++    C (*.c, *.h), C++ (*.cpp, *.cxx, *.cc, *.h, *.hpp, *.hxx, *.hh),
++    Java (*.java), PHP (*.php) or C# (*.cs) files:
++
++        // #define FAV_COLOR 'blue'
++        ...
++        /* #ifndef FAV_COLOR */
++        ...
++        // #endif
++
++    Fortran 77 (*.f) or 90/95 (*.f90) files:
++
++        C     #if COEFF == 'var'
++              ...
++        C     #endif
++
++    And other languages.
++
++
++    Preprocessor Syntax
++    -------------------
++
++    - Valid statements:
++        #define <var> [<value>]
++        #undef <var>
++        #ifdef <var>
++        #ifndef <var>
++        #if <expr>
++        #elif <expr>
++        #else
++        #endif
++        #error <error string>
++        #include "<file>"
++        #include <var>
++      where <expr> is any valid Python expression.
++    - The expression after #if/elif is evaluated as a Python expression. It is an
++      error to refer to a variable that has not been defined by a -D
++      option or by an in-content #define.
++    - Special built-in methods for expressions:
++        defined(varName)    Return true if given variable is defined.
++
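++
++    GROMACS extensions (sketch)
++    ---------------------------
++
++    Judging from the statement handlers further down in this file, this
++    modified version additionally accepts preprocessor for-loops of the
++    form "#for <var>[,<var>...] in <python-list>" ... "#endfor", and it
++    substitutes {VAR} occurrences of defined or loop variables into the
++    emitted program lines. A minimal, hypothetical C-style template could
++    look like:
++
++        /* #for I in [0,1,2] */
++        x[{I}] = x[{I}]*scale;
++        /* #endfor */
++
++    which would emit the assignment once for each value of I.
++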
++
++    Tips
++    ----
++
++    A suggested file naming convention is to let input files to
++    preprocess be of the form <basename>.p.<ext> and direct the output
++    of preprocess to <basename>.<ext>, e.g.:
++        preprocess -o foo.py foo.p.py
++    The advantage is that other tools (esp. editors) will still
++    recognize the unpreprocessed file as the original language.
++"""
++
++__version_info__ = (1, 1, 0)
++__version__ = '.'.join(map(str, __version_info__))
++
++import os
++import sys
++import getopt
++import types
++import re
++import pprint
++
++
++
++#---- exceptions
++
++class PreprocessError(Exception):
++    def __init__(self, errmsg, file=None, lineno=None, line=None):
++        self.errmsg = str(errmsg)
++        self.file = file
++        self.lineno = lineno
++        self.line = line
++        Exception.__init__(self, errmsg, file, lineno, line)
++    def __str__(self):
++        s = ""
++        if self.file is not None:
++            s += self.file + ":"
++        if self.lineno is not None:
++            s += str(self.lineno) + ":"
++        if self.file is not None or self.lineno is not None:
++            s += " "
++        s += self.errmsg
++        #if self.line is not None:
++        #    s += ": " + self.line
++        return s
++
++
++
++#---- global data
++
++# Comment delimiter info.
++#   A mapping of content type to a list of 2-tuples defining the line
++#   prefix and suffix for a comment. Each prefix or suffix can either
++#   be a string (in which case it is transformed into a pattern allowing
++#   whitespace on either side) or a compiled regex.
++_commentGroups = {
++    "Python":     [ ('#', '') ],
++    "Perl":       [ ('#', '') ],
++    "PHP":        [ ('/*', '*/'), ('//', ''), ('#', '') ],
++    "Ruby":       [ ('#', '') ],
++    "Tcl":        [ ('#', '') ],
++    "Shell":      [ ('#', '') ],
++    # Allowing for CSS and JavaScript comments in XML/HTML.
++    "XML":        [ ('<!--', '-->'), ('/*', '*/'), ('//', '') ],
++    "HTML":       [ ('<!--', '-->'), ('/*', '*/'), ('//', '') ],
++    "Makefile":   [ ('#', '') ],
++    "JavaScript": [ ('/*', '*/'), ('//', '') ],
++    "CSS":        [ ('/*', '*/') ],
++    "C":          [ ('/*', '*/') ],
++    "C++":        [ ('/*', '*/'), ('//', '') ],
++    "Java":       [ ('/*', '*/'), ('//', '') ],
++    "C#":         [ ('/*', '*/'), ('//', '') ],
++    "IDL":        [ ('/*', '*/'), ('//', '') ],
++    "Text":       [ ('#', '') ],
++    "Fortran":    [ (re.compile(r'^[a-zA-Z*$]\s*'), ''), ('!', '') ],
++    "TeX":        [ ('%', '') ],
++}
++
++
++
++#---- internal logging facility
++
++class _Logger:
++    DEBUG, INFO, WARN, ERROR, CRITICAL = range(5)
++    def __init__(self, name, level=None, streamOrFileName=sys.stderr):
++        self._name = name
++        if level is None:
++            self.level = self.WARN
++        else:
++            self.level = level
++        if type(streamOrFileName) == types.StringType:
++            self.stream = open(streamOrFileName, 'w')
++            self._opennedStream = 1
++        else:
++            self.stream = streamOrFileName
++            self._opennedStream = 0
++    def __del__(self):
++        if self._opennedStream:
++            self.stream.close()
++    def getLevel(self):
++        return self.level
++    def setLevel(self, level):
++        self.level = level
++    def _getLevelName(self, level):
++        levelNameMap = {
++            self.DEBUG: "DEBUG",
++            self.INFO: "INFO",
++            self.WARN: "WARN",
++            self.ERROR: "ERROR",
++            self.CRITICAL: "CRITICAL",
++        }
++        return levelNameMap[level]
++    def isEnabled(self, level):
++        return level >= self.level
++    def isDebugEnabled(self): return self.isEnabled(self.DEBUG)
++    def isInfoEnabled(self): return self.isEnabled(self.INFO)
++    def isWarnEnabled(self): return self.isEnabled(self.WARN)
++    def isErrorEnabled(self): return self.isEnabled(self.ERROR)
++    def isFatalEnabled(self): return self.isEnabled(self.CRITICAL)
++    def log(self, level, msg, *args):
++        if level < self.level:
++            return
++        message = "%s: %s: " % (self._name, self._getLevelName(level).lower())
++        message = message + (msg % args) + "\n"
++        self.stream.write(message)
++        self.stream.flush()
++    def debug(self, msg, *args):
++        self.log(self.DEBUG, msg, *args)
++    def info(self, msg, *args):
++        self.log(self.INFO, msg, *args)
++    def warn(self, msg, *args):
++        self.log(self.WARN, msg, *args)
++    def error(self, msg, *args):
++        self.log(self.ERROR, msg, *args)
++    def fatal(self, msg, *args):
++        self.log(self.CRITICAL, msg, *args)
++
++log = _Logger("gmxpreprocess", _Logger.WARN)
++
++
++
++#---- internal support stuff
++
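++# Repeatedly replace every defined name in 'expr' with its value (longest
++# names first, so that e.g. a define FOOBAR is expanded before FOO) until
++# the string stops changing. Used below when expanding the value part of
++# #define statements.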
++def SubstituteInternal(expr, defines):
++    prevexpr = ''
++    while (expr!=prevexpr):
++        prevexpr=expr
++        for name in reversed(sorted(defines, key=len)):
++            value = defines[name]
++            expr = expr.replace(name, str(value))
++    return expr
++
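++# Like SubstituteInternal(), but only replaces names written as {NAME}, so
++# ordinary program text is not rewritten by accident. Used below when
++# emitting output lines (the EMIT branch of the main loop).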
++def SubstituteInCode(expr, defines):
++    prevexpr = ''
++    while (expr!=prevexpr):
++        prevexpr=expr
++        for name in reversed(sorted(defines, key=len)):
++            value = defines[name]
++            expr = expr.replace('{' + name + '}', str(value))
++    return expr
++
++
++def _evaluate(expr, defines):
++    """Evaluate the given expression string with the given context.
++
++    WARNING: This runs eval() on a user string. This is unsafe.
++    """
++    #interpolated = _interpolate(s, defines)
++
++    try:
++        rv = eval(expr, {'defined':lambda v: v in defines}, defines)
++    except Exception, ex:
++        msg = str(ex)
++        if msg.startswith("name '") and msg.endswith("' is not defined"):
++            # A common error (at least this is presumed:) is to have
++            #   defined(FOO)   instead of   defined('FOO')
++            # We should give a little hint as to what might be wrong.
++            # msg == "name 'FOO' is not defined"  -->  varName == "FOO"
++            varName = msg[len("name '"):-len("' is not defined")]
++            if expr.find("defined(%s)" % varName) != -1:
++                # "defined(FOO)" in expr instead of "defined('FOO')"
++                msg += " (perhaps you want \"defined('%s')\" instead of "\
++                       "\"defined(%s)\")" % (varName, varName)
++        elif msg.startswith("invalid syntax"):
++            msg = "invalid syntax: '%s'" % expr
++        raise PreprocessError(msg, defines['__FILE__'], defines['__LINE__'])
++    log.debug("evaluate %r -> %s (defines=%r)", expr, rv, defines)
++
++    return rv
++
++#---- module API
++
++def gmxpreprocess(infile, outfile=sys.stdout, defines={},
++               force=0, keepLines=0, includePath=[], substitute=1,
++               contentType=None, contentTypesRegistry=None,
++               __preprocessedFiles=None):
++    """Preprocess the given file.
++
++    "infile" is the input path.
++    "outfile" is the output path or stream (default is sys.stdout).
++    "defines" is a dictionary of defined variables that will be
++        understood in preprocessor statements. Keys must be strings and,
++        currently, only the truth value of any key's value matters.
++    "force" will overwrite the given outfile if it already exists. Otherwise
++        an IOError will be raised if the outfile already exists.
++    "keepLines" will cause blank lines to be emitted for preprocessor lines
++        and content lines that would otherwise be skipped.
++    "includePath" is a list of directories to search for given #include
++        directives. The directory of the file being processed is presumed.
++    "substitute", if true, will allow substitution of defines into emitted
++        lines. (NOTE: This substitution will happen within program strings
++        as well. This may not be what you expect.)
++    "contentType" can be used to specify the content type of the input
++        file. If not given, it will be guessed.
++    "contentTypesRegistry" is an instance of ContentTypesRegistry. If not
++        specified, a default registry will be created.
++    "__preprocessedFiles" (for internal use only) is used to ensure files
++        are not recursively preprocessed.
++
++    Returns the modified dictionary of defines or raises PreprocessError if
++    there was some problem.
++    """
++    if __preprocessedFiles is None:
++        __preprocessedFiles = []
++    log.info("preprocess(infile=%r, outfile=%r, defines=%r, force=%r, "\
++             "keepLines=%r, includePath=%r, contentType=%r, "\
++             "__preprocessedFiles=%r)", infile, outfile, defines, force,
++             keepLines, includePath, contentType, __preprocessedFiles)
++    absInfile = os.path.normpath(os.path.abspath(infile))
++    if absInfile in __preprocessedFiles:
++        raise PreprocessError("detected recursive #include of '%s'"\
++                              % infile)
++    __preprocessedFiles.append(os.path.abspath(infile))
++
++    # Determine the content type and comment info for the input file.
++    if contentType is None:
++        registry = contentTypesRegistry or getDefaultContentTypesRegistry()
++        contentType = registry.getContentType(infile)
++        if contentType is None:
++            contentType = "Text"
++            log.warn("defaulting content type for '%s' to '%s'",
++                     infile, contentType)
++    try:
++        cgs = _commentGroups[contentType]
++    except KeyError:
++        raise PreprocessError("don't know comment delimiters for content "\
++                              "type '%s' (file '%s')"\
++                              % (contentType, infile))
++
++    # Generate statement parsing regexes. Basic format:
++    #       <comment-prefix> <preprocessor-stmt> <comment-suffix>
++    #  Examples:
++    #       <!-- #if foo -->
++    #       ...
++    #       <!-- #endif -->
++    #
++    #       # #if BAR
++    #       ...
++    #       # #else
++    #       ...
++    #       # #endif
++    stmts = ['##\s*(?P<op>.*?)',
++             '#\s*(?P<op>if|elif|ifdef|ifndef)\s+(?P<expr>.*?)',
++             '#\s*(?P<op>else|endif)',
++             '#\s*(?P<op>error)\s+(?P<error>.*?)',
++             '#\s*(?P<op>define)\s+(?P<var>[^\s]*?)(\s+(?P<val>.+?))?',
++             '#\s*(?P<op>undef)\s+(?P<var>[^\s]*?)',
++             '#\s*(?P<op>for)\s+(?P<var>.*?)\s+((in)|(IN))\s+(?P<valuelist>.*?)',
++             '#\s*(?P<op>endfor)',
++             '#\s*(?P<op>include)\s+"(?P<fname>.*?)"',
++             r'#\s*(?P<op>include)\s+(?P<var>[^\s]+?)',
++            ]
++    patterns = []
++    for stmt in stmts:
++        # The comment group prefix and suffix can either be just a
++        # string or a compiled regex.
++        for cprefix, csuffix in cgs:
++            if hasattr(cprefix, "pattern"):
++                pattern = cprefix.pattern
++            else:
++                pattern = r"^\s*%s\s*" % re.escape(cprefix)
++            pattern += stmt
++            if hasattr(csuffix, "pattern"):
++                pattern += csuffix.pattern
++            else:
++                pattern += r"\s*%s\s*$" % re.escape(csuffix)
++            patterns.append(pattern)
++    stmtRes = [re.compile(p) for p in patterns]
++
++    # Process the input file.
++    # (Would be helpful if I knew anything about lexing and parsing
++    # simple grammars.)
++    fin = open(infile, 'r')
++    lines = fin.readlines()
++    # Merge backslash-continued lines into single logical lines. Start at the
++    # second-to-last line: a trailing backslash on the very last line has
++    # nothing to merge and would otherwise index past the end of 'lines'.
++    for i in range(len(lines)-2,-1,-1):
++        line = lines[i].rstrip(' \r\n')
++        if len(line)>0 and line[-1]=='\\':
++            lines[i] = line[:-1] + ' ' + lines[i+1]
++            lines[i+1] = ''    # keep an empty line to avoid screwing up line numbers
++
++    fin.close()
++    if type(outfile) in types.StringTypes:
++        if force and os.path.exists(outfile):
++            os.chmod(outfile, 0777)
++            os.remove(outfile)
++        fout = open(outfile, 'w')
++    else:
++        fout = outfile
++
++    defines['__FILE__'] = infile
++    SKIP, EMIT = range(2) # states
++    states = [(EMIT,   # a state is (<emit-or-skip-lines-in-this-section>,
++               0,      #             <have-emitted-in-this-if-block>,
++               0)]     #             <have-seen-'else'-in-this-if-block>)
++    lineNum = 0
++    nlines = len(lines)
++    forlevel = 0
++    forvar = {}
++    forvaluelist = {}
++    forstartline = {}
++    foriteration = {}
++    last_emitted_was_blank = True
++
++    while lineNum<nlines:
++
++        line = lines[lineNum]
++
++        log.debug("line %d: %r", lineNum+1, line)
++        defines['__LINE__'] = lineNum+1
++
++        # Is this line a preprocessor stmt line?
++        #XXX Could probably speed this up by optimizing common case of
++        #    line NOT being a preprocessor stmt line.
++        for stmtRe in stmtRes:
++            match = stmtRe.match(line)
++            if match:
++                break
++        else:
++            match = None
++
++        if match:
++
++            # Remove contents after ## (comment)
++            idx=line.find("##")
++            if(idx>0):
++                line = line[0:idx]
++
++            op = match.group("op")
++            log.debug("%r stmt (states: %r)", op, states)
++            if op == "define":
++                if not (states and states[-1][0] == SKIP):
++                    var, val = match.group("var", "val")
++                    val = SubstituteInternal(str(val), defines)
++
++                    if val is None:
++                        val = None
++                    else:
++                        try:
++                            val = eval(val, {}, {})
++                        except:
++                            pass
++                    defines[var] = val
++            elif op == "undef":
++                if not (states and states[-1][0] == SKIP):
++                    var = match.group("var")
++                    try:
++                        del defines[var]
++                    except KeyError:
++                        pass
++            elif op == "include":
++                if not (states and states[-1][0] == SKIP):
++                    if "var" in match.groupdict():
++                        # This is the second include form: #include VAR
++                        var = match.group("var")
++                        f = defines[var]
++                    else:
++                        # This is the first include form: #include "path"
++                        f = match.group("fname")
++
++                    for d in [os.path.dirname(infile)] + includePath:
++                        fname = os.path.normpath(os.path.join(d, f))
++                        if os.path.exists(fname):
++                            break
++                    else:
++                        raise PreprocessError("could not find #include'd file "\
++                                              "\"%s\" on include path: %r"\
++                                              % (f, includePath))
++                    defines = gmxpreprocess(fname, fout, defines, force,
++                                            keepLines, includePath, substitute,
++                                            contentTypesRegistry=contentTypesRegistry,
++                                            __preprocessedFiles=__preprocessedFiles)
++            elif op in ("if", "ifdef", "ifndef"):
++                if op == "if":
++                    expr = match.group("expr")
++                elif op == "ifdef":
++                    expr = "defined('%s')" % match.group("expr")
++                elif op == "ifndef":
++                    expr = "not defined('%s')" % match.group("expr")
++                try:
++                    if states and states[-1][0] == SKIP:
++                        # We are nested in a SKIP-portion of an if-block.
++                        states.append((SKIP, 0, 0))
++                    elif _evaluate(expr, defines):
++                        states.append((EMIT, 1, 0))
++                    else:
++                        states.append((SKIP, 0, 0))
++                except KeyError:
++                    raise PreprocessError("use of undefined variable in "\
++                                          "#%s stmt" % op, defines['__FILE__'],
++                                          defines['__LINE__'], line)
++            elif op == "elif":
++                expr = match.group("expr")
++                try:
++                    if states[-1][2]: # already had #else in this if-block
++                        raise PreprocessError("illegal #elif after #else in "\
++                            "same #if block", defines['__FILE__'],
++                            defines['__LINE__'], line)
++                    elif states[-1][1]: # if have emitted in this if-block
++                        states[-1] = (SKIP, 1, 0)
++                    elif states[:-1] and states[-2][0] == SKIP:
++                        # We are nested in a SKIP-portion of an if-block.
++                        states[-1] = (SKIP, 0, 0)
++                    elif _evaluate(expr, defines):
++                        states[-1] = (EMIT, 1, 0)
++                    else:
++                        states[-1] = (SKIP, 0, 0)
++                except IndexError:
++                    raise PreprocessError("#elif stmt without leading #if "\
++                                          "stmt", defines['__FILE__'],
++                                          defines['__LINE__'], line)
++            elif op == "else":
++                try:
++                    if states[-1][2]: # already had #else in this if-block
++                        raise PreprocessError("illegal #else after #else in "\
++                            "same #if block", defines['__FILE__'],
++                            defines['__LINE__'], line)
++                    elif states[-1][1]: # if have emitted in this if-block
++                        states[-1] = (SKIP, 1, 1)
++                    elif states[:-1] and states[-2][0] == SKIP:
++                        # We are nested in a SKIP-portion of an if-block.
++                        states[-1] = (SKIP, 0, 1)
++                    else:
++                        states[-1] = (EMIT, 1, 1)
++                except IndexError:
++                    raise PreprocessError("#else stmt without leading #if "\
++                                          "stmt", defines['__FILE__'],
++                                          defines['__LINE__'], line)
++            elif op == "endif":
++                try:
++                    states.pop()
++                except IndexError:
++                    raise PreprocessError("#endif stmt without leading #if "\
++                                          "stmt", defines['__FILE__'],
++                                          defines['__LINE__'], line)
++            elif op == "for":
++
++                tmpstr     = match.group("var")
++                # Note: str.strip() returns a new string, so build a new list
++                # of stripped names rather than discarding the results.
++                thisforvar = [s.strip() for s in tmpstr.split(",")]
++
++                # thisforvar is now a _list_ of one or more loop variables, without whitespace
++
++                # Evaluate the list-of-values just in case it refers to a list variable
++                valuelist = _evaluate(match.group("valuelist"),defines)
++                # If a string, evaluate it again
++                if(isinstance(valuelist,str)):
++                    valuelist = eval(valuelist)
++
++                forlevel += 1
++
++                forvar[forlevel]       = thisforvar
++                forvaluelist[forlevel] = valuelist
++                forstartline[forlevel] = lineNum + 1
++                foriteration[forlevel] = 0
++
++                if(len(valuelist)>0):
++                    # set the variable for this for-loop to the first value in the list for this level
++                    nvar=len(thisforvar)
++                    for i in range(nvar):
++                        if(nvar==1):
++                            val=valuelist[0]
++                        else:
++                            val=valuelist[0][i]
++                        defines[thisforvar[i]] = val
++
++                else:
++                    # list was empty, so skip this entire section
++                    states.append((SKIP, 0, 0))
++
++            elif op == "endfor":
++                foriteration[forlevel] += 1
++                # Should we do one more iteration on this level?
++                iter       = foriteration[forlevel]
++                thisforvar = forvar[forlevel]
++                valuelist  = forvaluelist[forlevel]
++
++                if(iter<len(valuelist)):
++
++                    nvar = len(thisforvar)
++                    for i in range(len(thisforvar)):
++                        if(nvar==1):
++                            val=valuelist[iter]
++                        else:
++                            val=valuelist[iter][i]
++                        defines[thisforvar[i]] = val
++
++                    lineNum             = forstartline[forlevel]
++                    continue
++                else:
++                    forlevel -= 1
++                    if(len(valuelist)==0):
++                        states.pop()
++
++            elif op == "error":
++                if not (states and states[-1][0] == SKIP):
++                    error = match.group("error")
++                    raise PreprocessError("#error: "+error, defines['__FILE__'],
++                                          defines['__LINE__'], line)
++            log.debug("states: %r", states)
++            if keepLines:
++                fout.write("\n")
++        else:
++            try:
++                if states[-1][0] == EMIT:
++                    log.debug("emit line (%s)" % states[-1][1])
++                    # Substitute all defines into line.
++                    # XXX Should avoid recursive substitutions. But that
++                    #     would be a pain right now.
++
++                    sline = line
++                    if substitute:
++                        sline = SubstituteInCode(sline,defines)
++
++                    emitted_line_is_blank = (sline.strip()=='')
++                    # Collapse runs of blank output lines, except when
++                    # --keep-lines asks us to preserve the original numbering.
++                    if keepLines or not (emitted_line_is_blank and last_emitted_was_blank):
++                        fout.write(sline)
++                        last_emitted_was_blank = emitted_line_is_blank
++
++                elif keepLines:
++                    log.debug("keep blank line (%s)" % states[-1][1])
++                    fout.write("\n")
++                else:
++                    log.debug("skip line (%s)" % states[-1][1])
++            except IndexError:
++                raise PreprocessError("superfluous #endif before this line",
++                                      defines['__FILE__'],
++                                      defines['__LINE__'])
++        lineNum += 1
++
++    if len(states) > 1:
++        raise PreprocessError("unterminated #if block", defines['__FILE__'],
++                              defines['__LINE__'])
++    elif len(states) < 1:
++        raise PreprocessError("superfluous #endif on or before this line",
++                              defines['__FILE__'], defines['__LINE__'])
++
++    if fout != outfile:
++        fout.close()
++
++    return defines
++
++
++#---- content-type handling
++
++_gDefaultContentTypes = """
++    # Default file types understood by "gmxpreprocess.py".
++    #
++    # Format is an extension of 'mime.types' file syntax.
++    #   - '#' indicates a comment to the end of the line.
++    #   - a line is:
++    #       <filetype> [<pattern>...]
++    #     where,
++    #       <filetype>'s are equivalent in spirit to the names used in the Windows
++    #           registry in HKCR, but some of those names suck or are inconsistent;
++    #           and
++    #       <pattern> is a suffix (pattern starts with a '.'), a regular expression
++    #           (pattern is enclosed in '/' characters), a full filename (anything
++    #           else).
++    #
++    # Notes on case-sensitivity:
++    #
++    # A suffix pattern is case-insensitive on Windows and case-sensitive
++    # elsewhere.  A filename pattern is case-sensitive everywhere. A regex
++    # pattern's case-sensitivity is defined by the regex. This means it is by
++    # default case-sensitive, but this can be changed using Python's inline
++    # regex option syntax. E.g.:
++    #         Makefile            /^(?i)makefile.*$/   # case-INsensitive regex
++
++    Python              .py
++    Python              .pyw
++    Perl                .pl
++    Ruby                .rb
++    Tcl                 .tcl
++    XML                 .xml
++    XML                 .kpf
++    XML                 .xul
++    XML                 .rdf
++    XML                 .xslt
++    XML                 .xsl
++    XML                 .wxs
++    XML                 .wxi
++    HTML                .htm
++    HTML                .html
++    XML                 .xhtml
++    Makefile            /^[Mm]akefile.*$/
++    PHP                 .php
++    JavaScript          .js
++    CSS                 .css
++    C++                 .c       # C++ because then we can use //-style comments
++    C++                 .cpp
++    C++                 .cxx
++    C++                 .cc
++    C++                 .h
++    C++                 .hpp
++    C++                 .hxx
++    C++                 .hh
++    C++                 .gpp     # Gromacs pre-preprocessing
++    IDL                 .idl
++    Text                .txt
++    Fortran             .f
++    Fortran             .f90
++    Shell               .sh
++    Shell               .csh
++    Shell               .ksh
++    Shell               .zsh
++    Java                .java
++    C#                  .cs
++    TeX                 .tex
++
++    # Some Komodo-specific file extensions
++    Python              .ksf  # Fonts & Colors scheme files
++    Text                .kkf  # Keybinding schemes files
++"""
++
++class ContentTypesRegistry:
++    """A class that handles determining the filetype of a given path.
++
++    Usage:
++        >>> registry = ContentTypesRegistry()
++        >>> registry.getContentType("foo.py")
++        "Python"
++    """
++
++    def __init__(self, contentTypesPaths=None):
++        """The constructor.
++
++        @param contentTypesPaths {str} Optional path to content.types file.
++        """
++        ## Path to content.types file to decide language
++        self.contentTypesPaths = contentTypesPaths
++        self._load()
++
++    def _load(self):
++        from os.path import dirname, join, exists
++
++        ## initialize map of file suffixes to language
++        self.suffixMap = {}
++        ## initialize map of filename regex to language
++        self.regexMap = {}
++        ## initialize map of filenames to language
++        self.filenameMap = {}
++
++        self._loadContentType(_gDefaultContentTypes)
++        localContentTypesPath = join(dirname(__file__), "content.types")
++        if exists(localContentTypesPath):
++            log.debug("load content types file: `%r'" % localContentTypesPath)
++            self._loadContentType(open(localContentTypesPath, 'r').read())
++        for path in (self.contentTypesPaths or []):
++            log.debug("load content types file: `%r'" % path)
++            self._loadContentType(open(path, 'r').read())
++
++    def _loadContentType(self, content, path=None):
++        """Load the given content.types data into this registry.
++
++        The registry is three mappings:
++            <suffix> -> <content type>
++            <regex> -> <content type>
++            <filename> -> <content type>
++        """
++        for line in content.splitlines(0):
++            words = line.strip().split()
++            for i in range(len(words)):
++                if words[i][0] == '#':
++                    del words[i:]
++                    break
++            if not words: continue
++            contentType, patterns = words[0], words[1:]
++            if not patterns:
++                if line[-1] == '\n': line = line[:-1]
++                raise PreprocessError("bogus content.types line, there must "\
++                                      "be one or more patterns: '%s'" % line)
++            for pattern in patterns:
++                if pattern.startswith('.'):
++                    if sys.platform.startswith("win"):
++                        # Suffix patterns are case-insensitive on Windows.
++                        pattern = pattern.lower()
++                    self.suffixMap[pattern] = contentType
++                elif pattern.startswith('/') and pattern.endswith('/'):
++                    self.regexMap[re.compile(pattern[1:-1])] = contentType
++                else:
++                    self.filenameMap[pattern] = contentType
++
++    def getContentType(self, path):
++        """Return a content type for the given path.
++
++        @param path {str} The path of file for which to guess the
++            content type.
++        @returns {str|None} Returns None if could not determine the
++            content type.
++        """
++        basename = os.path.basename(path)
++        contentType = None
++        # Try to determine from the path.
++        if not contentType and self.filenameMap.has_key(basename):
++            contentType = self.filenameMap[basename]
++            log.debug("Content type of '%s' is '%s' (determined from full "\
++                      "path).", path, contentType)
++        # Try to determine from the suffix.
++        if not contentType and '.' in basename:
++            suffix = "." + basename.split(".")[-1]
++            if sys.platform.startswith("win"):
++                # Suffix patterns are case-insensitive on Windows.
++                suffix = suffix.lower()
++            if self.suffixMap.has_key(suffix):
++                contentType = self.suffixMap[suffix]
++                log.debug("Content type of '%s' is '%s' (determined from "\
++                          "suffix '%s').", path, contentType, suffix)
++        # Try to determine from the registered set of regex patterns.
++        if not contentType:
++            for regex, ctype in self.regexMap.items():
++                if regex.search(basename):
++                    contentType = ctype
++                    log.debug("Content type of '%s' is '%s' (matches regex '%s')",
++                              path, contentType, regex.pattern)
++                    break
++        # Try to determine from the file contents.
++        content = open(path, 'rb').read()
++        if content.startswith("<?xml"):  # cheap XML sniffing
++            contentType = "XML"
++        return contentType
++
++_gDefaultContentTypesRegistry = None
++def getDefaultContentTypesRegistry():
++    global _gDefaultContentTypesRegistry
++    if _gDefaultContentTypesRegistry is None:
++        _gDefaultContentTypesRegistry = ContentTypesRegistry()
++    return _gDefaultContentTypesRegistry
++
++
++#---- internal support stuff
++#TODO: move other internal stuff down to this section
++
++try:
++    reversed
++except NameError:
++    # 'reversed' added in Python 2.4 (http://www.python.org/doc/2.4/whatsnew/node7.html)
++    def reversed(seq):
++        rseq = list(seq)
++        rseq.reverse()
++        for item in rseq:
++            yield item
++try:
++    sorted
++except NameError:
++    # 'sorted' added in Python 2.4. Note that I'm only implementing enough
++    # of sorted as is used in this module.
++    def sorted(seq, key=None):
++        identity = lambda x: x
++        key_func = (key or identity)
++        sseq = list(seq)
++        sseq.sort(lambda self, other: cmp(key_func(self), key_func(other)))
++        for item in sseq:
++            yield item
++
++
++#---- mainline
++
++def main(argv):
++    try:
++        optlist, args = getopt.getopt(argv[1:], 'hVvo:D:fkI:sc:',
++            ['help', 'version', 'verbose', 'force', 'keep-lines',
++             'no-substitute', 'content-types-path='])
++    except getopt.GetoptError, msg:
++        sys.stderr.write("gmxpreprocess: error: %s. Your invocation was: %s\n"\
++                         % (msg, argv))
++        sys.stderr.write("See 'gmxpreprocess --help'.\n")
++        return 1
++    outfile = sys.stdout
++    defines = {}
++    force = 0
++    keepLines = 0
++    substitute = 1
++    includePath = []
++    contentTypesPaths = []
++    for opt, optarg in optlist:
++        if opt in ('-h', '--help'):
++            sys.stdout.write(__doc__)
++            return 0
++        elif opt in ('-V', '--version'):
++            sys.stdout.write("gmxpreprocess %s\n" % __version__)
++            return 0
++        elif opt in ('-v', '--verbose'):
++            log.setLevel(log.DEBUG)
++        elif opt == '-o':
++            outfile = optarg
++        elif opt in ('-f', '--force'):
++            force = 1
++        elif opt == '-D':
++            if optarg.find('=') != -1:
++                var, val = optarg.split('=', 1)
++                try:
++                    val = int(val)
++                except ValueError:
++                    pass
++            else:
++                var, val = optarg, None
++            defines[var] = val
++        elif opt in ('-k', '--keep-lines'):
++            keepLines = 1
++        elif opt == '-I':
++            includePath.append(optarg)
++        elif opt in ('-s', '--no-substitute'):
++            substitute = 0
++        elif opt in ('-c', '--content-types-path'):
++            contentTypesPaths.append(optarg)
++
++    if len(args) != 1:
++        sys.stderr.write("gmxpreprocess: error: incorrect number of "\
++                         "arguments: argv=%r\n" % argv)
++        return 1
++    else:
++        infile = args[0]
++
++    try:
++        contentTypesRegistry = ContentTypesRegistry(contentTypesPaths)
++        gmxpreprocess(infile, outfile, defines, force, keepLines, includePath,
++                   substitute, contentTypesRegistry=contentTypesRegistry)
++    except PreprocessError, ex:
++        if log.isDebugEnabled():
++            import traceback
++            traceback.print_exc(file=sys.stderr)
++        else:
++            sys.stderr.write("gmxpreprocess: error: %s\n" % str(ex))
++        return 1
++
++if __name__ == "__main__":
++    __file__ = sys.argv[0]
++    sys.exit( main(sys.argv) )
++
index 1039778a32d50b970f199512d66f5832309ba5e3,0000000000000000000000000000000000000000..b1ab471b836f2ea6a7b20ec8f7a2aaf143239987
mode 100644,000000..100644
--- /dev/null
@@@ -1,572 -1,0 +1,473 @@@
-     { "LJ",                             33 }, /* nb_kernel010 */
-     { "Buckingham",                     61 }, /* nb_kernel020 */ 
-     { "VdW(T)",                         54 }, /* nb_kernel030 */
-     { "Coulomb",                        27 }, /* nb_kernel100 */
-     { "Coulomb [W3]",                   80 }, /* nb_kernel101 */
-     { "Coulomb [W3-W3]",               234 }, /* nb_kernel102 */
-     { "Coulomb [W4]",                   80 }, /* nb_kernel103 */
-     { "Coulomb [W4-W4]",               234 }, /* nb_kernel104 */
-     { "Coulomb + LJ",                   38 }, /* nb_kernel110 */
-     { "Coulomb + LJ [W3]",              91 }, /* nb_kernel111 */
-     { "Coulomb + LJ [W3-W3]",          245 }, /* nb_kernel112 */
-     { "Coulomb + LJ [W4]",             113 }, /* nb_kernel113 */
-     { "Coulomb + LJ [W4-W4]",          267 }, /* nb_kernel114 */
-     { "Coulomb + Bham ",                64 }, /* nb_kernel120 */
-     { "Coulomb + Bham [W3]",           117 }, /* nb_kernel121 */
-     { "Coulomb + Bham [W3-W3]",        271 }, /* nb_kernel122 */
-     { "Coulomb + Bham [W4]",           141 }, /* nb_kernel123 */
-     { "Coulomb + Bham [W4-W4]",        295 }, /* nb_kernel124 */
-     { "Coulomb + VdW(T) ",              59 }, /* nb_kernel130 */
-     { "Coulomb + VdW(T) [W3]",         112 }, /* nb_kernel131 */
-     { "Coulomb + VdW(T) [W3-W3]",      266 }, /* nb_kernel132 */
-     { "Coulomb + VdW(T) [W4]",         134 }, /* nb_kernel133 */
-     { "Coulomb + VdW(T) [W4-W4]",      288 }, /* nb_kernel134 */
-     { "RF Coul",                        33 }, /* nb_kernel200 */
-     { "RF Coul [W3]",                   98 }, /* nb_kernel201 */
-     { "RF Coul [W3-W3]",               288 }, /* nb_kernel202 */
-     { "RF Coul [W4]",                   98 }, /* nb_kernel203 */
-     { "RF Coul [W4-W4]",               288 }, /* nb_kernel204 */
-     { "RF Coul + LJ",                   44 }, /* nb_kernel210 */
-     { "RF Coul + LJ [W3]",             109 }, /* nb_kernel211 */
-     { "RF Coul + LJ [W3-W3]",          299 }, /* nb_kernel212 */
-     { "RF Coul + LJ [W4]",             131 }, /* nb_kernel213 */
-     { "RF Coul + LJ [W4-W4]",          321 }, /* nb_kernel214 */
-     { "RF Coul + Bham ",                70 }, /* nb_kernel220 */
-     { "RF Coul + Bham [W3]",           135 }, /* nb_kernel221 */
-     { "RF Coul + Bham [W3-W3]",        325 }, /* nb_kernel222 */
-     { "RF Coul + Bham [W4]",           159 }, /* nb_kernel223 */
-     { "RF Coul + Bham [W4-W4]",        349 }, /* nb_kernel224 */
-     { "RF Coul + VdW(T) ",              65 }, /* nb_kernel230 */
-     { "RF Coul + VdW(T) [W3]",         130 }, /* nb_kernel231 */
-     { "RF Coul + VdW(T) [W3-W3]",      320 }, /* nb_kernel232 */
-     { "RF Coul + VdW(T) [W4]",         152 }, /* nb_kernel233 */
-     { "RF Coul + VdW(T) [W4-W4]",      342 }, /* nb_kernel234 */
-     { "Coul(T)",                        42 }, /* nb_kernel300 */
-     { "Coul(T) [W3]",                  125 }, /* nb_kernel301 */
-     { "Coul(T) [W3-W3]",               369 }, /* nb_kernel302 */
-     { "Coul(T) [W4]",                  125 }, /* nb_kernel303 */
-     { "Coul(T) [W4-W4]",               369 }, /* nb_kernel304 */
-     { "Coul(T) + LJ",                   55 }, /* nb_kernel310 */
-     { "Coul(T) + LJ [W3]",             138 }, /* nb_kernel311 */
-     { "Coul(T) + LJ [W3-W3]",          382 }, /* nb_kernel312 */
-     { "Coul(T) + LJ [W4]",             158 }, /* nb_kernel313 */
-     { "Coul(T) + LJ [W4-W4]",          402 }, /* nb_kernel314 */
-     { "Coul(T) + Bham",                 81 }, /* nb_kernel320 */
-     { "Coul(T) + Bham [W3]",           164 }, /* nb_kernel321 */
-     { "Coul(T) + Bham [W3-W3]",        408 }, /* nb_kernel322 */
-     { "Coul(T) + Bham [W4]",           186 }, /* nb_kernel323 */
-     { "Coul(T) + Bham [W4-W4]",        430 }, /* nb_kernel324 */
-     { "Coul(T) + VdW(T)",               68 }, /* nb_kernel330 */
-     { "Coul(T) + VdW(T) [W3]",         151 }, /* nb_kernel331 */
-     { "Coul(T) + VdW(T) [W3-W3]",      395 }, /* nb_kernel332 */
-     { "Coul(T) + VdW(T) [W4]",         179 }, /* nb_kernel333 */
-     { "Coul(T) + VdW(T) [W4-W4]",      423 }, /* nb_kernel334 */
-     { "Generalized Born Coulomb",       48 }, /* nb_kernel400 */
-     { "GB Coulomb + LJ",                61 }, /* nb_kernel410 */
-     { "GB Coulomb + VdW(T)",            79 }, /* nb_kernel430 */
-     { "LJ NF",                          19 }, /* nb_kernel010nf */
-     { "Buckingham NF",                  48 }, /* nb_kernel020nf */ 
-     { "VdW(T) NF",                      33 }, /* nb_kernel030nf */
-     { "Coulomb NF",                     16 }, /* nb_kernel100nf */
-     { "Coulomb [W3] NF",                47 }, /* nb_kernel101nf */
-     { "Coulomb [W3-W3] NF",            135 }, /* nb_kernel102nf */
-     { "Coulomb [W4] NF",                47 }, /* nb_kernel103nf */
-     { "Coulomb [W4-W4] NF",            135 }, /* nb_kernel104nf */
-     { "Coulomb + LJ NF",                24 }, /* nb_kernel110nf */
-     { "Coulomb + LJ [W3] NF",           55 }, /* nb_kernel111nf */
-     { "Coulomb + LJ [W3-W3] NF",       143 }, /* nb_kernel112nf */
-     { "Coulomb + LJ [W4] NF",           66 }, /* nb_kernel113nf */
-     { "Coulomb + LJ [W4-W4] NF",       154 }, /* nb_kernel114nf */
-     { "Coulomb + Bham  NF",             51 }, /* nb_kernel120nf */
-     { "Coulomb + Bham [W3] NF",         82 }, /* nb_kernel121nf */
-     { "Coulomb + Bham [W3-W3] NF",     170 }, /* nb_kernel122nf */
-     { "Coulomb + Bham [W4] NF",         95 }, /* nb_kernel123nf */
-     { "Coulomb + Bham [W4-W4] NF",     183 }, /* nb_kernel124nf */
-     { "Coulomb + VdW(T)  NF",           36 }, /* nb_kernel130nf */
-     { "Coulomb + VdW(T) [W3] NF",       67 }, /* nb_kernel131nf */
-     { "Coulomb + VdW(T) [W3-W3] NF",   155 }, /* nb_kernel132nf */
-     { "Coulomb + VdW(T) [W4] NF",       80 }, /* nb_kernel133nf */
-     { "Coulomb + VdW(T) [W4-W4] NF",   168 }, /* nb_kernel134nf */
-     { "RF Coul NF",                     19 }, /* nb_kernel200nf */
-     { "RF Coul [W3] NF",                56 }, /* nb_kernel201nf */
-     { "RF Coul [W3-W3] NF",            162 }, /* nb_kernel202nf */
-     { "RF Coul [W4] NF",                56 }, /* nb_kernel203nf */
-     { "RF Coul [W4-W4] NF",            162 }, /* nb_kernel204nf */
-     { "RF Coul + LJ NF",                27 }, /* nb_kernel210nf */
-     { "RF Coul + LJ [W3] NF",           64 }, /* nb_kernel211nf */
-     { "RF Coul + LJ [W3-W3] NF",       170 }, /* nb_kernel212nf */
-     { "RF Coul + LJ [W4] NF",           75 }, /* nb_kernel213nf */
-     { "RF Coul + LJ [W4-W4] NF",       181 }, /* nb_kernel214nf */
-     { "RF Coul + Bham  NF",             54 }, /* nb_kernel220nf */
-     { "RF Coul + Bham [W3] NF",         91 }, /* nb_kernel221nf */
-     { "RF Coul + Bham [W3-W3] NF",     197 }, /* nb_kernel222nf */
-     { "RF Coul + Bham [W4] NF",        104 }, /* nb_kernel223nf */
-     { "RF Coul + Bham [W4-W4] NF",     210 }, /* nb_kernel224nf */
-     { "RF Coul + VdW(T)  NF",           39 }, /* nb_kernel230nf */
-     { "RF Coul + VdW(T) [W3] NF",       76 }, /* nb_kernel231nf */
-     { "RF Coul + VdW(T) [W3-W3] NF",   182 }, /* nb_kernel232nf */
-     { "RF Coul + VdW(T) [W4] NF",       89 }, /* nb_kernel233nf */
-     { "RF Coul + VdW(T) [W4-W4] NF",   195 }, /* nb_kernel234nf */
-     { "Coul(T) NF",                     26 }, /* nb_kernel300nf */
-     { "Coul(T) [W3] NF",                77 }, /* nb_kernel301nf */
-     { "Coul(T) [W3-W3] NF",            225 }, /* nb_kernel302nf */
-     { "Coul(T) [W4] NF",                77 }, /* nb_kernel303nf */
-     { "Coul(T) [W4-W4] NF",            225 }, /* nb_kernel304nf */
-     { "Coul(T) + LJ NF",                34 }, /* nb_kernel310nf */
-     { "Coul(T) + LJ [W3] NF",           85 }, /* nb_kernel311nf */
-     { "Coul(T) + LJ [W3-W3] NF",       233 }, /* nb_kernel312nf */
-     { "Coul(T) + LJ [W4] NF",           96 }, /* nb_kernel313nf */
-     { "Coul(T) + LJ [W4-W4] NF",       244 }, /* nb_kernel314nf */
-     { "Coul(T) + Bham NF",              61 }, /* nb_kernel320nf */
-     { "Coul(T) + Bham [W3] NF",        112 }, /* nb_kernel321nf */
-     { "Coul(T) + Bham [W3-W3] NF",     260 }, /* nb_kernel322nf */
-     { "Coul(T) + Bham [W4] NF",        125 }, /* nb_kernel323nf */
-     { "Coul(T) + Bham [W4-W4] NF",     273 }, /* nb_kernel324nf */
-     { "Coul(T) + VdW(T) NF",            42 }, /* nb_kernel330nf */
-     { "Coul(T) + VdW(T) [W3] NF",       93 }, /* nb_kernel331nf */
-     { "Coul(T) + VdW(T) [W3-W3] NF",   241 }, /* nb_kernel332nf */
-     { "Coul(T) + VdW(T) [W4] NF",      110 }, /* nb_kernel333nf */
-     { "Coul(T) + VdW(T) [W4-W4] NF",   258 }, /* nb_kernel334nf */
-     { "Generalized Born Coulomb NF",    29 }, /* nb_kernel400nf */
-     { "GB Coulomb + LJ NF",             37 }, /* nb_kernel410nf */
-     { "GB Coulomb + VdW(T) NF",         49 }, /* nb_kernel430nf */
-     { "Free energy innerloop",         150 }, /* free energy, estimate */  
-     { "All-vs-All, Coul + LJ",          38 },
-     { "All-vs-All, GB + LJ",            61 },
-     { "Outer nonbonded loop",           10 },
 +/*
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <string.h>
 +#include "types/commrec.h"
 +#include "sysstuff.h"
 +#include "gmx_fatal.h"
 +#include "names.h"
 +#include "macros.h"
 +#include "nrnb.h"
 +#include "main.h"
 +#include "smalloc.h"
 +#include "copyrite.h"
 +
++
 +typedef struct {
 +  const char *name;
 +  int  flop;
 +} t_nrnb_data;
 +
 +
 +static const t_nrnb_data nbdata[eNRNB] = {
-     { "LJ + Coulomb RF (F)",            38 }, /* nbnxn kernel LJ+RF, no ener */
-     { "LJ + Coulomb RF (F+E)",          54 },
-     { "LJ + Coulomb tabulated (F)",     41 }, /* nbnxn kernel LJ+tab, no en */
-     { "LJ + Coulomb tabulated (F+E)",   59 },
-     { "LJ (F)",                         33 }, /* nbnxn kernel LJ, no ener */
-     { "LJ (F+E)",                       43 },
-     { "Coulomb RF (F)",                 31 }, /* nbnxn kernel RF, no ener */
-     { "Coulomb RF (F+E)",               36 },
-     { "Coulomb tabulated (F)",          34 }, /* nbnxn kernel tab, no ener */
-     { "Coulomb tabulated (F+E)",        41 },
++    /* These are re-used for different NB kernels, since there are so many.
++     * The actual number of flops is set dynamically.
++     */
++    { "NB VdW [V&F]",                    1 },
++    { "NB VdW [F]",                      1 },
++    { "NB Elec. [V&F]",                  1 },
++    { "NB Elec. [F]",                    1 },
++    { "NB Elec. [W3,V&F]",               1 },
++    { "NB Elec. [W3,F]",                 1 },
++    { "NB Elec. [W3-W3,V&F]",            1 },
++    { "NB Elec. [W3-W3,F]",              1 },
++    { "NB Elec. [W4,V&F]",               1 },
++    { "NB Elec. [W4,F]",                 1 },
++    { "NB Elec. [W4-W4,V&F]",            1 },
++    { "NB Elec. [W4-W4,F]",              1 },
++    { "NB VdW & Elec. [V&F]",            1 },
++    { "NB VdW & Elec. [F]",              1 },
++    { "NB VdW & Elec. [W3,V&F]",         1 },
++    { "NB VdW & Elec. [W3,F]",           1 },
++    { "NB VdW & Elec. [W3-W3,V&F]",      1 },
++    { "NB VdW & Elec. [W3-W3,F]",        1 },
++    { "NB VdW & Elec. [W4,V&F]",         1 },
++    { "NB VdW & Elec. [W4,F]",           1 },
++    { "NB VdW & Elec. [W4-W4,V&F]",      1 },
++    { "NB VdW & Elec. [W4-W4,F]",        1 },
++    
++    { "NB Generic kernel",               1 },
++    { "NB Free energy kernel",           1 },
++    { "NB All-vs-all",                   1 },
++    { "NB All-vs-all, GB",               1 },
++
 +    { "Pair Search distance check",      9 }, /* nbnxn pair dist. check */
 +    /* nbnxn kernel flops are based on inner-loops without exclusion checks.
 +     * Plain Coulomb runs through the RF kernels, except with CUDA.
 +     * invsqrt is counted as 6 flops: 1 for _mm_rsqrt_ps + 5 for iteration.
 +     * The flops are equal for plain-C, x86 SIMD and CUDA, except for:
 +     * - plain-C kernel uses one flop more for Coulomb-only (F) than listed
 +     * - x86 SIMD LJ geom-comb.rule kernels (fastest) use 2 more flops
 +     * - x86 SIMD LJ LB-comb.rule kernels (fast) use 3 (8 for F+E) more flops
 +     * - GPU always does exclusions, which requires 2-4 flops, but as invsqrt
 +     *   is always counted as 6 flops, this roughly compensates.
 +     */
-     { "All-vs-All Still radii",         47 },
-     { "All-vs-All HCT/OBC radii",      183 },
-     { "All-vs-All Born chain rule",     15 },
++    { "NxN RF Elec. + VdW [F]",         38 }, /* nbnxn kernel LJ+RF, no ener */
++    { "NxN RF Elec. + VdW [V&F]",       54 },
++    { "NxN CSTab Elec. + VdW [F]",      41 }, /* nbnxn kernel LJ+tab, no en */
++    { "NxN CSTab Elec. + VdW [V&F]",    59 },
++    { "NxN VdW [F]",                    33 }, /* nbnxn kernel LJ, no ener */
++    { "NxN VdW [V&F]",                  43 },
++    { "NxN RF Electrostatics [F]",      31 }, /* nbnxn kernel RF, no ener */
++    { "NxN RF Electrostatics [V&F]",    36 },
++    { "NxN CSTab Elec. [F]",            34 }, /* nbnxn kernel tab, no ener */
++    { "NxN CSTab Elec. [V&F]",          41 },
 +    { "1,4 nonbonded interactions",     90 },
 +    { "Born radii (Still)",             47 },
 +    { "Born radii (HCT/OBC)",          183 },
 +    { "Born force chain rule",          15 },
-   for(i=0; (i<eNR_NBKERNEL_NR); i++) {
++    { "All-vs-All Still radii",          1 },
++    { "All-vs-All HCT/OBC radii",        1 },
++    { "All-vs-All Born chain rule",      1 },
 +    { "Calc Weights",                   36 },
 +    { "Spread Q",                        6 },
 +    { "Spread Q Bspline",                2 }, 
 +    { "Gather F",                      23  },
 +    { "Gather F Bspline",              6   }, 
 +    { "3D-FFT",                        8   },
 +    { "Convolution",                   4   },
 +    { "Solve PME",                     64  },
 +    { "NS-Pairs",                      21  },
 +    { "Reset In Box",                  3   },
 +    { "Shift-X",                       6   },
 +    { "CG-CoM",                        3   },
 +    { "Sum Forces",                    1   },
 +    { "Bonds",                         59  },
 +    { "G96Bonds",                      44  },
 +    { "FENE Bonds",                    58  },
 +    { "Tab. Bonds",                    62  },
 +    { "Restraint Potential",           86  },
 +    { "Linear Angles",                 57  },
 +    { "Angles",                        168 },
 +    { "G96Angles",                     150 },
 +    { "Quartic Angles",                160 },
 +    { "Tab. Angles",                   169 },
 +    { "Propers",                       229 },
 +    { "Impropers",                     208 },
 +    { "RB-Dihedrals",                  247 },
 +    { "Four. Dihedrals",               247 },
 +    { "Tab. Dihedrals",                227 },
 +    { "Dist. Restr.",                  200 },
 +    { "Orient. Restr.",                200 },
 +    { "Dihedral Restr.",               200 },
 +    { "Pos. Restr.",                   50  },
 +    { "Flat-bottom posres",            50  },
 +    { "Angle Restr.",                  191 },
 +    { "Angle Restr. Z",                164 },
 +    { "Morse Potent.",                 83  },
 +    { "Cubic Bonds",                   54  },
 +    { "Walls",                         31  },
 +    { "Polarization",                  59  },
 +    { "Anharmonic Polarization",       72  },
 +    { "Water Pol.",                    62  },
 +    { "Thole Pol.",                    296 },
 +    { "Virial",                        18  },
 +    { "Update",                        31  },
 +    { "Ext.ens. Update",               54  },
 +    { "Stop-CM",                       10  },
 +    { "P-Coupling",                    6   },
 +    { "Calc-Ekin",                     27  },
 +    { "Lincs",                         60  },
 +    { "Lincs-Mat",                     4   },
 +    { "Shake",                         30  },
 +    { "Constraint-V",                   8  },
 +    { "Shake-Init",                    10  },
 +    { "Constraint-Vir",                24  },
 +    { "Settle",                        323 },
 +    { "Virtual Site 2",                23  },
 +    { "Virtual Site 3",                37  },
 +    { "Virtual Site 3fd",              95  },
 +    { "Virtual Site 3fad",             176 },
 +    { "Virtual Site 3out",             87  },
 +    { "Virtual Site 4fd",              110 }, 
 +    { "Virtual Site 4fdn",             254 }, 
 +    { "Virtual Site N",                 15 },
 +    { "Mixed Generalized Born stuff",   10 } 
 +};
 +
 +
 +void init_nrnb(t_nrnb *nrnb)
 +{
 +  int i;
 +
 +  for(i=0; (i<eNRNB); i++)
 +    nrnb->n[i]=0.0;
 +}
 +
 +void cp_nrnb(t_nrnb *dest, t_nrnb *src)
 +{
 +  int i;
 +
 +  for(i=0; (i<eNRNB); i++)
 +    dest->n[i]=src->n[i];
 +}
 +
 +void add_nrnb(t_nrnb *dest, t_nrnb *s1, t_nrnb *s2)
 +{
 +  int i;
 +
 +  for(i=0; (i<eNRNB); i++)
 +    dest->n[i]=s1->n[i]+s2->n[i];
 +}
 +
 +void print_nrnb(FILE *out, t_nrnb *nrnb)
 +{
 +  int i;
 +
 +  for(i=0; (i<eNRNB); i++)
 +    if (nrnb->n[i] > 0)
 +      fprintf(out," %-26s %10.0f.\n",nbdata[i].name,nrnb->n[i]);
 +}
 +
 +void _inc_nrnb(t_nrnb *nrnb,int enr,int inc,char *file,int line)
 +{
 +  nrnb->n[enr]+=inc;
 +#ifdef DEBUG_NRNB
 +  printf("nrnb %15s(%2d) incremented with %8d from file %s line %d\n",
 +        nbdata[enr].name,enr,inc,file,line);
 +#endif
 +}
 +
 +void print_flop(FILE *out,t_nrnb *nrnb,double *nbfs,double *mflop)
 +{
 +  int    i;
 +  double mni,frac,tfrac,tflop;
 +  const char   *myline = "-----------------------------------------------------------------------------";
 +  
 +  *nbfs = 0.0;
-   if (out) {
-     fprintf(out,"   RF=Reaction-Field  FE=Free Energy  SCFE=Soft-Core/Free Energy\n");
-     fprintf(out,"   T=Tabulated        W3=SPC/TIP3p    W4=TIP4p (single or pairs)\n");
-     fprintf(out,"   NF=No Forces\n\n");
-     
-     fprintf(out," %-32s %16s %15s  %7s\n",
-           "Computing:","M-Number","M-Flops","% Flops");
-     fprintf(out,"%s\n",myline);
++  for(i=0; (i<eNR_NBKERNEL_ALLVSALLGB); i++) {
 +    if (strstr(nbdata[i].name,"W3-W3") != NULL)
 +      *nbfs += 9e-6*nrnb->n[i];
 +    else if (strstr(nbdata[i].name,"W3") != NULL)
 +      *nbfs += 3e-6*nrnb->n[i];
 +    else if (strstr(nbdata[i].name,"W4-W4") != NULL)
 +      *nbfs += 10e-6*nrnb->n[i];
 +    else if (strstr(nbdata[i].name,"W4") != NULL)
 +      *nbfs += 4e-6*nrnb->n[i];
 +    else
 +      *nbfs += 1e-6*nrnb->n[i];
 +  }
 +  tflop=0;
 +  for(i=0; (i<eNRNB); i++) 
 +    tflop+=1e-6*nrnb->n[i]*nbdata[i].flop;
 +  
 +  if (tflop == 0) {
 +    fprintf(out,"No MEGA Flopsen this time\n");
 +    return;
 +  }
 +  if (out) {
 +    fprintf(out,"\n\tM E G A - F L O P S   A C C O U N T I N G\n\n");
 +  }
 +
-   eNR_FBPOSRES,  eNR_NS,     eNR_NBKERNEL_OUTER
++  if (out)
++  {
++      fprintf(out," NB=Group-cutoff nonbonded kernels    NxN=N-by-N tile Verlet kernels\n");
++      fprintf(out," RF=Reaction-Field  VdW=Van der Waals  CSTab=Cubic-spline table\n");
++      fprintf(out," W3=SPC/TIP3p  W4=TIP4p (single or pairs)\n");
++      fprintf(out," V&F=Potential and force  V=Potential only  F=Force only\n\n");
++
++      fprintf(out," %-32s %16s %15s  %7s\n",
++              "Computing:","M-Number","M-Flops","% Flops");
++      fprintf(out,"%s\n",myline);
 +  }
 +  *mflop=0.0;
 +  tfrac=0.0;
 +  for(i=0; (i<eNRNB); i++) {
 +    mni     = 1e-6*nrnb->n[i];
 +    *mflop += mni*nbdata[i].flop;
 +    frac    = 100.0*mni*nbdata[i].flop/tflop;
 +    tfrac  += frac;
 +    if (out && mni != 0)
 +      fprintf(out," %-32s %16.6f %15.3f  %6.1f\n",
 +            nbdata[i].name,mni,mni*nbdata[i].flop,frac);
 +  }
 +  if (out) {
 +    fprintf(out,"%s\n",myline);
 +    fprintf(out," %-32s %16s %15.3f  %6.1f\n",
 +          "Total","",*mflop,tfrac);
 +    fprintf(out,"%s\n\n",myline);
 +  }
 +}
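
The per-row arithmetic in print_flop above is easy to check by hand. The sketch below uses invented counts, and the helper name example_mflop_row is illustrative only, not part of GROMACS:

    static void example_mflop_row(void)
    {
        /* One row of the M E G A - F L O P S table, with invented counts. */
        double n_calls        = 2.0e6;              /* nrnb->n[i] for some counter   */
        int    flops_per_call = 38;                 /* nbdata[i].flop for that entry */
        double mni            = 1e-6*n_calls;       /* "M-Number" column: 2.000000   */
        double row_mflops     = mni*flops_per_call; /* "M-Flops"  column: 76.000     */

        /* "% Flops" is 100.0*row_mflops/tflop, where tflop sums all rows. */
        fprintf(stderr, "%.6f M-Number -> %.3f M-Flops\n", mni, row_mflops);
    }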
 +
 +void print_perf(FILE *out,double nodetime,double realtime,int nprocs,
 +              gmx_large_int_t nsteps,real delta_t,
 +              double nbfs,double mflop,
 +                int omp_nth_pp)
 +{
 +  real runtime;
 +
 +  fprintf(out,"\n");
 +
 +  if (realtime > 0) 
 +  {
 +    fprintf(out,"%12s %12s %12s %10s\n","","Core t (s)","Wall t (s)","(%)");
 +    fprintf(out,"%12s %12.3f %12.3f %10.1f\n","Time:",
 +          nodetime, realtime, 100.0*nodetime/realtime);
 +    /* only print day-hour-sec format if realtime is more than 30 min */
 +    if (realtime > 30*60)
 +    {
 +      fprintf(out,"%12s %12s","","");
 +      pr_difftime(out,realtime);
 +    }
 +    if (delta_t > 0) 
 +    {
 +      mflop = mflop/realtime;
 +      runtime = nsteps*delta_t;
 +
 +      if (getenv("GMX_DETAILED_PERF_STATS") == NULL)
 +      {
 +          fprintf(out,"%12s %12s %12s\n",
 +                  "","(ns/day)","(hour/ns)");
 +          fprintf(out,"%12s %12.3f %12.3f\n","Performance:",
 +                  runtime*24*3.6/realtime,1000*realtime/(3600*runtime));
 +      }
 +      else
 +      {
 +        fprintf(out,"%12s %12s %12s %12s %12s\n",
 +              "","(Mnbf/s)",(mflop > 1000) ? "(GFlops)" : "(MFlops)",
 +              "(ns/day)","(hour/ns)");
 +        fprintf(out,"%12s %12.3f %12.3f %12.3f %12.3f\n","Performance:",
 +              nbfs/realtime,(mflop > 1000) ? (mflop/1000) : mflop,
 +              runtime*24*3.6/realtime,1000*realtime/(3600*runtime));
 +      }
 +    } 
 +    else 
 +    {
 +      if (getenv("GMX_DETAILED_PERF_STATS") == NULL)
 +      {
 +          fprintf(out,"%12s %14s\n",
 +                  "","(steps/hour)");
 +          fprintf(out,"%12s %14.1f\n","Performance:",
 +                  nsteps*3600.0/realtime);
 +      }
 +      else
 +      {
 +          fprintf(out,"%12s %12s %12s %14s\n",
 +                "","(Mnbf/s)",(mflop > 1000) ? "(GFlops)" : "(MFlops)",
 +                "(steps/hour)");
 +          fprintf(out,"%12s %12.3f %12.3f %14.1f\n","Performance:",
 +            nbfs/realtime,(mflop > 1000) ? (mflop/1000) : mflop,
 +            nsteps*3600.0/realtime);
 +      }
 +    }
 +  }
 +}
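
The performance expressions in print_perf above can be verified with round, invented numbers: for nsteps = 500000 and delta_t = 0.002 ps, runtime = nsteps*delta_t = 1000 ps of simulated time. With realtime = 3600 s of wall clock, ns/day = runtime*24*3.6/realtime = 1000*86.4/3600 = 24.0 and hour/ns = 1000*realtime/(3600*runtime) = 1.0, i.e. one nanosecond simulated per hour of wall time. The factor 24*3.6 = 86.4 is simply 86400 s/day times 1e-3 ns/ps.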
 +
 +int cost_nrnb(int enr)
 +{
 +  return nbdata[enr].flop;
 +}
 +
 +const char *nrnb_str(int enr)
 +{
 +  return nbdata[enr].name;
 +}
 +
 +static const int    force_index[]={ 
 +  eNR_BONDS,  eNR_ANGLES,  eNR_PROPER, eNR_IMPROPER, 
 +  eNR_RB,     eNR_DISRES,  eNR_ORIRES, eNR_POSRES,
-       for(j=0; (j<eNR_NBKERNEL_NR); j++)
++  eNR_FBPOSRES, eNR_NS,
 +};
 +#define NFORCE_INDEX asize(force_index)
 +
 +static const int    constr_index[]={ 
 +  eNR_SHAKE,     eNR_SHAKE_RIJ, eNR_SETTLE,       eNR_UPDATE,       eNR_PCOUPL,
 +  eNR_CONSTR_VIR,eNR_CONSTR_V
 +};
 +#define NCONSTR_INDEX asize(constr_index)
 +
 +static double pr_av(FILE *log,t_commrec *cr,
 +                  double fav,double ftot[],const char *title)
 +{
 +  int    i,perc;
 +  double dperc,unb;
 +  
 +  unb=0;
 +  if (fav > 0) {
 +    fav /= cr->nnodes - cr->npmenodes;
 +    fprintf(log,"\n %-26s",title);
 +    for(i=0; (i<cr->nnodes); i++) {
 +      dperc=(100.0*ftot[i])/fav;
 +      unb=max(unb,dperc);
 +      perc=dperc;
 +      fprintf(log,"%3d ",perc);
 +    }
 +    if (unb > 0) {
 +      perc=10000.0/unb;
 +      fprintf(log,"%6d%%\n\n",perc);
 +    }
 +    else
 +      fprintf(log,"\n\n");
 +  }
 +  return unb;
 +}
 +
 +void pr_load(FILE *log,t_commrec *cr,t_nrnb nrnb[])
 +{
 +  int    i,j,perc;
 +  double dperc,unb,uf,us;
 +  double *ftot,fav;
 +  double *stot,sav;
 +  t_nrnb *av;
 +
 +  snew(av,1);
 +  snew(ftot,cr->nnodes);
 +  snew(stot,cr->nnodes);
 +  init_nrnb(av);
 +  for(i=0; (i<cr->nnodes); i++) {
 +      add_nrnb(av,av,&(nrnb[i]));
 +      /* Cost due to forces */
++      for(j=0; (j<eNR_NBKERNEL_ALLVSALLGB); j++)
 +      ftot[i]+=nrnb[i].n[j]*cost_nrnb(j);
 +      for(j=0; (j<NFORCE_INDEX); j++) 
 +      ftot[i]+=nrnb[i].n[force_index[j]]*cost_nrnb(force_index[j]);
 +      /* Due to shake */
 +      for(j=0; (j<NCONSTR_INDEX); j++) {
 +      stot[i]+=nrnb[i].n[constr_index[j]]*cost_nrnb(constr_index[j]);
 +      }
 +  }   
 +  for(j=0; (j<eNRNB); j++)
 +    av->n[j]=av->n[j]/(double)(cr->nnodes - cr->npmenodes);
 +    
 +    fprintf(log,"\nDetailed load balancing info in percentage of average\n");
 +  
 +  fprintf(log," Type                 NODE:");
 +  for(i=0; (i<cr->nnodes); i++)
 +      fprintf(log,"%3d ",i);
 +  fprintf(log,"Scaling\n");
 +  fprintf(log,"---------------------------");
 +  for(i=0; (i<cr->nnodes); i++)
 +      fprintf(log,"----");
 +  fprintf(log,"-------\n");
 +  
 +  for(j=0; (j<eNRNB); j++) {
 +    unb=100.0;
 +    if (av->n[j] > 0) {
 +      fprintf(log," %-26s",nrnb_str(j));
 +      for(i=0; (i<cr->nnodes); i++) {
 +        dperc=(100.0*nrnb[i].n[j])/av->n[j];
 +        unb=max(unb,dperc);
 +        perc=dperc;
 +        fprintf(log,"%3d ",perc);
 +      }
 +      if (unb > 0) {
 +      perc=10000.0/unb;
 +      fprintf(log,"%6d%%\n",perc);
 +      }
 +      else
 +      fprintf(log,"\n");
 +    }   
 +  }
 +  fav=sav=0;
 +  for(i=0; (i<cr->nnodes); i++) {
 +    fav+=ftot[i];
 +    sav+=stot[i];
 +  }
 +  uf=pr_av(log,cr,fav,ftot,"Total Force");
 +  us=pr_av(log,cr,sav,stot,"Total Constr.");
 +  
 +  unb=(uf*fav+us*sav)/(fav+sav);
 +  if (unb > 0) {
 +    unb=10000.0/unb;
 +    fprintf(log,"\nTotal Scaling: %.0f%% of max performance\n\n",unb);
 +  }
 +}
 +
index 36e177b14dd76ba164809b2cdb9b7018526d72f5,0000000000000000000000000000000000000000..36caebca45abca73089bf8c4bdaf55b99e02933e
mode 100644,000000..100644
--- /dev/null
@@@ -1,599 -1,0 +1,629 @@@
-   char ch1,ch2;
-   
-   do
 +/*
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +/* This file is completely threadsafe - keep it that way! */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +#include "gromacs/utility/gmx_header_config.h"
 +
 +#ifdef GMX_CRAY_XT3
 +#undef HAVE_PWD_H
 +#endif
 +
 +#include <stdio.h>
 +#include <ctype.h>
 +#include <stdlib.h>
 +#include <errno.h>
 +#include <sys/types.h>
 +#include <time.h>
 +
 +#ifdef HAVE_SYS_TIME_H
 +#include <sys/time.h>
 +#endif
 +
 +#ifdef HAVE_UNISTD_H
 +#include <unistd.h>
 +#endif
 +
 +#ifdef HAVE_PWD_H
 +#include <pwd.h>
 +#endif
 +#include <time.h>
 +#include <assert.h>
 +
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "gmx_fatal.h"
 +#include "macros.h"
 +#include "string2.h"
 +#include "futil.h"
 +
 +int continuing(char *s)
 +{
 +  int sl;
 +  assert(s);
 +
 +  rtrim(s);
 +  sl = strlen(s);
 +  if ((sl > 0) && (s[sl-1] == CONTINUE)) {
 +    s[sl-1] = 0;
 +    return TRUE;
 +  }
 +  else
 +    return FALSE;
 +}
 +
 +
 +
 +char *fgets2(char *line, int n, FILE *stream)
 +{
 +  char *c;
 +  if (fgets(line,n,stream) == NULL) {
 +    return NULL;
 +  }
 +  if ((c=strchr(line,'\n')) != NULL) {
 +    *c = '\0';
 +  } else {
 +    /* A line not ending in a newline can only occur at the end of a file,
 +     * or because of n being too small.
 +     * Since both cases occur very infrequently, we can check for EOF.
 +     */
 +    if (!gmx_eof(stream)) {
 +      gmx_fatal(FARGS,"An input file contains a line longer than %d characters, while the buffer passed to fgets2 has size %d. The line starts with: '%20.20s'",n,n,line);
 +    }
 +  }
 +  if ((c=strchr(line,'\r')) != NULL) {
 +    *c = '\0';
 +  }
 +
 +  return line;
 +}
 +
 +void strip_comment (char *line)
 +{
 +  char *c;
 +
 +  if (!line)
 +    return;
 +
 +  /* search for a comment mark and replace it by a zero */
 +  if ((c = strchr(line,COMMENTSIGN)) != NULL) 
 +    (*c) = 0;
 +}
 +
 +void upstring (char *str)
 +{
 +  int i;
 +
 +  for (i=0; (i < (int)strlen(str)); i++) 
 +    str[i] = toupper(str[i]);
 +}
 +
 +void ltrim (char *str)
 +{
 +  char *tr;
 +  int i,c;
 +
 +  if (NULL == str)
 +    return;
 +
 +  c = 0;
 +  while (('\0' != str[c]) && isspace(str[c]))
 +    c++;
 +  if (c > 0) 
 +    {
 +      for(i=c; ('\0' != str[i]); i++)
 +      str[i-c] = str[i];
 +      str[i-c] = '\0';
 +    }
 +}
 +
 +void rtrim (char *str)
 +{
 +  int nul;
 +
 +  if (NULL == str)
 +    return;
 +
 +  nul = strlen(str)-1;
 +  while ((nul > 0) && ((str[nul] == ' ') || (str[nul] == '\t')) ) {
 +    str[nul] = '\0';
 +    nul--;
 +  }
 +}
 +
 +void trim (char *str)
 +{
 +  ltrim (str);
 +  rtrim (str);
 +}
 +
 +char *
 +gmx_ctime_r(const time_t *clock,char *buf, int n)
 +{
 +    char tmpbuf[STRLEN];
 +  
 +#ifdef GMX_NATIVE_WINDOWS
 +    /* Windows */
 +    ctime_s( tmpbuf, STRLEN, clock );
 +#elif (defined(__sun))
 +    /*Solaris*/
 +    ctime_r(clock, tmpbuf, n);
 +#else
 +    ctime_r(clock,tmpbuf);
 +#endif
 +    strncpy(buf,tmpbuf,n-1);
 +    buf[n-1]='\0';
 +    
 +    return buf;
 +}
 +          
 +void nice_header (FILE *out,const char *fn)
 +{
 +  const char *unk = "onbekend";
 +  time_t clock;
 +  const char *user=unk;
 +  int    gh;
 +#ifdef HAVE_PWD_H
 +  uid_t  uid;
 +#else
 +  int    uid;
 +#endif
 +  char   buf[256]="";
 +  char   timebuf[STRLEN];
 +#ifdef HAVE_PWD_H
 +  struct passwd *pw;
 +#endif
 +
 +  /* Print a nice header above the file */
 +  time(&clock);
 +  fprintf (out,"%c\n",COMMENTSIGN);
 +  fprintf (out,"%c\tFile '%s' was generated\n",COMMENTSIGN,fn ? fn : unk);
 +  
 +#ifdef HAVE_PWD_H
 +  uid = getuid();
 +  pw  = getpwuid(uid);
 +  gh  = gethostname(buf,255);
 +  user= pw->pw_name;
 +#else
 +  uid = 0;
 +  gh  = -1;
 +#endif
 +  
 +  gmx_ctime_r(&clock,timebuf,STRLEN);
 +  fprintf (out,"%c\tBy user: %s (%d)\n",COMMENTSIGN,
 +         user ? user : unk,(int) uid);
 +  fprintf(out,"%c\tOn host: %s\n",COMMENTSIGN,(gh == 0) ? buf : unk);
 +
 +  fprintf (out,"%c\tAt date: %s",COMMENTSIGN,timebuf);
 +  fprintf (out,"%c\n",COMMENTSIGN);
 +}
 +
++
 +int gmx_strcasecmp_min(const char *str1, const char *str2)
 +{
-       do
-       ch1=toupper(*(str1++));
-       while ((ch1=='-') || (ch1=='_'));
-       do 
-       ch2=toupper(*(str2++));
-       while ((ch2=='-') || (ch2=='_'));
-       if (ch1!=ch2) return (ch1-ch2);
++    char ch1,ch2;
++
++    do
 +    {
-   while (ch1);
-   return 0; 
++        do
++        {
++            ch1=toupper(*(str1++));
++        }
++        while ((ch1=='-') || (ch1=='_'));
++        do
++        {
++            ch2=toupper(*(str2++));
++        }
++        while ((ch2=='-') || (ch2=='_'));
++
++        if (ch1!=ch2) return (ch1-ch2);
 +    }
-   char ch1,ch2;
-   char *stri1, *stri2;
++    while (ch1);
++    return 0;
 +}
 +
 +int gmx_strncasecmp_min(const char *str1, const char *str2, int n)
 +{
-   stri1=(char *)str1;
-   stri2=(char *)str2;  
-   do
++    char ch1,ch2;
++    char *stri1, *stri2;
 +
-       do
-       ch1=toupper(*(str1++));
-       while ((ch1=='-') || (ch1=='_'));
-       do 
-       ch2=toupper(*(str2++));
-       while ((ch2=='-') || (ch2=='_'));
-       if (ch1!=ch2) return (ch1-ch2);
++    stri1=(char *)str1;
++    stri2=(char *)str2;
++    do
 +    {
-   while (ch1 && (str1-stri1<n) && (str2-stri2<n));
++        do
++        {
++            ch1=toupper(*(str1++));
++        }
++        while ((ch1=='-') || (ch1=='_'));
++        do
++        {
++            ch2=toupper(*(str2++));
++        }
++        while ((ch2=='-') || (ch2=='_'));
++
++        if (ch1!=ch2) return (ch1-ch2);
 +    }
++    while (ch1 && (str1-stri1<n) && (str2-stri2<n));
 +  return 0; 
 +}
 +
 +int gmx_strcasecmp(const char *str1, const char *str2)
 +{
 +  char ch1,ch2;
 +  
 +  do
 +    {
 +      ch1=toupper(*(str1++));
 +      ch2=toupper(*(str2++));
 +      if (ch1!=ch2) return (ch1-ch2);
 +    }
 +  while (ch1);
 +  return 0; 
 +}
 +
 +int gmx_strncasecmp(const char *str1, const char *str2, int n)
 +{
 +  char ch1,ch2;
 + 
 +  if(n==0) 
 +    return 0;
 +
 +  do
 +    {
 +      ch1=toupper(*(str1++));
 +      ch2=toupper(*(str2++));
 +      if (ch1!=ch2) return (ch1-ch2);
 +      n--;
 +    }
 +  while (ch1 && n);
 +  return 0; 
 +}
 +
 +char *gmx_strdup(const char *src)
 +{
 +  char *dest;
 +
 +  snew(dest,strlen(src)+1);
 +  strcpy(dest,src);
 +  
 +  return dest;
 +}
 +
 +char *
 +gmx_strndup(const char *src, int n)
 +{
 +    int   len;
 +    char *dest;
 +
 +    len = strlen(src);
 +    if (len > n) 
 +    {
 +        len = n;
 +    }
 +    snew(dest, len+1);
 +    strncpy(dest, src, len);
 +    dest[len] = 0;
 +    return dest;
 +}
 +
++/* Magic hash init number for Dan J. Bernstein's algorithm.
++ * Do NOT use any other value unless you really know what you are doing.
++ */
++const unsigned int
++gmx_string_hash_init = 5381;
++
++
++unsigned int
++gmx_string_hash_func(const char *s, unsigned int hash_init)
++{
++    int c;
++
++    while ((c = toupper(*s++)) != '\0')
++    {
++        if(isalnum(c)) hash_init = ((hash_init << 5) + hash_init) ^ c; /* (hash * 33) xor c */
++    }
++    return hash_init;
++}
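
A minimal usage sketch for gmx_string_hash_func above; the helper name and the example strings are invented. Only alphanumeric characters are mixed in, and each is upper-cased first, so names that differ only in case, '-' or '_' hash identically:

    static void example_string_hash(void)
    {
        unsigned int h1, h2, hc;

        h1 = gmx_string_hash_func("SOL",   gmx_string_hash_init);
        h2 = gmx_string_hash_func("s-o_l", gmx_string_hash_init);
        assert(h1 == h2);  /* case, '-' and '_' do not affect the hash */

        /* A previous result can be passed as hash_init to hash a composite
         * key such as ("SOL","OW") without building a temporary string. */
        hc = gmx_string_hash_func("OW", h1);
        (void)hc;
    }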
++
 +int
 +gmx_wcmatch(const char *pattern, const char *str)
 +{
 +    while (*pattern)
 +    {
 +        if (*pattern == '*')
 +        {
 +            /* Skip multiple wildcards in a sequence */
 +            while (*pattern == '*' || *pattern == '?')
 +            {
 +                ++pattern;
 +                /* For ?, we need to check that there are characters left
 +                 * in str. */
 +                if (*pattern == '?')
 +                {
 +                    if (*str == 0)
 +                    {
 +                        return GMX_NO_WCMATCH;
 +                    }
 +                    else
 +                    {
 +                        ++str;
 +                    }
 +                }
 +            }
 +            /* If the pattern ends after the star, we have a match */
 +            if (*pattern == 0)
 +            {
 +                return 0;
 +            }
 +            /* Match the rest against each possible suffix of str */
 +            while (*str)
 +            {
 +                /* Only do the recursive call if the first character
 +                 * matches. We don't have to worry about wildcards here,
 +                 * since we have processed them above. */
 +                if (*pattern == *str)
 +                {
 +                    int rc;
 +                    /* Match the suffix, and return if a match or an error */
 +                    rc = gmx_wcmatch(pattern, str);
 +                    if (rc != GMX_NO_WCMATCH)
 +                    {
 +                        return rc;
 +                    }
 +                }
 +                ++str;
 +            }
 +            /* If no suffix of str matches, we don't have a match */
 +            return GMX_NO_WCMATCH;
 +        }
 +        else if ((*pattern == '?' && *str != 0) || *pattern == *str)
 +        {
 +            ++str;
 +        }
 +        else
 +        {
 +            return GMX_NO_WCMATCH;
 +        }
 +        ++pattern;
 +    }
 +    /* When the pattern runs out, we have a match if the string has ended. */
 +    return (*str == 0) ? 0 : GMX_NO_WCMATCH;
 +}
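
A small illustration of gmx_wcmatch's contract; the helper name and the file names are invented:

    static void example_wcmatch(void)
    {
        /* '*' matches any run of characters, '?' exactly one character;
         * matching is case-sensitive and must consume the whole string.
         * Returns 0 on a match and GMX_NO_WCMATCH otherwise. */
        assert(gmx_wcmatch("*.gro", "conf.gro") == 0);
        assert(gmx_wcmatch("conf?", "confA")    == 0);
        assert(gmx_wcmatch("*.gro", "conf.pdb") == GMX_NO_WCMATCH);
    }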
 +
 +char *wrap_lines(const char *buf,int line_width, int indent,gmx_bool bIndentFirst)
 +{
 +  char *b2;
 +  int i,i0,i2,j,b2len,lspace=0,l2space=0;
 +  gmx_bool bFirst,bFitsOnLine;
 +
 +  /* characters are copied from buf to b2 with possible spaces changed
 +   * into newlines and extra space added for indentation.
 +   * i indexes buf (source buffer) and i2 indexes b2 (destination buffer)
 +   * i0 points to the beginning of the current line (in buf, source)
 +   * lspace and l2space point to the last space on the current line
 +   * bFirst is set to prevent indentation of first line
 +   * bFitsOnLine says whether the first space occurred before line_width;
 +   * if that is not the case, we have a word longer than line_width which
 +   * will not fit on the next line either, so we might as well keep it on
 +   * the current line (where it also won't fit, but looks better)
 +   */
 +  
 +  b2=NULL;
 +  b2len=strlen(buf)+1+indent;
 +  snew(b2,b2len);
 +  i0=i2=0;
 +  if (bIndentFirst)
 +    for(i2=0; (i2<indent); i2++)
 +      b2[i2] = ' ';
 +  bFirst=TRUE;
 +  do {
 +    l2space = -1;
 +    /* find the last space before end of line */
 +    for(i=i0; ((i-i0 < line_width) || (l2space==-1)) && (buf[i]); i++) {
 +      b2[i2++] = buf[i];
 +      /* remember the position of a space */
 +      if (buf[i] == ' ') {
 +        lspace = i;
 +      l2space = i2-1;
 +      }
 +      /* if we have a newline before the line is full, reset counters */
 +      if (buf[i]=='\n' && buf[i+1]) { 
 +      i0=i+1;
 +      b2len+=indent;
 +      srenew(b2, b2len);
 +      /* add indentation after the newline */
 +      for(j=0; (j<indent); j++)
 +        b2[i2++]=' ';
 +      }
 +    }
 +    /* If we are at the last newline, copy it */
 +    if (buf[i]=='\n' && !buf[i+1]) {
 +      b2[i2++] = buf[i++];
 +    }
 +    /* if we're not at the end of the string */
 +    if (buf[i]) {
 +      /* check if one word does not fit on the line */
 +      bFitsOnLine = (i-i0 <= line_width);
 +      /* reset line counters to just after the space */
 +      i0 = lspace+1;
 +      i2 = l2space+1;
 +      /* if the words fit on the line, and we're beyond the indentation part */
 +      if ( (bFitsOnLine) && (l2space >= indent) ) {
 +      /* start a new line */
 +      b2[l2space] = '\n';
 +      /* and add indentation */
 +      if (indent) {
 +        if (bFirst) {
 +          line_width-=indent;
 +          bFirst=FALSE;
 +        }
 +        b2len+=indent;
 +        srenew(b2, b2len);
 +        for(j=0; (j<indent); j++)
 +          b2[i2++]=' ';
 +        /* no extra spaces after indent; */
 +        while(buf[i0]==' ')
 +          i0++;
 +      }
 +      }
 +    }
 +  } while (buf[i]);
 +  b2[i2] = '\0';
 +  
 +  return b2;
 +}
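
A usage sketch for wrap_lines; the helper name and the input string are invented, and only the general behavior is shown (newlines are inserted at spaces, continuation lines get the requested indent):

    static void example_wrap_lines(void)
    {
        /* Wrap at roughly 20 columns and indent continuation lines by four
         * spaces (bIndentFirst = FALSE); the result is snew()-allocated. */
        char *wrapped;

        wrapped = wrap_lines("this sentence is longer than the requested line width",
                             20, 4, FALSE);
        fprintf(stderr, "%s\n", wrapped);
        sfree(wrapped);
    }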
 +
 +char **split(char sep,const char *str)
 +{
 +  char **ptr = NULL;
 +  int  n,nn,nptr = 0;
 +  
 +  if (str == NULL)
 +    return NULL;
 +  nn = strlen(str);
 +  for(n=0; (n<nn); n++)
 +    if (str[n] == sep)
 +      nptr++;
 +  snew(ptr,nptr+2);
 +  nptr = 0;
 +  while (*str != '\0') {
 +    while ((*str != '\0') && (*str == sep))
 +      str++;
 +    if (*str != '\0') {
 +      snew(ptr[nptr],1+strlen(str));
 +      n = 0;
 +      while ((*str != '\0') && (*str != sep)) {
 +      ptr[nptr][n] = *str;
 +      str++;
 +      n++;
 +      }
 +      ptr[nptr][n] = '\0';
 +      nptr++;
 +    }
 +  }
 +  ptr[nptr] = NULL;
 +  
 +  return ptr;
 +}
 +
 +
 +gmx_large_int_t
 +str_to_large_int_t(const char *str, char **endptr)
 +{
 +      int         sign = 1;
 +      gmx_large_int_t  val  = 0;
 +      char        ch;
 +      const char  *p;
 +      
 +      p = str;
 +      if(p==NULL)
 +      {
 +              *endptr=NULL;
 +              return 0;
 +      }
 +      
 +      /* Strip off initial white space */
 +      while(isspace(*p))
 +      {
 +              p++;
 +      }
 +      /* Conform to ISO C99 - return original pointer if string does not contain a number */
 +      if(*str=='\0')
 +      {
 +              *endptr=(char *)str;
 +      }
 +      
 +      if(*p=='-')
 +      {
 +              p++;
 +              sign *= -1;
 +      }
 +      
 +      while( ((ch=*p) != '\0') && isdigit(ch) )
 +      {
 +              /* Important to add sign here, so we don't overflow in final multiplication */
 +              ch = (ch-'0')*sign; 
 +              val = val*10 + ch;
 +              if(ch != val%10) 
 +              {
 +                      /* Some sort of overflow has occurred, set endptr to original string */
 +                      *endptr=(char *)str;
 +                      errno = ERANGE;
 +                      return(0);
 +              }
 +              p++;
 +      }
 +      
 +      *endptr=(char *)p;
 +      
 +      return val;
 +}
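
A usage sketch for str_to_large_int_t; the helper name and the parsed string are invented:

    static void example_str_to_large_int_t(void)
    {
        /* strtol-style parsing: *endptr is set to the first unparsed
         * character, and overflow is reported via errno = ERANGE. */
        char            *end;
        gmx_large_int_t  val;

        val = str_to_large_int_t("  -5000000 steps", &end);
        assert(val == -5000000);  /* end now points at the text " steps" */
    }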
 +
 +char *gmx_strsep(char **stringp, const char *delim)
 +{
 +    char *ret;
 +    int len=strlen(delim);
 +    int i,j=0;
 +    int found=0;
 +
 +    if (! *stringp)
 +        return NULL;
 +    ret=*stringp;
 +    do
 +    {
 +        if ( (*stringp)[j] == '\0')
 +        {
 +            found=1;
 +            *stringp=NULL;
 +            break;
 +        }
 +        for (i=0;i<len;i++)
 +        {
 +            if ( (*stringp)[j]==delim[i])
 +            {
 +                (*stringp)[j]='\0';
 +                *stringp=*stringp+j+1;
 +                found=1;
 +                break;
 +            }
 +        }
 +        j++;
 +    } while (!found);
 +
 +    return ret;
 +}
 +
index 412c76945e55e5c9f6180af319299a97d5be0b33,0000000000000000000000000000000000000000..5375ae056a15899ef54d5b3ec86883f78d41ef22
mode 100644,000000..100644
--- /dev/null
@@@ -1,2935 -1,0 +1,2944 @@@
- static const int tpx_version = 90;
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +/* This file is completely threadsafe - keep it that way! */
 +#ifdef GMX_THREAD_MPI
 +#include <thread_mpi.h>
 +#endif
 +
 +
 +#include <ctype.h>
 +#include "sysstuff.h"
 +#include "smalloc.h"
 +#include "string2.h"
 +#include "gmx_fatal.h"
 +#include "macros.h"
 +#include "names.h"
 +#include "symtab.h"
 +#include "futil.h"
 +#include "filenm.h"
 +#include "gmxfio.h"
 +#include "topsort.h"
 +#include "tpxio.h"
 +#include "txtdump.h"
 +#include "confio.h"
 +#include "atomprop.h"
 +#include "copyrite.h"
 +#include "vec.h"
 +#include "mtop_util.h"
 +
 +#define TPX_TAG_RELEASE  "release"
 +
 +/* This is the tag string which is stored in the tpx file.
 + * Change this if you want to change the tpx format in a feature branch.
 + * This ensures that there will not be different tpx formats around which
 + * can not be distinguished.
 + */
 +static const char *tpx_tag = TPX_TAG_RELEASE;
 +
 +/* This number should be increased whenever the file format changes! */
-     gmx_fio_do_int(fio,ir->coulombtype); 
++static const int tpx_version = 91;
 +
 +/* This number should only be increased when you edit the TOPOLOGY section
 + * or the HEADER of the tpx format.
 + * This way we can maintain forward compatibility too for all analysis tools
 + * and/or external programs that only need to know the atom/residue names,
 + * charges, and bond connectivity.
 + *  
 + * It first appeared in tpx version 26, when I also moved the inputrecord
 + * to the end of the tpx file, so we can just skip it if we only
 + * want the topology.
 + */
 +static const int tpx_generation = 25;
 +
 +/* This number should be the most recent backwards incompatible version 
 + * I.e., if this number is 9, we cannot read tpx version 9 with this code.
 + */
 +static const int tpx_incompatible_version = 9;
 +
 +
 +
 +/* Struct used to maintain tpx compatibility when function types are added */
 +typedef struct {
 +  int fvnr; /* file version number in which the function type first appeared */
 +  int ftype; /* function type */
 +} t_ftupd;
 +
 +/* 
 + *The entries should be ordered in:
 + * 1. ascending file version number
 + * 2. ascending function type number
 + */
 +/*static const t_ftupd ftupd[] = {
 +  { 20, F_CUBICBONDS        },
 +  { 20, F_CONNBONDS         },
 +  { 20, F_HARMONIC          },
 +  { 20, F_EQM,              },
 +  { 22, F_DISRESVIOL        },
 +  { 22, F_ORIRES            },
 +  { 22, F_ORIRESDEV         },
 +  { 26, F_FOURDIHS          },
 +  { 26, F_PIDIHS            },
 +  { 26, F_DIHRES            },
 +  { 26, F_DIHRESVIOL        },
 +  { 30, F_CROSS_BOND_BONDS  },
 +  { 30, F_CROSS_BOND_ANGLES },
 +  { 30, F_UREY_BRADLEY      },
 +  { 30, F_POLARIZATION      },
 +  { 54, F_DHDL_CON          },
 +  };*/
 +/* 
 + *The entries should be ordered in:
 + * 1. ascending function type number
 + * 2. ascending file version number
 + */
 +/* question; what is the purpose of the commented code above? */
 +static const t_ftupd ftupd[] = {
 +  { 20, F_CUBICBONDS        },
 +  { 20, F_CONNBONDS         },
 +  { 20, F_HARMONIC          },
 +  { 34, F_FENEBONDS         },
 +  { 43, F_TABBONDS          },
 +  { 43, F_TABBONDSNC        },
 +  { 70, F_RESTRBONDS        },
 +  { 76, F_LINEAR_ANGLES     },
 +  { 30, F_CROSS_BOND_BONDS  },
 +  { 30, F_CROSS_BOND_ANGLES },
 +  { 30, F_UREY_BRADLEY      },
 +  { 34, F_QUARTIC_ANGLES    },
 +  { 43, F_TABANGLES         },
 +  { 26, F_FOURDIHS          },
 +  { 26, F_PIDIHS            },
 +  { 43, F_TABDIHS           },
 +  { 65, F_CMAP              },
 +  { 60, F_GB12              },
 +  { 61, F_GB13              },
 +  { 61, F_GB14              },        
 +  { 72, F_GBPOL             },
 +  { 72, F_NPSOLVATION       },
 +  { 41, F_LJC14_Q           },
 +  { 41, F_LJC_PAIRS_NB      },
 +  { 32, F_BHAM_LR           },
 +  { 32, F_RF_EXCL           },
 +  { 32, F_COUL_RECIP        },
 +  { 46, F_DPD               },
 +  { 30, F_POLARIZATION      },
 +  { 36, F_THOLE_POL         },
 +  { 80, F_FBPOSRES          },
 +  { 22, F_DISRESVIOL        },
 +  { 22, F_ORIRES            },
 +  { 22, F_ORIRESDEV         },
 +  { 26, F_DIHRES            },
 +  { 26, F_DIHRESVIOL        },
 +  { 49, F_VSITE4FDN         },
 +  { 50, F_VSITEN            },
 +  { 46, F_COM_PULL          },
 +  { 20, F_EQM               },
 +  { 46, F_ECONSERVED        },
 +  { 69, F_VTEMP             },
 +  { 66, F_PDISPCORR         },
 +  { 54, F_DHDL_CON          },
 +  { 76, F_ANHARM_POL        },
 +  { 79, F_DVDL_COUL         },
 +  { 79, F_DVDL_VDW,         },
 +  { 79, F_DVDL_BONDED,      },
 +  { 79, F_DVDL_RESTRAINT    },
 +  { 79, F_DVDL_TEMPERATURE  },
 +  { 54, F_DHDL_CON          }
 +};
 +#define NFTUPD asize(ftupd)
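
The ftupd table above exists to keep reading of older tpx files working when new interaction types are added. A minimal sketch of the kind of lookup it enables follows; the helper name ftype_in_file_version is invented for illustration and is not the code GROMACS actually uses:

    /* Illustrative sketch only: decide whether a given function type can
     * occur in a tpx file of a given version by scanning ftupd above.
     * Function types not listed in the table predate it. */
    static gmx_bool ftype_in_file_version(int ftype, int file_version)
    {
        int i;

        for (i = 0; i < NFTUPD; i++)
        {
            if (ftupd[i].ftype == ftype)
            {
                return (file_version >= ftupd[i].fvnr);
            }
        }
        return TRUE;
    }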
 +
 +/* Needed for backward compatibility */
 +#define MAXNODES 256
 +
 +static void _do_section(t_fileio *fio,int key,gmx_bool bRead,const char *src,
 +                        int line)
 +{
 +  char buf[STRLEN];
 +  gmx_bool bDbg;
 +
 +  if (gmx_fio_getftp(fio) == efTPA) {
 +    if (!bRead) {
 +      gmx_fio_write_string(fio,itemstr[key]);
 +      bDbg       = gmx_fio_getdebug(fio);
 +      gmx_fio_setdebug(fio,FALSE);
 +      gmx_fio_write_string(fio,comment_str[key]);
 +      gmx_fio_setdebug(fio,bDbg);
 +    }
 +    else {
 +      if (gmx_fio_getdebug(fio))
 +      fprintf(stderr,"Looking for section %s (%s, %d)",
 +              itemstr[key],src,line);
 +      
 +      do {
 +      gmx_fio_do_string(fio,buf);
 +      } while ((gmx_strcasecmp(buf,itemstr[key]) != 0));
 +      
 +      if (gmx_strcasecmp(buf,itemstr[key]) != 0) 
 +      gmx_fatal(FARGS,"\nCould not find section heading %s",itemstr[key]);
 +      else if (gmx_fio_getdebug(fio))
 +      fprintf(stderr," and found it\n");
 +    }
 +  }
 +}
 +
 +#define do_section(fio,key,bRead) _do_section(fio,key,bRead,__FILE__,__LINE__)
 +
 +/**************************************************************
 + *
 + * Now the higher level routines that do I/O of the structures and arrays
 + *
 + **************************************************************/
 +static void do_pullgrp(t_fileio *fio, t_pullgrp *pgrp, gmx_bool bRead, 
 +                       int file_version)
 +{
 +  gmx_bool bDum=TRUE;
 +  int  i;
 +
 +  gmx_fio_do_int(fio,pgrp->nat);
 +  if (bRead)
 +    snew(pgrp->ind,pgrp->nat);
 +  bDum=gmx_fio_ndo_int(fio,pgrp->ind,pgrp->nat);
 +  gmx_fio_do_int(fio,pgrp->nweight);
 +  if (bRead)
 +    snew(pgrp->weight,pgrp->nweight);
 +  bDum=gmx_fio_ndo_real(fio,pgrp->weight,pgrp->nweight);
 +  gmx_fio_do_int(fio,pgrp->pbcatom);
 +  gmx_fio_do_rvec(fio,pgrp->vec);
 +  gmx_fio_do_rvec(fio,pgrp->init);
 +  gmx_fio_do_real(fio,pgrp->rate);
 +  gmx_fio_do_real(fio,pgrp->k);
 +  if (file_version >= 56) {
 +    gmx_fio_do_real(fio,pgrp->kB);
 +  } else {
 +    pgrp->kB = pgrp->k;
 +  }
 +}
 +
 +static void do_expandedvals(t_fileio *fio,t_expanded *expand,int n_lambda, gmx_bool bRead, int file_version)
 +{
 +  /* i is used in the ndo_double macro*/
 +  int i;
 +  real fv;
 +  gmx_bool bDum=TRUE;
 +  real rdum;
 +
 +  if (file_version >= 79)
 +  {
 +      if (n_lambda>0)
 +      {
 +          if (bRead)
 +          {
 +              snew(expand->init_lambda_weights,n_lambda);
 +          }
 +          bDum=gmx_fio_ndo_real(fio,expand->init_lambda_weights,n_lambda);
 +          gmx_fio_do_gmx_bool(fio,expand->bInit_weights);
 +      }
 +
 +      gmx_fio_do_int(fio,expand->nstexpanded);
 +      gmx_fio_do_int(fio,expand->elmcmove);
 +      gmx_fio_do_int(fio,expand->elamstats);
 +      gmx_fio_do_int(fio,expand->lmc_repeats);
 +      gmx_fio_do_int(fio,expand->gibbsdeltalam);
 +      gmx_fio_do_int(fio,expand->lmc_forced_nstart);
 +      gmx_fio_do_int(fio,expand->lmc_seed);
 +      gmx_fio_do_real(fio,expand->mc_temp);
 +      gmx_fio_do_int(fio,expand->bSymmetrizedTMatrix);
 +      gmx_fio_do_int(fio,expand->nstTij);
 +      gmx_fio_do_int(fio,expand->minvarmin);
 +      gmx_fio_do_int(fio,expand->c_range);
 +      gmx_fio_do_real(fio,expand->wl_scale);
 +      gmx_fio_do_real(fio,expand->wl_ratio);
 +      gmx_fio_do_real(fio,expand->init_wl_delta);
 +      gmx_fio_do_gmx_bool(fio,expand->bWLoneovert);
 +      gmx_fio_do_int(fio,expand->elmceq);
 +      gmx_fio_do_int(fio,expand->equil_steps);
 +      gmx_fio_do_int(fio,expand->equil_samples);
 +      gmx_fio_do_int(fio,expand->equil_n_at_lam);
 +      gmx_fio_do_real(fio,expand->equil_wl_delta);
 +      gmx_fio_do_real(fio,expand->equil_ratio);
 +  }
 +}
 +
 +static void do_simtempvals(t_fileio *fio,t_simtemp *simtemp, int n_lambda, gmx_bool bRead, 
 +                           int file_version)
 +{
 +  gmx_bool bDum=TRUE;
 +
 +  if (file_version >= 79)
 +  {
 +      gmx_fio_do_int(fio,simtemp->eSimTempScale);
 +      gmx_fio_do_real(fio,simtemp->simtemp_high);
 +      gmx_fio_do_real(fio,simtemp->simtemp_low);
 +      if (n_lambda>0)
 +      {
 +          if (bRead)
 +          {
 +              snew(simtemp->temperatures,n_lambda);
 +          }
 +          bDum=gmx_fio_ndo_real(fio,simtemp->temperatures,n_lambda);
 +      }
 +  }
 +}
 +
 +static void do_fepvals(t_fileio *fio,t_lambda *fepvals,gmx_bool bRead, int file_version)
 +{
 +  /* i is defined in the ndo_double macro; use g to iterate. */
 +  int i,g;
 +  real fv;
 +  gmx_bool bDum=TRUE;
 +  real rdum;
 +
 +  /* free energy values */
 +  if (file_version >= 79)
 +  {
 +      gmx_fio_do_int(fio,fepvals->init_fep_state);
 +      gmx_fio_do_double(fio,fepvals->init_lambda);
 +      gmx_fio_do_double(fio,fepvals->delta_lambda);
 +  }
 +  else if (file_version >= 59) {
 +      gmx_fio_do_double(fio,fepvals->init_lambda);
 +      gmx_fio_do_double(fio,fepvals->delta_lambda);
 +  } else {
 +      gmx_fio_do_real(fio,rdum);
 +      fepvals->init_lambda = rdum;
 +      gmx_fio_do_real(fio,rdum);
 +      fepvals->delta_lambda = rdum;
 +  }
 +  if (file_version >= 79)
 +  {
 +      gmx_fio_do_int(fio,fepvals->n_lambda);
 +      if (bRead)
 +      {
 +          snew(fepvals->all_lambda,efptNR);
 +      }
 +      for (g=0;g<efptNR;g++)
 +      {
 +          if (fepvals->n_lambda > 0) {
 +              if (bRead)
 +              {
 +                  snew(fepvals->all_lambda[g],fepvals->n_lambda);
 +              }
 +              bDum=gmx_fio_ndo_double(fio,fepvals->all_lambda[g],fepvals->n_lambda);
 +              bDum=gmx_fio_ndo_int(fio,fepvals->separate_dvdl,efptNR);
 +          }
 +          else if (fepvals->init_lambda >= 0)
 +          {
 +              fepvals->separate_dvdl[efptFEP] = TRUE;
 +          }
 +      }
 +  }
 +  else if (file_version >= 64)
 +  {
 +      gmx_fio_do_int(fio,fepvals->n_lambda);
 +      snew(fepvals->all_lambda,efptNR);
 +      if (bRead)
 +      {
 +          snew(fepvals->all_lambda[efptFEP],fepvals->n_lambda);
 +      }
 +      bDum=gmx_fio_ndo_double(fio,fepvals->all_lambda[efptFEP],fepvals->n_lambda);
 +      if (fepvals->init_lambda >= 0)
 +      {
 +          fepvals->separate_dvdl[efptFEP] = TRUE;
 +      }
 +      /* still allocate the all_lambda array's contents. */
 +      for (g=0;g<efptNR;g++)
 +      {
 +          if (fepvals->n_lambda > 0) {
 +              if (bRead)
 +              {
 +                  snew(fepvals->all_lambda[g],fepvals->n_lambda);
 +              }
 +          }
 +      }
 +  }
 +  else
 +  {
 +      fepvals->n_lambda = 0;
 +      fepvals->all_lambda   = NULL;
 +      if (fepvals->init_lambda >= 0)
 +      {
 +          fepvals->separate_dvdl[efptFEP] = TRUE;
 +      }
 +  }
 +  if (file_version >= 13)
 +  {
 +      gmx_fio_do_real(fio,fepvals->sc_alpha);
 +  }
 +  else
 +  {
 +      fepvals->sc_alpha = 0;
 +  }
 +  if (file_version >= 38)
 +  {
 +      gmx_fio_do_int(fio,fepvals->sc_power);
 +  }
 +  else
 +  {
 +      fepvals->sc_power = 2;
 +  }
 +  if (file_version >= 79)
 +  {
 +      gmx_fio_do_real(fio,fepvals->sc_r_power);
 +  }
 +  else
 +  {
 +      fepvals->sc_r_power = 6.0;
 +  }
 +  if (file_version >= 15)
 +  {
 +      gmx_fio_do_real(fio,fepvals->sc_sigma);
 +  }
 +  else
 +  {
 +      fepvals->sc_sigma = 0.3;
 +  }
 +  if (bRead)
 +  {
 +      if (file_version >= 71)
 +      {
 +          fepvals->sc_sigma_min = fepvals->sc_sigma;
 +      }
 +      else
 +      {
 +          fepvals->sc_sigma_min = 0;
 +      }
 +  }
 +  if (file_version >= 79)
 +  {
 +      gmx_fio_do_int(fio,fepvals->bScCoul);
 +  }
 +  else
 +  {
 +      fepvals->bScCoul = TRUE;
 +  }
 +  if (file_version >= 64) {
 +      gmx_fio_do_int(fio,fepvals->nstdhdl);
 +  } else {
 +      fepvals->nstdhdl = 1;
 +  }
 +
 +  if (file_version >= 73)
 +  {
 +      gmx_fio_do_int(fio, fepvals->separate_dhdl_file);
 +      gmx_fio_do_int(fio, fepvals->dhdl_derivatives);
 +  }
 +  else
 +  {
 +      fepvals->separate_dhdl_file = esepdhdlfileYES;
 +      fepvals->dhdl_derivatives = edhdlderivativesYES;
 +  }
 +  if (file_version >= 71)
 +  {
 +      gmx_fio_do_int(fio,fepvals->dh_hist_size);
 +      gmx_fio_do_double(fio,fepvals->dh_hist_spacing);
 +  }
 +  else
 +  {
 +      fepvals->dh_hist_size    = 0;
 +      fepvals->dh_hist_spacing = 0.1;
 +  }
 +  if (file_version >= 79)
 +  {
 +      gmx_fio_do_int(fio,fepvals->bPrintEnergy);
 +  }
 +  else
 +  {
 +      fepvals->bPrintEnergy = FALSE;
 +  }
 +}
 +
 +static void do_pull(t_fileio *fio, t_pull *pull,gmx_bool bRead, int file_version)
 +{
 +  int g;
 +
 +  gmx_fio_do_int(fio,pull->ngrp);
 +  gmx_fio_do_int(fio,pull->eGeom);
 +  gmx_fio_do_ivec(fio,pull->dim);
 +  gmx_fio_do_real(fio,pull->cyl_r1);
 +  gmx_fio_do_real(fio,pull->cyl_r0);
 +  gmx_fio_do_real(fio,pull->constr_tol);
 +  gmx_fio_do_int(fio,pull->nstxout);
 +  gmx_fio_do_int(fio,pull->nstfout);
 +  if (bRead)
 +    snew(pull->grp,pull->ngrp+1);
 +  for(g=0; g<pull->ngrp+1; g++)
 +    do_pullgrp(fio,&pull->grp[g],bRead,file_version);
 +}
 +
 +
 +static void do_rotgrp(t_fileio *fio, t_rotgrp *rotg,gmx_bool bRead, int file_version)
 +{
 +  gmx_bool bDum=TRUE;
 +  int  i;
 +
 +  gmx_fio_do_int(fio,rotg->eType);
 +  gmx_fio_do_int(fio,rotg->bMassW);
 +  gmx_fio_do_int(fio,rotg->nat);
 +  if (bRead)
 +    snew(rotg->ind,rotg->nat);
 +  gmx_fio_ndo_int(fio,rotg->ind,rotg->nat);
 +  if (bRead)
 +      snew(rotg->x_ref,rotg->nat);
 +  gmx_fio_ndo_rvec(fio,rotg->x_ref,rotg->nat);
 +  gmx_fio_do_rvec(fio,rotg->vec);
 +  gmx_fio_do_rvec(fio,rotg->pivot);
 +  gmx_fio_do_real(fio,rotg->rate);
 +  gmx_fio_do_real(fio,rotg->k);
 +  gmx_fio_do_real(fio,rotg->slab_dist);
 +  gmx_fio_do_real(fio,rotg->min_gaussian);
 +  gmx_fio_do_real(fio,rotg->eps);
 +  gmx_fio_do_int(fio,rotg->eFittype);
 +  gmx_fio_do_int(fio,rotg->PotAngle_nstep);
 +  gmx_fio_do_real(fio,rotg->PotAngle_step);
 +}
 +
 +static void do_rot(t_fileio *fio, t_rot *rot,gmx_bool bRead, int file_version)
 +{
 +  int g;
 +
 +  gmx_fio_do_int(fio,rot->ngrp);
 +  gmx_fio_do_int(fio,rot->nstrout);
 +  gmx_fio_do_int(fio,rot->nstsout);
 +  if (bRead)
 +    snew(rot->grp,rot->ngrp);
 +  for(g=0; g<rot->ngrp; g++)
 +    do_rotgrp(fio, &rot->grp[g],bRead,file_version);
 +}
 +
 +
 +static void do_inputrec(t_fileio *fio, t_inputrec *ir,gmx_bool bRead, 
 +                        int file_version, real *fudgeQQ)
 +{
 +    int  i,j,k,*tmp,idum=0; 
 +    gmx_bool bDum=TRUE;
 +    real rdum,bd_temp;
 +    rvec vdum;
 +    gmx_bool bSimAnn;
 +    real zerotemptime,finish_t,init_temp,finish_temp;
 +    
 +    if (file_version != tpx_version)
 +    {
 +        /* Give a warning about features that are not accessible */
 +        fprintf(stderr,"Note: file tpx version %d, software tpx version %d\n",
 +                file_version,tpx_version);
 +    }
 +
 +    if (bRead)
 +    {
 +        init_inputrec(ir);
 +    }
 +
 +    if (file_version == 0)
 +    {
 +        return;
 +    }
 +
 +    /* Basic inputrec stuff */  
 +    gmx_fio_do_int(fio,ir->eI); 
 +    if (file_version >= 62) {
 +      gmx_fio_do_gmx_large_int(fio, ir->nsteps);
 +    } else {
 +      gmx_fio_do_int(fio,idum);
 +      ir->nsteps = idum;
 +    }
 +    if(file_version > 25) {
 +      if (file_version >= 62) {
 +      gmx_fio_do_gmx_large_int(fio, ir->init_step);
 +      } else {
 +      gmx_fio_do_int(fio,idum);
 +      ir->init_step = idum;
 +      }
 +    }  else {
 +      ir->init_step=0;
 +    }
 +
 +      if(file_version >= 58)
 +        gmx_fio_do_int(fio,ir->simulation_part);
 +      else
 +        ir->simulation_part=1;
 +        
 +    if (file_version >= 67) {
 +      gmx_fio_do_int(fio,ir->nstcalcenergy);
 +    } else {
 +      ir->nstcalcenergy = 1;
 +    }
 +    if (file_version < 53) {
 +      /* The pbc info has been moved out of do_inputrec,
 +       * since we always want it, even without reading the inputrec.
 +       */
 +      gmx_fio_do_int(fio,ir->ePBC);
 +      if ((file_version <= 15) && (ir->ePBC == 2))
 +      ir->ePBC = epbcNONE;
 +      if (file_version >= 45) {
 +      gmx_fio_do_int(fio,ir->bPeriodicMols);
 +      } else {
 +      if (ir->ePBC == 2) {
 +        ir->ePBC = epbcXYZ;
 +        ir->bPeriodicMols = TRUE;
 +      } else {
 +      ir->bPeriodicMols = FALSE;
 +      }
 +      }
 +    }
 +    if (file_version >= 81)
 +    {
 +        gmx_fio_do_int(fio,ir->cutoff_scheme);
 +    }
 +    else
 +    {
 +        ir->cutoff_scheme = ecutsGROUP;
 +    }
 +    gmx_fio_do_int(fio,ir->ns_type);
 +    gmx_fio_do_int(fio,ir->nstlist);
 +    gmx_fio_do_int(fio,ir->ndelta);
 +    if (file_version < 41) {
 +      gmx_fio_do_int(fio,idum);
 +      gmx_fio_do_int(fio,idum);
 +    }
 +    if (file_version >= 45)
 +      gmx_fio_do_real(fio,ir->rtpi);
 +    else
 +      ir->rtpi = 0.05;
 +    gmx_fio_do_int(fio,ir->nstcomm); 
 +    if (file_version > 34)
 +      gmx_fio_do_int(fio,ir->comm_mode);
 +    else if (ir->nstcomm < 0) 
 +      ir->comm_mode = ecmANGULAR;
 +    else
 +      ir->comm_mode = ecmLINEAR;
 +    ir->nstcomm = abs(ir->nstcomm);
 +    
 +    if(file_version > 25)
 +      gmx_fio_do_int(fio,ir->nstcheckpoint);
 +    else
 +      ir->nstcheckpoint=0;
 +    
 +    gmx_fio_do_int(fio,ir->nstcgsteep); 
 +
 +    if(file_version>=30)
 +      gmx_fio_do_int(fio,ir->nbfgscorr); 
 +    else if (bRead)
 +      ir->nbfgscorr = 10;
 +
 +    gmx_fio_do_int(fio,ir->nstlog); 
 +    gmx_fio_do_int(fio,ir->nstxout); 
 +    gmx_fio_do_int(fio,ir->nstvout); 
 +    gmx_fio_do_int(fio,ir->nstfout); 
 +    gmx_fio_do_int(fio,ir->nstenergy); 
 +    gmx_fio_do_int(fio,ir->nstxtcout); 
 +    if (file_version >= 59) {
 +      gmx_fio_do_double(fio,ir->init_t);
 +      gmx_fio_do_double(fio,ir->delta_t);
 +    } else {
 +      gmx_fio_do_real(fio,rdum);
 +      ir->init_t = rdum;
 +      gmx_fio_do_real(fio,rdum);
 +      ir->delta_t = rdum;
 +    }
 +    gmx_fio_do_real(fio,ir->xtcprec); 
 +    if (file_version < 19) {
 +      gmx_fio_do_int(fio,idum); 
 +      gmx_fio_do_int(fio,idum);
 +    }
 +    if(file_version < 18)
 +      gmx_fio_do_int(fio,idum); 
 +    if (file_version >= 81) {
 +      gmx_fio_do_real(fio,ir->verletbuf_drift);
 +    } else {
 +      ir->verletbuf_drift = 0;
 +    }
 +    gmx_fio_do_real(fio,ir->rlist); 
 +    if (file_version >= 67) {
 +      gmx_fio_do_real(fio,ir->rlistlong);
 +    }
++    if(file_version >= 82 && file_version != 90)
++    {
++        gmx_fio_do_int(fio,ir->nstcalclr);
++    }
++    else
++    {
++        /* Calculate at NS steps */
++        ir->nstcalclr = ir->nstlist;
++    }
++    gmx_fio_do_int(fio,ir->coulombtype);
 +    if (file_version < 32 && ir->coulombtype == eelRF)
 +      ir->coulombtype = eelRF_NEC;      
 +    if (file_version >= 81)
 +    {
 +        gmx_fio_do_int(fio,ir->coulomb_modifier); 
 +    }
 +    else
 +    {
 +        ir->coulomb_modifier = (ir->cutoff_scheme == ecutsVERLET ? eintmodPOTSHIFT : eintmodNONE);
 +    }
 +    gmx_fio_do_real(fio,ir->rcoulomb_switch); 
 +    gmx_fio_do_real(fio,ir->rcoulomb); 
 +    gmx_fio_do_int(fio,ir->vdwtype);
 +    if (file_version >= 81)
 +    {
 +        gmx_fio_do_int(fio,ir->vdw_modifier); 
 +    }
 +    else
 +    {
 +        ir->vdw_modifier = (ir->cutoff_scheme == ecutsVERLET ? eintmodPOTSHIFT : eintmodNONE);
 +    }
 +    gmx_fio_do_real(fio,ir->rvdw_switch); 
 +    gmx_fio_do_real(fio,ir->rvdw); 
 +    if (file_version < 67) {
 +      ir->rlistlong = max_cutoff(ir->rlist,max_cutoff(ir->rvdw,ir->rcoulomb));
 +    }
 +    gmx_fio_do_int(fio,ir->eDispCorr); 
 +    gmx_fio_do_real(fio,ir->epsilon_r);
 +    if (file_version >= 37) {
 +      gmx_fio_do_real(fio,ir->epsilon_rf);
 +    } else {
 +      if (EEL_RF(ir->coulombtype)) {
 +      ir->epsilon_rf = ir->epsilon_r;
 +      ir->epsilon_r  = 1.0;
 +      } else {
 +      ir->epsilon_rf = 1.0;
 +      }
 +    }
 +    if (file_version >= 29)
 +      gmx_fio_do_real(fio,ir->tabext);
 +    else
 +      ir->tabext=1.0;
 + 
 +    if(file_version > 25) {
 +      gmx_fio_do_int(fio,ir->gb_algorithm);
 +      gmx_fio_do_int(fio,ir->nstgbradii);
 +      gmx_fio_do_real(fio,ir->rgbradii);
 +      gmx_fio_do_real(fio,ir->gb_saltconc);
 +      gmx_fio_do_int(fio,ir->implicit_solvent);
 +    } else {
 +      ir->gb_algorithm=egbSTILL;
 +      ir->nstgbradii=1;
 +      ir->rgbradii=1.0;
 +      ir->gb_saltconc=0;
 +      ir->implicit_solvent=eisNO;
 +    }
 +      if(file_version>=55)
 +      {
 +              gmx_fio_do_real(fio,ir->gb_epsilon_solvent);
 +              gmx_fio_do_real(fio,ir->gb_obc_alpha);
 +              gmx_fio_do_real(fio,ir->gb_obc_beta);
 +              gmx_fio_do_real(fio,ir->gb_obc_gamma);
 +              if(file_version>=60)
 +              {
 +                      gmx_fio_do_real(fio,ir->gb_dielectric_offset);
 +                      gmx_fio_do_int(fio,ir->sa_algorithm);
 +              }
 +              else
 +              {
 +                      ir->gb_dielectric_offset = 0.009;
 +                      ir->sa_algorithm = esaAPPROX;
 +              }
 +              gmx_fio_do_real(fio,ir->sa_surface_tension);
 +
 +    /* Override sa_surface_tension if it is not changed in the mdp file */
 +    if(ir->sa_surface_tension<0)
 +    {
 +      if(ir->gb_algorithm==egbSTILL)
 +      {
 +        ir->sa_surface_tension = 0.0049 * 100 * CAL2JOULE;
 +      }
 +      else if(ir->gb_algorithm==egbHCT || ir->gb_algorithm==egbOBC)
 +      {
 +        ir->sa_surface_tension = 0.0054 * 100 * CAL2JOULE;
 +      }
 +    }
 +    
 +      }
 +      else
 +      {
 +              /* Better use sensible values than insane (0.0) ones... */
 +              ir->gb_epsilon_solvent = 80;
 +              ir->gb_obc_alpha       = 1.0;
 +              ir->gb_obc_beta        = 0.8;
 +              ir->gb_obc_gamma       = 4.85;
 +              ir->sa_surface_tension = 2.092;
 +      }
 +
 +       
 +    if (file_version >= 81)
 +    {
 +        gmx_fio_do_real(fio,ir->fourier_spacing); 
 +    }
 +    else
 +    {
 +        ir->fourier_spacing = 0.0;
 +    }
 +    gmx_fio_do_int(fio,ir->nkx); 
 +    gmx_fio_do_int(fio,ir->nky); 
 +    gmx_fio_do_int(fio,ir->nkz);
 +    gmx_fio_do_int(fio,ir->pme_order);
 +    gmx_fio_do_real(fio,ir->ewald_rtol);
 +
 +    if (file_version >=24) 
 +      gmx_fio_do_int(fio,ir->ewald_geometry);
 +    else
 +      ir->ewald_geometry=eewg3D;
 +
 +    if (file_version <=17) {
 +      ir->epsilon_surface=0;
 +      if (file_version==17)
 +      gmx_fio_do_int(fio,idum);
 +    } 
 +    else
 +      gmx_fio_do_real(fio,ir->epsilon_surface);
 +    
 +    gmx_fio_do_gmx_bool(fio,ir->bOptFFT);
 +
 +    gmx_fio_do_gmx_bool(fio,ir->bContinuation); 
 +    gmx_fio_do_int(fio,ir->etc);
 +    /* before version 18, ir->etc was a gmx_bool (ir->btc),
 +     * but the values 0 and 1 still mean no and
 +     * berendsen temperature coupling, respectively.
 +     */
 +    if (file_version >= 79) {
 +        gmx_fio_do_gmx_bool(fio,ir->bPrintNHChains);
 +    }
 +    if (file_version >= 71)
 +    {
 +        gmx_fio_do_int(fio,ir->nsttcouple);
 +    }
 +    else
 +    {
 +        ir->nsttcouple = ir->nstcalcenergy;
 +    }
 +    if (file_version <= 15)
 +    {
 +        gmx_fio_do_int(fio,idum);
 +    }
 +    if (file_version <=17)
 +    {
 +        gmx_fio_do_int(fio,ir->epct); 
 +        if (file_version <= 15)
 +        {
 +            if (ir->epct == 5)
 +            {
 +                ir->epct = epctSURFACETENSION;
 +            }
 +            gmx_fio_do_int(fio,idum);
 +        }
 +        ir->epct -= 1;
 +        /* we have removed the NO alternative at the beginning */
 +        if(ir->epct==-1)
 +        {
 +            ir->epc=epcNO;
 +            ir->epct=epctISOTROPIC;
 +        } 
 +        else
 +        {
 +            ir->epc=epcBERENDSEN;
 +        }
 +    } 
 +    else
 +    {
 +        gmx_fio_do_int(fio,ir->epc);
 +        gmx_fio_do_int(fio,ir->epct);
 +    }
 +    if (file_version >= 71)
 +    {
 +        gmx_fio_do_int(fio,ir->nstpcouple);
 +    }
 +    else
 +    {
 +        ir->nstpcouple = ir->nstcalcenergy;
 +    }
 +    gmx_fio_do_real(fio,ir->tau_p); 
 +    if (file_version <= 15) {
 +      gmx_fio_do_rvec(fio,vdum);
 +      clear_mat(ir->ref_p);
 +      for(i=0; i<DIM; i++)
 +      ir->ref_p[i][i] = vdum[i];
 +    } else {
 +      gmx_fio_do_rvec(fio,ir->ref_p[XX]);
 +      gmx_fio_do_rvec(fio,ir->ref_p[YY]);
 +      gmx_fio_do_rvec(fio,ir->ref_p[ZZ]);
 +    }
 +    if (file_version <= 15) {
 +      gmx_fio_do_rvec(fio,vdum);
 +      clear_mat(ir->compress);
 +      for(i=0; i<DIM; i++)
 +      ir->compress[i][i] = vdum[i];
 +    } 
 +    else {
 +      gmx_fio_do_rvec(fio,ir->compress[XX]);
 +      gmx_fio_do_rvec(fio,ir->compress[YY]);
 +      gmx_fio_do_rvec(fio,ir->compress[ZZ]);
 +    }
 +    if (file_version >= 47) {
 +      gmx_fio_do_int(fio,ir->refcoord_scaling);
 +      gmx_fio_do_rvec(fio,ir->posres_com);
 +      gmx_fio_do_rvec(fio,ir->posres_comB);
 +    } else {
 +      ir->refcoord_scaling = erscNO;
 +      clear_rvec(ir->posres_com);
 +      clear_rvec(ir->posres_comB);
 +    }
 +    if((file_version > 25) && (file_version < 79))
 +        gmx_fio_do_int(fio,ir->andersen_seed);
 +    else
 +        ir->andersen_seed=0;
 +    if(file_version < 26) {
 +      gmx_fio_do_gmx_bool(fio,bSimAnn); 
 +      gmx_fio_do_real(fio,zerotemptime);
 +    }
 +    
 +    if (file_version < 37)
 +      gmx_fio_do_real(fio,rdum); 
 +
 +    gmx_fio_do_real(fio,ir->shake_tol);
 +    if (file_version < 54)
 +      gmx_fio_do_real(fio,*fudgeQQ);
 +
 +    gmx_fio_do_int(fio,ir->efep);
 +    if (file_version <= 14 && ir->efep != efepNO)
 +    {
 +        ir->efep = efepYES;
 +    }
 +    do_fepvals(fio,ir->fepvals,bRead,file_version);
 +
 +    if (file_version >= 79)
 +    {
 +        gmx_fio_do_gmx_bool(fio,ir->bSimTemp);
 +        if (ir->bSimTemp) 
 +        {
 +            ir->bSimTemp = TRUE;
 +        }
 +    }
 +    else
 +    {
 +        ir->bSimTemp = FALSE;
 +    }
 +    if (ir->bSimTemp)
 +    {
 +        do_simtempvals(fio,ir->simtempvals,ir->fepvals->n_lambda,bRead,file_version);
 +    }
 +
 +    if (file_version >= 79)
 +    {
 +        gmx_fio_do_gmx_bool(fio,ir->bExpanded);
 +        if (ir->bExpanded)
 +        {
 +            ir->bExpanded = TRUE;
 +        }
 +        else
 +        {
 +            ir->bExpanded = FALSE;
 +        }
 +    }
 +    if (ir->bExpanded)
 +    {
 +        do_expandedvals(fio,ir->expandedvals,ir->fepvals->n_lambda,bRead,file_version);
 +    }
 +    if (file_version >= 57) {
 +      gmx_fio_do_int(fio,ir->eDisre); 
 +    }
 +    gmx_fio_do_int(fio,ir->eDisreWeighting); 
 +    if (file_version < 22) {
 +      if (ir->eDisreWeighting == 0)
 +      ir->eDisreWeighting = edrwEqual;
 +      else
 +      ir->eDisreWeighting = edrwConservative;
 +    }
 +    gmx_fio_do_gmx_bool(fio,ir->bDisreMixed); 
 +    gmx_fio_do_real(fio,ir->dr_fc); 
 +    gmx_fio_do_real(fio,ir->dr_tau); 
 +    gmx_fio_do_int(fio,ir->nstdisreout);
 +    if (file_version >= 22) {
 +      gmx_fio_do_real(fio,ir->orires_fc);
 +      gmx_fio_do_real(fio,ir->orires_tau);
 +      gmx_fio_do_int(fio,ir->nstorireout);
 +    } else {
 +      ir->orires_fc = 0;
 +      ir->orires_tau = 0;
 +      ir->nstorireout = 0;
 +    }
 +    if(file_version >= 26 && file_version < 79) {
 +      gmx_fio_do_real(fio,ir->dihre_fc);
 +      if (file_version < 56) 
 +      {
 +          gmx_fio_do_real(fio,rdum);
 +          gmx_fio_do_int(fio,idum);
 +      }
 +    } else {
 +        ir->dihre_fc=0;
 +    }
 +
 +    gmx_fio_do_real(fio,ir->em_stepsize); 
 +    gmx_fio_do_real(fio,ir->em_tol); 
 +    if (file_version >= 22) 
 +      gmx_fio_do_gmx_bool(fio,ir->bShakeSOR);
 +    else if (bRead)
 +      ir->bShakeSOR = TRUE;
 +    if (file_version >= 11)
 +      gmx_fio_do_int(fio,ir->niter);
 +    else if (bRead) {
 +      ir->niter = 25;
 +      fprintf(stderr,"Note: niter not in run input file, setting it to %d\n",
 +            ir->niter);
 +    }
 +    if (file_version >= 21)
 +      gmx_fio_do_real(fio,ir->fc_stepsize);
 +    else
 +      ir->fc_stepsize = 0;
 +    gmx_fio_do_int(fio,ir->eConstrAlg);
 +    gmx_fio_do_int(fio,ir->nProjOrder);
 +    gmx_fio_do_real(fio,ir->LincsWarnAngle);
 +    if (file_version <= 14)
 +      gmx_fio_do_int(fio,idum);
 +    if (file_version >=26)
 +      gmx_fio_do_int(fio,ir->nLincsIter);
 +    else if (bRead) {
 +      ir->nLincsIter = 1;
 +      fprintf(stderr,"Note: nLincsIter not in run input file, setting it to %d\n",
 +            ir->nLincsIter);
 +    }
 +    if (file_version < 33)
 +      gmx_fio_do_real(fio,bd_temp);
 +    gmx_fio_do_real(fio,ir->bd_fric);
 +    gmx_fio_do_int(fio,ir->ld_seed);
 +    if (file_version >= 33) {
 +      for(i=0; i<DIM; i++)
 +      gmx_fio_do_rvec(fio,ir->deform[i]);
 +    } else {
 +      for(i=0; i<DIM; i++)
 +      clear_rvec(ir->deform[i]);
 +    }
 +    if (file_version >= 14)
 +      gmx_fio_do_real(fio,ir->cos_accel);
 +    else if (bRead)
 +      ir->cos_accel = 0;
 +    gmx_fio_do_int(fio,ir->userint1); 
 +    gmx_fio_do_int(fio,ir->userint2); 
 +    gmx_fio_do_int(fio,ir->userint3); 
 +    gmx_fio_do_int(fio,ir->userint4); 
 +    gmx_fio_do_real(fio,ir->userreal1); 
 +    gmx_fio_do_real(fio,ir->userreal2); 
 +    gmx_fio_do_real(fio,ir->userreal3); 
 +    gmx_fio_do_real(fio,ir->userreal4); 
 +    
 +    /* AdResS stuff */
 +    if (file_version >= 77) {
 +      gmx_fio_do_gmx_bool(fio,ir->bAdress);
 +      if(ir->bAdress){
 +          if (bRead) snew(ir->adress, 1);
 +          gmx_fio_do_int(fio,ir->adress->type);
 +          gmx_fio_do_real(fio,ir->adress->const_wf);
 +          gmx_fio_do_real(fio,ir->adress->ex_width);
 +          gmx_fio_do_real(fio,ir->adress->hy_width);
 +          gmx_fio_do_int(fio,ir->adress->icor);
 +          gmx_fio_do_int(fio,ir->adress->site);
 +          gmx_fio_do_rvec(fio,ir->adress->refs);
 +          gmx_fio_do_int(fio,ir->adress->n_tf_grps);
 +          gmx_fio_do_real(fio, ir->adress->ex_forcecap);
 +          gmx_fio_do_int(fio, ir->adress->n_energy_grps);
 +          gmx_fio_do_int(fio,ir->adress->do_hybridpairs);
 +
 +          if (bRead)snew(ir->adress->tf_table_index,ir->adress->n_tf_grps);
 +          if (ir->adress->n_tf_grps > 0) {
 +            bDum=gmx_fio_ndo_int(fio,ir->adress->tf_table_index,ir->adress->n_tf_grps);
 +          }
 +          if (bRead)snew(ir->adress->group_explicit,ir->adress->n_energy_grps);
 +          if (ir->adress->n_energy_grps > 0) {
 +            bDum=gmx_fio_ndo_int(fio, ir->adress->group_explicit,ir->adress->n_energy_grps);
 +          }
 +      }
 +    } else {
 +      ir->bAdress = FALSE;
 +    }
 +
 +    /* pull stuff */
 +    if (file_version >= 48) {
 +      gmx_fio_do_int(fio,ir->ePull);
 +      if (ir->ePull != epullNO) {
 +      if (bRead)
 +        snew(ir->pull,1);
 +      do_pull(fio, ir->pull,bRead,file_version);
 +      }
 +    } else {
 +      ir->ePull = epullNO;
 +    }
 +    
 +    /* Enforced rotation */
 +    if (file_version >= 74) {
 +        gmx_fio_do_int(fio,ir->bRot);
 +        if (ir->bRot == TRUE) {
 +            if (bRead)
 +                snew(ir->rot,1);
 +            do_rot(fio, ir->rot,bRead,file_version);
 +        }
 +    } else {
 +        ir->bRot = FALSE;
 +    }
 +    
 +    /* grpopts stuff */
 +    gmx_fio_do_int(fio,ir->opts.ngtc); 
 +    if (file_version >= 69) {
 +      gmx_fio_do_int(fio,ir->opts.nhchainlength);
 +    } else {
 +      ir->opts.nhchainlength = 1;
 +    }
 +    gmx_fio_do_int(fio,ir->opts.ngacc); 
 +    gmx_fio_do_int(fio,ir->opts.ngfrz); 
 +    gmx_fio_do_int(fio,ir->opts.ngener);
 +    
 +    if (bRead) {
 +      snew(ir->opts.nrdf,   ir->opts.ngtc); 
 +      snew(ir->opts.ref_t,  ir->opts.ngtc); 
 +      snew(ir->opts.annealing, ir->opts.ngtc); 
 +      snew(ir->opts.anneal_npoints, ir->opts.ngtc); 
 +      snew(ir->opts.anneal_time, ir->opts.ngtc); 
 +      snew(ir->opts.anneal_temp, ir->opts.ngtc); 
 +      snew(ir->opts.tau_t,  ir->opts.ngtc); 
 +      snew(ir->opts.nFreeze,ir->opts.ngfrz); 
 +      snew(ir->opts.acc,    ir->opts.ngacc); 
 +      snew(ir->opts.egp_flags,ir->opts.ngener*ir->opts.ngener);
 +    } 
 +    if (ir->opts.ngtc > 0) {
 +      if (bRead && file_version<13) {
 +      snew(tmp,ir->opts.ngtc);
 +      bDum=gmx_fio_ndo_int(fio,tmp, ir->opts.ngtc);
 +      for(i=0; i<ir->opts.ngtc; i++)
 +        ir->opts.nrdf[i] = tmp[i];
 +      sfree(tmp);
 +      } else {
 +      bDum=gmx_fio_ndo_real(fio,ir->opts.nrdf, ir->opts.ngtc);
 +      }
 +      bDum=gmx_fio_ndo_real(fio,ir->opts.ref_t,ir->opts.ngtc); 
 +      bDum=gmx_fio_ndo_real(fio,ir->opts.tau_t,ir->opts.ngtc); 
 +      if (file_version<33 && ir->eI==eiBD) {
 +      for(i=0; i<ir->opts.ngtc; i++)
 +        ir->opts.tau_t[i] = bd_temp;
 +      }
 +    }
 +    if (ir->opts.ngfrz > 0) 
 +      bDum=gmx_fio_ndo_ivec(fio,ir->opts.nFreeze,ir->opts.ngfrz);
 +    if (ir->opts.ngacc > 0) 
 +      gmx_fio_ndo_rvec(fio,ir->opts.acc,ir->opts.ngacc); 
 +    if (file_version >= 12)
 +      bDum=gmx_fio_ndo_int(fio,ir->opts.egp_flags,
 +                           ir->opts.ngener*ir->opts.ngener);
 +
 +    if(bRead && file_version < 26) {
 +      for(i=0;i<ir->opts.ngtc;i++) {
 +      if(bSimAnn) {
 +        ir->opts.annealing[i] = eannSINGLE;
 +        ir->opts.anneal_npoints[i] = 2;
 +        snew(ir->opts.anneal_time[i],2);
 +        snew(ir->opts.anneal_temp[i],2);
 +        /* calculate the starting/ending temperatures from ref_t, zerotemptime, and nsteps */
 +        finish_t = ir->init_t + ir->nsteps * ir->delta_t;
 +        init_temp = ir->opts.ref_t[i]*(1-ir->init_t/zerotemptime);
 +        finish_temp = ir->opts.ref_t[i]*(1-finish_t/zerotemptime);
 +        ir->opts.anneal_time[i][0] = ir->init_t;
 +        ir->opts.anneal_time[i][1] = finish_t;
 +        ir->opts.anneal_temp[i][0] = init_temp;
 +        ir->opts.anneal_temp[i][1] = finish_temp;
 +      } else {
 +        ir->opts.annealing[i] = eannNO;
 +        ir->opts.anneal_npoints[i] = 0;
 +      }
 +      }
 +    } else {
 +      /* file version 26 or later */
 +      /* First read the lists with annealing and npoints for each group */
 +      bDum=gmx_fio_ndo_int(fio,ir->opts.annealing,ir->opts.ngtc);
 +      bDum=gmx_fio_ndo_int(fio,ir->opts.anneal_npoints,ir->opts.ngtc);
 +      for(j=0;j<(ir->opts.ngtc);j++) {
 +      k=ir->opts.anneal_npoints[j];
 +      if(bRead) {
 +        snew(ir->opts.anneal_time[j],k);
 +        snew(ir->opts.anneal_temp[j],k);
 +      }
 +      bDum=gmx_fio_ndo_real(fio,ir->opts.anneal_time[j],k);
 +      bDum=gmx_fio_ndo_real(fio,ir->opts.anneal_temp[j],k);
 +      }
 +    }
 +    /* Walls */
 +    if (file_version >= 45) {
 +      gmx_fio_do_int(fio,ir->nwall);
 +      gmx_fio_do_int(fio,ir->wall_type);
 +      if (file_version >= 50)
 +      gmx_fio_do_real(fio,ir->wall_r_linpot);
 +      else
 +      ir->wall_r_linpot = -1;
 +      gmx_fio_do_int(fio,ir->wall_atomtype[0]);
 +      gmx_fio_do_int(fio,ir->wall_atomtype[1]);
 +      gmx_fio_do_real(fio,ir->wall_density[0]);
 +      gmx_fio_do_real(fio,ir->wall_density[1]);
 +      gmx_fio_do_real(fio,ir->wall_ewald_zfac);
 +    } else {
 +      ir->nwall = 0;
 +      ir->wall_type = 0;
 +      ir->wall_atomtype[0] = -1;
 +      ir->wall_atomtype[1] = -1;
 +      ir->wall_density[0] = 0;
 +      ir->wall_density[1] = 0;
 +      ir->wall_ewald_zfac = 3;
 +    }
 +    /* Cosine stuff for electric fields */
 +    for(j=0; (j<DIM); j++) {
 +      gmx_fio_do_int(fio,ir->ex[j].n);
 +      gmx_fio_do_int(fio,ir->et[j].n);
 +      if (bRead) {
 +      snew(ir->ex[j].a,  ir->ex[j].n);
 +      snew(ir->ex[j].phi,ir->ex[j].n);
 +      snew(ir->et[j].a,  ir->et[j].n);
 +      snew(ir->et[j].phi,ir->et[j].n);
 +      }
 +      bDum=gmx_fio_ndo_real(fio,ir->ex[j].a,  ir->ex[j].n);
 +      bDum=gmx_fio_ndo_real(fio,ir->ex[j].phi,ir->ex[j].n);
 +      bDum=gmx_fio_ndo_real(fio,ir->et[j].a,  ir->et[j].n);
 +      bDum=gmx_fio_ndo_real(fio,ir->et[j].phi,ir->et[j].n);
 +    }
 +    
 +    /* QMMM stuff */
 +    if(file_version>=39){
 +      gmx_fio_do_gmx_bool(fio,ir->bQMMM);
 +      gmx_fio_do_int(fio,ir->QMMMscheme);
 +      gmx_fio_do_real(fio,ir->scalefactor);
 +      gmx_fio_do_int(fio,ir->opts.ngQM);
 +      if (bRead) {
 +        snew(ir->opts.QMmethod,    ir->opts.ngQM);
 +        snew(ir->opts.QMbasis,     ir->opts.ngQM);
 +        snew(ir->opts.QMcharge,    ir->opts.ngQM);
 +        snew(ir->opts.QMmult,      ir->opts.ngQM);
 +        snew(ir->opts.bSH,         ir->opts.ngQM);
 +        snew(ir->opts.CASorbitals, ir->opts.ngQM);
 +        snew(ir->opts.CASelectrons,ir->opts.ngQM);
 +        snew(ir->opts.SAon,        ir->opts.ngQM);
 +        snew(ir->opts.SAoff,       ir->opts.ngQM);
 +        snew(ir->opts.SAsteps,     ir->opts.ngQM);
 +        snew(ir->opts.bOPT,        ir->opts.ngQM);
 +        snew(ir->opts.bTS,         ir->opts.ngQM);
 +      }
 +      if (ir->opts.ngQM > 0) {
 +        bDum=gmx_fio_ndo_int(fio,ir->opts.QMmethod,ir->opts.ngQM);
 +        bDum=gmx_fio_ndo_int(fio,ir->opts.QMbasis,ir->opts.ngQM);
 +        bDum=gmx_fio_ndo_int(fio,ir->opts.QMcharge,ir->opts.ngQM);
 +        bDum=gmx_fio_ndo_int(fio,ir->opts.QMmult,ir->opts.ngQM);
 +        bDum=gmx_fio_ndo_gmx_bool(fio,ir->opts.bSH,ir->opts.ngQM);
 +        bDum=gmx_fio_ndo_int(fio,ir->opts.CASorbitals,ir->opts.ngQM);
 +        bDum=gmx_fio_ndo_int(fio,ir->opts.CASelectrons,ir->opts.ngQM);
 +        bDum=gmx_fio_ndo_real(fio,ir->opts.SAon,ir->opts.ngQM);
 +        bDum=gmx_fio_ndo_real(fio,ir->opts.SAoff,ir->opts.ngQM);
 +        bDum=gmx_fio_ndo_int(fio,ir->opts.SAsteps,ir->opts.ngQM);
 +        bDum=gmx_fio_ndo_gmx_bool(fio,ir->opts.bOPT,ir->opts.ngQM);
 +        bDum=gmx_fio_ndo_gmx_bool(fio,ir->opts.bTS,ir->opts.ngQM);
 +      }
 +      /* end of QMMM stuff */
 +    }    
 +}
 +
 +
 +static void do_harm(t_fileio *fio, t_iparams *iparams,gmx_bool bRead)
 +{
 +  gmx_fio_do_real(fio,iparams->harmonic.rA);
 +  gmx_fio_do_real(fio,iparams->harmonic.krA);
 +  gmx_fio_do_real(fio,iparams->harmonic.rB);
 +  gmx_fio_do_real(fio,iparams->harmonic.krB);
 +}
 +
 +void do_iparams(t_fileio *fio, t_functype ftype,t_iparams *iparams,
 +                gmx_bool bRead, int file_version)
 +{
 +  int idum;
 +  gmx_bool bDum;
 +  real rdum;
 +  
 +  if (!bRead)
 +    gmx_fio_set_comment(fio, interaction_function[ftype].name);
 +  switch (ftype) {
 +  case F_ANGLES:
 +  case F_G96ANGLES:
 +  case F_BONDS:
 +  case F_G96BONDS:
 +  case F_HARMONIC:
 +  case F_IDIHS:
 +    do_harm(fio, iparams,bRead);
 +    if ((ftype == F_ANGRES || ftype == F_ANGRESZ) && bRead) {
 +      /* Correct incorrect storage of parameters */
 +      iparams->pdihs.phiB = iparams->pdihs.phiA;
 +      iparams->pdihs.cpB  = iparams->pdihs.cpA;
 +    }
 +    break;
 +  case F_LINEAR_ANGLES:
 +    gmx_fio_do_real(fio,iparams->linangle.klinA);
 +    gmx_fio_do_real(fio,iparams->linangle.aA);
 +    gmx_fio_do_real(fio,iparams->linangle.klinB);
 +    gmx_fio_do_real(fio,iparams->linangle.aB);
 +    break;
 +  case F_FENEBONDS:
 +    gmx_fio_do_real(fio,iparams->fene.bm);
 +    gmx_fio_do_real(fio,iparams->fene.kb);
 +    break;
 +  case F_RESTRBONDS:
 +    gmx_fio_do_real(fio,iparams->restraint.lowA);
 +    gmx_fio_do_real(fio,iparams->restraint.up1A);
 +    gmx_fio_do_real(fio,iparams->restraint.up2A);
 +    gmx_fio_do_real(fio,iparams->restraint.kA);
 +    gmx_fio_do_real(fio,iparams->restraint.lowB);
 +    gmx_fio_do_real(fio,iparams->restraint.up1B);
 +    gmx_fio_do_real(fio,iparams->restraint.up2B);
 +    gmx_fio_do_real(fio,iparams->restraint.kB);
 +    break;
 +  case F_TABBONDS:
 +  case F_TABBONDSNC:
 +  case F_TABANGLES:
 +  case F_TABDIHS:
 +    gmx_fio_do_real(fio,iparams->tab.kA);
 +    gmx_fio_do_int(fio,iparams->tab.table);
 +    gmx_fio_do_real(fio,iparams->tab.kB);
 +    break;
 +  case F_CROSS_BOND_BONDS:
 +    gmx_fio_do_real(fio,iparams->cross_bb.r1e);
 +    gmx_fio_do_real(fio,iparams->cross_bb.r2e);
 +    gmx_fio_do_real(fio,iparams->cross_bb.krr);
 +    break;
 +  case F_CROSS_BOND_ANGLES:
 +    gmx_fio_do_real(fio,iparams->cross_ba.r1e);
 +    gmx_fio_do_real(fio,iparams->cross_ba.r2e);
 +    gmx_fio_do_real(fio,iparams->cross_ba.r3e);
 +    gmx_fio_do_real(fio,iparams->cross_ba.krt);
 +    break;
 +  case F_UREY_BRADLEY:
 +    gmx_fio_do_real(fio,iparams->u_b.thetaA);
 +    gmx_fio_do_real(fio,iparams->u_b.kthetaA);
 +    gmx_fio_do_real(fio,iparams->u_b.r13A);
 +    gmx_fio_do_real(fio,iparams->u_b.kUBA);
 +    if (file_version >= 79) {
 +        gmx_fio_do_real(fio,iparams->u_b.thetaB);
 +        gmx_fio_do_real(fio,iparams->u_b.kthetaB);
 +        gmx_fio_do_real(fio,iparams->u_b.r13B);
 +        gmx_fio_do_real(fio,iparams->u_b.kUBB);
 +    } else {
 +        iparams->u_b.thetaB=iparams->u_b.thetaA;
 +        iparams->u_b.kthetaB=iparams->u_b.kthetaA;
 +        iparams->u_b.r13B=iparams->u_b.r13A;
 +        iparams->u_b.kUBB=iparams->u_b.kUBA;
 +    }
 +    break;
 +  case F_QUARTIC_ANGLES:
 +    gmx_fio_do_real(fio,iparams->qangle.theta);
 +    bDum=gmx_fio_ndo_real(fio,iparams->qangle.c,5);
 +    break;
 +  case F_BHAM:
 +    gmx_fio_do_real(fio,iparams->bham.a);
 +    gmx_fio_do_real(fio,iparams->bham.b);
 +    gmx_fio_do_real(fio,iparams->bham.c);
 +    break;
 +  case F_MORSE:
 +    gmx_fio_do_real(fio,iparams->morse.b0A);
 +    gmx_fio_do_real(fio,iparams->morse.cbA);
 +    gmx_fio_do_real(fio,iparams->morse.betaA);
 +    if (file_version >= 79) {
 +        gmx_fio_do_real(fio,iparams->morse.b0B);
 +        gmx_fio_do_real(fio,iparams->morse.cbB);
 +        gmx_fio_do_real(fio,iparams->morse.betaB);
 +    } else {
 +        iparams->morse.b0B = iparams->morse.b0A;
 +        iparams->morse.cbB = iparams->morse.cbA;
 +        iparams->morse.betaB = iparams->morse.betaA;
 +    }
 +    break;
 +  case F_CUBICBONDS:
 +    gmx_fio_do_real(fio,iparams->cubic.b0);
 +    gmx_fio_do_real(fio,iparams->cubic.kb);
 +    gmx_fio_do_real(fio,iparams->cubic.kcub);
 +    break;
 +  case F_CONNBONDS:
 +    break;
 +  case F_POLARIZATION:
 +    gmx_fio_do_real(fio,iparams->polarize.alpha);
 +    break;
 +  case F_ANHARM_POL:
 +    gmx_fio_do_real(fio,iparams->anharm_polarize.alpha);
 +    gmx_fio_do_real(fio,iparams->anharm_polarize.drcut);
 +    gmx_fio_do_real(fio,iparams->anharm_polarize.khyp);
 +    break;
 +  case F_WATER_POL:
 +    if (file_version < 31) 
 +      gmx_fatal(FARGS,"Old tpr files with water_polarization not supported. Make a new one.");
 +    gmx_fio_do_real(fio,iparams->wpol.al_x);
 +    gmx_fio_do_real(fio,iparams->wpol.al_y);
 +    gmx_fio_do_real(fio,iparams->wpol.al_z);
 +    gmx_fio_do_real(fio,iparams->wpol.rOH);
 +    gmx_fio_do_real(fio,iparams->wpol.rHH);
 +    gmx_fio_do_real(fio,iparams->wpol.rOD);
 +    break;
 +  case F_THOLE_POL:
 +    gmx_fio_do_real(fio,iparams->thole.a);
 +    gmx_fio_do_real(fio,iparams->thole.alpha1);
 +    gmx_fio_do_real(fio,iparams->thole.alpha2);
 +    gmx_fio_do_real(fio,iparams->thole.rfac);
 +    break;
 +  case F_LJ:
 +    gmx_fio_do_real(fio,iparams->lj.c6);
 +    gmx_fio_do_real(fio,iparams->lj.c12);
 +    break;
 +  case F_LJ14:
 +    gmx_fio_do_real(fio,iparams->lj14.c6A);
 +    gmx_fio_do_real(fio,iparams->lj14.c12A);
 +    gmx_fio_do_real(fio,iparams->lj14.c6B);
 +    gmx_fio_do_real(fio,iparams->lj14.c12B);
 +    break;
 +  case F_LJC14_Q:
 +    gmx_fio_do_real(fio,iparams->ljc14.fqq);
 +    gmx_fio_do_real(fio,iparams->ljc14.qi);
 +    gmx_fio_do_real(fio,iparams->ljc14.qj);
 +    gmx_fio_do_real(fio,iparams->ljc14.c6);
 +    gmx_fio_do_real(fio,iparams->ljc14.c12);
 +    break;
 +  case F_LJC_PAIRS_NB:
 +    gmx_fio_do_real(fio,iparams->ljcnb.qi);
 +    gmx_fio_do_real(fio,iparams->ljcnb.qj);
 +    gmx_fio_do_real(fio,iparams->ljcnb.c6);
 +    gmx_fio_do_real(fio,iparams->ljcnb.c12);
 +    break;
 +  case F_PDIHS:
 +  case F_PIDIHS:
 +  case F_ANGRES:
 +  case F_ANGRESZ:
 +    gmx_fio_do_real(fio,iparams->pdihs.phiA);
 +    gmx_fio_do_real(fio,iparams->pdihs.cpA);
 +    if ((ftype == F_ANGRES || ftype == F_ANGRESZ) && file_version < 42) {
 +      /* Read the incorrectly stored multiplicity */
 +      gmx_fio_do_real(fio,iparams->harmonic.rB);
 +      gmx_fio_do_real(fio,iparams->harmonic.krB);
 +      iparams->pdihs.phiB = iparams->pdihs.phiA;
 +      iparams->pdihs.cpB  = iparams->pdihs.cpA;
 +    } else {
 +      gmx_fio_do_real(fio,iparams->pdihs.phiB);
 +      gmx_fio_do_real(fio,iparams->pdihs.cpB);
 +      gmx_fio_do_int(fio,iparams->pdihs.mult);
 +    }
 +    break;
 +  case F_DISRES:
 +    gmx_fio_do_int(fio,iparams->disres.label);
 +    gmx_fio_do_int(fio,iparams->disres.type);
 +    gmx_fio_do_real(fio,iparams->disres.low);
 +    gmx_fio_do_real(fio,iparams->disres.up1);
 +    gmx_fio_do_real(fio,iparams->disres.up2);
 +    gmx_fio_do_real(fio,iparams->disres.kfac);
 +    break;
 +  case F_ORIRES:
 +    gmx_fio_do_int(fio,iparams->orires.ex);
 +    gmx_fio_do_int(fio,iparams->orires.label);
 +    gmx_fio_do_int(fio,iparams->orires.power);
 +    gmx_fio_do_real(fio,iparams->orires.c);
 +    gmx_fio_do_real(fio,iparams->orires.obs);
 +    gmx_fio_do_real(fio,iparams->orires.kfac);
 +    break;
 +  case F_DIHRES:
 +    if ( file_version < 72) {
 +        gmx_fio_do_int(fio,idum);
 +        gmx_fio_do_int(fio,idum);
 +    }
 +    gmx_fio_do_real(fio,iparams->dihres.phiA);
 +    gmx_fio_do_real(fio,iparams->dihres.dphiA);
 +    gmx_fio_do_real(fio,iparams->dihres.kfacA);
 +    if (file_version >= 72) {
 +        gmx_fio_do_real(fio,iparams->dihres.phiB);
 +        gmx_fio_do_real(fio,iparams->dihres.dphiB);
 +        gmx_fio_do_real(fio,iparams->dihres.kfacB);
 +    } else {
 +        iparams->dihres.phiB=iparams->dihres.phiA;
 +        iparams->dihres.dphiB=iparams->dihres.dphiA;
 +        iparams->dihres.kfacB=iparams->dihres.kfacA;
 +    }
 +    break;
 +  case F_POSRES:
 +    gmx_fio_do_rvec(fio,iparams->posres.pos0A);
 +    gmx_fio_do_rvec(fio,iparams->posres.fcA);
 +    if (bRead && file_version < 27) {
 +      copy_rvec(iparams->posres.pos0A,iparams->posres.pos0B);
 +      copy_rvec(iparams->posres.fcA,iparams->posres.fcB);
 +    } else {
 +      gmx_fio_do_rvec(fio,iparams->posres.pos0B);
 +      gmx_fio_do_rvec(fio,iparams->posres.fcB);
 +    }
 +    break;
 +  case F_FBPOSRES:
 +      gmx_fio_do_int(fio,iparams->fbposres.geom);
 +      gmx_fio_do_rvec(fio,iparams->fbposres.pos0);
 +      gmx_fio_do_real(fio,iparams->fbposres.r);
 +      gmx_fio_do_real(fio,iparams->fbposres.k);
 +      break;
 +  case F_RBDIHS:
 +    bDum=gmx_fio_ndo_real(fio,iparams->rbdihs.rbcA,NR_RBDIHS);
 +    if(file_version>=25) 
 +      bDum=gmx_fio_ndo_real(fio,iparams->rbdihs.rbcB,NR_RBDIHS);
 +    break;
 +  case F_FOURDIHS:
 +    /* Fourier dihedrals are internally represented
 +     * as Ryckaert-Bellemans since those are faster to compute.
 +     */
 +     bDum=gmx_fio_ndo_real(fio,iparams->rbdihs.rbcA, NR_RBDIHS);
 +     bDum=gmx_fio_ndo_real(fio,iparams->rbdihs.rbcB, NR_RBDIHS);
 +    break;
 +  case F_CONSTR:
 +  case F_CONSTRNC:
 +    gmx_fio_do_real(fio,iparams->constr.dA);
 +    gmx_fio_do_real(fio,iparams->constr.dB);
 +    break;
 +  case F_SETTLE:
 +    gmx_fio_do_real(fio,iparams->settle.doh);
 +    gmx_fio_do_real(fio,iparams->settle.dhh);
 +    break;
 +  case F_VSITE2:
 +    gmx_fio_do_real(fio,iparams->vsite.a);
 +    break;
 +  case F_VSITE3:
 +  case F_VSITE3FD:
 +  case F_VSITE3FAD:
 +    gmx_fio_do_real(fio,iparams->vsite.a);
 +    gmx_fio_do_real(fio,iparams->vsite.b);
 +    break;
 +  case F_VSITE3OUT:
 +  case F_VSITE4FD: 
 +  case F_VSITE4FDN: 
 +    gmx_fio_do_real(fio,iparams->vsite.a);
 +    gmx_fio_do_real(fio,iparams->vsite.b);
 +    gmx_fio_do_real(fio,iparams->vsite.c);
 +    break;
 +  case F_VSITEN:
 +    gmx_fio_do_int(fio,iparams->vsiten.n);
 +    gmx_fio_do_real(fio,iparams->vsiten.a);
 +    break;
 +  case F_GB12:
 +  case F_GB13:
 +  case F_GB14:
 +    /* We got rid of some parameters in version 68 */
 +    if(bRead && file_version<68)
 +    {
 +        gmx_fio_do_real(fio,rdum);    
 +        gmx_fio_do_real(fio,rdum);    
 +        gmx_fio_do_real(fio,rdum);    
 +        gmx_fio_do_real(fio,rdum);    
 +    }
 +      gmx_fio_do_real(fio,iparams->gb.sar);   
 +      gmx_fio_do_real(fio,iparams->gb.st);
 +      gmx_fio_do_real(fio,iparams->gb.pi);
 +      gmx_fio_do_real(fio,iparams->gb.gbr);
 +      gmx_fio_do_real(fio,iparams->gb.bmlt);
 +      break;
 +  case F_CMAP:
 +      gmx_fio_do_int(fio,iparams->cmap.cmapA);
 +      gmx_fio_do_int(fio,iparams->cmap.cmapB);
 +    break;
 +  default:
 +      gmx_fatal(FARGS,"unknown function type %d (%s) in %s line %d",
 +                ftype,interaction_function[ftype].name,__FILE__,__LINE__);
 +  }
 +  if (!bRead)
 +    gmx_fio_unset_comment(fio);
 +}
 +
 +static void do_ilist(t_fileio *fio, t_ilist *ilist,gmx_bool bRead,int file_version,
 +                   int ftype)
 +{
 +  int  i,k,idum;
 +  gmx_bool bDum=TRUE;
 +  
 +  if (!bRead) {
 +    gmx_fio_set_comment(fio, interaction_function[ftype].name);
 +  }
 +  if (file_version < 44) {
 +    for(i=0; i<MAXNODES; i++)
 +      gmx_fio_do_int(fio,idum);
 +  }
 +  gmx_fio_do_int(fio,ilist->nr);
 +  if (bRead)
 +    snew(ilist->iatoms,ilist->nr);
 +  bDum=gmx_fio_ndo_int(fio,ilist->iatoms,ilist->nr);
 +  if (!bRead)
 +    gmx_fio_unset_comment(fio);
 +}
 +
 +static void do_ffparams(t_fileio *fio, gmx_ffparams_t *ffparams,
 +                      gmx_bool bRead, int file_version)
 +{
 +  int  idum,i,j;
 +  gmx_bool bDum=TRUE;
 +  unsigned int k;
 +
 +  gmx_fio_do_int(fio,ffparams->atnr);
 +  if (file_version < 57) {
 +    gmx_fio_do_int(fio,idum);
 +  }
 +  gmx_fio_do_int(fio,ffparams->ntypes);
 +  if (bRead && debug)
 +    fprintf(debug,"ffparams->atnr = %d, ntypes = %d\n",
 +          ffparams->atnr,ffparams->ntypes);
 +  if (bRead) {
 +    snew(ffparams->functype,ffparams->ntypes);
 +    snew(ffparams->iparams,ffparams->ntypes);
 +  }
 +  /* Read/write all the function types */
 +  bDum=gmx_fio_ndo_int(fio,ffparams->functype,ffparams->ntypes);
 +  if (bRead && debug)
 +    pr_ivec(debug,0,"functype",ffparams->functype,ffparams->ntypes,TRUE);
 +
 +  if (file_version >= 66) {
 +    gmx_fio_do_double(fio,ffparams->reppow);
 +  } else {
 +    ffparams->reppow = 12.0;
 +  }
 +
 +  if (file_version >= 57) {
 +    gmx_fio_do_real(fio,ffparams->fudgeQQ);
 +  }
 +
 +  /* Check whether all these function types are supported by the code.
 +   * In practice the code is backwards compatible, which means that the
 +   * stored numbering may have to be translated from the old numbering to the new one.
 +   */
 +  for (i=0; (i<ffparams->ntypes); i++) {
 +    if (bRead)
 +      /* Loop over file versions */
 +      for (k=0; (k<NFTUPD); k++)
 +      /* Compare the read file_version to the update table */
 +      if ((file_version < ftupd[k].fvnr) && 
 +          (ffparams->functype[i] >= ftupd[k].ftype)) {
 +        ffparams->functype[i] += 1;
 +        if (debug) {
 +          fprintf(debug,"Incrementing function type %d to %d (due to %s)\n",
 +                  i,ffparams->functype[i],
 +                  interaction_function[ftupd[k].ftype].longname);
 +          fflush(debug);
 +        }
 +      }
 +    
 +    do_iparams(fio, ffparams->functype[i],&ffparams->iparams[i],bRead,
 +               file_version);
 +    if (bRead && debug)
 +      pr_iparams(debug,ffparams->functype[i],&ffparams->iparams[i]);
 +  }
 +}
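
The renumbering loop in do_ffparams() compensates for interaction types that were inserted into the function-type enum after the file was written: for every ftupd entry that is newer than the file and whose inserted enum value is at or below the stored type, the stored type is shifted up by one. A toy sketch of the same translation with a made-up update table (the entries below are hypothetical, not the real ftupd contents):

    #include <stdio.h>

    /* Hypothetical update table: an entry means "enum value ftype was
     * inserted at file version fvnr" (same idea as ftupd[] above). */
    typedef struct { int fvnr; int ftype; } t_upd;

    static const t_upd upd[] = { { 72, 5 }, { 78, 9 } };
    #define NUPD (sizeof(upd)/sizeof(upd[0]))

    static int renumber(int functype, int file_version)
    {
        size_t k;

        for (k = 0; k < NUPD; k++)
        {
            if (file_version < upd[k].fvnr && functype >= upd[k].ftype)
            {
                functype += 1;
            }
        }
        return functype;
    }

    int main(void)
    {
        /* A type stored as 10 in a version-70 file maps to 12 in the current enum. */
        printf("%d\n", renumber(10, 70));
        return 0;
    }
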
 +
 +static void add_settle_atoms(t_ilist *ilist)
 +{
 +    int i;
 +
 +    /* Settle used to only store the first atom: add the other two */
 +    srenew(ilist->iatoms,2*ilist->nr);
 +    for(i=ilist->nr/2-1; i>=0; i--)
 +    {
 +        ilist->iatoms[4*i+0] = ilist->iatoms[2*i+0];
 +        ilist->iatoms[4*i+1] = ilist->iatoms[2*i+1];
 +        ilist->iatoms[4*i+2] = ilist->iatoms[2*i+1] + 1;
 +        ilist->iatoms[4*i+3] = ilist->iatoms[2*i+1] + 2;
 +    }
 +    ilist->nr = 2*ilist->nr;
 +}
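
Before tpx version 78 a SETTLE entry stored only the interaction type and the oxygen atom; add_settle_atoms() rebuilds the full [type, O, H1, H2] quadruplets, assuming the two hydrogens directly follow the oxygen. A self-contained sketch of the same index arithmetic on plain int arrays (hypothetical example data, not the real t_ilist type):

    #include <stdio.h>
    #include <stdlib.h>

    /* Expand [type,O] pairs into [type,O,O+1,O+2] quadruplets,
     * mirroring the index arithmetic in add_settle_atoms() above. */
    static int *expand_settle(const int *pairs, int nr, int *new_nr)
    {
        int  nsettle = nr/2;
        int *out     = malloc(sizeof(int)*4*nsettle);
        int  i;

        for (i = 0; i < nsettle; i++)
        {
            out[4*i+0] = pairs[2*i+0];     /* interaction type index */
            out[4*i+1] = pairs[2*i+1];     /* oxygen                 */
            out[4*i+2] = pairs[2*i+1] + 1; /* first hydrogen         */
            out[4*i+3] = pairs[2*i+1] + 2; /* second hydrogen        */
        }
        *new_nr = 2*nr;
        return out;
    }

    int main(void)
    {
        int  pairs[] = { 7, 0, 7, 3 };  /* two waters, oxygens at atoms 0 and 3 */
        int  n, i;
        int *quads   = expand_settle(pairs, 4, &n);

        for (i = 0; i < n; i++)
        {
            printf("%d ", quads[i]);    /* 7 0 1 2 7 3 4 5 */
        }
        printf("\n");
        free(quads);
        return 0;
    }
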
 +
 +static void do_ilists(t_fileio *fio, t_ilist *ilist,gmx_bool bRead, 
 +                      int file_version)
 +{
 +  int i,j,renum[F_NRE];
 +  gmx_bool bDum=TRUE,bClear;
 +  unsigned int k;
 +  
 +  for(j=0; (j<F_NRE); j++) {
 +    bClear = FALSE;
 +    if (bRead)
 +      for (k=0; k<NFTUPD; k++)
 +        if ((file_version < ftupd[k].fvnr) && (j == ftupd[k].ftype)) 
 +          bClear = TRUE;
 +    if (bClear) {
 +      ilist[j].nr = 0;
 +      ilist[j].iatoms = NULL;
 +    } else {
 +      do_ilist(fio, &ilist[j],bRead,file_version,j);
 +      if (file_version < 78 && j == F_SETTLE && ilist[j].nr > 0)
 +      {
 +          add_settle_atoms(&ilist[j]);
 +      }
 +    }
 +    /*
 +    if (bRead && gmx_debug_at)
 +      pr_ilist(debug,0,interaction_function[j].longname,
 +             functype,&ilist[j],TRUE);
 +    */
 +  }
 +}
 +
 +static void do_idef(t_fileio *fio, gmx_ffparams_t *ffparams,gmx_moltype_t *molt,
 +                  gmx_bool bRead, int file_version)
 +{
 +  do_ffparams(fio, ffparams,bRead,file_version);
 +    
 +  if (file_version >= 54) {
 +    gmx_fio_do_real(fio,ffparams->fudgeQQ);
 +  }
 +
 +  do_ilists(fio, molt->ilist,bRead,file_version);
 +}
 +
 +static void do_block(t_fileio *fio, t_block *block,gmx_bool bRead,int file_version)
 +{
 +  int  i,idum,dum_nra,*dum_a;
 +  gmx_bool bDum=TRUE;
 +
 +  if (file_version < 44)
 +    for(i=0; i<MAXNODES; i++)
 +      gmx_fio_do_int(fio,idum);
 +  gmx_fio_do_int(fio,block->nr);
 +  if (file_version < 51)
 +    gmx_fio_do_int(fio,dum_nra);
 +  if (bRead) {
 +    block->nalloc_index = block->nr+1;
 +    snew(block->index,block->nalloc_index);
 +  }
 +  bDum=gmx_fio_ndo_int(fio,block->index,block->nr+1);
 +
 +  if (file_version < 51 && dum_nra > 0) {
 +    snew(dum_a,dum_nra);
 +    bDum=gmx_fio_ndo_int(fio,dum_a,dum_nra);
 +    sfree(dum_a);
 +  }
 +}
 +
 +static void do_blocka(t_fileio *fio, t_blocka *block,gmx_bool bRead,
 +                      int file_version)
 +{
 +  int  i,idum;
 +  gmx_bool bDum=TRUE;
 +
 +  if (file_version < 44)
 +    for(i=0; i<MAXNODES; i++)
 +      gmx_fio_do_int(fio,idum);
 +  gmx_fio_do_int(fio,block->nr);
 +  gmx_fio_do_int(fio,block->nra);
 +  if (bRead) {
 +    block->nalloc_index = block->nr+1;
 +    snew(block->index,block->nalloc_index);
 +    block->nalloc_a = block->nra;
 +    snew(block->a,block->nalloc_a);
 +  }
 +  bDum=gmx_fio_ndo_int(fio,block->index,block->nr+1);
 +  bDum=gmx_fio_ndo_int(fio,block->a,block->nra);
 +}
 +
 +static void do_atom(t_fileio *fio, t_atom *atom,int ngrp,gmx_bool bRead, 
 +                    int file_version, gmx_groups_t *groups,int atnr)
 +{ 
 +  int i,myngrp;
 +  
 +  gmx_fio_do_real(fio,atom->m);
 +  gmx_fio_do_real(fio,atom->q);
 +  gmx_fio_do_real(fio,atom->mB);
 +  gmx_fio_do_real(fio,atom->qB);
 +  gmx_fio_do_ushort(fio, atom->type);
 +  gmx_fio_do_ushort(fio, atom->typeB);
 +  gmx_fio_do_int(fio,atom->ptype);
 +  gmx_fio_do_int(fio,atom->resind);
 +  if (file_version >= 52)
 +    gmx_fio_do_int(fio,atom->atomnumber);
 +  else if (bRead)
 +    atom->atomnumber = NOTSET;
 +  if (file_version < 23) 
 +    myngrp = 8;
 +  else if (file_version < 39) 
 +    myngrp = 9;
 +  else
 +    myngrp = ngrp;
 +
 +  if (file_version < 57) {
 +    unsigned char uchar[egcNR];
 +    gmx_fio_ndo_uchar(fio,uchar,myngrp);
 +    for(i=myngrp; (i<ngrp); i++) {
 +      uchar[i] = 0;
 +    }
 +    /* Copy the old data format to the groups struct */
 +    for(i=0; i<ngrp; i++) {
 +      groups->grpnr[i][atnr] = uchar[i];
 +    }
 +  }
 +}
 +
 +static void do_grps(t_fileio *fio, int ngrp,t_grps grps[],gmx_bool bRead, 
 +                    int file_version)
 +{
 +  int i,j,myngrp;
 +  gmx_bool bDum=TRUE;
 +  
 +  if (file_version < 23) 
 +    myngrp = 8;
 +  else if (file_version < 39) 
 +    myngrp = 9;
 +  else
 +    myngrp = ngrp;
 +
 +  for(j=0; (j<ngrp); j++) {
 +    if (j<myngrp) {
 +      gmx_fio_do_int(fio,grps[j].nr);
 +      if (bRead)
 +      snew(grps[j].nm_ind,grps[j].nr);
 +      bDum=gmx_fio_ndo_int(fio,grps[j].nm_ind,grps[j].nr);
 +    }
 +    else {
 +      grps[j].nr = 1;
 +      snew(grps[j].nm_ind,grps[j].nr);
 +    }
 +  }
 +}
 +
 +static void do_symstr(t_fileio *fio, char ***nm,gmx_bool bRead,t_symtab *symtab)
 +{
 +  int ls;
 +  
 +  if (bRead) {
 +    gmx_fio_do_int(fio,ls);
 +    *nm = get_symtab_handle(symtab,ls);
 +  }
 +  else {
 +    ls = lookup_symtab(symtab,*nm);
 +    gmx_fio_do_int(fio,ls);
 +  }
 +}
 +
 +static void do_strstr(t_fileio *fio, int nstr,char ***nm,gmx_bool bRead,
 +                      t_symtab *symtab)
 +{
 +  int  j;
 +  
 +  for (j=0; (j<nstr); j++) 
 +    do_symstr(fio, &(nm[j]),bRead,symtab);
 +}
 +
 +static void do_resinfo(t_fileio *fio, int n,t_resinfo *ri,gmx_bool bRead,
 +                       t_symtab *symtab, int file_version)
 +{
 +  int  j;
 +  
 +  for (j=0; (j<n); j++) {
 +    do_symstr(fio, &(ri[j].name),bRead,symtab);
 +    if (file_version >= 63) {
 +      gmx_fio_do_int(fio,ri[j].nr);
 +      gmx_fio_do_uchar(fio, ri[j].ic);
 +    } else {
 +      ri[j].nr = j + 1;
 +      ri[j].ic = ' ';
 +    }
 +  }
 +}
 +
 +static void do_atoms(t_fileio *fio, t_atoms *atoms,gmx_bool bRead,t_symtab *symtab,
 +                   int file_version,
 +                   gmx_groups_t *groups)
 +{
 +  int i;
 +  
 +  gmx_fio_do_int(fio,atoms->nr);
 +  gmx_fio_do_int(fio,atoms->nres);
 +  if (file_version < 57) {
 +    gmx_fio_do_int(fio,groups->ngrpname);
 +    for(i=0; i<egcNR; i++) {
 +      groups->ngrpnr[i] = atoms->nr;
 +      snew(groups->grpnr[i],groups->ngrpnr[i]);
 +    }
 +  }
 +  if (bRead) {
 +    snew(atoms->atom,atoms->nr);
 +    snew(atoms->atomname,atoms->nr);
 +    snew(atoms->atomtype,atoms->nr);
 +    snew(atoms->atomtypeB,atoms->nr);
 +    snew(atoms->resinfo,atoms->nres);
 +    if (file_version < 57) {
 +      snew(groups->grpname,groups->ngrpname);
 +    }
 +    atoms->pdbinfo = NULL;
 +  }
 +  for(i=0; (i<atoms->nr); i++) {
 +    do_atom(fio, &atoms->atom[i],egcNR,bRead, file_version,groups,i);
 +  }
 +  do_strstr(fio, atoms->nr,atoms->atomname,bRead,symtab);
 +  if (bRead && (file_version <= 20)) {
 +    for(i=0; i<atoms->nr; i++) {
 +      atoms->atomtype[i]  = put_symtab(symtab,"?");
 +      atoms->atomtypeB[i] = put_symtab(symtab,"?");
 +    }
 +  } else {
 +    do_strstr(fio, atoms->nr,atoms->atomtype,bRead,symtab);
 +    do_strstr(fio, atoms->nr,atoms->atomtypeB,bRead,symtab);
 +  }
 +  do_resinfo(fio, atoms->nres,atoms->resinfo,bRead,symtab,file_version);
 +
 +  if (file_version < 57) {
 +    do_strstr(fio, groups->ngrpname,groups->grpname,bRead,symtab);
 +  
 +    do_grps(fio, egcNR,groups->grps,bRead,file_version);
 +  }
 +}
 +
 +static void do_groups(t_fileio *fio, gmx_groups_t *groups,
 +                    gmx_bool bRead,t_symtab *symtab,
 +                    int file_version)
 +{
 +  int  g,n,i;
 +  gmx_bool bDum=TRUE;
 +
 +  do_grps(fio, egcNR,groups->grps,bRead,file_version);
 +  gmx_fio_do_int(fio,groups->ngrpname);
 +  if (bRead) {
 +    snew(groups->grpname,groups->ngrpname);
 +  }
 +  do_strstr(fio, groups->ngrpname,groups->grpname,bRead,symtab);
 +  for(g=0; g<egcNR; g++) {
 +    gmx_fio_do_int(fio,groups->ngrpnr[g]);
 +    if (groups->ngrpnr[g] == 0) {
 +      if (bRead) {
 +      groups->grpnr[g] = NULL;
 +      }
 +    } else {
 +      if (bRead) {
 +      snew(groups->grpnr[g],groups->ngrpnr[g]);
 +      }
 +      bDum=gmx_fio_ndo_uchar(fio, groups->grpnr[g],groups->ngrpnr[g]);
 +    }
 +  }
 +}
 +
 +static void do_atomtypes(t_fileio *fio, t_atomtypes *atomtypes,gmx_bool bRead,
 +                       t_symtab *symtab,int file_version)
 +{
 +  int i,j;
 +  gmx_bool bDum = TRUE;
 +  
 +  if (file_version > 25) {
 +    gmx_fio_do_int(fio,atomtypes->nr);
 +    j=atomtypes->nr;
 +    if (bRead) {
 +      snew(atomtypes->radius,j);
 +      snew(atomtypes->vol,j);
 +      snew(atomtypes->surftens,j);
 +      snew(atomtypes->atomnumber,j);
 +      snew(atomtypes->gb_radius,j);
 +      snew(atomtypes->S_hct,j);
 +    }
 +    bDum=gmx_fio_ndo_real(fio,atomtypes->radius,j);
 +    bDum=gmx_fio_ndo_real(fio,atomtypes->vol,j);
 +    bDum=gmx_fio_ndo_real(fio,atomtypes->surftens,j);
 +    if(file_version >= 40)
 +    {
 +        bDum=gmx_fio_ndo_int(fio,atomtypes->atomnumber,j);
 +    }
 +      if(file_version >= 60)
 +      {
 +              bDum=gmx_fio_ndo_real(fio,atomtypes->gb_radius,j);
 +              bDum=gmx_fio_ndo_real(fio,atomtypes->S_hct,j);
 +      }
 +  } else {
 +    /* File versions prior to 26 cannot do GBSA, 
 +     * so they don't use this structure 
 +     */
 +    atomtypes->nr = 0;
 +    atomtypes->radius = NULL;
 +    atomtypes->vol = NULL;
 +    atomtypes->surftens = NULL;
 +    atomtypes->atomnumber = NULL;
 +    atomtypes->gb_radius = NULL;
 +    atomtypes->S_hct = NULL;
 +  }  
 +}
 +
 +static void do_symtab(t_fileio *fio, t_symtab *symtab,gmx_bool bRead)
 +{
 +  int i,nr;
 +  t_symbuf *symbuf;
 +  char buf[STRLEN];
 +  
 +  gmx_fio_do_int(fio,symtab->nr);
 +  nr     = symtab->nr;
 +  if (bRead) {
 +    snew(symtab->symbuf,1);
 +    symbuf = symtab->symbuf;
 +    symbuf->bufsize = nr;
 +    snew(symbuf->buf,nr);
 +    for (i=0; (i<nr); i++) {
 +      gmx_fio_do_string(fio,buf);
 +      symbuf->buf[i]=strdup(buf);
 +    }
 +  }
 +  else {
 +    symbuf = symtab->symbuf;
 +    while (symbuf!=NULL) {
 +      for (i=0; (i<symbuf->bufsize) && (i<nr); i++) 
 +      gmx_fio_do_string(fio,symbuf->buf[i]);
 +      nr-=i;
 +      symbuf=symbuf->next;
 +    }
 +    if (nr != 0)
 +      gmx_fatal(FARGS,"nr of symtab strings left: %d",nr);
 +  }
 +}
 +
 +static void do_cmap(t_fileio *fio, gmx_cmap_t *cmap_grid, gmx_bool bRead)
 +{
 +      int i,j,ngrid,gs,nelem;
 +      
 +      gmx_fio_do_int(fio,cmap_grid->ngrid);
 +      gmx_fio_do_int(fio,cmap_grid->grid_spacing);
 +      
 +      ngrid = cmap_grid->ngrid;
 +      gs    = cmap_grid->grid_spacing;
 +      nelem = gs * gs;
 +      
 +      if(bRead)
 +      {
 +              snew(cmap_grid->cmapdata,ngrid);
 +              
 +              for(i=0;i<cmap_grid->ngrid;i++)
 +              {
 +                      snew(cmap_grid->cmapdata[i].cmap,4*nelem);
 +              }
 +      }
 +      
 +      for(i=0;i<cmap_grid->ngrid;i++)
 +      {
 +              for(j=0;j<nelem;j++)
 +              {
 +                      gmx_fio_do_real(fio,cmap_grid->cmapdata[i].cmap[j*4]);
 +                      gmx_fio_do_real(fio,cmap_grid->cmapdata[i].cmap[j*4+1]);
 +                      gmx_fio_do_real(fio,cmap_grid->cmapdata[i].cmap[j*4+2]);
 +                      gmx_fio_do_real(fio,cmap_grid->cmapdata[i].cmap[j*4+3]);
 +              }
 +      }       
 +}
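
do_cmap() streams ngrid correction-map grids of grid_spacing x grid_spacing points, with four reals stored contiguously per point (cmap[4*j] .. cmap[4*j+3]). A small indexing sketch with a hypothetical grid size:

    #include <stdio.h>

    int main(void)
    {
        int gs    = 24;          /* hypothetical grid_spacing */
        int nelem = gs*gs;       /* points per grid, as in do_cmap() above */
        int j     = 5*gs + 7;    /* some grid point */

        /* Each point owns four consecutive reals in cmapdata[i].cmap. */
        printf("grid has %d points; point %d occupies cmap[%d..%d]\n",
               nelem, j, 4*j, 4*j+3);
        return 0;
    }
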
 +
 +
 +void tpx_make_chain_identifiers(t_atoms *atoms,t_block *mols)
 +{
 +    int m,a,a0,a1,r;
 +    char c,chainid;
 +    int  chainnum;
 +    
 +    /* We always assign a new chain number, but save the chain id characters 
 +     * for larger molecules.
 +     */
 +#define CHAIN_MIN_ATOMS 15
 +    
 +    chainnum=0;
 +    chainid='A';
 +    for(m=0; m<mols->nr; m++) 
 +    {
 +        a0=mols->index[m];
 +        a1=mols->index[m+1];
 +        if ((a1-a0 >= CHAIN_MIN_ATOMS) && (chainid <= 'Z')) 
 +        {
 +            c=chainid;
 +            chainid++;
 +        } 
 +        else
 +        {
 +            c=' ';
 +        }
 +        for(a=a0; a<a1; a++) 
 +        {
 +            atoms->resinfo[atoms->atom[a].resind].chainnum = chainnum;
 +            atoms->resinfo[atoms->atom[a].resind].chainid  = c;
 +        }
 +        chainnum++;
 +    }
 +    
 +    /* Blank out the chain id if there was only one chain */
 +    if (chainid == 'B') 
 +    {
 +        for(r=0; r<atoms->nres; r++) 
 +        {
 +            atoms->resinfo[r].chainid = ' ';
 +        }
 +    }
 +}
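
tpx_make_chain_identifiers() hands out chain ids 'A', 'B', ... only to molecules with at least CHAIN_MIN_ATOMS atoms, gives a blank id to everything smaller, and blanks all ids again if only a single chain ended up labelled. A short sketch of that assignment on hypothetical molecule sizes:

    #include <stdio.h>

    #define CHAIN_MIN_ATOMS 15

    int main(void)
    {
        /* Hypothetical molecule sizes: a protein, a peptide, three waters. */
        int  natoms[] = { 120, 20, 3, 3, 3 };
        char chainid  = 'A';
        int  m;

        for (m = 0; m < 5; m++)
        {
            char c = (natoms[m] >= CHAIN_MIN_ATOMS && chainid <= 'Z') ? chainid++ : ' ';
            printf("mol %d: chainnum %d, chainid '%c'\n", m, m, c);
        }
        /* Had only one molecule been labelled (chainid == 'B' afterwards),
         * tpx_make_chain_identifiers() would blank the ids again. */
        return 0;
    }
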
 +  
 +static void do_moltype(t_fileio *fio, gmx_moltype_t *molt,gmx_bool bRead,
 +                       t_symtab *symtab, int file_version,
 +                     gmx_groups_t *groups)
 +{
 +  int i;
 +
 +  if (file_version >= 57) {
 +    do_symstr(fio, &(molt->name),bRead,symtab);
 +  }
 +
 +  do_atoms(fio, &molt->atoms, bRead, symtab, file_version, groups);
 +
 +  if (bRead && gmx_debug_at) {
 +    pr_atoms(debug,0,"atoms",&molt->atoms,TRUE);
 +  }
 +  
 +  if (file_version >= 57) {
 +    do_ilists(fio, molt->ilist,bRead,file_version);
 +
 +    do_block(fio, &molt->cgs,bRead,file_version);
 +    if (bRead && gmx_debug_at) {
 +      pr_block(debug,0,"cgs",&molt->cgs,TRUE);
 +    }
 +  }
 +
 +  /* This used to be in the atoms struct */
 +  do_blocka(fio, &molt->excls, bRead, file_version);
 +}
 +
 +static void do_molblock(t_fileio *fio, gmx_molblock_t *molb,gmx_bool bRead,
 +                        int file_version)
 +{
 +  int i;
 +
 +  gmx_fio_do_int(fio,molb->type);
 +  gmx_fio_do_int(fio,molb->nmol);
 +  gmx_fio_do_int(fio,molb->natoms_mol);
 +  /* Position restraint coordinates */
 +  gmx_fio_do_int(fio,molb->nposres_xA);
 +  if (molb->nposres_xA > 0) {
 +    if (bRead) {
 +      snew(molb->posres_xA,molb->nposres_xA);
 +    }
 +    gmx_fio_ndo_rvec(fio,molb->posres_xA,molb->nposres_xA);
 +  }
 +  gmx_fio_do_int(fio,molb->nposres_xB);
 +  if (molb->nposres_xB > 0) {
 +    if (bRead) {
 +      snew(molb->posres_xB,molb->nposres_xB);
 +    }
 +    gmx_fio_ndo_rvec(fio,molb->posres_xB,molb->nposres_xB);
 +  }
 +
 +}
 +
 +static t_block mtop_mols(gmx_mtop_t *mtop)
 +{
 +  int mb,m,a,mol;
 +  t_block mols;
 +
 +  mols.nr = 0;
 +  for(mb=0; mb<mtop->nmolblock; mb++) {
 +    mols.nr += mtop->molblock[mb].nmol;
 +  }
 +  mols.nalloc_index = mols.nr + 1;
 +  snew(mols.index,mols.nalloc_index);
 +
 +  a = 0;
 +  m = 0;
 +  mols.index[m] = a;
 +  for(mb=0; mb<mtop->nmolblock; mb++) {
 +    for(mol=0; mol<mtop->molblock[mb].nmol; mol++) {
 +      a += mtop->molblock[mb].natoms_mol;
 +      m++;
 +      mols.index[m] = a;
 +    }
 +  }
 +  
 +  return mols;
 +}
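
mtop_mols() flattens the molecule-block description (each block contributing nmol molecules of natoms_mol atoms) into a single index array with one entry per molecule plus a closing entry, so molecule i spans atoms [index[i], index[i+1]). A sketch with hypothetical block sizes:

    #include <stdio.h>

    int main(void)
    {
        /* Hypothetical system: one protein of 100 atoms, then 3 waters of 3 atoms. */
        int nmol[]       = { 1, 3 };
        int natoms_mol[] = { 100, 3 };
        int molindex[5];             /* total molecules + 1 */
        int mb, mol, a = 0, m = 0;

        molindex[m] = a;
        for (mb = 0; mb < 2; mb++)
        {
            for (mol = 0; mol < nmol[mb]; mol++)
            {
                a += natoms_mol[mb];
                molindex[++m] = a;
            }
        }
        /* molindex = 0 100 103 106 109 */
        for (mol = 0; mol <= m; mol++)
        {
            printf("%d ", molindex[mol]);
        }
        printf("\n");
        return 0;
    }
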
 +
 +static void add_posres_molblock(gmx_mtop_t *mtop)
 +{
 +    t_ilist *il,*ilfb;
 +  int am,i,mol,a;
 +  gmx_bool bFE;
 +  gmx_molblock_t *molb;
 +  t_iparams *ip;
 +
 +  /* posres reference positions are stored in ip->posres (if present) and
 +     in ip->fbposres (if present). If normal and flat-bottomed posres are present,
 +     posres.pos0A is identical to fbposres.pos0. */
 +  il = &mtop->moltype[0].ilist[F_POSRES];
 +  ilfb = &mtop->moltype[0].ilist[F_FBPOSRES];
 +  if (il->nr == 0 && ilfb->nr == 0) {
 +    return;
 +  }
 +  am = 0;
 +  bFE = FALSE;
 +  for(i=0; i<il->nr; i+=2) {
 +    ip = &mtop->ffparams.iparams[il->iatoms[i]];
 +    am = max(am,il->iatoms[i+1]);
 +    if (ip->posres.pos0B[XX] != ip->posres.pos0A[XX] ||
 +      ip->posres.pos0B[YY] != ip->posres.pos0A[YY] ||
 +      ip->posres.pos0B[ZZ] != ip->posres.pos0A[ZZ]) {
 +      bFE = TRUE;
 +    }
 +  }
 +  /* This loop is required if we have only flat-bottomed posres:
 +     - set am
 +     - bFE == FALSE (no B-state for flat-bottomed posres) */
 +  if (il->nr == 0)
 +  {
 +      for(i=0; i<ilfb->nr; i+=2) {
 +          ip = &mtop->ffparams.iparams[ilfb->iatoms[i]];
 +          am = max(am,ilfb->iatoms[i+1]);
 +      }
 +  }
 +  /* Make the posres coordinate block end at a molecule end */
 +  mol = 0;
 +  while(am >= mtop->mols.index[mol+1]) {
 +    mol++;
 +  }
 +  molb = &mtop->molblock[0];
 +  molb->nposres_xA = mtop->mols.index[mol+1];
 +  snew(molb->posres_xA,molb->nposres_xA);
 +  if (bFE) {
 +    molb->nposres_xB = molb->nposres_xA;
 +    snew(molb->posres_xB,molb->nposres_xB);
 +  } else {
 +    molb->nposres_xB = 0;
 +  }
 +  for(i=0; i<il->nr; i+=2) {
 +    ip = &mtop->ffparams.iparams[il->iatoms[i]];
 +    a  = il->iatoms[i+1];
 +    molb->posres_xA[a][XX] = ip->posres.pos0A[XX];
 +    molb->posres_xA[a][YY] = ip->posres.pos0A[YY];
 +    molb->posres_xA[a][ZZ] = ip->posres.pos0A[ZZ];
 +    if (bFE) {
 +      molb->posres_xB[a][XX] = ip->posres.pos0B[XX];
 +      molb->posres_xB[a][YY] = ip->posres.pos0B[YY];
 +      molb->posres_xB[a][ZZ] = ip->posres.pos0B[ZZ];
 +    }
 +  }
 +  if (il->nr == 0)
 +  {
 +      /* If only flat-bottomed posres are present, take reference pos from them.
 +         Here: bFE == FALSE      */
 +      for(i=0; i<ilfb->nr; i+=2)
 +      {
 +          ip = &mtop->ffparams.iparams[ilfb->iatoms[i]];
 +          a  = ilfb->iatoms[i+1];
 +          molb->posres_xA[a][XX] = ip->fbposres.pos0[XX];
 +          molb->posres_xA[a][YY] = ip->fbposres.pos0[YY];
 +          molb->posres_xA[a][ZZ] = ip->fbposres.pos0[ZZ];
 +      }
 +  }
 +}
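
add_posres_molblock() only allocates a B-state array when at least one restrained atom has different A- and B-state reference positions (the bFE flag above). A minimal sketch of that decision, using a plain rvec typedef rather than the real t_iparams:

    #include <stdio.h>

    typedef float rvec[3];

    /* Returns 1 when any atom needs a separate B-state reference position,
     * mirroring the bFE test in add_posres_molblock() above. */
    static int posres_needs_bstate(const rvec *pos0A, const rvec *pos0B, int n)
    {
        int i;

        for (i = 0; i < n; i++)
        {
            if (pos0B[i][0] != pos0A[i][0] ||
                pos0B[i][1] != pos0A[i][1] ||
                pos0B[i][2] != pos0A[i][2])
            {
                return 1;
            }
        }
        return 0;
    }

    int main(void)
    {
        rvec a[2] = { { 0, 0, 0 }, { 1, 2, 3 } };
        rvec b[2] = { { 0, 0, 0 }, { 1, 2, 4 } };  /* z differs for atom 1 */

        printf("needs B state: %d\n", posres_needs_bstate(a, b, 2));
        return 0;
    }
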
 +
 +static void set_disres_npair(gmx_mtop_t *mtop)
 +{
 +  int mt,i,npair;
 +  t_iparams *ip;
 +  t_ilist *il;
 +  t_iatom *a;
 +
 +  ip = mtop->ffparams.iparams;
 +
 +  for(mt=0; mt<mtop->nmoltype; mt++) {
 +    il = &mtop->moltype[mt].ilist[F_DISRES];
 +    if (il->nr > 0) {
 +      a = il->iatoms;
 +      npair = 0;
 +      for(i=0; i<il->nr; i+=3) {
 +      npair++;
 +      if (i+3 == il->nr || ip[a[i]].disres.label != ip[a[i+3]].disres.label) {
 +        ip[a[i]].disres.npair = npair;
 +        npair = 0;
 +      }
 +      }
 +    }
 +  }
 +}
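
set_disres_npair() walks the distance-restraint triplets (parameter, ai, aj) and counts how many consecutive triplets share the same restraint label; the count is stored as npair on the first parameter of each group. A sketch of the counting on a hypothetical label sequence:

    #include <stdio.h>

    int main(void)
    {
        /* Hypothetical restraint label per (param,ai,aj) triplet. */
        int label[] = { 0, 0, 1, 2, 2, 2 };
        int n = 6, i, npair = 0;

        for (i = 0; i < n; i++)
        {
            npair++;
            if (i+1 == n || label[i] != label[i+1])
            {
                printf("label %d: npair = %d\n", label[i], npair);
                npair = 0;
            }
        }
        return 0;
    }
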
 +
 +static void do_mtop(t_fileio *fio, gmx_mtop_t *mtop,gmx_bool bRead, 
 +                    int file_version)
 +{
 +  int  mt,mb,i;
 +  t_blocka dumb;
 +
 +  if (bRead)
 +    init_mtop(mtop);
 +  do_symtab(fio, &(mtop->symtab),bRead);
 +  if (bRead && debug) 
 +    pr_symtab(debug,0,"symtab",&mtop->symtab);
 +  
 +  do_symstr(fio, &(mtop->name),bRead,&(mtop->symtab));
 +  
 +  if (file_version >= 57) {
 +    do_ffparams(fio, &mtop->ffparams,bRead,file_version);
 +
 +    gmx_fio_do_int(fio,mtop->nmoltype);
 +  } else {
 +    mtop->nmoltype = 1;
 +  }
 +  if (bRead) {
 +    snew(mtop->moltype,mtop->nmoltype);
 +    if (file_version < 57) {
 +      mtop->moltype[0].name = mtop->name;
 +    }
 +  }
 +  for(mt=0; mt<mtop->nmoltype; mt++) {
 +    do_moltype(fio, &mtop->moltype[mt],bRead,&mtop->symtab,file_version,
 +             &mtop->groups);
 +  }
 +
 +  if (file_version >= 57) {
 +    gmx_fio_do_int(fio,mtop->nmolblock);
 +  } else {
 +    mtop->nmolblock = 1;
 +  }
 +  if (bRead) {
 +    snew(mtop->molblock,mtop->nmolblock);
 +  }
 +  if (file_version >= 57) {
 +    for(mb=0; mb<mtop->nmolblock; mb++) {
 +      do_molblock(fio, &mtop->molblock[mb],bRead,file_version);
 +    }
 +    gmx_fio_do_int(fio,mtop->natoms);
 +  } else {
 +    mtop->molblock[0].type = 0;
 +    mtop->molblock[0].nmol = 1;
 +    mtop->molblock[0].natoms_mol = mtop->moltype[0].atoms.nr;
 +    mtop->molblock[0].nposres_xA = 0;
 +    mtop->molblock[0].nposres_xB = 0;
 +  }
 +
 +  do_atomtypes (fio, &(mtop->atomtypes),bRead,&(mtop->symtab), file_version);
 +  if (bRead && debug) 
 +    pr_atomtypes(debug,0,"atomtypes",&mtop->atomtypes,TRUE);
 +
 +  if (file_version < 57) {
 +    /* Debug statements are inside do_idef */    
 +    do_idef (fio, &mtop->ffparams,&mtop->moltype[0],bRead,file_version);
 +    mtop->natoms = mtop->moltype[0].atoms.nr;
 +  }
 +      
 +  if(file_version >= 65)
 +  {
 +      do_cmap(fio, &mtop->ffparams.cmap_grid,bRead);
 +  }
 +  else
 +  {
 +      mtop->ffparams.cmap_grid.ngrid        = 0;
 +      mtop->ffparams.cmap_grid.grid_spacing = 0;
 +      mtop->ffparams.cmap_grid.cmapdata     = NULL;
 +  }
 +        
 +  if (file_version >= 57) {
 +    do_groups(fio, &mtop->groups,bRead,&(mtop->symtab),file_version);
 +  }
 +
 +  if (file_version < 57) {
 +    do_block(fio, &mtop->moltype[0].cgs,bRead,file_version);
 +    if (bRead && gmx_debug_at) {
 +      pr_block(debug,0,"cgs",&mtop->moltype[0].cgs,TRUE);
 +    }
 +    do_block(fio, &mtop->mols,bRead,file_version);
 +    /* Add the posres coordinates to the molblock */
 +    add_posres_molblock(mtop);
 +  }
 +  if (bRead) {
 +    if (file_version >= 57) {
 +      mtop->mols = mtop_mols(mtop);
 +    }
 +    if (gmx_debug_at) { 
 +      pr_block(debug,0,"mols",&mtop->mols,TRUE);
 +    }
 +  }
 +
 +  if (file_version < 51) {
 +    /* Here used to be the shake blocks */
 +    do_blocka(fio, &dumb,bRead,file_version);
 +    if (dumb.nr > 0)
 +      sfree(dumb.index);
 +    if (dumb.nra > 0)
 +      sfree(dumb.a);
 +  }
 +
 +  if (bRead) {
 +    close_symtab(&(mtop->symtab));
 +  }
 +}
 +
 +/* If TopOnlyOK is TRUE then we can read even future versions
 + * of tpx files, provided the file_generation hasn't changed.
 + * If it is FALSE, we need the input record too, and bail out
 + * if the file is newer than the program.
 + *
 + * The version and generation of the topology (see top of this file)
 + * are returned in the last two arguments.
 + *
 + * If possible, we will read the inputrec even when TopOnlyOK is TRUE.
 + */
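 +/* Hedged usage sketch (the file name is illustrative only): a tool that
 + * only needs the topology can call the exported wrapper further below as
 + *     int ver, gen; t_tpxheader h;
 + *     read_tpxheader("topol.tpr", &h, TRUE, &ver, &gen);
 + * With TopOnlyOK = TRUE the call also accepts files with a newer tpx
 + * version of the same generation; with TopOnlyOK = FALSE it would
 + * gmx_fatal() on such files.
 + */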
 +static void do_tpxheader(t_fileio *fio,gmx_bool bRead,t_tpxheader *tpx, 
 +                         gmx_bool TopOnlyOK, int *file_version, 
 +                         int *file_generation)
 +{
 +    char  buf[STRLEN];
 +    char  file_tag[STRLEN];
 +  gmx_bool  bDouble;
 +  int   precision;
 +  int   fver,fgen;
 +  int   idum=0;
 +  real  rdum=0;
 +
 +  gmx_fio_checktype(fio);
 +  gmx_fio_setdebug(fio,bDebugMode());
 +  
 +  /* NEW! XDR tpb file */
 +  precision = sizeof(real);
 +  if (bRead) {
 +    gmx_fio_do_string(fio,buf);
 +    if (strncmp(buf,"VERSION",7))
 +      gmx_fatal(FARGS,"Can not read file %s,\n"
 +                "             this file is from a Gromacs version which is older than 2.0\n"
 +                "             Make a new one with grompp or use a gro or pdb file, if possible",
 +                gmx_fio_getname(fio));
 +    gmx_fio_do_int(fio,precision);
 +    bDouble = (precision == sizeof(double));
 +    if ((precision != sizeof(float)) && !bDouble)
 +      gmx_fatal(FARGS,"Unknown precision in file %s: real is %d bytes "
 +                "instead of %d or %d",
 +                gmx_fio_getname(fio),precision,sizeof(float),sizeof(double));
 +    gmx_fio_setprecision(fio,bDouble);
 +    fprintf(stderr,"Reading file %s, %s (%s precision)\n",
 +          gmx_fio_getname(fio),buf,bDouble ? "double" : "single");
 +  }
 +  else {
 +    gmx_fio_write_string(fio,GromacsVersion());
 +    bDouble = (precision == sizeof(double));
 +    gmx_fio_setprecision(fio,bDouble);
 +    gmx_fio_do_int(fio,precision);
 +    fver = tpx_version;
 +    sprintf(file_tag,"%s",tpx_tag);
 +    fgen = tpx_generation;
 +  }
 +  
 +    /* Check versions! */
 +    gmx_fio_do_int(fio,fver);
 +
 +    /* This is for backward compatibility with development versions 77-79
 +     * where the tag was, mistakenly, placed before the generation,
 +     * which would cause a segv instead of a proper error message
 +     * when reading the topology only from tpx with <77 code.
 +     */
 +    if (fver >= 77 && fver <= 79)
 +    {
 +        gmx_fio_do_string(fio,file_tag);
 +    }
 +  
 +    if (fver >= 26)
 +    {
 +        gmx_fio_do_int(fio,fgen);
 +    }
 +    else
 +    {
 +        fgen = 0;
 +    }
 + 
 +    if (fver >= 81)
 +    {
 +        gmx_fio_do_string(fio,file_tag);
 +    }
 +    if (bRead)
 +    {
 +        if (fver < 77)
 +        {
 +            /* Versions before 77 don't have the tag, set it to release */
 +            sprintf(file_tag,"%s",TPX_TAG_RELEASE);
 +        }
 +
 +        if (strcmp(file_tag,tpx_tag) != 0)
 +        {
 +            fprintf(stderr,"Note: file tpx tag '%s', software tpx tag '%s'\n",
 +                    file_tag,tpx_tag);
 +
 +            /* We only support reading tpx files with the same tag as the code
 +             * or tpx files with the release tag and with lower version number.
 +             */
 +            if (!(strcmp(file_tag,TPX_TAG_RELEASE) == 0 && fver < tpx_version))
 +            {
 +                gmx_fatal(FARGS,"tpx tag/version mismatch: reading tpx file (%s) version %d, tag '%s' with program for tpx version %d, tag '%s'",
 +                          gmx_fio_getname(fio),fver,file_tag,
 +                          tpx_version,tpx_tag);
 +            }
 +        }
 +    }
 +
 +    if (file_version != NULL)
 +    {
 +        *file_version = fver;
 +    }
 +    if (file_generation != NULL)
 +    {
 +        *file_generation = fgen;
 +    }
 +   
 +  
 +  if ((fver <= tpx_incompatible_version) ||
 +      ((fver > tpx_version) && !TopOnlyOK) ||
 +      (fgen > tpx_generation))
 +    gmx_fatal(FARGS,"reading tpx file (%s) version %d with version %d program",
 +              gmx_fio_getname(fio),fver,tpx_version);
 +  
 +  do_section(fio,eitemHEADER,bRead);
 +  gmx_fio_do_int(fio,tpx->natoms);
 +  if (fver >= 28)
 +    gmx_fio_do_int(fio,tpx->ngtc);
 +  else
 +    tpx->ngtc = 0;
 +  if (fver < 62) {
 +      gmx_fio_do_int(fio,idum);
 +      gmx_fio_do_real(fio,rdum);
 +  }
 +  /* A better decision will eventually (5.0 or later) need to be made
 +     on how to treat the alchemical state of the system, which can now
 +     vary through a simulation and cannot be completely described
 +     through a single lambda variable, or even a single state
 +     index. Eventually, this should probably be a vector. MRS */
 +  if (fver >= 79) 
 +  {
 +      gmx_fio_do_int(fio,tpx->fep_state);
 +  }
 +  gmx_fio_do_real(fio,tpx->lambda);
 +  gmx_fio_do_int(fio,tpx->bIr);
 +  gmx_fio_do_int(fio,tpx->bTop);
 +  gmx_fio_do_int(fio,tpx->bX);
 +  gmx_fio_do_int(fio,tpx->bV);
 +  gmx_fio_do_int(fio,tpx->bF);
 +  gmx_fio_do_int(fio,tpx->bBox);
 +
 +  if((fgen > tpx_generation)) {
 +    /* This can only happen if TopOnlyOK=TRUE */
 +    tpx->bIr=FALSE;
 +  }
 +}
 +
 +static int do_tpx(t_fileio *fio, gmx_bool bRead,
 +                t_inputrec *ir,t_state *state,rvec *f,gmx_mtop_t *mtop,
 +                gmx_bool bXVallocated)
 +{
 +  t_tpxheader tpx;
 +  t_inputrec  dum_ir;
 +  gmx_mtop_t  dum_top;
 +  gmx_bool        TopOnlyOK,bDum=TRUE;
 +  int         file_version,file_generation;
 +  int         i;
 +  rvec        *xptr,*vptr;
 +  int         ePBC;
 +  gmx_bool        bPeriodicMols;
 +
 +  if (!bRead) {
 +    tpx.natoms = state->natoms;
 +    tpx.ngtc   = state->ngtc;  /* need to add nnhpres here? */
 +    tpx.fep_state = state->fep_state;
 +    tpx.lambda = state->lambda[efptFEP];
 +    tpx.bIr  = (ir       != NULL);
 +    tpx.bTop = (mtop     != NULL);
 +    tpx.bX   = (state->x != NULL);
 +    tpx.bV   = (state->v != NULL);
 +    tpx.bF   = (f        != NULL);
 +    tpx.bBox = TRUE;
 +  }
 +  
 +  TopOnlyOK = (ir==NULL);
 +  
 +  do_tpxheader(fio,bRead,&tpx,TopOnlyOK,&file_version,&file_generation);
 +
 +  if (bRead) {
 +    state->flags  = 0;
 +    /* state->lambda = tpx.lambda;*/ /*remove this eventually? */
 +    /* The init_state calls initialize the Nose-Hoover xi integrals to zero */
 +    if (bXVallocated) {
 +      xptr = state->x;
 +      vptr = state->v;
 +      init_state(state,0,tpx.ngtc,0,0,0);  /* nose-hoover chains */ /* eventually, need to add nnhpres here? */
 +      state->natoms = tpx.natoms;
 +      state->nalloc = tpx.natoms;
 +      state->x = xptr;
 +      state->v = vptr;
 +    } else {
 +        init_state(state,tpx.natoms,tpx.ngtc,0,0,0);  /* nose-hoover chains */
 +    }
 +  }
 +
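 +/* Descriptive note: the macro below aborts with gmx_fatal() when reading,
 + * if the caller requested a section (p != NULL) that the file does not
 + * contain (b is FALSE). */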
 +#define do_test(fio,b,p) if (bRead && (p!=NULL) && !b) gmx_fatal(FARGS,"No %s in %s",#p,gmx_fio_getname(fio)) 
 +
 +  do_test(fio,tpx.bBox,state->box);
 +  do_section(fio,eitemBOX,bRead);
 +  if (tpx.bBox) {
 +    gmx_fio_ndo_rvec(fio,state->box,DIM);
 +    if (file_version >= 51) {
 +      gmx_fio_ndo_rvec(fio,state->box_rel,DIM);
 +    } else {
 +      /* We initialize box_rel after reading the inputrec */
 +      clear_mat(state->box_rel);
 +    }
 +    if (file_version >= 28) {
 +      gmx_fio_ndo_rvec(fio,state->boxv,DIM);
 +      if (file_version < 56) {
 +        matrix mdum;
 +        gmx_fio_ndo_rvec(fio,mdum,DIM);
 +      }
 +    }
 +  }
 +  
 +  if (state->ngtc > 0 && file_version >= 28) {
 +    real *dumv;
 +    /*ndo_double(state->nosehoover_xi,state->ngtc,bDum);*/
 +    /*ndo_double(state->nosehoover_vxi,state->ngtc,bDum);*/
 +    /*ndo_double(state->therm_integral,state->ngtc,bDum);*/
 +    snew(dumv,state->ngtc);
 +    if (file_version < 69) {
 +      bDum=gmx_fio_ndo_real(fio,dumv,state->ngtc);
 +    }
 +    /* These used to be the Berendsen tcoupl_lambda's */
 +    bDum=gmx_fio_ndo_real(fio,dumv,state->ngtc);
 +    sfree(dumv);
 +  }
 +
 +  /* Prior to tpx version 26, the inputrec was here.
 +   * I moved it to enable partial forward-compatibility
 +   * for analysis/viewer programs.
 +   */
 +  if(file_version<26) {
 +    do_test(fio,tpx.bIr,ir);
 +    do_section(fio,eitemIR,bRead);
 +    if (tpx.bIr) {
 +      if (ir) {
 +        do_inputrec(fio, ir,bRead,file_version,
 +                    mtop ? &mtop->ffparams.fudgeQQ : NULL);
 +        if (bRead && debug)
 +          pr_inputrec(debug,0,"inputrec",ir,FALSE);
 +      }
 +      else {
 +        do_inputrec(fio, &dum_ir,bRead,file_version,
 +                    mtop ? &mtop->ffparams.fudgeQQ : NULL);
 +        if (bRead && debug)
 +          pr_inputrec(debug,0,"inputrec",&dum_ir,FALSE);
 +        done_inputrec(&dum_ir);
 +      }
 +    }
 +  }
 +  
 +  do_test(fio,tpx.bTop,mtop);
 +  do_section(fio,eitemTOP,bRead);
 +  if (tpx.bTop) {
 +    int mtop_file_version = file_version;
 +    /* Allow reading of GROMACS 4.6 files: their tpx versions (81-89)
 +       are read with the version-79 mtop code */
 +    if (mtop_file_version>80 && mtop_file_version<90)
 +    {
 +        mtop_file_version = 79;
 +    }
 +    if (mtop) {
 +      do_mtop(fio,mtop,bRead, mtop_file_version);
 +    } else {
 +      do_mtop(fio,&dum_top,bRead,mtop_file_version);
 +      done_mtop(&dum_top,TRUE);
 +    }
 +  }
 +  do_test(fio,tpx.bX,state->x);  
 +  do_section(fio,eitemX,bRead);
 +  if (tpx.bX) {
 +    if (bRead) {
 +      state->flags |= (1<<estX);
 +    }
 +    gmx_fio_ndo_rvec(fio,state->x,state->natoms);
 +  }
 +  
 +  do_test(fio,tpx.bV,state->v);
 +  do_section(fio,eitemV,bRead);
 +  if (tpx.bV) {
 +    if (bRead) {
 +      state->flags |= (1<<estV);
 +    }
 +    gmx_fio_ndo_rvec(fio,state->v,state->natoms);
 +  }
 +
 +  do_test(fio,tpx.bF,f);
 +  do_section(fio,eitemF,bRead);
 +  if (tpx.bF) gmx_fio_ndo_rvec(fio,f,state->natoms);
 +
 +  /* Starting with tpx version 26, we have the inputrec
 +   * at the end of the file, so we can ignore it
 +   * if the file is newer than the software (but still the
 +   * same generation - see comments at the top of this file).
 +   */
 +  ePBC = -1;
 +  bPeriodicMols = FALSE;
 +  if (file_version >= 26) {
 +    do_test(fio,tpx.bIr,ir);
 +    do_section(fio,eitemIR,bRead);
 +    if (tpx.bIr) {
 +      if (file_version >= 53) {
 +        /* Removed the pbc info from do_inputrec, since we always want it */
 +        if (!bRead) {
 +          ePBC          = ir->ePBC;
 +          bPeriodicMols = ir->bPeriodicMols;
 +        }
 +        gmx_fio_do_int(fio,ePBC);
 +        gmx_fio_do_gmx_bool(fio,bPeriodicMols);
 +      }
 +      if (file_generation <= tpx_generation && ir) {
 +        do_inputrec(fio, ir,bRead,file_version,mtop ? &mtop->ffparams.fudgeQQ : NULL);
 +        if (bRead && debug)
 +          pr_inputrec(debug,0,"inputrec",ir,FALSE);
 +        if (file_version < 51)
 +          set_box_rel(ir,state);
 +        if (file_version < 53) {
 +          ePBC          = ir->ePBC;
 +          bPeriodicMols = ir->bPeriodicMols;
 +        }
 +      }
 +      if (bRead && ir && file_version >= 53) {
 +        /* We need to do this after do_inputrec, since that initializes ir */
 +        ir->ePBC          = ePBC;
 +        ir->bPeriodicMols = bPeriodicMols;
 +      }
 +    }
 +  }
 +
 +    if (bRead)
 +    {
 +        if (tpx.bIr && ir)
 +        {
 +            if (state->ngtc == 0)
 +            {
 +                /* Reading old version without tcoupl state data: set it */
 +                init_gtc_state(state,ir->opts.ngtc,0,ir->opts.nhchainlength);
 +            }
 +            if (tpx.bTop && mtop)
 +            {
 +                if (file_version < 57)
 +                {
 +                    if (mtop->moltype[0].ilist[F_DISRES].nr > 0)
 +                    {
 +                        ir->eDisre = edrSimple;
 +                    }
 +                    else
 +                    {
 +                        ir->eDisre = edrNone;
 +                    }
 +                }
 +                set_disres_npair(mtop);
 +            }
 +        }
 +
 +        if (tpx.bTop && mtop)
 +        {
 +            gmx_mtop_finalize(mtop);
 +        }
 +
 +        if (file_version >= 57)
 +        {
 +            char *env;
 +            int  ienv;
 +            env = getenv("GMX_NOCHARGEGROUPS");
 +            if (env != NULL)
 +            {
 +                sscanf(env,"%d",&ienv);
 +                fprintf(stderr,"\nFound env.var. GMX_NOCHARGEGROUPS = %d\n",
 +                        ienv);
 +                if (ienv > 0)
 +                {
 +                    fprintf(stderr,
 +                            "Will make single atomic charge groups in non-solvent%s\n",
 +                            ienv > 1 ? " and solvent" : "");
 +                    gmx_mtop_make_atomic_charge_groups(mtop,ienv==1);
 +                }
 +                fprintf(stderr,"\n");
 +            }
 +        }
 +    }
 +
 +    return ePBC;
 +}
 +
 +/************************************************************
 + *
 + *  The following routines are the exported ones
 + *
 + ************************************************************/
 +
 +t_fileio *open_tpx(const char *fn,const char *mode)
 +{
 +  return gmx_fio_open(fn,mode);
 +}    
 + 
 +void close_tpx(t_fileio *fio)
 +{
 +  gmx_fio_close(fio);
 +}
 +
 +void read_tpxheader(const char *fn, t_tpxheader *tpx, gmx_bool TopOnlyOK,
 +                    int *file_version, int *file_generation)
 +{
 +  t_fileio *fio;
 +
 +  fio = open_tpx(fn,"r");
 +  do_tpxheader(fio,TRUE,tpx,TopOnlyOK,file_version,file_generation);
 +  close_tpx(fio);
 +}
 +
 +void write_tpx_state(const char *fn,
 +                   t_inputrec *ir,t_state *state,gmx_mtop_t *mtop)
 +{
 +  t_fileio *fio;
 +
 +  fio = open_tpx(fn,"w");
 +  do_tpx(fio,FALSE,ir,state,NULL,mtop,FALSE);
 +  close_tpx(fio);
 +}
 +
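 +/* Hedged usage sketch for the reader below (variable names and the file
 + * name are illustrative only):
 + *   t_inputrec ir; t_state st; gmx_mtop_t mtop;
 + *   read_tpx_state("topol.tpr", &ir, &st, NULL, &mtop);
 + * This opens the file, runs do_tpx() with bRead=TRUE and closes it again;
 + * passing NULL for the force pointer is only safe when the file stores no
 + * forces, which is the common case for grompp output.
 + */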
 +void read_tpx_state(const char *fn,
 +                  t_inputrec *ir,t_state *state,rvec *f,gmx_mtop_t *mtop)
 +{
 +  t_fileio *fio;
 +      
 +  fio = open_tpx(fn,"r");
 +  do_tpx(fio,TRUE,ir,state,f,mtop,FALSE);
 +  close_tpx(fio);
 +}
 +
 +int read_tpx(const char *fn,
 +           t_inputrec *ir, matrix box,int *natoms,
 +           rvec *x,rvec *v,rvec *f,gmx_mtop_t *mtop)
 +{
 +  t_fileio *fio;
 +  t_state state;
 +  int ePBC;
 +
 +  state.x = x;
 +  state.v = v;
 +  fio = open_tpx(fn,"r");
 +  ePBC = do_tpx(fio,TRUE,ir,&state,f,mtop,TRUE);
 +  close_tpx(fio);
 +  *natoms = state.natoms;
 +  if (box) 
 +    copy_mat(state.box,box);
 +  state.x = NULL;
 +  state.v = NULL;
 +  done_state(&state);
 +
 +  return ePBC;
 +}
 +
 +int read_tpx_top(const char *fn,
 +               t_inputrec *ir, matrix box,int *natoms,
 +               rvec *x,rvec *v,rvec *f,t_topology *top)
 +{
 +  gmx_mtop_t mtop;
 +  t_topology *ltop;
 +  int ePBC;
 +
 +  ePBC = read_tpx(fn,ir,box,natoms,x,v,f,&mtop);
 +  
 +  *top = gmx_mtop_t_to_t_topology(&mtop);
 +
 +  return ePBC;
 +}
 +
 +gmx_bool fn2bTPX(const char *file)
 +{
 +  switch (fn2ftp(file)) {
 +  case efTPR:
 +  case efTPB:
 +  case efTPA:
 +    return TRUE;
 +  default:
 +    return FALSE;
 +  }
 +}
 +
 +gmx_bool read_tps_conf(const char *infile,char *title,t_topology *top,int *ePBC,
 +                 rvec **x,rvec **v,matrix box,gmx_bool bMass)
 +{
 +  t_tpxheader  header;
 +  int          natoms,i,version,generation;
 +  gmx_bool         bTop,bXNULL=FALSE;
 +  gmx_mtop_t   *mtop;
 +  t_topology   *topconv;
 +  gmx_atomprop_t aps;
 +  
 +  bTop = fn2bTPX(infile);
 +  *ePBC = -1;
 +  if (bTop) {
 +    read_tpxheader(infile,&header,TRUE,&version,&generation);
 +    if (x)
 +      snew(*x,header.natoms);
 +    if (v)
 +      snew(*v,header.natoms);
 +    snew(mtop,1);
 +    *ePBC = read_tpx(infile,NULL,box,&natoms,
 +                   (x==NULL) ? NULL : *x,(v==NULL) ? NULL : *v,NULL,mtop);
 +    *top = gmx_mtop_t_to_t_topology(mtop);
 +    sfree(mtop);
 +    strcpy(title,*top->name);
 +    tpx_make_chain_identifiers(&top->atoms,&top->mols);
 +  }
 +  else {
 +    get_stx_coordnum(infile,&natoms);
 +    init_t_atoms(&top->atoms,natoms,(fn2ftp(infile) == efPDB));
 +    if (x == NULL)
 +    {
 +        snew(x,1);
 +        bXNULL = TRUE;
 +    }
 +    snew(*x,natoms);
 +    if (v)
 +      snew(*v,natoms);
 +    read_stx_conf(infile,title,&top->atoms,*x,(v==NULL) ? NULL : *v,ePBC,box);
 +    if (bXNULL)
 +    {
 +      sfree(*x);
 +      sfree(x);
 +    }
 +    if (bMass) {
 +      aps = gmx_atomprop_init();
 +      for(i=0; (i<natoms); i++) {
 +        if (!gmx_atomprop_query(aps,epropMass,
 +                                *top->atoms.resinfo[top->atoms.atom[i].resind].name,
 +                                *top->atoms.atomname[i],
 +                                &(top->atoms.atom[i].m))) {
 +          if (debug)
 +            fprintf(debug,"Can not find mass for atom %s %d %s, setting to 1\n",
 +                    *top->atoms.resinfo[top->atoms.atom[i].resind].name,
 +                    top->atoms.resinfo[top->atoms.atom[i].resind].nr,
 +                    *top->atoms.atomname[i]);
 +        }
 +      }
 +      gmx_atomprop_destroy(aps);
 +    }
 +    top->idef.ntypes=-1;
 +  }
 +
 +  return bTop;
 +}
index f0511a57320851c282096bf92b8bc456bde2f101,0000000000000000000000000000000000000000..2f4218c6641586a0014e5f149fc9f3e17116f214
mode 100644,000000..100644
--- /dev/null
@@@ -1,1600 -1,0 +1,1601 @@@
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +/* This file is completely threadsafe - please keep it that way! */
 +#ifdef GMX_THREAD_MPI
 +#include <thread_mpi.h>
 +#endif
 +
 +
 +#include <stdio.h>
 +#include "smalloc.h"
 +#include "typedefs.h"
 +#include "names.h"
 +#include "txtdump.h"
 +#include "string2.h"
 +#include "vec.h"
 +#include "macros.h"
 +
 +
 +int pr_indent(FILE *fp,int n)
 +{
 +  int i;
 +
 +  for (i=0; i<n; i++) (void) fprintf(fp," ");
 +  return n;
 +}
 +
 +int available(FILE *fp,void *p,int indent,const char *title)
 +{
 +  if (!p) {
 +    if (indent > 0)
 +      pr_indent(fp,indent);
 +    (void) fprintf(fp,"%s: not available\n",title);
 +  }
 +  return (p!=NULL);
 +}
 +
 +int pr_title(FILE *fp,int indent,const char *title)
 +{
 +  (void) pr_indent(fp,indent);
 +  (void) fprintf(fp,"%s:\n",title);
 +  return (indent+INDENT);
 +}
 +
 +int pr_title_n(FILE *fp,int indent,const char *title,int n)
 +{
 +  (void) pr_indent(fp,indent);
 +  (void) fprintf(fp,"%s (%d):\n",title,n);
 +  return (indent+INDENT);
 +}
 +
 +int pr_title_nxn(FILE *fp,int indent,const char *title,int n1,int n2)
 +{
 +  (void) pr_indent(fp,indent);
 +  (void) fprintf(fp,"%s (%dx%d):\n",title,n1,n2);
 +  return (indent+INDENT);
 +}
 +
 +void pr_ivec(FILE *fp,int indent,const char *title,int vec[],int n, gmx_bool bShowNumbers)
 +{
 +  int i;
 +
 +  if (available(fp,vec,indent,title))
 +    {
 +      indent=pr_title_n(fp,indent,title,n);
 +      for (i=0; i<n; i++)
 +        {
 +          (void) pr_indent(fp,indent);
 +          (void) fprintf(fp,"%s[%d]=%d\n",title,bShowNumbers?i:-1,vec[i]);
 +        }
 +    }
 +}
 +
 +void pr_ivec_block(FILE *fp,int indent,const char *title,int vec[],int n, gmx_bool bShowNumbers)
 +{
 +    int i,j;
 +    
 +    if (available(fp,vec,indent,title))
 +    {
 +        indent=pr_title_n(fp,indent,title,n);
 +        i = 0;
 +        while (i < n)
 +        {
 +            j = i+1;
 +            while (j < n && vec[j] == vec[j-1]+1)
 +            {
 +                j++;
 +            }
 +            /* Print consecutive groups of 3 or more as blocks */
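 +            /* Illustrative note: a run such as vec[0..9] = 5..14 is
 +             * printed on one line as  title[0,...,9] = {5,...,14}  */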
 +            if (j - i < 3)
 +            {
 +                while(i < j)
 +                {
 +                    (void) pr_indent(fp,indent);
 +                    (void) fprintf(fp,"%s[%d]=%d\n",
 +                                   title,bShowNumbers?i:-1,vec[i]);
 +                    i++;
 +                }
 +            }
 +            else
 +            {
 +                (void) pr_indent(fp,indent);
 +                (void) fprintf(fp,"%s[%d,...,%d] = {%d,...,%d}\n",
 +                               title,
 +                               bShowNumbers?i:-1,
 +                               bShowNumbers?j-1:-1,
 +                               vec[i],vec[j-1]); 
 +                i = j;
 +            }
 +        }
 +    }
 +}
 +
 +void pr_bvec(FILE *fp,int indent,const char *title,gmx_bool vec[],int n, gmx_bool bShowNumbers)
 +{
 +  int i;
 +
 +  if (available(fp,vec,indent,title))
 +    {
 +      indent=pr_title_n(fp,indent,title,n);
 +      for (i=0; i<n; i++)
 +        {
 +          (void) pr_indent(fp,indent);
 +          (void) fprintf(fp,"%s[%d]=%s\n",title,bShowNumbers?i:-1,
 +                       EBOOL(vec[i]));
 +        }
 +    }
 +}
 +
 +void pr_ivecs(FILE *fp,int indent,const char *title,ivec vec[],int n, gmx_bool bShowNumbers)
 +{
 +  int i,j;
 +
 +  if (available(fp,vec,indent,title))
 +    {  
 +      indent=pr_title_nxn(fp,indent,title,n,DIM);
 +      for (i=0; i<n; i++)
 +        {
 +          (void) pr_indent(fp,indent);
 +          (void) fprintf(fp,"%s[%d]={",title,bShowNumbers?i:-1);
 +          for (j=0; j<DIM; j++)
 +            {
 +              if (j!=0) (void) fprintf(fp,", ");
 +              fprintf(fp,"%d",vec[i][j]);
 +            }
 +          (void) fprintf(fp,"}\n");
 +        }
 +    }
 +}
 +
 +void pr_rvec(FILE *fp,int indent,const char *title,real vec[],int n, gmx_bool bShowNumbers)
 +{
 +  int i;
 +
 +  if (available(fp,vec,indent,title))
 +    {  
 +      indent=pr_title_n(fp,indent,title,n);
 +      for (i=0; i<n; i++)
 +        {
 +          pr_indent(fp,indent);
 +          fprintf(fp,"%s[%d]=%12.5e\n",title,bShowNumbers?i:-1,vec[i]);
 +        }
 +    }
 +}
 +
 +void pr_dvec(FILE *fp,int indent,const char *title,double vec[],int n, gmx_bool bShowNumbers)
 +{
 +      int i;
 +      
 +      if (available(fp,vec,indent,title))
 +    {  
 +              indent=pr_title_n(fp,indent,title,n);
 +              for (i=0; i<n; i++)
 +        {
 +                      pr_indent(fp,indent);
 +                      fprintf(fp,"%s[%d]=%12.5e\n",title,bShowNumbers?i:-1,vec[i]);
 +        }
 +    }
 +}
 +
 +
 +/*
 +void pr_mat(FILE *fp,int indent,char *title,matrix m)
 +{
 +  int i,j;
 +  
 +  if (available(fp,m,indent,title)) {  
 +    indent=pr_title_n(fp,indent,title,n);
 +    for(i=0; i<n; i++) {
 +      pr_indent(fp,indent);
 +      fprintf(fp,"%s[%d]=%12.5e %12.5e %12.5e\n",
 +            title,bShowNumbers?i:-1,m[i][XX],m[i][YY],m[i][ZZ]);
 +    }
 +  }
 +}
 +*/
 +
 +void pr_rvecs_len(FILE *fp,int indent,const char *title,rvec vec[],int n)
 +{
 +  int i,j;
 +
 +  if (available(fp,vec,indent,title)) {  
 +    indent=pr_title_nxn(fp,indent,title,n,DIM);
 +    for (i=0; i<n; i++) {
 +      (void) pr_indent(fp,indent);
 +      (void) fprintf(fp,"%s[%5d]={",title,i);
 +      for (j=0; j<DIM; j++) {
 +        if (j != 0)
 +          (void) fprintf(fp,", ");
 +        (void) fprintf(fp,"%12.5e",vec[i][j]);
 +      }
 +      (void) fprintf(fp,"} len=%12.5e\n",norm(vec[i]));
 +    }
 +  }
 +}
 +
 +void pr_rvecs(FILE *fp,int indent,const char *title,rvec vec[],int n)
 +{
 +  const char *fshort = "%12.5e";
 +  const char *flong  = "%15.8e";
 +  const char *format;
 +  int i,j;
 +
 +  if (getenv("LONGFORMAT") != NULL)
 +    format = flong;
 +  else
 +    format = fshort;
 +    
 +  if (available(fp,vec,indent,title)) {  
 +    indent=pr_title_nxn(fp,indent,title,n,DIM);
 +    for (i=0; i<n; i++) {
 +      (void) pr_indent(fp,indent);
 +      (void) fprintf(fp,"%s[%5d]={",title,i);
 +      for (j=0; j<DIM; j++) {
 +        if (j != 0)
 +          (void) fprintf(fp,", ");
 +        (void) fprintf(fp,format,vec[i][j]);
 +      }
 +      (void) fprintf(fp,"}\n");
 +    }
 +  }
 +}
 +
 +
 +void pr_reals(FILE *fp,int indent,const char *title,real *vec,int n)
 +{
 +  int i;
 +    
 +  if (available(fp,vec,indent,title)) {  
 +    (void) pr_indent(fp,indent);
 +    (void) fprintf(fp,"%s:\t",title);
 +    for(i=0; i<n; i++)
 +      fprintf(fp,"  %10g",vec[i]);
 +    (void) fprintf(fp,"\n");
 +  }
 +}
 +
 +void pr_doubles(FILE *fp,int indent,const char *title,double *vec,int n)
 +{
 +  int i;
 +    
 +  if (available(fp,vec,indent,title)) {  
 +    (void) pr_indent(fp,indent);
 +    (void) fprintf(fp,"%s:\t",title);
 +    for(i=0; i<n; i++)
 +      fprintf(fp,"  %10g",vec[i]);
 +    (void) fprintf(fp,"\n");
 +  }
 +}
 +
 +static void pr_int(FILE *fp,int indent,const char *title,int i)
 +{
 +  pr_indent(fp,indent);
 +  fprintf(fp,"%-20s = %d\n",title,i);
 +}
 +
 +static void pr_gmx_large_int(FILE *fp,int indent,const char *title,gmx_large_int_t i)
 +{
 +  char buf[STEPSTRSIZE];
 +
 +  pr_indent(fp,indent);
 +  fprintf(fp,"%-20s = %s\n",title,gmx_step_str(i,buf));
 +}
 +
 +static void pr_real(FILE *fp,int indent,const char *title,real r)
 +{
 +  pr_indent(fp,indent);
 +  fprintf(fp,"%-20s = %g\n",title,r);
 +}
 +
 +static void pr_double(FILE *fp,int indent,const char *title,double d)
 +{
 +  pr_indent(fp,indent);
 +  fprintf(fp,"%-20s = %g\n",title,d);
 +}
 +
 +static void pr_str(FILE *fp,int indent,const char *title,const char *s)
 +{
 +  pr_indent(fp,indent);
 +  fprintf(fp,"%-20s = %s\n",title,s);
 +}
 +
 +void pr_qm_opts(FILE *fp,int indent,const char *title,t_grpopts *opts)
 +{
 +  int i,m,j;
 +
 +  fprintf(fp,"%s:\n",title);
 +  
 +  pr_int(fp,indent,"ngQM",opts->ngQM);
 +  if (opts->ngQM > 0) {
 +    pr_ivec(fp,indent,"QMmethod",opts->QMmethod,opts->ngQM,FALSE);
 +    pr_ivec(fp,indent,"QMbasis",opts->QMbasis,opts->ngQM,FALSE);
 +    pr_ivec(fp,indent,"QMcharge",opts->QMcharge,opts->ngQM,FALSE);
 +    pr_ivec(fp,indent,"QMmult",opts->QMmult,opts->ngQM,FALSE);
 +    pr_bvec(fp,indent,"bSH",opts->bSH,opts->ngQM,FALSE);
 +    pr_ivec(fp,indent,"CASorbitals",opts->CASorbitals,opts->ngQM,FALSE);
 +    pr_ivec(fp,indent,"CASelectrons",opts->CASelectrons,opts->ngQM,FALSE);
 +    pr_rvec(fp,indent,"SAon",opts->SAon,opts->ngQM,FALSE);
 +    pr_rvec(fp,indent,"SAon",opts->SAon,opts->ngQM,FALSE);
 +    pr_ivec(fp,indent,"SAsteps",opts->SAsteps,opts->ngQM,FALSE);
 +    pr_bvec(fp,indent,"bOPT",opts->bOPT,opts->ngQM,FALSE);
 +    pr_bvec(fp,indent,"bTS",opts->bTS,opts->ngQM,FALSE);
 +  }
 +}
 +
 +static void pr_grp_opts(FILE *out,int indent,const char *title,t_grpopts *opts,
 +                      gmx_bool bMDPformat)
 +{
 +  int i,m,j;
 +
 +  if (!bMDPformat)
 +    fprintf(out,"%s:\n",title);
 +  
 +  pr_indent(out,indent);
 +  fprintf(out,"nrdf%s",bMDPformat ? " = " : ":");
 +  for(i=0; (i<opts->ngtc); i++)
 +    fprintf(out,"  %10g",opts->nrdf[i]);
 +  fprintf(out,"\n");
 +  
 +  pr_indent(out,indent);
 +  fprintf(out,"ref-t%s",bMDPformat ? " = " : ":");
 +  for(i=0; (i<opts->ngtc); i++)
 +    fprintf(out,"  %10g",opts->ref_t[i]);
 +  fprintf(out,"\n");
 +
 +  pr_indent(out,indent);
 +  fprintf(out,"tau-t%s",bMDPformat ? " = " : ":");
 +  for(i=0; (i<opts->ngtc); i++)
 +    fprintf(out,"  %10g",opts->tau_t[i]);
 +  fprintf(out,"\n");  
 +  
 +  /* Pretty-print the simulated annealing info */
 +  fprintf(out,"anneal%s",bMDPformat ? " = " : ":");
 +  for(i=0; (i<opts->ngtc); i++)
 +    fprintf(out,"  %10s",EANNEAL(opts->annealing[i]));
 +  fprintf(out,"\n");  
 + 
 +  fprintf(out,"ann-npoints%s",bMDPformat ? " = " : ":");
 +  for(i=0; (i<opts->ngtc); i++)
 +    fprintf(out,"  %10d",opts->anneal_npoints[i]);
 +  fprintf(out,"\n");  
 + 
 +  for(i=0; (i<opts->ngtc); i++) {
 +    if (opts->anneal_npoints[i] > 0) {
 +      fprintf(out,"ann. times [%d]:\t",i);
 +      for(j=0; (j<opts->anneal_npoints[i]); j++)
 +        fprintf(out,"  %10.1f",opts->anneal_time[i][j]);
 +      fprintf(out,"\n");
 +      fprintf(out,"ann. temps [%d]:\t",i);
 +      for(j=0; (j<opts->anneal_npoints[i]); j++)
 +        fprintf(out,"  %10.1f",opts->anneal_temp[i][j]);
 +      fprintf(out,"\n");
 +    }
 +  }
 +  
 +  pr_indent(out,indent);
 +  fprintf(out,"acc:\t");
 +  for(i=0; (i<opts->ngacc); i++)
 +    for(m=0; (m<DIM); m++)
 +      fprintf(out,"  %10g",opts->acc[i][m]);
 +  fprintf(out,"\n");
 +
 +  pr_indent(out,indent);
 +  fprintf(out,"nfreeze:");
 +  for(i=0; (i<opts->ngfrz); i++)
 +    for(m=0; (m<DIM); m++)
 +      fprintf(out,"  %10s",opts->nFreeze[i][m] ? "Y" : "N");
 +  fprintf(out,"\n");
 +
 +
 +  for(i=0; (i<opts->ngener); i++) {
 +    pr_indent(out,indent);
 +    fprintf(out,"energygrp-flags[%3d]:",i);
 +    for(m=0; (m<opts->ngener); m++)
 +      fprintf(out," %d",opts->egp_flags[opts->ngener*i+m]);
 +    fprintf(out,"\n");
 +  }
 +
 +  fflush(out);
 +}
 +
 +static void pr_matrix(FILE *fp,int indent,const char *title,rvec *m,
 +                    gmx_bool bMDPformat)
 +{
 +  if (bMDPformat)
 +    fprintf(fp,"%-10s    = %g %g %g %g %g %g\n",title,
 +          m[XX][XX],m[YY][YY],m[ZZ][ZZ],m[XX][YY],m[XX][ZZ],m[YY][ZZ]);
 +  else
 +    pr_rvecs(fp,indent,title,m,DIM);
 +}
 +
 +static void pr_cosine(FILE *fp,int indent,const char *title,t_cosines *cos,
 +                    gmx_bool bMDPformat)
 +{
 +  int j;
 +  
 +  if (bMDPformat) {
 +    fprintf(fp,"%s = %d\n",title,cos->n);
 +  }
 +  else {
 +    indent=pr_title(fp,indent,title);
 +    (void) pr_indent(fp,indent);
 +    fprintf(fp,"n = %d\n",cos->n);
 +    if (cos->n > 0) {
 +      (void) pr_indent(fp,indent+2);
 +      fprintf(fp,"a =");
 +      for(j=0; (j<cos->n); j++)
 +        fprintf(fp," %e",cos->a[j]);
 +      fprintf(fp,"\n");
 +      (void) pr_indent(fp,indent+2);
 +      fprintf(fp,"phi =");
 +      for(j=0; (j<cos->n); j++)
 +        fprintf(fp," %e",cos->phi[j]);
 +      fprintf(fp,"\n");
 +    }
 +  }
 +}
 +
 +#define PS(t,s) pr_str(fp,indent,t,s)
 +#define PI(t,s) pr_int(fp,indent,t,s)
 +#define PSTEP(t,s) pr_gmx_large_int(fp,indent,t,s)
 +#define PR(t,s) pr_real(fp,indent,t,s)
 +#define PD(t,s) pr_double(fp,indent,t,s)
 +
 +static void pr_pullgrp(FILE *fp,int indent,int g,t_pullgrp *pg)
 +{
 +  pr_indent(fp,indent);
 +  fprintf(fp,"pull-group %d:\n",g);
 +  indent += 2;
 +  pr_ivec_block(fp,indent,"atom",pg->ind,pg->nat,TRUE);
 +  pr_rvec(fp,indent,"weight",pg->weight,pg->nweight,TRUE);
 +  PI("pbcatom",pg->pbcatom);
 +  pr_rvec(fp,indent,"vec",pg->vec,DIM,TRUE);
 +  pr_rvec(fp,indent,"init",pg->init,DIM,TRUE);
 +  PR("rate",pg->rate);
 +  PR("k",pg->k);
 +  PR("kB",pg->kB);
 +}
 +
 +static void pr_simtempvals(FILE *fp,int indent,t_simtemp *simtemp, int n_lambda, gmx_bool bMDPformat)
 +{
 +    PR("simtemp_low",simtemp->simtemp_low);
 +    PR("simtemp_high",simtemp->simtemp_high);
 +    PS("simulated-tempering-scaling",ESIMTEMP(simtemp->eSimTempScale));
 +    pr_rvec(fp,indent,"simulated tempering temperatures",simtemp->temperatures,n_lambda,TRUE);
 +}
 +
 +static void pr_expandedvals(FILE *fp,int indent,t_expanded *expand, int n_lambda, gmx_bool bMDPformat)
 +{
 +
 +    PI("nstexpanded", expand->nstexpanded);
 +    PS("lambda-stats", elamstats_names[expand->elamstats]);
 +    PS("lambda-mc-move", elmcmove_names[expand->elmcmove]);
 +    PI("lmc-repeats",expand->lmc_repeats);
 +    PI("lmc-gibbsdelta",expand->gibbsdeltalam);
 +    PI("lmc-nstart",expand->lmc_forced_nstart);
 +    PS("symmetrized-transition-matrix", EBOOL(expand->bSymmetrizedTMatrix));
 +    PI("nst-transition-matrix",expand->nstTij);
 +    PI("mininum-var-min",expand->minvarmin); /*default is reasonable */
 +    PI("weight-c-range",expand->c_range); /* default is just C=0 */
 +    PR("wl-scale",expand->wl_scale);
 +    PR("init-wl-delta",expand->init_wl_delta);
 +    PR("wl-ratio",expand->wl_ratio);
 +    PS("bWLoneovert",EBOOL(expand->bWLoneovert));
 +    PI("lmc-seed",expand->lmc_seed);
 +    PR("mc-temperature",expand->mc_temp);
 +    PS("lmc-weights-equil",elmceq_names[expand->elmceq]);
 +    if (expand->elmceq == elmceqNUMATLAM)
 +    {
 +        PI("weight-equil-number-all-lambda",expand->equil_n_at_lam);
 +    }
 +    if (expand->elmceq == elmceqSAMPLES)
 +    {
 +        PI("weight-equil-number-samples",expand->equil_samples);
 +    }
 +    if (expand->elmceq == elmceqSTEPS)
 +    {
 +        PI("weight-equil-number-steps",expand->equil_steps);
 +    }
 +    if (expand->elmceq == elmceqWLDELTA)
 +    {
 +        PR("weight-equil-wl-delta",expand->equil_wl_delta);
 +    }
 +    if (expand->elmceq == elmceqRATIO)
 +    {
 +        PR("weight-equil-count-ratio",expand->equil_ratio);
 +    }
 +
 +    pr_indent(fp,indent);
 +    pr_rvec(fp,indent,"init-lambda-weights",expand->init_lambda_weights,n_lambda,TRUE);
 +    PS("init-weights",EBOOL(expand->bInit_weights));
 +}
 +
 +static void pr_fepvals(FILE *fp,int indent,t_lambda *fep, gmx_bool bMDPformat)
 +{
 +    int i,j;
 +
 +    PI("nstdhdl",fep->nstdhdl);
 +    PI("init-lambda-state",fep->init_fep_state);
 +    PR("init-lambda",fep->init_lambda);
 +    PR("delta-lambda",fep->delta_lambda);
 +    if (!bMDPformat)
 +    {
 +        PI("n-lambdas",fep->n_lambda);
 +    }
 +    if (fep->n_lambda > 0)
 +    {
 +        pr_indent(fp,indent);
 +        fprintf(fp,"all-lambdas%s\n",bMDPformat ? " = " : ":");
 +        for(i=0; i<efptNR; i++) {
 +            fprintf(fp,"%18s = ",efpt_names[i]);
 +            for(j=0; j<fep->n_lambda; j++)
 +            {
 +                fprintf(fp,"  %10g",fep->all_lambda[i][j]);
 +            }
 +            fprintf(fp,"\n");
 +        }
 +    }
 +
 +    PR("sc-alpha",fep->sc_alpha);
 +    PS("bScCoul",EBOOL(fep->bScCoul));
 +    PS("bScPrintEnergy",EBOOL(fep->bPrintEnergy));
 +    PI("sc-power",fep->sc_power);
 +    PR("sc-r-power",fep->sc_r_power);
 +    PR("sc-sigma",fep->sc_sigma);
 +    PR("sc-sigma-min",fep->sc_sigma_min);
 +    PS("separate-dhdl-file", SEPDHDLFILETYPE(fep->separate_dhdl_file));
 +    PS("dhdl-derivatives", DHDLDERIVATIVESTYPE(fep->dhdl_derivatives));
 +    PI("dh-hist-size", fep->dh_hist_size);
 +    PD("dh-hist-spacing", fep->dh_hist_spacing);
 +};
 +
 +static void pr_pull(FILE *fp,int indent,t_pull *pull)
 +{
 +  int g;
 +
 +  PS("pull-geometry",EPULLGEOM(pull->eGeom));
 +  pr_ivec(fp,indent,"pull-dim",pull->dim,DIM,TRUE);
 +  PR("pull-r1",pull->cyl_r1);
 +  PR("pull-r0",pull->cyl_r0);
 +  PR("pull-constr-tol",pull->constr_tol);
 +  PI("pull-nstxout",pull->nstxout);
 +  PI("pull-nstfout",pull->nstfout);
 +  PI("pull-ngrp",pull->ngrp);
 +  for(g=0; g<pull->ngrp+1; g++)
 +    pr_pullgrp(fp,indent,g,&pull->grp[g]);
 +}
 +
 +static void pr_rotgrp(FILE *fp,int indent,int g,t_rotgrp *rotg)
 +{
 +  pr_indent(fp,indent);
 +  fprintf(fp,"rotation_group %d:\n",g);
 +  indent += 2;
 +  PS("type",EROTGEOM(rotg->eType));
 +  PS("massw",EBOOL(rotg->bMassW));
 +  pr_ivec_block(fp,indent,"atom",rotg->ind,rotg->nat,TRUE);
 +  pr_rvecs(fp,indent,"x_ref",rotg->x_ref,rotg->nat);
 +  pr_rvec(fp,indent,"vec",rotg->vec,DIM,TRUE);
 +  pr_rvec(fp,indent,"pivot",rotg->pivot,DIM,TRUE);
 +  PR("rate",rotg->rate);
 +  PR("k",rotg->k);
 +  PR("slab_dist",rotg->slab_dist);
 +  PR("min_gaussian",rotg->min_gaussian);
 +  PR("epsilon",rotg->eps);
 +  PS("fit_method",EROTFIT(rotg->eFittype));
 +  PI("potfitangle_nstep",rotg->PotAngle_nstep);
 +  PR("potfitangle_step",rotg->PotAngle_step);
 +}
 +
 +static void pr_rot(FILE *fp,int indent,t_rot *rot)
 +{
 +  int g;
 +
 +  PI("rot_nstrout",rot->nstrout);
 +  PI("rot_nstsout",rot->nstsout);
 +  PI("rot_ngrp",rot->ngrp);
 +  for(g=0; g<rot->ngrp; g++)
 +    pr_rotgrp(fp,indent,g,&rot->grp[g]);
 +}
 +
 +void pr_inputrec(FILE *fp,int indent,const char *title,t_inputrec *ir,
 +                 gmx_bool bMDPformat)
 +{
 +  const char *infbuf="inf";
 +  int  i;
 +  
 +  if (available(fp,ir,indent,title)) {
 +    if (!bMDPformat)
 +      indent=pr_title(fp,indent,title);
 +    PS("integrator",EI(ir->eI));
 +    PSTEP("nsteps",ir->nsteps);
 +    PSTEP("init-step",ir->init_step);
 +    PS("cutoff-scheme",ECUTSCHEME(ir->cutoff_scheme));
 +    PS("ns_type",ENS(ir->ns_type));
 +    PI("nstlist",ir->nstlist);
 +    PI("ndelta",ir->ndelta);
 +    PI("nstcomm",ir->nstcomm);
 +    PS("comm-mode",ECOM(ir->comm_mode));
 +    PI("nstlog",ir->nstlog);
 +    PI("nstxout",ir->nstxout);
 +    PI("nstvout",ir->nstvout);
 +    PI("nstfout",ir->nstfout);
 +    PI("nstcalcenergy",ir->nstcalcenergy);
 +    PI("nstenergy",ir->nstenergy);
 +    PI("nstxtcout",ir->nstxtcout);
 +    PR("init-t",ir->init_t);
 +    PR("delta-t",ir->delta_t);
 +    
 +    PR("xtcprec",ir->xtcprec);
 +    PR("fourierspacing",ir->fourier_spacing);
 +    PI("nkx",ir->nkx);
 +    PI("nky",ir->nky);
 +    PI("nkz",ir->nkz);
 +    PI("pme-order",ir->pme_order);
 +    PR("ewald-rtol",ir->ewald_rtol);
 +    PR("ewald-geometry",ir->ewald_geometry);
 +    PR("epsilon-surface",ir->epsilon_surface);
 +    PS("optimize-fft",EBOOL(ir->bOptFFT));
 +    PS("ePBC",EPBC(ir->ePBC));
 +    PS("bPeriodicMols",EBOOL(ir->bPeriodicMols));
 +    PS("bContinuation",EBOOL(ir->bContinuation));
 +    PS("bShakeSOR",EBOOL(ir->bShakeSOR));
 +    PS("etc",ETCOUPLTYPE(ir->etc));
 +    PS("bPrintNHChains",EBOOL(ir->bPrintNHChains));
 +    PI("nsttcouple",ir->nsttcouple);
 +    PS("epc",EPCOUPLTYPE(ir->epc));
 +    PS("epctype",EPCOUPLTYPETYPE(ir->epct));
 +    PI("nstpcouple",ir->nstpcouple);
 +    PR("tau-p",ir->tau_p);
 +    pr_matrix(fp,indent,"ref-p",ir->ref_p,bMDPformat);
 +    pr_matrix(fp,indent,"compress",ir->compress,bMDPformat);
 +    PS("refcoord-scaling",EREFSCALINGTYPE(ir->refcoord_scaling));
 +    if (bMDPformat)
 +      fprintf(fp,"posres-com  = %g %g %g\n",ir->posres_com[XX],
 +            ir->posres_com[YY],ir->posres_com[ZZ]);
 +    else
 +      pr_rvec(fp,indent,"posres-com",ir->posres_com,DIM,TRUE);
 +    if (bMDPformat)
 +      fprintf(fp,"posres-comB = %g %g %g\n",ir->posres_comB[XX],
 +            ir->posres_comB[YY],ir->posres_comB[ZZ]);
 +    else
 +      pr_rvec(fp,indent,"posres-comB",ir->posres_comB,DIM,TRUE);
 +    PR("verlet-buffer-drift",ir->verletbuf_drift);
 +    PR("rlist",ir->rlist);
 +    PR("rlistlong",ir->rlistlong);
++    PR("nstcalclr",ir->nstcalclr);
 +    PR("rtpi",ir->rtpi);
 +    PS("coulombtype",EELTYPE(ir->coulombtype));
 +    PS("coulomb-modifier",INTMODIFIER(ir->coulomb_modifier));
 +    PR("rcoulomb-switch",ir->rcoulomb_switch);
 +    PR("rcoulomb",ir->rcoulomb);
 +    PS("vdwtype",EVDWTYPE(ir->vdwtype));
 +    PS("vdw-modifier",INTMODIFIER(ir->vdw_modifier));
 +    PR("rvdw-switch",ir->rvdw_switch);
 +    PR("rvdw",ir->rvdw);
 +    if (ir->epsilon_r != 0)
 +      PR("epsilon-r",ir->epsilon_r);
 +    else
 +      PS("epsilon-r",infbuf);
 +    if (ir->epsilon_rf != 0)
 +      PR("epsilon-rf",ir->epsilon_rf);
 +    else
 +      PS("epsilon-rf",infbuf);
 +    PR("tabext",ir->tabext);
 +    PS("implicit-solvent",EIMPLICITSOL(ir->implicit_solvent));
 +    PS("gb-algorithm",EGBALGORITHM(ir->gb_algorithm));
 +    PR("gb-epsilon-solvent",ir->gb_epsilon_solvent);
 +    PI("nstgbradii",ir->nstgbradii);
 +    PR("rgbradii",ir->rgbradii);
 +    PR("gb-saltconc",ir->gb_saltconc);
 +    PR("gb-obc-alpha",ir->gb_obc_alpha);
 +    PR("gb-obc-beta",ir->gb_obc_beta);
 +    PR("gb-obc-gamma",ir->gb_obc_gamma);
 +    PR("gb-dielectric-offset",ir->gb_dielectric_offset);
 +    PS("sa-algorithm",ESAALGORITHM(ir->gb_algorithm));
 +    PR("sa-surface-tension",ir->sa_surface_tension);
 +    PS("DispCorr",EDISPCORR(ir->eDispCorr));
 +    PS("bSimTemp",EBOOL(ir->bSimTemp));
 +    if (ir->bSimTemp) {
 +        pr_simtempvals(fp,indent,ir->simtempvals,ir->fepvals->n_lambda,bMDPformat);
 +    }
 +    PS("free-energy",EFEPTYPE(ir->efep));
 +    if (ir->efep != efepNO || ir->bSimTemp) {
 +        pr_fepvals(fp,indent,ir->fepvals,bMDPformat);
 +    }
 +    if (ir->bExpanded) {
 +        pr_expandedvals(fp,indent,ir->expandedvals,ir->fepvals->n_lambda,bMDPformat);
 +    }
 +
 +    PI("nwall",ir->nwall);
 +    PS("wall-type",EWALLTYPE(ir->wall_type));
 +    PI("wall-atomtype[0]",ir->wall_atomtype[0]);
 +    PI("wall-atomtype[1]",ir->wall_atomtype[1]);
 +    PR("wall-density[0]",ir->wall_density[0]);
 +    PR("wall-density[1]",ir->wall_density[1]);
 +    PR("wall-ewald-zfac",ir->wall_ewald_zfac);
 +
 +    PS("pull",EPULLTYPE(ir->ePull));
 +    if (ir->ePull != epullNO)
 +      pr_pull(fp,indent,ir->pull);
 +    
 +    PS("rotation",EBOOL(ir->bRot));
 +    if (ir->bRot)
 +      pr_rot(fp,indent,ir->rot);
 +
 +    PS("disre",EDISRETYPE(ir->eDisre));
 +    PS("disre-weighting",EDISREWEIGHTING(ir->eDisreWeighting));
 +    PS("disre-mixed",EBOOL(ir->bDisreMixed));
 +    PR("dr-fc",ir->dr_fc);
 +    PR("dr-tau",ir->dr_tau);
 +    PR("nstdisreout",ir->nstdisreout);
 +    PR("orires-fc",ir->orires_fc);
 +    PR("orires-tau",ir->orires_tau);
 +    PR("nstorireout",ir->nstorireout);
 +
 +    PR("dihre-fc",ir->dihre_fc);
 +    
 +    PR("em-stepsize",ir->em_stepsize);
 +    PR("em-tol",ir->em_tol);
 +    PI("niter",ir->niter);
 +    PR("fc-stepsize",ir->fc_stepsize);
 +    PI("nstcgsteep",ir->nstcgsteep);
 +    PI("nbfgscorr",ir->nbfgscorr);
 +
 +    PS("ConstAlg",ECONSTRTYPE(ir->eConstrAlg));
 +    PR("shake-tol",ir->shake_tol);
 +    PI("lincs-order",ir->nProjOrder);
 +    PR("lincs-warnangle",ir->LincsWarnAngle);
 +    PI("lincs-iter",ir->nLincsIter);
 +    PR("bd-fric",ir->bd_fric);
 +    PI("ld-seed",ir->ld_seed);
 +    PR("cos-accel",ir->cos_accel);
 +    pr_matrix(fp,indent,"deform",ir->deform,bMDPformat);
 +
 +    PS("adress",EBOOL(ir->bAdress));
 +    if (ir->bAdress){
 +        PS("adress_type",EADRESSTYPE(ir->adress->type));
 +        PR("adress_const_wf",ir->adress->const_wf);
 +        PR("adress_ex_width",ir->adress->ex_width);
 +        PR("adress_hy_width",ir->adress->hy_width);
 +        PS("adress_interface_correction",EADRESSICTYPE(ir->adress->icor));
 +        PS("adress_site",EADRESSSITETYPE(ir->adress->site));
 +        PR("adress_ex_force_cap",ir->adress->ex_forcecap);
 +        PS("adress_do_hybridpairs", EBOOL(ir->adress->do_hybridpairs));
 +
 +        pr_rvec(fp,indent,"adress_reference_coords",ir->adress->refs,DIM,TRUE);
 +    }
 +    PI("userint1",ir->userint1);
 +    PI("userint2",ir->userint2);
 +    PI("userint3",ir->userint3);
 +    PI("userint4",ir->userint4);
 +    PR("userreal1",ir->userreal1);
 +    PR("userreal2",ir->userreal2);
 +    PR("userreal3",ir->userreal3);
 +    PR("userreal4",ir->userreal4);
 +    pr_grp_opts(fp,indent,"grpopts",&(ir->opts),bMDPformat);
 +    pr_cosine(fp,indent,"efield-x",&(ir->ex[XX]),bMDPformat);
 +    pr_cosine(fp,indent,"efield-xt",&(ir->et[XX]),bMDPformat);
 +    pr_cosine(fp,indent,"efield-y",&(ir->ex[YY]),bMDPformat);
 +    pr_cosine(fp,indent,"efield-yt",&(ir->et[YY]),bMDPformat);
 +    pr_cosine(fp,indent,"efield-z",&(ir->ex[ZZ]),bMDPformat);
 +    pr_cosine(fp,indent,"efield-zt",&(ir->et[ZZ]),bMDPformat);
 +    PS("bQMMM",EBOOL(ir->bQMMM));
 +    PI("QMconstraints",ir->QMconstraints);
 +    PI("QMMMscheme",ir->QMMMscheme);
 +    PR("scalefactor",ir->scalefactor);
 +    pr_qm_opts(fp,indent,"qm-opts",&(ir->opts));
 +  }
 +}
 +#undef PS
 +#undef PR
 +#undef PI
 +
 +static void pr_harm(FILE *fp,t_iparams *iparams,const char *r,const char *kr)
 +{
 +  fprintf(fp,"%sA=%12.5e, %sA=%12.5e, %sB=%12.5e, %sB=%12.5e\n",
 +        r,iparams->harmonic.rA,kr,iparams->harmonic.krA,
 +        r,iparams->harmonic.rB,kr,iparams->harmonic.krB);
 +}
 +
 +void pr_iparams(FILE *fp,t_functype ftype,t_iparams *iparams)
 +{
 +  int i;
 +  real VA[4],VB[4],*rbcA,*rbcB;
 +
 +  switch (ftype) {
 +  case F_ANGLES:
 +  case F_G96ANGLES:
 +    pr_harm(fp,iparams,"th","ct");
 +    break;
 +  case F_CROSS_BOND_BONDS:
 +    fprintf(fp,"r1e=%15.8e, r2e=%15.8e, krr=%15.8e\n",
 +          iparams->cross_bb.r1e,iparams->cross_bb.r2e,
 +          iparams->cross_bb.krr);
 +    break;
 +  case F_CROSS_BOND_ANGLES:
 +    fprintf(fp,"r1e=%15.8e, r1e=%15.8e, r3e=%15.8e, krt=%15.8e\n",
 +          iparams->cross_ba.r1e,iparams->cross_ba.r2e,
 +          iparams->cross_ba.r3e,iparams->cross_ba.krt);
 +    break;
 +  case F_LINEAR_ANGLES:
 +    fprintf(fp,"klinA=%15.8e, aA=%15.8e, klinB=%15.8e, aB=%15.8e\n",
 +            iparams->linangle.klinA,iparams->linangle.aA,
 +            iparams->linangle.klinB,iparams->linangle.aB);
 +    break;
 +  case F_UREY_BRADLEY:
 +      fprintf(fp,"thetaA=%15.8e, kthetaA=%15.8e, r13A=%15.8e, kUBA=%15.8e, thetaB=%15.8e, kthetaB=%15.8e, r13B=%15.8e, kUBB=%15.8e\n",iparams->u_b.thetaA,iparams->u_b.kthetaA,iparams->u_b.r13A,iparams->u_b.kUBA,iparams->u_b.thetaB,iparams->u_b.kthetaB,iparams->u_b.r13B,iparams->u_b.kUBB);
 +    break;
 +  case F_QUARTIC_ANGLES:
 +    fprintf(fp,"theta=%15.8e",iparams->qangle.theta);
 +    for(i=0; i<5; i++)
 +      fprintf(fp,", c%c=%15.8e",'0'+i,iparams->qangle.c[i]);
 +    fprintf(fp,"\n");
 +    break;
 +  case F_BHAM:
 +    fprintf(fp,"a=%15.8e, b=%15.8e, c=%15.8e\n",
 +          iparams->bham.a,iparams->bham.b,iparams->bham.c);
 +    break;
 +  case F_BONDS:
 +  case F_G96BONDS:
 +  case F_HARMONIC:
 +    pr_harm(fp,iparams,"b0","cb");
 +    break;
 +  case F_IDIHS:
 +    pr_harm(fp,iparams,"xi","cx");
 +    break;
 +  case F_MORSE:
 +    fprintf(fp,"b0A=%15.8e, cbA=%15.8e, betaA=%15.8e, b0B=%15.8e, cbB=%15.8e, betaB=%15.8e\n",
 +            iparams->morse.b0A,iparams->morse.cbA,iparams->morse.betaA,
 +            iparams->morse.b0B,iparams->morse.cbB,iparams->morse.betaB);
 +    break;
 +  case F_CUBICBONDS:
 +    fprintf(fp,"b0=%15.8e, kb=%15.8e, kcub=%15.8e\n",
 +          iparams->cubic.b0,iparams->cubic.kb,iparams->cubic.kcub);
 +    break;
 +  case F_CONNBONDS:
 +    fprintf(fp,"\n");
 +    break;
 +  case F_FENEBONDS:
 +    fprintf(fp,"bm=%15.8e, kb=%15.8e\n",iparams->fene.bm,iparams->fene.kb);
 +    break;
 +  case F_RESTRBONDS:
 +      fprintf(fp,"lowA=%15.8e, up1A=%15.8e, up2A=%15.8e, kA=%15.8e, lowB=%15.8e, up1B=%15.8e, up2B=%15.8e, kB=%15.8e,\n",
 +              iparams->restraint.lowA,iparams->restraint.up1A,
 +              iparams->restraint.up2A,iparams->restraint.kA,
 +              iparams->restraint.lowB,iparams->restraint.up1B,
 +              iparams->restraint.up2B,iparams->restraint.kB);
 +      break;
 +  case F_TABBONDS:
 +  case F_TABBONDSNC:
 +  case F_TABANGLES:
 +  case F_TABDIHS:
 +    fprintf(fp,"tab=%d, kA=%15.8e, kB=%15.8e\n",
 +          iparams->tab.table,iparams->tab.kA,iparams->tab.kB);
 +    break;
 +  case F_POLARIZATION:
 +    fprintf(fp,"alpha=%15.8e\n",iparams->polarize.alpha);
 +    break;
 +  case F_ANHARM_POL:
 +    fprintf(fp,"alpha=%15.8e drcut=%15.8e khyp=%15.8e\n",
 +            iparams->anharm_polarize.alpha,
 +            iparams->anharm_polarize.drcut,
 +            iparams->anharm_polarize.khyp);
 +    break;
 +  case F_THOLE_POL:
 +    fprintf(fp,"a=%15.8e, alpha1=%15.8e, alpha2=%15.8e, rfac=%15.8e\n",
 +          iparams->thole.a,iparams->thole.alpha1,iparams->thole.alpha2,
 +          iparams->thole.rfac);
 +    break;
 +  case F_WATER_POL:
 +    fprintf(fp,"al_x=%15.8e, al_y=%15.8e, al_z=%15.8e, rOH=%9.6f, rHH=%9.6f, rOD=%9.6f\n",
 +          iparams->wpol.al_x,iparams->wpol.al_y,iparams->wpol.al_z,
 +          iparams->wpol.rOH,iparams->wpol.rHH,iparams->wpol.rOD);
 +    break;
 +  case F_LJ:
 +    fprintf(fp,"c6=%15.8e, c12=%15.8e\n",iparams->lj.c6,iparams->lj.c12);
 +    break;
 +  case F_LJ14:
 +    fprintf(fp,"c6A=%15.8e, c12A=%15.8e, c6B=%15.8e, c12B=%15.8e\n",
 +          iparams->lj14.c6A,iparams->lj14.c12A,
 +          iparams->lj14.c6B,iparams->lj14.c12B);
 +    break;
 +  case F_LJC14_Q:
 +    fprintf(fp,"fqq=%15.8e, qi=%15.8e, qj=%15.8e, c6=%15.8e, c12=%15.8e\n",
 +          iparams->ljc14.fqq,
 +          iparams->ljc14.qi,iparams->ljc14.qj,
 +          iparams->ljc14.c6,iparams->ljc14.c12);
 +    break;
 +  case F_LJC_PAIRS_NB:
 +    fprintf(fp,"qi=%15.8e, qj=%15.8e, c6=%15.8e, c12=%15.8e\n",
 +          iparams->ljcnb.qi,iparams->ljcnb.qj,
 +          iparams->ljcnb.c6,iparams->ljcnb.c12);
 +    break;
 +  case F_PDIHS:
 +  case F_PIDIHS:
 +  case F_ANGRES:
 +  case F_ANGRESZ:
 +    fprintf(fp,"phiA=%15.8e, cpA=%15.8e, phiB=%15.8e, cpB=%15.8e, mult=%d\n",
 +          iparams->pdihs.phiA,iparams->pdihs.cpA,
 +          iparams->pdihs.phiB,iparams->pdihs.cpB,
 +          iparams->pdihs.mult);
 +    break;
 +  case F_DISRES:
 +    fprintf(fp,"label=%4d, type=%1d, low=%15.8e, up1=%15.8e, up2=%15.8e, fac=%15.8e)\n",
 +          iparams->disres.label,iparams->disres.type,
 +          iparams->disres.low,iparams->disres.up1,
 +          iparams->disres.up2,iparams->disres.kfac);
 +    break;
 +  case F_ORIRES:
 +    fprintf(fp,"ex=%4d, label=%d, power=%4d, c=%15.8e, obs=%15.8e, kfac=%15.8e)\n",
 +          iparams->orires.ex,iparams->orires.label,iparams->orires.power,
 +          iparams->orires.c,iparams->orires.obs,iparams->orires.kfac);
 +    break;
 +  case F_DIHRES:
 +      fprintf(fp,"phiA=%15.8e, dphiA=%15.8e, kfacA=%15.8e, phiB=%15.8e, dphiB=%15.8e, kfacB=%15.8e\n",
 +              iparams->dihres.phiA,iparams->dihres.dphiA,iparams->dihres.kfacA,
 +              iparams->dihres.phiB,iparams->dihres.dphiB,iparams->dihres.kfacB);
 +    break;
 +  case F_POSRES:
 +    fprintf(fp,"pos0A=(%15.8e,%15.8e,%15.8e), fcA=(%15.8e,%15.8e,%15.8e), pos0B=(%15.8e,%15.8e,%15.8e), fcB=(%15.8e,%15.8e,%15.8e)\n",
 +          iparams->posres.pos0A[XX],iparams->posres.pos0A[YY],
 +          iparams->posres.pos0A[ZZ],iparams->posres.fcA[XX],
 +          iparams->posres.fcA[YY],iparams->posres.fcA[ZZ],
 +          iparams->posres.pos0B[XX],iparams->posres.pos0B[YY],
 +          iparams->posres.pos0B[ZZ],iparams->posres.fcB[XX],
 +          iparams->posres.fcB[YY],iparams->posres.fcB[ZZ]);
 +    break;
 +  case F_FBPOSRES:
 +    fprintf(fp,"pos0=(%15.8e,%15.8e,%15.8e), geometry=%d, r=%15.8e, k=%15.8e\n",
 +        iparams->fbposres.pos0[XX], iparams->fbposres.pos0[YY],
 +        iparams->fbposres.pos0[ZZ], iparams->fbposres.geom,
 +        iparams->fbposres.r,        iparams->fbposres.k);
 +    break;
 +  case F_RBDIHS:
 +    for (i=0; i<NR_RBDIHS; i++) 
 +      fprintf(fp,"%srbcA[%d]=%15.8e",i==0?"":", ",i,iparams->rbdihs.rbcA[i]);
 +    fprintf(fp,"\n");
 +    for (i=0; i<NR_RBDIHS; i++) 
 +      fprintf(fp,"%srbcB[%d]=%15.8e",i==0?"":", ",i,iparams->rbdihs.rbcB[i]);
 +    fprintf(fp,"\n");
 +    break;
 +  case F_FOURDIHS:
 +    /* Use the OPLS -> Ryckaert-Bellemans formula backwards to get the
 +     * OPLS potential constants back.
 +     */
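 +    /* Sketch of the inversion applied below (C1..C4 are the RB
 +     * coefficients, F1..F4 the OPLS constants stored in VA/VB[0..3]):
 +     *   F4 = -C4/4,  F3 = -C3/2,  F2 = 4*F4 - C2,  F1 = 3*F3 - 2*C1 */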
 +    rbcA = iparams->rbdihs.rbcA;
 +    rbcB = iparams->rbdihs.rbcB;
 +
 +    VA[3] = -0.25*rbcA[4];
 +    VA[2] = -0.5*rbcA[3];
 +    VA[1] = 4.0*VA[3]-rbcA[2];
 +    VA[0] = 3.0*VA[2]-2.0*rbcA[1];
 +
 +    VB[3] = -0.25*rbcB[4];
 +    VB[2] = -0.5*rbcB[3];
 +    VB[1] = 4.0*VB[3]-rbcB[2];
 +    VB[0] = 3.0*VB[2]-2.0*rbcB[1];
 +
 +    for (i=0; i<NR_FOURDIHS; i++) 
 +      fprintf(fp,"%sFourA[%d]=%15.8e",i==0?"":", ",i,VA[i]);
 +    fprintf(fp,"\n");
 +    for (i=0; i<NR_FOURDIHS; i++) 
 +      fprintf(fp,"%sFourB[%d]=%15.8e",i==0?"":", ",i,VB[i]);
 +    fprintf(fp,"\n");
 +    break;
 +   
 +  case F_CONSTR:
 +  case F_CONSTRNC:
 +    fprintf(fp,"dA=%15.8e, dB=%15.8e\n",iparams->constr.dA,iparams->constr.dB);
 +    break;
 +  case F_SETTLE:
 +    fprintf(fp,"doh=%15.8e, dhh=%15.8e\n",iparams->settle.doh,
 +          iparams->settle.dhh);
 +    break;
 +  case F_VSITE2:
 +    fprintf(fp,"a=%15.8e\n",iparams->vsite.a);
 +    break;
 +  case F_VSITE3:
 +  case F_VSITE3FD:
 +  case F_VSITE3FAD:
 +    fprintf(fp,"a=%15.8e, b=%15.8e\n",iparams->vsite.a,iparams->vsite.b);
 +    break;
 +  case F_VSITE3OUT:
 +  case F_VSITE4FD:
 +  case F_VSITE4FDN:
 +    fprintf(fp,"a=%15.8e, b=%15.8e, c=%15.8e\n",
 +          iparams->vsite.a,iparams->vsite.b,iparams->vsite.c);
 +    break;
 +  case F_VSITEN:
 +    fprintf(fp,"n=%2d, a=%15.8e\n",iparams->vsiten.n,iparams->vsiten.a);
 +    break;
 +  case F_GB12:
 +  case F_GB13:
 +  case F_GB14:
 +    fprintf(fp, "sar=%15.8e, st=%15.8e, pi=%15.8e, gbr=%15.8e, bmlt=%15.8e\n",iparams->gb.sar,iparams->gb.st,iparams->gb.pi,iparams->gb.gbr,iparams->gb.bmlt);
 +    break;              
 +  case F_CMAP:
 +    fprintf(fp, "cmapA=%1d, cmapB=%1d\n",iparams->cmap.cmapA, iparams->cmap.cmapB);
 +    break;              
 +  default:
 +    gmx_fatal(FARGS,"unknown function type %d (%s) in %s line %d",
 +            ftype,interaction_function[ftype].name,__FILE__,__LINE__);
 +  }
 +}
 +
 +void pr_ilist(FILE *fp,int indent,const char *title,
 +              t_functype *functype,t_ilist *ilist, gmx_bool bShowNumbers)
 +{
 +    int i,j,k,type,ftype;
 +    t_iatom *iatoms;
 +    
 +    if (available(fp,ilist,indent,title) && ilist->nr > 0)
 +    {  
 +        indent=pr_title(fp,indent,title);
 +        (void) pr_indent(fp,indent);
 +        fprintf(fp,"nr: %d\n",ilist->nr);
 +        if (ilist->nr > 0) {
 +            (void) pr_indent(fp,indent);
 +            fprintf(fp,"iatoms:\n");
 +            iatoms=ilist->iatoms;
 +            for (i=j=0; i<ilist->nr;) {
 +#ifndef DEBUG
 +                (void) pr_indent(fp,indent+INDENT);
 +                type=*(iatoms++);
 +                ftype=functype[type];
 +                (void) fprintf(fp,"%d type=%d (%s)",
 +                               bShowNumbers?j:-1,bShowNumbers?type:-1,
 +                               interaction_function[ftype].name);
 +                j++;
 +                for (k=0; k<interaction_function[ftype].nratoms; k++)
 +                    (void) fprintf(fp," %u",*(iatoms++));
 +                (void) fprintf(fp,"\n");
 +                i+=1+interaction_function[ftype].nratoms;
 +#else
 +                fprintf(fp,"%5d%5d\n",i,iatoms[i]);
 +                i++;
 +#endif
 +            }
 +        }
 +    }
 +}
 +
 +static void pr_cmap(FILE *fp, int indent, const char *title,
 +                    gmx_cmap_t *cmap_grid, gmx_bool bShowNumbers)
 +{
 +    int i,j,nelem;
 +    real dx,idx;
 +      
 +    dx    = 360.0 / cmap_grid->grid_spacing;
 +    nelem = cmap_grid->grid_spacing*cmap_grid->grid_spacing;
 +      
 +    if(available(fp,cmap_grid,indent,title))
 +    {
 +        fprintf(fp,"%s\n",title);
 +              
 +        for(i=0;i<cmap_grid->ngrid;i++)
 +        {
 +            idx = -180.0;
 +            fprintf(fp,"%8s %8s %8s %8s\n","V","dVdx","dVdy","d2dV");
 +                      
 +            fprintf(fp,"grid[%3d]={\n",bShowNumbers?i:-1);
 +                      
 +            for(j=0;j<nelem;j++)
 +            {
 +                if( (j%cmap_grid->grid_spacing)==0)
 +                {
 +                    fprintf(fp,"%8.1f\n",idx);
 +                    idx+=dx;
 +                }
 +                              
 +                fprintf(fp,"%8.3f ",cmap_grid->cmapdata[i].cmap[j*4]);
 +                fprintf(fp,"%8.3f ",cmap_grid->cmapdata[i].cmap[j*4+1]);
 +                fprintf(fp,"%8.3f ",cmap_grid->cmapdata[i].cmap[j*4+2]);
 +                fprintf(fp,"%8.3f\n",cmap_grid->cmapdata[i].cmap[j*4+3]);
 +            }
 +            fprintf(fp,"\n");
 +        }
 +    }
 +      
 +}
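For orientation (an editorial note, not part of the commit): pr_cmap above prints, for each of the cmap_grid->ngrid grids, grid_spacing*grid_spacing rows of four values per grid point, matching the V, dVdx, dVdy and second-derivative ("d2dV") column headers, and emits a new angle value every grid_spacing rows in steps of dx = 360/grid_spacing degrees. For a typical 24x24 CMAP grid that is 576 rows per grid at 15-degree spacing.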
 +
 +void pr_ffparams(FILE *fp,int indent,const char *title,
 +                 gmx_ffparams_t *ffparams,
 +                 gmx_bool bShowNumbers)
 +{
 +  int i,j;
 +  
 +  indent=pr_title(fp,indent,title);
 +  (void) pr_indent(fp,indent);
 +  (void) fprintf(fp,"atnr=%d\n",ffparams->atnr);
 +  (void) pr_indent(fp,indent);
 +  (void) fprintf(fp,"ntypes=%d\n",ffparams->ntypes);
 +  for (i=0; i<ffparams->ntypes; i++) {
 +      (void) pr_indent(fp,indent+INDENT);
 +      (void) fprintf(fp,"functype[%d]=%s, ",
 +                     bShowNumbers?i:-1,
 +                     interaction_function[ffparams->functype[i]].name);
 +      pr_iparams(fp,ffparams->functype[i],&ffparams->iparams[i]);
 +  }
 +  (void) pr_double(fp,indent,"reppow",ffparams->reppow);
 +  (void) pr_real(fp,indent,"fudgeQQ",ffparams->fudgeQQ);
 +  pr_cmap(fp,indent,"cmap",&ffparams->cmap_grid,bShowNumbers);
 +}
 +
 +void pr_idef(FILE *fp,int indent,const char *title,t_idef *idef, gmx_bool bShowNumbers)
 +{
 +  int i,j;
 +  
 +  if (available(fp,idef,indent,title)) {  
 +    indent=pr_title(fp,indent,title);
 +    (void) pr_indent(fp,indent);
 +    (void) fprintf(fp,"atnr=%d\n",idef->atnr);
 +    (void) pr_indent(fp,indent);
 +    (void) fprintf(fp,"ntypes=%d\n",idef->ntypes);
 +    for (i=0; i<idef->ntypes; i++) {
 +      (void) pr_indent(fp,indent+INDENT);
 +      (void) fprintf(fp,"functype[%d]=%s, ",
 +                   bShowNumbers?i:-1,
 +                   interaction_function[idef->functype[i]].name);
 +      pr_iparams(fp,idef->functype[i],&idef->iparams[i]);
 +    }
 +    (void) pr_real(fp,indent,"fudgeQQ",idef->fudgeQQ);
 +
 +    for(j=0; (j<F_NRE); j++)
 +      pr_ilist(fp,indent,interaction_function[j].longname,
 +               idef->functype,&idef->il[j],bShowNumbers);
 +  }
 +}
 +
 +static int pr_block_title(FILE *fp,int indent,const char *title,t_block *block)
 +{
 +  int i;
 +
 +  if (available(fp,block,indent,title))
 +    {
 +      indent=pr_title(fp,indent,title);
 +      (void) pr_indent(fp,indent);
 +      (void) fprintf(fp,"nr=%d\n",block->nr);
 +    }
 +  return indent;
 +}
 +
 +static int pr_blocka_title(FILE *fp,int indent,const char *title,t_blocka *block)
 +{
 +  int i;
 +
 +  if (available(fp,block,indent,title))
 +    {
 +      indent=pr_title(fp,indent,title);
 +      (void) pr_indent(fp,indent);
 +      (void) fprintf(fp,"nr=%d\n",block->nr);
 +      (void) pr_indent(fp,indent);
 +      (void) fprintf(fp,"nra=%d\n",block->nra);
 +    }
 +  return indent;
 +}
 +
 +static void low_pr_blocka(FILE *fp,int indent,const char *title,t_blocka *block, gmx_bool bShowNumbers)
 +{
 +  int i;
 +  
 +  if (available(fp,block,indent,title))
 +    {
 +      indent=pr_blocka_title(fp,indent,title,block);
 +      for (i=0; i<=block->nr; i++)
 +        {
 +          (void) pr_indent(fp,indent+INDENT);
 +          (void) fprintf(fp,"%s->index[%d]=%u\n",
 +                       title,bShowNumbers?i:-1,block->index[i]);
 +        }
 +      for (i=0; i<block->nra; i++)
 +        {
 +          (void) pr_indent(fp,indent+INDENT);
 +          (void) fprintf(fp,"%s->a[%d]=%u\n",
 +                       title,bShowNumbers?i:-1,block->a[i]);
 +        }
 +    }
 +}
 +
 +void pr_block(FILE *fp,int indent,const char *title,t_block *block,gmx_bool bShowNumbers)
 +{
 +  int i,j,ok,size,start,end;
 +  
 +  if (available(fp,block,indent,title))
 +    {
 +      indent=pr_block_title(fp,indent,title,block);
 +      start=0;
 +      end=start;
 +      if ((ok=(block->index[start]==0))==0)
 +        (void) fprintf(fp,"block->index[%d] should be 0\n",start);
 +      else
 +        for (i=0; i<block->nr; i++)
 +          {
 +            end=block->index[i+1];
 +            size=pr_indent(fp,indent);
 +            if (end<=start)
 +              size+=fprintf(fp,"%s[%d]={}\n",title,i);
 +            else
 +              size+=fprintf(fp,"%s[%d]={%d..%d}\n",
 +                          title,bShowNumbers?i:-1,
 +                          bShowNumbers?start:-1,bShowNumbers?end-1:-1);
 +            start=end;
 +          }
 +    }
 +}
 +
 +void pr_blocka(FILE *fp,int indent,const char *title,t_blocka *block,gmx_bool bShowNumbers)
 +{
 +  int i,j,ok,size,start,end;
 +  
 +  if (available(fp,block,indent,title))
 +    {
 +      indent=pr_blocka_title(fp,indent,title,block);
 +      start=0;
 +      end=start;
 +      if ((ok=(block->index[start]==0))==0)
 +        (void) fprintf(fp,"block->index[%d] should be 0\n",start);
 +      else
 +        for (i=0; i<block->nr; i++)
 +          {
 +            end=block->index[i+1];
 +            size=pr_indent(fp,indent);
 +            if (end<=start)
 +              size+=fprintf(fp,"%s[%d]={",title,i);
 +            else
 +              size+=fprintf(fp,"%s[%d][%d..%d]={",
 +                          title,bShowNumbers?i:-1,
 +                          bShowNumbers?start:-1,bShowNumbers?end-1:-1);
 +            for (j=start; j<end; j++)
 +              {
 +                if (j>start) size+=fprintf(fp,", ");
 +                if ((size)>(USE_WIDTH))
 +                  {
 +                    (void) fprintf(fp,"\n");
 +                    size=pr_indent(fp,indent+INDENT);
 +                  }
 +                size+=fprintf(fp,"%u",block->a[j]);
 +              }
 +            (void) fprintf(fp,"}\n");
 +            start=end;
 +          }
 +      if ((end!=block->nra)||(!ok)) 
 +        {
 +          (void) pr_indent(fp,indent);
 +          (void) fprintf(fp,"tables inconsistent, dumping complete tables:\n");
 +          low_pr_blocka(fp,indent,title,block,bShowNumbers);
 +        }
 +    }
 +}
 +
 +static void pr_strings(FILE *fp,int indent,const char *title,char ***nm,int n, gmx_bool bShowNumbers)
 +{
 +  int i;
 +
 +  if (available(fp,nm,indent,title))
 +    {  
 +      indent=pr_title_n(fp,indent,title,n);
 +      for (i=0; i<n; i++)
 +        {
 +          (void) pr_indent(fp,indent);
 +          (void) fprintf(fp,"%s[%d]={name=\"%s\"}\n",
 +                       title,bShowNumbers?i:-1,*(nm[i]));
 +        }
 +    }
 +}
 +
 +static void pr_strings2(FILE *fp,int indent,const char *title,
 +                      char ***nm,char ***nmB,int n, gmx_bool bShowNumbers)
 +{
 +  int i;
 +
 +  if (available(fp,nm,indent,title))
 +    {  
 +      indent=pr_title_n(fp,indent,title,n);
 +      for (i=0; i<n; i++)
 +        {
 +          (void) pr_indent(fp,indent);
 +          (void) fprintf(fp,"%s[%d]={name=\"%s\",nameB=\"%s\"}\n",
 +                       title,bShowNumbers?i:-1,*(nm[i]),*(nmB[i]));
 +        }
 +    }
 +}
 +
 +static void pr_resinfo(FILE *fp,int indent,const char *title,t_resinfo *resinfo,int n, gmx_bool bShowNumbers)
 +{
 +    int i;
 +    
 +    if (available(fp,resinfo,indent,title))
 +    {  
 +        indent=pr_title_n(fp,indent,title,n);
 +        for (i=0; i<n; i++)
 +        {
 +            (void) pr_indent(fp,indent);
 +            (void) fprintf(fp,"%s[%d]={name=\"%s\", nr=%d, ic='%c'}\n",
 +                           title,bShowNumbers?i:-1,
 +                           *(resinfo[i].name),resinfo[i].nr,
 +                           (resinfo[i].ic == '\0') ? ' ' : resinfo[i].ic);
 +        }
 +    }
 +}
 +
 +static void pr_atom(FILE *fp,int indent,const char *title,t_atom *atom,int n)
 +{
 +  int i,j;
 +  
 +  if (available(fp,atom,indent,title)) {  
 +    indent=pr_title_n(fp,indent,title,n);
 +    for (i=0; i<n; i++) {
 +      (void) pr_indent(fp,indent);
 +      fprintf(fp,"%s[%6d]={type=%3d, typeB=%3d, ptype=%8s, m=%12.5e, "
 +              "q=%12.5e, mB=%12.5e, qB=%12.5e, resind=%5d, atomnumber=%3d}\n",
 +              title,i,atom[i].type,atom[i].typeB,ptype_str[atom[i].ptype],
 +              atom[i].m,atom[i].q,atom[i].mB,atom[i].qB,
 +              atom[i].resind,atom[i].atomnumber);
 +    }
 +  }
 +}
 +
 +static void pr_grps(FILE *fp,int indent,const char *title,t_grps grps[],
 +                  char **grpname[], gmx_bool bShowNumbers)
 +{
 +    int i,j;
 +
 +    for(i=0; (i<egcNR); i++)
 +    {
 +        fprintf(fp,"%s[%-12s] nr=%d, name=[",title,gtypes[i],grps[i].nr);
 +        for(j=0; (j<grps[i].nr); j++)
 +        {
 +            fprintf(fp," %s",*(grpname[grps[i].nm_ind[j]]));
 +        }
 +        fprintf(fp,"]\n");
 +    }
 +}
 +
 +static void pr_groups(FILE *fp,int indent,const char *title,
 +                      gmx_groups_t *groups,
 +                      gmx_bool bShowNumbers)
 +{
 +    int grpnr[egcNR];
 +    int nat_max,i,g;
 +
 +    pr_grps(fp,indent,"grp",groups->grps,groups->grpname,bShowNumbers);
 +    pr_strings(fp,indent,"grpname",groups->grpname,groups->ngrpname,bShowNumbers);
 +
 +    (void) pr_indent(fp,indent);
 +    fprintf(fp,"groups          ");
 +    for(g=0; g<egcNR; g++)
 +    {
 +        fprintf(fp," %5.5s",gtypes[g]);
 +    }
 +    fprintf(fp,"\n");
 +
 +    (void) pr_indent(fp,indent);
 +    fprintf(fp,"allocated       ");
 +    nat_max = 0;
 +    for(g=0; g<egcNR; g++)
 +    {
 +        fprintf(fp," %5d",groups->ngrpnr[g]);
 +        nat_max = max(nat_max,groups->ngrpnr[g]);
 +    }
 +    fprintf(fp,"\n");
 +
 +    if (nat_max == 0)
 +    {
 +        (void) pr_indent(fp,indent);
 +        fprintf(fp,"groupnr[%5s] =","*");
 +        for(g=0; g<egcNR; g++)
 +        {
 +            fprintf(fp,"  %3d ",0);
 +        }
 +        fprintf(fp,"\n");
 +    }
 +    else
 +    {
 +        for(i=0; i<nat_max; i++)
 +        {
 +            (void) pr_indent(fp,indent);
 +            fprintf(fp,"groupnr[%5d] =",i);
 +            for(g=0; g<egcNR; g++)
 +            {
 +                fprintf(fp,"  %3d ",
 +                        groups->grpnr[g] ? groups->grpnr[g][i] : 0);
 +            }
 +            fprintf(fp,"\n");
 +        }
 +    }
 +}
 +
 +void pr_atoms(FILE *fp,int indent,const char *title,t_atoms *atoms, 
 +            gmx_bool bShowNumbers)
 +{
 +  if (available(fp,atoms,indent,title))
 +    {
 +      indent=pr_title(fp,indent,title);
 +      pr_atom(fp,indent,"atom",atoms->atom,atoms->nr);
 +      pr_strings(fp,indent,"atom",atoms->atomname,atoms->nr,bShowNumbers);
 +      pr_strings2(fp,indent,"type",atoms->atomtype,atoms->atomtypeB,atoms->nr,bShowNumbers);
 +      pr_resinfo(fp,indent,"residue",atoms->resinfo,atoms->nres,bShowNumbers);
 +    }
 +}
 +
 +
 +void pr_atomtypes(FILE *fp,int indent,const char *title,t_atomtypes *atomtypes, 
 +                gmx_bool bShowNumbers)
 +{
 +  int i;
 +  if (available(fp,atomtypes,indent,title)) 
 +  {
 +    indent=pr_title(fp,indent,title);
 +    for(i=0;i<atomtypes->nr;i++) {
 +      pr_indent(fp,indent);
 +              fprintf(fp,
 +                              "atomtype[%3d]={radius=%12.5e, volume=%12.5e, gb_radius=%12.5e, surftens=%12.5e, atomnumber=%4d, S_hct=%12.5e}\n",
 +                              bShowNumbers?i:-1,atomtypes->radius[i],atomtypes->vol[i],
 +                              atomtypes->gb_radius[i],
 +                              atomtypes->surftens[i],atomtypes->atomnumber[i],atomtypes->S_hct[i]);
 +    }
 +  }
 +}
 +
 +static void pr_moltype(FILE *fp,int indent,const char *title,
 +                       gmx_moltype_t *molt,int n,
 +                       gmx_ffparams_t *ffparams,
 +                       gmx_bool bShowNumbers)
 +{
 +    int j;
 +
 +    indent = pr_title_n(fp,indent,title,n);
 +    (void) pr_indent(fp,indent);
 +    (void) fprintf(fp,"name=\"%s\"\n",*(molt->name));
 +    pr_atoms(fp,indent,"atoms",&(molt->atoms),bShowNumbers);
 +    pr_block(fp,indent,"cgs",&molt->cgs, bShowNumbers);
 +    pr_blocka(fp,indent,"excls",&molt->excls, bShowNumbers);
 +    for(j=0; (j<F_NRE); j++) {
 +        pr_ilist(fp,indent,interaction_function[j].longname,
 +                 ffparams->functype,&molt->ilist[j],bShowNumbers);
 +    }
 +}
 +
 +static void pr_molblock(FILE *fp,int indent,const char *title,
 +                        gmx_molblock_t *molb,int n,
 +                        gmx_moltype_t *molt,
 +                        gmx_bool bShowNumbers)
 +{
 +    indent = pr_title_n(fp,indent,title,n);
 +    (void) pr_indent(fp,indent);
 +    (void) fprintf(fp,"%-20s = %d \"%s\"\n",
 +                   "moltype",molb->type,*(molt[molb->type].name));
 +    pr_int(fp,indent,"#molecules",molb->nmol);
 +    pr_int(fp,indent,"#atoms_mol",molb->natoms_mol);
 +    pr_int(fp,indent,"#posres_xA",molb->nposres_xA);
 +    if (molb->nposres_xA > 0) {
 +        pr_rvecs(fp,indent,"posres_xA",molb->posres_xA,molb->nposres_xA);
 +    }
 +    pr_int(fp,indent,"#posres_xB",molb->nposres_xB);
 +    if (molb->nposres_xB > 0) {
 +        pr_rvecs(fp,indent,"posres_xB",molb->posres_xB,molb->nposres_xB);
 +    }
 +}
 +
 +void pr_mtop(FILE *fp,int indent,const char *title,gmx_mtop_t *mtop,
 +             gmx_bool bShowNumbers)
 +{
 +    int mt,mb;
 +
 +    if (available(fp,mtop,indent,title)) {
 +        indent=pr_title(fp,indent,title);
 +        (void) pr_indent(fp,indent);
 +        (void) fprintf(fp,"name=\"%s\"\n",*(mtop->name));
 +        pr_int(fp,indent,"#atoms",mtop->natoms);
 +        pr_int(fp,indent,"#molblock",mtop->nmolblock);
 +        for(mb=0; mb<mtop->nmolblock; mb++) {
 +            pr_molblock(fp,indent,"molblock",&mtop->molblock[mb],mb,
 +                        mtop->moltype,bShowNumbers);
 +        }
 +        pr_ffparams(fp,indent,"ffparams",&(mtop->ffparams),bShowNumbers);
 +        pr_atomtypes(fp,indent,"atomtypes",&(mtop->atomtypes),bShowNumbers);
 +        for(mt=0; mt<mtop->nmoltype; mt++) {
 +            pr_moltype(fp,indent,"moltype",&mtop->moltype[mt],mt,
 +                       &mtop->ffparams,bShowNumbers);
 +        }
 +        pr_groups(fp,indent,"groups",&mtop->groups,bShowNumbers);
 +    }
 +}
 +
 +void pr_top(FILE *fp,int indent,const char *title,t_topology *top, gmx_bool bShowNumbers)
 +{
 +  if (available(fp,top,indent,title)) {
 +    indent=pr_title(fp,indent,title);
 +    (void) pr_indent(fp,indent);
 +    (void) fprintf(fp,"name=\"%s\"\n",*(top->name));
 +    pr_atoms(fp,indent,"atoms",&(top->atoms),bShowNumbers);
 +    pr_atomtypes(fp,indent,"atomtypes",&(top->atomtypes),bShowNumbers);
 +    pr_block(fp,indent,"cgs",&top->cgs, bShowNumbers);
 +    pr_block(fp,indent,"mols",&top->mols, bShowNumbers);
 +    pr_blocka(fp,indent,"excls",&top->excls, bShowNumbers);
 +    pr_idef(fp,indent,"idef",&top->idef,bShowNumbers);
 +  }
 +}
 +
 +void pr_header(FILE *fp,int indent,const char *title,t_tpxheader *sh)
 +{
 +  char buf[22];
 +    
 +  if (available(fp,sh,indent,title))
 +    {
 +      indent=pr_title(fp,indent,title);
 +      pr_indent(fp,indent);
 +      fprintf(fp,"bIr    = %spresent\n",sh->bIr?"":"not ");
 +      pr_indent(fp,indent);
 +      fprintf(fp,"bBox   = %spresent\n",sh->bBox?"":"not ");
 +      pr_indent(fp,indent);
 +      fprintf(fp,"bTop   = %spresent\n",sh->bTop?"":"not ");
 +      pr_indent(fp,indent);
 +      fprintf(fp,"bX     = %spresent\n",sh->bX?"":"not ");
 +      pr_indent(fp,indent);
 +      fprintf(fp,"bV     = %spresent\n",sh->bV?"":"not ");
 +      pr_indent(fp,indent);
 +      fprintf(fp,"bF     = %spresent\n",sh->bF?"":"not ");
 +      
 +      pr_indent(fp,indent);
 +      fprintf(fp,"natoms = %d\n",sh->natoms);
 +      pr_indent(fp,indent);
 +      fprintf(fp,"lambda = %e\n",sh->lambda);
 +    }
 +}
 +
 +void pr_commrec(FILE *fp,int indent,t_commrec *cr)
 +{
 +  pr_indent(fp,indent);
 +  fprintf(fp,"commrec:\n");
 +  indent+=2;
 +  pr_indent(fp,indent);
 +  fprintf(fp,"nodeid    = %d\n",cr->nodeid);
 +  pr_indent(fp,indent);
 +  fprintf(fp,"nnodes    = %d\n",cr->nnodes);
 +  pr_indent(fp,indent);
 +  fprintf(fp,"npmenodes = %d\n",cr->npmenodes);
 +  /*
 +  pr_indent(fp,indent);
 +  fprintf(fp,"threadid  = %d\n",cr->threadid);
 +  pr_indent(fp,indent);
 +  fprintf(fp,"nthreads  = %d\n",cr->nthreads);
 +  */
 +}
index a08ca054101eb433dd40b713914b35ec2ebe9cd7,0000000000000000000000000000000000000000..695695d3a49d7271f840aad323348496499c1cfd
mode 100644,000000..100644
--- /dev/null
@@@ -1,3416 -1,0 +1,3459 @@@
-         if (ir->coulomb_modifier != eintmodNONE ||
-             ir->vdw_modifier != eintmodNONE)
-         {
-             warning_error(wi,"potential modifiers are not supported (yet) with the group cut-off scheme");
-         }
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <ctype.h>
 +#include <stdlib.h>
 +#include <limits.h>
 +#include "sysstuff.h"
 +#include "smalloc.h"
 +#include "typedefs.h"
 +#include "physics.h"
 +#include "names.h"
 +#include "gmx_fatal.h"
 +#include "macros.h"
 +#include "index.h"
 +#include "symtab.h"
 +#include "string2.h"
 +#include "readinp.h"
 +#include "warninp.h"
 +#include "readir.h" 
 +#include "toputil.h"
 +#include "index.h"
 +#include "network.h"
 +#include "vec.h"
 +#include "pbc.h"
 +#include "mtop_util.h"
 +#include "chargegroup.h"
 +#include "inputrec.h"
 +
 +#define MAXPTR 254
 +#define NOGID  255
 +#define MAXLAMBDAS 1024
 +
 +/* Resource parameters 
 + * Do not change any of these until you read the instruction
 + * in readinp.h. Some cpp's do not take spaces after the backslash
 + * (like the c-shell), which will give you a very weird compiler
 + * message.
 + */
 +
 +static char tcgrps[STRLEN],tau_t[STRLEN],ref_t[STRLEN],
 +  acc[STRLEN],accgrps[STRLEN],freeze[STRLEN],frdim[STRLEN],
 +  energy[STRLEN],user1[STRLEN],user2[STRLEN],vcm[STRLEN],xtc_grps[STRLEN],
 +  couple_moltype[STRLEN],orirefitgrp[STRLEN],egptable[STRLEN],egpexcl[STRLEN],
 +  wall_atomtype[STRLEN],wall_density[STRLEN],deform[STRLEN],QMMM[STRLEN];
 +static char fep_lambda[efptNR][STRLEN];
 +static char lambda_weights[STRLEN];
 +static char **pull_grp;
 +static char **rot_grp;
 +static char anneal[STRLEN],anneal_npoints[STRLEN],
 +  anneal_time[STRLEN],anneal_temp[STRLEN];
 +static char QMmethod[STRLEN],QMbasis[STRLEN],QMcharge[STRLEN],QMmult[STRLEN],
 +  bSH[STRLEN],CASorbitals[STRLEN], CASelectrons[STRLEN],SAon[STRLEN],
 +  SAoff[STRLEN],SAsteps[STRLEN],bTS[STRLEN],bOPT[STRLEN]; 
 +static char efield_x[STRLEN],efield_xt[STRLEN],efield_y[STRLEN],
 +  efield_yt[STRLEN],efield_z[STRLEN],efield_zt[STRLEN];
 +
 +enum {
 +    egrptpALL,         /* All particles have to be a member of a group.     */
 +    egrptpALL_GENREST, /* A rest group with name is generated for particles *
 +                        * that are not part of any group.                   */
 +    egrptpPART,        /* As egrptpALL_GENREST, but no name is generated    *
 +                        * for the rest group.                               */
 +    egrptpONE          /* Merge all selected groups into one group,         *
 +                        * make a rest group for the remaining particles.    */
 +};
 +
 +
 +void init_ir(t_inputrec *ir, t_gromppopts *opts)
 +{
 +  snew(opts->include,STRLEN); 
 +  snew(opts->define,STRLEN);
 +  snew(ir->fepvals,1);
 +  snew(ir->expandedvals,1);
 +  snew(ir->simtempvals,1);
 +}
 +
 +static void GetSimTemps(int ntemps, t_simtemp *simtemp, double *temperature_lambdas)
 +{
 +
 +    int i;
 +
 +    for (i=0;i<ntemps;i++)
 +    {
 +        /* simple linear scaling -- allows more control */
 +        if (simtemp->eSimTempScale == esimtempLINEAR)
 +        {
 +            simtemp->temperatures[i] = simtemp->simtemp_low + (simtemp->simtemp_high-simtemp->simtemp_low)*temperature_lambdas[i];
 +        }
 +        else if (simtemp->eSimTempScale == esimtempGEOMETRIC)  /* should give roughly equal acceptance for constant heat capacity . . . */
 +        {
 +            simtemp->temperatures[i] = simtemp->simtemp_low * pow(simtemp->simtemp_high/simtemp->simtemp_low,(1.0*i)/(ntemps-1));
 +        }
 +        else if (simtemp->eSimTempScale == esimtempEXPONENTIAL)
 +        {
 +            simtemp->temperatures[i] = simtemp->simtemp_low + (simtemp->simtemp_high-simtemp->simtemp_low)*((exp(temperature_lambdas[i])-1)/(exp(1.0)-1));
 +        }
 +        else
 +        {
 +            char errorstr[128];
 +            sprintf(errorstr,"eSimTempScale=%d not defined",simtemp->eSimTempScale);
 +            gmx_fatal(FARGS,"%s",errorstr);
 +        }
 +    }
 +}
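To make the three eSimTempScale branches above easier to compare, here is a small stand-alone sketch (an editorial illustration, not part of the commit). The three formulas are copied from GetSimTemps() above; the 300-400 K range, the five lambda points and the main() wrapper are hypothetical example values. Compile with `cc sketch.c -lm` to print the three ladders side by side.

#include <math.h>
#include <stdio.h>

int main(void)
{
    /* Hypothetical stand-ins for simtemp_low, simtemp_high and temperature-lambdas */
    const double t_low = 300.0, t_high = 400.0;
    const double lam[5] = {0.0, 0.25, 0.5, 0.75, 1.0};
    const int    n = 5;
    int i;

    printf("%8s %10s %10s %12s\n", "lambda", "linear", "geometric", "exponential");
    for (i = 0; i < n; i++)
    {
        /* linear: interpolate directly in lambda */
        double t_lin = t_low + (t_high - t_low)*lam[i];
        /* geometric: equal temperature ratios between neighbouring states */
        double t_geo = t_low * pow(t_high/t_low, (1.0*i)/(n - 1));
        /* exponential: weight the interpolation by (exp(lambda)-1)/(exp(1)-1) */
        double t_exp = t_low + (t_high - t_low)*((exp(lam[i]) - 1)/(exp(1.0) - 1));
        printf("%8.2f %10.2f %10.2f %12.2f\n", lam[i], t_lin, t_geo, t_exp);
    }
    return 0;
}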
 +
 +
 +
 +static void _low_check(gmx_bool b,char *s,warninp_t wi)
 +{
 +    if (b)
 +    {
 +        warning_error(wi,s);
 +    }
 +}
 +
 +static void check_nst(const char *desc_nst,int nst,
 +                      const char *desc_p,int *p,
 +                      warninp_t wi)
 +{
 +    char buf[STRLEN];
 +
 +    if (*p > 0 && *p % nst != 0)
 +    {
 +        /* Round up to the next multiple of nst */
 +        *p = ((*p)/nst + 1)*nst;
 +        sprintf(buf,"%s should be a multiple of %s, changing %s to %d\n",
 +              desc_p,desc_nst,desc_p,*p);
 +        warning(wi,buf);
 +    }
 +}
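As a quick worked example of the rounding rule in check_nst() above (an editorial sketch, not part of the commit; the helper name and the sample values are the editor's, the rounding expression is the one used above):

#include <stdio.h>

/* Round p up to the next multiple of nst, as check_nst() does for a positive
 * p that is not already a multiple; other values are left untouched. */
int round_up_to_multiple(int p, int nst)
{
    if (p > 0 && p % nst != 0)
    {
        p = (p/nst + 1)*nst;
    }
    return p;
}

int main(void)
{
    printf("%d\n", round_up_to_multiple(25, 10)); /* 30: rounded up           */
    printf("%d\n", round_up_to_multiple(30, 10)); /* 30: already a multiple   */
    printf("%d\n", round_up_to_multiple(-1, 10)); /* -1: negative values kept */
    return 0;
}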
 +
 +static gmx_bool ir_NVE(const t_inputrec *ir)
 +{
 +    return ((ir->eI == eiMD || EI_VV(ir->eI)) && ir->etc == etcNO);
 +}
 +
 +static int lcd(int n1,int n2)
 +{
 +    int d,i;
 +    
 +    d = 1;
 +    for(i=2; (i<=n1 && i<=n2); i++)
 +    {
 +        if (n1 % i == 0 && n2 % i == 0)
 +        {
 +            d = i;
 +        }
 +    }
 +    
 +  return d;
 +}
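A reading note on the helper above (editorial addition): despite the name lcd, the loop returns the largest common divisor of n1 and n2 found by trial division, i.e. their greatest common divisor, with a fallback of 1; for example lcd(12,18) gives 6 and lcd(7,13) gives 1. It is used further down to reconcile nstcalcenergy with nstenergy and nstlist.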
 +
 +static void process_interaction_modifier(const t_inputrec *ir,int *eintmod)
 +{
 +    if (*eintmod == eintmodPOTSHIFT_VERLET)
 +    {
 +        if (ir->cutoff_scheme == ecutsVERLET)
 +        {
 +            *eintmod = eintmodPOTSHIFT;
 +        }
 +        else
 +        {
 +            *eintmod = eintmodNONE;
 +        }
 +    }
 +}
 +
 +void check_ir(const char *mdparin,t_inputrec *ir, t_gromppopts *opts,
 +              warninp_t wi)
 +/* Check internal consistency */
 +{
 +    /* Strange macro: first one fills the err_buf, and then one can check 
 +     * the condition, which will print the message and increase the error
 +     * counter.
 +     */
 +#define CHECK(b) _low_check(b,err_buf,wi)
 +    char err_buf[256],warn_buf[STRLEN];
 +    int i,j;
 +    int  ns_type=0;
 +    real dt_coupl=0;
 +    real dt_pcoupl;
 +    int  nstcmin;
 +    t_lambda *fep = ir->fepvals;
 +    t_expanded *expand = ir->expandedvals;
 +
 +  set_warning_line(wi,mdparin,-1);
 +
 +    /* BASIC CUT-OFF STUFF */
 +    if (ir->rcoulomb < 0)
 +    {
 +        warning_error(wi,"rcoulomb should be >= 0");
 +    }
 +    if (ir->rvdw < 0)
 +    {
 +        warning_error(wi,"rvdw should be >= 0");
 +    }
 +    if (ir->rlist < 0 &&
 +        !(ir->cutoff_scheme == ecutsVERLET && ir->verletbuf_drift > 0))
 +    {
 +        warning_error(wi,"rlist should be >= 0");
 +    }
 +
 +    process_interaction_modifier(ir,&ir->coulomb_modifier);
 +    process_interaction_modifier(ir,&ir->vdw_modifier);
 +
 +    if (ir->cutoff_scheme == ecutsGROUP)
 +    {
 +        /* BASIC CUT-OFF STUFF */
 +        if (ir->rlist == 0 ||
 +            !((EEL_MIGHT_BE_ZERO_AT_CUTOFF(ir->coulombtype) && ir->rcoulomb > ir->rlist) ||
 +              (EVDW_MIGHT_BE_ZERO_AT_CUTOFF(ir->vdwtype)    && ir->rvdw     > ir->rlist))) {
 +            /* No switched potential and/or no twin-range:
 +             * we can set the long-range cut-off to the maximum of the other cut-offs.
 +             */
 +            ir->rlistlong = max_cutoff(ir->rlist,max_cutoff(ir->rvdw,ir->rcoulomb));
 +        }
 +        else if (ir->rlistlong < 0)
 +        {
 +            ir->rlistlong = max_cutoff(ir->rlist,max_cutoff(ir->rvdw,ir->rcoulomb));
 +            sprintf(warn_buf,"rlistlong was not set, setting it to %g (no buffer)",
 +                    ir->rlistlong);
 +            warning(wi,warn_buf);
 +        }
 +        if (ir->rlistlong == 0 && ir->ePBC != epbcNONE)
 +        {
 +            warning_error(wi,"Can not have an infinite cut-off with PBC");
 +        }
 +        if (ir->rlistlong > 0 && (ir->rlist == 0 || ir->rlistlong < ir->rlist))
 +        {
 +            warning_error(wi,"rlistlong can not be shorter than rlist");
 +        }
 +        if (IR_TWINRANGE(*ir) && ir->nstlist <= 0)
 +        {
 +            warning_error(wi,"Can not have nstlist<=0 with twin-range interactions");
 +        }
 +    }
-     sprintf(warn_buf,"coulombtype = %s is only for testing purposes and can lead to serious artifacts, advice: use coulombtype = %s",
++    
++    if(ir->rlistlong == ir->rlist)
++    {
++        ir->nstcalclr = 0;
++    }
++    else if(ir->rlistlong>ir->rlist && ir->nstcalclr==0)
++    {
++        warning_error(wi,"With different cutoffs for electrostatics and VdW, nstcalclr must be -1 or a positive number");
++    }
++    
 +    if (ir->cutoff_scheme == ecutsVERLET)
 +    {
 +        real rc_max;
 +
 +        /* Normal Verlet type neighbor-list, currently only limited feature support */
 +        if (inputrec2nboundeddim(ir) < 3)
 +        {
 +            warning_error(wi,"With Verlet lists only full pbc or pbc=xy with walls is supported");
 +        }
 +        if (ir->rcoulomb != ir->rvdw)
 +        {
 +            warning_error(wi,"With Verlet lists rcoulomb!=rvdw is not supported");
 +        }
 +        if (ir->vdwtype != evdwCUT)
 +        {
 +            warning_error(wi,"With Verlet lists only cut-off LJ interactions are supported");
 +        }
 +        if (!(ir->coulombtype == eelCUT ||
 +              (EEL_RF(ir->coulombtype) && ir->coulombtype != eelRF_NEC) ||
 +              EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD))
 +        {
 +            warning_error(wi,"With Verlet lists only cut-off, reaction-field, PME and Ewald electrostatics are supported");
 +        }
 +
 +        if (ir->nstlist <= 0)
 +        {
 +             warning_error(wi,"With Verlet lists nstlist should be larger than 0");
 +        }
 +
 +        if (ir->nstlist < 10)
 +        {
 +            warning_note(wi,"With Verlet lists the optimal nstlist is >= 10, with GPUs >= 20. Note that with the Verlet scheme, nstlist has no effect on the accuracy of your simulation.");
 +        }
 +
 +        rc_max = max(ir->rvdw,ir->rcoulomb);
 +
 +        if (ir->verletbuf_drift <= 0)
 +        {
 +            if (ir->verletbuf_drift == 0)
 +            {
 +                warning_error(wi,"Can not have an energy drift of exactly 0");
 +            }
 +
 +            if (ir->rlist < rc_max)
 +            {
 +                warning_error(wi,"With verlet lists rlist can not be smaller than rvdw or rcoulomb");
 +            }
 +            
 +            if (ir->rlist == rc_max && ir->nstlist > 1)
 +            {
 +                warning_note(wi,"rlist is equal to rvdw and/or rcoulomb: there is no explicit Verlet buffer. The cluster pair list does have a buffering effect, but choosing a larger rlist might be necessary for good energy conservation.");
 +            }
 +        }
 +        else
 +        {
 +            if (ir->rlist > rc_max)
 +            {
 +                warning_note(wi,"You have set rlist larger than the interaction cut-off, but you also have verlet-buffer-drift > 0. Will set rlist using verlet-buffer-drift.");
 +            }
 +
 +            if (ir->nstlist == 1)
 +            {
 +                /* No buffer required */
 +                ir->rlist = rc_max;
 +            }
 +            else
 +            {
 +                if (EI_DYNAMICS(ir->eI))
 +                {
 +                    if (EI_MD(ir->eI) && ir->etc == etcNO)
 +                    {
 +                        warning_error(wi,"Temperature coupling is required for calculating rlist using the energy drift with verlet-buffer-drift > 0. Either use temperature coupling or set rlist yourself together with verlet-buffer-drift = -1."); 
 +                    }
 +
 +                    if (inputrec2nboundeddim(ir) < 3)
 +                    {
 +                        warning_error(wi,"The box volume is required for calculating rlist from the energy drift with verlet-buffer-drift > 0. You are using at least one unbounded dimension, so no volume can be computed. Either use a finite box, or set rlist yourself together with verlet-buffer-drift = -1.");
 +                    }
 +                    /* Set rlist temporarily so we can continue processing */
 +                    ir->rlist = rc_max;
 +                }
 +                else
 +                {
 +                    /* Set the buffer to 5% of the cut-off */
 +                    ir->rlist = 1.05*rc_max;
 +                }
 +            }
 +        }
 +
 +        /* No twin-range calculations with Verlet lists */
 +        ir->rlistlong = ir->rlist;
 +    }
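The branching above that picks rlist for the Verlet scheme can be hard to follow between the warnings, so the condensed sketch below (an editorial illustration, not part of the commit) restates just the value that ends up in ir->rlist. The function name, the is_dynamics flag standing in for EI_DYNAMICS(ir->eI), and the sample numbers in main() are hypothetical; the 1.05 buffer factor and the branch conditions come from the code above.

#include <stdio.h>

double choose_verlet_rlist(double rlist, double rc_max, int nstlist,
                           double verletbuf_drift, int is_dynamics)
{
    if (verletbuf_drift <= 0)
    {
        return rlist;       /* user-supplied rlist; the checks above require rlist >= rc_max */
    }
    if (nstlist == 1)
    {
        return rc_max;      /* no buffer required */
    }
    if (is_dynamics)
    {
        return rc_max;      /* temporary value, replaced later from the drift estimate */
    }
    return 1.05*rc_max;     /* non-dynamical run: fixed 5% buffer */
}

int main(void)
{
    printf("%.3f\n", choose_verlet_rlist(1.2, 1.0, 20, 0.005, 1)); /* dynamics: 1.000 (set later) */
    printf("%.3f\n", choose_verlet_rlist(1.2, 1.0, 20, 0.005, 0)); /* static run: 1.050           */
    printf("%.3f\n", choose_verlet_rlist(1.2, 1.0, 20, -1.0,  1)); /* drift off: keep 1.200       */
    return 0;
}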
 +
++    if(ir->nstcalclr==-1)
++    {
++        /* if rlist=rlistlong, this will later be changed to nstcalclr=0 */
++        ir->nstcalclr = ir->nstlist;
++    }
++    else if(ir->nstcalclr>0)
++    {
++        if(ir->nstlist>0 && (ir->nstlist % ir->nstcalclr != 0))
++        {
++            warning_error(wi,"nstlist must be evenly divisible by nstcalclr. Use nstcalclr = -1 to automatically follow nstlist");
++        }
++    }
++    else if(ir->nstcalclr<-1)
++    {
++        warning_error(wi,"nstcalclr must be a positive number (divisor of nstlist), or -1 to follow nstlist.");
++    }
++    
++    if(EEL_PME(ir->coulombtype) && ir->rcoulomb > ir->rvdw && ir->nstcalclr>1)
++    {
++        warning_error(wi,"When used with PME, the long-range component of twin-range interactions must be updated every step (nstcalclr)");
++    }
++       
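The two nstcalclr blocks above (the one after the group-scheme checks and this one) together define how the long-range update interval is resolved. The sketch below (an editorial illustration, not part of the commit) folds those rules into one hypothetical helper that returns the effective nstcalclr, or -1 where the real code would call warning_error(); the helper name and the example values in main() are the editor's.

#include <stdio.h>

int resolve_nstcalclr(int nstcalclr, int nstlist, double rlist, double rlistlong)
{
    if (rlistlong == rlist)
    {
        return 0;                           /* no separate long-range updates needed     */
    }
    if (nstcalclr == 0)
    {
        return -1;                          /* error: twin-range cut-offs need nstcalclr */
    }
    if (nstcalclr == -1)
    {
        return nstlist;                     /* follow the neighbour-list update interval */
    }
    if (nstcalclr < -1)
    {
        return -1;                          /* error: must be positive or -1             */
    }
    if (nstlist > 0 && nstlist % nstcalclr != 0)
    {
        return -1;                          /* error: nstlist must be divisible by it    */
    }
    return nstcalclr;
}

int main(void)
{
    printf("%d\n", resolve_nstcalclr(-1, 10, 0.9, 1.2)); /* 10: follows nstlist          */
    printf("%d\n", resolve_nstcalclr( 5, 10, 0.9, 1.2)); /*  5: divides nstlist          */
    printf("%d\n", resolve_nstcalclr( 3, 10, 0.9, 1.2)); /* -1: 10 %% 3 != 0, rejected   */
    return 0;
}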
 +    /* GENERAL INTEGRATOR STUFF */
 +    if (!(ir->eI == eiMD || EI_VV(ir->eI)))
 +    {
 +        ir->etc = etcNO;
 +    }
 +    if (ir->eI == eiVVAK) {
 +        sprintf(warn_buf,"Integrator method %s is implemented primarily for validation purposes; for molecular dynamics, you should probably be using %s or %s",ei_names[eiVVAK],ei_names[eiMD],ei_names[eiVV]);
 +        warning_note(wi,warn_buf);
 +    }
 +    if (!EI_DYNAMICS(ir->eI))
 +    {
 +        ir->epc = epcNO;
 +    }
 +    if (EI_DYNAMICS(ir->eI))
 +    {
 +        if (ir->nstcalcenergy < 0)
 +        {
 +            ir->nstcalcenergy = ir_optimal_nstcalcenergy(ir);
 +            if (ir->nstenergy != 0 && ir->nstenergy < ir->nstcalcenergy)
 +            {
 +                /* nstcalcenergy larger than nstenergy does not make sense.
 +                 * We ideally want nstcalcenergy=nstenergy.
 +                 */
 +                if (ir->nstlist > 0)
 +                {
 +                    ir->nstcalcenergy = lcd(ir->nstenergy,ir->nstlist);
 +                }
 +                else
 +                {
 +                    ir->nstcalcenergy = ir->nstenergy;
 +                }
 +            }
 +        }
 +        else if (ir->nstenergy > 0 && ir->nstcalcenergy > ir->nstenergy)
 +        {
 +            /* If the user sets nstenergy small, we should respect that */
 +            sprintf(warn_buf,"Setting nstcalcenergy (%d) equal to nstenergy (%d)",ir->nstcalcenergy,ir->nstenergy);
 +            warning_note(wi,warn_buf);
 +            ir->nstcalcenergy = ir->nstenergy;
 +        }
 +
 +        if (ir->epc != epcNO)
 +        {
 +            if (ir->nstpcouple < 0)
 +            {
 +                ir->nstpcouple = ir_optimal_nstpcouple(ir);
 +            }
 +        }
 +        if (IR_TWINRANGE(*ir))
 +        {
 +            check_nst("nstlist",ir->nstlist,
 +                      "nstcalcenergy",&ir->nstcalcenergy,wi);
 +            if (ir->epc != epcNO)
 +            {
 +                check_nst("nstlist",ir->nstlist,
 +                          "nstpcouple",&ir->nstpcouple,wi); 
 +            }
 +        }
 +
 +        if (ir->nstcalcenergy > 1)
 +        {
 +            /* for storing exact averages nstenergy should be
 +             * a multiple of nstcalcenergy
 +             */
 +            check_nst("nstcalcenergy",ir->nstcalcenergy,
 +                      "nstenergy",&ir->nstenergy,wi);
 +            if (ir->efep != efepNO)
 +            {
 +                /* nstdhdl should be a multiple of nstcalcenergy */
 +                check_nst("nstcalcenergy",ir->nstcalcenergy,
 +                          "nstdhdl",&ir->fepvals->nstdhdl,wi);
++                /* nstexpanded should be a multiple of nstcalcenergy */
++                check_nst("nstcalcenergy",ir->nstcalcenergy,
++                          "nstexpanded",&ir->expandedvals->nstexpanded,wi);
 +            }
 +        }
 +    }
 +
 +  /* LD STUFF */
 +  if ((EI_SD(ir->eI) || ir->eI == eiBD) &&
 +      ir->bContinuation && ir->ld_seed != -1) {
 +      warning_note(wi,"You are doing a continuation with SD or BD, make sure that ld_seed is different from the previous run (using ld_seed=-1 will ensure this)");
 +  }
 +
 +  /* TPI STUFF */
 +  if (EI_TPI(ir->eI)) {
 +    sprintf(err_buf,"TPI only works with pbc = %s",epbc_names[epbcXYZ]);
 +    CHECK(ir->ePBC != epbcXYZ);
 +    sprintf(err_buf,"TPI only works with ns = %s",ens_names[ensGRID]);
 +    CHECK(ir->ns_type != ensGRID);
 +    sprintf(err_buf,"with TPI nstlist should be larger than zero");
 +    CHECK(ir->nstlist <= 0);
 +    sprintf(err_buf,"TPI does not work with full electrostatics other than PME");
 +    CHECK(EEL_FULL(ir->coulombtype) && !EEL_PME(ir->coulombtype));
 +  }
 +
 +  /* SHAKE / LINCS */
 +  if ( (opts->nshake > 0) && (opts->bMorse) ) {
 +      sprintf(warn_buf,
 +              "Using morse bond-potentials while constraining bonds is useless");
 +      warning(wi,warn_buf);
 +  }
 +
 +  /* verify simulated tempering options */
 +
 +  if (ir->bSimTemp) {
 +      gmx_bool bAllTempZero = TRUE;
 +      for (i=0;i<fep->n_lambda;i++)
 +      {
 +          sprintf(err_buf,"Entry %d for %s must be between 0 and 1, instead is %g",i,efpt_names[efptTEMPERATURE],fep->all_lambda[efptTEMPERATURE][i]);
 +          CHECK((fep->all_lambda[efptTEMPERATURE][i] < 0) || (fep->all_lambda[efptTEMPERATURE][i] > 1));
 +          if (fep->all_lambda[efptTEMPERATURE][i] > 0)
 +          {
 +              bAllTempZero = FALSE;
 +          }
 +      }
 +      sprintf(err_buf,"if simulated tempering is on, temperature-lambdas may not be all zero");
 +      CHECK(bAllTempZero==TRUE);
 +
 +      sprintf(err_buf,"Simulated tempering is currently only compatible with md-vv");
 +      CHECK(ir->eI != eiVV);
 +
 +      /* check compatibility of the temperature coupling with simulated tempering */
 +
 +      if (ir->etc == etcNOSEHOOVER) {
 +          sprintf(warn_buf,"Nose-Hoover based temperature control such as [%s] may not be entirely consistent with simulated tempering",etcoupl_names[ir->etc]);
 +          warning_note(wi,warn_buf);
 +      }
 +
 +      /* check that the temperatures make sense */
 +
 +      sprintf(err_buf,"Higher simulated tempering temperature (%g) must be > the simulated tempering lower temperature (%g)",ir->simtempvals->simtemp_high,ir->simtempvals->simtemp_low);
 +      CHECK(ir->simtempvals->simtemp_high <= ir->simtempvals->simtemp_low);
 +
 +      sprintf(err_buf,"Higher simulated tempering temperature (%g) must be > zero",ir->simtempvals->simtemp_high);
 +      CHECK(ir->simtempvals->simtemp_high <= 0);
 +
 +      sprintf(err_buf,"Lower simulated tempering temperature (%g) must be > zero",ir->simtempvals->simtemp_low);
 +      CHECK(ir->simtempvals->simtemp_low <= 0);
 +  }
 +
 +  /* verify free energy options */
 +
 +  if (ir->efep != efepNO) {
 +      fep = ir->fepvals;
 +      sprintf(err_buf,"The soft-core power is %d and can only be 1 or 2",
 +              fep->sc_power);
 +      CHECK(fep->sc_alpha!=0 && fep->sc_power!=1 && fep->sc_power!=2);
 +
 +      sprintf(err_buf,"The soft-core sc-r-power is %d and can only be 6 or 48",
 +              (int)fep->sc_r_power);
 +      CHECK(fep->sc_alpha!=0 && fep->sc_r_power!=6.0 && fep->sc_r_power!=48.0);
 +
 +      /* check validity of options */
 +      if (fep->n_lambda > 0 && ir->rlist < max(ir->rvdw,ir->rcoulomb))
 +      {
 +          sprintf(warn_buf,
 +                  "For foreign lambda free energy differences it is assumed that the soft-core interactions have no effect beyond the neighborlist cut-off");
 +          warning(wi,warn_buf);
 +      }
 +
 +      sprintf(err_buf,"Can't use positive delta-lambda (%g) if initial state/lambda does not start at zero",fep->delta_lambda);
 +      CHECK(fep->delta_lambda > 0 && ((fep->init_fep_state !=0) ||  (fep->init_lambda !=0)));
 +
 +      sprintf(err_buf,"Can't use positive delta-lambda (%g) with expanded ensemble simulations",fep->delta_lambda);
 +      CHECK(fep->delta_lambda > 0 && (ir->efep == efepEXPANDED));
 +
 +      sprintf(err_buf,"Free-energy not implemented for Ewald");
 +      CHECK(ir->coulombtype==eelEWALD);
 +
 +      /* check validity of lambda inputs */
 +      sprintf(err_buf,"initial thermodynamic state %d does not exist, only goes to %d",fep->init_fep_state,fep->n_lambda);
 +      CHECK((fep->init_fep_state > fep->n_lambda));
 +
 +      for (j=0;j<efptNR;j++)
 +      {
 +          for (i=0;i<fep->n_lambda;i++)
 +          {
 +              sprintf(err_buf,"Entry %d for %s must be between 0 and 1, instead is %g",i,efpt_names[j],fep->all_lambda[j][i]);
 +              CHECK((fep->all_lambda[j][i] < 0) || (fep->all_lambda[j][i] > 1));
 +          }
 +      }
 +
 +      if ((fep->sc_alpha>0) && (!fep->bScCoul))
 +      {
 +          for (i=0;i<fep->n_lambda;i++)
 +          {
 +              sprintf(err_buf,"For state %d, vdw-lambdas (%f) is changing with vdw softcore, while coul-lambdas (%f) is nonzero without coulomb softcore: this will lead to crashes, and is not supported.",i,fep->all_lambda[efptVDW][i],
 +                      fep->all_lambda[efptCOUL][i]);
 +              CHECK((fep->sc_alpha>0) &&
 +                    (((fep->all_lambda[efptCOUL][i] > 0.0) &&
 +                      (fep->all_lambda[efptCOUL][i] < 1.0)) &&
 +                     ((fep->all_lambda[efptVDW][i] > 0.0) &&
 +                      (fep->all_lambda[efptVDW][i] < 1.0))));
 +          }
 +      }
 +
 +      if ((fep->bScCoul) && (EEL_PME(ir->coulombtype)))
 +      {
 +          sprintf(warn_buf,"With coulomb soft core, the reciprocal space calculation will not necessarily cancel.  It may be necessary to decrease the reciprocal space energy, and increase the cutoff radius to get sufficiently close matches to energies with free energy turned off.");
 +          warning(wi, warn_buf);
 +      }
 +
 +      /*  Free Energy Checks -- In an ideal world, slow growth and FEP would
 +          be treated differently, but that's the next step */
 +
 +      for (i=0;i<efptNR;i++) {
 +          for (j=0;j<fep->n_lambda;j++) {
 +              sprintf(err_buf,"%s[%d] must be between 0 and 1",efpt_names[i],j);
 +              CHECK((fep->all_lambda[i][j] < 0) || (fep->all_lambda[i][j] > 1));
 +          }
 +      }
 +  }
 +
 +  if ((ir->bSimTemp) || (ir->efep == efepEXPANDED)) {
 +      fep = ir->fepvals;
 +      expand = ir->expandedvals;
 +
 +      /* checking equilibration of weights inputs for validity */
 +
 +      sprintf(err_buf,"weight-equil-number-all-lambda (%d) is ignored if lmc-weights-equil is not equal to %s",
 +              expand->equil_n_at_lam,elmceq_names[elmceqNUMATLAM]);
 +      CHECK((expand->equil_n_at_lam>0) && (expand->elmceq!=elmceqNUMATLAM));
 +
 +      sprintf(err_buf,"weight-equil-number-samples (%d) is ignored if lmc-weights-equil is not equal to %s",
 +              expand->equil_samples,elmceq_names[elmceqSAMPLES]);
 +      CHECK((expand->equil_samples>0) && (expand->elmceq!=elmceqSAMPLES));
 +
 +      sprintf(err_buf,"weight-equil-number-steps (%d) is ignored if lmc-weights-equil is not equal to %s",
 +              expand->equil_steps,elmceq_names[elmceqSTEPS]);
 +      CHECK((expand->equil_steps>0) && (expand->elmceq!=elmceqSTEPS));
 +
 +      sprintf(err_buf,"weight-equil-wl-delta (%f) is ignored if lmc-weights-equil is not equal to %s",
 +              expand->equil_wl_delta,elmceq_names[elmceqWLDELTA]);
 +      CHECK((expand->equil_wl_delta>0) && (expand->elmceq!=elmceqWLDELTA));
 +
 +      sprintf(err_buf,"weight-equil-count-ratio (%f) is ignored if lmc-weights-equil is not equal to %s",
 +              expand->equil_ratio,elmceq_names[elmceqRATIO]);
 +      CHECK((expand->equil_ratio>0) && (expand->elmceq!=elmceqRATIO));
 +
 +      sprintf(err_buf,"weight-equil-number-all-lambda (%d) must be a positive integer if lmc-weights-equil=%s",
 +              expand->equil_n_at_lam,elmceq_names[elmceqNUMATLAM]);
 +      CHECK((expand->equil_n_at_lam<=0) && (expand->elmceq==elmceqNUMATLAM));
 +
 +      sprintf(err_buf,"weight-equil-number-samples (%d) must be a positive integer if lmc-weights-equil=%s",
 +              expand->equil_samples,elmceq_names[elmceqSAMPLES]);
 +      CHECK((expand->equil_samples<=0) && (expand->elmceq==elmceqSAMPLES));
 +
 +      sprintf(err_buf,"weight-equil-number-steps (%d) must be a positive integer if lmc-weights-equil=%s",
 +              expand->equil_steps,elmceq_names[elmceqSTEPS]);
 +      CHECK((expand->equil_steps<=0) && (expand->elmceq==elmceqSTEPS));
 +
 +      sprintf(err_buf,"weight-equil-wl-delta (%f) must be > 0 if lmc-weights-equil=%s",
 +              expand->equil_wl_delta,elmceq_names[elmceqWLDELTA]);
 +      CHECK((expand->equil_wl_delta<=0) && (expand->elmceq==elmceqWLDELTA));
 +
 +      sprintf(err_buf,"weight-equil-count-ratio (%f) must be > 0 if lmc-weights-equil=%s",
 +              expand->equil_ratio,elmceq_names[elmceqRATIO]);
 +      CHECK((expand->equil_ratio<=0) && (expand->elmceq==elmceqRATIO));
 +
 +      sprintf(err_buf,"lmc-weights-equil=%s only possible when lmc-stats = %s or lmc-stats = %s",
 +              elmceq_names[elmceqWLDELTA],elamstats_names[elamstatsWL],elamstats_names[elamstatsWWL]);
 +      CHECK((expand->elmceq==elmceqWLDELTA) && (!EWL(expand->elamstats)));
 +
 +      sprintf(err_buf,"lmc-repeats (%d) must be greater than 0",expand->lmc_repeats);
 +      CHECK((expand->lmc_repeats <= 0));
 +      sprintf(err_buf,"minimum-var-min (%d) must be greater than 0",expand->minvarmin);
 +      CHECK((expand->minvarmin <= 0));
 +      sprintf(err_buf,"weight-c-range (%d) must be greater than or equal to 0",expand->c_range);
 +      CHECK((expand->c_range < 0));
 +      sprintf(err_buf,"init-lambda-state (%d) must be zero if lmc-forced-nstart (%d) > 0 and lmc-move != 'no'",
 +              fep->init_fep_state, expand->lmc_forced_nstart);
 +      CHECK((fep->init_fep_state!=0) && (expand->lmc_forced_nstart>0) && (expand->elmcmove!=elmcmoveNO));
 +      sprintf(err_buf,"lmc-forced-nstart (%d) must not be negative",expand->lmc_forced_nstart);
 +      CHECK((expand->lmc_forced_nstart < 0));
 +      sprintf(err_buf,"init-lambda-state (%d) must be in the interval [0,number of lambdas)",fep->init_fep_state);
 +      CHECK((fep->init_fep_state < 0) || (fep->init_fep_state >= fep->n_lambda));
 +
 +      sprintf(err_buf,"init-wl-delta (%f) must be greater than or equal to 0",expand->init_wl_delta);
 +      CHECK((expand->init_wl_delta < 0));
 +      sprintf(err_buf,"wl-ratio (%f) must be between 0 and 1",expand->wl_ratio);
 +      CHECK((expand->wl_ratio <= 0) || (expand->wl_ratio >= 1));
 +      sprintf(err_buf,"wl-scale (%f) must be between 0 and 1",expand->wl_scale);
 +      CHECK((expand->wl_scale <= 0) || (expand->wl_scale >= 1));
 +
 +      /* if there is no temperature control, we need to specify an MC temperature */
 +      sprintf(err_buf,"If there is no temperature control, and lmc-mcmove != 'no', mc_temperature must be set to a positive number");
 +      if (expand->nstTij > 0)
 +      {
 +          sprintf(err_buf,"nst-transition-matrix (%d) must be an integer multiple of nstlog (%d)",
 +                  expand->nstTij,ir->nstlog);
 +          CHECK((mod(expand->nstTij,ir->nstlog)!=0));
 +      }
 +  }
 +
 +  /* PBC/WALLS */
 +  sprintf(err_buf,"walls only work with pbc=%s",epbc_names[epbcXY]);
 +  CHECK(ir->nwall && ir->ePBC!=epbcXY);
 +
 +  /* VACUUM STUFF */
 +  if (ir->ePBC != epbcXYZ && ir->nwall != 2) {
 +    if (ir->ePBC == epbcNONE) {
 +      if (ir->epc != epcNO) {
 +          warning(wi,"Turning off pressure coupling for vacuum system");
 +          ir->epc = epcNO;
 +      }
 +    } else {
 +      sprintf(err_buf,"Can not have pressure coupling with pbc=%s",
 +            epbc_names[ir->ePBC]);
 +      CHECK(ir->epc != epcNO);
 +    }
 +    sprintf(err_buf,"Can not have Ewald with pbc=%s",epbc_names[ir->ePBC]);
 +    CHECK(EEL_FULL(ir->coulombtype));
 +
 +    sprintf(err_buf,"Can not have dispersion correction with pbc=%s",
 +          epbc_names[ir->ePBC]);
 +    CHECK(ir->eDispCorr != edispcNO);
 +  }
 +
 +  if (ir->rlist == 0.0) {
 +    sprintf(err_buf,"can only have neighborlist cut-off zero (=infinite)\n"
 +          "with coulombtype = %s or coulombtype = %s\n"
 +          "without periodic boundary conditions (pbc = %s) and\n"
 +          "rcoulomb and rvdw set to zero",
 +          eel_names[eelCUT],eel_names[eelUSER],epbc_names[epbcNONE]);
 +    CHECK(((ir->coulombtype != eelCUT) && (ir->coulombtype != eelUSER)) ||
 +        (ir->ePBC     != epbcNONE) ||
 +        (ir->rcoulomb != 0.0)      || (ir->rvdw != 0.0));
 +
 +    if (ir->nstlist < 0) {
 +        warning_error(wi,"Can not have heuristic neighborlist updates without cut-off");
 +    }
 +    if (ir->nstlist > 0) {
 +        warning_note(wi,"Simulating without cut-offs is usually (slightly) faster with nstlist=0, ns_type=simple and particle decomposition");
 +    }
 +  }
 +
 +  /* COMM STUFF */
 +  if (ir->nstcomm == 0) {
 +    ir->comm_mode = ecmNO;
 +  }
 +  if (ir->comm_mode != ecmNO) {
 +    if (ir->nstcomm < 0) {
 +        warning(wi,"If you want to remove the rotation around the center of mass, you should set comm_mode = Angular instead of setting nstcomm < 0. nstcomm is modified to its absolute value");
 +      ir->nstcomm = abs(ir->nstcomm);
 +    }
 +
 +    if (ir->nstcalcenergy > 0 && ir->nstcomm < ir->nstcalcenergy) {
 +        warning_note(wi,"nstcomm < nstcalcenergy defeats the purpose of nstcalcenergy, setting nstcomm to nstcalcenergy");
 +        ir->nstcomm = ir->nstcalcenergy;
 +    }
 +
 +    if (ir->comm_mode == ecmANGULAR) {
 +      sprintf(err_buf,"Can not remove the rotation around the center of mass with periodic molecules");
 +      CHECK(ir->bPeriodicMols);
 +      if (ir->ePBC != epbcNONE)
 +          warning(wi,"Removing the rotation around the center of mass in a periodic system (this is not a problem when you have only one molecule).");
 +    }
 +  }
 +
 +  if (EI_STATE_VELOCITY(ir->eI) && ir->ePBC == epbcNONE && ir->comm_mode != ecmANGULAR) {
 +      warning_note(wi,"Tumbling and/or flying ice-cubes: We are not removing rotation around center of mass in a non-periodic system. You should probably set comm_mode = ANGULAR.");
 +  }
 +  
 +  sprintf(err_buf,"Twin-range neighbour searching (NS) with simple NS"
 +        " algorithm not implemented");
 +  CHECK(((ir->rcoulomb > ir->rlist) || (ir->rvdw > ir->rlist))
 +      && (ir->ns_type == ensSIMPLE));
 +
 +  /* TEMPERATURE COUPLING */
 +  if (ir->etc == etcYES)
 +    {
 +        ir->etc = etcBERENDSEN;
 +        warning_note(wi,"Old option for temperature coupling given: "
 +                     "changing \"yes\" to \"Berendsen\"\n");
 +    }
 +
 +    if ((ir->etc == etcNOSEHOOVER) || (ir->epc == epcMTTK))
 +    {
 +        if (ir->opts.nhchainlength < 1)
 +        {
 +            sprintf(warn_buf,"number of Nose-Hoover chains (currently %d) cannot be less than 1, reset to 1\n",ir->opts.nhchainlength);
 +            ir->opts.nhchainlength =1;
 +            warning(wi,warn_buf);
 +        }
 +        
 +        if (ir->etc==etcNOSEHOOVER && !EI_VV(ir->eI) && ir->opts.nhchainlength > 1)
 +        {
 +            warning_note(wi,"leapfrog does not yet support Nose-Hoover chains, nhchainlength reset to 1");
 +            ir->opts.nhchainlength = 1;
 +        }
 +    }
 +    else
 +    {
 +        ir->opts.nhchainlength = 0;
 +    }
 +
 +    if (ir->eI == eiVVAK) {
 +        sprintf(err_buf,"%s implemented primarily for validation, and requires nsttcouple = 1 and nstpcouple = 1.",
 +                ei_names[eiVVAK]);
 +        CHECK((ir->nsttcouple != 1) || (ir->nstpcouple != 1));
 +    }
 +
 +    if (ETC_ANDERSEN(ir->etc))
 +    {
 +        sprintf(err_buf,"%s temperature control not supported for integrator %s.",etcoupl_names[ir->etc],ei_names[ir->eI]);
 +        CHECK(!(EI_VV(ir->eI)));
 +
 +        for (i=0;i<ir->opts.ngtc;i++)
 +        {
 +            sprintf(err_buf,"all tau_t must currently be equal using Andersen temperature control, violated for group %d",i);
 +            CHECK(ir->opts.tau_t[0] != ir->opts.tau_t[i]);
 +            sprintf(err_buf,"all tau_t must be positive using Andersen temperature control, tau_t[%d]=%10.6f",
 +                    i,ir->opts.tau_t[i]);
 +            CHECK(ir->opts.tau_t[i]<0);
 +        }
 +        if (ir->nstcomm > 0 && (ir->etc == etcANDERSEN)) {
 +            sprintf(warn_buf,"Center of mass removal not necessary for %s.  All velocities of coupled groups are rerandomized periodically, so flying ice cube errors will not occur.",etcoupl_names[ir->etc]);
 +            warning_note(wi,warn_buf);
 +        }
 +
 +        sprintf(err_buf,"nstcomm must be 1, not %d for %s, as velocities of atoms in coupled groups are randomized every time step",ir->nstcomm,etcoupl_names[ir->etc]);
 +        CHECK(ir->nstcomm > 1 && (ir->etc == etcANDERSEN));
 +
 +        for (i=0;i<ir->opts.ngtc;i++)
 +        {
 +            int nsteps = (int)(ir->opts.tau_t[i]/ir->delta_t);
 +            sprintf(err_buf,"tau_t/delta_t for group %d for temperature control method %s must be a multiple of nstcomm (%d), as velocities of atoms in coupled groups are randomized every time step. The input tau_t (%8.3f) leads to %d steps per randomization",i,etcoupl_names[ir->etc],ir->nstcomm,ir->opts.tau_t[i],nsteps);
 +            CHECK((nsteps % ir->nstcomm) && (ir->etc == etcANDERSENMASSIVE));
 +        }
 +    }
 +    if (ir->etc == etcBERENDSEN)
 +    {
 +        sprintf(warn_buf,"The %s thermostat does not generate the correct kinetic energy distribution. You might want to consider using the %s thermostat.",
 +                ETCOUPLTYPE(ir->etc),ETCOUPLTYPE(etcVRESCALE));
 +        warning_note(wi,warn_buf);
 +    }
 +
 +    if ((ir->etc==etcNOSEHOOVER || ETC_ANDERSEN(ir->etc))
 +        && ir->epc==epcBERENDSEN)
 +    {
 +        sprintf(warn_buf,"Using Berendsen pressure coupling invalidates the "
 +                "true ensemble for the thermostat");
 +        warning(wi,warn_buf);
 +    }
 +
 +    /* PRESSURE COUPLING */
 +    if (ir->epc == epcISOTROPIC)
 +    {
 +        ir->epc = epcBERENDSEN;
 +        warning_note(wi,"Old option for pressure coupling given: "
 +                     "changing \"Isotropic\" to \"Berendsen\"\n"); 
 +    }
 +
 +    if (ir->epc != epcNO)
 +    {
 +        dt_pcoupl = ir->nstpcouple*ir->delta_t;
 +
 +        sprintf(err_buf,"tau-p must be > 0 instead of %g\n",ir->tau_p);
 +        CHECK(ir->tau_p <= 0);
 +
 +        if (ir->tau_p/dt_pcoupl < pcouple_min_integration_steps(ir->epc))
 +        {
 +            sprintf(warn_buf,"For proper integration of the %s barostat, tau-p (%g) should be at least %d times larger than nstpcouple*dt (%g)",
 +                    EPCOUPLTYPE(ir->epc),ir->tau_p,pcouple_min_integration_steps(ir->epc),dt_pcoupl);
 +            warning(wi,warn_buf);
 +        }
 +
 +        sprintf(err_buf,"compressibility must be > 0 when using pressure"
 +                " coupling %s\n",EPCOUPLTYPE(ir->epc));
 +        CHECK(ir->compress[XX][XX] < 0 || ir->compress[YY][YY] < 0 ||
 +              ir->compress[ZZ][ZZ] < 0 ||
 +              (trace(ir->compress) == 0 && ir->compress[YY][XX] <= 0 &&
 +               ir->compress[ZZ][XX] <= 0 && ir->compress[ZZ][YY] <= 0));
 +        
 +        if (epcPARRINELLORAHMAN == ir->epc && opts->bGenVel)
 +        {
 +            sprintf(warn_buf,
 +                    "You are generating velocities so I am assuming you "
 +                    "are equilibrating a system. You are using "
 +                    "%s pressure coupling, but this can be "
 +                    "unstable for equilibration. If your system crashes, try "
 +                    "equilibrating first with Berendsen pressure coupling. If "
 +                    "you are not equilibrating the system, you can probably "
 +                    "ignore this warning.",
 +                    epcoupl_names[ir->epc]);
 +            warning(wi,warn_buf);
 +        }
 +    }
 +
 +    if (EI_VV(ir->eI))
 +    {
 +        if (ir->epc > epcNO)
 +        {
 +            if ((ir->epc!=epcBERENDSEN) && (ir->epc!=epcMTTK))
 +            {
 +                warning_error(wi,"for md-vv and md-vv-avek, can only use Berendsen and Martyna-Tuckerman-Tobias-Klein (MTTK) equations for pressure control; MTTK is equivalent to Parrinello-Rahman.");
 +            }
 +        }
 +    }
 +
 +  /* ELECTROSTATICS */
 +  /* More checks are in triple check (grompp.c) */
 +
 +  if (ir->coulombtype == eelSWITCH) {
-     sprintf(warn_buf,"epsilon-r = %g and epsilon-rf = 1 with reaction field, assuming old format and exchanging epsilon-r and epsilon-rf",ir->epsilon_r);
++    sprintf(warn_buf,"coulombtype = %s is only for testing purposes and can lead to serious "
++            "artifacts, advice: use coulombtype = %s",
 +          eel_names[ir->coulombtype],
 +          eel_names[eelRF_ZERO]);
 +    warning(wi,warn_buf);
 +  }
 +
 +  if (ir->epsilon_r!=1 && ir->implicit_solvent==eisGBSA) {
 +    sprintf(warn_buf,"epsilon-r = %g with GB implicit solvent, will use this value for inner dielectric",ir->epsilon_r);
 +    warning_note(wi,warn_buf);
 +  }
 +
 +  if (EEL_RF(ir->coulombtype) && ir->epsilon_rf==1 && ir->epsilon_r!=1) {
-        sprintf(err_buf,"With coulombtype = %s, epsilon-rf must be 0",
++    sprintf(warn_buf,"epsilon-r = %g and epsilon-rf = 1 with reaction field, proceeding assuming old format and exchanging epsilon-r and epsilon-rf",ir->epsilon_r);
 +    warning(wi,warn_buf);
 +    ir->epsilon_rf = ir->epsilon_r;
 +    ir->epsilon_r  = 1.0;
 +  }
 +
 +  if (getenv("GALACTIC_DYNAMICS") == NULL) {  
 +    sprintf(err_buf,"epsilon-r must be >= 0 instead of %g\n",ir->epsilon_r);
 +    CHECK(ir->epsilon_r < 0);
 +  }
 +  
 +  if (EEL_RF(ir->coulombtype)) {
 +    /* reaction field (at the cut-off) */
 +    
 +    if (ir->coulombtype == eelRF_ZERO) {
-       CHECK(ir->epsilon_rf != 0);
++       sprintf(warn_buf,"With coulombtype = %s, epsilon-rf must be 0, assuming you meant epsilon_rf=0",
 +             eel_names[ir->coulombtype]);
-             "With coulombtype = %s rcoulomb_switch must be < rcoulomb",
++        CHECK(ir->epsilon_rf != 0);
++        ir->epsilon_rf = 0.0;
 +    }
 +
 +    sprintf(err_buf,"epsilon-rf must be >= epsilon-r");
 +    CHECK((ir->epsilon_rf < ir->epsilon_r && ir->epsilon_rf != 0) ||
 +        (ir->epsilon_r == 0));
 +    if (ir->epsilon_rf == ir->epsilon_r) {
 +      sprintf(warn_buf,"Using epsilon-rf = epsilon-r with %s does not make sense",
 +            eel_names[ir->coulombtype]);
 +      warning(wi,warn_buf);
 +    }
 +  }
 +  /* Allow rlist>rcoulomb for tabulated long range stuff. This just
 +   * means the interaction is zero outside rcoulomb, but it helps to
 +   * provide accurate energy conservation.
 +   */
 +  if (EEL_MIGHT_BE_ZERO_AT_CUTOFF(ir->coulombtype)) {
 +    if (EEL_SWITCHED(ir->coulombtype)) {
 +      sprintf(err_buf,
-       if (ir->cutoff_scheme == ecutsGROUP) {
-           sprintf(err_buf,"With coulombtype = %s, rcoulomb must be >= rlist",
++            "With coulombtype = %s, rcoulomb_switch must be < rcoulomb. Or better: use the potential modifier options!",
 +            eel_names[ir->coulombtype]);
 +      CHECK(ir->rcoulomb_switch >= ir->rcoulomb);
 +    }
 +  } else if (ir->coulombtype == eelCUT || EEL_RF(ir->coulombtype)) {
-   if (EEL_FULL(ir->coulombtype)) {
-     if (ir->coulombtype==eelPMESWITCH || ir->coulombtype==eelPMEUSER ||
-         ir->coulombtype==eelPMEUSERSWITCH) {
-       sprintf(err_buf,"With coulombtype = %s, rcoulomb must be <= rlist",
-             eel_names[ir->coulombtype]);
-       CHECK(ir->rcoulomb > ir->rlist);
-     } else if (ir->cutoff_scheme == ecutsGROUP) {
-       if (ir->coulombtype == eelPME || ir->coulombtype == eelP3M_AD) {
-       sprintf(err_buf,
-               "With coulombtype = %s, rcoulomb must be equal to rlist\n"
-               "If you want optimal energy conservation or exact integration use %s",
-               eel_names[ir->coulombtype],eel_names[eelPMESWITCH]);
-       } else { 
-       sprintf(err_buf,
-               "With coulombtype = %s, rcoulomb must be equal to rlist",
-               eel_names[ir->coulombtype]);
++      if (ir->cutoff_scheme == ecutsGROUP && ir->coulomb_modifier == eintmodNONE) {
++          sprintf(err_buf,"With coulombtype = %s, rcoulomb should be >= rlist unless you use a potential modifier",
 +                  eel_names[ir->coulombtype]);
 +          CHECK(ir->rlist > ir->rcoulomb);
 +      }
 +  }
 +
-       CHECK(ir->rcoulomb != ir->rlist);
-     }
++  if(ir->coulombtype==eelSWITCH || ir->coulombtype==eelSHIFT ||
++     ir->vdwtype==evdwSWITCH || ir->vdwtype==evdwSHIFT)
++  {
++      sprintf(warn_buf,
++              "The switch/shift interaction settings are just for compatibility; you will get better "
++              "performance from applying potential modifiers to your interactions!\n");
++      warning_note(wi,warn_buf);
++  }
++
++  if (EEL_FULL(ir->coulombtype))
++  {
++      if (ir->coulombtype==eelPMESWITCH || ir->coulombtype==eelPMEUSER ||
++          ir->coulombtype==eelPMEUSERSWITCH)
++      {
++          sprintf(err_buf,"With coulombtype = %s, rcoulomb must be <= rlist",
++                  eel_names[ir->coulombtype]);
++          CHECK(ir->rcoulomb > ir->rlist);
++      }
++      else if (ir->cutoff_scheme == ecutsGROUP && ir->coulomb_modifier == eintmodNONE)
++      {
++          if (ir->coulombtype == eelPME || ir->coulombtype == eelP3M_AD)
++          {
++              sprintf(err_buf,
++                      "With coulombtype = %s (without modifier), rcoulomb must be equal to rlist,\n"
++                      "or rlistlong if nstcalclr=1. For optimal energy conservation, consider using\n"
++                      "a potential modifier.",eel_names[ir->coulombtype]);
++              if(ir->nstcalclr==1)
++              {
++                  CHECK(ir->rcoulomb != ir->rlist && ir->rcoulomb != ir->rlistlong);
++              }
++              else
++              {
++                  CHECK(ir->rcoulomb != ir->rlist);
++              }
++          }
 +      }
-     sprintf(err_buf,"With vdwtype = %s rvdw-switch must be < rvdw",
 +  }
 +
 +  if (EEL_PME(ir->coulombtype)) {
 +    if (ir->pme_order < 3) {
 +        warning_error(wi,"pme-order can not be smaller than 3");
 +    }
 +  }
 +
 +  if (ir->nwall==2 && EEL_FULL(ir->coulombtype)) {
 +    if (ir->ewald_geometry == eewg3D) {
 +      sprintf(warn_buf,"With pbc=%s you should use ewald-geometry=%s",
 +            epbc_names[ir->ePBC],eewg_names[eewg3DC]);
 +      warning(wi,warn_buf);
 +    }
 +    /* This check avoids extra pbc coding for exclusion corrections */
 +    sprintf(err_buf,"wall-ewald-zfac should be >= 2");
 +    CHECK(ir->wall_ewald_zfac < 2);
 +  }
 +
 +  if (EVDW_SWITCHED(ir->vdwtype)) {
-       if (ir->cutoff_scheme == ecutsGROUP) {
-           sprintf(err_buf,"With vdwtype = %s, rvdw must be >= rlist",evdw_names[ir->vdwtype]);
++    sprintf(err_buf,"With vdwtype = %s, rvdw-switch must be < rvdw. Or better, use a potential modifier.",
 +          evdw_names[ir->vdwtype]);
 +    CHECK(ir->rvdw_switch >= ir->rvdw);
 +  } else if (ir->vdwtype == evdwCUT) {
-     sprintf(err_buf,
-           "nstlist=-1 only works with switched or shifted potentials,\n"
-           "suggestion: use vdw-type=%s and coulomb-type=%s",
-           evdw_names[evdwSHIFT],eel_names[eelPMESWITCH]);
-     CHECK(!(EEL_MIGHT_BE_ZERO_AT_CUTOFF(ir->coulombtype) &&
-             EVDW_MIGHT_BE_ZERO_AT_CUTOFF(ir->vdwtype)));
++      if (ir->cutoff_scheme == ecutsGROUP && ir->vdw_modifier == eintmodNONE) {
++          sprintf(err_buf,"With vdwtype = %s, rvdw must be >= rlist unless you use a potential modifier",evdw_names[ir->vdwtype]);
 +          CHECK(ir->rlist > ir->rvdw);
 +      }
 +  }
 +    if (ir->cutoff_scheme == ecutsGROUP)
 +    {
 +        if (EEL_IS_ZERO_AT_CUTOFF(ir->coulombtype)
 +            && (ir->rlistlong <= ir->rcoulomb))
 +        {
 +            sprintf(warn_buf,"For energy conservation with switch/shift potentials, %s should be 0.1 to 0.3 nm larger than rcoulomb.",
 +                    IR_TWINRANGE(*ir) ? "rlistlong" : "rlist");
 +            warning_note(wi,warn_buf);
 +        }
 +        if (EVDW_SWITCHED(ir->vdwtype) && (ir->rlistlong <= ir->rvdw))
 +        {
 +            sprintf(warn_buf,"For energy conservation with switch/shift potentials, %s should be 0.1 to 0.3 nm larger than rvdw.",
 +                    IR_TWINRANGE(*ir) ? "rlistlong" : "rlist");
 +            warning_note(wi,warn_buf);
 +        }
 +    }
 +
 +  if (ir->vdwtype == evdwUSER && ir->eDispCorr != edispcNO) {
 +      warning_note(wi,"You have selected user tables with dispersion correction, the dispersion will be corrected to -C6/r^6 beyond rvdw_switch (the tabulated interaction between rvdw_switch and rvdw will not be double counted). Make sure that you really want dispersion correction to -C6/r^6.");
 +  }
 +
 +  if (ir->nstlist == -1) {
-         if (!EVDW_MIGHT_BE_ZERO_AT_CUTOFF(ir->vdwtype) && ir->rvdw > 0)
 +    sprintf(err_buf,"With nstlist=-1 rvdw and rcoulomb should be smaller than rlist to account for diffusion and possibly charge-group radii");
 +    CHECK(ir->rvdw >= ir->rlist || ir->rcoulomb >= ir->rlist);
 +  }
 +  sprintf(err_buf,"nstlist can not be smaller than -1");
 +  CHECK(ir->nstlist < -1);
 +
 +  if (ir->eI == eiLBFGS && (ir->coulombtype==eelCUT || ir->vdwtype==evdwCUT)
 +     && ir->rvdw != 0) {
 +    warning(wi,"For efficient BFGS minimization, use switch/shift/pme instead of cut-off.");
 +  }
 +
 +  if (ir->eI == eiLBFGS && ir->nbfgscorr <= 0) {
 +    warning(wi,"Using L-BFGS with nbfgscorr<=0 just gets you steepest descent.");
 +  }
 +
 +    /* ENERGY CONSERVATION */
 +    if (ir_NVE(ir) && ir->cutoff_scheme == ecutsGROUP)
 +    {
-         if (!EEL_MIGHT_BE_ZERO_AT_CUTOFF(ir->coulombtype) && ir->rcoulomb > 0)
++        if (!EVDW_MIGHT_BE_ZERO_AT_CUTOFF(ir->vdwtype) && ir->rvdw > 0 && ir->vdw_modifier == eintmodNONE)
 +        {
 +            sprintf(warn_buf,"You are using a cut-off for VdW interactions with NVE, for good energy conservation use vdwtype = %s (possibly with DispCorr)",
 +                    evdw_names[evdwSHIFT]);
 +            warning_note(wi,warn_buf);
 +        }
-         expand->nstexpanded = ir->nstlist;
-         /* if you don't specify nstexpanded when doing expanded ensemble simulated tempering, it is set to nstlist*/
++        if (!EEL_MIGHT_BE_ZERO_AT_CUTOFF(ir->coulombtype) && ir->rcoulomb > 0 && ir->coulomb_modifier == eintmodNONE)
 +        {
 +            sprintf(warn_buf,"You are using a cut-off for electrostatics with NVE, for good energy conservation use coulombtype = %s or %s",
 +                    eel_names[eelPMESWITCH],eel_names[eelRF_ZERO]);
 +            warning_note(wi,warn_buf);
 +        }
 +    }
 +
 +  /* IMPLICIT SOLVENT */
 +  if(ir->coulombtype==eelGB_NOTUSED)
 +  {
 +    ir->coulombtype=eelCUT;
 +    ir->implicit_solvent=eisGBSA;
 +    fprintf(stderr,"Note: Old option for generalized born electrostatics given:\n"
 +          "Changing coulombtype from \"generalized-born\" to \"cut-off\" and instead\n"
 +            "setting implicit-solvent value to \"GBSA\" in input section.\n");
 +  }
 +
 +  if(ir->sa_algorithm==esaSTILL)
 +  {
 +    sprintf(err_buf,"Still SA algorithm not available yet, use %s or %s instead\n",esa_names[esaAPPROX],esa_names[esaNO]);
 +    CHECK(ir->sa_algorithm == esaSTILL);
 +  }
 +  
 +  if(ir->implicit_solvent==eisGBSA)
 +  {
 +    sprintf(err_buf,"With GBSA implicit solvent, rgbradii must be equal to rlist.");
 +    CHECK(ir->rgbradii != ir->rlist);
 +        
 +    if(ir->coulombtype!=eelCUT)
 +        {
 +                sprintf(err_buf,"With GBSA, coulombtype must be equal to %s\n",eel_names[eelCUT]);
 +                CHECK(ir->coulombtype!=eelCUT);
 +        }
 +        if(ir->vdwtype!=evdwCUT)
 +        {
 +                sprintf(err_buf,"With GBSA, vdw-type must be equal to %s\n",evdw_names[evdwCUT]);
 +                CHECK(ir->vdwtype!=evdwCUT);
 +        }
 +    if(ir->nstgbradii<1)
 +    {
 +      sprintf(warn_buf,"Using GBSA with nstgbradii<1, setting nstgbradii=1");
 +      warning_note(wi,warn_buf);
 +      ir->nstgbradii=1;
 +    }
 +    if(ir->sa_algorithm==esaNO)
 +    {
 +      sprintf(warn_buf,"No SA (non-polar) calculation requested together with GB. Are you sure this is what you want?\n");
 +      warning_note(wi,warn_buf);
 +    }
 +    if(ir->sa_surface_tension<0 && ir->sa_algorithm!=esaNO)
 +    {
 +      sprintf(warn_buf,"Value of sa_surface_tension is < 0. Changing it to 2.05016 or 2.25936 kJ/nm^2/mol for Still and HCT/OBC respectively\n");
 +      warning_note(wi,warn_buf);
 +      
 +      if(ir->gb_algorithm==egbSTILL)
 +      {
 +        ir->sa_surface_tension = 0.0049 * CAL2JOULE * 100;
 +      }
 +      else
 +      {
 +        ir->sa_surface_tension = 0.0054 * CAL2JOULE * 100;
 +      }
 +    }
 +    if(ir->sa_surface_tension==0 && ir->sa_algorithm!=esaNO)
 +    {
 +      sprintf(err_buf, "Surface tension set to 0 while SA-calculation requested\n");
 +      CHECK(ir->sa_surface_tension==0 && ir->sa_algorithm!=esaNO);
 +    }
 +    
 +  }
 +
 +    if (ir->bAdress)
 +    {
++        warning_error(wi,"AdResS is currently disabled\n");
 +        if (ir->cutoff_scheme != ecutsGROUP)
 +        {
 +            warning_error(wi,"AdResS simulation supports only cutoff-scheme=group");
 +        }
 +        if (!EI_SD(ir->eI))
 +        {
 +            warning_error(wi,"AdResS simulation supports only stochastic dynamics");
 +        }
 +        if (ir->epc != epcNO)
 +        {
 +            warning_error(wi,"AdResS simulation does not support pressure coupling");
 +        }
 +        if (EEL_FULL(ir->coulombtype))
 +        {
 +            warning_error(wi,"AdResS simulation does not support long-range electrostatics");
 +        }
 +    }
 +}
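To make the cut-off consistency rules above easier to scan, here is a minimal illustrative checker for the group scheme without potential modifiers; the helper name and the simplified enum are invented for this sketch (the twin-range rlistlong acceptance for nstcalclr=1 is omitted), so treat it as a summary of the checks rather than code from the patch.

/* Sketch only: the rcoulomb/rlist relations enforced above for
 * cutoff-scheme = group with coulomb-modifier = none.                    */
typedef enum { COUL_CUT, COUL_RF, COUL_PME, COUL_PME_SWITCH } coul_t;

static int rcoulomb_rlist_consistent(coul_t type, double rcoulomb, double rlist)
{
    switch (type)
    {
        case COUL_CUT:
        case COUL_RF:
            return rcoulomb >= rlist;  /* plain cut-off / reaction field     */
        case COUL_PME:
            return rcoulomb == rlist;  /* plain PME: real-space cut-off must
                                          match the neighbour-list cut-off   */
        case COUL_PME_SWITCH:
            return rcoulomb <= rlist;  /* switched PME needs buffer in rlist */
        default:
            return 0;
    }
}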
 +
 +/* count the number of text elements separated by whitespace in a string.
 +    str = the input string
 +    maxptr = the maximum number of allowed elements
 +    ptr = the output array of pointers to the first character of each element
 +    returns: the number of elements. */
 +int str_nelem(const char *str,int maxptr,char *ptr[])
 +{
 +  int  np=0;
 +  char *copy0,*copy;
 +  
 +  copy0=strdup(str); 
 +  copy=copy0;
 +  ltrim(copy);
 +  while (*copy != '\0') {
 +    if (np >= maxptr)
 +      gmx_fatal(FARGS,"Too many groups on line: '%s' (max is %d)",
 +                str,maxptr);
 +    if (ptr) 
 +      ptr[np]=copy;
 +    np++;
 +    while ((*copy != '\0') && !isspace(*copy))
 +      copy++;
 +    if (*copy != '\0') {
 +      *copy='\0';
 +      copy++;
 +    }
 +    ltrim(copy);
 +  }
 +  if (ptr == NULL)
 +    sfree(copy0);
 +
 +  return np;
 +}
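A minimal usage sketch for str_nelem (illustrative only, not part of the patch; the group string is made up):

{
    char *names[MAXPTR];
    int   n;

    /* Splits on whitespace; returns the element count and, when the
     * pointer array is non-NULL, pointers into an internal copy that
     * stays valid after the call.                                     */
    n = str_nelem("Protein SOL NA+", MAXPTR, names);
    /* n == 3, names[0] == "Protein", names[1] == "SOL", names[2] == "NA+" */

    /* With a NULL pointer array the call only counts the elements.    */
    n = str_nelem("Protein SOL NA+", MAXPTR, NULL);
}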
 +
 +/* interpret a number of doubles from a string and put them in an array,
 +   after allocating space for them.
 +   str = the input string
 +   n = the number of doubles read (output)
 +   r = the output array of doubles. */
 +static void parse_n_real(char *str,int *n,real **r)
 +{
 +  char *ptr[MAXPTR];
 +  int  i;
 +
 +  *n = str_nelem(str,MAXPTR,ptr);
 +
 +  snew(*r,*n);
 +  for(i=0; i<*n; i++) {
 +    (*r)[i] = strtod(ptr[i],NULL);
 +  }
 +}
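And a companion sketch for parse_n_real, e.g. for one of the lambda vectors read further down (values invented):

{
    char  s[] = "0.0 0.5 1.0";
    int   n;
    real *vals;

    parse_n_real(s, &n, &vals);  /* allocates vals internally with snew() */
    /* n == 3, vals[0] == 0.0, vals[1] == 0.5, vals[2] == 1.0 */
    sfree(vals);
}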
 +
 +static void do_fep_params(t_inputrec *ir, char fep_lambda[][STRLEN],char weights[STRLEN]) {
 +
 +    int i,j,max_n_lambda,nweights,nfep[efptNR];
 +    t_lambda *fep = ir->fepvals;
 +    t_expanded *expand = ir->expandedvals;
 +    real **count_fep_lambdas;
 +    gmx_bool bOneLambda = TRUE;
 +
 +    snew(count_fep_lambdas,efptNR);
 +
 +    /* FEP input processing */
 +    /* first, identify the number of lambda values for each type.
 +       All that are nonzero must have the same number */
 +
 +    for (i=0;i<efptNR;i++)
 +    {
 +        parse_n_real(fep_lambda[i],&(nfep[i]),&(count_fep_lambdas[i]));
 +    }
 +
 +    /* now, determine the number of components.  All must be either zero, or equal. */
 +
 +    max_n_lambda = 0;
 +    for (i=0;i<efptNR;i++)
 +    {
 +        if (nfep[i] > max_n_lambda) {
 +            max_n_lambda = nfep[i];  /* here's a nonzero one.  All of them
 +                                        must have the same number if it's not zero. */
 +            break;
 +        }
 +    }
 +
 +    for (i=0;i<efptNR;i++)
 +    {
 +        if (nfep[i] == 0)
 +        {
 +            ir->fepvals->separate_dvdl[i] = FALSE;
 +        }
 +        else if (nfep[i] == max_n_lambda)
 +        {
 +            if (i!=efptTEMPERATURE)  /* we treat this differently -- not really a reason to compute the derivative with
 +                                        respect to the temperature currently */
 +            {
 +                ir->fepvals->separate_dvdl[i] = TRUE;
 +            }
 +        }
 +        else
 +        {
 +            gmx_fatal(FARGS,"Number of lambdas (%d) for FEP type %s not equal to number of other types (%d)",
 +                      nfep[i],efpt_names[i],max_n_lambda);
 +        }
 +    }
 +    /* we don't print out dhdl if the temperature is changing, since we can't correctly define dhdl in this case */
 +    ir->fepvals->separate_dvdl[efptTEMPERATURE] = FALSE;
 +
 +    /* the number of lambdas is the number we've read in, which is either zero
 +       or the same for all */
 +    fep->n_lambda = max_n_lambda;
 +
 +    /* allocate space for the array of lambda values */
 +    snew(fep->all_lambda,efptNR);
 +    /* if init_lambda is defined, we need to set lambda */
 +    if ((fep->init_lambda > 0) && (fep->n_lambda == 0))
 +    {
 +        ir->fepvals->separate_dvdl[efptFEP] = TRUE;
 +    }
 +    /* otherwise allocate the space for all of the lambdas, and transfer the data */
 +    for (i=0;i<efptNR;i++)
 +    {
 +        snew(fep->all_lambda[i],fep->n_lambda);
 +        if (nfep[i] > 0)  /* if it's zero, then the count_fep_lambda arrays
 +                             are zero */
 +        {
 +            for (j=0;j<fep->n_lambda;j++)
 +            {
 +                fep->all_lambda[i][j] = (double)count_fep_lambdas[i][j];
 +            }
 +            sfree(count_fep_lambdas[i]);
 +        }
 +    }
 +    sfree(count_fep_lambdas);
 +
 +    /* "fep-vals" is either zero or the full number. If zero, we'll need to define fep-lambdas for internal
 +       bookkeeping -- for now, init_lambda */
 +
 +    if ((nfep[efptFEP] == 0) && (fep->init_lambda >= 0) && (fep->init_lambda <= 1))
 +    {
 +        for (i=0;i<fep->n_lambda;i++)
 +        {
 +            fep->all_lambda[efptFEP][i] = fep->init_lambda;
 +        }
 +    }
 +
 +    /* check to see if only a single component lambda is defined, and soft core is defined.
 +       In this case, turn on coulomb soft core */
 +
 +    if (max_n_lambda == 0)
 +    {
 +        bOneLambda = TRUE;
 +    }
 +    else
 +    {
 +        for (i=0;i<efptNR;i++)
 +        {
 +            if ((nfep[i] != 0) && (i!=efptFEP))
 +            {
 +                bOneLambda = FALSE;
 +            }
 +        }
 +    }
 +    if ((bOneLambda) && (fep->sc_alpha > 0))
 +    {
 +        fep->bScCoul = TRUE;
 +    }
 +
 +    /* Fill in the others with the efptFEP if they are not explicitly
 +       specified (i.e. nfep[i] == 0).  This means if fep is not defined,
 +       they are all zero. */
 +
 +    for (i=0;i<efptNR;i++)
 +    {
 +        if ((nfep[i] == 0) && (i!=efptFEP))
 +        {
 +            for (j=0;j<fep->n_lambda;j++)
 +            {
 +                fep->all_lambda[i][j] = fep->all_lambda[efptFEP][j];
 +            }
 +        }
 +    }
 +
 +
 +    /* make it easier if sc_r_power = 48 by increasing it to the 4th power, to be in the right scale. */
 +    if (fep->sc_r_power == 48)
 +    {
 +        if (fep->sc_alpha > 0.1)
 +        {
 +            gmx_fatal(FARGS,"sc_alpha (%f) for sc_r_power = 48 should usually be between 0.001 and 0.004", fep->sc_alpha);
 +        }
 +    }
 +
 +    expand = ir->expandedvals;
 +    /* now read in the weights */
 +    parse_n_real(weights,&nweights,&(expand->init_lambda_weights));
 +    if (nweights == 0)
 +    {
 +        expand->bInit_weights = FALSE;
 +        snew(expand->init_lambda_weights,fep->n_lambda); /* initialize to zero */
 +    }
 +    else if (nweights != fep->n_lambda)
 +    {
 +        gmx_fatal(FARGS,"Number of weights (%d) is not equal to number of lambda values (%d)",
 +                  nweights,fep->n_lambda);
 +    }
 +    else
 +    {
 +        expand->bInit_weights = TRUE;
 +    }
 +    if ((expand->nstexpanded < 0) && (ir->efep != efepNO)) {
 +        expand->nstexpanded = fep->nstdhdl;
 +        /* if you don't specify nstexpanded when doing expanded ensemble free energy calcs, it is set to nstdhdl */
 +    }
 +    if ((expand->nstexpanded < 0) && ir->bSimTemp) {
-               warning_note(wi,"tau-t = -1 is the new value to signal that a group should not have temperature coupling. Treating your use of tau-t = 0 as if you used -1.");
++        expand->nstexpanded = 2*(int)(ir->opts.tau_t[0]/ir->delta_t);
++        /* if you don't specify nstexpanded when doing expanded ensemble simulated tempering, it is set to
++           2*tau_t just to be careful so it's not too frequent */
 +    }
 +}
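As an illustration of the fill-in logic above, consider this hypothetical .mdp fragment (not taken from the patch):

/*
 *   fep-lambdas  = 0.0 0.5 1.0
 *   coul-lambdas = 0.0 1.0 1.0
 *
 * do_fep_params() then sets n_lambda = 3 with
 *   all_lambda[efptFEP]  = {0.0, 0.5, 1.0}
 *   all_lambda[efptCOUL] = {0.0, 1.0, 1.0}
 * and every type that was left empty (mass, vdw, bonded, restraint) is
 * copied from the efptFEP column, e.g. all_lambda[efptVDW] = {0.0, 0.5, 1.0}.
 * separate_dvdl[] becomes TRUE only for the explicitly given, non-temperature
 * types; if only fep-lambdas had been given and sc-alpha were non-zero,
 * bScCoul would be switched on as well.
 */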
 +
 +
 +static void do_simtemp_params(t_inputrec *ir) {
 +
 +    snew(ir->simtempvals->temperatures,ir->fepvals->n_lambda);
 +    GetSimTemps(ir->fepvals->n_lambda,ir->simtempvals,ir->fepvals->all_lambda[efptTEMPERATURE]);
 +
 +    return;
 +}
 +
 +static void do_wall_params(t_inputrec *ir,
 +                           char *wall_atomtype, char *wall_density,
 +                           t_gromppopts *opts)
 +{
 +    int  nstr,i;
 +    char *names[MAXPTR];
 +    double dbl;
 +
 +    opts->wall_atomtype[0] = NULL;
 +    opts->wall_atomtype[1] = NULL;
 +
 +    ir->wall_atomtype[0] = -1;
 +    ir->wall_atomtype[1] = -1;
 +    ir->wall_density[0] = 0;
 +    ir->wall_density[1] = 0;
 +  
 +    if (ir->nwall > 0)
 +    {
 +        nstr = str_nelem(wall_atomtype,MAXPTR,names);
 +        if (nstr != ir->nwall)
 +        {
 +            gmx_fatal(FARGS,"Expected %d elements for wall_atomtype, found %d",
 +                      ir->nwall,nstr);
 +        }
 +        for(i=0; i<ir->nwall; i++)
 +        {
 +            opts->wall_atomtype[i] = strdup(names[i]);
 +        }
 +    
 +        if (ir->wall_type == ewt93 || ir->wall_type == ewt104) {
 +            nstr = str_nelem(wall_density,MAXPTR,names);
 +            if (nstr != ir->nwall)
 +            {
 +                gmx_fatal(FARGS,"Expected %d elements for wall-density, found %d",ir->nwall,nstr);
 +            }
 +            for(i=0; i<ir->nwall; i++)
 +            {
 +                sscanf(names[i],"%lf",&dbl);
 +                if (dbl <= 0)
 +                {
 +                    gmx_fatal(FARGS,"wall-density[%d] = %f, but it must be > 0\n",i,dbl);
 +                }
 +                ir->wall_density[i] = dbl;
 +            }
 +        }
 +    }
 +}
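A hypothetical .mdp fragment matching the wall parsing above (the atom type and density values are invented):

/*
 *   nwall          = 2
 *   wall-type      = 9-3
 *   wall-atomtype  = CW CW     ; one atom type name per wall
 *   wall-density   = 10 10     ; number density, required for 9-3/10-4 walls
 *
 * do_wall_params() stores opts->wall_atomtype[0..1] = "CW" and
 * ir->wall_density[0..1] = 10; a density <= 0 is a fatal error.
 */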
 +
 +static void add_wall_energrps(gmx_groups_t *groups,int nwall,t_symtab *symtab)
 +{
 +  int  i;
 +  t_grps *grps;
 +  char str[STRLEN];
 +  
 +  if (nwall > 0) {
 +    srenew(groups->grpname,groups->ngrpname+nwall);
 +    grps = &(groups->grps[egcENER]);
 +    srenew(grps->nm_ind,grps->nr+nwall);
 +    for(i=0; i<nwall; i++) {
 +      sprintf(str,"wall%d",i);
 +      groups->grpname[groups->ngrpname] = put_symtab(symtab,str);
 +      grps->nm_ind[grps->nr++] = groups->ngrpname++;
 +    }
 +  }
 +}
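For reference, the effect of add_wall_energrps in the same two-wall case (group names follow the sprintf above):

/*
 * With nwall = 2 the energy-group list is extended by two automatically
 * generated groups named "wall0" and "wall1", so energygrp-table and
 * energygrp-excl entries can refer to the walls like any other group.
 */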
 +
 +void read_expandedparams(int *ninp_p,t_inpfile **inp_p,
 +                         t_expanded *expand,warninp_t wi)
 +{
 +  int  ninp,nerror=0;
 +  t_inpfile *inp;
 +
 +  ninp   = *ninp_p;
 +  inp    = *inp_p;
 +
 +  /* read expanded ensemble parameters */
 +  CCTYPE ("expanded ensemble variables");
 +  ITYPE ("nstexpanded",expand->nstexpanded,-1);
 +  EETYPE("lmc-stats", expand->elamstats, elamstats_names);
 +  EETYPE("lmc-move", expand->elmcmove, elmcmove_names);
 +  EETYPE("lmc-weights-equil",expand->elmceq,elmceq_names);
 +  ITYPE ("weight-equil-number-all-lambda",expand->equil_n_at_lam,-1);
 +  ITYPE ("weight-equil-number-samples",expand->equil_samples,-1);
 +  ITYPE ("weight-equil-number-steps",expand->equil_steps,-1);
 +  RTYPE ("weight-equil-wl-delta",expand->equil_wl_delta,-1);
 +  RTYPE ("weight-equil-count-ratio",expand->equil_ratio,-1);
 +  CCTYPE("Seed for Monte Carlo in lambda space");
 +  ITYPE ("lmc-seed",expand->lmc_seed,-1);
 +  RTYPE ("mc-temperature",expand->mc_temp,-1);
 +  ITYPE ("lmc-repeats",expand->lmc_repeats,1);
 +  ITYPE ("lmc-gibbsdelta",expand->gibbsdeltalam,-1);
 +  ITYPE ("lmc-forced-nstart",expand->lmc_forced_nstart,0);
 +  EETYPE("symmetrized-transition-matrix", expand->bSymmetrizedTMatrix, yesno_names);
 +  ITYPE("nst-transition-matrix", expand->nstTij, -1);
 +  ITYPE ("mininum-var-min",expand->minvarmin, 100); /*default is reasonable */
 +  ITYPE ("weight-c-range",expand->c_range, 0); /* default is just C=0 */
 +  RTYPE ("wl-scale",expand->wl_scale,0.8);
 +  RTYPE ("wl-ratio",expand->wl_ratio,0.8);
 +  RTYPE ("init-wl-delta",expand->init_wl_delta,1.0);
 +  EETYPE("wl-oneovert",expand->bWLoneovert,yesno_names);
 +
 +  *ninp_p   = ninp;
 +  *inp_p    = inp;
 +
 +  return;
 +}
 +
 +void get_ir(const char *mdparin,const char *mdparout,
 +            t_inputrec *ir,t_gromppopts *opts,
 +            warninp_t wi)
 +{
 +  char      *dumstr[2];
 +  double    dumdub[2][6];
 +  t_inpfile *inp;
 +  const char *tmp;
 +  int       i,j,m,ninp;
 +  char      warn_buf[STRLEN];
 +  t_lambda  *fep = ir->fepvals;
 +  t_expanded *expand = ir->expandedvals;
 +
 +  inp = read_inpfile(mdparin, &ninp, NULL, wi);
 +
 +  snew(dumstr[0],STRLEN);
 +  snew(dumstr[1],STRLEN);
 +
 +  /* remove the following deprecated commands */
 +  REM_TYPE("title");
 +  REM_TYPE("cpp");
 +  REM_TYPE("domain-decomposition");
 +  REM_TYPE("andersen-seed");
 +  REM_TYPE("dihre");
 +  REM_TYPE("dihre-fc");
 +  REM_TYPE("dihre-tau");
 +  REM_TYPE("nstdihreout");
 +  REM_TYPE("nstcheckpoint");
 +
 +  /* replace the following commands with the clearer new versions*/
 +  REPL_TYPE("unconstrained-start","continuation");
 +  REPL_TYPE("foreign-lambda","fep-lambdas");
 +
 +  CCTYPE ("VARIOUS PREPROCESSING OPTIONS");
 +  CTYPE ("Preprocessor information: use cpp syntax.");
 +  CTYPE ("e.g.: -I/home/joe/doe -I/home/mary/roe");
 +  STYPE ("include",   opts->include,  NULL);
 +  CTYPE ("e.g.: -DPOSRES -DFLEXIBLE (note these variable names are case sensitive)");
 +  STYPE ("define",    opts->define,   NULL);
 +    
 +  CCTYPE ("RUN CONTROL PARAMETERS");
 +  EETYPE("integrator",  ir->eI,         ei_names);
 +  CTYPE ("Start time and timestep in ps");
 +  RTYPE ("tinit",     ir->init_t,     0.0);
 +  RTYPE ("dt",                ir->delta_t,    0.001);
 +  STEPTYPE ("nsteps",   ir->nsteps,     0);
 +  CTYPE ("For exact run continuation or redoing part of a run");
 +  STEPTYPE ("init-step",ir->init_step,  0);
 +  CTYPE ("Part index is updated automatically on checkpointing (keeps files separate)");
 +  ITYPE ("simulation-part", ir->simulation_part, 1);
 +  CTYPE ("mode for center of mass motion removal");
 +  EETYPE("comm-mode",   ir->comm_mode,  ecm_names);
 +  CTYPE ("number of steps for center of mass motion removal");
 +  ITYPE ("nstcomm",   ir->nstcomm,    100);
 +  CTYPE ("group(s) for center of mass motion removal");
 +  STYPE ("comm-grps",   vcm,            NULL);
 +  
 +  CCTYPE ("LANGEVIN DYNAMICS OPTIONS");
 +  CTYPE ("Friction coefficient (amu/ps) and random seed");
 +  RTYPE ("bd-fric",     ir->bd_fric,    0.0);
 +  ITYPE ("ld-seed",     ir->ld_seed,    1993);
 +  
 +  /* Em stuff */
 +  CCTYPE ("ENERGY MINIMIZATION OPTIONS");
 +  CTYPE ("Force tolerance and initial step-size");
 +  RTYPE ("emtol",       ir->em_tol,     10.0);
 +  RTYPE ("emstep",      ir->em_stepsize,0.01);
 +  CTYPE ("Max number of iterations in relax-shells");
 +  ITYPE ("niter",       ir->niter,      20);
 +  CTYPE ("Step size (ps^2) for minimization of flexible constraints");
 +  RTYPE ("fcstep",      ir->fc_stepsize, 0);
 +  CTYPE ("Frequency of steepest descents steps when doing CG");
 +  ITYPE ("nstcgsteep",        ir->nstcgsteep, 1000);
 +  ITYPE ("nbfgscorr",   ir->nbfgscorr,  10); 
 +
 +  CCTYPE ("TEST PARTICLE INSERTION OPTIONS");
 +  RTYPE ("rtpi",      ir->rtpi,       0.05);
 +
 +  /* Output options */
 +  CCTYPE ("OUTPUT CONTROL OPTIONS");
 +  CTYPE ("Output frequency for coords (x), velocities (v) and forces (f)");
 +  ITYPE ("nstxout",   ir->nstxout,    0);
 +  ITYPE ("nstvout",   ir->nstvout,    0);
 +  ITYPE ("nstfout",   ir->nstfout,    0);
 +  ir->nstcheckpoint = 1000;
 +  CTYPE ("Output frequency for energies to log file and energy file");
 +  ITYPE ("nstlog",    ir->nstlog,     1000);
 +  ITYPE ("nstcalcenergy",ir->nstcalcenergy,   100);
 +  ITYPE ("nstenergy",   ir->nstenergy,  1000);
 +  CTYPE ("Output frequency and precision for .xtc file");
 +  ITYPE ("nstxtcout",   ir->nstxtcout,  0);
 +  RTYPE ("xtc-precision",ir->xtcprec,   1000.0);
 +  CTYPE ("This selects the subset of atoms for the .xtc file. You can");
 +  CTYPE ("select multiple groups. By default all atoms will be written.");
 +  STYPE ("xtc-grps",    xtc_grps,       NULL);
 +  CTYPE ("Selection of energy groups");
 +  STYPE ("energygrps",  energy,         NULL);
 +
 +  /* Neighbor searching */  
 +  CCTYPE ("NEIGHBORSEARCHING PARAMETERS");
 +  CTYPE ("cut-off scheme (group: using charge groups, Verlet: particle based cut-offs)");
 +  EETYPE("cutoff-scheme",     ir->cutoff_scheme,    ecutscheme_names);
 +  CTYPE ("nblist update frequency");
 +  ITYPE ("nstlist",   ir->nstlist,    10);
 +  CTYPE ("ns algorithm (simple or grid)");
 +  EETYPE("ns-type",     ir->ns_type,    ens_names);
 +  /* set ndelta to the optimal value of 2 */
 +  ir->ndelta = 2;
 +  CTYPE ("Periodic boundary conditions: xyz, no, xy");
 +  EETYPE("pbc",         ir->ePBC,       epbc_names);
 +  EETYPE("periodic-molecules", ir->bPeriodicMols, yesno_names);
 +  CTYPE ("Allowed energy drift due to the Verlet buffer in kJ/mol/ps per atom,");
 +  CTYPE ("a value of -1 means: use rlist");
 +  RTYPE("verlet-buffer-drift", ir->verletbuf_drift,    0.005);
 +  CTYPE ("nblist cut-off");
 +  RTYPE ("rlist",     ir->rlist,      -1);
 +  CTYPE ("long-range cut-off for switched potentials");
 +  RTYPE ("rlistlong", ir->rlistlong,  -1);
++  ITYPE ("nstcalclr", ir->nstcalclr,  -1);
 +
 +  /* Electrostatics */
 +  CCTYPE ("OPTIONS FOR ELECTROSTATICS AND VDW");
 +  CTYPE ("Method for doing electrostatics");
 +  EETYPE("coulombtype",       ir->coulombtype,    eel_names);
 +  EETYPE("coulomb-modifier",  ir->coulomb_modifier,    eintmod_names);
 +  CTYPE ("cut-off lengths");
 +  RTYPE ("rcoulomb-switch",   ir->rcoulomb_switch,    0.0);
 +  RTYPE ("rcoulomb",  ir->rcoulomb,   -1);
 +  CTYPE ("Relative dielectric constant for the medium and the reaction field");
 +  RTYPE ("epsilon-r",   ir->epsilon_r,  1.0);
 +  RTYPE ("epsilon-rf",  ir->epsilon_rf, 0.0);
 +  CTYPE ("Method for doing Van der Waals");
 +  EETYPE("vdw-type",  ir->vdwtype,    evdw_names);
 +  EETYPE("vdw-modifier",      ir->vdw_modifier,    eintmod_names);
 +  CTYPE ("cut-off lengths");
 +  RTYPE ("rvdw-switch",       ir->rvdw_switch,        0.0);
 +  RTYPE ("rvdw",      ir->rvdw,       -1);
 +  CTYPE ("Apply long range dispersion corrections for Energy and Pressure");
 +  EETYPE("DispCorr",    ir->eDispCorr,  edispc_names);
 +  CTYPE ("Extension of the potential lookup tables beyond the cut-off");
 +  RTYPE ("table-extension", ir->tabext, 1.0);
 +  CTYPE ("Separate tables between energy group pairs");
 +  STYPE ("energygrp-table", egptable,   NULL);
 +  CTYPE ("Spacing for the PME/PPPM FFT grid");
 +  RTYPE ("fourierspacing", ir->fourier_spacing,0.12);
 +  CTYPE ("FFT grid size, when a value is 0 fourierspacing will be used");
 +  ITYPE ("fourier-nx",  ir->nkx,         0);
 +  ITYPE ("fourier-ny",  ir->nky,         0);
 +  ITYPE ("fourier-nz",  ir->nkz,         0);
 +  CTYPE ("EWALD/PME/PPPM parameters");
 +  ITYPE ("pme-order",   ir->pme_order,   4);
 +  RTYPE ("ewald-rtol",  ir->ewald_rtol, 0.00001);
 +  EETYPE("ewald-geometry", ir->ewald_geometry, eewg_names);
 +  RTYPE ("epsilon-surface", ir->epsilon_surface, 0.0);
 +  EETYPE("optimize-fft",ir->bOptFFT,  yesno_names);
 +
 +  CCTYPE("IMPLICIT SOLVENT ALGORITHM");
 +  EETYPE("implicit-solvent", ir->implicit_solvent, eis_names);
 +      
 +  CCTYPE ("GENERALIZED BORN ELECTROSTATICS"); 
 +  CTYPE ("Algorithm for calculating Born radii");
 +  EETYPE("gb-algorithm", ir->gb_algorithm, egb_names);
 +  CTYPE ("Frequency of calculating the Born radii inside rlist");
 +  ITYPE ("nstgbradii", ir->nstgbradii, 1);
 +  CTYPE ("Cutoff for Born radii calculation; the contribution from atoms");
 +  CTYPE ("between rlist and rgbradii is updated every nstlist steps");
 +  RTYPE ("rgbradii",  ir->rgbradii, 1.0);
 +  CTYPE ("Dielectric coefficient of the implicit solvent");
 +  RTYPE ("gb-epsilon-solvent",ir->gb_epsilon_solvent, 80.0);
 +  CTYPE ("Salt concentration in M for Generalized Born models");
 +  RTYPE ("gb-saltconc",  ir->gb_saltconc, 0.0);
 +  CTYPE ("Scaling factors used in the OBC GB model. Default values are OBC(II)");
 +  RTYPE ("gb-obc-alpha", ir->gb_obc_alpha, 1.0);
 +  RTYPE ("gb-obc-beta", ir->gb_obc_beta, 0.8);
 +  RTYPE ("gb-obc-gamma", ir->gb_obc_gamma, 4.85);
 +  RTYPE ("gb-dielectric-offset", ir->gb_dielectric_offset, 0.009);
 +  EETYPE("sa-algorithm", ir->sa_algorithm, esa_names);
 +  CTYPE ("Surface tension (kJ/mol/nm^2) for the SA (nonpolar surface) part of GBSA");
 +  CTYPE ("The value -1 will set default value for Still/HCT/OBC GB-models.");
 +  RTYPE ("sa-surface-tension", ir->sa_surface_tension, -1);
 +               
 +  /* Coupling stuff */
 +  CCTYPE ("OPTIONS FOR WEAK COUPLING ALGORITHMS");
 +  CTYPE ("Temperature coupling");
 +  EETYPE("tcoupl",    ir->etc,        etcoupl_names);
 +  ITYPE ("nsttcouple", ir->nsttcouple,  -1);
 +  ITYPE("nh-chain-length",     ir->opts.nhchainlength, NHCHAINLENGTH);
 +  EETYPE("print-nose-hoover-chain-variables", ir->bPrintNHChains, yesno_names);
 +  CTYPE ("Groups to couple separately");
 +  STYPE ("tc-grps",     tcgrps,         NULL);
 +  CTYPE ("Time constant (ps) and reference temperature (K)");
 +  STYPE ("tau-t",     tau_t,          NULL);
 +  STYPE ("ref-t",     ref_t,          NULL);
 +  CTYPE ("pressure coupling");
 +  EETYPE("pcoupl",    ir->epc,        epcoupl_names);
 +  EETYPE("pcoupltype",        ir->epct,       epcoupltype_names);
 +  ITYPE ("nstpcouple", ir->nstpcouple,  -1);
 +  CTYPE ("Time constant (ps), compressibility (1/bar) and reference P (bar)");
 +  RTYPE ("tau-p",     ir->tau_p,      1.0);
 +  STYPE ("compressibility",   dumstr[0],      NULL);
 +  STYPE ("ref-p",       dumstr[1],      NULL);
 +  CTYPE ("Scaling of reference coordinates, No, All or COM");
 +  EETYPE ("refcoord-scaling",ir->refcoord_scaling,erefscaling_names);
 +
 +  /* QMMM */
 +  CCTYPE ("OPTIONS FOR QMMM calculations");
 +  EETYPE("QMMM", ir->bQMMM, yesno_names);
 +  CTYPE ("Groups treated Quantum Mechanically");
 +  STYPE ("QMMM-grps",  QMMM,          NULL);
 +  CTYPE ("QM method");
 +  STYPE("QMmethod",     QMmethod, NULL);
 +  CTYPE ("QMMM scheme");
 +  EETYPE("QMMMscheme",  ir->QMMMscheme,    eQMMMscheme_names);
 +  CTYPE ("QM basisset");
 +  STYPE("QMbasis",      QMbasis, NULL);
 +  CTYPE ("QM charge");
 +  STYPE ("QMcharge",    QMcharge,NULL);
 +  CTYPE ("QM multiplicity");
 +  STYPE ("QMmult",      QMmult,NULL);
 +  CTYPE ("Surface Hopping");
 +  STYPE ("SH",          bSH, NULL);
 +  CTYPE ("CAS space options");
 +  STYPE ("CASorbitals",      CASorbitals,   NULL);
 +  STYPE ("CASelectrons",     CASelectrons,  NULL);
 +  STYPE ("SAon", SAon, NULL);
 +  STYPE ("SAoff",SAoff,NULL);
 +  STYPE ("SAsteps",  SAsteps, NULL);
 +  CTYPE ("Scale factor for MM charges");
 +  RTYPE ("MMChargeScaleFactor", ir->scalefactor, 1.0);
 +  CTYPE ("Optimization of QM subsystem");
 +  STYPE ("bOPT",          bOPT, NULL);
 +  STYPE ("bTS",          bTS, NULL);
 +
 +  /* Simulated annealing */
 +  CCTYPE("SIMULATED ANNEALING");
 +  CTYPE ("Type of annealing for each temperature group (no/single/periodic)");
 +  STYPE ("annealing",   anneal,      NULL);
 +  CTYPE ("Number of time points to use for specifying annealing in each group");
 +  STYPE ("annealing-npoints", anneal_npoints, NULL);
 +  CTYPE ("List of times at the annealing points for each group");
 +  STYPE ("annealing-time",       anneal_time,       NULL);
 +  CTYPE ("Temp. at each annealing point, for each group.");
 +  STYPE ("annealing-temp",  anneal_temp,  NULL);
 +  
 +  /* Startup run */
 +  CCTYPE ("GENERATE VELOCITIES FOR STARTUP RUN");
 +  EETYPE("gen-vel",     opts->bGenVel,  yesno_names);
 +  RTYPE ("gen-temp",    opts->tempi,    300.0);
 +  ITYPE ("gen-seed",    opts->seed,     173529);
 +  
 +  /* Shake stuff */
 +  CCTYPE ("OPTIONS FOR BONDS");
 +  EETYPE("constraints",       opts->nshake,   constraints);
 +  CTYPE ("Type of constraint algorithm");
 +  EETYPE("constraint-algorithm",  ir->eConstrAlg, econstr_names);
 +  CTYPE ("Do not constrain the start configuration");
 +  EETYPE("continuation", ir->bContinuation, yesno_names);
 +  CTYPE ("Use successive overrelaxation to reduce the number of shake iterations");
 +  EETYPE("Shake-SOR", ir->bShakeSOR, yesno_names);
 +  CTYPE ("Relative tolerance of shake");
 +  RTYPE ("shake-tol", ir->shake_tol, 0.0001);
 +  CTYPE ("Highest order in the expansion of the constraint coupling matrix");
 +  ITYPE ("lincs-order", ir->nProjOrder, 4);
 +  CTYPE ("Number of iterations in the final step of LINCS. 1 is fine for");
 +  CTYPE ("normal simulations, but use 2 to conserve energy in NVE runs.");
 +  CTYPE ("For energy minimization with constraints it should be 4 to 8.");
 +  ITYPE ("lincs-iter", ir->nLincsIter, 1);
 +  CTYPE ("Lincs will write a warning to stderr if in one step a bond"); 
 +  CTYPE ("rotates over more degrees than");
 +  RTYPE ("lincs-warnangle", ir->LincsWarnAngle, 30.0);
 +  CTYPE ("Convert harmonic bonds to morse potentials");
 +  EETYPE("morse",       opts->bMorse,yesno_names);
 +
 +  /* Energy group exclusions */
 +  CCTYPE ("ENERGY GROUP EXCLUSIONS");
 +  CTYPE ("Pairs of energy groups for which all non-bonded interactions are excluded");
 +  STYPE ("energygrp-excl", egpexcl,     NULL);
 +  
 +  /* Walls */
 +  CCTYPE ("WALLS");
 +  CTYPE ("Number of walls, type, atom types, densities and box-z scale factor for Ewald");
 +  ITYPE ("nwall", ir->nwall, 0);
 +  EETYPE("wall-type",     ir->wall_type,   ewt_names);
 +  RTYPE ("wall-r-linpot", ir->wall_r_linpot, -1);
 +  STYPE ("wall-atomtype", wall_atomtype, NULL);
 +  STYPE ("wall-density",  wall_density,  NULL);
 +  RTYPE ("wall-ewald-zfac", ir->wall_ewald_zfac, 3);
 +  
 +  /* COM pulling */
 +  CCTYPE("COM PULLING");
 +  CTYPE("Pull type: no, umbrella, constraint or constant-force");
 +  EETYPE("pull",          ir->ePull, epull_names);
 +  if (ir->ePull != epullNO) {
 +    snew(ir->pull,1);
 +    pull_grp = read_pullparams(&ninp,&inp,ir->pull,&opts->pull_start,wi);
 +  }
 +  
 +  /* Enforced rotation */
 +  CCTYPE("ENFORCED ROTATION");
 +  CTYPE("Enforced rotation: No or Yes");
 +  EETYPE("rotation",       ir->bRot, yesno_names);
 +  if (ir->bRot) {
 +    snew(ir->rot,1);
 +    rot_grp = read_rotparams(&ninp,&inp,ir->rot,wi);
 +  }
 +
 +  /* Refinement */
 +  CCTYPE("NMR refinement stuff");
 +  CTYPE ("Distance restraints type: No, Simple or Ensemble");
 +  EETYPE("disre",       ir->eDisre,     edisre_names);
 +  CTYPE ("Force weighting of pairs in one distance restraint: Conservative or Equal");
 +  EETYPE("disre-weighting", ir->eDisreWeighting, edisreweighting_names);
 +  CTYPE ("Use sqrt of the time averaged times the instantaneous violation");
 +  EETYPE("disre-mixed", ir->bDisreMixed, yesno_names);
 +  RTYPE ("disre-fc",  ir->dr_fc,      1000.0);
 +  RTYPE ("disre-tau", ir->dr_tau,     0.0);
 +  CTYPE ("Output frequency for pair distances to energy file");
 +  ITYPE ("nstdisreout", ir->nstdisreout, 100);
 +  CTYPE ("Orientation restraints: No or Yes");
 +  EETYPE("orire",       opts->bOrire,   yesno_names);
 +  CTYPE ("Orientation restraints force constant and tau for time averaging");
 +  RTYPE ("orire-fc",  ir->orires_fc,  0.0);
 +  RTYPE ("orire-tau", ir->orires_tau, 0.0);
 +  STYPE ("orire-fitgrp",orirefitgrp,    NULL);
 +  CTYPE ("Output frequency for trace(SD) and S to energy file");
 +  ITYPE ("nstorireout", ir->nstorireout, 100);
 +
 +  /* free energy variables */
 +  CCTYPE ("Free energy variables");
 +  EETYPE("free-energy", ir->efep, efep_names);
 +  STYPE ("couple-moltype",  couple_moltype,  NULL);
 +  EETYPE("couple-lambda0", opts->couple_lam0, couple_lam);
 +  EETYPE("couple-lambda1", opts->couple_lam1, couple_lam);
 +  EETYPE("couple-intramol", opts->bCoupleIntra, yesno_names);
 +
 +  RTYPE ("init-lambda", fep->init_lambda,-1); /* start with -1 so
 +                                                 we can recognize if
 +                                                 it was not entered */
 +  ITYPE ("init-lambda-state", fep->init_fep_state,0);
 +  RTYPE ("delta-lambda",fep->delta_lambda,0.0);
 +  ITYPE ("nstdhdl",fep->nstdhdl, 100);
 +  STYPE ("fep-lambdas", fep_lambda[efptFEP], NULL);
 +  STYPE ("mass-lambdas", fep_lambda[efptMASS], NULL);
 +  STYPE ("coul-lambdas", fep_lambda[efptCOUL], NULL);
 +  STYPE ("vdw-lambdas", fep_lambda[efptVDW], NULL);
 +  STYPE ("bonded-lambdas", fep_lambda[efptBONDED], NULL);
 +  STYPE ("restraint-lambdas", fep_lambda[efptRESTRAINT], NULL);
 +  STYPE ("temperature-lambdas", fep_lambda[efptTEMPERATURE], NULL);
 +  STYPE ("init-lambda-weights",lambda_weights,NULL);
 +  EETYPE("dhdl-print-energy", fep->bPrintEnergy, yesno_names);
 +  RTYPE ("sc-alpha",fep->sc_alpha,0.0);
 +  ITYPE ("sc-power",fep->sc_power,1);
 +  RTYPE ("sc-r-power",fep->sc_r_power,6.0);
 +  RTYPE ("sc-sigma",fep->sc_sigma,0.3);
 +  EETYPE("sc-coul",fep->bScCoul,yesno_names);
 +  ITYPE ("dh_hist_size", fep->dh_hist_size, 0);
 +  RTYPE ("dh_hist_spacing", fep->dh_hist_spacing, 0.1);
 +  EETYPE("separate-dhdl-file", fep->separate_dhdl_file,
 +                               separate_dhdl_file_names);
 +  EETYPE("dhdl-derivatives", fep->dhdl_derivatives, dhdl_derivatives_names);
 +
 +  /* Non-equilibrium MD stuff */  
 +  CCTYPE("Non-equilibrium MD stuff");
 +  STYPE ("acc-grps",    accgrps,        NULL);
 +  STYPE ("accelerate",  acc,            NULL);
 +  STYPE ("freezegrps",  freeze,         NULL);
 +  STYPE ("freezedim",   frdim,          NULL);
 +  RTYPE ("cos-acceleration", ir->cos_accel, 0);
 +  STYPE ("deform",      deform,         NULL);
 +
 +  /* simulated tempering variables */
 +  CCTYPE("simulated tempering variables");
 +  EETYPE("simulated-tempering",ir->bSimTemp,yesno_names);
 +  EETYPE("simulated-tempering-scaling",ir->simtempvals->eSimTempScale,esimtemp_names);
 +  RTYPE("sim-temp-low",ir->simtempvals->simtemp_low,300.0);
 +  RTYPE("sim-temp-high",ir->simtempvals->simtemp_high,300.0);
 +
 +  /* expanded ensemble variables */
 +  if (ir->efep==efepEXPANDED || ir->bSimTemp)
 +  {
 +      read_expandedparams(&ninp,&inp,expand,wi);
 +  }
 +
 +  /* Electric fields */
 +  CCTYPE("Electric fields");
 +  CTYPE ("Format is number of terms (int) and for all terms an amplitude (real)");
 +  CTYPE ("and a phase angle (real)");
 +  STYPE ("E-x",       efield_x,       NULL);
 +  STYPE ("E-xt",      efield_xt,      NULL);
 +  STYPE ("E-y",       efield_y,       NULL);
 +  STYPE ("E-yt",      efield_yt,      NULL);
 +  STYPE ("E-z",       efield_z,       NULL);
 +  STYPE ("E-zt",      efield_zt,      NULL);
 +  
 +  /* AdResS defined thingies */
 +  CCTYPE ("AdResS parameters");
 +  EETYPE("adress",       ir->bAdress, yesno_names);
 +  if (ir->bAdress) {
 +    snew(ir->adress,1);
 +    read_adressparams(&ninp,&inp,ir->adress,wi);
 +  }
 +
 +  /* User defined thingies */
 +  CCTYPE ("User defined thingies");
 +  STYPE ("user1-grps",  user1,          NULL);
 +  STYPE ("user2-grps",  user2,          NULL);
 +  ITYPE ("userint1",    ir->userint1,   0);
 +  ITYPE ("userint2",    ir->userint2,   0);
 +  ITYPE ("userint3",    ir->userint3,   0);
 +  ITYPE ("userint4",    ir->userint4,   0);
 +  RTYPE ("userreal1",   ir->userreal1,  0);
 +  RTYPE ("userreal2",   ir->userreal2,  0);
 +  RTYPE ("userreal3",   ir->userreal3,  0);
 +  RTYPE ("userreal4",   ir->userreal4,  0);
 +#undef CTYPE
 +
 +  write_inpfile(mdparout,ninp,inp,FALSE,wi);
 +  for (i=0; (i<ninp); i++) {
 +    sfree(inp[i].name);
 +    sfree(inp[i].value);
 +  }
 +  sfree(inp);
 +
 +  /* Process options if necessary */
 +  for(m=0; m<2; m++) {
 +    for(i=0; i<2*DIM; i++)
 +      dumdub[m][i]=0.0;
 +    if(ir->epc) {
 +      switch (ir->epct) {
 +      case epctISOTROPIC:
 +      if (sscanf(dumstr[m],"%lf",&(dumdub[m][XX]))!=1) {
 +        warning_error(wi,"Pressure coupling: not enough values (I need 1)");
 +      }
 +      dumdub[m][YY]=dumdub[m][ZZ]=dumdub[m][XX];
 +      break;
 +      case epctSEMIISOTROPIC:
 +      case epctSURFACETENSION:
 +      if (sscanf(dumstr[m],"%lf%lf",
 +                 &(dumdub[m][XX]),&(dumdub[m][ZZ]))!=2) {
 +        warning_error(wi,"Pressure coupling: not enough values (I need 2)");
 +      }
 +      dumdub[m][YY]=dumdub[m][XX];
 +      break;
 +      case epctANISOTROPIC:
 +      if (sscanf(dumstr[m],"%lf%lf%lf%lf%lf%lf",
 +                 &(dumdub[m][XX]),&(dumdub[m][YY]),&(dumdub[m][ZZ]),
 +                 &(dumdub[m][3]),&(dumdub[m][4]),&(dumdub[m][5]))!=6) {
 +        warning_error(wi,"Pressure coupling: not enough values (I need 6)");
 +      }
 +      break;
 +      default:
 +      gmx_fatal(FARGS,"Pressure coupling type %s not implemented yet",
 +                  epcoupltype_names[ir->epct]);
 +      }
 +    }
 +  }
 +  clear_mat(ir->ref_p);
 +  clear_mat(ir->compress);
 +  for(i=0; i<DIM; i++) {
 +    ir->ref_p[i][i]    = dumdub[1][i];
 +    ir->compress[i][i] = dumdub[0][i];
 +  }
 +  if (ir->epct == epctANISOTROPIC) {
 +    ir->ref_p[XX][YY] = dumdub[1][3];
 +    ir->ref_p[XX][ZZ] = dumdub[1][4];
 +    ir->ref_p[YY][ZZ] = dumdub[1][5];
 +    if (ir->ref_p[XX][YY]!=0 && ir->ref_p[XX][ZZ]!=0 && ir->ref_p[YY][ZZ]!=0) {
 +      warning(wi,"All off-diagonal reference pressures are non-zero. Are you sure you want to apply a threefold shear stress?\n");
 +    }
 +    ir->compress[XX][YY] = dumdub[0][3];
 +    ir->compress[XX][ZZ] = dumdub[0][4];
 +    ir->compress[YY][ZZ] = dumdub[0][5];
 +    for(i=0; i<DIM; i++) {
 +      for(m=0; m<i; m++) {
 +      ir->ref_p[i][m] = ir->ref_p[m][i];
 +      ir->compress[i][m] = ir->compress[m][i];
 +      }
 +    }
 +  } 
 +  
 +  if (ir->comm_mode == ecmNO)
 +    ir->nstcomm = 0;
 +
 +  opts->couple_moltype = NULL;
 +  if (strlen(couple_moltype) > 0) 
 +  {
 +      if (ir->efep != efepNO) 
 +      {
 +          opts->couple_moltype = strdup(couple_moltype);
 +          if (opts->couple_lam0 == opts->couple_lam1)
 +          {
 +              warning(wi,"The lambda=0 and lambda=1 states for coupling are identical");
 +          }
 +          if (ir->eI == eiMD && (opts->couple_lam0 == ecouplamNONE ||
 +                                 opts->couple_lam1 == ecouplamNONE)) 
 +          {
 +              warning(wi,"For proper sampling of the (nearly) decoupled state, stochastic dynamics should be used");
 +          }
 +      }
 +      else
 +      {
 +          warning(wi,"Can not couple a molecule with free_energy = no");
 +      }
 +  }
 +  /* FREE ENERGY AND EXPANDED ENSEMBLE OPTIONS */
 +  if (ir->efep != efepNO) {
 +      if (fep->delta_lambda > 0) {
 +          ir->efep = efepSLOWGROWTH;
 +      }
 +  }
 +
 +  if (ir->bSimTemp) {
 +      fep->bPrintEnergy = TRUE;
 +      /* always print out the energy to dhdl if we are doing expanded ensemble, since we need the total energy
 +         if the temperature is changing. */
 +  }
 +
 +  if ((ir->efep != efepNO) || ir->bSimTemp)
 +  {
 +      ir->bExpanded = FALSE;
 +      if ((ir->efep == efepEXPANDED) || ir->bSimTemp)
 +      {
 +          ir->bExpanded = TRUE;
 +      }
 +      do_fep_params(ir,fep_lambda,lambda_weights);
 +      if (ir->bSimTemp) { /* done after fep params */
 +          do_simtemp_params(ir);
 +      }
 +  }
 +  else
 +  {
 +      ir->fepvals->n_lambda = 0;
 +  }
 +
 +  /* WALL PARAMETERS */
 +
 +  do_wall_params(ir,wall_atomtype,wall_density,opts);
 +
 +  /* ORIENTATION RESTRAINT PARAMETERS */
 +  
 +  if (opts->bOrire && str_nelem(orirefitgrp,MAXPTR,NULL)!=1) {
 +      warning_error(wi,"ERROR: Need one orientation restraint fit group\n");
 +  }
 +
 +  /* DEFORMATION PARAMETERS */
 +
 +  clear_mat(ir->deform);
 +  for(i=0; i<6; i++)
 +  {
 +      dumdub[0][i] = 0;
 +  }
 +  m = sscanf(deform,"%lf %lf %lf %lf %lf %lf",
 +           &(dumdub[0][0]),&(dumdub[0][1]),&(dumdub[0][2]),
 +           &(dumdub[0][3]),&(dumdub[0][4]),&(dumdub[0][5]));
 +  for(i=0; i<3; i++)
 +  {
 +      ir->deform[i][i] = dumdub[0][i];
 +  }
 +  ir->deform[YY][XX] = dumdub[0][3];
 +  ir->deform[ZZ][XX] = dumdub[0][4];
 +  ir->deform[ZZ][YY] = dumdub[0][5];
 +  if (ir->epc != epcNO) {
 +    for(i=0; i<3; i++)
 +      for(j=0; j<=i; j++)
 +      if (ir->deform[i][j]!=0 && ir->compress[i][j]!=0) {
 +        warning_error(wi,"A box element has deform set and compressibility > 0");
 +      }
 +    for(i=0; i<3; i++)
 +      for(j=0; j<i; j++)
 +      if (ir->deform[i][j]!=0) {
 +        for(m=j; m<DIM; m++)
 +          if (ir->compress[m][j]!=0) {
 +            sprintf(warn_buf,"An off-diagonal box element has deform set while compressibility > 0 for the same component of another box vector, this might lead to spurious periodicity effects.");
 +            warning(wi,warn_buf);
 +          }
 +      }
 +  }
 +
 +  sfree(dumstr[0]);
 +  sfree(dumstr[1]);
 +}
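For the anisotropic pressure-coupling branch above, a worked example of how the six values are placed into the matrices (numbers invented):

/*
 *   pcoupltype      = anisotropic
 *   ref-p           = 1.0 1.0 1.0 0.1 0.0 0.0
 *   compressibility = 4.5e-5 4.5e-5 4.5e-5 0 0 0
 *
 * The six numbers are read as xx yy zz xy xz yz, so
 *   ref_p[XX][XX] = ref_p[YY][YY] = ref_p[ZZ][ZZ] = 1.0
 *   ref_p[XX][YY] = 0.1, ref_p[XX][ZZ] = ref_p[YY][ZZ] = 0.0
 * compress is filled the same way from the compressibility string, and
 * both matrices are symmetrized (lower triangle copied from the upper)
 * at the end of get_ir().
 */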
 +
 +static int search_QMstring(char *s,int ng,const char *gn[])
 +{
 +  /* same as normal search_string, but this one searches QM strings */
 +  int i;
 +
 +  for(i=0; (i<ng); i++)
 +    if (gmx_strcasecmp(s,gn[i]) == 0)
 +      return i;
 +
 +  gmx_fatal(FARGS,"this QM method or basisset (%s) is not implemented!\n",s);
 +
 +  return -1;
 +
 +} /* search_QMstring */
 +
 +
 +int search_string(char *s,int ng,char *gn[])
 +{
 +  int i;
 +  
 +  for(i=0; (i<ng); i++)
 +  {
 +    if (gmx_strcasecmp(s,gn[i]) == 0)
 +    {
 +      return i;
 +    }
 +  }
 +    
 +  gmx_fatal(FARGS,
 +            "Group %s referenced in the .mdp file was not found in the index file.\n"
 +            "Group names must match either [moleculetype] names or custom index group\n"
 +            "names, in which case you must supply an index file to the '-n' option\n"
 +            "of grompp.",
 +            s);
 +  
 +  return -1;
 +}
 +
 +static gmx_bool do_numbering(int natoms,gmx_groups_t *groups,int ng,char *ptrs[],
 +                         t_blocka *block,char *gnames[],
 +                         int gtype,int restnm,
 +                         int grptp,gmx_bool bVerbose,
 +                         warninp_t wi)
 +{
 +    unsigned short *cbuf;
 +    t_grps *grps=&(groups->grps[gtype]);
 +    int    i,j,gid,aj,ognr,ntot=0;
 +    const char *title;
 +    gmx_bool   bRest;
 +    char   warn_buf[STRLEN];
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"Starting numbering %d groups of type %d\n",ng,gtype);
 +    }
 +  
 +    title = gtypes[gtype];
 +    
 +    snew(cbuf,natoms);
 +    /* Mark all id's as not set */
 +    for(i=0; (i<natoms); i++)
 +    {
 +        cbuf[i] = NOGID;
 +    }
 +  
 +    snew(grps->nm_ind,ng+1); /* +1 for possible rest group */
 +    for(i=0; (i<ng); i++)
 +    {
 +        /* Lookup the group name in the block structure */
 +        gid = search_string(ptrs[i],block->nr,gnames);
 +        if ((grptp != egrptpONE) || (i == 0))
 +        {
 +            grps->nm_ind[grps->nr++]=gid;
 +        }
 +        if (debug) 
 +        {
 +            fprintf(debug,"Found gid %d for group %s\n",gid,ptrs[i]);
 +        }
 +    
 +        /* Now go over the atoms in the group */
 +        for(j=block->index[gid]; (j<block->index[gid+1]); j++)
 +        {
 +
 +            aj=block->a[j];
 +      
 +            /* Range checking */
 +            if ((aj < 0) || (aj >= natoms)) 
 +            {
 +                gmx_fatal(FARGS,"Invalid atom number %d in indexfile",aj);
 +            }
 +            /* Look up the old group number */
 +            ognr = cbuf[aj];
 +            if (ognr != NOGID)
 +            {
 +                gmx_fatal(FARGS,"Atom %d in multiple %s groups (%d and %d)",
 +                          aj+1,title,ognr+1,i+1);
 +            }
 +            else
 +            {
 +                /* Store the group number in buffer */
 +                if (grptp == egrptpONE)
 +                {
 +                    cbuf[aj] = 0;
 +                }
 +                else
 +                {
 +                    cbuf[aj] = i;
 +                }
 +                ntot++;
 +            }
 +        }
 +    }
 +    
 +    /* Now check whether we have done all atoms */
 +    bRest = FALSE;
 +    if (ntot != natoms)
 +    {
 +        if (grptp == egrptpALL)
 +        {
 +            gmx_fatal(FARGS,"%d atoms are not part of any of the %s groups",
 +                      natoms-ntot,title);
 +        }
 +        else if (grptp == egrptpPART)
 +        {
 +            sprintf(warn_buf,"%d atoms are not part of any of the %s groups",
 +                    natoms-ntot,title);
 +            warning_note(wi,warn_buf);
 +        }
 +        /* Assign all atoms currently unassigned to a rest group */
 +        for(j=0; (j<natoms); j++)
 +        {
 +            if (cbuf[j] == NOGID)
 +            {
 +                cbuf[j] = grps->nr;
 +                bRest = TRUE;
 +            }
 +        }
 +        if (grptp != egrptpPART)
 +        {
 +            if (bVerbose)
 +            {
 +                fprintf(stderr,
 +                        "Making dummy/rest group for %s containing %d elements\n",
 +                        title,natoms-ntot);
 +            }
 +            /* Add group name "rest" */ 
 +            grps->nm_ind[grps->nr] = restnm;
 +            
 +            /* Assign the rest name to all atoms not currently assigned to a group */
 +            for(j=0; (j<natoms); j++)
 +            {
 +                if (cbuf[j] == NOGID)
 +                {
 +                    cbuf[j] = grps->nr;
 +                }
 +            }
 +            grps->nr++;
 +        }
 +    }
 +    
 +    if (grps->nr == 1 && (ntot == 0 || ntot == natoms))
 +    {
 +        /* All atoms are part of one (or no) group, no index required */
 +        groups->ngrpnr[gtype] = 0;
 +        groups->grpnr[gtype]  = NULL;
 +    }
 +    else
 +    {
 +        groups->ngrpnr[gtype] = natoms;
 +        snew(groups->grpnr[gtype],natoms);
 +        for(j=0; (j<natoms); j++)
 +        {
 +            groups->grpnr[gtype][j] = cbuf[j];
 +        }
 +    }
 +    
 +    sfree(cbuf);
 +
 +    return (bRest && grptp == egrptpPART);
 +}
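A small illustration of the rest-group behaviour in do_numbering (atom counts and group names are hypothetical):

/*
 * With natoms = 100 and energygrps = Protein SOL, where Protein has 60
 * atoms and SOL has 30, the 10 remaining atoms are collected into an
 * automatically added "rest" group.  The stored group count becomes 3
 * and groups->grpnr[egcENER] holds 0 (Protein), 1 (SOL) or 2 (rest)
 * for every atom.
 */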
 +
 +static void calc_nrdf(gmx_mtop_t *mtop,t_inputrec *ir,char **gnames)
 +{
 +  t_grpopts *opts;
 +  gmx_groups_t *groups;
 +  t_pull  *pull;
 +  int     natoms,ai,aj,i,j,d,g,imin,jmin,nc;
 +  t_iatom *ia;
 +  int     *nrdf2,*na_vcm,na_tot;
 +  double  *nrdf_tc,*nrdf_vcm,nrdf_uc,n_sub=0;
 +  gmx_mtop_atomloop_all_t aloop;
 +  t_atom  *atom;
 +  int     mb,mol,ftype,as;
 +  gmx_molblock_t *molb;
 +  gmx_moltype_t *molt;
 +
 +  /* Calculate nrdf. 
 +   * First calc 3xnr-atoms for each group
 +   * then subtract half a degree of freedom for each constraint
 +   *
 +   * Only atoms and nuclei contribute to the degrees of freedom...
 +   */
 +
 +  opts = &ir->opts;
 +  
 +  groups = &mtop->groups;
 +  natoms = mtop->natoms;
 +
 +  /* Allocate one more for a possible rest group */
 +  /* We need to sum degrees of freedom into doubles,
 +   * since floats give too low nrdf's above 3 million atoms.
 +   */
 +  snew(nrdf_tc,groups->grps[egcTC].nr+1);
 +  snew(nrdf_vcm,groups->grps[egcVCM].nr+1);
 +  snew(na_vcm,groups->grps[egcVCM].nr+1);
 +  
 +  for(i=0; i<groups->grps[egcTC].nr; i++)
 +    nrdf_tc[i] = 0;
 +  for(i=0; i<groups->grps[egcVCM].nr+1; i++)
 +    nrdf_vcm[i] = 0;
 +
 +  snew(nrdf2,natoms);
 +  aloop = gmx_mtop_atomloop_all_init(mtop);
 +  while (gmx_mtop_atomloop_all_next(aloop,&i,&atom)) {
 +    nrdf2[i] = 0;
 +    if (atom->ptype == eptAtom || atom->ptype == eptNucleus) {
 +      g = ggrpnr(groups,egcFREEZE,i);
 +      /* Double count nrdf for particle i */
 +      for(d=0; d<DIM; d++) {
 +      if (opts->nFreeze[g][d] == 0) {
 +        nrdf2[i] += 2;
 +      }
 +      }
 +      nrdf_tc [ggrpnr(groups,egcTC ,i)] += 0.5*nrdf2[i];
 +      nrdf_vcm[ggrpnr(groups,egcVCM,i)] += 0.5*nrdf2[i];
 +    }
 +  }
 +
 +  as = 0;
 +  for(mb=0; mb<mtop->nmolblock; mb++) {
 +    molb = &mtop->molblock[mb];
 +    molt = &mtop->moltype[molb->type];
 +    atom = molt->atoms.atom;
 +    for(mol=0; mol<molb->nmol; mol++) {
 +      for (ftype=F_CONSTR; ftype<=F_CONSTRNC; ftype++) {
 +      ia = molt->ilist[ftype].iatoms;
 +      for(i=0; i<molt->ilist[ftype].nr; ) {
 +        /* Subtract degrees of freedom for the constraints,
 +         * if the particles still have degrees of freedom left.
 +         * If one of the particles is a vsite or a shell, then all
 +         * constraint motion will go there, but since they do not
 +         * contribute to the constraints the degrees of freedom do not
 +         * change.
 +         */
 +        ai = as + ia[1];
 +        aj = as + ia[2];
 +        if (((atom[ia[1]].ptype == eptNucleus) ||
 +             (atom[ia[1]].ptype == eptAtom)) &&
 +            ((atom[ia[2]].ptype == eptNucleus) ||
 +             (atom[ia[2]].ptype == eptAtom))) {
 +          if (nrdf2[ai] > 0) 
 +            jmin = 1;
 +          else
 +            jmin = 2;
 +          if (nrdf2[aj] > 0)
 +            imin = 1;
 +          else
 +            imin = 2;
 +          imin = min(imin,nrdf2[ai]);
 +          jmin = min(jmin,nrdf2[aj]);
 +          nrdf2[ai] -= imin;
 +          nrdf2[aj] -= jmin;
 +          nrdf_tc [ggrpnr(groups,egcTC ,ai)] -= 0.5*imin;
 +          nrdf_tc [ggrpnr(groups,egcTC ,aj)] -= 0.5*jmin;
 +          nrdf_vcm[ggrpnr(groups,egcVCM,ai)] -= 0.5*imin;
 +          nrdf_vcm[ggrpnr(groups,egcVCM,aj)] -= 0.5*jmin;
 +        }
 +        ia += interaction_function[ftype].nratoms+1;
 +        i  += interaction_function[ftype].nratoms+1;
 +      }
 +      }
 +      ia = molt->ilist[F_SETTLE].iatoms;
 +      for(i=0; i<molt->ilist[F_SETTLE].nr; ) {
 +      /* Subtract 1 dof from every atom in the SETTLE */
 +      for(j=0; j<3; j++) {
 +      ai = as + ia[1+j];
 +        imin = min(2,nrdf2[ai]);
 +        nrdf2[ai] -= imin;
 +        nrdf_tc [ggrpnr(groups,egcTC ,ai)] -= 0.5*imin;
 +        nrdf_vcm[ggrpnr(groups,egcVCM,ai)] -= 0.5*imin;
 +      }
 +      ia += 4;
 +      i  += 4;
 +      }
 +      as += molt->atoms.nr;
 +    }
 +  }
 +
 +  if (ir->ePull == epullCONSTRAINT) {
 +    /* Correct nrdf for the COM constraints.
 +     * We correct using the TC and VCM group of the first atom
 +     * in the reference and pull group. If atoms in one pull group
 +     * belong to different TC or VCM groups it is anyhow difficult
 +     * to determine the optimal nrdf assignment.
 +     */
 +    pull = ir->pull;
 +    if (pull->eGeom == epullgPOS) {
 +      nc = 0;
 +      for(i=0; i<DIM; i++) {
 +      if (pull->dim[i])
 +        nc++;
 +      }
 +    } else {
 +      nc = 1;
 +    }
 +    for(i=0; i<pull->ngrp; i++) {
 +      imin = 2*nc;
 +      if (pull->grp[0].nat > 0) {
 +      /* Subtract 1/2 dof from the reference group */
 +      ai = pull->grp[0].ind[0];
 +      if (nrdf_tc[ggrpnr(groups,egcTC,ai)] > 1) {
 +        nrdf_tc [ggrpnr(groups,egcTC ,ai)] -= 0.5;
 +        nrdf_vcm[ggrpnr(groups,egcVCM,ai)] -= 0.5;
 +        imin--;
 +      }
 +      }
 +      /* Subtract 1/2 dof from the pulled group */
 +      ai = pull->grp[1+i].ind[0];
 +      nrdf_tc [ggrpnr(groups,egcTC ,ai)] -= 0.5*imin;
 +      nrdf_vcm[ggrpnr(groups,egcVCM,ai)] -= 0.5*imin;
 +      if (nrdf_tc[ggrpnr(groups,egcTC,ai)] < 0)
 +      gmx_fatal(FARGS,"Center of mass pulling constraints caused the number of degrees of freedom for temperature coupling group %s to be negative",gnames[groups->grps[egcTC].nm_ind[ggrpnr(groups,egcTC,ai)]]);
 +    }
 +  }
 +  
 +  if (ir->nstcomm != 0) {
 +    /* Subtract 3 from the number of degrees of freedom in each vcm group
 +     * when com translation is removed and 6 when rotation is removed
 +     * as well.
 +     */
 +    switch (ir->comm_mode) {
 +    case ecmLINEAR:
 +      n_sub = ndof_com(ir);
 +      break;
 +    case ecmANGULAR:
 +      n_sub = 6;
 +      break;
 +    default:
 +      n_sub = 0;
 +      gmx_incons("Checking comm_mode");
 +    }
 +    
 +    for(i=0; i<groups->grps[egcTC].nr; i++) {
 +      /* Count the number of atoms of TC group i for every VCM group */
 +      for(j=0; j<groups->grps[egcVCM].nr+1; j++)
 +      na_vcm[j] = 0;
 +      na_tot = 0;
 +      for(ai=0; ai<natoms; ai++)
 +      if (ggrpnr(groups,egcTC,ai) == i) {
 +        na_vcm[ggrpnr(groups,egcVCM,ai)]++;
 +        na_tot++;
 +      }
 +      /* Correct for VCM removal according to the fraction of each VCM
 +       * group present in this TC group.
 +       */
 +      nrdf_uc = nrdf_tc[i];
 +      if (debug) {
 +      fprintf(debug,"T-group[%d] nrdf_uc = %g, n_sub = %g\n",
 +              i,nrdf_uc,n_sub);
 +      }
 +      nrdf_tc[i] = 0;
 +      for(j=0; j<groups->grps[egcVCM].nr+1; j++) {
 +      if (nrdf_vcm[j] > n_sub) {
 +        nrdf_tc[i] += nrdf_uc*((double)na_vcm[j]/(double)na_tot)*
 +          (nrdf_vcm[j] - n_sub)/nrdf_vcm[j];
 +      }
 +      if (debug) {
 +        fprintf(debug,"  nrdf_vcm[%d] = %g, nrdf = %g\n",
 +                j,nrdf_vcm[j],nrdf_tc[i]);
 +      }
 +      }
 +    }
 +  }
 +  for(i=0; (i<groups->grps[egcTC].nr); i++) {
 +    opts->nrdf[i] = nrdf_tc[i];
 +    if (opts->nrdf[i] < 0)
 +      opts->nrdf[i] = 0;
 +    fprintf(stderr,
 +          "Number of degrees of freedom in T-Coupling group %s is %.2f\n",
 +          gnames[groups->grps[egcTC].nm_ind[i]],opts->nrdf[i]);
 +  }
 +  
 +  sfree(nrdf2);
 +  sfree(nrdf_tc);
 +  sfree(nrdf_vcm);
 +  sfree(na_vcm);
 +}
 +
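
Roughly speaking, the bookkeeping in calc_nrdf() above gives every non-frozen dimension of a real atom one degree of freedom, removes one per constraint, three per SETTLE water, and finally subtracts the center-of-mass degrees of freedom per VCM group. A minimal standalone sketch (not GROMACS code, hypothetical system) of the resulting count for a single T-coupling/VCM group:

#include <stdio.h>

int main(void)
{
    /* Hypothetical system: 1000 rigid (SETTLE) waters, no other constraints */
    int natoms       = 3000;
    int nsettle      = 1000;  /* each SETTLE water removes 3 degrees of freedom */
    int nconstraints = 0;     /* e.g. h-bond constraints outside the waters     */
    int ndim_com     = 3;     /* comm-mode = Linear in a fully periodic box     */

    double nrdf = 3.0*natoms - 3.0*nsettle - nconstraints - ndim_com;

    printf("Number of degrees of freedom: %.2f\n", nrdf);  /* prints 5997.00 */
    return 0;
}
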
 +static void decode_cos(char *s,t_cosines *cosine,gmx_bool bTime)
 +{
 +  char   *t;
 +  char   format[STRLEN],f1[STRLEN];
 +  double a,phi;
 +  int    i;
 +  
 +  t=strdup(s);
 +  trim(t);
 +  
 +  cosine->n=0;
 +  cosine->a=NULL;
 +  cosine->phi=NULL;
 +  if (strlen(t)) {
 +    sscanf(t,"%d",&(cosine->n));
 +    if (cosine->n <= 0) {
 +      cosine->n=0;
 +    } else {
 +      snew(cosine->a,cosine->n);
 +      snew(cosine->phi,cosine->n);
 +      
 +      sprintf(format,"%%*d");
 +      for(i=0; (i<cosine->n); i++) {
 +      strcpy(f1,format);
 +      strcat(f1,"%lf%lf");
 +      if (sscanf(t,f1,&a,&phi) < 2)
 +        gmx_fatal(FARGS,"Invalid input for electric field shift: '%s'",t);
 +      cosine->a[i]=a;
 +      cosine->phi[i]=phi;
 +      strcat(format,"%*lf%*lf");
 +      }
 +    }
 +  }
 +  sfree(t);
 +}
 +
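
The electric-field strings parsed by decode_cos() above have the form "n  a1 phi1  a2 phi2 ...": a count followed by amplitude/phase pairs. Each pair is read by growing the sscanf format with "%*lf%*lf" so that previously read pairs are skipped. A standalone sketch of the same trick, with a made-up input string:

#include <stdio.h>
#include <string.h>

int main(void)
{
    const char *s = "2  0.5 0.0  1.0 90.0";   /* hypothetical field input */
    char   format[256], f1[256];
    int    n, i;
    double a, phi;

    if (sscanf(s, "%d", &n) != 1)
    {
        return 1;
    }
    sprintf(format, "%%*d");                  /* skip the leading count */
    for (i = 0; i < n; i++)
    {
        strcpy(f1, format);
        strcat(f1, "%lf%lf");                 /* read the next (a, phi) pair */
        if (sscanf(s, f1, &a, &phi) < 2)
        {
            fprintf(stderr, "Invalid input '%s'\n", s);
            return 1;
        }
        printf("cosine %d: a = %g, phi = %g\n", i, a, phi);
        strcat(format, "%*lf%*lf");           /* skip this pair next time round */
    }
    return 0;
}
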
 +static gmx_bool do_egp_flag(t_inputrec *ir,gmx_groups_t *groups,
 +                      const char *option,const char *val,int flag)
 +{
 +  /* The maximum number of energy group pairs would be MAXPTR*(MAXPTR+1)/2.
 +   * But since this is much larger than STRLEN, such a line cannot be parsed.
 +   * The real maximum is the number of names that fit in a string: STRLEN/2.
 +   */
 +#define EGP_MAX (STRLEN/2)
 +  int  nelem,i,j,k,nr;
 +  char *names[EGP_MAX];
 +  char ***gnames;
 +  gmx_bool bSet;
 +
 +  gnames = groups->grpname;
 +
 +  nelem = str_nelem(val,EGP_MAX,names);
 +  if (nelem % 2 != 0)
 +    gmx_fatal(FARGS,"The number of groups for %s is odd",option);
 +  nr = groups->grps[egcENER].nr;
 +  bSet = FALSE;
 +  for(i=0; i<nelem/2; i++) {
 +    j = 0;
 +    while ((j < nr) &&
 +         gmx_strcasecmp(names[2*i],*(gnames[groups->grps[egcENER].nm_ind[j]])))
 +      j++;
 +    if (j == nr)
 +      gmx_fatal(FARGS,"%s in %s is not an energy group\n",
 +                names[2*i],option);
 +    k = 0;
 +    while ((k < nr) &&
 +         gmx_strcasecmp(names[2*i+1],*(gnames[groups->grps[egcENER].nm_ind[k]])))
 +      k++;
 +    if (k==nr)
 +      gmx_fatal(FARGS,"%s in %s is not an energy group\n",
 +            names[2*i+1],option);
 +    if ((j < nr) && (k < nr)) {
 +      ir->opts.egp_flags[nr*j+k] |= flag;
 +      ir->opts.egp_flags[nr*k+j] |= flag;
 +      bSet = TRUE;
 +    }
 +  }
 +
 +  return bSet;
 +}
 +
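
The flags set by do_egp_flag() above live in a flattened ngener x ngener matrix, ir->opts.egp_flags, and are always stored symmetrically at [j][k] and [k][j]. A small sketch with made-up group names and illustrative flag bits (the real defines live in a GROMACS header):

#include <stdio.h>
#include <string.h>

#define EGP_EXCL  (1<<0)   /* illustrative values only */
#define EGP_TABLE (1<<1)

int main(void)
{
    const char *grp[] = { "Protein", "SOL", "Ion" };  /* hypothetical energy groups */
    int nr = 3;
    int egp_flags[3*3];
    int j = 0, k = 1;                                 /* the pair Protein-SOL       */

    memset(egp_flags, 0, sizeof(egp_flags));
    egp_flags[nr*j + k] |= EGP_EXCL;                  /* set both triangles so the  */
    egp_flags[nr*k + j] |= EGP_EXCL;                  /* lookup order is irrelevant */

    printf("%s-%s excluded: %s\n", grp[j], grp[k],
           (egp_flags[nr*j + k] & EGP_EXCL) ? "yes" : "no");
    return 0;
}
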
 +void do_index(const char* mdparin, const char *ndx,
 +              gmx_mtop_t *mtop,
 +              gmx_bool bVerbose,
 +              t_inputrec *ir,rvec *v,
 +              warninp_t wi)
 +{
 +  t_blocka *grps;
 +  gmx_groups_t *groups;
 +  int     natoms;
 +  t_symtab *symtab;
 +  t_atoms atoms_all;
 +  char    warnbuf[STRLEN],**gnames;
 +  int     nr,ntcg,ntau_t,nref_t,nacc,nofg,nSA,nSA_points,nSA_time,nSA_temp;
 +  real    tau_min;
 +  int     nstcmin;
 +  int     nacg,nfreeze,nfrdim,nenergy,nvcm,nuser;
 +  char    *ptr1[MAXPTR],*ptr2[MAXPTR],*ptr3[MAXPTR];
 +  int     i,j,k,restnm;
 +  real    SAtime;
 +  gmx_bool    bExcl,bTable,bSetTCpar,bAnneal,bRest;
 +  int     nQMmethod,nQMbasis,nQMcharge,nQMmult,nbSH,nCASorb,nCASelec,
 +    nSAon,nSAoff,nSAsteps,nQMg,nbOPT,nbTS;
 +  char    warn_buf[STRLEN];
 +
 +  if (bVerbose)
 +    fprintf(stderr,"processing index file...\n");
 +  debug_gmx();
 +  if (ndx == NULL) {
 +    snew(grps,1);
 +    snew(grps->index,1);
 +    snew(gnames,1);
 +    atoms_all = gmx_mtop_global_atoms(mtop);
 +    analyse(&atoms_all,grps,&gnames,FALSE,TRUE);
 +    free_t_atoms(&atoms_all,FALSE);
 +  } else {
 +    grps = init_index(ndx,&gnames);
 +  }
 +
 +  groups = &mtop->groups;
 +  natoms = mtop->natoms;
 +  symtab = &mtop->symtab;
 +
 +  snew(groups->grpname,grps->nr+1);
 +  
 +  for(i=0; (i<grps->nr); i++) {
 +    groups->grpname[i] = put_symtab(symtab,gnames[i]);
 +  }
 +  groups->grpname[i] = put_symtab(symtab,"rest");
 +  restnm=i;
 +  srenew(gnames,grps->nr+1);
 +  gnames[restnm] = *(groups->grpname[i]);
 +  groups->ngrpname = grps->nr+1;
 +
 +  set_warning_line(wi,mdparin,-1);
 +
 +  ntau_t = str_nelem(tau_t,MAXPTR,ptr1);
 +  nref_t = str_nelem(ref_t,MAXPTR,ptr2);
 +  ntcg   = str_nelem(tcgrps,MAXPTR,ptr3);
 +  if ((ntau_t != ntcg) || (nref_t != ntcg)) {
 +    gmx_fatal(FARGS,"Invalid T coupling input: %d groups, %d ref-t values and "
 +                "%d tau-t values",ntcg,nref_t,ntau_t);
 +  }
 +
 +  bSetTCpar = (ir->etc || EI_SD(ir->eI) || ir->eI==eiBD || EI_TPI(ir->eI));
 +  do_numbering(natoms,groups,ntcg,ptr3,grps,gnames,egcTC,
 +               restnm,bSetTCpar ? egrptpALL : egrptpALL_GENREST,bVerbose,wi);
 +  nr = groups->grps[egcTC].nr;
 +  ir->opts.ngtc = nr;
 +  snew(ir->opts.nrdf,nr);
 +  snew(ir->opts.tau_t,nr);
 +  snew(ir->opts.ref_t,nr);
 +  if (ir->eI==eiBD && ir->bd_fric==0) {
 +    fprintf(stderr,"bd-fric=0, so tau-t will be used as the inverse friction constant(s)\n");
 +  }
 +
 +  if (bSetTCpar)
 +  {
 +      if (nr != nref_t)
 +      {
 +          gmx_fatal(FARGS,"Not enough ref-t and tau-t values!");
 +      }
 +      
 +      tau_min = 1e20;
 +      for(i=0; (i<nr); i++)
 +      {
 +          ir->opts.tau_t[i] = strtod(ptr1[i],NULL);
 +          if ((ir->eI == eiBD || ir->eI == eiSD2) && ir->opts.tau_t[i] <= 0)
 +          {
 +              sprintf(warn_buf,"With integrator %s tau-t should be larger than 0",ei_names[ir->eI]);
 +              warning_error(wi,warn_buf);
 +          }
 +
 +          if (ir->etc != etcVRESCALE && ir->opts.tau_t[i] == 0)
 +          {
++              warning_note(wi,"tau-t = -1 is the value to signal that a group should not have temperature coupling. Treating your use of tau-t = 0 as if you used -1.");
 +          }
 +
 +          if (ir->opts.tau_t[i] >= 0)
 +          {
 +              tau_min = min(tau_min,ir->opts.tau_t[i]);
 +          }
 +      }
 +      if (ir->etc != etcNO && ir->nsttcouple == -1)
 +      {
 +            ir->nsttcouple = ir_optimal_nsttcouple(ir);
 +      }
 +
 +      if (EI_VV(ir->eI)) 
 +      {
 +          if ((ir->etc==etcNOSEHOOVER) && (ir->epc==epcBERENDSEN)) {
 +              gmx_fatal(FARGS,"Cannot do Nose-Hoover temperature with Berendsen pressure control with md-vv; use either vrescale temperature with berendsen pressure or Nose-Hoover temperature with MTTK pressure");
 +          }
 +          if ((ir->epc==epcMTTK) && (ir->etc>etcNO))
 +          {
 +              int mincouple;
 +              mincouple = ir->nsttcouple;
 +              if (ir->nstpcouple < mincouple)
 +              {
 +                  mincouple = ir->nstpcouple;
 +              }
 +              ir->nstpcouple = mincouple;
 +              ir->nsttcouple = mincouple;
 +              sprintf(warn_buf,"for current Trotter decomposition methods with vv, nsttcouple and nstpcouple must be equal.  Both have been reset to min(nsttcouple,nstpcouple) = %d",mincouple);
 +              warning_note(wi,warn_buf);
 +          }
 +      }
 +      /* velocity verlet with averaged kinetic energy KE = 0.5*(v(t+1/2) - v(t-1/2)) is implemented
 +         primarily for testing purposes, and does not work with temperature coupling other than 1 */
 +
 +      if (ETC_ANDERSEN(ir->etc)) {
 +          if (ir->nsttcouple != 1) {
 +              ir->nsttcouple = 1;
 +              sprintf(warn_buf,"Andersen temperature control methods assume nsttcouple = 1; there is no need for larger nsttcouple > 1, since no global parameters are computed. nsttcouple has been reset to 1");
 +              warning_note(wi,warn_buf);
 +          }
 +      }
 +      nstcmin = tcouple_min_integration_steps(ir->etc);
 +      if (nstcmin > 1)
 +      {
 +          if (tau_min/(ir->delta_t*ir->nsttcouple) < nstcmin)
 +          {
 +              sprintf(warn_buf,"For proper integration of the %s thermostat, tau-t (%g) should be at least %d times larger than nsttcouple*dt (%g)",
 +                      ETCOUPLTYPE(ir->etc),
 +                      tau_min,nstcmin,
 +                      ir->nsttcouple*ir->delta_t);
 +              warning(wi,warn_buf);
 +          }
 +      }
 +      for(i=0; (i<nr); i++)
 +      {
 +          ir->opts.ref_t[i] = strtod(ptr2[i],NULL);
 +          if (ir->opts.ref_t[i] < 0)
 +          {
 +              gmx_fatal(FARGS,"ref-t for group %d negative",i);
 +          }
 +      }
 +      /* set the lambda mc temperature to the md integrator temperature (which should be defined
 +         if we are in this conditional) if mc_temp is negative */
 +      if (ir->expandedvals->mc_temp < 0)
 +      {
 +          ir->expandedvals->mc_temp = ir->opts.ref_t[0];  /*for now, set to the first reft */
 +      }
 +  }
 +
 +  /* Simulated annealing for each group. There are nr groups */
 +  nSA = str_nelem(anneal,MAXPTR,ptr1);
 +  if (nSA == 1 && (ptr1[0][0]=='n' || ptr1[0][0]=='N'))
 +     nSA = 0;
 +  if(nSA>0 && nSA != nr) 
 +    gmx_fatal(FARGS,"Not enough annealing values: %d (for %d groups)\n",nSA,nr);
 +  else {
 +    snew(ir->opts.annealing,nr);
 +    snew(ir->opts.anneal_npoints,nr);
 +    snew(ir->opts.anneal_time,nr);
 +    snew(ir->opts.anneal_temp,nr);
 +    for(i=0;i<nr;i++) {
 +      ir->opts.annealing[i]=eannNO;
 +      ir->opts.anneal_npoints[i]=0;
 +      ir->opts.anneal_time[i]=NULL;
 +      ir->opts.anneal_temp[i]=NULL;
 +    }
 +    if (nSA > 0) {
 +      bAnneal=FALSE;
 +      for(i=0;i<nr;i++) { 
 +      if(ptr1[i][0]=='n' || ptr1[i][0]=='N') {
 +        ir->opts.annealing[i]=eannNO;
 +      } else if(ptr1[i][0]=='s'|| ptr1[i][0]=='S') {
 +        ir->opts.annealing[i]=eannSINGLE;
 +        bAnneal=TRUE;
 +      } else if(ptr1[i][0]=='p'|| ptr1[i][0]=='P') {
 +        ir->opts.annealing[i]=eannPERIODIC;
 +        bAnneal=TRUE;
 +      } 
 +      } 
 +      if(bAnneal) {
 +      /* Read the other fields too */
 +      nSA_points = str_nelem(anneal_npoints,MAXPTR,ptr1);
 +      if(nSA_points!=nSA) 
 +          gmx_fatal(FARGS,"Found %d annealing-npoints values for %d groups\n",nSA_points,nSA);
 +      for(k=0,i=0;i<nr;i++) {
 +        ir->opts.anneal_npoints[i]=strtol(ptr1[i],NULL,10);
 +        if(ir->opts.anneal_npoints[i]==1)
 +          gmx_fatal(FARGS,"Please specify at least a start and an end point for annealing\n");
 +        snew(ir->opts.anneal_time[i],ir->opts.anneal_npoints[i]);
 +        snew(ir->opts.anneal_temp[i],ir->opts.anneal_npoints[i]);
 +        k += ir->opts.anneal_npoints[i];
 +      }
 +
 +      nSA_time = str_nelem(anneal_time,MAXPTR,ptr1);
 +      if(nSA_time!=k) 
 +          gmx_fatal(FARGS,"Found %d annealing-time values, wanter %d\n",nSA_time,k);
 +      nSA_temp = str_nelem(anneal_temp,MAXPTR,ptr2);
 +      if(nSA_temp!=k) 
 +          gmx_fatal(FARGS,"Found %d annealing-temp values, wanted %d\n",nSA_temp,k);
 +
 +      for(i=0,k=0;i<nr;i++) {
 +        
 +        for(j=0;j<ir->opts.anneal_npoints[i];j++) {
 +          ir->opts.anneal_time[i][j]=strtod(ptr1[k],NULL);
 +          ir->opts.anneal_temp[i][j]=strtod(ptr2[k],NULL);
 +          if(j==0) {
 +            if(ir->opts.anneal_time[i][0] > (ir->init_t+GMX_REAL_EPS))
 +              gmx_fatal(FARGS,"First time point for annealing > init_t.\n");      
 +          } else { 
 +            /* j>0 */
 +            if(ir->opts.anneal_time[i][j]<ir->opts.anneal_time[i][j-1])
 +              gmx_fatal(FARGS,"Annealing timepoints out of order: t=%f comes after t=%f\n",
 +                          ir->opts.anneal_time[i][j],ir->opts.anneal_time[i][j-1]);
 +          }
 +          if(ir->opts.anneal_temp[i][j]<0) 
 +            gmx_fatal(FARGS,"Found negative temperature in annealing: %f\n",ir->opts.anneal_temp[i][j]);    
 +          k++;
 +        }
 +      }
 +      /* Print out some summary information, to make sure we got it right */
 +      for(i=0,k=0;i<nr;i++) {
 +        if(ir->opts.annealing[i]!=eannNO) {
 +          j = groups->grps[egcTC].nm_ind[i];
 +          fprintf(stderr,"Simulated annealing for group %s: %s, %d timepoints\n",
 +                  *(groups->grpname[j]),eann_names[ir->opts.annealing[i]],
 +                  ir->opts.anneal_npoints[i]);
 +          fprintf(stderr,"Time (ps)   Temperature (K)\n");
 +          /* All terms except the last one */
 +          for(j=0;j<(ir->opts.anneal_npoints[i]-1);j++) 
 +              fprintf(stderr,"%9.1f      %5.1f\n",ir->opts.anneal_time[i][j],ir->opts.anneal_temp[i][j]);
 +          
 +          /* Finally the last one */
 +          j = ir->opts.anneal_npoints[i]-1;
 +          if(ir->opts.annealing[i]==eannSINGLE)
 +            fprintf(stderr,"%9.1f-     %5.1f\n",ir->opts.anneal_time[i][j],ir->opts.anneal_temp[i][j]);
 +          else {
 +            fprintf(stderr,"%9.1f      %5.1f\n",ir->opts.anneal_time[i][j],ir->opts.anneal_temp[i][j]);
 +            if(fabs(ir->opts.anneal_temp[i][j]-ir->opts.anneal_temp[i][0])>GMX_REAL_EPS)
 +              warning_note(wi,"There is a temperature jump when your annealing loops back.\n");
 +          }
 +        }
 +      } 
 +      }
 +    }
 +  }   
 +
 +  if (ir->ePull != epullNO) {
 +    make_pull_groups(ir->pull,pull_grp,grps,gnames);
 +  }
 +  
 +  if (ir->bRot) {
 +    make_rotation_groups(ir->rot,rot_grp,grps,gnames);
 +  }
 +
 +  nacc = str_nelem(acc,MAXPTR,ptr1);
 +  nacg = str_nelem(accgrps,MAXPTR,ptr2);
 +  if (nacg*DIM != nacc)
 +    gmx_fatal(FARGS,"Invalid Acceleration input: %d groups and %d acc. values",
 +              nacg,nacc);
 +  do_numbering(natoms,groups,nacg,ptr2,grps,gnames,egcACC,
 +               restnm,egrptpALL_GENREST,bVerbose,wi);
 +  nr = groups->grps[egcACC].nr;
 +  snew(ir->opts.acc,nr);
 +  ir->opts.ngacc=nr;
 +  
 +  for(i=k=0; (i<nacg); i++)
 +    for(j=0; (j<DIM); j++,k++)
 +      ir->opts.acc[i][j]=strtod(ptr1[k],NULL);
 +  for( ;(i<nr); i++)
 +    for(j=0; (j<DIM); j++)
 +      ir->opts.acc[i][j]=0;
 +  
 +  nfrdim  = str_nelem(frdim,MAXPTR,ptr1);
 +  nfreeze = str_nelem(freeze,MAXPTR,ptr2);
 +  if (nfrdim != DIM*nfreeze)
 +    gmx_fatal(FARGS,"Invalid Freezing input: %d groups and %d freeze values",
 +              nfreeze,nfrdim);
 +  do_numbering(natoms,groups,nfreeze,ptr2,grps,gnames,egcFREEZE,
 +               restnm,egrptpALL_GENREST,bVerbose,wi);
 +  nr = groups->grps[egcFREEZE].nr;
 +  ir->opts.ngfrz=nr;
 +  snew(ir->opts.nFreeze,nr);
 +  for(i=k=0; (i<nfreeze); i++)
 +    for(j=0; (j<DIM); j++,k++) {
 +      ir->opts.nFreeze[i][j]=(gmx_strncasecmp(ptr1[k],"Y",1)==0);
 +      if (!ir->opts.nFreeze[i][j]) {
 +      if (gmx_strncasecmp(ptr1[k],"N",1) != 0) {
 +        sprintf(warnbuf,"Please use Y(ES) or N(O) for freezedim only "
 +                "(not %s)", ptr1[k]);
 +        warning(wi,warn_buf);
 +      }
 +      }
 +    }
 +  for( ; (i<nr); i++)
 +    for(j=0; (j<DIM); j++)
 +      ir->opts.nFreeze[i][j]=0;
 +  
 +  nenergy=str_nelem(energy,MAXPTR,ptr1);
 +  do_numbering(natoms,groups,nenergy,ptr1,grps,gnames,egcENER,
 +               restnm,egrptpALL_GENREST,bVerbose,wi);
 +  add_wall_energrps(groups,ir->nwall,symtab);
 +  ir->opts.ngener = groups->grps[egcENER].nr;
 +  nvcm=str_nelem(vcm,MAXPTR,ptr1);
 +  bRest =
 +    do_numbering(natoms,groups,nvcm,ptr1,grps,gnames,egcVCM,
 +                 restnm,nvcm==0 ? egrptpALL_GENREST : egrptpPART,bVerbose,wi);
 +  if (bRest) {
 +    warning(wi,"Some atoms are not part of any center of mass motion removal group.\n"
 +          "This may lead to artifacts.\n"
 +          "In most cases one should use one group for the whole system.");
 +  }
 +
 +  /* Now we have filled the freeze struct, so we can calculate NRDF */ 
 +  calc_nrdf(mtop,ir,gnames);
 +
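 +  /* Note: (v && NULL) below is always false, so this velocity rescaling block is effectively disabled. */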
 +  if (v && NULL) {
 +    real fac,ntot=0;
 +    
 +    /* Must check per group! */
 +    for(i=0; (i<ir->opts.ngtc); i++) 
 +      ntot += ir->opts.nrdf[i];
 +    if (ntot != (DIM*natoms)) {
 +      fac = sqrt(ntot/(DIM*natoms));
 +      if (bVerbose)
 +      fprintf(stderr,"Scaling velocities by a factor of %.3f to account for constraints\n"
 +              "and removal of center of mass motion\n",fac);
 +      for(i=0; (i<natoms); i++)
 +      svmul(fac,v[i],v[i]);
 +    }
 +  }
 +  
 +  nuser=str_nelem(user1,MAXPTR,ptr1);
 +  do_numbering(natoms,groups,nuser,ptr1,grps,gnames,egcUser1,
 +               restnm,egrptpALL_GENREST,bVerbose,wi);
 +  nuser=str_nelem(user2,MAXPTR,ptr1);
 +  do_numbering(natoms,groups,nuser,ptr1,grps,gnames,egcUser2,
 +               restnm,egrptpALL_GENREST,bVerbose,wi);
 +  nuser=str_nelem(xtc_grps,MAXPTR,ptr1);
 +  do_numbering(natoms,groups,nuser,ptr1,grps,gnames,egcXTC,
 +               restnm,egrptpONE,bVerbose,wi);
 +  nofg = str_nelem(orirefitgrp,MAXPTR,ptr1);
 +  do_numbering(natoms,groups,nofg,ptr1,grps,gnames,egcORFIT,
 +               restnm,egrptpALL_GENREST,bVerbose,wi);
 +
 +  /* QMMM input processing */
 +  nQMg          = str_nelem(QMMM,MAXPTR,ptr1);
 +  nQMmethod     = str_nelem(QMmethod,MAXPTR,ptr2);
 +  nQMbasis      = str_nelem(QMbasis,MAXPTR,ptr3);
 +  if((nQMmethod != nQMg)||(nQMbasis != nQMg)){
 +    gmx_fatal(FARGS,"Invalid QMMM input: %d groups %d basissets"
 +            " and %d methods\n",nQMg,nQMbasis,nQMmethod);
 +  }
 +  /* group rest, if any, is always MM! */
 +  do_numbering(natoms,groups,nQMg,ptr1,grps,gnames,egcQMMM,
 +               restnm,egrptpALL_GENREST,bVerbose,wi);
 +  nr = nQMg; /*atoms->grps[egcQMMM].nr;*/
 +  ir->opts.ngQM = nQMg;
 +  snew(ir->opts.QMmethod,nr);
 +  snew(ir->opts.QMbasis,nr);
 +  for(i=0;i<nr;i++){
 +    /* input consists of strings: RHF CASSCF PM3 .. These need to be
 +     * converted to the corresponding enum in names.c
 +     */
 +    ir->opts.QMmethod[i] = search_QMstring(ptr2[i],eQMmethodNR,
 +                                           eQMmethod_names);
 +    ir->opts.QMbasis[i]  = search_QMstring(ptr3[i],eQMbasisNR,
 +                                           eQMbasis_names);
 +
 +  }
 +  nQMmult   = str_nelem(QMmult,MAXPTR,ptr1);
 +  nQMcharge = str_nelem(QMcharge,MAXPTR,ptr2);
 +  nbSH      = str_nelem(bSH,MAXPTR,ptr3);
 +  snew(ir->opts.QMmult,nr);
 +  snew(ir->opts.QMcharge,nr);
 +  snew(ir->opts.bSH,nr);
 +
 +  for(i=0;i<nr;i++){
 +    ir->opts.QMmult[i]   = strtol(ptr1[i],NULL,10);
 +    ir->opts.QMcharge[i] = strtol(ptr2[i],NULL,10);
 +    ir->opts.bSH[i]      = (gmx_strncasecmp(ptr3[i],"Y",1)==0);
 +  }
 +
 +  nCASelec  = str_nelem(CASelectrons,MAXPTR,ptr1);
 +  nCASorb   = str_nelem(CASorbitals,MAXPTR,ptr2);
 +  snew(ir->opts.CASelectrons,nr);
 +  snew(ir->opts.CASorbitals,nr);
 +  for(i=0;i<nr;i++){
 +    ir->opts.CASelectrons[i]= strtol(ptr1[i],NULL,10);
 +    ir->opts.CASorbitals[i] = strtol(ptr2[i],NULL,10);
 +  }
 +  /* special optimization options */
 +
 +  nbOPT = str_nelem(bOPT,MAXPTR,ptr1);
 +  nbTS = str_nelem(bTS,MAXPTR,ptr2);
 +  snew(ir->opts.bOPT,nr);
 +  snew(ir->opts.bTS,nr);
 +  for(i=0;i<nr;i++){
 +    ir->opts.bOPT[i] = (gmx_strncasecmp(ptr1[i],"Y",1)==0);
 +    ir->opts.bTS[i]  = (gmx_strncasecmp(ptr2[i],"Y",1)==0);
 +  }
 +  nSAon     = str_nelem(SAon,MAXPTR,ptr1);
 +  nSAoff    = str_nelem(SAoff,MAXPTR,ptr2);
 +  nSAsteps  = str_nelem(SAsteps,MAXPTR,ptr3);
 +  snew(ir->opts.SAon,nr);
 +  snew(ir->opts.SAoff,nr);
 +  snew(ir->opts.SAsteps,nr);
 +
 +  for(i=0;i<nr;i++){
 +    ir->opts.SAon[i]    = strtod(ptr1[i],NULL);
 +    ir->opts.SAoff[i]   = strtod(ptr2[i],NULL);
 +    ir->opts.SAsteps[i] = strtol(ptr3[i],NULL,10);
 +  }
 +  /* end of QMMM input */
 +
 +  if (bVerbose)
 +    for(i=0; (i<egcNR); i++) {
 +      fprintf(stderr,"%-16s has %d element(s):",gtypes[i],groups->grps[i].nr); 
 +      for(j=0; (j<groups->grps[i].nr); j++)
 +      fprintf(stderr," %s",*(groups->grpname[groups->grps[i].nm_ind[j]]));
 +      fprintf(stderr,"\n");
 +    }
 +
 +  nr = groups->grps[egcENER].nr;
 +  snew(ir->opts.egp_flags,nr*nr);
 +
 +  bExcl = do_egp_flag(ir,groups,"energygrp-excl",egpexcl,EGP_EXCL);
 +    if (bExcl && ir->cutoff_scheme == ecutsVERLET) 
 +    {
 +        warning_error(wi,"Energy group exclusions are not (yet) implemented for the Verlet scheme");
 +    } 
 +  if (bExcl && EEL_FULL(ir->coulombtype))
 +    warning(wi,"Can not exclude the lattice Coulomb energy between energy groups");
 +
 +  bTable = do_egp_flag(ir,groups,"energygrp-table",egptable,EGP_TABLE);
 +  if (bTable && !(ir->vdwtype == evdwUSER) && 
 +      !(ir->coulombtype == eelUSER) && !(ir->coulombtype == eelPMEUSER) &&
 +      !(ir->coulombtype == eelPMEUSERSWITCH))
 +    gmx_fatal(FARGS,"Can only have energy group pair tables in combination with user tables for VdW and/or Coulomb");
 +
 +  decode_cos(efield_x,&(ir->ex[XX]),FALSE);
 +  decode_cos(efield_xt,&(ir->et[XX]),TRUE);
 +  decode_cos(efield_y,&(ir->ex[YY]),FALSE);
 +  decode_cos(efield_yt,&(ir->et[YY]),TRUE);
 +  decode_cos(efield_z,&(ir->ex[ZZ]),FALSE);
 +  decode_cos(efield_zt,&(ir->et[ZZ]),TRUE);
 +
 +  if (ir->bAdress)
 +    do_adress_index(ir->adress,groups,gnames,&(ir->opts),wi);
 +
 +  for(i=0; (i<grps->nr); i++)
 +    sfree(gnames[i]);
 +  sfree(gnames);
 +  done_blocka(grps);
 +  sfree(grps);
 +
 +}
 +
 +
 +
 +static void check_disre(gmx_mtop_t *mtop)
 +{
 +  gmx_ffparams_t *ffparams;
 +  t_functype *functype;
 +  t_iparams  *ip;
 +  int i,ndouble,ftype;
 +  int label,old_label;
 +  
 +  if (gmx_mtop_ftype_count(mtop,F_DISRES) > 0) {
 +    ffparams  = &mtop->ffparams;
 +    functype  = ffparams->functype;
 +    ip        = ffparams->iparams;
 +    ndouble   = 0;
 +    old_label = -1;
 +    for(i=0; i<ffparams->ntypes; i++) {
 +      ftype = functype[i];
 +      if (ftype == F_DISRES) {
 +      label = ip[i].disres.label;
 +      if (label == old_label) {
 +        fprintf(stderr,"Distance restraint index %d occurs twice\n",label);
 +        ndouble++;
 +      }
 +      old_label = label;
 +      }
 +    }
 +    if (ndouble>0)
 +      gmx_fatal(FARGS,"Found %d double distance restraint indices,\n"
 +              "probably the parameters for multiple pairs in one restraint "
 +              "are not identical\n",ndouble);
 +  }
 +}
 +
 +static gmx_bool absolute_reference(t_inputrec *ir,gmx_mtop_t *sys,
 +                                   gmx_bool posres_only,
 +                                   ivec AbsRef)
 +{
 +    int d,g,i;
 +    gmx_mtop_ilistloop_t iloop;
 +    t_ilist *ilist;
 +    int nmol;
 +    t_iparams *pr;
 +
 +    clear_ivec(AbsRef);
 +
 +    if (!posres_only)
 +    {
 +        /* Check the COM */
 +        for(d=0; d<DIM; d++)
 +        {
 +            AbsRef[d] = (d < ndof_com(ir) ? 0 : 1);
 +        }
 +        /* Check for freeze groups */
 +        for(g=0; g<ir->opts.ngfrz; g++)
 +        {
 +            for(d=0; d<DIM; d++)
 +            {
 +                if (ir->opts.nFreeze[g][d] != 0)
 +                {
 +                    AbsRef[d] = 1;
 +                }
 +            }
 +        }
 +    }
 +
 +    /* Check for position restraints */
 +    iloop = gmx_mtop_ilistloop_init(sys);
 +    while (gmx_mtop_ilistloop_next(iloop,&ilist,&nmol))
 +    {
 +        if (nmol > 0 &&
 +            (AbsRef[XX] == 0 || AbsRef[YY] == 0 || AbsRef[ZZ] == 0))
 +        {
 +            for(i=0; i<ilist[F_POSRES].nr; i+=2)
 +            {
 +                pr = &sys->ffparams.iparams[ilist[F_POSRES].iatoms[i]];
 +                for(d=0; d<DIM; d++)
 +                {
 +                    if (pr->posres.fcA[d] != 0)
 +                    {
 +                        AbsRef[d] = 1;
 +                    }
 +                }
 +            }
 +            for(i=0; i<ilist[F_FBPOSRES].nr; i+=2)
 +            {
 +                /* Check for flat-bottom posres */
 +                pr = &sys->ffparams.iparams[ilist[F_FBPOSRES].iatoms[i]];
 +                if (pr->fbposres.k != 0)
 +                {
 +                    switch(pr->fbposres.geom)
 +                    {
 +                    case efbposresSPHERE:
 +                        AbsRef[XX] = AbsRef[YY] = AbsRef[ZZ] = 1;
 +                        break;
 +                    case efbposresCYLINDER:
 +                        AbsRef[XX] = AbsRef[YY] = 1;
 +                        break;
 +                    case efbposresX: /* d=XX */
 +                    case efbposresY: /* d=YY */
 +                    case efbposresZ: /* d=ZZ */
 +                        d = pr->fbposres.geom - efbposresX;
 +                        AbsRef[d] = 1;
 +                        break;
 +                    default:
 +                        gmx_fatal(FARGS," Invalid geometry for flat-bottom position restraint.\n"
 +                                  "Expected nr between 1 and %d. Found %d\n", efbposresNR-1,
 +                                  pr->fbposres.geom);
 +                    }
 +                }
 +            }
 +        }
 +    }
 +
 +    return (AbsRef[XX] != 0 && AbsRef[YY] != 0 && AbsRef[ZZ] != 0);
 +}
 +
 +void triple_check(const char *mdparin,t_inputrec *ir,gmx_mtop_t *sys,
 +                  warninp_t wi)
 +{
 +  char err_buf[256];
 +  int  i,m,g,nmol,npct;
 +  gmx_bool bCharge,bAcc;
 +  real gdt_max,*mgrp,mt;
 +  rvec acc;
 +  gmx_mtop_atomloop_block_t aloopb;
 +  gmx_mtop_atomloop_all_t aloop;
 +  t_atom *atom;
 +  ivec AbsRef;
 +  char warn_buf[STRLEN];
 +
 +  set_warning_line(wi,mdparin,-1);
 +
 +  if (EI_DYNAMICS(ir->eI) && !EI_SD(ir->eI) && ir->eI != eiBD &&
 +      ir->comm_mode == ecmNO &&
 +      !(absolute_reference(ir,sys,FALSE,AbsRef) || ir->nsteps <= 10)) {
 +    warning(wi,"You are not using center of mass motion removal (mdp option comm-mode), numerical rounding errors can lead to build up of kinetic energy of the center of mass");
 +  }
 +
 +    /* Check for pressure coupling with absolute position restraints */
 +    if (ir->epc != epcNO && ir->refcoord_scaling == erscNO)
 +    {
 +        absolute_reference(ir,sys,TRUE,AbsRef);
 +        {
 +            for(m=0; m<DIM; m++)
 +            {
 +                if (AbsRef[m] && norm2(ir->compress[m]) > 0)
 +                {
 +                    warning(wi,"You are using pressure coupling with absolute position restraints, this will give artifacts. Use the refcoord_scaling option.");
 +                    break;
 +                }
 +            }
 +        }
 +    }
 +
 +  bCharge = FALSE;
 +  aloopb = gmx_mtop_atomloop_block_init(sys);
 +  while (gmx_mtop_atomloop_block_next(aloopb,&atom,&nmol)) {
 +    if (atom->q != 0 || atom->qB != 0) {
 +      bCharge = TRUE;
 +    }
 +  }
 +  
 +  if (!bCharge) {
 +    if (EEL_FULL(ir->coulombtype)) {
 +      sprintf(err_buf,
 +            "You are using full electrostatics treatment %s for a system without charges.\n"
 +            "This costs a lot of performance for just processing zeros, consider using %s instead.\n",
 +            EELTYPE(ir->coulombtype),EELTYPE(eelCUT));
 +      warning(wi,err_buf);
 +    }
 +  } else {
 +    if (ir->coulombtype == eelCUT && ir->rcoulomb > 0 && !ir->implicit_solvent) {
 +      sprintf(err_buf,
 +            "You are using a plain Coulomb cut-off, which might produce artifacts.\n"
 +            "You might want to consider using %s electrostatics.\n",
 +            EELTYPE(eelPME));
 +      warning_note(wi,err_buf);
 +    }
 +  }
 +
 +  /* Generalized reaction field */  
 +  if (ir->opts.ngtc == 0) {
 +    sprintf(err_buf,"No temperature coupling while using coulombtype %s",
 +          eel_names[eelGRF]);
 +    CHECK(ir->coulombtype == eelGRF);
 +  }
 +  else {
 +    sprintf(err_buf,"When using coulombtype = %s"
 +          " ref-t for temperature coupling should be > 0",
 +          eel_names[eelGRF]);
 +    CHECK((ir->coulombtype == eelGRF) && (ir->opts.ref_t[0] <= 0));
 +  }
 +
 +    if (ir->eI == eiSD1 &&
 +        (gmx_mtop_ftype_count(sys,F_CONSTR) > 0 ||
 +         gmx_mtop_ftype_count(sys,F_SETTLE) > 0))
 +    {
 +        sprintf(warn_buf,"With constraints integrator %s is less accurate, consider using %s instead",ei_names[ir->eI],ei_names[eiSD2]);
 +        warning_note(wi,warn_buf);
 +    }
 +    
 +  bAcc = FALSE;
 +  for(i=0; (i<sys->groups.grps[egcACC].nr); i++) {
 +    for(m=0; (m<DIM); m++) {
 +      if (fabs(ir->opts.acc[i][m]) > 1e-6) {
 +      bAcc = TRUE;
 +      }
 +    }
 +  }
 +  if (bAcc) {
 +    clear_rvec(acc);
 +    snew(mgrp,sys->groups.grps[egcACC].nr);
 +    aloop = gmx_mtop_atomloop_all_init(sys);
 +    while (gmx_mtop_atomloop_all_next(aloop,&i,&atom)) {
 +      mgrp[ggrpnr(&sys->groups,egcACC,i)] += atom->m;
 +    }
 +    mt = 0.0;
 +    for(i=0; (i<sys->groups.grps[egcACC].nr); i++) {
 +      for(m=0; (m<DIM); m++)
 +      acc[m] += ir->opts.acc[i][m]*mgrp[i];
 +      mt += mgrp[i];
 +    }
 +    for(m=0; (m<DIM); m++) {
 +      if (fabs(acc[m]) > 1e-6) {
 +      const char *dim[DIM] = { "X", "Y", "Z" };
 +      fprintf(stderr,
 +              "Net Acceleration in %s direction, will %s be corrected\n",
 +              dim[m],ir->nstcomm != 0 ? "" : "not");
 +      if (ir->nstcomm != 0 && m < ndof_com(ir)) {
 +        acc[m] /= mt;
 +        for (i=0; (i<sys->groups.grps[egcACC].nr); i++)
 +          ir->opts.acc[i][m] -= acc[m];
 +      }
 +      }
 +    }
 +    sfree(mgrp);
 +  }
 +
 +  if (ir->efep != efepNO && ir->fepvals->sc_alpha != 0 &&
 +      !gmx_within_tol(sys->ffparams.reppow,12.0,10*GMX_DOUBLE_EPS)) {
 +    gmx_fatal(FARGS,"Soft-core interactions are only supported with VdW repulsion power 12");
 +  }
 +
 +  if (ir->ePull != epullNO) {
 +    if (ir->pull->grp[0].nat == 0) {
 +        absolute_reference(ir,sys,FALSE,AbsRef);
 +      for(m=0; m<DIM; m++) {
 +      if (ir->pull->dim[m] && !AbsRef[m]) {
 +        warning(wi,"You are using an absolute reference for pulling, but the rest of the system does not have an absolute reference. This will lead to artifacts.");
 +        break;
 +      }
 +      }
 +    }
 +
 +    if (ir->pull->eGeom == epullgDIRPBC) {
 +      for(i=0; i<3; i++) {
 +      for(m=0; m<=i; m++) {
 +        if ((ir->epc != epcNO && ir->compress[i][m] != 0) ||
 +            ir->deform[i][m] != 0) {
 +          for(g=1; g<ir->pull->ngrp; g++) {
 +            if (ir->pull->grp[g].vec[m] != 0) {
 +              gmx_fatal(FARGS,"Can not have dynamic box while using pull geometry '%s' (dim %c)",EPULLGEOM(ir->pull->eGeom),'x'+m);
 +            }
 +          }
 +        }
 +      }
 +      }
 +    }
 +  }
 +
 +  check_disre(sys);
 +}
 +
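
The acceleration check in triple_check() above removes any net force by subtracting the mass-weighted mean acceleration from every acceleration group (for the dimensions in which COM motion is removed). A standalone illustration with hypothetical numbers:

#include <stdio.h>

int main(void)
{
    double acc[2]  = { 0.01, 0.0 };      /* group accelerations (nm/ps^2) */
    double mgrp[2] = { 1000.0, 9000.0 }; /* total mass per group (amu)    */
    double mt  = mgrp[0] + mgrp[1];
    double net = (acc[0]*mgrp[0] + acc[1]*mgrp[1])/mt;
    int    i;

    for (i = 0; i < 2; i++)
    {
        acc[i] -= net;   /* same correction as applied to ir->opts.acc above */
    }
    /* the total force now sums to zero */
    printf("corrected: %g %g, net force: %g\n",
           acc[0], acc[1], acc[0]*mgrp[0] + acc[1]*mgrp[1]);
    return 0;
}
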
 +void double_check(t_inputrec *ir,matrix box,gmx_bool bConstr,warninp_t wi)
 +{
 +  real min_size;
 +  gmx_bool bTWIN;
 +  char warn_buf[STRLEN];
 +  const char *ptr;
 +  
 +  ptr = check_box(ir->ePBC,box);
 +  if (ptr) {
 +      warning_error(wi,ptr);
 +  }  
 +
 +  if (bConstr && ir->eConstrAlg == econtSHAKE) {
 +    if (ir->shake_tol <= 0.0) {
 +      sprintf(warn_buf,"ERROR: shake-tol must be > 0 instead of %g\n",
 +              ir->shake_tol);
 +      warning_error(wi,warn_buf);
 +    }
 +
 +    if (IR_TWINRANGE(*ir) && ir->nstlist > 1) {
 +      sprintf(warn_buf,"With twin-range cut-off's and SHAKE the virial and the pressure are incorrect.");
 +      if (ir->epc == epcNO) {
 +      warning(wi,warn_buf);
 +      } else {
 +          warning_error(wi,warn_buf);
 +      }
 +    }
 +  }
 +
 +  if( (ir->eConstrAlg == econtLINCS) && bConstr) {
 +    /* If we have Lincs constraints: */
 +    if(ir->eI==eiMD && ir->etc==etcNO &&
 +       ir->eConstrAlg==econtLINCS && ir->nLincsIter==1) {
 +      sprintf(warn_buf,"For energy conservation with LINCS, lincs_iter should be 2 or larger.\n");
 +      warning_note(wi,warn_buf);
 +    }
 +    
 +    if ((ir->eI == eiCG || ir->eI == eiLBFGS) && (ir->nProjOrder<8)) {
 +      sprintf(warn_buf,"For accurate %s with LINCS constraints, lincs-order should be 8 or more.",ei_names[ir->eI]);
 +      warning_note(wi,warn_buf);
 +    }
 +    if (ir->epc==epcMTTK) {
 +        warning_error(wi,"MTTK not compatible with lincs -- use shake instead.");
 +    }
 +  }
 +
 +  if (ir->LincsWarnAngle > 90.0) {
 +    sprintf(warn_buf,"lincs-warnangle can not be larger than 90 degrees, setting it to 90.\n");
 +    warning(wi,warn_buf);
 +    ir->LincsWarnAngle = 90.0;
 +  }
 +
 +  if (ir->ePBC != epbcNONE) {
 +    if (ir->nstlist == 0) {
 +      warning(wi,"With nstlist=0 atoms are only put into the box at step 0, therefore drifting atoms might cause the simulation to crash.");
 +    }
 +    bTWIN = (ir->rlistlong > ir->rlist);
 +    if (ir->ns_type == ensGRID) {
 +      if (sqr(ir->rlistlong) >= max_cutoff2(ir->ePBC,box)) {
 +          sprintf(warn_buf,"ERROR: The cut-off length is longer than half the shortest box vector or longer than the smallest box diagonal element. Increase the box size or decrease %s.\n",
 +              bTWIN ? (ir->rcoulomb==ir->rlistlong ? "rcoulomb" : "rvdw"):"rlist");
 +          warning_error(wi,warn_buf);
 +      }
 +    } else {
 +      min_size = min(box[XX][XX],min(box[YY][YY],box[ZZ][ZZ]));
 +      if (2*ir->rlistlong >= min_size) {
 +          sprintf(warn_buf,"ERROR: One of the box lengths is smaller than twice the cut-off length. Increase the box size or decrease rlist.");
 +          warning_error(wi,warn_buf);
 +      if (TRICLINIC(box))
 +        fprintf(stderr,"Grid search might allow larger cut-off's than simple search with triclinic boxes.");
 +      }
 +    }
 +  }
 +}
 +
 +void check_chargegroup_radii(const gmx_mtop_t *mtop,const t_inputrec *ir,
 +                             rvec *x,
 +                             warninp_t wi)
 +{
 +    real rvdw1,rvdw2,rcoul1,rcoul2;
 +    char warn_buf[STRLEN];
 +
 +    calc_chargegroup_radii(mtop,x,&rvdw1,&rvdw2,&rcoul1,&rcoul2);
 +
 +    if (rvdw1 > 0)
 +    {
 +        printf("Largest charge group radii for Van der Waals: %5.3f, %5.3f nm\n",
 +               rvdw1,rvdw2);
 +    }
 +    if (rcoul1 > 0)
 +    {
 +        printf("Largest charge group radii for Coulomb:       %5.3f, %5.3f nm\n",
 +               rcoul1,rcoul2);
 +    }
 +
 +    if (ir->rlist > 0)
 +    {
 +        if (rvdw1  + rvdw2  > ir->rlist ||
 +            rcoul1 + rcoul2 > ir->rlist)
 +        {
 +            sprintf(warn_buf,"The sum of the two largest charge group radii (%f) is larger than rlist (%f)\n",max(rvdw1+rvdw2,rcoul1+rcoul2),ir->rlist);
 +            warning(wi,warn_buf);
 +        }
 +        else
 +        {
 +            /* Here we do not use the zero at cut-off macro,
 +             * since user defined interactions might purposely
 +             * not be zero at the cut-off.
 +             */
 +            if (EVDW_IS_ZERO_AT_CUTOFF(ir->vdwtype) &&
 +                rvdw1 + rvdw2 > ir->rlist - ir->rvdw)
 +            {
 +                sprintf(warn_buf,"The sum of the two largest charge group radii (%f) is larger than rlist (%f) - rvdw (%f)\n",
 +                        rvdw1+rvdw2,
 +                        ir->rlist,ir->rvdw);
 +                if (ir_NVE(ir))
 +                {
 +                    warning(wi,warn_buf);
 +                }
 +                else
 +                {
 +                    warning_note(wi,warn_buf);
 +                }
 +            }
 +            if (EEL_IS_ZERO_AT_CUTOFF(ir->coulombtype) &&
 +                rcoul1 + rcoul2 > ir->rlistlong - ir->rcoulomb)
 +            {
 +                sprintf(warn_buf,"The sum of the two largest charge group radii (%f) is larger than %s (%f) - rcoulomb (%f)\n",
 +                        rcoul1+rcoul2,
 +                        ir->rlistlong > ir->rlist ? "rlistlong" : "rlist",
 +                        ir->rlistlong,ir->rcoulomb);
 +                if (ir_NVE(ir))
 +                {
 +                    warning(wi,warn_buf);
 +                }
 +                else
 +                {
 +                    warning_note(wi,warn_buf);
 +                }
 +            }
 +        }
 +    }
 +}
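
With the group cut-off scheme the neighbor list is built on charge-group centers, so the check above requires the sum of the two largest charge-group radii to fit within the rlist - rvdw (or rlist - rcoulomb) buffer; otherwise atom pairs can fall outside the list while still being inside the interaction cut-off. A toy version of that test with hypothetical values:

#include <stdio.h>

int main(void)
{
    double rvdw1 = 0.12, rvdw2 = 0.10;  /* two largest charge-group radii (nm) */
    double rlist = 1.0, rvdw = 0.9;     /* hypothetical mdp cut-off settings   */

    if (rvdw1 + rvdw2 > rlist - rvdw)
    {
        printf("warning: charge-group radii sum (%g nm) exceeds the rlist-rvdw buffer (%g nm)\n",
               rvdw1 + rvdw2, rlist - rvdw);
    }
    return 0;
}
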
Simple merge
Simple merge
Simple merge
Simple merge
index 4c6d963e7e21adafe81b8cb3fb30dc4e6a097ff0,0000000000000000000000000000000000000000..37e10af13e90dcf51f1cb36d8c1ecdb0c5ca94c6
mode 100644,000000..100644
--- /dev/null
@@@ -1,189 -1,0 +1,213 @@@
-  * Pattern matcing with wildcards.
 +/*
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gromacs Runs On Most of All Computer Systems
 + */
 +/*! \file
 + * \brief Generic string handling functions.
 + */
 +#ifndef _string2_h
 +#define _string2_h
 +
 +/*
 + *
 + * string2.h
 + * David van der Spoel
 + *
 + */
 +
 +
 +#include <string.h>
 +#include <stdio.h>
 +#include <stdlib.h>
 +#include <ctype.h>
 +#include <time.h>
 +#include <errno.h>
 +#include "../utility/gmx_header_config.h"
 +
 +#include "types/simple.h"
 +
 +/* Suppress Cygwin compiler warnings from using newlib version of
 + * ctype.h */
 +#ifdef GMX_CYGWIN
 +#undef isdigit
 +#undef isstring
 +#undef isspace
 +#undef isalnum
 +#undef isalpha
 +#undef ispunct
 +#undef isxdigit
 +#undef isupper
 +#undef islower
 +#undef toupper
 +#undef tolower
 +#endif
 +
 +#ifdef __cplusplus
 +extern "C" {
 +#endif
++#if 0
++}
++#endif
 +
 +/** Continuation character. */
 +#define CONTINUE    '\\'
 +/** Comment sign to use. */
 +#define COMMENTSIGN ';'
 +
 +/*! \brief
 + * Strip trailing spaces and if s ends with a ::CONTINUE remove that too.
 + *
 + * \returns TRUE if s ends with a CONTINUE, FALSE otherwise.
 + */
 +int continuing(char *s);
 +
 +/*! \brief
 + * Reads a line from a stream.
 + *
 + * This routine reads a string of at most \p n characters from \p stream,
 + * zero-terminated and without newlines.
 + * \p s should be long enough (>= \p n)
 + */
 +char *fgets2(char *s, int n, FILE *stream);
 +
 +/** Remove portion of a line after a ::COMMENTSIGN.  */
 +void strip_comment(char *line);
 +
 +/** Make a string uppercase. */
 +void upstring(char *str);
 +
 +/** Remove leading whitespace from a string. */
 +void ltrim(char *str);
 +
 +/** Remove trailing whitespace from a string. */
 +void rtrim(char *str);
 +
 +/** Remove leading and trailing whitespace from a string. */
 +void trim(char *str);
 +
 +/** Prints creation time stamp and user information into a file as comments. */
 +void nice_header(FILE *out, const char *fn);
 +
 +/** Version of gmx_strcasecmp() that also ignores '-' and '_'. */
 +int gmx_strcasecmp_min(const char *str1, const char *str2);
 +/** Version of gmx_strncasecmp() that also ignores '-' and '_'. */
 +int gmx_strncasecmp_min(const char *str1, const char *str2, int n);
 +
 +/** Case-insensitive strcmp(). */
 +int gmx_strcasecmp(const char *str1, const char *str2);
 +/** Case-insensitive strncmp(). */
 +int gmx_strncasecmp(const char *str1, const char *str2, int n);
 +
 +/** Creates a duplicate of \p src. */
 +char *gmx_strdup(const char *src);
 +/** Duplicates first \p n characters of \p src. */
 +char *gmx_strndup(const char *src, int n);
 +
 +/*! \brief
++ * Pattern matching with wildcards.
 + *
 + * \param[in] pattern  Pattern to match against.
 + * \param[in] str      String to match.
 + * \returns   0 on match, GMX_NO_WCMATCH if there is no match.
 + *
 + * Matches \p str against \p pattern, which may contain * and ? wildcards.
 + * All other characters are matched literally.
 + * Currently, it is not possible to match literal * or ?.
 + */
 +int gmx_wcmatch(const char *pattern, const char *str);
 +
++/** Magic hash initialization number from Dan J. Bernstein. */
++extern const unsigned int
++gmx_string_hash_init;
++
++/*! \brief
++ * Return a hash of the string according to Dan J. Bernstein's algorithm.
++ * 
++ * \param[in] s          String to calculate hash for.
++ * \param[in] hash_init  Initial (or previous) hash value.
++ * \returns   Updated hash value (hash_init combined with string hash).
++ *
++ * This routine only uses characters for which isalnum(c) is true,
++ * and all characters are converted to upper case.
++ * On the first invocation for a new string, use the constant
++ * gmx_string_hash_init for the second argument. If you want to create a hash
++ * corresponding to several concatenated strings, provide the returned hash
++ * value as hash_init for the second string, etc.
++ */
++unsigned int
++gmx_string_hash_func(const char *s, unsigned int hash_init);
++    
 +/** Return value for gmx_wcmatch() when there is no match. */
 +#define GMX_NO_WCMATCH 1
 +
 +/** Our implementation of strsep, the thread-safe replacement for strtok. */
 +char *gmx_strsep(char **stringp, const char *delim);
 +
 +/*! \brief
 + * Wraps lines, optionally indenting lines.
 + *
 + * Wraps lines at \p linewidth, indenting all following lines by \p indent
 + * spaces.  A temp buffer is allocated and returned, which can be disposed of
 + * if no longer needed.
 + * If \p bIndentFirst is FALSE, then the first line will not be indented, only
 + * the lines that are created due to wrapping.
 + */
 +char *wrap_lines(const char *buf,int line_width, int indent,
 +                 gmx_bool bIndentFirst);
 +
 +/** Implementation of the well-known Perl function split. */
 +char **split(char sep,const char *str);
 +
 +/*! \brief
 + * Convert a string to gmx_large_int_t.
 + *
 + * This method works as the standard library function strtol(), except that it
 + * does not support different bases.
 + *
 + * \attention
 + * The following differences are present from the standard behavior:
 + *  - \p endptr cannot be NULL.
 + *  - If an overflow occurs, returns zero and \p *endptr will equal \p str.
 + *    errno is still set to ERANGE.
 + */
 +gmx_large_int_t str_to_large_int_t(const char *str, char **endptr);
++    
 +#ifdef GMX_NATIVE_WINDOWS
 +#define snprintf _snprintf
 +#endif
 +
 +#ifdef __cplusplus
 +}
 +#endif
 +
 +#endif        /* _string2_h */
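
A hedged sketch of what the gmx_string_hash_func() declared above could look like, based only on its documentation (Bernstein's "times 33" hash, restricted to alphanumeric characters and case-insensitive); the real implementation lives in string2.c and may differ in detail, as may the value of gmx_string_hash_init:

#include <ctype.h>
#include <stdio.h>

/* Bernstein's customary starting value; only an assumption here. */
static const unsigned int example_hash_init = 5381u;

static unsigned int
example_string_hash(const char *s, unsigned int hash)
{
    unsigned char c;

    while ((c = (unsigned char)*s++) != '\0')
    {
        if (isalnum(c))
        {
            /* hash = hash*33 + uppercased character (djb2-style update) */
            hash = ((hash << 5) + hash) + (unsigned int)toupper(c);
        }
    }
    return hash;
}

int main(void)
{
    printf("hash(\"SOL\") = %u\n", example_string_hash("SOL", example_hash_init));
    return 0;
}
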
Simple merge
index 3645ffa71b01c57dc58914584a97385004cbdb45,0000000000000000000000000000000000000000..986dce8872bb89c4c38ca2a1165528556448d6f2
mode 100644,000000..100644
--- /dev/null
@@@ -1,358 -1,0 +1,442 @@@
- enum { 
 +/*
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GRoups of Organic Molecules in ACtion for Science
 + */
 +
 +#ifndef ENUMS_H_
 +#define ENUMS_H_
 +
 +#ifdef __cplusplus
 +extern "C" {
 +#endif
++#if 0
++} /* fixes auto-indentation problems */
++#endif
 +
 +/* note: these enums should correspond to the names in gmxlib/names.c */
 +
 +enum {
 +  epbcXYZ, epbcNONE, epbcXY, epbcSCREW, epbcNR
 +};
 +
 +enum {
 +  etcNO, etcBERENDSEN, etcNOSEHOOVER, etcYES, etcANDERSEN, etcANDERSENMASSIVE, etcVRESCALE, etcNR
 +}; /* yes is an alias for berendsen */
 +
 +#define ETC_ANDERSEN(e) (((e) == etcANDERSENMASSIVE) || ((e) == etcANDERSEN))
 +
 +enum {
 +  epcNO, epcBERENDSEN, epcPARRINELLORAHMAN, epcISOTROPIC, epcMTTK, epcNR
 +}; /* isotropic is an alias for berendsen */
 +
 +/* trotter decomposition extended variable parts */
 +enum {
 +  etrtNONE, etrtNHC, etrtBAROV, etrtBARONHC, etrtNHC2, etrtBAROV2, etrtBARONHC2, 
 +  etrtVELOCITY1, etrtVELOCITY2, etrtPOSITION, etrtSKIPALL, etrtNR
 +};
 +
 +/* sequenced parts of the trotter decomposition */
 +enum {
 +  ettTSEQ0,  ettTSEQ1,  ettTSEQ2,  ettTSEQ3,  ettTSEQ4, ettTSEQMAX
 +};
 +
 +enum {
 +  epctISOTROPIC, epctSEMIISOTROPIC, epctANISOTROPIC,
 +  epctSURFACETENSION, epctNR
 +};
 +
 +enum {
 +  erscNO, erscALL, erscCOM, erscNR
 +};
 +
- enum {
-     eintmodPOTSHIFT_VERLET, eintmodPOTSHIFT, eintmodNONE, eintmodNR
++enum {
 +  ecutsGROUP, ecutsVERLET, ecutsNR
 +};
 +
 +/* Coulomb / VdW interaction modifiers.
 + * grompp replaces eintmodPOTSHIFT_VERLET by eintmodPOTSHIFT or eintmodNONE.
++ * Exactcutoff is only used by Reaction-field-zero, and is not user-selectable.
 + */
++enum eintmod {
++    eintmodPOTSHIFT_VERLET, eintmodPOTSHIFT, eintmodNONE, eintmodPOTSWITCH, eintmodEXACTCUTOFF, eintmodNR
 +};
 +
 +/*
 + * eelNOTUSED1 used to be GB, but to enable generalized born with different
 + * forms of electrostatics (RF, switch, etc.) in the future it is now selected
 + * separately (through the implicit_solvent option).
 + */
 +enum {
 +  eelCUT,     eelRF,     eelGRF,   eelPME,  eelEWALD,  eelP3M_AD, 
 +  eelPOISSON, eelSWITCH, eelSHIFT, eelUSER, eelGB_NOTUSED, eelRF_NEC, eelENCADSHIFT, 
 +  eelPMEUSER, eelPMESWITCH, eelPMEUSERSWITCH, eelRF_ZERO, eelNR
 +};
 +
 +/* Ewald geometry */
 +enum { 
 +  eewg3D, eewg3DC, eewgNR
 +};
 +
 +#define EEL_RF(e) ((e) == eelRF || (e) == eelGRF || (e) == eelRF_NEC || (e) == eelRF_ZERO )
 +
 +#define EEL_PME(e)  ((e) == eelPME || (e) == eelPMESWITCH || (e) == eelPMEUSER || (e) == eelPMEUSERSWITCH || (e) == eelP3M_AD)
 +#define EEL_FULL(e) (EEL_PME(e) || (e) == eelPOISSON || (e) == eelEWALD)
 +
 +#define EEL_SWITCHED(e) ((e) == eelSWITCH || (e) == eelSHIFT || (e) == eelENCADSHIFT || (e) == eelPMESWITCH || (e) == eelPMEUSERSWITCH)
 +
 +#define EEL_USER(e) ((e) == eelUSER || (e) == eelPMEUSER || (e) == (eelPMESWITCH))
 +
 +#define EEL_IS_ZERO_AT_CUTOFF(e) (EEL_SWITCHED(e) || (e) == eelRF_ZERO)
 +
 +#define EEL_MIGHT_BE_ZERO_AT_CUTOFF(e) (EEL_IS_ZERO_AT_CUTOFF(e) || (e) == eelUSER || (e) == eelPMEUSER)
 +
 +enum {
 +  evdwCUT, evdwSWITCH, evdwSHIFT, evdwUSER, evdwENCADSHIFT, evdwNR
 +};
 +
 +#define EVDW_SWITCHED(e) ((e) == evdwSWITCH || (e) == evdwSHIFT || (e) == evdwENCADSHIFT)
 +
 +#define EVDW_IS_ZERO_AT_CUTOFF(e) EVDW_SWITCHED(e)
 +
 +#define EVDW_MIGHT_BE_ZERO_AT_CUTOFF(e) (EVDW_IS_ZERO_AT_CUTOFF(e) || (e) == evdwUSER)
 +
 +enum { 
 +  ensGRID, ensSIMPLE, ensNR
 +};
 +
 +/* eiVV is normal velocity verlet -- eiVVAK uses 1/2*(KE(t-dt/2)+KE(t+dt/2)) as the kinetic energy, and the half step kinetic
 +   energy for temperature control */
 +
 +enum {
 +  eiMD, eiSteep, eiCG, eiBD, eiSD2, eiNM, eiLBFGS, eiTPI, eiTPIC, eiSD1, eiVV, eiVVAK, eiNR
 +};
 +#define EI_VV(e) ((e) == eiVV || (e) == eiVVAK)
 +#define EI_MD(e) ((e) == eiMD || EI_VV(e))
 +#define EI_SD(e) ((e) == eiSD1 || (e) == eiSD2)
 +#define EI_RANDOM(e) (EI_SD(e) || (e) == eiBD)
 +/*above integrators may not conserve momenta*/
 +#define EI_DYNAMICS(e) (EI_MD(e) || EI_SD(e) || (e) == eiBD)
 +#define EI_ENERGY_MINIMIZATION(e) ((e) == eiSteep || (e) == eiCG || (e) == eiLBFGS)
 +#define EI_TPI(e) ((e) == eiTPI || (e) == eiTPIC)
 +
 +#define EI_STATE_VELOCITY(e) (EI_MD(e) || EI_SD(e))
 +
 +enum {
 +  econtLINCS, econtSHAKE, econtNR
 +};
 +
 +enum {
 +  edrNone, edrSimple, edrEnsemble, edrNR
 +};
 +
 +enum {
 +  edrwConservative, edrwEqual, edrwNR
 +};
 +
 +/* Combination rule things */
 +enum { 
 +  eCOMB_NONE, eCOMB_GEOMETRIC, eCOMB_ARITHMETIC, eCOMB_GEOM_SIG_EPS, eCOMB_NR 
 +};
 +
 +/* NBF selection */
 +enum { 
 +  eNBF_NONE, eNBF_LJ, eNBF_BHAM, eNBF_NR 
 +};
 +
 +/* simulated tempering methods */
 +enum {
 +  esimtempGEOMETRIC, esimtempEXPONENTIAL, esimtempLINEAR, esimtempNR
 +};
 +/* FEP selection */
 +enum {
 +  efepNO, efepYES, efepSTATIC, efepSLOWGROWTH, efepEXPANDED, efepNR
 +};
 +  /* if efepNO, there are no evaluations at other states.
 +     if efepYES, treated equivalently to efepSTATIC.
 +     if efepSTATIC, then lambdas do not change during the simulation.
 +     if efepSLOWGROWTH, then the states change monotonically throughout the simulation.
 +     if efepEXPANDED, then expanded ensemble simulations are occurring.
 +  */
 +
 +/* FEP coupling types */
 +enum {
 +  efptFEP,efptMASS,efptCOUL,efptVDW,efptBONDED,efptRESTRAINT,efptTEMPERATURE,efptNR
 +};
 +
 +/* How the lambda weights are calculated:
 +   elamstatsMETROPOLIS = using the Metropolis criteria
 +   elamstatsBARKER = using the Barker criteria for transition weights - also called unoptimized Bennett
 +   elamstatsMINVAR = using Barker + minimum variance for weights
 +   elamstatsWL = Wang-Landau (using visitation counts)
 +   elamstatsWWL = Weighted Wang-Landau (using optimized Gibbs-weighted visitation counts)
 +*/
 +enum {
 +  elamstatsNO, elamstatsMETROPOLIS, elamstatsBARKER, elamstatsMINVAR, elamstatsWL, elamstatsWWL, elamstatsNR
 +};
 +
 +#define ELAMSTATS_EXPANDED(e) ((e) > elamstatsNO)
 +
 +#define EWL(e) ((e) == elamstatsWL || (e) == elamstatsWWL)
 +
 +/* How moves in lambda are calculated:
 +   elmovemcMETROPOLIS - using the Metropolis criteria, and 50% up and down
 +   elmovemcBARKER - using the Barker criteria, and 50% up and down
 +   elmovemcGIBBS - computing the transition using the marginalized probabilities of the lambdas
 +   elmovemcMETGIBBS - computing the transition using the metropolized version of Gibbs (Monte Carlo Strategies in Scientific computing, Liu, p. 134)
 +*/
 +enum {
 +  elmcmoveNO,elmcmoveMETROPOLIS, elmcmoveBARKER, elmcmoveGIBBS, elmcmoveMETGIBBS, elmcmoveNR
 +};
 +
 +/* how we decide whether weights have reached equilibrium
 +   elmceqNO - never stop, weights keep going
 +   elmceqYES - fix the weights from the beginning; no movement
 +   elmceqWLDELTA - stop when the WL-delta falls below a certain level
 +   elmceqNUMATLAM - stop when we have a certain number of samples at every step
 +   elmceqSTEPS - stop when we've run a certain total number of steps
 +   elmceqSAMPLES - stop when we've run a certain total number of samples
 +   elmceqRATIO - stop when the ratio of samples (lowest to highest) is sufficiently large
 +*/
 +enum {
 +  elmceqNO,elmceqYES,elmceqWLDELTA,elmceqNUMATLAM,elmceqSTEPS,elmceqSAMPLES,elmceqRATIO,elmceqNR
 +};
 +
 +/* separate_dhdl_file selection */
 +enum
 +{
 +  /* NOTE: YES is the first one. Do NOT interpret this one as a gmx_bool */
 +  esepdhdlfileYES, esepdhdlfileNO, esepdhdlfileNR
 +};
 +
 +/* dhdl_derivatives selection */
 +enum
 +{
 +  /* NOTE: YES is the first one. Do NOT interpret this one as a gmx_bool */
 +  edhdlderivativesYES, edhdlderivativesNO, edhdlderivativesNR
 +};
 +
 +/* Solvent model */
 +enum {
 +  esolNO, esolSPC, esolTIP4P, esolNR
 +};
 +
 +/* Dispersion correction */
 +enum {
 +  edispcNO, edispcEnerPres, edispcEner, edispcAllEnerPres, edispcAllEner, edispcNR
 +}; 
 +
 +/* Shell types, for completion stuff */
 +enum {
 +  eshellCSH, eshellBASH, eshellZSH, eshellNR
 +}; 
 +
 +/* Center of mass motion selection */
 +enum { 
 +  ecmLINEAR, ecmANGULAR, ecmNO, ecmNR 
 +};
 +
 +/* New version of simulated annealing */
 +enum { 
 +  eannNO, eannSINGLE, eannPERIODIC, eannNR 
 +};
 +
 +/* Implicit solvent algorithms */
 +enum { 
 +  eisNO, eisGBSA, eisNR
 +};
 +
 +/* Algorithms for calculating GB radii */
 +enum { 
 +  egbSTILL, egbHCT, egbOBC, egbNR 
 +};
 +
 +enum {
 +  esaAPPROX, esaNO, esaSTILL, esaNR
 +};
 +
 +/* Wall types */
 +enum {
 +  ewt93, ewt104, ewtTABLE, ewt126, ewtNR
 +};
 +
 +/* Pull stuff */
 +enum {
 +  epullNO, epullUMBRELLA, epullCONSTRAINT, epullCONST_F, epullNR
 +};
 +
 +enum {
 +  epullgDIST, epullgDIR, epullgCYL, epullgPOS, epullgDIRPBC, epullgNR
 +};
 +
 +#define PULL_CYL(pull) ((pull)->eGeom == epullgCYL)
 +
 +/* Enforced rotation groups */
 +enum {
 +  erotgISO  , erotgISOPF ,
 +  erotgPM   , erotgPMPF  ,
 +  erotgRM   , erotgRMPF  ,
 +  erotgRM2  , erotgRM2PF ,
 +  erotgFLEX , erotgFLEXT ,
 +  erotgFLEX2, erotgFLEX2T,
 +  erotgNR
 +};
 +
 +enum {
 +    erotgFitRMSD, erotgFitNORM, erotgFitPOT, erotgFitNR
 +};
 +
 +/* QMMM */
 +enum {
 +  eQMmethodAM1, eQMmethodPM3, eQMmethodRHF, 
 +  eQMmethodUHF, eQMmethodDFT, eQMmethodB3LYP, eQMmethodMP2, eQMmethodCASSCF, eQMmethodB3LYPLAN,
 +  eQMmethodDIRECT, eQMmethodNR
 +};
 +
 +enum {
 +  eQMbasisSTO3G, eQMbasisSTO3G2, eQMbasis321G, 
 +  eQMbasis321Gp, eQMbasis321dGp, eQMbasis621G,
 +  eQMbasis631G, eQMbasis631Gp, eQMbasis631dGp, 
 +  eQMbasis6311G, eQMbasisNR
 +};
 +
 +enum {
 +  eQMMMschemenormal,eQMMMschemeoniom,eQMMMschemeNR
 +};
 +
 +enum {
 +  eMultentOptName, eMultentOptNo, eMultentOptLast, eMultentOptNR
 +};
 +
 +/* flat-bottom posres geometries */
 +enum {
 +  efbposresZERO, efbposresSPHERE, efbposresCYLINDER, efbposresX, efbposresY, efbposresZ,
 +  efbposresNR
 +};
 +
 +enum {
 +  eAdressOff,eAdressConst, eAdressXSplit, eAdressSphere, eAdressNR
 +};
 +
 +enum {
 +  eAdressICOff, eAdressICThermoForce, eAdressICNR
 +};
 +
 +enum {
 +  eAdressSITEcom,eAdressSITEcog, eAdressSITEatom, eAdressSITEatomatom, eAdressSITENR
 +};
 +
++
++/* The interactions contained in a (possibly merged) table
++ * for computing electrostatic, VDW repulsion and/or VDW dispersion 
++ * contributions.
++ */
++enum gmx_table_interaction
++{
++    GMX_TABLE_INTERACTION_ELEC,
++    GMX_TABLE_INTERACTION_VDWREP_VDWDISP,
++    GMX_TABLE_INTERACTION_VDWEXPREP_VDWDISP,
++    GMX_TABLE_INTERACTION_VDWDISP,
++    GMX_TABLE_INTERACTION_ELEC_VDWREP_VDWDISP,
++    GMX_TABLE_INTERACTION_ELEC_VDWEXPREP_VDWDISP,
++    GMX_TABLE_INTERACTION_ELEC_VDWDISP,
++    GMX_TABLE_INTERACTION_NR
++};
++
++/* Different formats for table data. Cubic spline tables are typically stored
++ * with the four Y,F,G,H intermediate values (check tables.c for format), which
++ * makes it easy to load with a single 4-way SIMD instruction too.
++ * Linear tables only need one value per table point, or two if both V and F
++ * are calculated. However, with SIMD instructions this makes the loads unaligned,
++ * and in that case we store the data as F, D=F(i+1)-F(i), V, and then a blank value,
++ * which again makes it possible to load as a single instruction.
++ */
++enum gmx_table_format
++{
++    GMX_TABLE_FORMAT_CUBICSPLINE_YFGH,
++    GMX_TABLE_FORMAT_LINEAR_VF,
++    GMX_TABLE_FORMAT_LINEAR_V,
++    GMX_TABLE_FORMAT_LINEAR_F,
++    GMX_TABLE_FORMAT_LINEAR_FDV0,
++    GMX_TABLE_FORMAT_NR
++};
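
    As a worked illustration of the CUBICSPLINE_YFGH layout described above, a minimal
    sketch of evaluating one table point; the names VFtab and tabscale and the helper
    itself are assumptions for illustration, not part of this header:

    /* Evaluate the potential V and its derivative with respect to eps at
     * distance r, for a cubic-spline table stored as Y,F,G,H quadruplets and
     * indexed as r*tabscale (see tables.c for the exact conventions). */
    static void eval_yfgh_sketch(const real *VFtab, real tabscale, real r,
                                 real *V, real *dVdeps)
    {
        real rt  = r*tabscale;
        int  n0  = (int)rt;        /* table bin                   */
        real eps = rt - n0;        /* fractional part within bin  */
        int  nnn = 4*n0;           /* four values stored per bin  */
        real Y   = VFtab[nnn];
        real Fp  = VFtab[nnn+1] + eps*VFtab[nnn+2] + eps*eps*VFtab[nnn+3];

        *V      = Y + eps*Fp;
        *dVdeps = VFtab[nnn+1] + 2*eps*VFtab[nnn+2] + 3*eps*eps*VFtab[nnn+3];
    }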
++
++/* Neighborlist geometry type.
++ * Kernels will compute interactions between two particles, 
++ * 3-center water, 4-center water or coarse-grained beads.
++ */
++enum gmx_nblist_kernel_geometry
++{
++    GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE,
++    GMX_NBLIST_GEOMETRY_WATER3_PARTICLE,
++    GMX_NBLIST_GEOMETRY_WATER3_WATER3,
++    GMX_NBLIST_GEOMETRY_WATER4_PARTICLE,
++    GMX_NBLIST_GEOMETRY_WATER4_WATER4,
++    GMX_NBLIST_GEOMETRY_CG_CG,
++    GMX_NBLIST_GEOMETRY_NR
++};
++
++/* Types of electrostatics calculations available inside nonbonded kernels.
++ * Note that these do NOT necessarily correspond to the user selections in the MDP file;
++ * for instance, many interaction types map to tabulated kernels.
++ */
++enum gmx_nbkernel_elec
++{
++    GMX_NBKERNEL_ELEC_NONE,
++    GMX_NBKERNEL_ELEC_COULOMB,
++    GMX_NBKERNEL_ELEC_REACTIONFIELD,
++    GMX_NBKERNEL_ELEC_CUBICSPLINETABLE,
++    GMX_NBKERNEL_ELEC_GENERALIZEDBORN,
++    GMX_NBKERNEL_ELEC_EWALD,
++    GMX_NBKERNEL_ELEC_NR
++};
++
++/* Types of vdw calculations available inside nonbonded kernels.
++ * Note that these do NOT necessarily correspond to the user selections in the MDP file;
++ * for instance, many interaction types map to tabulated kernels.
++ */
++enum gmx_nbkernel_vdw
++{
++    GMX_NBKERNEL_VDW_NONE,
++    GMX_NBKERNEL_VDW_LENNARDJONES,
++    GMX_NBKERNEL_VDW_BUCKINGHAM,
++    GMX_NBKERNEL_VDW_CUBICSPLINETABLE,
++    GMX_NBKERNEL_VDW_NR
++};
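
    Purely as an illustration of how the two kernel enums above can be combined, a
    hypothetical dispatch-table sketch; the typedef, table and lookup function are
    illustrative only and are not GROMACS APIs, only the enum constants are real:

    typedef void (*nb_kernel_func_sketch_t)(void);   /* placeholder signature only */

    static nb_kernel_func_sketch_t
    kernel_table_sketch[GMX_NBKERNEL_ELEC_NR][GMX_NBKERNEL_VDW_NR];

    static nb_kernel_func_sketch_t
    lookup_kernel_sketch(enum gmx_nbkernel_elec elec, enum gmx_nbkernel_vdw vdw)
    {
        return kernel_table_sketch[elec][vdw];  /* NULL when no specialized kernel exists */
    }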
++
++
++
 +#ifdef __cplusplus
 +}
 +#endif
 +
 +#endif /* ENUMS_H_ */
index a6b51ffeb7e96c695974cd6c8d242585b4c96244,0000000000000000000000000000000000000000..3433adca4173daeae75fe9f0f9cfe08f68d6b40c
mode 100644,000000..100644
--- /dev/null
@@@ -1,331 -1,0 +1,331 @@@
-   real *tab;      /* the actual tables, per point there are  4 numbers */
 +/*
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GRoups of Organic Molecules in ACtion for Science
 + */
 +
 +
 +#ifndef _idef_h
 +#define _idef_h
 +
 +#include "simple.h"
 +
 +#ifdef __cplusplus
 +extern "C" {
 +#endif
 +
 +
 +/* check kernel/toppush.c when you change these numbers */
 +#define MAXATOMLIST   6
 +#define MAXFORCEPARAM 12
 +#define NR_RBDIHS     6
 +#define NR_FOURDIHS     4
 +
 +typedef atom_id t_iatom;
 +
 +/* this MUST correspond to the 
 +   t_interaction_function[F_NRE] in gmxlib/ifunc.c */
 +enum {
 +  F_BONDS,
 +  F_G96BONDS,
 +  F_MORSE,
 +  F_CUBICBONDS,
 +  F_CONNBONDS,
 +  F_HARMONIC,
 +  F_FENEBONDS,
 +  F_TABBONDS,
 +  F_TABBONDSNC,
 +  F_RESTRBONDS,
 +  F_ANGLES, 
 +  F_G96ANGLES,
 +  F_LINEAR_ANGLES,
 +  F_CROSS_BOND_BONDS,
 +  F_CROSS_BOND_ANGLES,
 +  F_UREY_BRADLEY,
 +  F_QUARTIC_ANGLES,
 +  F_TABANGLES,
 +  F_PDIHS,
 +  F_RBDIHS, 
 +  F_FOURDIHS,
 +  F_IDIHS, 
 +  F_PIDIHS, 
 +  F_TABDIHS,
 +  F_CMAP,
 +  F_GB12,
 +  F_GB13,
 +  F_GB14,
 +  F_GBPOL,
 +  F_NPSOLVATION,
 +  F_LJ14,
 +  F_COUL14,
 +  F_LJC14_Q,
 +  F_LJC_PAIRS_NB,
 +  F_LJ,
 +  F_BHAM,
 +  F_LJ_LR,
 +  F_BHAM_LR,
 +  F_DISPCORR,
 +  F_COUL_SR,
 +  F_COUL_LR,
 +  F_RF_EXCL,
 +  F_COUL_RECIP,
 +  F_DPD,
 +  F_POLARIZATION,
 +  F_WATER_POL,
 +  F_THOLE_POL,
 +  F_ANHARM_POL,
 +  F_POSRES,
 +  F_FBPOSRES,
 +  F_DISRES,
 +  F_DISRESVIOL,
 +  F_ORIRES,
 +  F_ORIRESDEV,
 +  F_ANGRES,
 +  F_ANGRESZ,
 +  F_DIHRES,
 +  F_DIHRESVIOL,
 +  F_CONSTR,
 +  F_CONSTRNC,
 +  F_SETTLE,
 +  F_VSITE2,
 +  F_VSITE3,
 +  F_VSITE3FD,
 +  F_VSITE3FAD,
 +  F_VSITE3OUT,
 +  F_VSITE4FD,
 +  F_VSITE4FDN,
 +  F_VSITEN,
 +  F_COM_PULL,
 +  F_EQM,
 +  F_EPOT,
 +  F_EKIN,
 +  F_ETOT,
 +  F_ECONSERVED,
 +  F_TEMP,
 +  F_VTEMP,
 +  F_PDISPCORR,
 +  F_PRES,
 +  F_DHDL_CON,
 +  F_DVDL,
 +  F_DKDL,
 +  F_DVDL_COUL,
 +  F_DVDL_VDW,
 +  F_DVDL_BONDED,
 +  F_DVDL_RESTRAINT,
 +  F_DVDL_TEMPERATURE, /* not calculated for now, but should just be the energy (NVT) or enthalpy (NPT), or 0 (NVE) */
 +  F_NRE               /* This number is for the total number of energies      */
 +};
 +
 +#define IS_RESTRAINT_TYPE(ifunc) (((ifunc==F_POSRES) || (ifunc==F_DISRES) || (ifunc==F_RESTRBONDS) || (ifunc==F_DISRESVIOL) || (ifunc==F_ORIRES) || (ifunc==F_ORIRESDEV) || (ifunc==F_ANGRES) || (ifunc == F_ANGRESZ) || (ifunc==F_DIHRES)))
 +
 +/* A macro for checking if ftype is an explicit pair-listed LJ or COULOMB
 + * interaction type:
 + * bonded LJ (usually 1-4), or special listed non-bonded for FEP.
 + */
 +#define IS_LISTED_LJ_C(ftype) ((ftype) >= F_LJ14 && (ftype) <= F_LJC_PAIRS_NB)
 +
 +typedef union
 +{
 +  /* Some parameters have A and B values for free energy calculations.
 +   * The B values are not used for regular simulations of course.
 +   * Free Energy for nonbondeds can be computed by changing the atom type.
 +   * The harmonic type is used for all harmonic potentials:
 +   * bonds, angles and improper dihedrals
 +   */
 +  struct {real a,b,c;                                    } bham;
 +  struct {real rA,krA,rB,krB;                            } harmonic;
 +  struct {real klinA,aA,klinB,aB;                          } linangle;
 +  struct {real lowA,up1A,up2A,kA,lowB,up1B,up2B,kB;        } restraint;
 +  /* No free energy supported for cubic bonds, FENE, WPOL or cross terms */ 
 +  struct {real b0,kb,kcub;                                 } cubic;
 +  struct {real bm,kb;                                      } fene;
 +  struct {real r1e,r2e,krr;                                } cross_bb;
 +  struct {real r1e,r2e,r3e,krt;                            } cross_ba;
 +  struct {real thetaA,kthetaA,r13A,kUBA,thetaB,kthetaB,r13B,kUBB;} u_b;
 +  struct {real theta,c[5];                                 } qangle; 
 +  struct {real alpha;                                      } polarize;
 +  struct {real alpha,drcut,khyp;                           } anharm_polarize;
 +  struct {real al_x,al_y,al_z,rOH,rHH,rOD;                 } wpol;
 +  struct {real a,alpha1,alpha2,rfac;                       } thole;
 +  struct {real c6,c12;                                           } lj;
 +  struct {real c6A,c12A,c6B,c12B;                        } lj14;
 +  struct {real fqq,qi,qj,c6,c12;                         } ljc14;
 +  struct {real qi,qj,c6,c12;                             } ljcnb;
 +  /* Proper dihedrals cannot have different multiplicity when
 +   * doing free energy calculations, because the potential would not
 +   * be periodic anymore.
 +   */ 
 +  struct {real phiA,cpA;int mult;real phiB,cpB;            } pdihs;
 +  struct {real dA,dB;                                    } constr;
 +  /* SETTLE cannot be used for free energy calculations of water bond geometry.
 +   * Use SHAKE (or LINCS) instead if you have to change the water bonds.
 +   */
 +  struct {real doh,dhh;                                   } settle;
 +  struct {real b0A,cbA,betaA,b0B,cbB,betaB;               } morse;
 +  struct {real pos0A[DIM],fcA[DIM],pos0B[DIM],fcB[DIM];   } posres;
 +  struct {real pos0[DIM],r,k; int geom;                   } fbposres;
 +  struct {real rbcA[NR_RBDIHS], rbcB[NR_RBDIHS];          } rbdihs;
 +  struct {real a,b,c,d,e,f;                               } vsite;   
 +  struct {int  n; real a;                                 } vsiten;   
 +  /* NOTE: npair is only set after reading the tpx file */
 +  struct {real low,up1,up2,kfac;int type,label,npair;     } disres; 
 +  struct {real phiA,dphiA,kfacA,phiB,dphiB,kfacB;         } dihres;
 +  struct {int  ex,power,label; real c,obs,kfac;           } orires;
 +  struct {int  table;real kA;real kB;                     } tab;
 +  struct {real sar,st,pi,gbr,bmlt;                        } gb;
 +  struct {int cmapA,cmapB;                                } cmap;
 +  struct {real buf[MAXFORCEPARAM];                      } generic; /* Conversion */
 +} t_iparams;
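
    A minimal sketch of the A/B free-energy convention noted in the comment above,
    using the harmonic type as an example; the helper is illustrative and assumes the
    usual linear combination of the A and B end states in the coupling parameter lambda:

    /* Combine the A and B end-state parameters of a harmonic interaction at a
     * given lambda; regular (non-free-energy) runs simply use the A values. */
    static void harmonic_lambda_sketch(const t_iparams *ip, real lambda,
                                       real *r0, real *kr)
    {
        *r0 = (1 - lambda)*ip->harmonic.rA  + lambda*ip->harmonic.rB;
        *kr = (1 - lambda)*ip->harmonic.krA + lambda*ip->harmonic.krB;
    }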
 +
 +typedef int t_functype;
 +
 +/*
 + * The nonperturbed/perturbed interactions are now separated (sorted) in the
 + * ilist, such that the first 0..(nr_nonperturbed-1) ones are exactly that, and 
 + * the remaining ones from nr_nonperturbed..(nr-1) are perturbed bonded 
 + * interactions.
 + */
 +typedef struct
 +{
 +  int nr;
 +  int nr_nonperturbed;
 +  t_iatom *iatoms;
 +  int nalloc;
 +} t_ilist;
 +
 +/*
 + * The struct t_ilist defines a list of atoms with their interactions. 
 + * General field description:
 + *   int nr
 + *    the size (nr elements) of the interactions array (iatoms[]).
 + *   t_iatom *iatoms
 + *    specifies which atoms are involved in an interaction of a certain 
 + *       type. The layout of this array is as follows:
 + *
 + *      +-----+---+---+---+-----+---+---+-----+---+---+---+-----+---+---+...
 + *      |type1|at1|at2|at3|type2|at1|at2|type1|at1|at2|at3|type3|at1|at2|
 + *      +-----+---+---+---+-----+---+---+-----+---+---+---+-----+---+---+...
 + *
 + *    So for interaction type type1 3 atoms are needed, and for type2 and 
 + *      type3 only 2. The type identifier is used to select the function to 
 + *    calculate the interaction and its actual parameters. This type 
 + *    identifier is an index in a params[] and functype[] array.
 + */
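
    A minimal sketch of walking the iatoms[] layout drawn above, assuming the per-type
    atom counts exposed as interaction_function[ftype].nratoms in ifunc.c/ifunc.h; the
    helper name is illustrative only:

    /* Step through the interaction list: each entry is a type index followed
     * by the atom indices it needs, so the stride varies with the type. */
    static void walk_ilist_sketch(const t_ilist *il, const t_functype *functype)
    {
        int i = 0;

        while (i < il->nr)
        {
            int type  = il->iatoms[i];      /* index into functype[]/iparams[] */
            int ftype = functype[type];     /* e.g. F_BONDS                    */
            int nat   = interaction_function[ftype].nratoms;

            /* il->iatoms[i+1] ... il->iatoms[i+nat] are the atom indices */
            i += 1 + nat;
        }
    }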
 +
 +typedef struct
 +{
 +      real *cmap; /* Has length 4*grid_spacing*grid_spacing, */
 +      /* there are 4 entries for each cmap type (V,dVdx,dVdy,d2dVdxdy) */
 +} cmapdata_t;
 +
 +typedef struct
 +{
 +      int ngrid;            /* Number of allocated cmap (cmapdata_t ) grids */
 +      int grid_spacing;     /* Grid spacing */
 +      cmapdata_t *cmapdata; /* Pointer to grid with actual, pre-interpolated data */
 +} gmx_cmap_t;
 +
 +
 +typedef struct
 +{
 +  int        ntypes;
 +  int        atnr;
 +  t_functype *functype;
 +  t_iparams  *iparams;
 +  double     reppow;     /* The repulsion power for VdW: C12*r^-reppow   */
 +  real       fudgeQQ;    /* The scaling factor for Coulomb 1-4: f*q1*q2  */
 +  gmx_cmap_t cmap_grid;  /* The dihedral correction maps                 */
 +} gmx_ffparams_t;
 +
 +enum {
 +  ilsortUNKNOWN, ilsortNO_FE, ilsortFE_UNSORTED, ilsortFE_SORTED
 +};
 +
 +typedef struct
 +{
 +  int ntypes;
 +  int atnr;
 +  t_functype *functype;
 +  t_iparams  *iparams;
 +  real fudgeQQ;
 +  gmx_cmap_t cmap_grid;
 +  t_iparams  *iparams_posres,*iparams_fbposres;
 +  int iparams_posres_nalloc,iparams_fbposres_nalloc;
 +
 +  t_ilist il[F_NRE];
 +  int ilsort;
 +} t_idef;
 +
 +/*
 + * The struct t_idef defines all the interactions for the complete
 + * simulation. The structure is set up in such a way that the multi-node
 + * version of the program can use it as easily as the single-node version.
 + * General field description:
 + *   int ntypes
 + *    defines the number of elements in functype[] and param[].
 + *   int nodeid
 + *      the node id (if parallel machines)
 + *   int atnr
 + *      the number of atomtypes
 + *   t_functype *functype
 + *    array of length ntypes, defines for every force type what type of 
 + *      function to use. Every "bond" with the same function but different 
 + *    force parameters is a different force type. The type identifier in the 
 + *    forceatoms[] array is an index in this array.
 + *   t_iparams *iparams
 + *    array of length ntypes, defines the parameters for every interaction
 + *      type. The type identifier in the actual interaction list
 + *      (ilist[ftype].iatoms[]) is an index in this array.
 + *   gmx_cmap_t cmap_grid
 + *      the grid for the dihedral pair correction maps.
 + *   t_iparams *iparams_posres, *iparams_fbposres
 + *    defines the parameters for position restraints only.
 + *      Position restraints are the only interactions that have different
 + *      parameters (reference positions) for different molecules
 + *      of the same type. ilist[F_POSRES].iatoms[] is an index in this array.
 + *   t_ilist il[F_NRE]
 + *      The list of interactions for each type. Note that some,
 + *      such as LJ and COUL will have 0 entries.
 + */
 +
 +typedef struct {
 +  int  n;         /* n+1 is the number of points */
 +  real scale;     /* distance between two points */
++  real *data;     /* the actual table data, per point there are 4 numbers */
 +} bondedtable_t;
 +
 +#ifdef __cplusplus
 +}
 +#endif
 +
 +
 +#endif
index 1ea61275398a3e6505c3cfac057af5259fac59cd,0000000000000000000000000000000000000000..ae28aefd047e054c69e9a0ada23ae847baf92291
mode 100644,000000..100644
--- /dev/null
@@@ -1,142 -1,0 +1,133 @@@
- /* The nonbonded kernels are documented in gmxlib/nonbonded_kernels, 
-  * but here's a lazy version of the numbering. The first position
-  * is the Coulomb interaction (0 for none), second is Van der Waals
-  * (again, 0 means no interaction), and the third is the water optimization
-  * (0 meaning no water optimization = standard atom-atom loop)
-  *
-  *                                     value
-  * pos                 1                   2           3              4
-  * 1st Coul        Normal,1/r       Reaction-field  Table            Generalized born
-  * 2nd Vdw         Lennard-Jones    Buckingham      Table             n/a
-  * 3rd Water. opt  SPC-other atom   SPC-SPC         TIP4p-other at.  TIP4p-TIP4p 
-  */
 +/*
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GRoups of Organic Molecules in ACtion for Science
 + */
 +#ifndef _types_nrnb_h
 +#define _types_nrnb_h
 +
 +
 +#ifdef __cplusplus
 +extern "C" {
 +#endif
++#if 0
++} /* fixes auto-indentation problems */
++#endif
 +
-     eNR_NBKERNEL010, eNR_NBKERNEL020, eNR_NBKERNEL030,
-     eNR_NBKERNEL100, eNR_NBKERNEL101, eNR_NBKERNEL102, eNR_NBKERNEL103, eNR_NBKERNEL104,
-     eNR_NBKERNEL110, eNR_NBKERNEL111, eNR_NBKERNEL112, eNR_NBKERNEL113, eNR_NBKERNEL114,
-     eNR_NBKERNEL120, eNR_NBKERNEL121, eNR_NBKERNEL122, eNR_NBKERNEL123, eNR_NBKERNEL124,
-     eNR_NBKERNEL130, eNR_NBKERNEL131, eNR_NBKERNEL132, eNR_NBKERNEL133, eNR_NBKERNEL134,
-     eNR_NBKERNEL200, eNR_NBKERNEL201, eNR_NBKERNEL202, eNR_NBKERNEL203, eNR_NBKERNEL204,
-     eNR_NBKERNEL210, eNR_NBKERNEL211, eNR_NBKERNEL212, eNR_NBKERNEL213, eNR_NBKERNEL214,
-     eNR_NBKERNEL220, eNR_NBKERNEL221, eNR_NBKERNEL222, eNR_NBKERNEL223, eNR_NBKERNEL224,
-     eNR_NBKERNEL230, eNR_NBKERNEL231, eNR_NBKERNEL232, eNR_NBKERNEL233, eNR_NBKERNEL234,
-     eNR_NBKERNEL300, eNR_NBKERNEL301, eNR_NBKERNEL302, eNR_NBKERNEL303, eNR_NBKERNEL304,
-     eNR_NBKERNEL310, eNR_NBKERNEL311, eNR_NBKERNEL312, eNR_NBKERNEL313, eNR_NBKERNEL314,
-     eNR_NBKERNEL320, eNR_NBKERNEL321, eNR_NBKERNEL322, eNR_NBKERNEL323, eNR_NBKERNEL324,
-     eNR_NBKERNEL330, eNR_NBKERNEL331, eNR_NBKERNEL332, eNR_NBKERNEL333, eNR_NBKERNEL334,
-     eNR_NBKERNEL400, eNR_NBKERNEL410, eNR_NBKERNEL430,
-     eNR_NBKERNEL010NF, eNR_NBKERNEL020NF, eNR_NBKERNEL030NF,
-     eNR_NBKERNEL100NF, eNR_NBKERNEL101NF, eNR_NBKERNEL102NF, eNR_NBKERNEL103NF, eNR_NBKERNEL104NF,
-     eNR_NBKERNEL110NF, eNR_NBKERNEL111NF, eNR_NBKERNEL112NF, eNR_NBKERNEL113NF, eNR_NBKERNEL114NF,
-     eNR_NBKERNEL120NF, eNR_NBKERNEL121NF, eNR_NBKERNEL122NF, eNR_NBKERNEL123NF, eNR_NBKERNEL124NF,
-     eNR_NBKERNEL130NF, eNR_NBKERNEL131NF, eNR_NBKERNEL132NF, eNR_NBKERNEL133NF, eNR_NBKERNEL134NF,
-     eNR_NBKERNEL200NF, eNR_NBKERNEL201NF, eNR_NBKERNEL202NF, eNR_NBKERNEL203NF, eNR_NBKERNEL204NF,
-     eNR_NBKERNEL210NF, eNR_NBKERNEL211NF, eNR_NBKERNEL212NF, eNR_NBKERNEL213NF, eNR_NBKERNEL214NF,
-     eNR_NBKERNEL220NF, eNR_NBKERNEL221NF, eNR_NBKERNEL222NF, eNR_NBKERNEL223NF, eNR_NBKERNEL224NF,
-     eNR_NBKERNEL230NF, eNR_NBKERNEL231NF, eNR_NBKERNEL232NF, eNR_NBKERNEL233NF, eNR_NBKERNEL234NF,
-     eNR_NBKERNEL300NF, eNR_NBKERNEL301NF, eNR_NBKERNEL302NF, eNR_NBKERNEL303NF, eNR_NBKERNEL304NF,
-     eNR_NBKERNEL310NF, eNR_NBKERNEL311NF, eNR_NBKERNEL312NF, eNR_NBKERNEL313NF, eNR_NBKERNEL314NF,
-     eNR_NBKERNEL320NF, eNR_NBKERNEL321NF, eNR_NBKERNEL322NF, eNR_NBKERNEL323NF, eNR_NBKERNEL324NF,
-     eNR_NBKERNEL330NF, eNR_NBKERNEL331NF, eNR_NBKERNEL332NF, eNR_NBKERNEL333NF, eNR_NBKERNEL334NF,
-     eNR_NBKERNEL400NF, eNR_NBKERNEL410NF, eNR_NBKERNEL430NF, 
-     eNR_NBKERNEL_NR,
-     eNR_NBKERNEL_FREE_ENERGY = eNR_NBKERNEL_NR,
 +
 +#define eNR_NBKERNEL_NONE -1
 +
 +enum 
 +{
-     eNR_NBKERNEL_OUTER,
++    eNR_NBKERNEL_VDW_VF,
++    eNR_NBKERNEL_VDW_F,
++    eNR_NBKERNEL_ELEC_VF,
++    eNR_NBKERNEL_ELEC_F,
++    eNR_NBKERNEL_ELEC_W3_VF,
++    eNR_NBKERNEL_ELEC_W3_F,
++    eNR_NBKERNEL_ELEC_W3W3_VF,
++    eNR_NBKERNEL_ELEC_W3W3_F,
++    eNR_NBKERNEL_ELEC_W4_VF,
++    eNR_NBKERNEL_ELEC_W4_F,
++    eNR_NBKERNEL_ELEC_W4W4_VF,
++    eNR_NBKERNEL_ELEC_W4W4_F,
++    eNR_NBKERNEL_ELEC_VDW_VF,
++    eNR_NBKERNEL_ELEC_VDW_F,
++    eNR_NBKERNEL_ELEC_VDW_W3_VF,
++    eNR_NBKERNEL_ELEC_VDW_W3_F,
++    eNR_NBKERNEL_ELEC_VDW_W3W3_VF,
++    eNR_NBKERNEL_ELEC_VDW_W3W3_F,
++    eNR_NBKERNEL_ELEC_VDW_W4_VF,
++    eNR_NBKERNEL_ELEC_VDW_W4_F,
++    eNR_NBKERNEL_ELEC_VDW_W4W4_VF,
++    eNR_NBKERNEL_ELEC_VDW_W4W4_F,
++
++    eNR_NBKERNEL_NR,  /* Total number of interaction-specific kernel entries */
++
++    eNR_NBKERNEL_GENERIC = eNR_NBKERNEL_NR, /* Reuse number; KERNEL_NR is not an entry itself */
++    eNR_NBKERNEL_FREE_ENERGY,               /* Add other generic kernels _before_ the free energy one */
++
 +    eNR_NBKERNEL_ALLVSALL,
 +    eNR_NBKERNEL_ALLVSALLGB,
- typedef struct {
-   double n[eNRNB];
- } t_nrnb;
++
 +    eNR_NBNXN_DIST2,
 +    eNR_NBNXN_LJ_RF,  eNR_NBNXN_LJ_RF_E,
 +    eNR_NBNXN_LJ_TAB, eNR_NBNXN_LJ_TAB_E,
 +    eNR_NBNXN_LJ,     eNR_NBNXN_LJ_E,
 +    eNR_NBNXN_RF,     eNR_NBNXN_RF_E,
 +    eNR_NBNXN_TAB,    eNR_NBNXN_TAB_E,
 +    eNR_NB14,
 +    eNR_BORN_RADII_STILL,     eNR_BORN_RADII_HCT_OBC,
 +    eNR_BORN_CHAINRULE,
 +    eNR_BORN_AVA_RADII_STILL, eNR_BORN_AVA_RADII_HCT_OBC,
 +    eNR_BORN_AVA_CHAINRULE,
 +    eNR_WEIGHTS,              eNR_SPREADQ,              eNR_SPREADQBSP,
 +    eNR_GATHERF,              eNR_GATHERFBSP,           eNR_FFT,
 +    eNR_CONV,                 eNR_SOLVEPME,eNR_NS,      eNR_RESETX,
 +    eNR_SHIFTX,               eNR_CGCM,                 eNR_FSUM,
 +    eNR_BONDS,                eNR_G96BONDS,             eNR_FENEBONDS,
 +    eNR_TABBONDS,             eNR_RESTRBONDS,           eNR_LINEAR_ANGLES,
 +    eNR_ANGLES,               eNR_G96ANGLES,            eNR_QANGLES,
 +    eNR_TABANGLES,            eNR_PROPER,               eNR_IMPROPER,
 +    eNR_RB,                   eNR_FOURDIH,              eNR_TABDIHS,
 +    eNR_DISRES,               eNR_ORIRES,               eNR_DIHRES,
 +    eNR_POSRES,               eNR_FBPOSRES,
 +    eNR_ANGRES,               eNR_ANGRESZ,
 +    eNR_MORSE,                eNR_CUBICBONDS,           eNR_WALLS,
 +    eNR_POLARIZE,             eNR_ANHARM_POL,
 +    eNR_WPOL,                 eNR_THOLE,                eNR_VIRIAL,
 +    eNR_UPDATE,               eNR_EXTUPDATE,            eNR_STOPCM,
 +    eNR_PCOUPL,               eNR_EKIN,                 eNR_LINCS,
 +    eNR_LINCSMAT,             eNR_SHAKE,                eNR_CONSTR_V,
 +    eNR_SHAKE_RIJ,            eNR_CONSTR_VIR,           eNR_SETTLE,
 +    eNR_VSITE2,               eNR_VSITE3,               eNR_VSITE3FD,
 +    eNR_VSITE3FAD,            eNR_VSITE3OUT,            eNR_VSITE4FD,
 +    eNR_VSITE4FDN,            eNR_VSITEN,               eNR_GB,
 +    eNR_CMAP,
 +    eNRNB
 +};
 +
 +
++typedef struct
++{
++    double n[eNRNB];
++}
++t_nrnb;
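
    A minimal usage sketch: flop accounting amounts to accumulating into n[] at the
    index given by the enum above (production code normally goes through a helper
    macro in nrnb.h rather than touching the array directly); the function below is
    illustrative only:

    /* Credit nsettles SETTLE evaluations to the flop counters. */
    static void count_settle_sketch(t_nrnb *nrnb, int nsettles)
    {
        nrnb->n[eNR_SETTLE] += nsettles;
    }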
 +
 +
 +typedef struct gmx_wallcycle *gmx_wallcycle_t;
 +
 +#ifdef __cplusplus
 +}
 +#endif
 +
 +#endif
index dc25bce58b2f95cff04d8e795d2f920040f22a79,0000000000000000000000000000000000000000..ec0785566b924c81bdda465c603c1b27aac4932f
mode 100644,000000..100644
--- /dev/null
@@@ -1,893 -1,0 +1,907 @@@
 +/*
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gromacs Runs On Most of All Computer Systems
 + */
 +#ifndef _vec_h
 +#define _vec_h
 +
 +/*
 +  collection of in-line ready operations:
 +  
 +  lookup-table optimized scalar operations:
 +  real gmx_invsqrt(real x)
 +  void vecinvsqrt(real in[],real out[],int n)
 +  void vecrecip(real in[],real out[],int n)
 +  real sqr(real x)
 +  double dsqr(double x)
 +  
 +  vector operations:
 +  void rvec_add(const rvec a,const rvec b,rvec c)  c = a + b
 +  void dvec_add(const dvec a,const dvec b,dvec c)  c = a + b
 +  void ivec_add(const ivec a,const ivec b,ivec c)  c = a + b
 +  void rvec_inc(rvec a,const rvec b)               a += b
 +  void dvec_inc(dvec a,const dvec b)               a += b
 +  void ivec_inc(ivec a,const ivec b)               a += b
 +  void rvec_sub(const rvec a,const rvec b,rvec c)  c = a - b
 +  void dvec_sub(const dvec a,const dvec b,dvec c)  c = a - b
 +  void rvec_dec(rvec a,rvec b)                     a -= b
 +  void copy_rvec(const rvec a,rvec b)              b = a (reals)
 +  void copy_dvec(const dvec a,dvec b)              b = a (reals)
 +  void copy_ivec(const ivec a,ivec b)              b = a (integers)
 +  void ivec_sub(const ivec a,const ivec b,ivec c)  c = a - b
 +  void svmul(real a,rvec v1,rvec v2)               v2 = a * v1
 +  void dsvmul(double a,dvec v1,dvec v2)            v2 = a * v1
 +  void clear_rvec(rvec a)                          a = 0
 +  void clear_dvec(dvec a)                          a = 0
 +  void clear_ivec(ivec a)                          a = 0
 +  void clear_rvecs(int n,rvec v[])
 +  real iprod(rvec a,rvec b)                        = a . b (inner product)
 +  double diprod(dvec a,dvec b)                     = a . b (inner product)
 +  real iiprod(ivec a,ivec b)                       = a . b (integers)
 +  real norm2(rvec a)                               = | a |^2 ( = x*x + y*y + z*z )
 +  double dnorm2(dvec a)                            = | a |^2 ( = x*x + y*y + z*z )
 +  real norm(rvec a)                                = | a |
 +  double dnorm(dvec a)                             = | a |
 +  void cprod(rvec a,rvec b,rvec c)                 c = a x b (cross product)
 +  void dcprod(dvec a,dvec b,dvec c)                c = a x b (cross product)
 +  real cos_angle(rvec a,rvec b)
 +  real cos_angle_no_table(rvec a,rvec b)
 +  real distance2(rvec v1, rvec v2)                 = | v2 - v1 |^2
 +  void unitv(rvec src,rvec dest)                   dest = src / |src|
 +  void unitv_no_table(rvec src,rvec dest)          dest = src / |src|
 +  
 +  matrix (3x3) operations:
 +    ! indicates that dest should not be the same as a, b or src
 +    the _ur0 varieties work on matrices that have only zeros
 +    in the upper right part, such as box matrices; these varieties
 +    can produce fewer rounding errors, not due to the operations themselves,
 +    but because the compiler can recombine the operations more easily
 +  void copy_mat(matrix a,matrix b)                 b = a
 +  void clear_mat(matrix a)                       a = 0
 +  void mmul(matrix a,matrix b,matrix dest)    !  dest = a . b
 +  void mmul_ur0(matrix a,matrix b,matrix dest)     dest = a . b
 +  void transpose(matrix src,matrix dest)      !  dest = src*
 +  void tmmul(matrix a,matrix b,matrix dest)   !  dest = a* . b
 +  void mtmul(matrix a,matrix b,matrix dest)   !  dest = a . b*
 +  real det(matrix a)                             = det(a)
 +  void m_add(matrix a,matrix b,matrix dest)      dest = a + b
 +  void m_sub(matrix a,matrix b,matrix dest)      dest = a - b
 +  void msmul(matrix m1,real r1,matrix dest)      dest = r1 * m1
 +  void m_inv_ur0(matrix src,matrix dest)           dest = src^-1
 +  void m_inv(matrix src,matrix dest)          !  dest = src^-1
 +  void mvmul(matrix a,rvec src,rvec dest)     !  dest = a . src
 +  void mvmul_ur0(matrix a,rvec src,rvec dest)      dest = a . src
 +  void tmvmul_ur0(matrix a,rvec src,rvec dest)     dest = a* . src
 +  real trace(matrix m)                             = trace(m)
 +*/
 +
 +#include "types/simple.h"
 +#include "maths.h"
 +#include "typedefs.h"
 +#include "sysstuff.h"
 +#include "gmx_fatal.h"
 +#include "physics.h"
 +
 +#ifdef __cplusplus
 +extern "C" {
 +#elif 0
 +} /* avoid screwing up indentation */
 +#endif
 +
 +
 +#define EXP_LSB         0x00800000
 +#define EXP_MASK        0x7f800000
 +#define EXP_SHIFT       23
 +#define FRACT_MASK      0x007fffff
 +#define FRACT_SIZE      11              /* significant part of fraction */
 +#define FRACT_SHIFT     (EXP_SHIFT-FRACT_SIZE)
 +#define EXP_ADDR(val)   (((val)&EXP_MASK)>>EXP_SHIFT)
 +#define FRACT_ADDR(val) (((val)&(FRACT_MASK|EXP_LSB))>>FRACT_SHIFT)
 +
 +#define PR_VEC(a)       a[XX],a[YY],a[ZZ]
 +
 +#ifdef GMX_SOFTWARE_INVSQRT
 +extern const unsigned int *  gmx_invsqrt_exptab;
 +extern const unsigned int *  gmx_invsqrt_fracttab;
 +#endif
 +
 +
 +typedef union 
 +{
 +  unsigned int bval;
 +  float fval;
 +} t_convert;
 +
 +
 +#ifdef GMX_SOFTWARE_INVSQRT
 +static real gmx_invsqrt(real x)
 +{
 +  const real  half=0.5;
 +  const real  three=3.0;
 +  t_convert   result,bit_pattern;
 +  unsigned int exp,fract;
 +  real        lu;
 +  real        y;
 +#ifdef GMX_DOUBLE
 +  real        y2;
 +#endif
 + 
 +  bit_pattern.fval=x;
 +  exp   = EXP_ADDR(bit_pattern.bval);
 +  fract = FRACT_ADDR(bit_pattern.bval);
 +  result.bval=gmx_invsqrt_exptab[exp] | gmx_invsqrt_fracttab[fract];
 +  lu    = result.fval;
 +  
 +  y=(half*lu*(three-((x*lu)*lu)));
 +#ifdef GMX_DOUBLE
 +  y2=(half*y*(three-((x*y)*y)));
 +  
 +  return y2;                    /* 10 Flops */
 +#else
 +  return y;                     /* 5  Flops */
 +#endif
 +}
 +#define INVSQRT_DONE 
 +#endif /* gmx_invsqrt */
 +
 +#ifdef GMX_POWERPC_SQRT
 +static real gmx_invsqrt(real x)
 +{
 +  const real  half=0.5;
 +  const real  three=3.0;
 +  t_convert   result,bit_pattern;
 +  unsigned int exp,fract;
 +  real        lu;
 +  real        y;
 +#ifdef GMX_DOUBLE
 +  real        y2;
 +#endif
 +
 +  lu = __frsqrte((double)x);
 +
 +  y=(half*lu*(three-((x*lu)*lu)));
 +
 +#if (GMX_POWERPC_SQRT==2)
 +  /* Extra iteration required */
 +  y=(half*y*(three-((x*y)*y)));
 +#endif
 +
 +#ifdef GMX_DOUBLE
 +  y2=(half*y*(three-((x*y)*y)));
 +
 +  return y2;                    /* 10 Flops */
 +#else
 +  return y;                     /* 5  Flops */
 +#endif
 +}
 +#define INVSQRT_DONE
 +#endif /* powerpc_invsqrt */
 +
- #define gmx_invsqrt(x) (1.0f/sqrt(x))
 +#ifndef INVSQRT_DONE
++#    ifdef GMX_DOUBLE
++#        ifdef HAVE_RSQRT
++#            define gmx_invsqrt(x)     rsqrt(x)
++#        else
++#            define gmx_invsqrt(x)     (1.0/sqrt(x))
++#        endif
++#    else /* single */
++#        ifdef HAVE_RSQRTF
++#            define gmx_invsqrt(x)     rsqrtf(x)
++#        elif defined HAVE_RSQRT
++#            define gmx_invsqrt(x)     rsqrt(x)
++#        elif defined HAVE_SQRTF
++#            define gmx_invsqrt(x)     (1.0/sqrtf(x))
++#        else
++#            define gmx_invsqrt(x)     (1.0/sqrt(x))
++#        endif
++#    endif
 +#endif
 +
 +
- #else
 +static real sqr(real x)
 +{
 +  return (x*x);
 +}
 +
 +static gmx_inline double dsqr(double x)
 +{
 +  return (x*x);
 +}
 +
 +/* Maclaurin series for sinh(x)/x, useful for NH chains and MTTK pressure control 
 +   Here, we compute it to 10th order, which might be overkill, 8th is probably enough, 
 +   but it's not very much more expensive. */
 +
 +static gmx_inline real series_sinhx(real x) 
 +{
 +  real x2 = x*x;
 +  return (1 + (x2/6.0)*(1 + (x2/20.0)*(1 + (x2/42.0)*(1 + (x2/72.0)*(1 + (x2/110.0))))));
 +}
 +
 +void vecinvsqrt(real in[],real out[],int n);
 +/* Perform out[i]=1.0/sqrt(in[i]) for n elements */
 +
 +
 +void vecrecip(real in[],real out[],int n);
 +/* Perform out[i]=1.0/(in[i]) for n elements */
 +
 +/* Note: If you need a fast version of vecinvsqrt 
 + * and/or vecrecip, call detectcpu() and run the SSE/3DNow/SSE2/Altivec
 + * versions if your hardware supports it.
 + *
 + * To use those routines, your memory HAS TO BE CACHE-ALIGNED.
 + * Use snew_aligned(ptr,size,32) to allocate and sfree_aligned to free.
 + */
 +
 +
 +static gmx_inline void rvec_add(const rvec a,const rvec b,rvec c)
 +{
 +  real x,y,z;
 +  
 +  x=a[XX]+b[XX];
 +  y=a[YY]+b[YY];
 +  z=a[ZZ]+b[ZZ];
 +  
 +  c[XX]=x;
 +  c[YY]=y;
 +  c[ZZ]=z;
 +}
 +
 +static gmx_inline void dvec_add(const dvec a,const dvec b,dvec c)
 +{
 +  double x,y,z;
 +  
 +  x=a[XX]+b[XX];
 +  y=a[YY]+b[YY];
 +  z=a[ZZ]+b[ZZ];
 +  
 +  c[XX]=x;
 +  c[YY]=y;
 +  c[ZZ]=z;
 +}
 +
 +static gmx_inline void ivec_add(const ivec a,const ivec b,ivec c)
 +{
 +  int x,y,z;
 +  
 +  x=a[XX]+b[XX];
 +  y=a[YY]+b[YY];
 +  z=a[ZZ]+b[ZZ];
 +  
 +  c[XX]=x;
 +  c[YY]=y;
 +  c[ZZ]=z;
 +}
 +
 +static gmx_inline void rvec_inc(rvec a,const rvec b)
 +{
 +  real x,y,z;
 +  
 +  x=a[XX]+b[XX];
 +  y=a[YY]+b[YY];
 +  z=a[ZZ]+b[ZZ];
 +  
 +  a[XX]=x;
 +  a[YY]=y;
 +  a[ZZ]=z;
 +}
 +
 +static gmx_inline void dvec_inc(dvec a,const dvec b)
 +{
 +  double x,y,z;
 +
 +  x=a[XX]+b[XX];
 +  y=a[YY]+b[YY];
 +  z=a[ZZ]+b[ZZ];
 +
 +  a[XX]=x;
 +  a[YY]=y;
 +  a[ZZ]=z;
 +}
 +
 +static gmx_inline void rvec_sub(const rvec a,const rvec b,rvec c)
 +{
 +  real x,y,z;
 +  
 +  x=a[XX]-b[XX];
 +  y=a[YY]-b[YY];
 +  z=a[ZZ]-b[ZZ];
 +  
 +  c[XX]=x;
 +  c[YY]=y;
 +  c[ZZ]=z;
 +}
 +
 +static gmx_inline void dvec_sub(const dvec a,const dvec b,dvec c)
 +{
 +  double x,y,z;
 +  
 +  x=a[XX]-b[XX];
 +  y=a[YY]-b[YY];
 +  z=a[ZZ]-b[ZZ];
 +  
 +  c[XX]=x;
 +  c[YY]=y;
 +  c[ZZ]=z;
 +}
 +
 +static gmx_inline void rvec_dec(rvec a,const rvec b)
 +{
 +  real x,y,z;
 +  
 +  x=a[XX]-b[XX];
 +  y=a[YY]-b[YY];
 +  z=a[ZZ]-b[ZZ];
 +  
 +  a[XX]=x;
 +  a[YY]=y;
 +  a[ZZ]=z;
 +}
 +
 +static gmx_inline void copy_rvec(const rvec a,rvec b)
 +{
 +  b[XX]=a[XX];
 +  b[YY]=a[YY];
 +  b[ZZ]=a[ZZ];
 +}
 +
 +static gmx_inline void copy_rvecn(rvec *a,rvec *b,int startn, int endn)
 +{
 +  int i;
 +  for (i=startn;i<endn;i++) {
 +    b[i][XX]=a[i][XX];
 +    b[i][YY]=a[i][YY];
 +    b[i][ZZ]=a[i][ZZ];
 +  }
 +}
 +
 +static gmx_inline void copy_dvec(const dvec a,dvec b)
 +{
 +  b[XX]=a[XX];
 +  b[YY]=a[YY];
 +  b[ZZ]=a[ZZ];
 +}
 +
 +static gmx_inline void copy_ivec(const ivec a,ivec b)
 +{
 +  b[XX]=a[XX];
 +  b[YY]=a[YY];
 +  b[ZZ]=a[ZZ];
 +}
 +
 +static gmx_inline void ivec_sub(const ivec a,const ivec b,ivec c)
 +{
 +  int x,y,z;
 +  
 +  x=a[XX]-b[XX];
 +  y=a[YY]-b[YY];
 +  z=a[ZZ]-b[ZZ];
 +  
 +  c[XX]=x;
 +  c[YY]=y;
 +  c[ZZ]=z;
 +}
 +
 +static gmx_inline void copy_mat(matrix a,matrix b)
 +{
 +  copy_rvec(a[XX],b[XX]);
 +  copy_rvec(a[YY],b[YY]);
 +  copy_rvec(a[ZZ],b[ZZ]);
 +}
 +
 +static gmx_inline void svmul(real a,const rvec v1,rvec v2)
 +{
 +  v2[XX]=a*v1[XX];
 +  v2[YY]=a*v1[YY];
 +  v2[ZZ]=a*v1[ZZ];
 +}
 +
 +static gmx_inline void dsvmul(double a,const dvec v1,dvec v2)
 +{
 +  v2[XX]=a*v1[XX];
 +  v2[YY]=a*v1[YY];
 +  v2[ZZ]=a*v1[ZZ];
 +}
 +
 +static gmx_inline real distance2(const rvec v1,const rvec v2)
 +{
 +  return sqr(v2[XX]-v1[XX]) + sqr(v2[YY]-v1[YY]) + sqr(v2[ZZ]-v1[ZZ]);
 +}
 +
 +static gmx_inline void clear_rvec(rvec a)
 +{
 +  /* The ibm compiler has problems with inlining this 
 +   * when we use a const real variable
 +   */
 +  a[XX]=0.0;
 +  a[YY]=0.0;
 +  a[ZZ]=0.0;
 +}
 +
 +static gmx_inline void clear_dvec(dvec a)
 +{
 +  /* The ibm compiler has problems with inlining this 
 +   * when we use a const real variable
 +   */
 +  a[XX]=0.0;
 +  a[YY]=0.0;
 +  a[ZZ]=0.0;
 +}
 +
 +static gmx_inline void clear_ivec(ivec a)
 +{
 +  a[XX]=0;
 +  a[YY]=0;
 +  a[ZZ]=0;
 +}
 +
 +static gmx_inline void clear_rvecs(int n,rvec v[])
 +{
 +/*  memset(v[0],0,DIM*n*sizeof(v[0][0])); */
 +  int i;
 +    
 +  for(i=0; (i<n); i++) 
 +    clear_rvec(v[i]);
 +}
 +
 +static gmx_inline void clear_mat(matrix a)
 +{
 +/*  memset(a[0],0,DIM*DIM*sizeof(a[0][0])); */
 +  
 +  const real nul=0.0;
 +  
 +  a[XX][XX]=a[XX][YY]=a[XX][ZZ]=nul;
 +  a[YY][XX]=a[YY][YY]=a[YY][ZZ]=nul;
 +  a[ZZ][XX]=a[ZZ][YY]=a[ZZ][ZZ]=nul;
 +}
 +
 +static gmx_inline real iprod(const rvec a,const rvec b)
 +{
 +  return (a[XX]*b[XX]+a[YY]*b[YY]+a[ZZ]*b[ZZ]);
 +}
 +
 +static gmx_inline double diprod(const dvec a,const dvec b)
 +{
 +  return (a[XX]*b[XX]+a[YY]*b[YY]+a[ZZ]*b[ZZ]);
 +}
 +
 +static gmx_inline int iiprod(const ivec a,const ivec b)
 +{
 +  return (a[XX]*b[XX]+a[YY]*b[YY]+a[ZZ]*b[ZZ]);
 +}
 +
 +static gmx_inline real norm2(const rvec a)
 +{
 +  return a[XX]*a[XX]+a[YY]*a[YY]+a[ZZ]*a[ZZ];
 +}
 +
 +static gmx_inline double dnorm2(const dvec a)
 +{
 +  return a[XX]*a[XX]+a[YY]*a[YY]+a[ZZ]*a[ZZ];
 +}
 +
 +/* WARNING:
 + * As dnorm() uses sqrt() (which is slow) _only_ use it if you are sure you
 + * don't need 1/dnorm(), otherwise use dnorm2()*dinvnorm(). */
 +static gmx_inline double dnorm(const dvec a)
 +{
 +  return sqrt(diprod(a, a));
 +}
 +
 +/* WARNING:
 + * As norm() uses sqrtf() (which is slow) _only_ use it if you are sure you
 + * don't need 1/norm(), otherwise use norm2()*invnorm(). */
 +static gmx_inline real norm(const rvec a)
 +{
 +  /* This is ugly, but we deliberately do not define gmx_sqrt() and handle the
 +   * float/double case here instead to avoid gmx_sqrt() being accidentally used. */
 +#ifdef GMX_DOUBLE
 +  return dnorm(a);
++#elif defined HAVE_SQRTF
 +  return sqrtf(iprod(a, a));
++#else
++  return sqrt(iprod(a, a));
 +#endif
 +}
 +
 +static gmx_inline real invnorm(const rvec a)
 +{
 +    return gmx_invsqrt(norm2(a));
 +}
 +
 +static gmx_inline real dinvnorm(const dvec a)
 +{
 +    return gmx_invsqrt(dnorm2(a));
 +}
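
    A small usage sketch of the advice in the warnings above: when both |a| and 1/|a|
    are needed, compute the squared norm once and reuse the fast inverse square root
    instead of calling sqrt() a second time; the helper name is illustrative:

    /* Compute |a| and 1/|a| with a single norm2() and gmx_invsqrt() call. */
    static gmx_inline void norm_and_invnorm_sketch(const rvec a, real *nrm, real *inv)
    {
        real n2 = norm2(a);

        *inv = gmx_invsqrt(n2);   /* 1/|a| via the fast inverse square root */
        *nrm = n2*(*inv);         /* |a| = |a|^2 * (1/|a|), no extra sqrt() */
    }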
 +
 +/* WARNING:
 + * Do _not_ use these routines to calculate the angle between two vectors
 + * as acos(cos_angle(u,v)). While it might seem obvious, the acos function
 + * is very flat close to -1 and 1, which will lead to accuracy-loss.
 + * Instead, use the new gmx_angle() function directly.
 + */
 +static gmx_inline real 
 +cos_angle(const rvec a,const rvec b)
 +{
 +  /* 
 +   *                  ax*bx + ay*by + az*bz
 +   * cos-vec (a,b) =  ---------------------
 +   *                      ||a|| * ||b||
 +   */
 +  real   cosval;
 +  int    m;
 +  double aa,bb,ip,ipa,ipb,ipab; /* For accuracy these must be double! */
 +  
 +  ip=ipa=ipb=0.0;
 +  for(m=0; (m<DIM); m++) {            /* 18           */
 +    aa   = a[m];
 +    bb   = b[m];
 +    ip  += aa*bb;
 +    ipa += aa*aa;
 +    ipb += bb*bb;
 +  }
 +  ipab = ipa*ipb;
 +  if (ipab > 0)
 +    cosval = ip*gmx_invsqrt(ipab);            /*  7           */
 +  else 
 +    cosval = 1;
 +                                      /* 25 TOTAL     */
 +  if (cosval > 1.0) 
 +    return  1.0; 
 +  if (cosval <-1.0) 
 +    return -1.0;
 +  
 +  return cosval;
 +}
 +
 +/* WARNING:
 + * Do _not_ use these routines to calculate the angle between two vectors
 + * as acos(cos_angle(u,v)). While it might seem obvious, the acos function
 + * is very flat close to -1 and 1, which will lead to accuracy-loss.
 + * Instead, use the new gmx_angle() function directly.
 + */
 +static gmx_inline real 
 +cos_angle_no_table(const rvec a,const rvec b)
 +{
 +  /* This version does not need the invsqrt lookup table */
 +  real   cosval;
 +  int    m;
 +  double aa,bb,ip,ipa,ipb; /* For accuracy these must be double! */
 +  
 +  ip=ipa=ipb=0.0;
 +  for(m=0; (m<DIM); m++) {            /* 18           */
 +    aa   = a[m];
 +    bb   = b[m];
 +    ip  += aa*bb;
 +    ipa += aa*aa;
 +    ipb += bb*bb;
 +  }
 +  cosval=ip/sqrt(ipa*ipb);            /* 12           */
 +                                      /* 30 TOTAL     */
 +  if (cosval > 1.0) 
 +    return  1.0; 
 +  if (cosval <-1.0) 
 +    return -1.0;
 +  
 +  return cosval;
 +}
 +
 +
 +static gmx_inline void cprod(const rvec a,const rvec b,rvec c)
 +{
 +  c[XX]=a[YY]*b[ZZ]-a[ZZ]*b[YY];
 +  c[YY]=a[ZZ]*b[XX]-a[XX]*b[ZZ];
 +  c[ZZ]=a[XX]*b[YY]-a[YY]*b[XX];
 +}
 +
 +static gmx_inline void dcprod(const dvec a,const dvec b,dvec c)
 +{
 +  c[XX]=a[YY]*b[ZZ]-a[ZZ]*b[YY];
 +  c[YY]=a[ZZ]*b[XX]-a[XX]*b[ZZ];
 +  c[ZZ]=a[XX]*b[YY]-a[YY]*b[XX];
 +}
 +
 +/* This routine calculates the angle between a & b without any loss of accuracy close to 0/PI.
 + * If you only need cos(theta), use the cos_angle() routines to save a few cycles.
 + * This routine is faster than it might appear, since atan2 is accelerated on many CPUs (e.g. x86).
 + */
 +static gmx_inline real 
 +gmx_angle(const rvec a, const rvec b)
 +{
 +    rvec w;
 +    real wlen,s;
 +    
 +    cprod(a,b,w);
 +    
 +    wlen  = norm(w);
 +    s     = iprod(a,b);
 +    
 +    return atan2(wlen,s);
 +}
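
    A usage sketch of gmx_angle() for the common case of the bond angle i-j-k,
    following the advice above to avoid acos(cos_angle()); the helper name is
    illustrative only:

    /* Angle at atom j formed by atoms i-j-k, in radians. */
    static gmx_inline real angle_ijk_sketch(const rvec xi, const rvec xj, const rvec xk)
    {
        rvec rij, rkj;

        rvec_sub(xi, xj, rij);
        rvec_sub(xk, xj, rkj);

        return gmx_angle(rij, rkj);
    }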
 +
 +static gmx_inline void mmul_ur0(matrix a,matrix b,matrix dest)
 +{
 +  dest[XX][XX]=a[XX][XX]*b[XX][XX];
 +  dest[XX][YY]=0.0;
 +  dest[XX][ZZ]=0.0;
 +  dest[YY][XX]=a[YY][XX]*b[XX][XX]+a[YY][YY]*b[YY][XX];
 +  dest[YY][YY]=                    a[YY][YY]*b[YY][YY];
 +  dest[YY][ZZ]=0.0;
 +  dest[ZZ][XX]=a[ZZ][XX]*b[XX][XX]+a[ZZ][YY]*b[YY][XX]+a[ZZ][ZZ]*b[ZZ][XX];
 +  dest[ZZ][YY]=                    a[ZZ][YY]*b[YY][YY]+a[ZZ][ZZ]*b[ZZ][YY];
 +  dest[ZZ][ZZ]=                                        a[ZZ][ZZ]*b[ZZ][ZZ];
 +}
 +
 +static gmx_inline void mmul(matrix a,matrix b,matrix dest)
 +{
 +  dest[XX][XX]=a[XX][XX]*b[XX][XX]+a[XX][YY]*b[YY][XX]+a[XX][ZZ]*b[ZZ][XX];
 +  dest[YY][XX]=a[YY][XX]*b[XX][XX]+a[YY][YY]*b[YY][XX]+a[YY][ZZ]*b[ZZ][XX];
 +  dest[ZZ][XX]=a[ZZ][XX]*b[XX][XX]+a[ZZ][YY]*b[YY][XX]+a[ZZ][ZZ]*b[ZZ][XX];
 +  dest[XX][YY]=a[XX][XX]*b[XX][YY]+a[XX][YY]*b[YY][YY]+a[XX][ZZ]*b[ZZ][YY];
 +  dest[YY][YY]=a[YY][XX]*b[XX][YY]+a[YY][YY]*b[YY][YY]+a[YY][ZZ]*b[ZZ][YY];
 +  dest[ZZ][YY]=a[ZZ][XX]*b[XX][YY]+a[ZZ][YY]*b[YY][YY]+a[ZZ][ZZ]*b[ZZ][YY];
 +  dest[XX][ZZ]=a[XX][XX]*b[XX][ZZ]+a[XX][YY]*b[YY][ZZ]+a[XX][ZZ]*b[ZZ][ZZ];
 +  dest[YY][ZZ]=a[YY][XX]*b[XX][ZZ]+a[YY][YY]*b[YY][ZZ]+a[YY][ZZ]*b[ZZ][ZZ];
 +  dest[ZZ][ZZ]=a[ZZ][XX]*b[XX][ZZ]+a[ZZ][YY]*b[YY][ZZ]+a[ZZ][ZZ]*b[ZZ][ZZ];
 +}
 +
 +static gmx_inline void transpose(matrix src,matrix dest)
 +{
 +  dest[XX][XX]=src[XX][XX];
 +  dest[YY][XX]=src[XX][YY];
 +  dest[ZZ][XX]=src[XX][ZZ];
 +  dest[XX][YY]=src[YY][XX];
 +  dest[YY][YY]=src[YY][YY];
 +  dest[ZZ][YY]=src[YY][ZZ];
 +  dest[XX][ZZ]=src[ZZ][XX];
 +  dest[YY][ZZ]=src[ZZ][YY];
 +  dest[ZZ][ZZ]=src[ZZ][ZZ];
 +}
 +
 +static gmx_inline void tmmul(matrix a,matrix b,matrix dest)
 +{
 +  /* Computes dest=mmul(transpose(a),b,dest) - used in do_pr_pcoupl */
 +  dest[XX][XX]=a[XX][XX]*b[XX][XX]+a[YY][XX]*b[YY][XX]+a[ZZ][XX]*b[ZZ][XX];
 +  dest[XX][YY]=a[XX][XX]*b[XX][YY]+a[YY][XX]*b[YY][YY]+a[ZZ][XX]*b[ZZ][YY];
 +  dest[XX][ZZ]=a[XX][XX]*b[XX][ZZ]+a[YY][XX]*b[YY][ZZ]+a[ZZ][XX]*b[ZZ][ZZ];
 +  dest[YY][XX]=a[XX][YY]*b[XX][XX]+a[YY][YY]*b[YY][XX]+a[ZZ][YY]*b[ZZ][XX];
 +  dest[YY][YY]=a[XX][YY]*b[XX][YY]+a[YY][YY]*b[YY][YY]+a[ZZ][YY]*b[ZZ][YY];
 +  dest[YY][ZZ]=a[XX][YY]*b[XX][ZZ]+a[YY][YY]*b[YY][ZZ]+a[ZZ][YY]*b[ZZ][ZZ];
 +  dest[ZZ][XX]=a[XX][ZZ]*b[XX][XX]+a[YY][ZZ]*b[YY][XX]+a[ZZ][ZZ]*b[ZZ][XX];
 +  dest[ZZ][YY]=a[XX][ZZ]*b[XX][YY]+a[YY][ZZ]*b[YY][YY]+a[ZZ][ZZ]*b[ZZ][YY];
 +  dest[ZZ][ZZ]=a[XX][ZZ]*b[XX][ZZ]+a[YY][ZZ]*b[YY][ZZ]+a[ZZ][ZZ]*b[ZZ][ZZ];
 +}
 +
 +static gmx_inline void mtmul(matrix a,matrix b,matrix dest)
 +{
 +  /* Computes dest=mmul(a,transpose(b),dest) - used in do_pr_pcoupl */
 +  dest[XX][XX]=a[XX][XX]*b[XX][XX]+a[XX][YY]*b[XX][YY]+a[XX][ZZ]*b[XX][ZZ];
 +  dest[XX][YY]=a[XX][XX]*b[YY][XX]+a[XX][YY]*b[YY][YY]+a[XX][ZZ]*b[YY][ZZ];
 +  dest[XX][ZZ]=a[XX][XX]*b[ZZ][XX]+a[XX][YY]*b[ZZ][YY]+a[XX][ZZ]*b[ZZ][ZZ];
 +  dest[YY][XX]=a[YY][XX]*b[XX][XX]+a[YY][YY]*b[XX][YY]+a[YY][ZZ]*b[XX][ZZ];
 +  dest[YY][YY]=a[YY][XX]*b[YY][XX]+a[YY][YY]*b[YY][YY]+a[YY][ZZ]*b[YY][ZZ];
 +  dest[YY][ZZ]=a[YY][XX]*b[ZZ][XX]+a[YY][YY]*b[ZZ][YY]+a[YY][ZZ]*b[ZZ][ZZ];
 +  dest[ZZ][XX]=a[ZZ][XX]*b[XX][XX]+a[ZZ][YY]*b[XX][YY]+a[ZZ][ZZ]*b[XX][ZZ];
 +  dest[ZZ][YY]=a[ZZ][XX]*b[YY][XX]+a[ZZ][YY]*b[YY][YY]+a[ZZ][ZZ]*b[YY][ZZ];
 +  dest[ZZ][ZZ]=a[ZZ][XX]*b[ZZ][XX]+a[ZZ][YY]*b[ZZ][YY]+a[ZZ][ZZ]*b[ZZ][ZZ];
 +}
 +
 +static gmx_inline real det(matrix a)
 +{
 +  return ( a[XX][XX]*(a[YY][YY]*a[ZZ][ZZ]-a[ZZ][YY]*a[YY][ZZ])
 +        -a[YY][XX]*(a[XX][YY]*a[ZZ][ZZ]-a[ZZ][YY]*a[XX][ZZ])
 +        +a[ZZ][XX]*(a[XX][YY]*a[YY][ZZ]-a[YY][YY]*a[XX][ZZ]));
 +}
 +
 +static gmx_inline void m_add(matrix a,matrix b,matrix dest)
 +{
 +  dest[XX][XX]=a[XX][XX]+b[XX][XX];
 +  dest[XX][YY]=a[XX][YY]+b[XX][YY];
 +  dest[XX][ZZ]=a[XX][ZZ]+b[XX][ZZ];
 +  dest[YY][XX]=a[YY][XX]+b[YY][XX];
 +  dest[YY][YY]=a[YY][YY]+b[YY][YY];
 +  dest[YY][ZZ]=a[YY][ZZ]+b[YY][ZZ];
 +  dest[ZZ][XX]=a[ZZ][XX]+b[ZZ][XX];
 +  dest[ZZ][YY]=a[ZZ][YY]+b[ZZ][YY];
 +  dest[ZZ][ZZ]=a[ZZ][ZZ]+b[ZZ][ZZ];
 +}
 +
 +static gmx_inline void m_sub(matrix a,matrix b,matrix dest)
 +{
 +  dest[XX][XX]=a[XX][XX]-b[XX][XX];
 +  dest[XX][YY]=a[XX][YY]-b[XX][YY];
 +  dest[XX][ZZ]=a[XX][ZZ]-b[XX][ZZ];
 +  dest[YY][XX]=a[YY][XX]-b[YY][XX];
 +  dest[YY][YY]=a[YY][YY]-b[YY][YY];
 +  dest[YY][ZZ]=a[YY][ZZ]-b[YY][ZZ];
 +  dest[ZZ][XX]=a[ZZ][XX]-b[ZZ][XX];
 +  dest[ZZ][YY]=a[ZZ][YY]-b[ZZ][YY];
 +  dest[ZZ][ZZ]=a[ZZ][ZZ]-b[ZZ][ZZ];
 +}
 +
 +static gmx_inline void msmul(matrix m1,real r1,matrix dest)
 +{
 +  dest[XX][XX]=r1*m1[XX][XX];
 +  dest[XX][YY]=r1*m1[XX][YY];
 +  dest[XX][ZZ]=r1*m1[XX][ZZ];
 +  dest[YY][XX]=r1*m1[YY][XX];
 +  dest[YY][YY]=r1*m1[YY][YY];
 +  dest[YY][ZZ]=r1*m1[YY][ZZ];
 +  dest[ZZ][XX]=r1*m1[ZZ][XX];
 +  dest[ZZ][YY]=r1*m1[ZZ][YY];
 +  dest[ZZ][ZZ]=r1*m1[ZZ][ZZ];
 +}
 +
 +static gmx_inline void m_inv_ur0(matrix src,matrix dest)
 +{
 +  double tmp = src[XX][XX]*src[YY][YY]*src[ZZ][ZZ];
 +  if (fabs(tmp) <= 100*GMX_REAL_MIN)
 +    gmx_fatal(FARGS,"Can not invert matrix, determinant is zero");
 +
 +  dest[XX][XX] = 1/src[XX][XX];
 +  dest[YY][YY] = 1/src[YY][YY];
 +  dest[ZZ][ZZ] = 1/src[ZZ][ZZ];
 +  dest[ZZ][XX] = (src[YY][XX]*src[ZZ][YY]*dest[YY][YY]
 +                - src[ZZ][XX])*dest[XX][XX]*dest[ZZ][ZZ];
 +  dest[YY][XX] = -src[YY][XX]*dest[XX][XX]*dest[YY][YY];
 +  dest[ZZ][YY] = -src[ZZ][YY]*dest[YY][YY]*dest[ZZ][ZZ];
 +  dest[XX][YY] = 0.0;
 +  dest[XX][ZZ] = 0.0;
 +  dest[YY][ZZ] = 0.0;
 +}
 +
 +static gmx_inline void m_inv(matrix src,matrix dest)
 +{
 +  const real smallreal = (real)1.0e-24;
 +  const real largereal = (real)1.0e24;
 +  real  deter,c,fc;
 +
 +  deter = det(src);
 +  c     = (real)1.0/deter;
 +  fc    = (real)fabs(c);
 +  
 +  if ((fc <= smallreal) || (fc >= largereal)) 
 +    gmx_fatal(FARGS,"Can not invert matrix, determinant = %e",deter);
 +
 +  dest[XX][XX]= c*(src[YY][YY]*src[ZZ][ZZ]-src[ZZ][YY]*src[YY][ZZ]);
 +  dest[XX][YY]=-c*(src[XX][YY]*src[ZZ][ZZ]-src[ZZ][YY]*src[XX][ZZ]);
 +  dest[XX][ZZ]= c*(src[XX][YY]*src[YY][ZZ]-src[YY][YY]*src[XX][ZZ]);
 +  dest[YY][XX]=-c*(src[YY][XX]*src[ZZ][ZZ]-src[ZZ][XX]*src[YY][ZZ]);
 +  dest[YY][YY]= c*(src[XX][XX]*src[ZZ][ZZ]-src[ZZ][XX]*src[XX][ZZ]);
 +  dest[YY][ZZ]=-c*(src[XX][XX]*src[YY][ZZ]-src[YY][XX]*src[XX][ZZ]);
 +  dest[ZZ][XX]= c*(src[YY][XX]*src[ZZ][YY]-src[ZZ][XX]*src[YY][YY]);
 +  dest[ZZ][YY]=-c*(src[XX][XX]*src[ZZ][YY]-src[ZZ][XX]*src[XX][YY]);
 +  dest[ZZ][ZZ]= c*(src[XX][XX]*src[YY][YY]-src[YY][XX]*src[XX][YY]);
 +}
 +
 +static gmx_inline void mvmul(matrix a,const rvec src,rvec dest)
 +{
 +  dest[XX]=a[XX][XX]*src[XX]+a[XX][YY]*src[YY]+a[XX][ZZ]*src[ZZ];
 +  dest[YY]=a[YY][XX]*src[XX]+a[YY][YY]*src[YY]+a[YY][ZZ]*src[ZZ];
 +  dest[ZZ]=a[ZZ][XX]*src[XX]+a[ZZ][YY]*src[YY]+a[ZZ][ZZ]*src[ZZ];
 +}
 +
 +static gmx_inline void mvmul_ur0(matrix a,const rvec src,rvec dest)
 +{
 +  dest[ZZ]=a[ZZ][XX]*src[XX]+a[ZZ][YY]*src[YY]+a[ZZ][ZZ]*src[ZZ];
 +  dest[YY]=a[YY][XX]*src[XX]+a[YY][YY]*src[YY];
 +  dest[XX]=a[XX][XX]*src[XX];
 +}
 +
 +static gmx_inline void tmvmul_ur0(matrix a,const rvec src,rvec dest)
 +{
 +  dest[XX]=a[XX][XX]*src[XX]+a[YY][XX]*src[YY]+a[ZZ][XX]*src[ZZ];
 +  dest[YY]=                  a[YY][YY]*src[YY]+a[ZZ][YY]*src[ZZ];
 +  dest[ZZ]=                                    a[ZZ][ZZ]*src[ZZ];
 +}
 +
 +static gmx_inline void unitv(const rvec src,rvec dest)
 +{
 +  real linv;
 +  
 +  linv=gmx_invsqrt(norm2(src));
 +  dest[XX]=linv*src[XX];
 +  dest[YY]=linv*src[YY];
 +  dest[ZZ]=linv*src[ZZ];
 +}
 +
 +static gmx_inline void unitv_no_table(const rvec src,rvec dest)
 +{
 +  real linv;
 +  
 +  linv=1.0/sqrt(norm2(src));
 +  dest[XX]=linv*src[XX];
 +  dest[YY]=linv*src[YY];
 +  dest[ZZ]=linv*src[ZZ];
 +}
 +
 +static void calc_lll(rvec box,rvec lll)
 +{
 +  lll[XX] = 2.0*M_PI/box[XX];
 +  lll[YY] = 2.0*M_PI/box[YY];
 +  lll[ZZ] = 2.0*M_PI/box[ZZ];
 +}
 +
 +static gmx_inline real trace(matrix m)
 +{
 +  return (m[XX][XX]+m[YY][YY]+m[ZZ][ZZ]);
 +}
 +
 +static gmx_inline real _divide_err(real a,real b,const char *file,int line)
 +{
 +    if (fabs(b) <= GMX_REAL_MIN) 
 +        gmx_fatal(FARGS,"Dividing by zero, file %s, line %d",file,line);
 +    return a/b;
 +}
 +
 +static gmx_inline int _mod(int a,int b,char *file,int line)
 +{
 +  if(b==0)
 +    gmx_fatal(FARGS,"Modulo zero, file %s, line %d",file,line);
 +  return a % b;
 +}
 +
 +/* Operations on multidimensional rvecs, used e.g. in edsam.c */
 +static void m_rveccopy(int dim, rvec *a, rvec *b)
 +{
 +    /* b = a */
 +    int i;
 +
 +    for (i=0; i<dim; i++)
 +        copy_rvec(a[i],b[i]);
 +} 
 +
 +/* compute box matrix vectors from the box edge lengths and angles */
 +static void matrix_convert(matrix box, rvec vec, rvec angle)
 +{
 +    svmul(DEG2RAD,angle,angle);
 +    box[XX][XX] = vec[XX];
 +    box[YY][XX] = vec[YY]*cos(angle[ZZ]);
 +    box[YY][YY] = vec[YY]*sin(angle[ZZ]);
 +    box[ZZ][XX] = vec[ZZ]*cos(angle[YY]);
 +    box[ZZ][YY] = vec[ZZ]
 +                         *(cos(angle[XX])-cos(angle[YY])*cos(angle[ZZ]))/sin(angle[ZZ]);
 +    box[ZZ][ZZ] = sqrt(sqr(vec[ZZ])
 +                       -box[ZZ][XX]*box[ZZ][XX]-box[ZZ][YY]*box[ZZ][YY]);
 +}
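
    A usage sketch with made-up numbers: building a triclinic box from edge lengths
    (nm) and angles (degrees). Note that matrix_convert() converts the angle vector to
    radians in place, so the caller should not reuse it as degrees afterwards:

    /* Fill box from |a|,|b|,|c| and the angles alpha, beta, gamma. */
    static void example_box_sketch(matrix box)
    {
        rvec vec   = {4.0, 4.0, 4.0};     /* |a|, |b|, |c| in nm      */
        rvec angle = {90.0, 90.0, 60.0};  /* alpha, beta, gamma (deg) */

        matrix_convert(box, vec, angle);
    }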
 +
 +#define divide_err(a,b) _divide_err((a),(b),__FILE__,__LINE__)
 +#define mod(a,b)    _mod((a),(b),__FILE__,__LINE__)
 +
 +#ifdef __cplusplus
 +}
 +#endif
 +
 +
 +#endif        /* _vec_h */
index e8adce57f620414838af61d200bd4fbfc59f2870,0000000000000000000000000000000000000000..366dda2d545d1f8251bbfc40fdf480c7399d9b28
mode 100644,000000..100644
--- /dev/null
@@@ -1,12 -1,0 +1,12 @@@
- Libs.private: -lm @CMAKE_THREAD_LIBS_INIT@ @PKG_DL_LIBS@
- Libs: -L${libdir} -lgromacs@GMX_LIBS_SUFFIX@ @PKG_FFT_LIBS@
 +libdir=@LIB_INSTALL_DIR@
 +includedir=@INCL_INSTALL_DIR@
 +
 +Name: libgromacs
 +Description: Gromacs library
 +URL: http://www.gromacs.org
 +Version: @PROJECT_VERSION@
 +Requires: @PKG_FFT@ @PKG_XML@
++Libs.private: @CMAKE_THREAD_LIBS_INIT@ @PKG_DL_LIBS@
++Libs: -L${libdir} -lgromacs@GMX_LIBS_SUFFIX@ @PKG_FFT_LIBS@ -lm
 +Cflags: -I${includedir} @PKG_CFLAGS@
 +
index 055c8f5e389722eb61f5d1ea692a25f6ac39a056,0000000000000000000000000000000000000000..d00aabe4b10e822c662c7f4b342008274ed1210a
mode 100644,000000..100644
--- /dev/null
@@@ -1,624 -1,0 +1,624 @@@
-                         ATFtab = fr->atf_tabs[mdatoms->tf_table_index[iatom]].tab;
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 4.0.5
 + * Written by Christoph Junghans, Brad Lambeth, and possibly others.
 + * Copyright (c) 2009 Christoph Junghans, Brad Lambeth.
 + * All rights reserved.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +
 +#include "adress.h"
 +#include "maths.h"
 +#include "pbc.h"
 +#include "types/simple.h"
 +#include "typedefs.h"
 +#include "vec.h"
 +
 +real
 +adress_weight(rvec            x,
 +              int             adresstype,
 +              real            adressr,
 +              real            adressw,
 +              rvec *          ref,
 +              t_pbc *         pbc,
 +              t_forcerec *         fr )
 +{
 +    int  i;
 +    real l2 = adressr+adressw;
 +    real sqr_dl,dl;
 +    real tmp;
 +    rvec dx;
 +
 +    sqr_dl = 0.0;
 +
 +    if (pbc)
 +    {
 +        pbc_dx(pbc,(*ref),x,dx);
 +    }
 +    else
 +    {
 +        rvec_sub((*ref),x,dx);
 +    }
 +
 +    switch(adresstype)
 +    {
 +    case eAdressOff:
 +        /* default to explicit simulation */
 +        return 1;
 +    case eAdressConst:
 +        /* constant value for weighting function = adressw */
 +        return fr->adress_const_wf;
 +    case eAdressXSplit:
 +        /* plane through center of ref, varies in x direction */
 +        sqr_dl         = dx[0]*dx[0];
 +        break;
 +    case eAdressSphere:
 +        /* point at center of ref, assuming cubic geometry */
 +        for(i=0;i<3;i++){
 +            sqr_dl    += dx[i]*dx[i];
 +        }
 +        break;
 +    default:
 +        /* default to explicit simulation */
 +        return 1;
 +    }
 +
 +    dl=sqrt(sqr_dl);
 +
 +    /* molecule is coarse grained */
 +    if (dl > l2)
 +    {
 +        return 0;
 +    }
 +    /* molecule is explicit */
 +    else if (dl < adressr)
 +    {
 +        return 1;
 +    }
 +    /* hybrid region */
 +    else
 +    {
 +        tmp=cos((dl-adressr)*M_PI/2/adressw);
 +        return tmp*tmp;
 +    }
 +}
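
    For reference, a standalone sketch of just the switching profile implemented by
    the function above, with d the distance from the reference point, r_ex the
    explicit-region radius and w_hy the hybrid-zone width; the helper is illustrative
    only:

    /* w = 1 in the explicit zone, a cos^2 ramp across the hybrid zone, and 0
     * in the coarse-grained zone. */
    static real adress_switch_sketch(real d, real r_ex, real w_hy)
    {
        real c;

        if (d <= r_ex)
        {
            return 1;                         /* explicit       */
        }
        if (d >= r_ex + w_hy)
        {
            return 0;                         /* coarse-grained */
        }
        c = cos((d - r_ex)*M_PI/(2*w_hy));
        return c*c;                           /* hybrid         */
    }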
 +
 +void
 +update_adress_weights_com(FILE *               fplog,
 +                          int                  cg0,
 +                          int                  cg1,
 +                          t_block *            cgs,
 +                          rvec                 x[],
 +                          t_forcerec *         fr,
 +                          t_mdatoms *          mdatoms,
 +                          t_pbc *              pbc)
 +{
 +    int            icg,k,k0,k1,d;
 +    real           nrcg,inv_ncg,mtot,inv_mtot;
 +    atom_id *      cgindex;
 +    rvec           ix;
 +    int            adresstype;
 +    real           adressr,adressw;
 +    rvec *         ref;
 +    real *         massT;
 +    real *         wf;
 +
 +
 +    int n_hyb, n_ex, n_cg;
 +
 +    n_hyb=0;
 +    n_cg=0;
 +    n_ex=0;
 +
 +    adresstype         = fr->adress_type;
 +    adressr            = fr->adress_ex_width;
 +    adressw            = fr->adress_hy_width;
 +    massT              = mdatoms->massT;
 +    wf                 = mdatoms->wf;
 +    ref                = &(fr->adress_refs);
 +
 +
 +    /* Since this is center of mass AdResS, the vsite is not guaranteed
 +     * to be on the same node as the constructing atoms.  Therefore we
 +     * loop over the charge groups, calculate their center of mass,
 +     * then use this to calculate wf for each atom.  This wastes vsite
 +     * construction, but it's the only way to ensure that the explicit
 +     * atoms have the same wf as their vsite. */
 +
 +#ifdef DEBUG
 +    fprintf(fplog,"Calculating center of mass for charge groups %d to %d\n",
 +            cg0,cg1);
 +#endif
 +    cgindex = cgs->index;
 +
 +    /* Compute the center of mass for all charge groups */
 +    for(icg=cg0; (icg<cg1); icg++)
 +    {
 +        k0      = cgindex[icg];
 +        k1      = cgindex[icg+1];
 +        nrcg    = k1-k0;
 +        if (nrcg == 1)
 +        {
 +            wf[k0] = adress_weight(x[k0],adresstype,adressr,adressw,ref,pbc,fr);
 +            if (wf[k0]==0){ n_cg++;}
 +            else if (wf[k0]==1){ n_ex++;}
 +            else {n_hyb++;}
 +        }
 +        else
 +        {
 +            mtot = 0.0;
 +            for(k=k0; (k<k1); k++)
 +            {
 +                mtot += massT[k];
 +            }
 +            if (mtot > 0.0)
 +            {
 +                inv_mtot = 1.0/mtot;
 +
 +                clear_rvec(ix);
 +                for(k=k0; (k<k1); k++)
 +                {
 +                    for(d=0; (d<DIM); d++)
 +                    {
 +                        ix[d] += x[k][d]*massT[k];
 +                    }
 +                }
 +                for(d=0; (d<DIM); d++)
 +                {
 +                    ix[d] *= inv_mtot;
 +                }
 +            }
 +            /* Calculate the center of gravity if the charge group mtot=0 (only vsites) */
 +            else
 +            {
 +                inv_ncg = 1.0/nrcg;
 +
 +                clear_rvec(ix);
 +                for(k=k0; (k<k1); k++)
 +                {
 +                    for(d=0; (d<DIM); d++)
 +                    {
 +                        ix[d] += x[k][d];
 +                    }
 +                }
 +                for(d=0; (d<DIM); d++)
 +                {
 +                    ix[d] *= inv_ncg;
 +                }
 +            }
 +
 +            /* Set wf of all atoms in charge group equal to wf of com */
 +            wf[k0] = adress_weight(ix,adresstype,adressr,adressw,ref,pbc, fr);
 +
 +            if (wf[k0]==0){ n_cg++;}
 +            else if (wf[k0]==1){ n_ex++;}
 +            else {n_hyb++;}
 +
 +            for(k=(k0+1); (k<k1); k++)
 +            {
 +                wf[k] = wf[k0];
 +            }
 +        }
 +    }
 +
 +
 +    adress_set_kernel_flags(n_ex, n_hyb, n_cg, mdatoms);
 +
 +
 +}
 +void update_adress_weights_atom_per_atom(
 +                          int                  cg0,
 +                          int                  cg1,
 +                          t_block *            cgs,
 +                          rvec                 x[],
 +                          t_forcerec *         fr,
 +                          t_mdatoms *          mdatoms,
 +                          t_pbc *              pbc)
 +{
 +    int            icg,k,k0,k1,d;
 +    real           nrcg,inv_ncg,mtot,inv_mtot;
 +    atom_id *      cgindex;
 +    rvec           ix;
 +    int            adresstype;
 +    real           adressr,adressw;
 +    rvec *         ref;
 +    real *         massT;
 +    real *         wf;
 +
 +
 +    int n_hyb, n_ex, n_cg;
 +
 +    n_hyb=0;
 +    n_cg=0;
 +    n_ex=0;
 +
 +    adresstype         = fr->adress_type;
 +    adressr            = fr->adress_ex_width;
 +    adressw            = fr->adress_hy_width;
 +    massT              = mdatoms->massT;
 +    wf                 = mdatoms->wf;
 +    ref                = &(fr->adress_refs);
 +
 +    cgindex = cgs->index;
 +
 +    /* Weighting function is determined for each atom individually.
 +     * This is an approximation, since the theory requires an
 +     * interpolation based on the centers of mass.
 +     * Should be used with caution. */
 +
 +    for (icg = cg0; (icg < cg1); icg++) {
 +        k0 = cgindex[icg];
 +        k1 = cgindex[icg + 1];
 +        nrcg = k1 - k0;
 +
 +        for (k = (k0); (k < k1); k++) {
 +            wf[k] = adress_weight(x[k], adresstype, adressr, adressw, ref, pbc, fr);
 +            if (wf[k] == 0) {
 +                n_cg++;
 +            } else if (wf[k] == 1) {
 +                n_ex++;
 +            } else {
 +                n_hyb++;
 +            }
 +        }
 +
 +    }
 +    adress_set_kernel_flags(n_ex, n_hyb, n_cg, mdatoms);
 +}
 +
 +void
 +update_adress_weights_cog(t_iparams            ip[],
 +                          t_ilist              ilist[],
 +                          rvec                 x[],
 +                          t_forcerec *         fr,
 +                          t_mdatoms *          mdatoms,
 +                          t_pbc *              pbc)
 +{
 +    int            i,j,k,nr,nra,inc;
 +    int            ftype,adresstype;
 +    t_iatom        avsite,ai,aj,ak,al;
 +    t_iatom *      ia;
 +    real           adressr,adressw;
 +    rvec *         ref;
 +    real *         wf;
 +    int            n_hyb, n_ex, n_cg;
 +
 +    adresstype         = fr->adress_type;
 +    adressr            = fr->adress_ex_width;
 +    adressw            = fr->adress_hy_width;
 +    wf                 = mdatoms->wf;
 +    ref                = &(fr->adress_refs);
 +
 +
 +    n_hyb=0;
 +    n_cg=0;
 +    n_ex=0;
 +
 +
 +    /* Since this is center of geometry AdResS, we know the vsite
 +     * is in the same charge group node as the constructing atoms.
 +     * Loop over vsite types, calculate the weight of the vsite,
 +     * then assign that weight to the constructing atoms. */
 +
 +    for(ftype=0; (ftype<F_NRE); ftype++)
 +    {
 +        if (interaction_function[ftype].flags & IF_VSITE)
 +        {
 +            nra    = interaction_function[ftype].nratoms;
 +            nr     = ilist[ftype].nr;
 +            ia     = ilist[ftype].iatoms;
 +
 +            for(i=0; (i<nr); )
 +            {
 +                /* The vsite and first constructing atom */
 +                avsite     = ia[1];
 +                ai         = ia[2];
 +                wf[avsite] = adress_weight(x[avsite],adresstype,adressr,adressw,ref,pbc,fr);
 +                wf[ai]     = wf[avsite];
 +
 +                if (wf[ai]  == 0) {
 +                    n_cg++;
 +                } else if (wf[ai]  == 1) {
 +                    n_ex++;
 +                } else {
 +                    n_hyb++;
 +                }
 +
 +                /* Assign the vsite wf to rest of constructing atoms depending on type */
 +                inc = nra+1;
 +                switch (ftype) {
 +                case F_VSITE2:
 +                    aj     = ia[3];
 +                    wf[aj] = wf[avsite];
 +                    break;
 +                case F_VSITE3:
 +                    aj     = ia[3];
 +                    wf[aj] = wf[avsite];
 +                    ak     = ia[4];
 +                    wf[ak] = wf[avsite];
 +                    break;
 +                case F_VSITE3FD:
 +                    aj     = ia[3];
 +                    wf[aj] = wf[avsite];
 +                    ak     = ia[4];
 +                    wf[ak] = wf[avsite];
 +                    break;
 +                case F_VSITE3FAD:
 +                    aj     = ia[3];
 +                    wf[aj] = wf[avsite];
 +                    ak     = ia[4];
 +                    wf[ak] = wf[avsite];
 +                    break;
 +                case F_VSITE3OUT:
 +                    aj     = ia[3];
 +                    wf[aj] = wf[avsite];
 +                    ak     = ia[4];
 +                    wf[ak] = wf[avsite];
 +                    break;
 +                case F_VSITE4FD:
 +                    aj     = ia[3];
 +                    wf[aj] = wf[avsite];
 +                    ak     = ia[4];
 +                    wf[ak] = wf[avsite];
 +                    al     = ia[5];
 +                    wf[al] = wf[avsite];
 +                    break;
 +                case F_VSITE4FDN:
 +                    aj     = ia[3];
 +                    wf[aj] = wf[avsite];
 +                    ak     = ia[4];
 +                    wf[ak] = wf[avsite];
 +                    al     = ia[5];
 +                    wf[al] = wf[avsite];
 +                    break;
 +                case F_VSITEN:
 +                    inc    = 3*ip[ia[0]].vsiten.n;
 +                    for(j=3; j<inc; j+=3)
 +                    {
 +                        ai = ia[j+2];
 +                        wf[ai] = wf[avsite];
 +                    }
 +                    break;
 +                default:
 +                    gmx_fatal(FARGS,"No such vsite type %d in %s, line %d",
 +                              ftype,__FILE__,__LINE__);
 +                }
 +
 +                /* Increment loop variables */
 +                i  += inc;
 +                ia += inc;
 +            }
 +        }
 +    }
 +
 +    adress_set_kernel_flags(n_ex, n_hyb, n_cg, mdatoms);
 +}
 +
 +void
 +update_adress_weights_atom(int                  cg0,
 +                           int                  cg1,
 +                           t_block *            cgs,
 +                           rvec                 x[],
 +                           t_forcerec *         fr,
 +                           t_mdatoms *          mdatoms,
 +                           t_pbc *              pbc)
 +{
 +    int            icg,k,k0,k1;
 +    atom_id *      cgindex;
 +    int            adresstype;
 +    real           adressr,adressw;
 +    rvec *         ref;
 +    real *         massT;
 +    real *         wf;
 +
 +    adresstype         = fr->adress_type;
 +    adressr            = fr->adress_ex_width;
 +    adressw            = fr->adress_hy_width;
 +    massT              = mdatoms->massT;
 +    wf                 = mdatoms->wf;
 +    ref                = &(fr->adress_refs);
 +    cgindex            = cgs->index;
 +
 +    /* Only use first atom in charge group.
 +     * We still can't be sure that the vsite and constructing
 +     * atoms are on the same processor, so we must calculate
 +     * in the same way as com adress. */
 +
 +    for(icg=cg0; (icg<cg1); icg++)
 +    {
 +        k0      = cgindex[icg];
 +        k1      = cgindex[icg+1];
 +        wf[k0] = adress_weight(x[k0],adresstype,adressr,adressw,ref,pbc,fr);
 +
 +        /* Set wf of all atoms in charge group equal to wf of first atom in charge group*/
 +        for(k=(k0+1); (k<k1); k++)
 +        {
 +            wf[k] = wf[k0];
 +        }
 +    }
 +}
 +
 +void adress_set_kernel_flags(int n_ex, int n_hyb, int n_cg, t_mdatoms * mdatoms){
 +
 +    /* With domain decomposition we can check whether a cpu calculates only
 +     * coarse-grained or explicit interactions. If so we use standard gromacs kernels
 +     * on this proc. See also nonbonded.c */
 +
 +    if (n_hyb ==0 && n_ex == 0){
 +     /* all particles on this proc are coarse-grained, use standard gromacs kernels */
 +        if (!mdatoms->purecg){
 +            mdatoms->purecg = TRUE;
 +           if (debug) fprintf (debug, "adress.c: pure cg kernels on this proc\n");
 +        }
 +    }
 +    else
 +    {
 +        if (mdatoms->purecg){
 +         /* now this processor has hybrid particles again, call the hybrid kernels */
 +            mdatoms->purecg = FALSE;
 +        }
 +    }
 +
 +    if (n_hyb ==0 && n_cg == 0){
 +    /* all particles on this proc are atomistic, use standard gromacs kernels */
 +        if (!mdatoms->pureex){
 +             mdatoms->pureex = TRUE;
 +             if (debug) fprintf (debug, "adress.c: pure ex kernels on this proc\n");
 +        }
 +    }
 +    else
 +    {
 +        if (mdatoms->pureex){
 +            mdatoms->pureex = FALSE;
 +        }
 +    }
 +}
 +
 +void
 +adress_thermo_force(int                  start,
 +                    int                  homenr,
 +                    t_block *            cgs,
 +                    rvec                 x[],
 +                    rvec                 f[],
 +                    t_forcerec *         fr,
 +                    t_mdatoms *          mdatoms,
 +                    t_pbc *              pbc)
 +{
 +    int              iatom,n0,nnn,nrcg, i;
 +    int              adresstype;
 +    real             adressw, adressr;
 +    atom_id *        cgindex;
 +    unsigned short * ptype;
 +    rvec *           ref;
 +    real *           wf;
 +    real             tabscale;
 +    real *           ATFtab;
 +    rvec             dr;
 +    real             w,wsq,wmin1,wmin1sq,wp,wt,rinv, sqr_dl, dl;
 +    real             eps,eps2,F,Geps,Heps2,Fp,dmu_dwp,dwp_dr,fscal;
 +
 +    adresstype       = fr->adress_type;
 +    adressw          = fr->adress_hy_width;
 +    adressr           = fr->adress_ex_width;
 +    cgindex          = cgs->index;
 +    ptype            = mdatoms->ptype;
 +    ref              = &(fr->adress_refs);
 +    wf               = mdatoms->wf;
 +
 +    for(iatom=start; (iatom<start+homenr); iatom++)
 +    {
 +        if (egp_coarsegrained(fr, mdatoms->cENER[iatom]))
 +        {
 +            if (ptype[iatom] == eptVSite)
 +            {
 +                w    = wf[iatom];
 +                /* is it in the hybrid region, or do we apply the thermodynamic force everywhere? */
 +                if ( mdatoms->tf_table_index[iatom] != NO_TF_TABLE)
 +                {
 +                    if (fr->n_adress_tf_grps > 0 ){
 +                        /* multi component tf is on, select the right table */
-                         ATFtab = fr->atf_tabs[DEFAULT_TF_TABLE].tab;
++                        ATFtab = fr->atf_tabs[mdatoms->tf_table_index[iatom]].data;
 +                        tabscale = fr->atf_tabs[mdatoms->tf_table_index[iatom]].scale;
 +                    }
 +                    else {
 +                    /* just one component */
++                        ATFtab = fr->atf_tabs[DEFAULT_TF_TABLE].data;
 +                        tabscale = fr->atf_tabs[DEFAULT_TF_TABLE].scale;
 +                    }
 +
 +                    fscal            = 0;
 +                    if (pbc)
 +                    {
 +                        pbc_dx(pbc,(*ref),x[iatom],dr);
 +                    }
 +                    else
 +                    {
 +                        rvec_sub((*ref),x[iatom],dr);
 +                    }
 +
 +
 +
 +
 +                    /* calculate the distance to the AdResS center again */
 +                    sqr_dl =0.0;
 +                    switch(adresstype)
 +                    {
 +                    case eAdressXSplit:
 +                        /* plane through center of ref, varies in x direction */
 +                        sqr_dl         = dr[0]*dr[0];
 +                        rinv             = gmx_invsqrt(dr[0]*dr[0]);
 +                        break;
 +                    case eAdressSphere:
 +                        /* point at center of ref, assuming cubic geometry */
 +                        for(i=0;i<3;i++){
 +                            sqr_dl    += dr[i]*dr[i];
 +                        }
 +                        rinv             = gmx_invsqrt(sqr_dl);
 +                        break;
 +                    default:
 +                        /* This case should not happen */
 +                        rinv = 0.0;
 +                    }
 +
 +                    dl=sqrt(sqr_dl);
 +                    /* table origin is adress center */
 +                    wt               = dl*tabscale;
 +                    n0               = wt;
 +                    eps              = wt-n0;
 +                    eps2             = eps*eps;
 +                    nnn              = 4*n0;
 +                    F                = ATFtab[nnn+1];
 +                    Geps             = eps*ATFtab[nnn+2];
 +                    Heps2            = eps2*ATFtab[nnn+3];
 +                    Fp               = F+Geps+Heps2;
 +                    F                = (Fp+Geps+2.0*Heps2)*tabscale;
 +
 +                    fscal            = F*rinv;
 +
 +                    f[iatom][0]        += fscal*dr[0];
 +                    if (adresstype != eAdressXSplit)
 +                    {
 +                        f[iatom][1]    += fscal*dr[1];
 +                        f[iatom][2]    += fscal*dr[2];
 +                    }
 +                }
 +            }
 +        }
 +    }
 +}
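The table lookup in adress_thermo_force follows the usual GROMACS cubic-spline layout of four entries per table point; isolated as a sketch (helper name illustrative, not part of the patch):

    /* Sketch only: evaluate the tabulated thermodynamic force at distance r,
     * assuming four entries per table point, exactly as in the loop above. */
    static real atf_table_force(const real *tab, real tabscale, real r)
    {
        real wt    = r*tabscale;      /* table coordinate               */
        int  n0    = (int)wt;         /* lower table point              */
        real eps   = wt - n0;         /* fractional offset within a bin */
        real eps2  = eps*eps;
        int  nnn   = 4*n0;            /* four entries per table point   */
        real F     = tab[nnn+1];
        real Geps  = eps*tab[nnn+2];
        real Heps2 = eps2*tab[nnn+3];
        real Fp    = F + Geps + Heps2;

        /* derivative of the spline, scaled back to force units */
        return (Fp + Geps + 2.0*Heps2)*tabscale;
    }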
 +
 +gmx_bool egp_explicit(t_forcerec *   fr, int egp_nr)
 +{
 +    return fr->adress_group_explicit[egp_nr];
 +}
 +
 +gmx_bool egp_coarsegrained(t_forcerec *   fr, int egp_nr)
 +{
 +   return !fr->adress_group_explicit[egp_nr];
 +}
index cf8f5b8e989957c48f84221825c7b25c8936f398,0000000000000000000000000000000000000000..438d8dfb2062ca0fea526fc0f2ddb786a07f3bfc
mode 100644,000000..100644
--- /dev/null
@@@ -1,9554 -1,0 +1,9554 @@@
-     
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + * This file is part of Gromacs        Copyright (c) 1991-2008
 + * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
 + *
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the research papers on the package. Check out http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gnomes, ROck Monsters And Chili Sauce
 + */
 +
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <stdio.h>
 +#include <time.h>
 +#include <math.h>
 +#include <string.h>
 +#include <stdlib.h>
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "gmx_fatal.h"
 +#include "gmx_fatal_collective.h"
 +#include "vec.h"
 +#include "domdec.h"
 +#include "domdec_network.h"
 +#include "nrnb.h"
 +#include "pbc.h"
 +#include "chargegroup.h"
 +#include "constr.h"
 +#include "mdatoms.h"
 +#include "names.h"
 +#include "pdbio.h"
 +#include "futil.h"
 +#include "force.h"
 +#include "pme.h"
 +#include "pull.h"
 +#include "pull_rotation.h"
 +#include "gmx_wallcycle.h"
 +#include "mdrun.h"
 +#include "nsgrid.h"
 +#include "shellfc.h"
 +#include "mtop_util.h"
 +#include "gmxfio.h"
 +#include "gmx_ga2la.h"
 +#include "gmx_sort.h"
 +#include "macros.h"
 +#include "nbnxn_search.h"
 +#include "bondf.h"
 +#include "gmx_omp_nthreads.h"
 +
 +#ifdef GMX_LIB_MPI
 +#include <mpi.h>
 +#endif
 +#ifdef GMX_THREAD_MPI
 +#include "tmpi.h"
 +#endif
 +
 +#define DDRANK(dd,rank)    (rank)
 +#define DDMASTERRANK(dd)   (dd->masterrank)
 +
 +typedef struct gmx_domdec_master
 +{
 +    /* The cell boundaries */
 +    real **cell_x;
 +    /* The global charge group division */
 +    int  *ncg;     /* Number of home charge groups for each node */
 +    int  *index;   /* Index of nnodes+1 into cg */
 +    int  *cg;      /* Global charge group index */
 +    int  *nat;     /* Number of home atoms for each node. */
 +    int  *ibuf;    /* Buffer for communication */
 +    rvec *vbuf;    /* Buffer for state scattering and gathering */
 +} gmx_domdec_master_t;
 +
 +typedef struct
 +{
 +    /* The numbers of charge groups to send and receive for each cell
 +     * that requires communication, the last entry contains the total
 +     * number of atoms that needs to be communicated.
 +     */
 +    int nsend[DD_MAXIZONE+2];
 +    int nrecv[DD_MAXIZONE+2];
 +    /* The charge groups to send */
 +    int *index;
 +    int nalloc;
 +    /* The atom range for non-in-place communication */
 +    int cell2at0[DD_MAXIZONE];
 +    int cell2at1[DD_MAXIZONE];
 +} gmx_domdec_ind_t;
 +
 +typedef struct
 +{
 +    int  np;                   /* Number of grid pulses in this dimension */
 +    int  np_dlb;               /* For dlb, for use with edlbAUTO          */
 +    gmx_domdec_ind_t *ind;     /* The indices to communicate, size np     */
 +    int  np_nalloc;
 +    gmx_bool bInPlace;             /* Can we communicate in place?            */
 +} gmx_domdec_comm_dim_t;
 +
 +typedef struct
 +{
 +    gmx_bool *bCellMin;    /* Temp. var.: is this cell size at the limit     */
 +    real *cell_f;      /* State var.: cell boundaries, box relative      */
 +    real *old_cell_f;  /* Temp. var.: old cell size                      */
 +    real *cell_f_max0; /* State var.: max lower boundary, incl neighbors */
 +    real *cell_f_min1; /* State var.: min upper boundary, incl neighbors */
 +    real *bound_min;   /* Temp. var.: lower limit for cell boundary      */
 +    real *bound_max;   /* Temp. var.: upper limit for cell boundary      */
 +    gmx_bool bLimited;     /* State var.: is DLB limited in this dim and row */
 +    real *buf_ncd;     /* Temp. var.                                     */
 +} gmx_domdec_root_t;
 +
 +#define DD_NLOAD_MAX 9
 +
 +/* Here floats are accurate enough, since these variables
 + * only influence the load balancing, not the actual MD results.
 + */
 +typedef struct
 +{
 +    int  nload;
 +    float *load;
 +    float sum;
 +    float max;
 +    float sum_m;
 +    float cvol_min;
 +    float mdf;
 +    float pme;
 +    int   flags;
 +} gmx_domdec_load_t;
 +
 +typedef struct
 +{
 +    int  nsc;
 +    int  ind_gl;
 +    int  ind;
 +} gmx_cgsort_t;
 +
 +typedef struct
 +{
 +    gmx_cgsort_t *sort;
 +    gmx_cgsort_t *sort2;
 +    int  sort_nalloc;
 +    gmx_cgsort_t *sort_new;
 +    int  sort_new_nalloc;
 +    int  *ibuf;
 +    int  ibuf_nalloc;
 +} gmx_domdec_sort_t;
 +
 +typedef struct
 +{
 +    rvec *v;
 +    int  nalloc;
 +} vec_rvec_t;
 +
 +/* This enum determines the order of the coordinates.
 + * ddnatHOME and ddnatZONE should be first and second,
 + * the others can be ordered as wanted.
 + */
 +enum { ddnatHOME, ddnatZONE, ddnatVSITE, ddnatCON, ddnatNR };
 +
 +enum { edlbAUTO, edlbNO, edlbYES, edlbNR };
 +const char *edlb_names[edlbNR] = { "auto", "no", "yes" };
 +
 +typedef struct
 +{
 +    int  dim;      /* The dimension                                          */
 +    gmx_bool dim_match;/* Tells if DD and PME dims match                         */
 +    int  nslab;    /* The number of PME slabs in this dimension              */
 +    real *slb_dim_f; /* Cell sizes for determining the PME comm. with SLB    */
 +    int  *pp_min;  /* The minimum pp node location, size nslab               */
 +    int  *pp_max;  /* The maximum pp node location, size nslab               */
 +    int  maxshift; /* The maximum shift for coordinate redistribution in PME */
 +} gmx_ddpme_t;
 +
 +typedef struct
 +{
 +    real min0;    /* The minimum bottom of this zone                        */
 +    real max1;    /* The maximum top of this zone                           */
 +    real min1;    /* The minimum top of this zone                           */
 +    real mch0;    /* The maximum bottom communication height for this zone  */
 +    real mch1;    /* The maximum top communication height for this zone     */
 +    real p1_0;    /* The bottom value of the first cell in this zone        */
 +    real p1_1;    /* The top value of the first cell in this zone           */
 +} gmx_ddzone_t;
 +
 +typedef struct
 +{
 +    gmx_domdec_ind_t ind;
 +    int *ibuf;
 +    int ibuf_nalloc;
 +    vec_rvec_t vbuf;
 +    int nsend;
 +    int nat;
 +    int nsend_zone;
 +} dd_comm_setup_work_t;
 +
 +typedef struct gmx_domdec_comm
 +{
 +    /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
 +     * unless stated otherwise.
 +     */
 +
 +    /* The number of decomposition dimensions for PME, 0: no PME */
 +    int  npmedecompdim;
 +    /* The number of nodes doing PME (PP/PME or only PME) */
 +    int  npmenodes;
 +    int  npmenodes_x;
 +    int  npmenodes_y;
 +    /* The communication setup including the PME only nodes */
 +    gmx_bool bCartesianPP_PME;
 +    ivec ntot;
 +    int  cartpmedim;
 +    int  *pmenodes;          /* size npmenodes                         */
 +    int  *ddindex2simnodeid; /* size npmenodes, only with bCartesianPP
 +                              * but with bCartesianPP_PME              */
 +    gmx_ddpme_t ddpme[2];
 +    
 +    /* The DD particle-particle nodes only */
 +    gmx_bool bCartesianPP;
 +    int  *ddindex2ddnodeid; /* size npmenode, only with bCartesianPP_PME */
 +    
 +    /* The global charge groups */
 +    t_block cgs_gl;
 +
 +    /* Should we sort the cgs */
 +    int  nstSortCG;
 +    gmx_domdec_sort_t *sort;
 +    
 +    /* Are there charge groups? */
 +    gmx_bool bCGs;
 +
 +    /* Are there bonded and multi-body interactions between charge groups? */
 +    gmx_bool bInterCGBondeds;
 +    gmx_bool bInterCGMultiBody;
 +
 +    /* Data for the optional bonded interaction atom communication range */
 +    gmx_bool bBondComm;
 +    t_blocka *cglink;
 +    char *bLocalCG;
 +
 +    /* The DLB option */
 +    int  eDLB;
 +    /* Are we actually using DLB? */
 +    gmx_bool bDynLoadBal;
 +
 +    /* Cell sizes for static load balancing, first index cartesian */
 +    real **slb_frac;
-     for(i=eNR_NBKERNEL010; i<eNR_NBKERNEL_FREE_ENERGY; i++)
++
 +    /* The width of the communicated boundaries */
 +    real cutoff_mbody;
 +    real cutoff;
 +    /* The minimum cell size (including triclinic correction) */
 +    rvec cellsize_min;
 +    /* For dlb, for use with edlbAUTO */
 +    rvec cellsize_min_dlb;
 +    /* The lower limit for the DD cell size with DLB */
 +    real cellsize_limit;
 +    /* Effectively no NB cut-off limit with DLB for systems without PBC? */
 +    gmx_bool bVacDLBNoLimit;
 +
 +    /* tric_dir is only stored here because dd_get_ns_ranges needs it */
 +    ivec tric_dir;
 +    /* box0 and box_size are required with dim's without pbc and -gcom */
 +    rvec box0;
 +    rvec box_size;
 +    
 +    /* The cell boundaries */
 +    rvec cell_x0;
 +    rvec cell_x1;
 +
 +    /* The old location of the cell boundaries, to check cg displacements */
 +    rvec old_cell_x0;
 +    rvec old_cell_x1;
 +
 +    /* The communication setup and charge group boundaries for the zones */
 +    gmx_domdec_zones_t zones;
 +    
 +    /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
 +     * cell boundaries of neighboring cells for dynamic load balancing.
 +     */
 +    gmx_ddzone_t zone_d1[2];
 +    gmx_ddzone_t zone_d2[2][2];
 +    
 +    /* The coordinate/force communication setup and indices */
 +    gmx_domdec_comm_dim_t cd[DIM];
 +    /* The maximum number of cells to communicate with in one dimension */
 +    int  maxpulse;
 +    
 +    /* Which cg distribution is stored on the master node */
 +    int master_cg_ddp_count;
 +    
 +    /* The number of cg's received from the direct neighbors */
 +    int  zone_ncg1[DD_MAXZONE];
 +    
 +    /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
 +    int  nat[ddnatNR];
 +
 +    /* Array for signalling if atoms have moved to another domain */
 +    int  *moved;
 +    int  moved_nalloc;
 +    
 +    /* Communication buffer for general use */
 +    int  *buf_int;
 +    int  nalloc_int;
 +
 +    /* Communication buffer for general use */
 +    vec_rvec_t vbuf;
 +
 +    /* Temporary storage for thread parallel communication setup */
 +    int nth;
 +    dd_comm_setup_work_t *dth;
 +
 +    /* Communication buffers only used with multiple grid pulses */
 +    int  *buf_int2;
 +    int  nalloc_int2;
 +    vec_rvec_t vbuf2;
 +    
 +    /* Communication buffers for local redistribution */
 +    int  **cggl_flag;
 +    int  cggl_flag_nalloc[DIM*2];
 +    rvec **cgcm_state;
 +    int  cgcm_state_nalloc[DIM*2];
 +    
 +    /* Cell sizes for dynamic load balancing */
 +    gmx_domdec_root_t **root;
 +    real *cell_f_row;
 +    real cell_f0[DIM];
 +    real cell_f1[DIM];
 +    real cell_f_max0[DIM];
 +    real cell_f_min1[DIM];
 +    
 +    /* Stuff for load communication */
 +    gmx_bool bRecordLoad;
 +    gmx_domdec_load_t *load;
 +#ifdef GMX_MPI
 +    MPI_Comm *mpi_comm_load;
 +#endif
 +
 +    /* Maximum DLB scaling per load balancing step in percent */
 +    int dlb_scale_lim;
 +
 +    /* Cycle counters */
 +    float cycl[ddCyclNr];
 +    int   cycl_n[ddCyclNr];
 +    float cycl_max[ddCyclNr];
 +    /* Flop counter (0=no, 1=yes, 2=with (eFlop-1)*5% noise) */
 +    int eFlop;
 +    double flop;
 +    int    flop_n;
 +    /* How often we have had load measurements */
 +    int    n_load_have;
 +    /* How often we have collected the load measurements */
 +    int    n_load_collect;
 +    
 +    /* Statistics */
 +    double sum_nat[ddnatNR-ddnatZONE];
 +    int    ndecomp;
 +    int    nload;
 +    double load_step;
 +    double load_sum;
 +    double load_max;
 +    ivec   load_lim;
 +    double load_mdf;
 +    double load_pme;
 +
 +    /* The last partition step */
 +    gmx_large_int_t partition_step;
 +
 +    /* Debugging */
 +    int  nstDDDump;
 +    int  nstDDDumpGrid;
 +    int  DD_debug;
 +} gmx_domdec_comm_t;
 +
 +/* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
 +#define DD_CGIBS 2
 +
 +/* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
 +#define DD_FLAG_NRCG  65535
 +#define DD_FLAG_FW(d) (1<<(16+(d)*2))
 +#define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
 +
 +/* Zone permutation required to obtain consecutive charge groups
 + * for neighbor searching.
 + */
 +static const int zone_perm[3][4] = { {0,0,0,0},{1,0,0,0},{3,0,1,2} };
 +
 +/* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
 + * components see only j zones with that component 0.
 + */
 +
 +/* The DD zone order */
 +static const ivec dd_zo[DD_MAXZONE] =
 +  {{0,0,0},{1,0,0},{1,1,0},{0,1,0},{0,1,1},{0,0,1},{1,0,1},{1,1,1}};
 +
 +/* The 3D setup */
 +#define dd_z3n  8
 +#define dd_zp3n 4
 +static const ivec dd_zp3[dd_zp3n] = {{0,0,8},{1,3,6},{2,5,6},{3,5,7}};
 +
 +/* The 2D setup */
 +#define dd_z2n  4
 +#define dd_zp2n 2
 +static const ivec dd_zp2[dd_zp2n] = {{0,0,4},{1,3,4}};
 +
 +/* The 1D setup */
 +#define dd_z1n  2
 +#define dd_zp1n 1
 +static const ivec dd_zp1[dd_zp1n] = {{0,0,2}};
 +
 +/* Factors used to avoid problems due to rounding issues */
 +#define DD_CELL_MARGIN       1.0001
 +#define DD_CELL_MARGIN2      1.00005
 +/* Factor to account for pressure scaling during nstlist steps */
 +#define DD_PRES_SCALE_MARGIN 1.02
 +
 +/* Allowed performance loss before we DLB or warn */
 +#define DD_PERF_LOSS 0.05
 +
 +#define DD_CELL_F_SIZE(dd,di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
 +
 +/* Use separate MPI send and receive commands
 + * when nnodes <= GMX_DD_NNODES_SENDRECV.
 + * This saves memory (and some copying for small nnodes).
 + * For high parallelization scatter and gather calls are used.
 + */
 +#define GMX_DD_NNODES_SENDRECV 4
 +
 +
 +/*
 +#define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
 +
 +static void index2xyz(ivec nc,int ind,ivec xyz)
 +{
 +  xyz[XX] = ind % nc[XX];
 +  xyz[YY] = (ind / nc[XX]) % nc[YY];
 +  xyz[ZZ] = ind / (nc[YY]*nc[XX]);
 +}
 +*/
 +
 +/* This order is required to minimize the coordinate communication in PME
 + * which uses decomposition in the x direction.
 + */
 +#define dd_index(n,i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
 +
 +static void ddindex2xyz(ivec nc,int ind,ivec xyz)
 +{
 +    xyz[XX] = ind / (nc[YY]*nc[ZZ]);
 +    xyz[YY] = (ind / nc[ZZ]) % nc[YY];
 +    xyz[ZZ] = ind % nc[ZZ];
 +}
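As a worked example of the ordering chosen above (z fastest, x slowest, so PME's x-direction decomposition sees contiguous ranks), dd_index and ddindex2xyz invert each other; a hypothetical 4x3x2 grid gives:

    /* Sketch only (illustrative, not part of the patch): */
    ivec nc  = {4, 3, 2};             /* hypothetical DD grid           */
    ivec c   = {2, 1, 1}, xyz;
    int  ind = dd_index(nc, c);       /* ((2*3 + 1)*2) + 1 = 15         */
    ddindex2xyz(nc, ind, xyz);        /* recovers xyz = {2, 1, 1}       */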
 +
 +static int ddcoord2ddnodeid(gmx_domdec_t *dd,ivec c)
 +{
 +    int ddindex;
 +    int ddnodeid=-1;
 +    
 +    ddindex = dd_index(dd->nc,c);
 +    if (dd->comm->bCartesianPP_PME)
 +    {
 +        ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
 +    }
 +    else if (dd->comm->bCartesianPP)
 +    {
 +#ifdef GMX_MPI
 +        MPI_Cart_rank(dd->mpi_comm_all,c,&ddnodeid);
 +#endif
 +    }
 +    else
 +    {
 +        ddnodeid = ddindex;
 +    }
 +    
 +    return ddnodeid;
 +}
 +
 +static gmx_bool dynamic_dd_box(gmx_ddbox_t *ddbox,t_inputrec *ir)
 +{
 +    return (ddbox->nboundeddim < DIM || DYNAMIC_BOX(*ir));
 +}
 +
 +int ddglatnr(gmx_domdec_t *dd,int i)
 +{
 +    int atnr;
 +    
 +    if (dd == NULL)
 +    {
 +        atnr = i + 1;
 +    }
 +    else
 +    {
 +        if (i >= dd->comm->nat[ddnatNR-1])
 +        {
 +            gmx_fatal(FARGS,"glatnr called with %d, which is larger than the local number of atoms (%d)",i,dd->comm->nat[ddnatNR-1]);
 +        }
 +        atnr = dd->gatindex[i] + 1;
 +    }
 +    
 +    return atnr;
 +}
 +
 +t_block *dd_charge_groups_global(gmx_domdec_t *dd)
 +{
 +    return &dd->comm->cgs_gl;
 +}
 +
 +static void vec_rvec_init(vec_rvec_t *v)
 +{
 +    v->nalloc = 0;
 +    v->v      = NULL;
 +}
 +
 +static void vec_rvec_check_alloc(vec_rvec_t *v,int n)
 +{
 +    if (n > v->nalloc)
 +    {
 +        v->nalloc = over_alloc_dd(n);
 +        srenew(v->v,v->nalloc);
 +    }
 +}
 +
 +void dd_store_state(gmx_domdec_t *dd,t_state *state)
 +{
 +    int i;
 +    
 +    if (state->ddp_count != dd->ddp_count)
 +    {
 +        gmx_incons("The state does not the domain decomposition state");
 +    }
 +    
 +    state->ncg_gl = dd->ncg_home;
 +    if (state->ncg_gl > state->cg_gl_nalloc)
 +    {
 +        state->cg_gl_nalloc = over_alloc_dd(state->ncg_gl);
 +        srenew(state->cg_gl,state->cg_gl_nalloc);
 +    }
 +    for(i=0; i<state->ncg_gl; i++)
 +    {
 +        state->cg_gl[i] = dd->index_gl[i];
 +    }
 +    
 +    state->ddp_count_cg_gl = dd->ddp_count;
 +}
 +
 +gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
 +{
 +    return &dd->comm->zones;
 +}
 +
 +void dd_get_ns_ranges(gmx_domdec_t *dd,int icg,
 +                      int *jcg0,int *jcg1,ivec shift0,ivec shift1)
 +{
 +    gmx_domdec_zones_t *zones;
 +    int izone,d,dim;
 +
 +    zones = &dd->comm->zones;
 +
 +    izone = 0;
 +    while (icg >= zones->izone[izone].cg1)
 +    {
 +        izone++;
 +    }
 +    
 +    if (izone == 0)
 +    {
 +        *jcg0 = icg;
 +    }
 +    else if (izone < zones->nizone)
 +    {
 +        *jcg0 = zones->izone[izone].jcg0;
 +    }
 +    else
 +    {
 +        gmx_fatal(FARGS,"DD icg %d out of range: izone (%d) >= nizone (%d)",
 +                  icg,izone,zones->nizone);
 +    }
 +        
 +    *jcg1 = zones->izone[izone].jcg1;
 +    
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        dim = dd->dim[d];
 +        shift0[dim] = zones->izone[izone].shift0[dim];
 +        shift1[dim] = zones->izone[izone].shift1[dim];
 +        if (dd->comm->tric_dir[dim] || (dd->bGridJump && d > 0))
 +        {
 +            /* A conservative approach, this can be optimized */
 +            shift0[dim] -= 1;
 +            shift1[dim] += 1;
 +        }
 +    }
 +}
 +
 +int dd_natoms_vsite(gmx_domdec_t *dd)
 +{
 +    return dd->comm->nat[ddnatVSITE];
 +}
 +
 +void dd_get_constraint_range(gmx_domdec_t *dd,int *at_start,int *at_end)
 +{
 +    *at_start = dd->comm->nat[ddnatCON-1];
 +    *at_end   = dd->comm->nat[ddnatCON];
 +}
 +
 +void dd_move_x(gmx_domdec_t *dd,matrix box,rvec x[])
 +{
 +    int  nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
 +    int  *index,*cgindex;
 +    gmx_domdec_comm_t *comm;
 +    gmx_domdec_comm_dim_t *cd;
 +    gmx_domdec_ind_t *ind;
 +    rvec shift={0,0,0},*buf,*rbuf;
 +    gmx_bool bPBC,bScrew;
 +    
 +    comm = dd->comm;
 +    
 +    cgindex = dd->cgindex;
 +    
 +    buf = comm->vbuf.v;
 +
 +    nzone = 1;
 +    nat_tot = dd->nat_home;
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        bPBC   = (dd->ci[dd->dim[d]] == 0);
 +        bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
 +        if (bPBC)
 +        {
 +            copy_rvec(box[dd->dim[d]],shift);
 +        }
 +        cd = &comm->cd[d];
 +        for(p=0; p<cd->np; p++)
 +        {
 +            ind = &cd->ind[p];
 +            index = ind->index;
 +            n = 0;
 +            if (!bPBC)
 +            {
 +                for(i=0; i<ind->nsend[nzone]; i++)
 +                {
 +                    at0 = cgindex[index[i]];
 +                    at1 = cgindex[index[i]+1];
 +                    for(j=at0; j<at1; j++)
 +                    {
 +                        copy_rvec(x[j],buf[n]);
 +                        n++;
 +                    }
 +                }
 +            }
 +            else if (!bScrew)
 +            {
 +                for(i=0; i<ind->nsend[nzone]; i++)
 +                {
 +                    at0 = cgindex[index[i]];
 +                    at1 = cgindex[index[i]+1];
 +                    for(j=at0; j<at1; j++)
 +                    {
 +                        /* We need to shift the coordinates */
 +                        rvec_add(x[j],shift,buf[n]);
 +                        n++;
 +                    }
 +                }
 +            }
 +            else
 +            {
 +                for(i=0; i<ind->nsend[nzone]; i++)
 +                {
 +                    at0 = cgindex[index[i]];
 +                    at1 = cgindex[index[i]+1];
 +                    for(j=at0; j<at1; j++)
 +                    {
 +                        /* Shift x */
 +                        buf[n][XX] = x[j][XX] + shift[XX];
 +                        /* Rotate y and z.
 +                         * This operation requires a special shift force
 +                         * treatment, which is performed in calc_vir.
 +                         */
 +                        buf[n][YY] = box[YY][YY] - x[j][YY];
 +                        buf[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
 +                        n++;
 +                    }
 +                }
 +            }
 +            
 +            if (cd->bInPlace)
 +            {
 +                rbuf = x + nat_tot;
 +            }
 +            else
 +            {
 +                rbuf = comm->vbuf2.v;
 +            }
 +            /* Send and receive the coordinates */
 +            dd_sendrecv_rvec(dd, d, dddirBackward,
 +                             buf,  ind->nsend[nzone+1],
 +                             rbuf, ind->nrecv[nzone+1]);
 +            if (!cd->bInPlace)
 +            {
 +                j = 0;
 +                for(zone=0; zone<nzone; zone++)
 +                {
 +                    for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
 +                    {
 +                        copy_rvec(rbuf[j],x[i]);
 +                        j++;
 +                    }
 +                }
 +            }
 +            nat_tot += ind->nrecv[nzone+1];
 +        }
 +        nzone += nzone;
 +    }
 +}
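A brief note on the bScrew branch above, which is the only case where the communicated image is not a pure translation:

    /* Sketch only: under screw PBC an image moved across the x boundary is
     * also rotated 180 degrees about the x axis, so the coordinates sent are
     *     x' = ( x + box[XX][XX],  box[YY][YY] - y,  box[ZZ][ZZ] - z )
     * and dd_move_f below consequently flips the y and z components of the
     * returned forces before accumulating them (see its bScrew branch). */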
 +
 +void dd_move_f(gmx_domdec_t *dd,rvec f[],rvec *fshift)
 +{
 +    int  nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
 +    int  *index,*cgindex;
 +    gmx_domdec_comm_t *comm;
 +    gmx_domdec_comm_dim_t *cd;
 +    gmx_domdec_ind_t *ind;
 +    rvec *buf,*sbuf;
 +    ivec vis;
 +    int  is;
 +    gmx_bool bPBC,bScrew;
 +    
 +    comm = dd->comm;
 +    
 +    cgindex = dd->cgindex;
 +
 +    buf = comm->vbuf.v;
 +
 +    n = 0;
 +    nzone = comm->zones.n/2;
 +    nat_tot = dd->nat_tot;
 +    for(d=dd->ndim-1; d>=0; d--)
 +    {
 +        bPBC   = (dd->ci[dd->dim[d]] == 0);
 +        bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
 +        if (fshift == NULL && !bScrew)
 +        {
 +            bPBC = FALSE;
 +        }
 +        /* Determine which shift vector we need */
 +        clear_ivec(vis);
 +        vis[dd->dim[d]] = 1;
 +        is = IVEC2IS(vis);
 +        
 +        cd = &comm->cd[d];
 +        for(p=cd->np-1; p>=0; p--) {
 +            ind = &cd->ind[p];
 +            nat_tot -= ind->nrecv[nzone+1];
 +            if (cd->bInPlace)
 +            {
 +                sbuf = f + nat_tot;
 +            }
 +            else
 +            {
 +                sbuf = comm->vbuf2.v;
 +                j = 0;
 +                for(zone=0; zone<nzone; zone++)
 +                {
 +                    for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
 +                    {
 +                        copy_rvec(f[i],sbuf[j]);
 +                        j++;
 +                    }
 +                }
 +            }
 +            /* Communicate the forces */
 +            dd_sendrecv_rvec(dd, d, dddirForward,
 +                             sbuf, ind->nrecv[nzone+1],
 +                             buf,  ind->nsend[nzone+1]);
 +            index = ind->index;
 +            /* Add the received forces */
 +            n = 0;
 +            if (!bPBC)
 +            {
 +                for(i=0; i<ind->nsend[nzone]; i++)
 +                {
 +                    at0 = cgindex[index[i]];
 +                    at1 = cgindex[index[i]+1];
 +                    for(j=at0; j<at1; j++)
 +                    {
 +                        rvec_inc(f[j],buf[n]);
 +                        n++;
 +                    }
 +                } 
 +            }
 +            else if (!bScrew)
 +            {
 +                for(i=0; i<ind->nsend[nzone]; i++)
 +                {
 +                    at0 = cgindex[index[i]];
 +                    at1 = cgindex[index[i]+1];
 +                    for(j=at0; j<at1; j++)
 +                    {
 +                        rvec_inc(f[j],buf[n]);
 +                        /* Add this force to the shift force */
 +                        rvec_inc(fshift[is],buf[n]);
 +                        n++;
 +                    }
 +                }
 +            }
 +            else
 +            {
 +                for(i=0; i<ind->nsend[nzone]; i++)
 +                {
 +                    at0 = cgindex[index[i]];
 +                    at1 = cgindex[index[i]+1];
 +                    for(j=at0; j<at1; j++)
 +                    {
 +                        /* Rotate the force */
 +                        f[j][XX] += buf[n][XX];
 +                        f[j][YY] -= buf[n][YY];
 +                        f[j][ZZ] -= buf[n][ZZ];
 +                        if (fshift)
 +                        {
 +                            /* Add this force to the shift force */
 +                            rvec_inc(fshift[is],buf[n]);
 +                        }
 +                        n++;
 +                    }
 +                }
 +            }
 +        }
 +        nzone /= 2;
 +    }
 +}
 +
 +void dd_atom_spread_real(gmx_domdec_t *dd,real v[])
 +{
 +    int  nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
 +    int  *index,*cgindex;
 +    gmx_domdec_comm_t *comm;
 +    gmx_domdec_comm_dim_t *cd;
 +    gmx_domdec_ind_t *ind;
 +    real *buf,*rbuf;
 +    
 +    comm = dd->comm;
 +    
 +    cgindex = dd->cgindex;
 +    
 +    buf = &comm->vbuf.v[0][0];
 +
 +    nzone = 1;
 +    nat_tot = dd->nat_home;
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        cd = &comm->cd[d];
 +        for(p=0; p<cd->np; p++)
 +        {
 +            ind = &cd->ind[p];
 +            index = ind->index;
 +            n = 0;
 +            for(i=0; i<ind->nsend[nzone]; i++)
 +            {
 +                at0 = cgindex[index[i]];
 +                at1 = cgindex[index[i]+1];
 +                for(j=at0; j<at1; j++)
 +                {
 +                    buf[n] = v[j];
 +                    n++;
 +                }
 +            }
 +            
 +            if (cd->bInPlace)
 +            {
 +                rbuf = v + nat_tot;
 +            }
 +            else
 +            {
 +                rbuf = &comm->vbuf2.v[0][0];
 +            }
 +            /* Send and receive the coordinates */
 +            dd_sendrecv_real(dd, d, dddirBackward,
 +                             buf,  ind->nsend[nzone+1],
 +                             rbuf, ind->nrecv[nzone+1]);
 +            if (!cd->bInPlace)
 +            {
 +                j = 0;
 +                for(zone=0; zone<nzone; zone++)
 +                {
 +                    for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
 +                    {
 +                        v[i] = rbuf[j];
 +                        j++;
 +                    }
 +                }
 +            }
 +            nat_tot += ind->nrecv[nzone+1];
 +        }
 +        nzone += nzone;
 +    }
 +}
 +
 +void dd_atom_sum_real(gmx_domdec_t *dd,real v[])
 +{
 +    int  nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
 +    int  *index,*cgindex;
 +    gmx_domdec_comm_t *comm;
 +    gmx_domdec_comm_dim_t *cd;
 +    gmx_domdec_ind_t *ind;
 +    real *buf,*sbuf;
 +    
 +    comm = dd->comm;
 +    
 +    cgindex = dd->cgindex;
 +
 +    buf = &comm->vbuf.v[0][0];
 +
 +    n = 0;
 +    nzone = comm->zones.n/2;
 +    nat_tot = dd->nat_tot;
 +    for(d=dd->ndim-1; d>=0; d--)
 +    {
 +        cd = &comm->cd[d];
 +        for(p=cd->np-1; p>=0; p--) {
 +            ind = &cd->ind[p];
 +            nat_tot -= ind->nrecv[nzone+1];
 +            if (cd->bInPlace)
 +            {
 +                sbuf = v + nat_tot;
 +            }
 +            else
 +            {
 +                sbuf = &comm->vbuf2.v[0][0];
 +                j = 0;
 +                for(zone=0; zone<nzone; zone++)
 +                {
 +                    for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
 +                    {
 +                        sbuf[j] = v[i];
 +                        j++;
 +                    }
 +                }
 +            }
 +            /* Communicate the forces */
 +            dd_sendrecv_real(dd, d, dddirForward,
 +                             sbuf, ind->nrecv[nzone+1],
 +                             buf,  ind->nsend[nzone+1]);
 +            index = ind->index;
 +            /* Add the received forces */
 +            n = 0;
 +            for(i=0; i<ind->nsend[nzone]; i++)
 +            {
 +                at0 = cgindex[index[i]];
 +                at1 = cgindex[index[i]+1];
 +                for(j=at0; j<at1; j++)
 +                {
 +                    v[j] += buf[n];
 +                    n++;
 +                }
 +            } 
 +        }
 +        nzone /= 2;
 +    }
 +}
 +
 +static void print_ddzone(FILE *fp,int d,int i,int j,gmx_ddzone_t *zone)
 +{
 +    fprintf(fp,"zone d0 %d d1 %d d2 %d  min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
 +            d,i,j,
 +            zone->min0,zone->max1,
 +            zone->mch0,zone->mch1,
 +            zone->p1_0,zone->p1_1);
 +}
 +
 +
 +#define DDZONECOMM_MAXZONE  5
 +#define DDZONECOMM_BUFSIZE  3
 +
 +static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
 +                               int ddimind,int direction,
 +                               gmx_ddzone_t *buf_s,int n_s,
 +                               gmx_ddzone_t *buf_r,int n_r)
 +{
 +#define ZBS  DDZONECOMM_BUFSIZE
 +    rvec vbuf_s[DDZONECOMM_MAXZONE*ZBS];
 +    rvec vbuf_r[DDZONECOMM_MAXZONE*ZBS];
 +    int i;
 +
 +    for(i=0; i<n_s; i++)
 +    {
 +        vbuf_s[i*ZBS  ][0] = buf_s[i].min0;
 +        vbuf_s[i*ZBS  ][1] = buf_s[i].max1;
 +        vbuf_s[i*ZBS  ][2] = buf_s[i].min1;
 +        vbuf_s[i*ZBS+1][0] = buf_s[i].mch0;
 +        vbuf_s[i*ZBS+1][1] = buf_s[i].mch1;
 +        vbuf_s[i*ZBS+1][2] = 0;
 +        vbuf_s[i*ZBS+2][0] = buf_s[i].p1_0;
 +        vbuf_s[i*ZBS+2][1] = buf_s[i].p1_1;
 +        vbuf_s[i*ZBS+2][2] = 0;
 +    }
 +
 +    dd_sendrecv_rvec(dd, ddimind, direction,
 +                     vbuf_s, n_s*ZBS,
 +                     vbuf_r, n_r*ZBS);
 +
 +    for(i=0; i<n_r; i++)
 +    {
 +        buf_r[i].min0 = vbuf_r[i*ZBS  ][0];
 +        buf_r[i].max1 = vbuf_r[i*ZBS  ][1];
 +        buf_r[i].min1 = vbuf_r[i*ZBS  ][2];
 +        buf_r[i].mch0 = vbuf_r[i*ZBS+1][0];
 +        buf_r[i].mch1 = vbuf_r[i*ZBS+1][1];
 +        buf_r[i].p1_0 = vbuf_r[i*ZBS+2][0];
 +        buf_r[i].p1_1 = vbuf_r[i*ZBS+2][1];
 +    }
 +
 +#undef ZBS
 +}
 +
 +static void dd_move_cellx(gmx_domdec_t *dd,gmx_ddbox_t *ddbox,
 +                          rvec cell_ns_x0,rvec cell_ns_x1)
 +{
 +    int  d,d1,dim,dim1,pos,buf_size,i,j,k,p,npulse,npulse_min;
 +    gmx_ddzone_t *zp;
 +    gmx_ddzone_t buf_s[DDZONECOMM_MAXZONE];
 +    gmx_ddzone_t buf_r[DDZONECOMM_MAXZONE];
 +    gmx_ddzone_t buf_e[DDZONECOMM_MAXZONE];
 +    rvec extr_s[2],extr_r[2];
 +    rvec dh;
 +    real dist_d,c=0,det;
 +    gmx_domdec_comm_t *comm;
 +    gmx_bool bPBC,bUse;
 +
 +    comm = dd->comm;
 +
 +    for(d=1; d<dd->ndim; d++)
 +    {
 +        dim = dd->dim[d];
 +        zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
 +        zp->min0 = cell_ns_x0[dim];
 +        zp->max1 = cell_ns_x1[dim];
 +        zp->min1 = cell_ns_x1[dim];
 +        zp->mch0 = cell_ns_x0[dim];
 +        zp->mch1 = cell_ns_x1[dim];
 +        zp->p1_0 = cell_ns_x0[dim];
 +        zp->p1_1 = cell_ns_x1[dim];
 +    }
 +    
 +    for(d=dd->ndim-2; d>=0; d--)
 +    {
 +        dim  = dd->dim[d];
 +        bPBC = (dim < ddbox->npbcdim);
 +
 +        /* Use an rvec to store two reals */
 +        extr_s[d][0] = comm->cell_f0[d+1];
 +        extr_s[d][1] = comm->cell_f1[d+1];
 +        extr_s[d][2] = comm->cell_f1[d+1];
 +
 +        pos = 0;
 +        /* Store the extremes in the backward sending buffer,
 +         * so they get updated separately from the forward communication.
 +         */
 +        for(d1=d; d1<dd->ndim-1; d1++)
 +        {
 +            /* We invert the order to be able to use the same loop for buf_e */
 +            buf_s[pos].min0 = extr_s[d1][1];
 +            buf_s[pos].max1 = extr_s[d1][0];
 +            buf_s[pos].min1 = extr_s[d1][2];
 +            buf_s[pos].mch0 = 0;
 +            buf_s[pos].mch1 = 0;
 +            /* Store the cell corner of the dimension we communicate along */
 +            buf_s[pos].p1_0 = comm->cell_x0[dim];
 +            buf_s[pos].p1_1 = 0;
 +            pos++;
 +        }
 +
 +        buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
 +        pos++;
 +
 +        if (dd->ndim == 3 && d == 0)
 +        {
 +            buf_s[pos] = comm->zone_d2[0][1];
 +            pos++;
 +            buf_s[pos] = comm->zone_d1[0];
 +            pos++;
 +        }
 +
 +        /* We only need to communicate the extremes
 +         * in the forward direction
 +         */
 +        npulse = comm->cd[d].np;
 +        if (bPBC)
 +        {
 +            /* Take the minimum to avoid double communication */
 +            npulse_min = min(npulse,dd->nc[dim]-1-npulse);
 +        }
 +        else
 +        {
 +            /* Without PBC we should really not communicate over
 +             * the boundaries, but implementing that complicates
 +             * the communication setup and therefore we simply
 +             * do all communication, but ignore some data.
 +             */
 +            npulse_min = npulse;
 +        }
 +        for(p=0; p<npulse_min; p++)
 +        {
 +            /* Communicate the extremes forward */
 +            bUse = (bPBC || dd->ci[dim] > 0);
 +
 +            dd_sendrecv_rvec(dd, d, dddirForward,
 +                             extr_s+d, dd->ndim-d-1,
 +                             extr_r+d, dd->ndim-d-1);
 +
 +            if (bUse)
 +            {
 +                for(d1=d; d1<dd->ndim-1; d1++)
 +                {
 +                    extr_s[d1][0] = max(extr_s[d1][0],extr_r[d1][0]);
 +                    extr_s[d1][1] = min(extr_s[d1][1],extr_r[d1][1]);
 +                    extr_s[d1][2] = min(extr_s[d1][2],extr_r[d1][2]);
 +                }
 +            }
 +        }
 +
 +        buf_size = pos;
 +        for(p=0; p<npulse; p++)
 +        {
 +            /* Communicate all the zone information backward */
 +            bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);
 +
 +            dd_sendrecv_ddzone(dd, d, dddirBackward,
 +                               buf_s, buf_size,
 +                               buf_r, buf_size);
 +
 +            clear_rvec(dh);
 +            if (p > 0)
 +            {
 +                for(d1=d+1; d1<dd->ndim; d1++)
 +                {
 +                    /* Determine the decrease of maximum required
 +                     * communication height along d1 due to the distance along d,
 +                     * this avoids a lot of useless atom communication.
 +                     */
 +                    dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
 +
 +                    if (ddbox->tric_dir[dim])
 +                    {
 +                        /* c is the off-diagonal coupling between the cell planes
 +                         * along directions d and d1.
 +                         */
 +                        c = ddbox->v[dim][dd->dim[d1]][dim];
 +                    }
 +                    else
 +                    {
 +                        c = 0;
 +                    }
 +                    det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
 +                    if (det > 0)
 +                    {
 +                        dh[d1] = comm->cutoff - (c*dist_d + sqrt(det))/(1 + c*c);
 +                    }
 +                    else
 +                    {
 +                        /* A negative value signals out of range */
 +                        dh[d1] = -1;
 +                    }
 +                }
 +            }
 +
 +            /* Accumulate the extremes over all pulses */
 +            for(i=0; i<buf_size; i++)
 +            {
 +                if (p == 0)
 +                {
 +                    buf_e[i] = buf_r[i];
 +                }
 +                else
 +                {
 +                    if (bUse)
 +                    {
 +                        buf_e[i].min0 = min(buf_e[i].min0,buf_r[i].min0);
 +                        buf_e[i].max1 = max(buf_e[i].max1,buf_r[i].max1);
 +                        buf_e[i].min1 = min(buf_e[i].min1,buf_r[i].min1);
 +                    }
 +
 +                    if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
 +                    {
 +                        d1 = 1;
 +                    }
 +                    else
 +                    {
 +                        d1 = d + 1;
 +                    }
 +                    if (bUse && dh[d1] >= 0)
 +                    {
 +                        buf_e[i].mch0 = max(buf_e[i].mch0,buf_r[i].mch0-dh[d1]);
 +                        buf_e[i].mch1 = max(buf_e[i].mch1,buf_r[i].mch1-dh[d1]);
 +                    }
 +                }
 +                /* Copy the received buffer to the send buffer,
 +                 * to pass the data through with the next pulse.
 +                 */
 +                buf_s[i] = buf_r[i];
 +            }
 +            if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
 +                (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
 +            {
 +                /* Store the extremes */ 
 +                pos = 0;
 +
 +                for(d1=d; d1<dd->ndim-1; d1++)
 +                {
 +                    extr_s[d1][1] = min(extr_s[d1][1],buf_e[pos].min0);
 +                    extr_s[d1][0] = max(extr_s[d1][0],buf_e[pos].max1);
 +                    extr_s[d1][2] = min(extr_s[d1][2],buf_e[pos].min1);
 +                    pos++;
 +                }
 +
 +                if (d == 1 || (d == 0 && dd->ndim == 3))
 +                {
 +                    for(i=d; i<2; i++)
 +                    {
 +                        comm->zone_d2[1-d][i] = buf_e[pos];
 +                        pos++;
 +                    }
 +                }
 +                if (d == 0)
 +                {
 +                    comm->zone_d1[1] = buf_e[pos];
 +                    pos++;
 +                }
 +            }
 +        }
 +    }
 +    
 +    if (dd->ndim >= 2)
 +    {
 +        dim = dd->dim[1];
 +        for(i=0; i<2; i++)
 +        {
 +            if (debug)
 +            {
 +                print_ddzone(debug,1,i,0,&comm->zone_d1[i]);
 +            }
 +            cell_ns_x0[dim] = min(cell_ns_x0[dim],comm->zone_d1[i].min0);
 +            cell_ns_x1[dim] = max(cell_ns_x1[dim],comm->zone_d1[i].max1);
 +        }
 +    }
 +    if (dd->ndim >= 3)
 +    {
 +        dim = dd->dim[2];
 +        for(i=0; i<2; i++)
 +        {
 +            for(j=0; j<2; j++)
 +            {
 +                if (debug)
 +                {
 +                    print_ddzone(debug,2,i,j,&comm->zone_d2[i][j]);
 +                }
 +                cell_ns_x0[dim] = min(cell_ns_x0[dim],comm->zone_d2[i][j].min0);
 +                cell_ns_x1[dim] = max(cell_ns_x1[dim],comm->zone_d2[i][j].max1);
 +            }
 +        }
 +    }
 +    for(d=1; d<dd->ndim; d++)
 +    {
 +        comm->cell_f_max0[d] = extr_s[d-1][0];
 +        comm->cell_f_min1[d] = extr_s[d-1][1];
 +        if (debug)
 +        {
 +            fprintf(debug,"Cell fraction d %d, max0 %f, min1 %f\n",
 +                    d,comm->cell_f_max0[d],comm->cell_f_min1[d]);
 +        }
 +    }
 +}
 +
 +static void dd_collect_cg(gmx_domdec_t *dd,
 +                          t_state *state_local)
 +{
 +    gmx_domdec_master_t *ma=NULL;
 +    int buf2[2],*ibuf,i,ncg_home=0,*cg=NULL,nat_home=0;
 +    t_block *cgs_gl;
 +
 +    if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
 +    {
 +        /* The master has the correct distribution */
 +        return;
 +    }
 +    
 +    if (state_local->ddp_count == dd->ddp_count)
 +    {
 +        ncg_home = dd->ncg_home;
 +        cg       = dd->index_gl;
 +        nat_home = dd->nat_home;
 +    } 
 +    else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
 +    {
 +        cgs_gl = &dd->comm->cgs_gl;
 +
 +        ncg_home = state_local->ncg_gl;
 +        cg       = state_local->cg_gl;
 +        nat_home = 0;
 +        for(i=0; i<ncg_home; i++)
 +        {
 +            nat_home += cgs_gl->index[cg[i]+1] - cgs_gl->index[cg[i]];
 +        }
 +    }
 +    else
 +    {
 +        gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
 +    }
 +    
 +    buf2[0] = dd->ncg_home;
 +    buf2[1] = dd->nat_home;
 +    if (DDMASTER(dd))
 +    {
 +        ma = dd->ma;
 +        ibuf = ma->ibuf;
 +    }
 +    else
 +    {
 +        ibuf = NULL;
 +    }
 +    /* Collect the charge group and atom counts on the master */
 +    dd_gather(dd,2*sizeof(int),buf2,ibuf);
 +    
 +    if (DDMASTER(dd))
 +    {
 +        ma->index[0] = 0;
 +        for(i=0; i<dd->nnodes; i++)
 +        {
 +            ma->ncg[i] = ma->ibuf[2*i];
 +            ma->nat[i] = ma->ibuf[2*i+1];
 +            ma->index[i+1] = ma->index[i] + ma->ncg[i];
 +            
 +        }
 +        /* Make byte counts and indices */
 +        for(i=0; i<dd->nnodes; i++)
 +        {
 +            ma->ibuf[i] = ma->ncg[i]*sizeof(int);
 +            ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
 +        }
 +        if (debug)
 +        {
 +            fprintf(debug,"Initial charge group distribution: ");
 +            for(i=0; i<dd->nnodes; i++)
 +                fprintf(debug," %d",ma->ncg[i]);
 +            fprintf(debug,"\n");
 +        }
 +    }
 +    
 +    /* Collect the charge group indices on the master */
 +    dd_gatherv(dd,
 +               dd->ncg_home*sizeof(int),dd->index_gl,
 +               DDMASTER(dd) ? ma->ibuf : NULL,
 +               DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
 +               DDMASTER(dd) ? ma->cg : NULL);
 +    
 +    dd->comm->master_cg_ddp_count = state_local->ddp_count;
 +}
 +
 +static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
 +                                    rvec *lv,rvec *v)
 +{
 +    gmx_domdec_master_t *ma;
 +    int  n,i,c,a,nalloc=0;
 +    rvec *buf=NULL;
 +    t_block *cgs_gl;
 +
 +    ma = dd->ma;
 +    
 +    if (!DDMASTER(dd))
 +    {
 +#ifdef GMX_MPI
 +        MPI_Send(lv,dd->nat_home*sizeof(rvec),MPI_BYTE,DDMASTERRANK(dd),
 +                 dd->rank,dd->mpi_comm_all);
 +#endif
 +    } else {
 +        /* Copy the master coordinates to the global array */
 +        cgs_gl = &dd->comm->cgs_gl;
 +
 +        n = DDMASTERRANK(dd);
 +        a = 0;
 +        for(i=ma->index[n]; i<ma->index[n+1]; i++)
 +        {
 +            for(c=cgs_gl->index[ma->cg[i]]; c<cgs_gl->index[ma->cg[i]+1]; c++)
 +            {
 +                copy_rvec(lv[a++],v[c]);
 +            }
 +        }
 +        
 +        for(n=0; n<dd->nnodes; n++)
 +        {
 +            if (n != dd->rank)
 +            {
 +                if (ma->nat[n] > nalloc)
 +                {
 +                    nalloc = over_alloc_dd(ma->nat[n]);
 +                    srenew(buf,nalloc);
 +                }
 +#ifdef GMX_MPI
 +                MPI_Recv(buf,ma->nat[n]*sizeof(rvec),MPI_BYTE,DDRANK(dd,n),
 +                         n,dd->mpi_comm_all,MPI_STATUS_IGNORE);
 +#endif
 +                a = 0;
 +                for(i=ma->index[n]; i<ma->index[n+1]; i++)
 +                {
 +                    for(c=cgs_gl->index[ma->cg[i]]; c<cgs_gl->index[ma->cg[i]+1]; c++)
 +                    {
 +                        copy_rvec(buf[a++],v[c]);
 +                    }
 +                }
 +            }
 +        }
 +        sfree(buf);
 +    }
 +}
 +
 +static void get_commbuffer_counts(gmx_domdec_t *dd,
 +                                  int **counts,int **disps)
 +{
 +    gmx_domdec_master_t *ma;
 +    int n;
 +
 +    ma = dd->ma;
 +    
 +    /* Make the rvec count and displacement arrays */
 +    *counts  = ma->ibuf;
 +    *disps   = ma->ibuf + dd->nnodes;
 +    for(n=0; n<dd->nnodes; n++)
 +    {
 +        (*counts)[n] = ma->nat[n]*sizeof(rvec);
 +        (*disps)[n]  = (n == 0 ? 0 : (*disps)[n-1] + (*counts)[n-1]);
 +    }
 +}
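 +
 +/* A minimal worked example of the count/displacement layout above, with
 + * hypothetical numbers (not taken from this change): for three nodes with
 + * nat = {10, 12, 8} home atoms and single precision, where sizeof(rvec) = 12,
 + *     counts = {120, 144,  96}  bytes
 + *     disps  = {  0, 120, 264}  bytes
 + * i.e. disps[n] is the running sum of the preceding counts, which is the
 + * layout expected by MPI_Gatherv/MPI_Scatterv style collectives.
 + */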
 +
 +static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
 +                                   rvec *lv,rvec *v)
 +{
 +    gmx_domdec_master_t *ma;
 +    int  *rcounts=NULL,*disps=NULL;
 +    int  n,i,c,a;
 +    rvec *buf=NULL;
 +    t_block *cgs_gl;
 +    
 +    ma = dd->ma;
 +    
 +    if (DDMASTER(dd))
 +    {
 +        get_commbuffer_counts(dd,&rcounts,&disps);
 +
 +        buf = ma->vbuf;
 +    }
 +    
 +    dd_gatherv(dd,dd->nat_home*sizeof(rvec),lv,rcounts,disps,buf);
 +
 +    if (DDMASTER(dd))
 +    {
 +        cgs_gl = &dd->comm->cgs_gl;
 +
 +        a = 0;
 +        for(n=0; n<dd->nnodes; n++)
 +        {
 +            for(i=ma->index[n]; i<ma->index[n+1]; i++)
 +            {
 +                for(c=cgs_gl->index[ma->cg[i]]; c<cgs_gl->index[ma->cg[i]+1]; c++)
 +                {
 +                    copy_rvec(buf[a++],v[c]);
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +void dd_collect_vec(gmx_domdec_t *dd,
 +                    t_state *state_local,rvec *lv,rvec *v)
 +{
 +    dd_collect_cg(dd,state_local);
 +
 +    if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
 +    {
 +        dd_collect_vec_sendrecv(dd,lv,v);
 +    }
 +    else
 +    {
 +        dd_collect_vec_gatherv(dd,lv,v);
 +    }
 +}
 +
 +
 +void dd_collect_state(gmx_domdec_t *dd,
 +                      t_state *state_local,t_state *state)
 +{
 +    int est,i,j,nh;
 +
 +    nh = state->nhchainlength;
 +
 +    if (DDMASTER(dd))
 +    {
 +        for (i=0;i<efptNR;i++) {
 +            state->lambda[i] = state_local->lambda[i];
 +        }
 +        state->fep_state = state_local->fep_state;
 +        state->veta = state_local->veta;
 +        state->vol0 = state_local->vol0;
 +        copy_mat(state_local->box,state->box);
 +        copy_mat(state_local->boxv,state->boxv);
 +        copy_mat(state_local->svir_prev,state->svir_prev);
 +        copy_mat(state_local->fvir_prev,state->fvir_prev);
 +        copy_mat(state_local->pres_prev,state->pres_prev);
 +
 +
 +        for(i=0; i<state_local->ngtc; i++)
 +        {
 +            for(j=0; j<nh; j++) {
 +                state->nosehoover_xi[i*nh+j]        = state_local->nosehoover_xi[i*nh+j];
 +                state->nosehoover_vxi[i*nh+j]       = state_local->nosehoover_vxi[i*nh+j];
 +            }
 +            state->therm_integral[i] = state_local->therm_integral[i];            
 +        }
 +        for(i=0; i<state_local->nnhpres; i++) 
 +        {
 +            for(j=0; j<nh; j++) {
 +                state->nhpres_xi[i*nh+j]        = state_local->nhpres_xi[i*nh+j];
 +                state->nhpres_vxi[i*nh+j]       = state_local->nhpres_vxi[i*nh+j];
 +            }
 +        }
 +    }
 +    for(est=0; est<estNR; est++)
 +    {
 +        if (EST_DISTR(est) && (state_local->flags & (1<<est)))
 +        {
 +            switch (est) {
 +            case estX:
 +                dd_collect_vec(dd,state_local,state_local->x,state->x);
 +                break;
 +            case estV:
 +                dd_collect_vec(dd,state_local,state_local->v,state->v);
 +                break;
 +            case estSDX:
 +                dd_collect_vec(dd,state_local,state_local->sd_X,state->sd_X);
 +                break;
 +            case estCGP:
 +                dd_collect_vec(dd,state_local,state_local->cg_p,state->cg_p);
 +                break;
 +            case estLD_RNG:
 +                if (state->nrngi == 1)
 +                {
 +                    if (DDMASTER(dd))
 +                    {
 +                        for(i=0; i<state_local->nrng; i++)
 +                        {
 +                            state->ld_rng[i] = state_local->ld_rng[i];
 +                        }
 +                    }
 +                }
 +                else
 +                {
 +                    dd_gather(dd,state_local->nrng*sizeof(state->ld_rng[0]),
 +                              state_local->ld_rng,state->ld_rng);
 +                }
 +                break;
 +            case estLD_RNGI:
 +                if (state->nrngi == 1)
 +                {
 +                   if (DDMASTER(dd))
 +                    {
 +                        state->ld_rngi[0] = state_local->ld_rngi[0];
 +                    } 
 +                }
 +                else
 +                {
 +                    dd_gather(dd,sizeof(state->ld_rngi[0]),
 +                              state_local->ld_rngi,state->ld_rngi);
 +                }
 +                break;
 +            case estDISRE_INITF:
 +            case estDISRE_RM3TAV:
 +            case estORIRE_INITF:
 +            case estORIRE_DTAV:
 +                break;
 +            default:
 +                gmx_incons("Unknown state entry encountered in dd_collect_state");
 +            }
 +        }
 +    }
 +}
 +
 +static void dd_realloc_state(t_state *state,rvec **f,int nalloc)
 +{
 +    int est;
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"Reallocating state: currently %d, required %d, allocating %d\n",state->nalloc,nalloc,over_alloc_dd(nalloc));
 +    }
 +
 +    state->nalloc = over_alloc_dd(nalloc);
 +    
 +    for(est=0; est<estNR; est++)
 +    {
 +        if (EST_DISTR(est) && (state->flags & (1<<est)))
 +        {
 +            switch(est) {
 +            case estX:
 +                srenew(state->x,state->nalloc);
 +                break;
 +            case estV:
 +                srenew(state->v,state->nalloc);
 +                break;
 +            case estSDX:
 +                srenew(state->sd_X,state->nalloc);
 +                break;
 +            case estCGP:
 +                srenew(state->cg_p,state->nalloc);
 +                break;
 +            case estLD_RNG:
 +            case estLD_RNGI:
 +            case estDISRE_INITF:
 +            case estDISRE_RM3TAV:
 +            case estORIRE_INITF:
 +            case estORIRE_DTAV:
 +                /* No reallocation required */
 +                break;
 +            default:
 +                gmx_incons("Unknown state entry encountered in dd_realloc_state");            
 +            }
 +        }
 +    }
 +    
 +    if (f != NULL)
 +    {
 +        srenew(*f,state->nalloc);
 +    }
 +}
 +
 +static void dd_check_alloc_ncg(t_forcerec *fr,t_state *state,rvec **f,
 +                               int nalloc)
 +{
 +    if (nalloc > fr->cg_nalloc)
 +    {
 +        if (debug)
 +        {
 +            fprintf(debug,"Reallocating forcerec: currently %d, required %d, allocating %d\n",fr->cg_nalloc,nalloc,over_alloc_dd(nalloc));
 +        }
 +        fr->cg_nalloc = over_alloc_dd(nalloc);
 +        srenew(fr->cginfo,fr->cg_nalloc);
 +        if (fr->cutoff_scheme == ecutsGROUP)
 +        {
 +            srenew(fr->cg_cm,fr->cg_nalloc);
 +        }
 +    }
 +    if (fr->cutoff_scheme == ecutsVERLET && nalloc > state->nalloc)
 +    {
 +        /* We don't use charge groups, we use x in state to set up
 +         * the atom communication.
 +         */
 +        dd_realloc_state(state,f,nalloc);
 +    }
 +}
 +
 +static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd,t_block *cgs,
 +                                       rvec *v,rvec *lv)
 +{
 +    gmx_domdec_master_t *ma;
 +    int  n,i,c,a,nalloc=0;
 +    rvec *buf=NULL;
 +    
 +    if (DDMASTER(dd))
 +    {
 +        ma  = dd->ma;
 +        
 +        for(n=0; n<dd->nnodes; n++)
 +        {
 +            if (n != dd->rank)
 +            {
 +                if (ma->nat[n] > nalloc)
 +                {
 +                    nalloc = over_alloc_dd(ma->nat[n]);
 +                    srenew(buf,nalloc);
 +                }
 +                /* Use buf as a temporary send buffer */
 +                a = 0;
 +                for(i=ma->index[n]; i<ma->index[n+1]; i++)
 +                {
 +                    for(c=cgs->index[ma->cg[i]]; c<cgs->index[ma->cg[i]+1]; c++)
 +                    {
 +                        copy_rvec(v[c],buf[a++]);
 +                    }
 +                }
 +                if (a != ma->nat[n])
 +                {
 +                    gmx_fatal(FARGS,"Internal error a (%d) != nat (%d)",
 +                              a,ma->nat[n]);
 +                }
 +                
 +#ifdef GMX_MPI
 +                MPI_Send(buf,ma->nat[n]*sizeof(rvec),MPI_BYTE,
 +                         DDRANK(dd,n),n,dd->mpi_comm_all);
 +#endif
 +            }
 +        }
 +        sfree(buf);
 +        n = DDMASTERRANK(dd);
 +        a = 0;
 +        for(i=ma->index[n]; i<ma->index[n+1]; i++)
 +        {
 +            for(c=cgs->index[ma->cg[i]]; c<cgs->index[ma->cg[i]+1]; c++)
 +            {
 +                copy_rvec(v[c],lv[a++]);
 +            }
 +        }
 +    }
 +    else
 +    {
 +#ifdef GMX_MPI
 +        MPI_Recv(lv,dd->nat_home*sizeof(rvec),MPI_BYTE,DDMASTERRANK(dd),
 +                 MPI_ANY_TAG,dd->mpi_comm_all,MPI_STATUS_IGNORE);
 +#endif
 +    }
 +}
 +
 +static void dd_distribute_vec_scatterv(gmx_domdec_t *dd,t_block *cgs,
 +                                       rvec *v,rvec *lv)
 +{
 +    gmx_domdec_master_t *ma;
 +    int  *scounts=NULL,*disps=NULL;
 +    int  n,i,c,a,nalloc=0;
 +    rvec *buf=NULL;
 +    
 +    if (DDMASTER(dd))
 +    {
 +        ma  = dd->ma;
 +     
 +        get_commbuffer_counts(dd,&scounts,&disps);
 +
 +        buf = ma->vbuf;
 +        a = 0;
 +        for(n=0; n<dd->nnodes; n++)
 +        {
 +            for(i=ma->index[n]; i<ma->index[n+1]; i++)
 +            {
 +                for(c=cgs->index[ma->cg[i]]; c<cgs->index[ma->cg[i]+1]; c++)
 +                {
 +                    copy_rvec(v[c],buf[a++]);
 +                }
 +            }
 +        }
 +    }
 +
 +    dd_scatterv(dd,scounts,disps,buf,dd->nat_home*sizeof(rvec),lv);
 +}
 +
 +static void dd_distribute_vec(gmx_domdec_t *dd,t_block *cgs,rvec *v,rvec *lv)
 +{
 +    if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
 +    {
 +        dd_distribute_vec_sendrecv(dd,cgs,v,lv);
 +    }
 +    else
 +    {
 +        dd_distribute_vec_scatterv(dd,cgs,v,lv);
 +    }
 +}
 +
 +static void dd_distribute_state(gmx_domdec_t *dd,t_block *cgs,
 +                                t_state *state,t_state *state_local,
 +                                rvec **f)
 +{
 +    int  i,j,nh;
 +
 +    nh = state->nhchainlength;
 +
 +    if (DDMASTER(dd))
 +    {
 +        for(i=0;i<efptNR;i++)
 +        {
 +            state_local->lambda[i] = state->lambda[i];
 +        }
 +        state_local->fep_state = state->fep_state;
 +        state_local->veta   = state->veta;
 +        state_local->vol0   = state->vol0;
 +        copy_mat(state->box,state_local->box);
 +        copy_mat(state->box_rel,state_local->box_rel);
 +        copy_mat(state->boxv,state_local->boxv);
 +        copy_mat(state->svir_prev,state_local->svir_prev);
 +        copy_mat(state->fvir_prev,state_local->fvir_prev);
 +        for(i=0; i<state_local->ngtc; i++)
 +        {
 +            for(j=0; j<nh; j++) {
 +                state_local->nosehoover_xi[i*nh+j]        = state->nosehoover_xi[i*nh+j];
 +                state_local->nosehoover_vxi[i*nh+j]       = state->nosehoover_vxi[i*nh+j];
 +            }
 +            state_local->therm_integral[i] = state->therm_integral[i];
 +        }
 +        for(i=0; i<state_local->nnhpres; i++)
 +        {
 +            for(j=0; j<nh; j++) {
 +                state_local->nhpres_xi[i*nh+j]        = state->nhpres_xi[i*nh+j];
 +                state_local->nhpres_vxi[i*nh+j]       = state->nhpres_vxi[i*nh+j];
 +            }
 +        }
 +    }
 +    dd_bcast(dd,((efptNR)*sizeof(real)),state_local->lambda);
 +    dd_bcast(dd,sizeof(int),&state_local->fep_state);
 +    dd_bcast(dd,sizeof(real),&state_local->veta);
 +    dd_bcast(dd,sizeof(real),&state_local->vol0);
 +    dd_bcast(dd,sizeof(state_local->box),state_local->box);
 +    dd_bcast(dd,sizeof(state_local->box_rel),state_local->box_rel);
 +    dd_bcast(dd,sizeof(state_local->boxv),state_local->boxv);
 +    dd_bcast(dd,sizeof(state_local->svir_prev),state_local->svir_prev);
 +    dd_bcast(dd,sizeof(state_local->fvir_prev),state_local->fvir_prev);
 +    dd_bcast(dd,((state_local->ngtc*nh)*sizeof(double)),state_local->nosehoover_xi);
 +    dd_bcast(dd,((state_local->ngtc*nh)*sizeof(double)),state_local->nosehoover_vxi);
 +    dd_bcast(dd,state_local->ngtc*sizeof(double),state_local->therm_integral);
 +    dd_bcast(dd,((state_local->nnhpres*nh)*sizeof(double)),state_local->nhpres_xi);
 +    dd_bcast(dd,((state_local->nnhpres*nh)*sizeof(double)),state_local->nhpres_vxi);
 +
 +    if (dd->nat_home > state_local->nalloc)
 +    {
 +        dd_realloc_state(state_local,f,dd->nat_home);
 +    }
 +    for(i=0; i<estNR; i++)
 +    {
 +        if (EST_DISTR(i) && (state_local->flags & (1<<i)))
 +        {
 +            switch (i) {
 +            case estX:
 +                dd_distribute_vec(dd,cgs,state->x,state_local->x);
 +                break;
 +            case estV:
 +                dd_distribute_vec(dd,cgs,state->v,state_local->v);
 +                break;
 +            case estSDX:
 +                dd_distribute_vec(dd,cgs,state->sd_X,state_local->sd_X);
 +                break;
 +            case estCGP:
 +                dd_distribute_vec(dd,cgs,state->cg_p,state_local->cg_p);
 +                break;
 +            case estLD_RNG:
 +                if (state->nrngi == 1)
 +                {
 +                    dd_bcastc(dd,
 +                              state_local->nrng*sizeof(state_local->ld_rng[0]),
 +                              state->ld_rng,state_local->ld_rng);
 +                }
 +                else
 +                {
 +                    dd_scatter(dd,
 +                               state_local->nrng*sizeof(state_local->ld_rng[0]),
 +                               state->ld_rng,state_local->ld_rng);
 +                }
 +                break;
 +            case estLD_RNGI:
 +                if (state->nrngi == 1)
 +                {
 +                    dd_bcastc(dd,sizeof(state_local->ld_rngi[0]),
 +                              state->ld_rngi,state_local->ld_rngi);
 +                }
 +                else
 +                {
 +                     dd_scatter(dd,sizeof(state_local->ld_rngi[0]),
 +                               state->ld_rngi,state_local->ld_rngi);
 +                }   
 +                break;
 +            case estDISRE_INITF:
 +            case estDISRE_RM3TAV:
 +            case estORIRE_INITF:
 +            case estORIRE_DTAV:
 +                /* Not implemented yet */
 +                break;
 +            default:
 +                gmx_incons("Unknown state entry encountered in dd_distribute_state");
 +            }
 +        }
 +    }
 +}
 +
 +static char dim2char(int dim)
 +{
 +    char c='?';
 +    
 +    switch (dim)
 +    {
 +    case XX: c = 'X'; break;
 +    case YY: c = 'Y'; break;
 +    case ZZ: c = 'Z'; break;
 +    default: gmx_fatal(FARGS,"Unknown dim %d",dim);
 +    }
 +    
 +    return c;
 +}
 +
 +static void write_dd_grid_pdb(const char *fn,gmx_large_int_t step,
 +                              gmx_domdec_t *dd,matrix box,gmx_ddbox_t *ddbox)
 +{
 +    rvec grid_s[2],*grid_r=NULL,cx,r;
 +    char fname[STRLEN],format[STRLEN],buf[22];
 +    FILE *out;
 +    int  a,i,d,z,y,x;
 +    matrix tric;
 +    real vol;
 +
 +    copy_rvec(dd->comm->cell_x0,grid_s[0]);
 +    copy_rvec(dd->comm->cell_x1,grid_s[1]);
 +    
 +    if (DDMASTER(dd))
 +    {
 +        snew(grid_r,2*dd->nnodes);
 +    }
 +    
 +    dd_gather(dd,2*sizeof(rvec),grid_s[0],DDMASTER(dd) ? grid_r[0] : NULL);
 +    
 +    if (DDMASTER(dd))
 +    {
 +        for(d=0; d<DIM; d++)
 +        {
 +            for(i=0; i<DIM; i++)
 +            {
 +                if (d == i)
 +                {
 +                    tric[d][i] = 1;
 +                }
 +                else
 +                {
 +                    if (d < ddbox->npbcdim && dd->nc[d] > 1)
 +                    {
 +                        tric[d][i] = box[i][d]/box[i][i];
 +                    }
 +                    else
 +                    {
 +                        tric[d][i] = 0;
 +                    }
 +                }
 +            }
 +        }
 +        sprintf(fname,"%s_%s.pdb",fn,gmx_step_str(step,buf));
 +        sprintf(format,"%s%s\n",get_pdbformat(),"%6.2f%6.2f");
 +        out = gmx_fio_fopen(fname,"w");
 +        gmx_write_pdb_box(out,dd->bScrewPBC ? epbcSCREW : epbcXYZ,box);
 +        a = 1;
 +        for(i=0; i<dd->nnodes; i++)
 +        {
 +            vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
 +            for(d=0; d<DIM; d++)
 +            {
 +                vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
 +            }
 +            for(z=0; z<2; z++)
 +            {
 +                for(y=0; y<2; y++)
 +                {
 +                    for(x=0; x<2; x++)
 +                    {
 +                        cx[XX] = grid_r[i*2+x][XX];
 +                        cx[YY] = grid_r[i*2+y][YY];
 +                        cx[ZZ] = grid_r[i*2+z][ZZ];
 +                        mvmul(tric,cx,r);
 +                        fprintf(out,format,"ATOM",a++,"CA","GLY",' ',1+i,
 +                                10*r[XX],10*r[YY],10*r[ZZ],1.0,vol);
 +                    }
 +                }
 +            }
 +            for(d=0; d<DIM; d++)
 +            {
 +                for(x=0; x<4; x++)
 +                {
 +                    switch(d)
 +                    {
 +                    case 0: y = 1 + i*8 + 2*x; break;
 +                    case 1: y = 1 + i*8 + 2*x - (x % 2); break;
 +                    case 2: y = 1 + i*8 + x; break;
 +                    }
 +                    fprintf(out,"%6s%5d%5d\n","CONECT",y,y+(1<<d));
 +                }
 +            }
 +        }
 +        gmx_fio_fclose(out);
 +        sfree(grid_r);
 +    }
 +}
 +
 +void write_dd_pdb(const char *fn,gmx_large_int_t step,const char *title,
 +                  gmx_mtop_t *mtop,t_commrec *cr,
 +                  int natoms,rvec x[],matrix box)
 +{
 +    char fname[STRLEN],format[STRLEN],format4[STRLEN],buf[22];
 +    FILE *out;
 +    int  i,ii,resnr,c;
 +    char *atomname,*resname;
 +    real b;
 +    gmx_domdec_t *dd;
 +    
 +    dd = cr->dd;
 +    if (natoms == -1)
 +    {
 +        natoms = dd->comm->nat[ddnatVSITE];
 +    }
 +    
 +    sprintf(fname,"%s_%s_n%d.pdb",fn,gmx_step_str(step,buf),cr->sim_nodeid);
 +    
 +    sprintf(format,"%s%s\n",get_pdbformat(),"%6.2f%6.2f");
 +    sprintf(format4,"%s%s\n",get_pdbformat4(),"%6.2f%6.2f");
 +    
 +    out = gmx_fio_fopen(fname,"w");
 +    
 +    fprintf(out,"TITLE     %s\n",title);
 +    gmx_write_pdb_box(out,dd->bScrewPBC ? epbcSCREW : epbcXYZ,box);
 +    for(i=0; i<natoms; i++)
 +    {
 +        ii = dd->gatindex[i];
 +        gmx_mtop_atominfo_global(mtop,ii,&atomname,&resnr,&resname);
 +        if (i < dd->comm->nat[ddnatZONE])
 +        {
 +            c = 0;
 +            while (i >= dd->cgindex[dd->comm->zones.cg_range[c+1]])
 +            {
 +                c++;
 +            }
 +            b = c;
 +        }
 +        else if (i < dd->comm->nat[ddnatVSITE])
 +        {
 +            b = dd->comm->zones.n;
 +        }
 +        else
 +        {
 +            b = dd->comm->zones.n + 1;
 +        }
 +        fprintf(out,strlen(atomname)<4 ? format : format4,
 +                "ATOM",(ii+1)%100000,
 +                atomname,resname,' ',resnr%10000,' ',
 +                10*x[i][XX],10*x[i][YY],10*x[i][ZZ],1.0,b);
 +    }
 +    fprintf(out,"TER\n");
 +    
 +    gmx_fio_fclose(out);
 +}
 +
 +real dd_cutoff_mbody(gmx_domdec_t *dd)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  di;
 +    real r;
 +
 +    comm = dd->comm;
 +
 +    r = -1;
 +    if (comm->bInterCGBondeds)
 +    {
 +        if (comm->cutoff_mbody > 0)
 +        {
 +            r = comm->cutoff_mbody;
 +        }
 +        else
 +        {
 +            /* cutoff_mbody=0 means we do not have DLB */
 +            r = comm->cellsize_min[dd->dim[0]];
 +            for(di=1; di<dd->ndim; di++)
 +            {
 +                r = min(r,comm->cellsize_min[dd->dim[di]]);
 +            }
 +            if (comm->bBondComm)
 +            {
 +                r = max(r,comm->cutoff_mbody);
 +            }
 +            else
 +            {
 +                r = min(r,comm->cutoff);
 +            }
 +        }
 +    }
 +
 +    return r;
 +}
 +
 +real dd_cutoff_twobody(gmx_domdec_t *dd)
 +{
 +    real r_mb;
 +
 +    r_mb = dd_cutoff_mbody(dd);
 +
 +    return max(dd->comm->cutoff,r_mb);
 +}
 +
 +
 +static void dd_cart_coord2pmecoord(gmx_domdec_t *dd,ivec coord,ivec coord_pme)
 +{
 +    int nc,ntot;
 +    
 +    nc   = dd->nc[dd->comm->cartpmedim];
 +    ntot = dd->comm->ntot[dd->comm->cartpmedim];
 +    copy_ivec(coord,coord_pme);
 +    coord_pme[dd->comm->cartpmedim] =
 +        nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
 +}
 +
 +static int low_ddindex2pmeindex(int ndd,int npme,int ddindex)
 +{
 +    /* Here we assign a PME node to communicate with this DD node
 +     * by assuming that the major index of both is x.
 +     * We add cr->npmenodes/2 to obtain an even distribution.
 +     */
 +    return (ddindex*npme + npme/2)/ndd;
 +}
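 +
 +/* A small worked example of the mapping above (hypothetical node counts,
 + * not taken from this change): with ndd = 8 PP nodes and npme = 3 PME
 + * nodes, (ddindex*npme + npme/2)/ndd gives
 + *     ddindex : 0 1 2 3 4 5 6 7
 + *     pmeindex: 0 0 0 1 1 2 2 2
 + * so the PP nodes are split 3/2/3 over the PME nodes, which is as even
 + * as integer arithmetic allows.
 + */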
 +
 +static int ddindex2pmeindex(const gmx_domdec_t *dd,int ddindex)
 +{
 +    return low_ddindex2pmeindex(dd->nnodes,dd->comm->npmenodes,ddindex);
 +}
 +
 +static int cr_ddindex2pmeindex(const t_commrec *cr,int ddindex)
 +{
 +    return low_ddindex2pmeindex(cr->dd->nnodes,cr->npmenodes,ddindex);
 +}
 +
 +static int *dd_pmenodes(t_commrec *cr)
 +{
 +    int *pmenodes;
 +    int n,i,p0,p1;
 +    
 +    snew(pmenodes,cr->npmenodes);
 +    n = 0;
 +    for(i=0; i<cr->dd->nnodes; i++) {
 +        p0 = cr_ddindex2pmeindex(cr,i);
 +        p1 = cr_ddindex2pmeindex(cr,i+1);
 +        if (i+1 == cr->dd->nnodes || p1 > p0) {
 +            if (debug)
 +                fprintf(debug,"pmenode[%d] = %d\n",n,i+1+n);
 +            pmenodes[n] = i + 1 + n;
 +            n++;
 +        }
 +    }
 +
 +    return pmenodes;
 +}
 +
 +static int gmx_ddcoord2pmeindex(t_commrec *cr,int x,int y,int z)
 +{
 +    gmx_domdec_t *dd;
 +    ivec coords,coords_pme,nc;
 +    int  slab;
 +    
 +    dd = cr->dd;
 +    /*
 +      if (dd->comm->bCartesian) {
 +      gmx_ddindex2xyz(dd->nc,ddindex,coords);
 +      dd_coords2pmecoords(dd,coords,coords_pme);
 +      copy_ivec(dd->ntot,nc);
 +      nc[dd->cartpmedim]         -= dd->nc[dd->cartpmedim];
 +      coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
 +      
 +      slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
 +      } else {
 +      slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
 +      }
 +    */
 +    coords[XX] = x;
 +    coords[YY] = y;
 +    coords[ZZ] = z;
 +    slab = ddindex2pmeindex(dd,dd_index(dd->nc,coords));
 +    
 +    return slab;
 +}
 +
 +static int ddcoord2simnodeid(t_commrec *cr,int x,int y,int z)
 +{
 +    gmx_domdec_comm_t *comm;
 +    ivec coords;
 +    int  ddindex,nodeid=-1;
 +    
 +    comm = cr->dd->comm;
 +    
 +    coords[XX] = x;
 +    coords[YY] = y;
 +    coords[ZZ] = z;
 +    if (comm->bCartesianPP_PME)
 +    {
 +#ifdef GMX_MPI
 +        MPI_Cart_rank(cr->mpi_comm_mysim,coords,&nodeid);
 +#endif
 +    }
 +    else
 +    {
 +        ddindex = dd_index(cr->dd->nc,coords);
 +        if (comm->bCartesianPP)
 +        {
 +            nodeid = comm->ddindex2simnodeid[ddindex];
 +        }
 +        else
 +        {
 +            if (comm->pmenodes)
 +            {
 +                nodeid = ddindex + gmx_ddcoord2pmeindex(cr,x,y,z);
 +            }
 +            else
 +            {
 +                nodeid = ddindex;
 +            }
 +        }
 +    }
 +  
 +    return nodeid;
 +}
 +
 +static int dd_simnode2pmenode(t_commrec *cr,int sim_nodeid)
 +{
 +    gmx_domdec_t *dd;
 +    gmx_domdec_comm_t *comm;
 +    ivec coord,coord_pme;
 +    int  i;
 +    int  pmenode=-1;
 +    
 +    dd = cr->dd;
 +    comm = dd->comm;
 +    
 +    /* This assumes a uniform x domain decomposition grid cell size */
 +    if (comm->bCartesianPP_PME)
 +    {
 +#ifdef GMX_MPI
 +        MPI_Cart_coords(cr->mpi_comm_mysim,sim_nodeid,DIM,coord);
 +        if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
 +        {
 +            /* This is a PP node */
 +            dd_cart_coord2pmecoord(dd,coord,coord_pme);
 +            MPI_Cart_rank(cr->mpi_comm_mysim,coord_pme,&pmenode);
 +        }
 +#endif
 +    }
 +    else if (comm->bCartesianPP)
 +    {
 +        if (sim_nodeid < dd->nnodes)
 +        {
 +            pmenode = dd->nnodes + ddindex2pmeindex(dd,sim_nodeid);
 +        }
 +    }
 +    else
 +    {
 +        /* This assumes DD cells with identical x coordinates
 +         * are numbered sequentially.
 +         */
 +        if (dd->comm->pmenodes == NULL)
 +        {
 +            if (sim_nodeid < dd->nnodes)
 +            {
 +                /* The DD index equals the nodeid */
 +                pmenode = dd->nnodes + ddindex2pmeindex(dd,sim_nodeid);
 +            }
 +        }
 +        else
 +        {
 +            i = 0;
 +            while (sim_nodeid > dd->comm->pmenodes[i])
 +            {
 +                i++;
 +            }
 +            if (sim_nodeid < dd->comm->pmenodes[i])
 +            {
 +                pmenode = dd->comm->pmenodes[i];
 +            }
 +        }
 +    }
 +    
 +    return pmenode;
 +}
 +
 +gmx_bool gmx_pmeonlynode(t_commrec *cr,int sim_nodeid)
 +{
 +    gmx_bool bPMEOnlyNode;
 +    
 +    if (DOMAINDECOMP(cr))
 +    {
 +        bPMEOnlyNode = (dd_simnode2pmenode(cr,sim_nodeid) == -1);
 +    }
 +    else
 +    {
 +        bPMEOnlyNode = FALSE;
 +    }
 +    
 +    return bPMEOnlyNode;
 +}
 +
 +void get_pme_ddnodes(t_commrec *cr,int pmenodeid,
 +                     int *nmy_ddnodes,int **my_ddnodes,int *node_peer)
 +{
 +    gmx_domdec_t *dd;
 +    int x,y,z;
 +    ivec coord,coord_pme;
 +    
 +    dd = cr->dd;
 +    
 +    snew(*my_ddnodes,(dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
 +    
 +    *nmy_ddnodes = 0;
 +    for(x=0; x<dd->nc[XX]; x++)
 +    {
 +        for(y=0; y<dd->nc[YY]; y++)
 +        {
 +            for(z=0; z<dd->nc[ZZ]; z++)
 +            {
 +                if (dd->comm->bCartesianPP_PME)
 +                {
 +                    coord[XX] = x;
 +                    coord[YY] = y;
 +                    coord[ZZ] = z;
 +                    dd_cart_coord2pmecoord(dd,coord,coord_pme);
 +                    if (dd->ci[XX] == coord_pme[XX] &&
 +                        dd->ci[YY] == coord_pme[YY] &&
 +                        dd->ci[ZZ] == coord_pme[ZZ])
 +                        (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr,x,y,z);
 +                }
 +                else
 +                {
 +                    /* The slab corresponds to the nodeid in the PME group */
 +                    if (gmx_ddcoord2pmeindex(cr,x,y,z) == pmenodeid)
 +                    {
 +                        (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr,x,y,z);
 +                    }
 +                }
 +            }
 +        }
 +    }
 +    
 +    /* The last PP-only node is the peer node */
 +    *node_peer = (*my_ddnodes)[*nmy_ddnodes-1];
 +    
 +    if (debug)
 +    {
 +        fprintf(debug,"Receive coordinates from PP nodes:");
 +        for(x=0; x<*nmy_ddnodes; x++)
 +        {
 +            fprintf(debug," %d",(*my_ddnodes)[x]);
 +        }
 +        fprintf(debug,"\n");
 +    }
 +}
 +
 +static gmx_bool receive_vir_ener(t_commrec *cr)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  pmenode,coords[DIM],rank;
 +    gmx_bool bReceive;
 +    
 +    bReceive = TRUE;
 +    if (cr->npmenodes < cr->dd->nnodes)
 +    {
 +        comm = cr->dd->comm;
 +        if (comm->bCartesianPP_PME)
 +        {
 +            pmenode = dd_simnode2pmenode(cr,cr->sim_nodeid);
 +#ifdef GMX_MPI
 +            MPI_Cart_coords(cr->mpi_comm_mysim,cr->sim_nodeid,DIM,coords);
 +            coords[comm->cartpmedim]++;
 +            if (coords[comm->cartpmedim] < cr->dd->nc[comm->cartpmedim])
 +            {
 +                MPI_Cart_rank(cr->mpi_comm_mysim,coords,&rank);
 +                if (dd_simnode2pmenode(cr,rank) == pmenode)
 +                {
 +                    /* This is not the last PP node for pmenode */
 +                    bReceive = FALSE;
 +                }
 +            }
 +#endif  
 +        }
 +        else
 +        {
 +            pmenode = dd_simnode2pmenode(cr,cr->sim_nodeid);
 +            if (cr->sim_nodeid+1 < cr->nnodes &&
 +                dd_simnode2pmenode(cr,cr->sim_nodeid+1) == pmenode)
 +            {
 +                /* This is not the last PP node for pmenode */
 +                bReceive = FALSE;
 +            }
 +        }
 +    }
 +    
 +    return bReceive;
 +}
 +
 +static void set_zones_ncg_home(gmx_domdec_t *dd)
 +{
 +    gmx_domdec_zones_t *zones;
 +    int i;
 +
 +    zones = &dd->comm->zones;
 +
 +    zones->cg_range[0] = 0;
 +    for(i=1; i<zones->n+1; i++)
 +    {
 +        zones->cg_range[i] = dd->ncg_home;
 +    }
 +}
 +
 +static void rebuild_cgindex(gmx_domdec_t *dd,
 +                            const int *gcgs_index,t_state *state)
 +{
 +    int nat,i,*ind,*dd_cg_gl,*cgindex,cg_gl;
 +    
 +    ind = state->cg_gl;
 +    dd_cg_gl = dd->index_gl;
 +    cgindex  = dd->cgindex;
 +    nat = 0;
 +    cgindex[0] = nat;
 +    for(i=0; i<state->ncg_gl; i++)
 +    {
 +        cgindex[i] = nat;
 +        cg_gl = ind[i];
 +        dd_cg_gl[i] = cg_gl;
 +        nat += gcgs_index[cg_gl+1] - gcgs_index[cg_gl];
 +    }
 +    cgindex[i] = nat;
 +    
 +    dd->ncg_home = state->ncg_gl;
 +    dd->nat_home = nat;
 +
 +    set_zones_ncg_home(dd);
 +}
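 +
 +/* A minimal sketch of the index rebuild above, with hypothetical charge
 + * groups (not taken from this change): if state->cg_gl = {5, 2, 9} and
 + * gcgs_index gives those charge groups 3, 1 and 4 atoms respectively, then
 + *     cgindex  = {0, 3, 4, 8}
 + *     ncg_home = 3,  nat_home = 8
 + * i.e. cgindex is the prefix sum of the home charge-group sizes.
 + */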
 +
 +static int ddcginfo(const cginfo_mb_t *cginfo_mb,int cg)
 +{
 +    while (cg >= cginfo_mb->cg_end)
 +    {
 +        cginfo_mb++;
 +    }
 +
 +    return cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod];
 +}
 +
 +static void dd_set_cginfo(int *index_gl,int cg0,int cg1,
 +                          t_forcerec *fr,char *bLocalCG)
 +{
 +    cginfo_mb_t *cginfo_mb;
 +    int *cginfo;
 +    int cg;
 +
 +    if (fr != NULL)
 +    {
 +        cginfo_mb = fr->cginfo_mb;
 +        cginfo    = fr->cginfo;
 +
 +        for(cg=cg0; cg<cg1; cg++)
 +        {
 +            cginfo[cg] = ddcginfo(cginfo_mb,index_gl[cg]);
 +        }
 +    }
 +
 +    if (bLocalCG != NULL)
 +    {
 +        for(cg=cg0; cg<cg1; cg++)
 +        {
 +            bLocalCG[index_gl[cg]] = TRUE;
 +        }
 +    }
 +}
 +
 +static void make_dd_indices(gmx_domdec_t *dd,
 +                            const int *gcgs_index,int cg_start)
 +{
 +    int nzone,zone,zone1,cg0,cg1,cg1_p1,cg,cg_gl,a,a_gl;
 +    int *zone2cg,*zone_ncg1,*index_gl,*gatindex;
 +    gmx_ga2la_t *ga2la;
 +    char *bLocalCG;
 +    gmx_bool bCGs;
 +
 +    bLocalCG = dd->comm->bLocalCG;
 +
 +    if (dd->nat_tot > dd->gatindex_nalloc)
 +    {
 +        dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
 +        srenew(dd->gatindex,dd->gatindex_nalloc);
 +    }
 +
 +    nzone      = dd->comm->zones.n;
 +    zone2cg    = dd->comm->zones.cg_range;
 +    zone_ncg1  = dd->comm->zone_ncg1;
 +    index_gl   = dd->index_gl;
 +    gatindex   = dd->gatindex;
 +    bCGs       = dd->comm->bCGs;
 +
 +    if (zone2cg[1] != dd->ncg_home)
 +    {
 +        gmx_incons("dd->ncg_zone is not up to date");
 +    }
 +    
 +    /* Make the local to global and global to local atom index */
 +    a = dd->cgindex[cg_start];
 +    for(zone=0; zone<nzone; zone++)
 +    {
 +        if (zone == 0)
 +        {
 +            cg0 = cg_start;
 +        }
 +        else
 +        {
 +            cg0 = zone2cg[zone];
 +        }
 +        cg1    = zone2cg[zone+1];
 +        cg1_p1 = cg0 + zone_ncg1[zone];
 +
 +        for(cg=cg0; cg<cg1; cg++)
 +        {
 +            zone1 = zone;
 +            if (cg >= cg1_p1)
 +            {
 +                /* Signal that this cg is from more than one pulse away */
 +                zone1 += nzone;
 +            }
 +            cg_gl = index_gl[cg];
 +            if (bCGs)
 +            {
 +                for(a_gl=gcgs_index[cg_gl]; a_gl<gcgs_index[cg_gl+1]; a_gl++)
 +                {
 +                    gatindex[a] = a_gl;
 +                    ga2la_set(dd->ga2la,a_gl,a,zone1);
 +                    a++;
 +                }
 +            }
 +            else
 +            {
 +                gatindex[a] = cg_gl;
 +                ga2la_set(dd->ga2la,cg_gl,a,zone1);
 +                a++;
 +            }
 +        }
 +    }
 +}
 +
 +static int check_bLocalCG(gmx_domdec_t *dd,int ncg_sys,const char *bLocalCG,
 +                          const char *where)
 +{
 +    int ncg,i,ngl,nerr;
 +
 +    nerr = 0;
 +    if (bLocalCG == NULL)
 +    {
 +        return nerr;
 +    }
 +    for(i=0; i<dd->ncg_tot; i++)
 +    {
 +        if (!bLocalCG[dd->index_gl[i]])
 +        {
 +            fprintf(stderr,
 +                    "DD node %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n",dd->rank,where,i+1,dd->index_gl[i]+1,dd->ncg_home);
 +            nerr++;
 +        }
 +    }
 +    ngl = 0;
 +    for(i=0; i<ncg_sys; i++)
 +    {
 +        if (bLocalCG[i])
 +        {
 +            ngl++;
 +        }
 +    }
 +    if (ngl != dd->ncg_tot)
 +    {
 +        fprintf(stderr,"DD node %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n",dd->rank,where,ngl,dd->ncg_tot);
 +        nerr++;
 +    }
 +
 +    return nerr;
 +}
 +
 +static void check_index_consistency(gmx_domdec_t *dd,
 +                                    int natoms_sys,int ncg_sys,
 +                                    const char *where)
 +{
 +    int  nerr,ngl,i,a,cell;
 +    int  *have;
 +
 +    nerr = 0;
 +
 +    if (dd->comm->DD_debug > 1)
 +    {
 +        snew(have,natoms_sys);
 +        for(a=0; a<dd->nat_tot; a++)
 +        {
 +            if (have[dd->gatindex[a]] > 0)
 +            {
 +                fprintf(stderr,"DD node %d: global atom %d occurs twice: index %d and %d\n",dd->rank,dd->gatindex[a]+1,have[dd->gatindex[a]],a+1);
 +            }
 +            else
 +            {
 +                have[dd->gatindex[a]] = a + 1;
 +            }
 +        }
 +        sfree(have);
 +    }
 +
 +    snew(have,dd->nat_tot);
 +
 +    ngl  = 0;
 +    for(i=0; i<natoms_sys; i++)
 +    {
 +        if (ga2la_get(dd->ga2la,i,&a,&cell))
 +        {
 +            if (a >= dd->nat_tot)
 +            {
 +                fprintf(stderr,"DD node %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n",dd->rank,i+1,a+1,dd->nat_tot);
 +                nerr++;
 +            }
 +            else
 +            {
 +                have[a] = 1;
 +                if (dd->gatindex[a] != i)
 +                {
 +                    fprintf(stderr,"DD node %d: global atom %d marked as local atom %d, which has global atom index %d\n",dd->rank,i+1,a+1,dd->gatindex[a]+1);
 +                    nerr++;
 +                }
 +            }
 +            ngl++;
 +        }
 +    }
 +    if (ngl != dd->nat_tot)
 +    {
 +        fprintf(stderr,
 +                "DD node %d, %s: %d global atom indices, %d local atoms\n",
 +                dd->rank,where,ngl,dd->nat_tot);
 +    }
 +    for(a=0; a<dd->nat_tot; a++)
 +    {
 +        if (have[a] == 0)
 +        {
 +            fprintf(stderr,
 +                    "DD node %d, %s: local atom %d, global %d has no global index\n",
 +                    dd->rank,where,a+1,dd->gatindex[a]+1);
 +        }
 +    }
 +    sfree(have);
 +
 +    nerr += check_bLocalCG(dd,ncg_sys,dd->comm->bLocalCG,where);
 +
 +    if (nerr > 0) {
 +        gmx_fatal(FARGS,"DD node %d, %s: %d atom/cg index inconsistencies",
 +                  dd->rank,where,nerr);
 +    }
 +}
 +
 +static void clear_dd_indices(gmx_domdec_t *dd,int cg_start,int a_start)
 +{
 +    int  i;
 +    char *bLocalCG;
 +
 +    if (a_start == 0)
 +    {
 +        /* Clear the whole list without searching */
 +        ga2la_clear(dd->ga2la);
 +    }
 +    else
 +    {
 +        for(i=a_start; i<dd->nat_tot; i++)
 +        {
 +            ga2la_del(dd->ga2la,dd->gatindex[i]);
 +        }
 +    }
 +
 +    bLocalCG = dd->comm->bLocalCG;
 +    if (bLocalCG)
 +    {
 +        for(i=cg_start; i<dd->ncg_tot; i++)
 +        {
 +            bLocalCG[dd->index_gl[i]] = FALSE;
 +        }
 +    }
 +
 +    dd_clear_local_vsite_indices(dd);
 +    
 +    if (dd->constraints)
 +    {
 +        dd_clear_local_constraint_indices(dd);
 +    }
 +}
 +
 +static real grid_jump_limit(gmx_domdec_comm_t *comm,real cutoff,
 +                            int dim_ind)
 +{
 +    real grid_jump_limit;
 +
 +    /* The distance between the boundaries of cells at distance
 +     * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
 +     * and by the fact that cells should not be shifted by more than
 +     * half their size, so that charge groups shift by at most one cell
 +     * at redecomposition.
 +     */
 +    grid_jump_limit = comm->cellsize_limit;
 +    if (!comm->bVacDLBNoLimit)
 +    {
 +        grid_jump_limit = max(grid_jump_limit,
 +                              cutoff/comm->cd[dim_ind].np);
 +    }
 +
 +    return grid_jump_limit;
 +}
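 +
 +/* Illustration with hypothetical values (not taken from this change):
 + * with cellsize_limit = 0.4 nm, cutoff = 1.0 nm and np = 2 communication
 + * pulses along this dimension, the jump limit is max(0.4, 1.0/2) = 0.5 nm;
 + * with bVacDLBNoLimit set it stays at the 0.4 nm cell-size limit.
 + */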
 +
 +static gmx_bool check_grid_jump(gmx_large_int_t step,
 +                                gmx_domdec_t *dd,
 +                                real cutoff,
 +                                gmx_ddbox_t *ddbox,
 +                                gmx_bool bFatal)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  d,dim;
 +    real limit,bfac;
 +    gmx_bool bInvalid;
 +
 +    bInvalid = FALSE;
 +
 +    comm = dd->comm;
 +    
 +    for(d=1; d<dd->ndim; d++)
 +    {
 +        dim = dd->dim[d];
 +        limit = grid_jump_limit(comm,cutoff,d);
 +        bfac = ddbox->box_size[dim];
 +        if (ddbox->tric_dir[dim])
 +        {
 +            bfac *= ddbox->skew_fac[dim];
 +        }
 +        if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac <  limit ||
 +            (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
 +        {
 +            bInvalid = TRUE;
 +
 +            if (bFatal)
 +            {
 +                char buf[22];
 +
 +                /* This error should never be triggered under normal
 +                 * circumstances, but you never know ...
 +                 */
 +                gmx_fatal(FARGS,"Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with fewer nodes might avoid this issue.",
 +                          gmx_step_str(step,buf),
 +                          dim2char(dim),dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
 +            }
 +        }
 +    }
 +
 +    return bInvalid;
 +}
 +
 +static int dd_load_count(gmx_domdec_comm_t *comm)
 +{
 +    return (comm->eFlop ? comm->flop_n : comm->cycl_n[ddCyclF]);
 +}
 +
 +static float dd_force_load(gmx_domdec_comm_t *comm)
 +{
 +    float load;
 +    
 +    if (comm->eFlop)
 +    {
 +        load = comm->flop;
 +        if (comm->eFlop > 1)
 +        {
 +            load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
 +        }
 +    } 
 +    else
 +    {
 +        load = comm->cycl[ddCyclF];
 +        if (comm->cycl_n[ddCyclF] > 1)
 +        {
 +            /* Subtract the maximum of the last n cycle counts
 +             * to get rid of possible high counts due to other sources,
 +             * for instance system activity, that would otherwise
 +             * affect the dynamic load balancing.
 +             */
 +            load -= comm->cycl_max[ddCyclF];
 +        }
 +    }
 +    
 +    return load;
 +}
 +
 +static void set_slb_pme_dim_f(gmx_domdec_t *dd,int dim,real **dim_f)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int i;
 +    
 +    comm = dd->comm;
 +    
 +    snew(*dim_f,dd->nc[dim]+1);
 +    (*dim_f)[0] = 0;
 +    for(i=1; i<dd->nc[dim]; i++)
 +    {
 +        if (comm->slb_frac[dim])
 +        {
 +            (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
 +        }
 +        else
 +        {
 +            (*dim_f)[i] = (real)i/(real)dd->nc[dim];
 +        }
 +    }
 +    (*dim_f)[dd->nc[dim]] = 1;
 +}
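 +
 +/* Worked example (hypothetical grid, not taken from this change): for
 + * dd->nc[dim] = 4 the slab boundary fractions become
 + *     slb_frac == NULL             : dim_f = {0, 0.25, 0.50, 0.75, 1}
 + *     slb_frac == {0.4,0.3,0.2,0.1}: dim_f = {0, 0.40, 0.70, 0.90, 1}
 + * i.e. dim_f accumulates the per-cell fractions, with the last entry
 + * pinned to 1.
 + */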
 +
 +static void init_ddpme(gmx_domdec_t *dd,gmx_ddpme_t *ddpme,int dimind)
 +{
 +    int        pmeindex,slab,nso,i;
 +    ivec xyz;
 +    
 +    if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
 +    {
 +        ddpme->dim = YY;
 +    }
 +    else
 +    {
 +        ddpme->dim = dimind;
 +    }
 +    ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
 +    
 +    ddpme->nslab = (ddpme->dim == 0 ?
 +                    dd->comm->npmenodes_x :
 +                    dd->comm->npmenodes_y);
 +
 +    if (ddpme->nslab <= 1)
 +    {
 +        return;
 +    }
 +
 +    nso = dd->comm->npmenodes/ddpme->nslab;
 +    /* Determine for each PME slab the PP location range for dimension dim */
 +    snew(ddpme->pp_min,ddpme->nslab);
 +    snew(ddpme->pp_max,ddpme->nslab);
 +    for(slab=0; slab<ddpme->nslab; slab++) {
 +        ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
 +        ddpme->pp_max[slab] = 0;
 +    }
 +    for(i=0; i<dd->nnodes; i++) {
 +        ddindex2xyz(dd->nc,i,xyz);
 +        /* For y only use our y/z slab.
 +         * This assumes that the PME x grid size matches the DD grid size.
 +         */
 +        if (dimind == 0 || xyz[XX] == dd->ci[XX]) {
 +            pmeindex = ddindex2pmeindex(dd,i);
 +            if (dimind == 0) {
 +                slab = pmeindex/nso;
 +            } else {
 +                slab = pmeindex % ddpme->nslab;
 +            }
 +            ddpme->pp_min[slab] = min(ddpme->pp_min[slab],xyz[dimind]);
 +            ddpme->pp_max[slab] = max(ddpme->pp_max[slab],xyz[dimind]);
 +        }
 +    }
 +
 +    set_slb_pme_dim_f(dd,ddpme->dim,&ddpme->slb_dim_f);
 +}
 +
 +int dd_pme_maxshift_x(gmx_domdec_t *dd)
 +{
 +    if (dd->comm->ddpme[0].dim == XX)
 +    {
 +        return dd->comm->ddpme[0].maxshift;
 +    }
 +    else
 +    {
 +        return 0;
 +    }
 +}
 +
 +int dd_pme_maxshift_y(gmx_domdec_t *dd)
 +{
 +    if (dd->comm->ddpme[0].dim == YY)
 +    {
 +        return dd->comm->ddpme[0].maxshift;
 +    }
 +    else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
 +    {
 +        return dd->comm->ddpme[1].maxshift;
 +    }
 +    else
 +    {
 +        return 0;
 +    }
 +}
 +
 +static void set_pme_maxshift(gmx_domdec_t *dd,gmx_ddpme_t *ddpme,
 +                             gmx_bool bUniform,gmx_ddbox_t *ddbox,real *cell_f)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  nc,ns,s;
 +    int  *xmin,*xmax;
 +    real range,pme_boundary;
 +    int  sh;
 +    
 +    comm = dd->comm;
 +    nc  = dd->nc[ddpme->dim];
 +    ns  = ddpme->nslab;
 +    
 +    if (!ddpme->dim_match)
 +    {
 +        /* PP decomposition is not along dim: the worst situation */
 +        sh = ns/2;
 +    }
 +    else if (ns <= 3 || (bUniform && ns == nc))
 +    {
 +        /* The optimal situation */
 +        sh = 1;
 +    }
 +    else
 +    {
 +        /* We need to check, for each PME node, which PP nodes it
 +         * could possibly need to communicate with.
 +         */
 +        xmin = ddpme->pp_min;
 +        xmax = ddpme->pp_max;
 +        /* Allow atoms to be at most 2/3 of the cut-off
 +         * out of their DD cell. This is a reasonable balance
 +         * between performance and support for most charge-group/cut-off
 +         * combinations.
 +         */
 +        range  = 2.0/3.0*comm->cutoff/ddbox->box_size[ddpme->dim];
 +        /* Avoid extra communication when we are exactly at a boundary */
 +        range *= 0.999;
 +        
 +        sh = 1;
 +        for(s=0; s<ns; s++)
 +        {
 +            /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
 +            pme_boundary = (real)s/ns;
 +            while (sh+1 < ns &&
 +                   ((s-(sh+1) >= 0 &&
 +                     cell_f[xmax[s-(sh+1)   ]+1]     + range > pme_boundary) ||
 +                    (s-(sh+1) <  0 &&
 +                     cell_f[xmax[s-(sh+1)+ns]+1] - 1 + range > pme_boundary)))
 +            {
 +                sh++;
 +            }
 +            pme_boundary = (real)(s+1)/ns;
 +            while (sh+1 < ns &&
 +                   ((s+(sh+1) <  ns &&
 +                     cell_f[xmin[s+(sh+1)   ]  ]     - range < pme_boundary) ||
 +                    (s+(sh+1) >= ns &&
 +                     cell_f[xmin[s+(sh+1)-ns]  ] + 1 - range < pme_boundary)))
 +            {
 +                sh++;
 +            }
 +        }
 +    }
 +    
 +    ddpme->maxshift = sh;
 +    
 +    if (debug)
 +    {
 +        fprintf(debug,"PME slab communication range for dim %d is %d\n",
 +                ddpme->dim,ddpme->maxshift);
 +    }
 +}
 +
 +static void check_box_size(gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
 +{
 +    int d,dim;
 +    
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        dim = dd->dim[d];
 +        if (dim < ddbox->nboundeddim &&
 +            ddbox->box_size[dim]*ddbox->skew_fac[dim] <
 +            dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN)
 +        {
 +            gmx_fatal(FARGS,"The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
 +                      dim2char(dim),ddbox->box_size[dim],ddbox->skew_fac[dim],
 +                      dd->nc[dim],dd->comm->cellsize_limit);
 +        }
 +    }
 +}
 +
 +static void set_dd_cell_sizes_slb(gmx_domdec_t *dd,gmx_ddbox_t *ddbox,
 +                                  gmx_bool bMaster,ivec npulse)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  d,j;
 +    rvec cellsize_min;
 +    real *cell_x,cell_dx,cellsize;
 +    
 +    comm = dd->comm;
 +    
 +    for(d=0; d<DIM; d++)
 +    {
 +        cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
 +        npulse[d] = 1;
 +        if (dd->nc[d] == 1 || comm->slb_frac[d] == NULL)
 +        {
 +            /* Uniform grid */
 +            cell_dx = ddbox->box_size[d]/dd->nc[d];
 +            if (bMaster)
 +            {
 +                for(j=0; j<dd->nc[d]+1; j++)
 +                {
 +                    dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
 +                }
 +            }
 +            else
 +            {
 +                comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d]  )*cell_dx;
 +                comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
 +            }
 +            cellsize = cell_dx*ddbox->skew_fac[d];
 +            while (cellsize*npulse[d] < comm->cutoff && npulse[d] < dd->nc[d]-1)
 +            {
 +                npulse[d]++;
 +            }
 +            cellsize_min[d] = cellsize;
 +        }
 +        else
 +        {
 +            /* Statically load balanced grid */
 +            /* Even when we are not doing a master distribution, we determine
 +             * all cell borders in a loop to obtain values identical
 +             * to the master distribution case and to determine npulse.
 +             */
 +            if (bMaster)
 +            {
 +                cell_x = dd->ma->cell_x[d];
 +            }
 +            else
 +            {
 +                snew(cell_x,dd->nc[d]+1);
 +            }
 +            cell_x[0] = ddbox->box0[d];
 +            for(j=0; j<dd->nc[d]; j++)
 +            {
 +                cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
 +                cell_x[j+1] = cell_x[j] + cell_dx;
 +                cellsize = cell_dx*ddbox->skew_fac[d];
 +                while (cellsize*npulse[d] < comm->cutoff &&
 +                       npulse[d] < dd->nc[d]-1)
 +                {
 +                    npulse[d]++;
 +                }
 +                cellsize_min[d] = min(cellsize_min[d],cellsize);
 +            }
 +            if (!bMaster)
 +            {
 +                comm->cell_x0[d] = cell_x[dd->ci[d]];
 +                comm->cell_x1[d] = cell_x[dd->ci[d]+1];
 +                sfree(cell_x);
 +            }
 +        }
 +        /* The following limitation prevents a cell from receiving some of
 +         * its own home charge groups back over the periodic boundary.
 +         * Duplicate charge groups cause trouble with the global indices.
 +         */
 +        if (d < ddbox->npbcdim &&
 +            dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
 +        {
 +            gmx_fatal_collective(FARGS,NULL,dd,
 +                                 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
 +                                 dim2char(d),ddbox->box_size[d],ddbox->skew_fac[d],
 +                                 comm->cutoff,
 +                                 dd->nc[d],dd->nc[d],
 +                                 dd->nnodes > dd->nc[d] ? "cells" : "processors");
 +        }
 +    }
 +    
 +    if (!comm->bDynLoadBal)
 +    {
 +        copy_rvec(cellsize_min,comm->cellsize_min);
 +    }
 +   
 +    for(d=0; d<comm->npmedecompdim; d++)
 +    {
 +        set_pme_maxshift(dd,&comm->ddpme[d],
 +                         comm->slb_frac[dd->dim[d]]==NULL,ddbox,
 +                         comm->ddpme[d].slb_dim_f);
 +    }
 +}
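 +
 +/* Illustration of the npulse determination above with hypothetical sizes
 + * (not taken from this change): a uniform grid of dd->nc[d] = 6 cells on a
 + * 9 nm box edge without skew gives cell_dx = 1.5 nm, so a 1.2 nm cut-off
 + * needs npulse[d] = 1, while a 2.8 nm cut-off needs npulse[d] = 2
 + * (1.5 < 2.8 <= 3.0); npulse[d] is always capped at dd->nc[d]-1.
 + */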
 +
 +
 +static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
 +                                       int d,int dim,gmx_domdec_root_t *root,
 +                                       gmx_ddbox_t *ddbox,
 +                                       gmx_bool bUniform,gmx_large_int_t step, real cellsize_limit_f, int range[])
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  ncd,i,j,nmin,nmin_old;
 +    gmx_bool bLimLo,bLimHi;
 +    real *cell_size;
 +    real fac,halfway,cellsize_limit_f_i,region_size;
 +    gmx_bool bPBC,bLastHi=FALSE;
 +    int nrange[]={range[0],range[1]};
 +
 +    region_size= root->cell_f[range[1]]-root->cell_f[range[0]];  
 +
 +    comm = dd->comm;
 +
 +    ncd = dd->nc[dim];
 +
 +    bPBC = (dim < ddbox->npbcdim);
 +
 +    cell_size = root->buf_ncd;
 +
 +    if (debug) 
 +    {
 +        fprintf(debug,"enforce_limits: %d %d\n",range[0],range[1]);
 +    }
 +
 +    /* First we need to check if the scaling does not make cells
 +     * smaller than the smallest allowed size.
 +     * We need to do this iteratively, since if a cell is too small,
 +     * it needs to be enlarged, which makes all the other cells smaller,
 +     * which could in turn make another cell smaller than allowed.
 +     */
 +    for(i=range[0]; i<range[1]; i++)
 +    {
 +        root->bCellMin[i] = FALSE;
 +    }
 +    nmin = 0;
 +    do
 +    {
 +        nmin_old = nmin;
 +        /* We need the total for normalization */
 +        fac = 0;
 +        for(i=range[0]; i<range[1]; i++)
 +        {
 +            if (root->bCellMin[i] == FALSE)
 +            {
 +                fac += cell_size[i];
 +            }
 +        }
 +        fac = (region_size - nmin*cellsize_limit_f)/fac; /* subtracting cells already set to cellsize_limit_f */
 +        /* Determine the cell boundaries */
 +        for(i=range[0]; i<range[1]; i++)
 +        {
 +            if (root->bCellMin[i] == FALSE)
 +            {
 +                cell_size[i] *= fac;
 +                if (!bPBC && (i == 0 || i == dd->nc[dim] -1))
 +                {
 +                    cellsize_limit_f_i = 0;
 +                }
 +                else
 +                {
 +                    cellsize_limit_f_i = cellsize_limit_f;
 +                }
 +                if (cell_size[i] < cellsize_limit_f_i)
 +                {
 +                    root->bCellMin[i] = TRUE;
 +                    cell_size[i] = cellsize_limit_f_i;
 +                    nmin++;
 +                }
 +            }
 +            root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
 +        }
 +    }
 +    while (nmin > nmin_old);
 +    
 +    i=range[1]-1;
 +    cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
 +    /* For this check we should not use DD_CELL_MARGIN,
 +     * but a slightly smaller factor,
 +     * since rounding could get us below the limit.
 +     */
 +    if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
 +    {
 +        char buf[22];
 +        gmx_fatal(FARGS,"Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
 +                  gmx_step_str(step,buf),
 +                  dim2char(dim),ddbox->box_size[dim],ddbox->skew_fac[dim],
 +                  ncd,comm->cellsize_min[dim]);
 +    }
 +    
 +    root->bLimited = (nmin > 0) || (range[0]>0) || (range[1]<ncd);
 +    
 +    if (!bUniform)
 +    {
 +        /* Check that no boundary has moved by more than half of
 +         * either of the cells it bounds, as this could cause problems,
 +         * especially when the differences between cell sizes are large.
 +         * If changes are applied, they will not make cells smaller
 +         * than the cut-off, as we check all the boundaries that
 +         * might be affected by a change, and if the old state was ok,
 +         * the cells will at most be shrunk back to their old size.
 +         */
 +        for(i=range[0]+1; i<range[1]; i++)
 +        {
 +            halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
 +            if (root->cell_f[i] < halfway)
 +            {
 +                root->cell_f[i] = halfway;
 +                /* Check if the change also causes shifts of the next boundaries */
 +                for(j=i+1; j<range[1]; j++)
 +                {
 +                    if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
 +                        root->cell_f[j] =  root->cell_f[j-1] + cellsize_limit_f;
 +                }
 +            }
 +            halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
 +            if (root->cell_f[i] > halfway)
 +            {
 +                root->cell_f[i] = halfway;
 +                /* Check if the change also causes shifts of the next boundaries */
 +                for(j=i-1; j>=range[0]+1; j--)
 +                {
 +                    if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
 +                        root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
 +                }
 +            }
 +        }
 +    }
 +    
 +    /* nrange is defined as the [lower, upper) range for a new call to enforce_limits */
 +    /* Find the highest violation of LimLo (a) and the lowest following violation
 +     * of LimHi (b), then call enforce_limits for (oldb,a) and (a,b). In the next
 +     * step: (b,nexta); oldb and nexta can be the range boundaries.
 +     * nrange is used to pass a and b.
 +     */
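 +    /* For example (illustrative numbers), with range = [0,8): if boundary 3
 +     * violates its lower bound and boundary 6 its upper bound, the code below
 +     * first clamps cell_f[3] to bound_min[3] and re-enforces the limits on
 +     * [0,3), then clamps cell_f[6] to bound_max[6] and re-enforces [3,6);
 +     * the remaining range [6,8) is handled after the loop.
 +     */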
 +    if (d > 0)
 +    {
 +        /* Take care of the staggering of the cell boundaries */
 +        if (bUniform)
 +        {
 +            for(i=range[0]; i<range[1]; i++)
 +            {
 +                root->cell_f_max0[i] = root->cell_f[i];
 +                root->cell_f_min1[i] = root->cell_f[i+1];
 +            }
 +        }
 +        else
 +        {
 +            for(i=range[0]+1; i<range[1]; i++)
 +            {
 +                bLimLo = (root->cell_f[i] < root->bound_min[i]);
 +                bLimHi = (root->cell_f[i] > root->bound_max[i]);
 +                if (bLimLo && bLimHi)
 +                {
 +                    /* Both limits violated, try the best we can */
 +                    /* For this case we split the original range (range) in two parts and handle the other limitations in the next iteration. */
 +                    root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
 +                    nrange[0]=range[0];
 +                    nrange[1]=i;
 +                    dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
 +
 +                    nrange[0]=i;
 +                    nrange[1]=range[1];
 +                    dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
 +
 +                    return;
 +                }
 +                else if (bLimLo)
 +                {
 +                    /* root->cell_f[i] = root->bound_min[i]; */
 +                    nrange[1]=i;  /* only store the violation location; there could be a following LimLo violation with a higher index */
 +                    bLastHi=FALSE;
 +                }
 +                else if (bLimHi && !bLastHi)
 +                {
 +                    bLastHi=TRUE;
 +                    if (nrange[1] < range[1])   /* found a LimLo before */
 +                    {
 +                        root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
 +                        dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
 +                        nrange[0]=nrange[1];
 +                    }
 +                    root->cell_f[i] = root->bound_max[i];
 +                    nrange[1]=i; 
 +                    dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
 +                    nrange[0]=i;
 +                    nrange[1]=range[1];
 +                }
 +            }
 +            if (nrange[1] < range[1])   /* found last a LimLo */
 +            {
 +                root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
 +                dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
 +                nrange[0]=nrange[1];
 +                nrange[1]=range[1];
 +                dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
 +            } 
 +            else if (nrange[0] > range[0]) /* found at least one LimHi */
 +            {
 +                dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
 +            }
 +        }
 +    }
 +}
 +
 +
 +static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
 +                                       int d,int dim,gmx_domdec_root_t *root,
 +                                       gmx_ddbox_t *ddbox,gmx_bool bDynamicBox,
 +                                       gmx_bool bUniform,gmx_large_int_t step)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  ncd,d1,i,j,pos;
 +    real *cell_size;
 +    real load_aver,load_i,imbalance,change,change_max,sc;
 +    real cellsize_limit_f,dist_min_f,dist_min_f_hard,space;
 +    real change_limit;
 +    real relax = 0.5;
 +    gmx_bool bPBC;
 +    int range[] = { 0, 0 };
 +
 +    comm = dd->comm;
 +
 +    /* Convert the maximum change from the input percentage to a fraction */
 +    change_limit = comm->dlb_scale_lim*0.01;
 +
 +    ncd = dd->nc[dim];
 +
 +    bPBC = (dim < ddbox->npbcdim);
 +
 +    cell_size = root->buf_ncd;
 +
 +    /* Store the original boundaries */
 +    for(i=0; i<ncd+1; i++)
 +    {
 +        root->old_cell_f[i] = root->cell_f[i];
 +    }
 +    if (bUniform) {
 +        for(i=0; i<ncd; i++)
 +        {
 +            cell_size[i] = 1.0/ncd;
 +        }
 +    }
 +    else if (dd_load_count(comm))
 +    {
 +        load_aver = comm->load[d].sum_m/ncd;
 +        change_max = 0;
 +        for(i=0; i<ncd; i++)
 +        {
 +            /* Determine the relative imbalance of cell i */
 +            load_i = comm->load[d].load[i*comm->load[d].nload+2];
 +            imbalance = (load_i - load_aver)/(load_aver>0 ? load_aver : 1);
 +            /* Determine the change of the cell size using underrelaxation */
 +            change = -relax*imbalance;
 +            change_max = max(change_max,max(change,-change));
 +        }
 +        /* Limit the amount of scaling.
 +         * We need to use the same rescaling for all cells in one row,
 +         * otherwise the load balancing might not converge.
 +         */
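 +        /* For example (illustrative numbers), with relax = 0.5 and
 +         * change_limit = 0.1 (dlb_scale_lim = 10): if the largest relative
 +         * imbalance is 0.4, change_max = 0.2 exceeds the limit, so sc is
 +         * reduced to 0.25 and that cell shrinks by sc*0.4 = 10%, exactly
 +         * the allowed maximum.
 +         */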
 +        sc = relax;
 +        if (change_max > change_limit)
 +        {
 +            sc *= change_limit/change_max;
 +        }
 +        for(i=0; i<ncd; i++)
 +        {
 +            /* Determine the relative imbalance of cell i */
 +            load_i = comm->load[d].load[i*comm->load[d].nload+2];
 +            imbalance = (load_i - load_aver)/(load_aver>0 ? load_aver : 1);
 +            /* Determine the change of the cell size using underrelaxation */
 +            change = -sc*imbalance;
 +            cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
 +        }
 +    }
 +    
 +    cellsize_limit_f  = comm->cellsize_min[dim]/ddbox->box_size[dim];
 +    cellsize_limit_f *= DD_CELL_MARGIN;
 +    dist_min_f_hard   = grid_jump_limit(comm,comm->cutoff,d)/ddbox->box_size[dim];
 +    dist_min_f        = dist_min_f_hard * DD_CELL_MARGIN;
 +    if (ddbox->tric_dir[dim])
 +    {
 +        cellsize_limit_f /= ddbox->skew_fac[dim];
 +        dist_min_f       /= ddbox->skew_fac[dim];
 +    }
 +    if (bDynamicBox && d > 0)
 +    {
 +        dist_min_f *= DD_PRES_SCALE_MARGIN;
 +    }
 +    if (d > 0 && !bUniform)
 +    {
 +        /* Make sure that the grid is not shifted too much */
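 +        /* bound_min[i] is kept at least dist_min_f above cell_f_max0[i-1] and
 +         * bound_max[i] at most dist_min_f below cell_f_min1[i]; when the
 +         * current cell_f[i] leaves slack, each bound is moved halfway from
 +         * that hard limit towards cell_f[i]. For example (illustrative
 +         * numbers), with cell_f_max0[i-1] = 0.30, cell_f_min1[i] = 0.70,
 +         * dist_min_f = 0.05 and cell_f[i] = 0.50, boundary i is kept within
 +         * [0.425, 0.575].
 +         */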
 +        for(i=1; i<ncd; i++) {
 +            if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard) 
 +            {
 +                gmx_incons("Inconsistent DD boundary staggering limits!");
 +            }
 +            root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
 +            space = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
 +            if (space > 0) {
 +                root->bound_min[i] += 0.5*space;
 +            }
 +            root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
 +            space = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
 +            if (space < 0) {
 +                root->bound_max[i] += 0.5*space;
 +            }
 +            if (debug)
 +            {
 +                fprintf(debug,
 +                        "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
 +                        d,i,
 +                        root->cell_f_max0[i-1] + dist_min_f,
 +                        root->bound_min[i],root->cell_f[i],root->bound_max[i],
 +                        root->cell_f_min1[i] - dist_min_f);
 +            }
 +        }
 +    }
 +    range[1]=ncd;
 +    root->cell_f[0] = 0;
 +    root->cell_f[ncd] = 1;
 +    dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);
 +
 +
 +    /* After the checks above, the cells should obey the cut-off
 +     * restrictions, but it does not hurt to check.
 +     */
 +    for(i=0; i<ncd; i++)
 +    {
 +        if (debug)
 +        {
 +            fprintf(debug,"Relative bounds dim %d  cell %d: %f %f\n",
 +                    dim,i,root->cell_f[i],root->cell_f[i+1]);
 +        }
 +
 +        if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
 +            root->cell_f[i+1] - root->cell_f[i] <
 +            cellsize_limit_f/DD_CELL_MARGIN)
 +        {
 +            char buf[22];
 +            fprintf(stderr,
 +                    "\nWARNING step %s: direction %c, cell %d too small: %f\n",
 +                    gmx_step_str(step,buf),dim2char(dim),i,
 +                    (root->cell_f[i+1] - root->cell_f[i])
 +                    *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
 +        }
 +    }
 +    
 +    pos = ncd + 1;
 +    /* Store the cell boundaries of the lower dimensions at the end */
 +    for(d1=0; d1<d; d1++)
 +    {
 +        root->cell_f[pos++] = comm->cell_f0[d1];
 +        root->cell_f[pos++] = comm->cell_f1[d1];
 +    }
 +    
 +    if (d < comm->npmedecompdim)
 +    {
 +        /* The master determines the maximum shift for
 +         * the coordinate communication between separate PME nodes.
 +         */
 +        set_pme_maxshift(dd,&comm->ddpme[d],bUniform,ddbox,root->cell_f);
 +    }
 +    root->cell_f[pos++] = comm->ddpme[0].maxshift;
 +    if (d >= 1)
 +    {
 +        root->cell_f[pos++] = comm->ddpme[1].maxshift;
 +    }
 +}    
 +
 +static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
 +                                             gmx_ddbox_t *ddbox,int dimind)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int dim;
 +
 +    comm = dd->comm;
 +
 +    /* Set the cell dimensions */
 +    dim = dd->dim[dimind];
 +    comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
 +    comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
 +    if (dim >= ddbox->nboundeddim)
 +    {
 +        comm->cell_x0[dim] += ddbox->box0[dim];
 +        comm->cell_x1[dim] += ddbox->box0[dim];
 +    }
 +}
 +
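 +/* The cell_f_row buffer filled by set_dd_cell_sizes_dlb_root and read below
 + * contains, in order: the nc[dim]+1 boundary fractions for this dimension,
 + * the (cell_f0,cell_f1) pair of each lower decomposition dimension and the
 + * PME maxshift value(s), all stored as reals. For example (illustrative
 + * numbers), for d = 1 with nc[dim] = 4 the layout is: entries 0-4 the
 + * boundaries, 5-6 the pair for dimension 0, 7 ddpme[0].maxshift and
 + * 8 ddpme[1].maxshift.
 + */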
 +static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
 +                                         int d,int dim,real *cell_f_row,
 +                                         gmx_ddbox_t *ddbox)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int d1,dim1,pos;
 +
 +    comm = dd->comm;
 +
 +#ifdef GMX_MPI
 +    /* Each node would only need to know two fractions,
 +     * but it is probably cheaper to broadcast the whole array.
 +     */
 +    MPI_Bcast(cell_f_row,DD_CELL_F_SIZE(dd,d)*sizeof(real),MPI_BYTE,
 +              0,comm->mpi_comm_load[d]);
 +#endif
 +    /* Copy the fractions for this dimension from the buffer */
 +    comm->cell_f0[d] = cell_f_row[dd->ci[dim]  ];
 +    comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
 +    /* The whole array was communicated, so set the buffer position */
 +    pos = dd->nc[dim] + 1;
 +    for(d1=0; d1<=d; d1++)
 +    {
 +        if (d1 < d)
 +        {
 +            /* Copy the cell fractions of the lower dimensions */
 +            comm->cell_f0[d1] = cell_f_row[pos++];
 +            comm->cell_f1[d1] = cell_f_row[pos++];
 +        }
 +        relative_to_absolute_cell_bounds(dd,ddbox,d1);
 +    }
 +    /* Convert the communicated shift from float to int */
 +    comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
 +    if (d >= 1)
 +    {
 +        comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
 +    }
 +}
 +
 +static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
 +                                         gmx_ddbox_t *ddbox,gmx_bool bDynamicBox,
 +                                         gmx_bool bUniform,gmx_large_int_t step)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int d,dim,d1;
 +    gmx_bool bRowMember,bRowRoot;
 +    real *cell_f_row;
 +    
 +    comm = dd->comm;
 +
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        dim = dd->dim[d];
 +        bRowMember = TRUE;
 +        bRowRoot = TRUE;
 +        for(d1=d; d1<dd->ndim; d1++)
 +        {
 +            if (dd->ci[dd->dim[d1]] > 0)
 +            {
 +                if (d1 > d)
 +                {
 +                    bRowMember = FALSE;
 +                }
 +                bRowRoot = FALSE;
 +            }
 +        }
 +        if (bRowMember)
 +        {
 +            if (bRowRoot)
 +            {
 +                set_dd_cell_sizes_dlb_root(dd,d,dim,comm->root[d],
 +                                           ddbox,bDynamicBox,bUniform,step);
 +                cell_f_row = comm->root[d]->cell_f;
 +            }
 +            else
 +            {
 +                cell_f_row = comm->cell_f_row;
 +            }
 +            distribute_dd_cell_sizes_dlb(dd,d,dim,cell_f_row,ddbox);
 +        }
 +    }
 +}    
 +
 +static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
 +{
 +    int d;
 +
 +    /* This function assumes the box is static and should therefore
 +     * not be called when the box has changed since the last
 +     * call to dd_partition_system.
 +     */
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        relative_to_absolute_cell_bounds(dd,ddbox,d); 
 +    }
 +}
 +
 +
 +
 +static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
 +                                  gmx_ddbox_t *ddbox,gmx_bool bDynamicBox,
 +                                  gmx_bool bUniform,gmx_bool bDoDLB,gmx_large_int_t step,
 +                                  gmx_wallcycle_t wcycle)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int dim;
 +
 +    comm = dd->comm;
 +    
 +    if (bDoDLB)
 +    {
 +        wallcycle_start(wcycle,ewcDDCOMMBOUND);
 +        set_dd_cell_sizes_dlb_change(dd,ddbox,bDynamicBox,bUniform,step);
 +        wallcycle_stop(wcycle,ewcDDCOMMBOUND);
 +    }
 +    else if (bDynamicBox)
 +    {
 +        set_dd_cell_sizes_dlb_nochange(dd,ddbox);
 +    }
 +    
 +    /* Set the dimensions for which no DD is used */
 +    for(dim=0; dim<DIM; dim++) {
 +        if (dd->nc[dim] == 1) {
 +            comm->cell_x0[dim] = 0;
 +            comm->cell_x1[dim] = ddbox->box_size[dim];
 +            if (dim >= ddbox->nboundeddim)
 +            {
 +                comm->cell_x0[dim] += ddbox->box0[dim];
 +                comm->cell_x1[dim] += ddbox->box0[dim];
 +            }
 +        }
 +    }
 +}
 +
 +static void realloc_comm_ind(gmx_domdec_t *dd,ivec npulse)
 +{
 +    int d,np,i;
 +    gmx_domdec_comm_dim_t *cd;
 +    
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        cd = &dd->comm->cd[d];
 +        np = npulse[dd->dim[d]];
 +        if (np > cd->np_nalloc)
 +        {
 +            if (debug)
 +            {
 +                fprintf(debug,"(Re)allocing cd for %c to %d pulses\n",
 +                        dim2char(dd->dim[d]),np);
 +            }
 +            if (DDMASTER(dd) && cd->np_nalloc > 0)
 +            {
 +                fprintf(stderr,"\nIncreasing the number of cells to communicate in dimension %c to %d for the first time\n",dim2char(dd->dim[d]),np);
 +            }
 +            srenew(cd->ind,np);
 +            for(i=cd->np_nalloc; i<np; i++)
 +            {
 +                cd->ind[i].index  = NULL;
 +                cd->ind[i].nalloc = 0;
 +            }
 +            cd->np_nalloc = np;
 +        }
 +        cd->np = np;
 +    }
 +}
 +
 +
 +static void set_dd_cell_sizes(gmx_domdec_t *dd,
 +                              gmx_ddbox_t *ddbox,gmx_bool bDynamicBox,
 +                              gmx_bool bUniform,gmx_bool bDoDLB,gmx_large_int_t step,
 +                              gmx_wallcycle_t wcycle)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  d;
 +    ivec npulse;
 +    
 +    comm = dd->comm;
 +
 +    /* Copy the old cell boundaries for the cg displacement check */
 +    copy_rvec(comm->cell_x0,comm->old_cell_x0);
 +    copy_rvec(comm->cell_x1,comm->old_cell_x1);
 +    
 +    if (comm->bDynLoadBal)
 +    {
 +        if (DDMASTER(dd))
 +        {
 +            check_box_size(dd,ddbox);
 +        }
 +        set_dd_cell_sizes_dlb(dd,ddbox,bDynamicBox,bUniform,bDoDLB,step,wcycle);
 +    }
 +    else
 +    {
 +        set_dd_cell_sizes_slb(dd,ddbox,FALSE,npulse);
 +        realloc_comm_ind(dd,npulse);
 +    }
 +    
 +    if (debug)
 +    {
 +        for(d=0; d<DIM; d++)
 +        {
 +            fprintf(debug,"cell_x[%d] %f - %f skew_fac %f\n",
 +                    d,comm->cell_x0[d],comm->cell_x1[d],ddbox->skew_fac[d]);
 +        }
 +    }
 +}
 +
 +static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
 +                                  gmx_ddbox_t *ddbox,
 +                                  rvec cell_ns_x0,rvec cell_ns_x1,
 +                                  gmx_large_int_t step)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int dim_ind,dim;
 +    
 +    comm = dd->comm;
 +
 +    for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
 +    {
 +        dim = dd->dim[dim_ind];
 +        
 +        /* Without PBC we don't have restrictions on the outer cells */
 +        if (!(dim >= ddbox->npbcdim && 
 +              (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
 +            comm->bDynLoadBal &&
 +            (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
 +            comm->cellsize_min[dim])
 +        {
 +            char buf[22];
 +            gmx_fatal(FARGS,"Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
 +                      gmx_step_str(step,buf),dim2char(dim),
 +                      comm->cell_x1[dim] - comm->cell_x0[dim],
 +                      ddbox->skew_fac[dim],
 +                      dd->comm->cellsize_min[dim],
 +                      dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
 +        }
 +    }
 +    
 +    if ((dd->bGridJump && dd->ndim > 1) || ddbox->nboundeddim < DIM)
 +    {
 +        /* Communicate the boundaries and update cell_ns_x0/1 */
 +        dd_move_cellx(dd,ddbox,cell_ns_x0,cell_ns_x1);
 +        if (dd->bGridJump && dd->ndim > 1)
 +        {
 +            check_grid_jump(step,dd,dd->comm->cutoff,ddbox,TRUE);
 +        }
 +    }
 +}
 +
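 +/* The triclinic correction matrix tcm computed below removes the
 + * off-diagonal box contributions from a Cartesian position, so that
 + * pos_d = x[d] + sum over j>d of x[j]*tcm[j][d] scales with box[d][d] times
 + * the box fraction along dimension d. For example (illustrative numbers),
 + * for a box with rows (4,0,0), (1,3,0), (0,0,5): tcm[YY][XX] = -1/3, and the
 + * point (1/3, 1, 0), which lies at one third of the second box vector,
 + * gets pos_x = 1/3 + 1*(-1/3) = 0.
 + */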
 +static void make_tric_corr_matrix(int npbcdim,matrix box,matrix tcm)
 +{
 +    if (YY < npbcdim)
 +    {
 +        tcm[YY][XX] = -box[YY][XX]/box[YY][YY];
 +    }
 +    else
 +    {
 +        tcm[YY][XX] = 0;
 +    }
 +    if (ZZ < npbcdim)
 +    {
 +        tcm[ZZ][XX] = -(box[ZZ][YY]*tcm[YY][XX] + box[ZZ][XX])/box[ZZ][ZZ];
 +        tcm[ZZ][YY] = -box[ZZ][YY]/box[ZZ][ZZ];
 +    }
 +    else
 +    {
 +        tcm[ZZ][XX] = 0;
 +        tcm[ZZ][YY] = 0;
 +    }
 +}
 +
 +static void check_screw_box(matrix box)
 +{
 +    /* Mathematical limitation */
 +    if (box[YY][XX] != 0 || box[ZZ][XX] != 0)
 +    {
 +        gmx_fatal(FARGS,"With screw pbc the unit cell can not have non-zero off-diagonal x-components");
 +    }
 +    
 +    /* Limitation due to the asymmetry of the eighth shell method */
 +    if (box[ZZ][YY] != 0)
 +    {
 +        gmx_fatal(FARGS,"pbc=screw with non-zero box_zy is not supported");
 +    }
 +}
 +
 +static void distribute_cg(FILE *fplog,gmx_large_int_t step,
 +                          matrix box,ivec tric_dir,t_block *cgs,rvec pos[],
 +                          gmx_domdec_t *dd)
 +{
 +    gmx_domdec_master_t *ma;
 +    int **tmp_ind=NULL,*tmp_nalloc=NULL;
 +    int  i,icg,j,k,k0,k1,d,npbcdim;
 +    matrix tcm;
 +    rvec box_size,cg_cm;
 +    ivec ind;
 +    real nrcg,inv_ncg,pos_d;
 +    atom_id *cgindex;
 +    gmx_bool bUnbounded,bScrew;
 +
 +    ma = dd->ma;
 +    
 +    if (tmp_ind == NULL)
 +    {
 +        snew(tmp_nalloc,dd->nnodes);
 +        snew(tmp_ind,dd->nnodes);
 +        for(i=0; i<dd->nnodes; i++)
 +        {
 +            tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
 +            snew(tmp_ind[i],tmp_nalloc[i]);
 +        }
 +    }
 +    
 +    /* Clear the count */
 +    for(i=0; i<dd->nnodes; i++)
 +    {
 +        ma->ncg[i] = 0;
 +        ma->nat[i] = 0;
 +    }
 +    
 +    make_tric_corr_matrix(dd->npbcdim,box,tcm);
 +    
 +    cgindex = cgs->index;
 +    
 +    /* Compute the center of geometry for all charge groups */
 +    for(icg=0; icg<cgs->nr; icg++)
 +    {
 +        k0      = cgindex[icg];
 +        k1      = cgindex[icg+1];
 +        nrcg    = k1 - k0;
 +        if (nrcg == 1)
 +        {
 +            copy_rvec(pos[k0],cg_cm);
 +        }
 +        else
 +        {
 +            inv_ncg = 1.0/nrcg;
 +            
 +            clear_rvec(cg_cm);
 +            for(k=k0; (k<k1); k++)
 +            {
 +                rvec_inc(cg_cm,pos[k]);
 +            }
 +            for(d=0; (d<DIM); d++)
 +            {
 +                cg_cm[d] *= inv_ncg;
 +            }
 +        }
 +        /* Put the charge group in the box and determine the cell index */
 +        for(d=DIM-1; d>=0; d--) {
 +            pos_d = cg_cm[d];
 +            if (d < dd->npbcdim)
 +            {
 +                bScrew = (dd->bScrewPBC && d == XX);
 +                if (tric_dir[d] && dd->nc[d] > 1)
 +                {
 +                    /* Use triclinic coordinates for this dimension */
 +                    for(j=d+1; j<DIM; j++)
 +                    {
 +                        pos_d += cg_cm[j]*tcm[j][d];
 +                    }
 +                }
 +                while(pos_d >= box[d][d])
 +                {
 +                    pos_d -= box[d][d];
 +                    rvec_dec(cg_cm,box[d]);
 +                    if (bScrew)
 +                    {
 +                        cg_cm[YY] = box[YY][YY] - cg_cm[YY];
 +                        cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
 +                    }
 +                    for(k=k0; (k<k1); k++)
 +                    {
 +                        rvec_dec(pos[k],box[d]);
 +                        if (bScrew)
 +                        {
 +                            pos[k][YY] = box[YY][YY] - pos[k][YY];
 +                            pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
 +                        }
 +                    }
 +                }
 +                while(pos_d < 0)
 +                {
 +                    pos_d += box[d][d];
 +                    rvec_inc(cg_cm,box[d]);
 +                    if (bScrew)
 +                    {
 +                        cg_cm[YY] = box[YY][YY] - cg_cm[YY];
 +                        cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
 +                    }
 +                    for(k=k0; (k<k1); k++)
 +                    {
 +                        rvec_inc(pos[k],box[d]);
 +                        if (bScrew) {
 +                            pos[k][YY] = box[YY][YY] - pos[k][YY];
 +                            pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
 +                        }
 +                    }
 +                }
 +            }
 +            /* This could be done more efficiently */
 +            ind[d] = 0;
 +            while(ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
 +            {
 +                ind[d]++;
 +            }
 +        }
 +        i = dd_index(dd->nc,ind);
 +        if (ma->ncg[i] == tmp_nalloc[i])
 +        {
 +            tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
 +            srenew(tmp_ind[i],tmp_nalloc[i]);
 +        }
 +        tmp_ind[i][ma->ncg[i]] = icg;
 +        ma->ncg[i]++;
 +        ma->nat[i] += cgindex[icg+1] - cgindex[icg];
 +    }
 +    
 +    k1 = 0;
 +    for(i=0; i<dd->nnodes; i++)
 +    {
 +        ma->index[i] = k1;
 +        for(k=0; k<ma->ncg[i]; k++)
 +        {
 +            ma->cg[k1++] = tmp_ind[i][k];
 +        }
 +    }
 +    ma->index[dd->nnodes] = k1;
 +    
 +    for(i=0; i<dd->nnodes; i++)
 +    {
 +        sfree(tmp_ind[i]);
 +    }
 +    sfree(tmp_ind);
 +    sfree(tmp_nalloc);
 +    
 +    if (fplog)
 +    {
 +        char buf[22];
 +        fprintf(fplog,"Charge group distribution at step %s:",
 +                gmx_step_str(step,buf));
 +        for(i=0; i<dd->nnodes; i++)
 +        {
 +            fprintf(fplog," %d",ma->ncg[i]);
 +        }
 +        fprintf(fplog,"\n");
 +    }
 +}
 +
 +static void get_cg_distribution(FILE *fplog,gmx_large_int_t step,gmx_domdec_t *dd,
 +                                t_block *cgs,matrix box,gmx_ddbox_t *ddbox,
 +                                rvec pos[])
 +{
 +    gmx_domdec_master_t *ma=NULL;
 +    ivec npulse;
 +    int  i,cg_gl;
 +    int  *ibuf,buf2[2] = { 0, 0 };
 +    gmx_bool bMaster = DDMASTER(dd);
 +    if (bMaster)
 +    {
 +        ma = dd->ma;
 +        
 +        if (dd->bScrewPBC)
 +        {
 +            check_screw_box(box);
 +        }
 +    
 +        set_dd_cell_sizes_slb(dd,ddbox,TRUE,npulse);
 +    
 +        distribute_cg(fplog,step,box,ddbox->tric_dir,cgs,pos,dd);
 +        for(i=0; i<dd->nnodes; i++)
 +        {
 +            ma->ibuf[2*i]   = ma->ncg[i];
 +            ma->ibuf[2*i+1] = ma->nat[i];
 +        }
 +        ibuf = ma->ibuf;
 +    }
 +    else
 +    {
 +        ibuf = NULL;
 +    }
 +    dd_scatter(dd,2*sizeof(int),ibuf,buf2);
 +    
 +    dd->ncg_home = buf2[0];
 +    dd->nat_home = buf2[1];
 +    dd->ncg_tot  = dd->ncg_home;
 +    dd->nat_tot  = dd->nat_home;
 +    if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
 +    {
 +        dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
 +        srenew(dd->index_gl,dd->cg_nalloc);
 +        srenew(dd->cgindex,dd->cg_nalloc+1);
 +    }
 +    if (bMaster)
 +    {
 +        for(i=0; i<dd->nnodes; i++)
 +        {
 +            ma->ibuf[i] = ma->ncg[i]*sizeof(int);
 +            ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
 +        }
 +    }
 +    
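 +    /* On the master, ma->ibuf now holds the per-node send counts in bytes
 +     * and, starting at ma->ibuf[dd->nnodes], the matching displacements into
 +     * ma->cg (ma->index[i] charge groups), also converted to bytes, as
 +     * required by dd_scatterv below.
 +     */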
 +    dd_scatterv(dd,
 +                DDMASTER(dd) ? ma->ibuf : NULL,
 +                DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
 +                DDMASTER(dd) ? ma->cg : NULL,
 +                dd->ncg_home*sizeof(int),dd->index_gl);
 +    
 +    /* Determine the home charge group sizes */
 +    dd->cgindex[0] = 0;
 +    for(i=0; i<dd->ncg_home; i++)
 +    {
 +        cg_gl = dd->index_gl[i];
 +        dd->cgindex[i+1] =
 +            dd->cgindex[i] + cgs->index[cg_gl+1] - cgs->index[cg_gl];
 +    }
 +    
 +    if (debug)
 +    {
 +        fprintf(debug,"Home charge groups:\n");
 +        for(i=0; i<dd->ncg_home; i++)
 +        {
 +            fprintf(debug," %d",dd->index_gl[i]);
 +            if (i % 10 == 9) 
 +                fprintf(debug,"\n");
 +        }
 +        fprintf(debug,"\n");
 +    }
 +}
 +
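 +/* In the communication buffer cgcm_state[m], each moved charge group
 + * occupies 1 + nrcg*nvec rvecs: one leading slot for cg_cm (or the
 + * coordinates sent twice with the Verlet scheme), written by
 + * compact_and_copy_vec_cg, followed by nrcg rvecs for each of the nvec
 + * state vectors (x, then v, sd_X and cg_p when present). The vec-th vector
 + * is therefore written at offset 1 + vec*nrcg within the block, as done
 + * below. For example (illustrative numbers), with nvec = 2 and a charge
 + * group of 3 atoms the block is: cg_cm, x0, x1, x2, v0, v1, v2.
 + */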
 +static int compact_and_copy_vec_at(int ncg,int *move,
 +                                   int *cgindex,
 +                                   int nvec,int vec,
 +                                   rvec *src,gmx_domdec_comm_t *comm,
 +                                   gmx_bool bCompact)
 +{
 +    int m,icg,i,i0,i1,nrcg;
 +    int home_pos;
 +    int pos_vec[DIM*2];
 +    
 +    home_pos = 0;
 +
 +    for(m=0; m<DIM*2; m++)
 +    {
 +        pos_vec[m] = 0;
 +    }
 +    
 +    i0 = 0;
 +    for(icg=0; icg<ncg; icg++)
 +    {
 +        i1 = cgindex[icg+1];
 +        m = move[icg];
 +        if (m == -1)
 +        {
 +            if (bCompact)
 +            {
 +                /* Compact the home array in place */
 +                for(i=i0; i<i1; i++)
 +                {
 +                    copy_rvec(src[i],src[home_pos++]);
 +                }
 +            }
 +        }
 +        else
 +        {
 +            /* Copy to the communication buffer */
 +            nrcg = i1 - i0;
 +            pos_vec[m] += 1 + vec*nrcg;
 +            for(i=i0; i<i1; i++)
 +            {
 +                copy_rvec(src[i],comm->cgcm_state[m][pos_vec[m]++]);
 +            }
 +            pos_vec[m] += (nvec - vec - 1)*nrcg;
 +        }
 +        if (!bCompact)
 +        {
 +            home_pos += i1 - i0;
 +        }
 +        i0 = i1;
 +    }
 +    
 +    return home_pos;
 +}
 +
 +static int compact_and_copy_vec_cg(int ncg,int *move,
 +                                   int *cgindex,
 +                                   int nvec,rvec *src,gmx_domdec_comm_t *comm,
 +                                   gmx_bool bCompact)
 +{
 +    int m,icg,i0,i1,nrcg;
 +    int home_pos;
 +    int pos_vec[DIM*2];
 +    
 +    home_pos = 0;
 +    
 +    for(m=0; m<DIM*2; m++)
 +    {
 +        pos_vec[m] = 0;
 +    }
 +    
 +    i0 = 0;
 +    for(icg=0; icg<ncg; icg++)
 +    {
 +        i1 = cgindex[icg+1];
 +        m = move[icg];
 +        if (m == -1)
 +        {
 +            if (bCompact)
 +            {
 +                /* Compact the home array in place */
 +                copy_rvec(src[icg],src[home_pos++]);
 +            }
 +        }
 +        else
 +        {
 +            nrcg = i1 - i0;
 +            /* Copy to the communication buffer */
 +            copy_rvec(src[icg],comm->cgcm_state[m][pos_vec[m]]);
 +            pos_vec[m] += 1 + nrcg*nvec;
 +        }
 +        i0 = i1;
 +    }
 +    if (!bCompact)
 +    {
 +        home_pos = ncg;
 +    }
 +    
 +    return home_pos;
 +}
 +
 +static int compact_ind(int ncg,int *move,
 +                       int *index_gl,int *cgindex,
 +                       int *gatindex,
 +                       gmx_ga2la_t ga2la,char *bLocalCG,
 +                       int *cginfo)
 +{
 +    int cg,nat,a0,a1,a,a_gl;
 +    int home_pos;
 +
 +    home_pos = 0;
 +    nat = 0;
 +    for(cg=0; cg<ncg; cg++)
 +    {
 +        a0 = cgindex[cg];
 +        a1 = cgindex[cg+1];
 +        if (move[cg] == -1)
 +        {
 +            /* Compact the home arrays in place.
 +             * Anything that can be done here avoids access to global arrays.
 +             */
 +            cgindex[home_pos] = nat;
 +            for(a=a0; a<a1; a++)
 +            {
 +                a_gl = gatindex[a];
 +                gatindex[nat] = a_gl;
 +                /* The cell number stays 0, so we don't need to set it */
 +                ga2la_change_la(ga2la,a_gl,nat);
 +                nat++;
 +            }
 +            index_gl[home_pos] = index_gl[cg];
 +            cginfo[home_pos]   = cginfo[cg];
 +            /* The charge group remains local, so bLocalCG does not change */
 +            home_pos++;
 +        }
 +        else
 +        {
 +            /* Clear the global indices */
 +            for(a=a0; a<a1; a++)
 +            {
 +                ga2la_del(ga2la,gatindex[a]);
 +            }
 +            if (bLocalCG)
 +            {
 +                bLocalCG[index_gl[cg]] = FALSE;
 +            }
 +        }
 +    }
 +    cgindex[home_pos] = nat;
 +    
 +    return home_pos;
 +}
 +
 +static void clear_and_mark_ind(int ncg,int *move,
 +                               int *index_gl,int *cgindex,int *gatindex,
 +                               gmx_ga2la_t ga2la,char *bLocalCG,
 +                               int *cell_index)
 +{
 +    int cg,a0,a1,a;
 +    
 +    for(cg=0; cg<ncg; cg++)
 +    {
 +        if (move[cg] >= 0)
 +        {
 +            a0 = cgindex[cg];
 +            a1 = cgindex[cg+1];
 +            /* Clear the global indices */
 +            for(a=a0; a<a1; a++)
 +            {
 +                ga2la_del(ga2la,gatindex[a]);
 +            }
 +            if (bLocalCG)
 +            {
 +                bLocalCG[index_gl[cg]] = FALSE;
 +            }
 +            /* Signal that this cg has moved using the ns cell index.
 +             * Here we set it to -1. fill_grid will change it
 +             * from -1 to NSGRID_SIGNAL_MOVED_FAC*grid->ncells.
 +             */
 +            cell_index[cg] = -1;
 +        }
 +    }
 +}
 +
 +static void print_cg_move(FILE *fplog,
 +                          gmx_domdec_t *dd,
 +                          gmx_large_int_t step,int cg,int dim,int dir,
 +                          gmx_bool bHaveLimitdAndCMOld,real limitd,
 +                          rvec cm_old,rvec cm_new,real pos_d)
 +{
 +    gmx_domdec_comm_t *comm;
 +    char buf[22];
 +
 +    comm = dd->comm;
 +
 +    fprintf(fplog,"\nStep %s:\n",gmx_step_str(step,buf));
 +    if (bHaveLimitdAndCMOld)
 +    {
 +        fprintf(fplog,"The charge group starting at atom %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
 +                ddglatnr(dd,dd->cgindex[cg]),limitd,dim2char(dim));
 +    }
 +    else
 +    {
 +        fprintf(fplog,"The charge group starting at atom %d moved than the distance allowed by the domain decomposition in direction %c\n",
 +                ddglatnr(dd,dd->cgindex[cg]),dim2char(dim));
 +    }
 +    fprintf(fplog,"distance out of cell %f\n",
 +            dir==1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
 +    if (bHaveLimitdAndCMOld)
 +    {
 +        fprintf(fplog,"Old coordinates: %8.3f %8.3f %8.3f\n",
 +                cm_old[XX],cm_old[YY],cm_old[ZZ]);
 +    }
 +    fprintf(fplog,"New coordinates: %8.3f %8.3f %8.3f\n",
 +            cm_new[XX],cm_new[YY],cm_new[ZZ]);
 +    fprintf(fplog,"Old cell boundaries in direction %c: %8.3f %8.3f\n",
 +            dim2char(dim),
 +            comm->old_cell_x0[dim],comm->old_cell_x1[dim]);
 +    fprintf(fplog,"New cell boundaries in direction %c: %8.3f %8.3f\n",
 +            dim2char(dim),
 +            comm->cell_x0[dim],comm->cell_x1[dim]);
 +}
 +
 +static void cg_move_error(FILE *fplog,
 +                          gmx_domdec_t *dd,
 +                          gmx_large_int_t step,int cg,int dim,int dir,
 +                          gmx_bool bHaveLimitdAndCMOld,real limitd,
 +                          rvec cm_old,rvec cm_new,real pos_d)
 +{
 +    if (fplog)
 +    {
 +        print_cg_move(fplog, dd,step,cg,dim,dir,
 +                      bHaveLimitdAndCMOld,limitd,cm_old,cm_new,pos_d);
 +    }
 +    print_cg_move(stderr,dd,step,cg,dim,dir,
 +                  bHaveLimitdAndCMOld,limitd,cm_old,cm_new,pos_d);
 +    gmx_fatal(FARGS,
 +              "A charge group moved too far between two domain decomposition steps\n"
 +              "This usually means that your system is not well equilibrated");
 +}
 +
 +static void rotate_state_atom(t_state *state,int a)
 +{
 +    int est;
 +
 +    for(est=0; est<estNR; est++)
 +    {
 +        if (EST_DISTR(est) && (state->flags & (1<<est))) {
 +            switch (est) {
 +            case estX:
 +                /* Rotate the complete state; for a rectangular box only */
 +                state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
 +                state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
 +                break;
 +            case estV:
 +                state->v[a][YY] = -state->v[a][YY];
 +                state->v[a][ZZ] = -state->v[a][ZZ];
 +                break;
 +            case estSDX:
 +                state->sd_X[a][YY] = -state->sd_X[a][YY];
 +                state->sd_X[a][ZZ] = -state->sd_X[a][ZZ];
 +                break;
 +            case estCGP:
 +                state->cg_p[a][YY] = -state->cg_p[a][YY];
 +                state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
 +                break;
 +            case estDISRE_INITF:
 +            case estDISRE_RM3TAV:
 +            case estORIRE_INITF:
 +            case estORIRE_DTAV:
 +                /* These are distances, so not affected by rotation */
 +                break;
 +            default:
 +                gmx_incons("Unknown state entry encountered in rotate_state_atom");            
 +            }
 +        }
 +    }
 +}
 +
 +static int *get_moved(gmx_domdec_comm_t *comm,int natoms)
 +{
 +    if (natoms > comm->moved_nalloc)
 +    {
 +        /* Contents should be preserved here */
 +        comm->moved_nalloc = over_alloc_dd(natoms);
 +        srenew(comm->moved,comm->moved_nalloc);
 +    }
 +
 +    return comm->moved;
 +}
 +
 +static void calc_cg_move(FILE *fplog,gmx_large_int_t step,
 +                         gmx_domdec_t *dd,
 +                         t_state *state,
 +                         ivec tric_dir,matrix tcm,
 +                         rvec cell_x0,rvec cell_x1,
 +                         rvec limitd,rvec limit0,rvec limit1,
 +                         const int *cgindex,
 +                         int cg_start,int cg_end,
 +                         rvec *cg_cm,
 +                         int *move)
 +{
 +    int  npbcdim;
 +    int  c,i,cg,k,k0,k1,d,dim,dim2,dir,d2,d3,d4,cell_d;
 +    int  mc,cdd,nrcg,ncg_recv,nat_recv,nvs,nvr,nvec,vec;
 +    int  flag;
 +    gmx_bool bScrew;
 +    ivec dev;
 +    real inv_ncg,pos_d;
 +    rvec cm_new;
 +
 +    npbcdim = dd->npbcdim;
 +
 +    for(cg=cg_start; cg<cg_end; cg++)
 +    {
 +        k0   = cgindex[cg];
 +        k1   = cgindex[cg+1];
 +        nrcg = k1 - k0;
 +        if (nrcg == 1)
 +        {
 +            copy_rvec(state->x[k0],cm_new);
 +        }
 +        else
 +        {
 +            inv_ncg = 1.0/nrcg;
 +            
 +            clear_rvec(cm_new);
 +            for(k=k0; (k<k1); k++)
 +            {
 +                rvec_inc(cm_new,state->x[k]);
 +            }
 +            for(d=0; (d<DIM); d++)
 +            {
 +                cm_new[d] = inv_ncg*cm_new[d];
 +            }
 +        }
 +        
 +        clear_ivec(dev);
 +        /* Do pbc and check DD cell boundary crossings */
 +        for(d=DIM-1; d>=0; d--)
 +        {
 +            if (dd->nc[d] > 1)
 +            {
 +                bScrew = (dd->bScrewPBC && d == XX);
 +                /* Determine the location of this cg in lattice coordinates */
 +                pos_d = cm_new[d];
 +                if (tric_dir[d])
 +                {
 +                    for(d2=d+1; d2<DIM; d2++)
 +                    {
 +                        pos_d += cm_new[d2]*tcm[d2][d];
 +                    }
 +                }
 +                /* Put the charge group in the triclinic unit-cell */
 +                if (pos_d >= cell_x1[d])
 +                {
 +                    if (pos_d >= limit1[d])
 +                    {
 +                        cg_move_error(fplog,dd,step,cg,d,1,TRUE,limitd[d],
 +                                      cg_cm[cg],cm_new,pos_d);
 +                    }
 +                    dev[d] = 1;
 +                    if (dd->ci[d] == dd->nc[d] - 1)
 +                    {
 +                        rvec_dec(cm_new,state->box[d]);
 +                        if (bScrew)
 +                        {
 +                            cm_new[YY] = state->box[YY][YY] - cm_new[YY];
 +                            cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
 +                        }
 +                        for(k=k0; (k<k1); k++)
 +                        {
 +                            rvec_dec(state->x[k],state->box[d]);
 +                            if (bScrew)
 +                            {
 +                                rotate_state_atom(state,k);
 +                            }
 +                        }
 +                    }
 +                }
 +                else if (pos_d < cell_x0[d])
 +                {
 +                    if (pos_d < limit0[d])
 +                    {
 +                        cg_move_error(fplog,dd,step,cg,d,-1,TRUE,limitd[d],
 +                                      cg_cm[cg],cm_new,pos_d);
 +                    }
 +                    dev[d] = -1;
 +                    if (dd->ci[d] == 0)
 +                    {
 +                        rvec_inc(cm_new,state->box[d]);
 +                        if (bScrew)
 +                        {
 +                            cm_new[YY] = state->box[YY][YY] - cm_new[YY];
 +                            cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
 +                        }
 +                        for(k=k0; (k<k1); k++)
 +                        {
 +                            rvec_inc(state->x[k],state->box[d]);
 +                            if (bScrew)
 +                            {
 +                                rotate_state_atom(state,k);
 +                            }
 +                        }
 +                    }
 +                }
 +            }
 +            else if (d < npbcdim)
 +            {
 +                /* Put the charge group in the rectangular unit-cell */
 +                while (cm_new[d] >= state->box[d][d])
 +                {
 +                    rvec_dec(cm_new,state->box[d]);
 +                    for(k=k0; (k<k1); k++)
 +                    {
 +                        rvec_dec(state->x[k],state->box[d]);
 +                    }
 +                }
 +                while (cm_new[d] < 0)
 +                {
 +                    rvec_inc(cm_new,state->box[d]);
 +                    for(k=k0; (k<k1); k++)
 +                    {
 +                        rvec_inc(state->x[k],state->box[d]);
 +                    }
 +                }
 +            }
 +        }
 +    
 +        copy_rvec(cm_new,cg_cm[cg]);
 +        
 +        /* Determine where this cg should go */
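 +        /* mc encodes the destination: d*2 for the forward neighbor in
 +         * decomposition dimension d and d*2+1 for the backward one, except
 +         * that with only two cells along d both directions map to d*2,
 +         * since the forward and backward neighbors are then the same cell.
 +         */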
 +        flag = 0;
 +        mc = -1;
 +        for(d=0; d<dd->ndim; d++)
 +        {
 +            dim = dd->dim[d];
 +            if (dev[dim] == 1)
 +            {
 +                flag |= DD_FLAG_FW(d);
 +                if (mc == -1)
 +                {
 +                    mc = d*2;
 +                }
 +            }
 +            else if (dev[dim] == -1)
 +            {
 +                flag |= DD_FLAG_BW(d);
 +                if (mc == -1) {
 +                    if (dd->nc[dim] > 2)
 +                    {
 +                        mc = d*2 + 1;
 +                    }
 +                    else
 +                    {
 +                        mc = d*2;
 +                    }
 +                }
 +            }
 +        }
 +        /* Temporarily store the flag in move */
 +        move[cg] = mc + flag;
 +    }
 +}
 +
 +static void dd_redistribute_cg(FILE *fplog,gmx_large_int_t step,
 +                               gmx_domdec_t *dd,ivec tric_dir,
 +                               t_state *state,rvec **f,
 +                               t_forcerec *fr,t_mdatoms *md,
 +                               gmx_bool bCompact,
 +                               t_nrnb *nrnb,
 +                               int *ncg_stay_home,
 +                               int *ncg_moved)
 +{
 +    int  *move;
 +    int  npbcdim;
 +    int  ncg[DIM*2],nat[DIM*2];
 +    int  c,i,cg,k,k0,k1,d,dim,dim2,dir,d2,d3,d4,cell_d;
 +    int  mc,cdd,nrcg,ncg_recv,nat_recv,nvs,nvr,nvec,vec;
 +    int  sbuf[2],rbuf[2];
 +    int  home_pos_cg,home_pos_at,buf_pos;
 +    int  flag;
 +    gmx_bool bV=FALSE,bSDX=FALSE,bCGP=FALSE;
 +    gmx_bool bScrew;
 +    ivec dev;
 +    real inv_ncg,pos_d;
 +    matrix tcm;
 +    rvec *cg_cm=NULL,cell_x0,cell_x1,limitd,limit0,limit1,cm_new;
 +    atom_id *cgindex;
 +    cginfo_mb_t *cginfo_mb;
 +    gmx_domdec_comm_t *comm;
 +    int  *moved;
 +    int  nthread,thread;
 +    
 +    if (dd->bScrewPBC)
 +    {
 +        check_screw_box(state->box);
 +    }
 +    
 +    comm  = dd->comm;
 +    if (fr->cutoff_scheme == ecutsGROUP)
 +    {
 +        cg_cm = fr->cg_cm;
 +    }
 +    
 +    for(i=0; i<estNR; i++)
 +    {
 +        if (EST_DISTR(i))
 +        {
 +            switch (i)
 +            {
 +            case estX:   /* Always present */            break;
 +            case estV:   bV   = (state->flags & (1<<i)); break;
 +            case estSDX: bSDX = (state->flags & (1<<i)); break;
 +            case estCGP: bCGP = (state->flags & (1<<i)); break;
 +            case estLD_RNG:
 +            case estLD_RNGI:
 +            case estDISRE_INITF:
 +            case estDISRE_RM3TAV:
 +            case estORIRE_INITF:
 +            case estORIRE_DTAV:
 +                /* No processing required */
 +                break;
 +            default:
 +                gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
 +            }
 +        }
 +    }
 +    
 +    if (dd->ncg_tot > comm->nalloc_int)
 +    {
 +        comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
 +        srenew(comm->buf_int,comm->nalloc_int);
 +    }
 +    move = comm->buf_int;
 +    
 +    /* Clear the count */
 +    for(c=0; c<dd->ndim*2; c++)
 +    {
 +        ncg[c] = 0;
 +        nat[c] = 0;
 +    }
 +
 +    npbcdim = dd->npbcdim;
 +
 +    for(d=0; (d<DIM); d++)
 +    {
 +        limitd[d] = dd->comm->cellsize_min[d];
 +        if (d >= npbcdim && dd->ci[d] == 0)
 +        {
 +            cell_x0[d] = -GMX_FLOAT_MAX;
 +        }
 +        else
 +        {
 +            cell_x0[d] = comm->cell_x0[d];
 +        }
 +        if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
 +        {
 +            cell_x1[d] = GMX_FLOAT_MAX;
 +        }
 +        else
 +        {
 +            cell_x1[d] = comm->cell_x1[d];
 +        }
 +        if (d < npbcdim)
 +        {
 +            limit0[d] = comm->old_cell_x0[d] - limitd[d];
 +            limit1[d] = comm->old_cell_x1[d] + limitd[d];
 +        }
 +        else
 +        {
 +            /* We check after communication if a charge group moved
 +             * more than one cell. Set the pre-comm check limit to float_max.
 +             */
 +            limit0[d] = -GMX_FLOAT_MAX;
 +            limit1[d] =  GMX_FLOAT_MAX;
 +        }
 +    }
 +    
 +    make_tric_corr_matrix(npbcdim,state->box,tcm);
 +    
 +    cgindex = dd->cgindex;
 +
 +    nthread = gmx_omp_nthreads_get(emntDomdec);
 +
 +    /* Compute the center of geometry for all home charge groups
 +     * and put them in the box and determine where they should go.
 +     */
 +#pragma omp parallel for num_threads(nthread) schedule(static)
 +    for(thread=0; thread<nthread; thread++)
 +    {
 +        calc_cg_move(fplog,step,dd,state,tric_dir,tcm,
 +                     cell_x0,cell_x1,limitd,limit0,limit1,
 +                     cgindex,
 +                     ( thread   *dd->ncg_home)/nthread,
 +                     ((thread+1)*dd->ncg_home)/nthread,
 +                     fr->cutoff_scheme==ecutsGROUP ? cg_cm : state->x,
 +                     move);
 +    }
 +
 +    for(cg=0; cg<dd->ncg_home; cg++)
 +    {
 +        if (move[cg] >= 0)
 +        {
 +            mc = move[cg];
 +            flag     = mc & ~DD_FLAG_NRCG;
 +            mc       = mc & DD_FLAG_NRCG;
 +            move[cg] = mc;
 +
 +            if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
 +            {
 +                comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
 +                srenew(comm->cggl_flag[mc],comm->cggl_flag_nalloc[mc]*DD_CGIBS);
 +            }
 +            comm->cggl_flag[mc][ncg[mc]*DD_CGIBS  ] = dd->index_gl[cg];
 +            /* We store the cg size in the lower 16 bits
 +             * and the place where the charge group should go
 +             * in the next 6 bits. This saves some communication volume.
 +             */
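 +            /* For example, a charge group of 3 atoms moving forward along
 +             * decomposition dimension d is stored as 3 | DD_FLAG_FW(d); the
 +             * receiver recovers the size with flag & DD_FLAG_NRCG and the
 +             * destination from the DD_FLAG_FW/DD_FLAG_BW bits.
 +             */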
 +            nrcg = cgindex[cg+1] - cgindex[cg];
 +            comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
 +            ncg[mc] += 1;
 +            nat[mc] += nrcg;
 +        }
 +    }
 +    
 +    inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);
 +    inc_nrnb(nrnb,eNR_RESETX,dd->ncg_home);
 +
 +    *ncg_moved = 0;
 +    for(i=0; i<dd->ndim*2; i++)
 +    {
 +        *ncg_moved += ncg[i];
 +    }
 +    
 +    nvec = 1;
 +    if (bV)
 +    {
 +        nvec++;
 +    }
 +    if (bSDX)
 +    {
 +        nvec++;
 +    }
 +    if (bCGP)
 +    {
 +        nvec++;
 +    }
 +    
 +    /* Make sure the communication buffers are large enough */
 +    for(mc=0; mc<dd->ndim*2; mc++)
 +    {
 +        nvr = ncg[mc] + nat[mc]*nvec;
 +        if (nvr > comm->cgcm_state_nalloc[mc])
 +        {
 +            comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
 +            srenew(comm->cgcm_state[mc],comm->cgcm_state_nalloc[mc]);
 +        }
 +    }
 +    
 +    switch (fr->cutoff_scheme)
 +    {
 +    case ecutsGROUP:
 +        /* Recalculating cg_cm might be cheaper than communicating,
 +         * but that could give rise to rounding issues.
 +         */
 +        home_pos_cg =
 +            compact_and_copy_vec_cg(dd->ncg_home,move,cgindex,
 +                                    nvec,cg_cm,comm,bCompact);
 +    break;
 +    case ecutsVERLET:
 +        /* Without charge groups we send the moved atom coordinates
 +         * over twice. This is so the code below can be used without
 +         * many conditionals both with and without charge groups.
 +         */
 +        home_pos_cg =
 +            compact_and_copy_vec_cg(dd->ncg_home,move,cgindex,
 +                                    nvec,state->x,comm,FALSE);
 +        if (bCompact)
 +        {
 +            home_pos_cg -= *ncg_moved;
 +        }
 +        break;
 +    default:
 +        gmx_incons("unimplemented");
 +        home_pos_cg = 0;
 +    }
 +    
 +    vec = 0;
 +    home_pos_at =
 +        compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
 +                                nvec,vec++,state->x,comm,bCompact);
 +    if (bV)
 +    {
 +        compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
 +                                nvec,vec++,state->v,comm,bCompact);
 +    }
 +    if (bSDX)
 +    {
 +        compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
 +                                nvec,vec++,state->sd_X,comm,bCompact);
 +    }
 +    if (bCGP)
 +    {
 +        compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
 +                                nvec,vec++,state->cg_p,comm,bCompact);
 +    }
 +    
 +    if (bCompact)
 +    {
 +        compact_ind(dd->ncg_home,move,
 +                    dd->index_gl,dd->cgindex,dd->gatindex,
 +                    dd->ga2la,comm->bLocalCG,
 +                    fr->cginfo);
 +    }
 +    else
 +    {
 +        if (fr->cutoff_scheme == ecutsVERLET)
 +        {
 +            moved = get_moved(comm,dd->ncg_home);
 +
 +            for(k=0; k<dd->ncg_home; k++)
 +            {
 +                moved[k] = 0;
 +            }
 +        }
 +        else
 +        {
 +            moved = fr->ns.grid->cell_index;
 +        }
 +
 +        clear_and_mark_ind(dd->ncg_home,move,
 +                           dd->index_gl,dd->cgindex,dd->gatindex,
 +                           dd->ga2la,comm->bLocalCG,
 +                           moved);
 +    }
 +    
 +    cginfo_mb = fr->cginfo_mb;
 +
 +    *ncg_stay_home = home_pos_cg;
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        dim = dd->dim[d];
 +        ncg_recv = 0;
 +        nat_recv = 0;
 +        nvr      = 0;
 +        for(dir=0; dir<(dd->nc[dim]==2 ? 1 : 2); dir++)
 +        {
 +            cdd = d*2 + dir;
 +            /* Communicate the cg and atom counts */
 +            sbuf[0] = ncg[cdd];
 +            sbuf[1] = nat[cdd];
 +            if (debug)
 +            {
 +                fprintf(debug,"Sending ddim %d dir %d: ncg %d nat %d\n",
 +                        d,dir,sbuf[0],sbuf[1]);
 +            }
 +            dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);
 +            
 +            if ((ncg_recv+rbuf[0])*DD_CGIBS > comm->nalloc_int)
 +            {
 +                comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS);
 +                srenew(comm->buf_int,comm->nalloc_int);
 +            }
 +            
 +            /* Communicate the charge group indices, sizes and flags */
 +            dd_sendrecv_int(dd, d, dir,
 +                            comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS,
 +                            comm->buf_int+ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);
 +            
 +            nvs = ncg[cdd] + nat[cdd]*nvec;
 +            i   = rbuf[0]  + rbuf[1] *nvec;
 +            vec_rvec_check_alloc(&comm->vbuf,nvr+i);
 +            
 +            /* Communicate cgcm and state */
 +            dd_sendrecv_rvec(dd, d, dir,
 +                             comm->cgcm_state[cdd], nvs,
 +                             comm->vbuf.v+nvr, i);
 +            ncg_recv += rbuf[0];
 +            nat_recv += rbuf[1];
 +            nvr      += i;
 +        }
 +        
 +        /* Process the received charge groups */
 +        buf_pos = 0;
 +        for(cg=0; cg<ncg_recv; cg++)
 +        {
 +            flag = comm->buf_int[cg*DD_CGIBS+1];
 +
 +            if (dim >= npbcdim && dd->nc[dim] > 2)
 +            {
 +                /* No pbc in this dim and more than one domain boundary.
 +                 * We do a separate check whether a charge group moved too far.
 +                 */
 +                if (((flag & DD_FLAG_FW(d)) &&
 +                     comm->vbuf.v[buf_pos][dim] > cell_x1[dim]) ||
 +                    ((flag & DD_FLAG_BW(d)) &&
 +                     comm->vbuf.v[buf_pos][dim] < cell_x0[dim]))
 +                {
 +                    cg_move_error(fplog,dd,step,cg,dim,
 +                                  (flag & DD_FLAG_FW(d)) ? 1 : 0,
 +                                   FALSE,0,
 +                                   comm->vbuf.v[buf_pos],
 +                                   comm->vbuf.v[buf_pos],
 +                                   comm->vbuf.v[buf_pos][dim]);
 +                }
 +            }
 +
 +            mc = -1;
 +            if (d < dd->ndim-1)
 +            {
 +                /* Check which direction this cg should go */
 +                for(d2=d+1; (d2<dd->ndim && mc==-1); d2++)
 +                {
 +                    if (dd->bGridJump)
 +                    {
 +                        /* The cell boundaries for dimension d2 are not equal
 +                         * for each cell row of the lower dimension(s),
 +                         * therefore we might need to redetermine where
 +                         * this cg should go.
 +                         */
 +                        dim2 = dd->dim[d2];
 +                        /* If this cg crosses the box boundary in dimension d2
 +                         * we can use the communicated flag, so we do not
 +                         * have to worry about pbc.
 +                         */
 +                        if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
 +                               (flag & DD_FLAG_FW(d2))) ||
 +                              (dd->ci[dim2] == 0 &&
 +                               (flag & DD_FLAG_BW(d2)))))
 +                        {
 +                            /* Clear the two flags for this dimension */
 +                            flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
 +                            /* Determine the location of this cg
 +                             * in lattice coordinates
 +                             */
 +                            pos_d = comm->vbuf.v[buf_pos][dim2];
 +                            if (tric_dir[dim2])
 +                            {
 +                                for(d3=dim2+1; d3<DIM; d3++)
 +                                {
 +                                    pos_d +=
 +                                        comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
 +                                }
 +                            }
 +                            /* Check if we are not at the box edge.
 +                             * pbc is only handled in the first step above,
 +                             * but this check could move over pbc while
 +                             * the first step did not due to different rounding.
 +                             */
 +                            if (pos_d >= cell_x1[dim2] &&
 +                                dd->ci[dim2] != dd->nc[dim2]-1)
 +                            {
 +                                flag |= DD_FLAG_FW(d2);
 +                            }
 +                            else if (pos_d < cell_x0[dim2] &&
 +                                     dd->ci[dim2] != 0)
 +                            {
 +                                flag |= DD_FLAG_BW(d2);
 +                            }
 +                            comm->buf_int[cg*DD_CGIBS+1] = flag;
 +                        }
 +                    }
 +                    /* Set to which neighboring cell this cg should go */
 +                    if (flag & DD_FLAG_FW(d2))
 +                    {
 +                        mc = d2*2;
 +                    }
 +                    else if (flag & DD_FLAG_BW(d2))
 +                    {
 +                        if (dd->nc[dd->dim[d2]] > 2)
 +                        {
 +                            mc = d2*2+1;
 +                        }
 +                        else
 +                        {
 +                            mc = d2*2;
 +                        }
 +                    }
 +                }
 +            }
 +            
 +            nrcg = flag & DD_FLAG_NRCG;
 +            if (mc == -1)
 +            {
 +                if (home_pos_cg+1 > dd->cg_nalloc)
 +                {
 +                    dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
 +                    srenew(dd->index_gl,dd->cg_nalloc);
 +                    srenew(dd->cgindex,dd->cg_nalloc+1);
 +                }
 +                /* Set the global charge group index and size */
 +                dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS];
 +                dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
 +                /* Copy the state from the buffer */
 +                dd_check_alloc_ncg(fr,state,f,home_pos_cg+1);
 +                if (fr->cutoff_scheme == ecutsGROUP)
 +                {
 +                    cg_cm = fr->cg_cm;
 +                    copy_rvec(comm->vbuf.v[buf_pos],cg_cm[home_pos_cg]);
 +                }
 +                buf_pos++;
 +
 +                /* Set the cginfo */
 +                fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
 +                                                   dd->index_gl[home_pos_cg]);
 +                if (comm->bLocalCG)
 +                {
 +                    comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
 +                }
 +
 +                if (home_pos_at+nrcg > state->nalloc)
 +                {
 +                    dd_realloc_state(state,f,home_pos_at+nrcg);
 +                }
 +                for(i=0; i<nrcg; i++)
 +                {
 +                    copy_rvec(comm->vbuf.v[buf_pos++],
 +                              state->x[home_pos_at+i]);
 +                }
 +                if (bV)
 +                {
 +                    for(i=0; i<nrcg; i++)
 +                    {
 +                        copy_rvec(comm->vbuf.v[buf_pos++],
 +                                  state->v[home_pos_at+i]);
 +                    }
 +                }
 +                if (bSDX)
 +                {
 +                    for(i=0; i<nrcg; i++)
 +                    {
 +                        copy_rvec(comm->vbuf.v[buf_pos++],
 +                                  state->sd_X[home_pos_at+i]);
 +                    }
 +                }
 +                if (bCGP)
 +                {
 +                    for(i=0; i<nrcg; i++)
 +                    {
 +                        copy_rvec(comm->vbuf.v[buf_pos++],
 +                                  state->cg_p[home_pos_at+i]);
 +                    }
 +                }
 +                home_pos_cg += 1;
 +                home_pos_at += nrcg;
 +            }
 +            else
 +            {
 +                /* Reallocate the buffers if necessary  */
 +                if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
 +                {
 +                    comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
 +                    srenew(comm->cggl_flag[mc],comm->cggl_flag_nalloc[mc]*DD_CGIBS);
 +                }
 +                nvr = ncg[mc] + nat[mc]*nvec;
 +                if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
 +                {
 +                    comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
 +                    srenew(comm->cgcm_state[mc],comm->cgcm_state_nalloc[mc]);
 +                }
 +                /* Copy from the receive to the send buffers */
 +                memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS,
 +                       comm->buf_int + cg*DD_CGIBS,
 +                       DD_CGIBS*sizeof(int));
 +                memcpy(comm->cgcm_state[mc][nvr],
 +                       comm->vbuf.v[buf_pos],
 +                       (1+nrcg*nvec)*sizeof(rvec));
 +                buf_pos += 1 + nrcg*nvec;
 +                ncg[mc] += 1;
 +                nat[mc] += nrcg;
 +            }
 +        }
 +    }
 +    
 +    /* With sorting (!bCompact) the indices are now only partially up to date
 +     * and ncg_home and nat_home are not the real count, since there are
 +     * "holes" in the arrays for the charge groups that moved to neighbors.
 +     */
 +    if (fr->cutoff_scheme == ecutsVERLET)
 +    {
 +        moved = get_moved(comm,home_pos_cg);
 +
 +        for(i=dd->ncg_home; i<home_pos_cg; i++)
 +        {
 +            moved[i] = 0;
 +        }
 +    }
 +    dd->ncg_home = home_pos_cg;
 +    dd->nat_home = home_pos_at;
 +
 +    if (debug)
 +    {
 +        fprintf(debug,
 +                "Finished repartitioning: cgs moved out %d, new home %d\n",
 +                *ncg_moved,dd->ncg_home-*ncg_moved);
 +    }
 +}
 +
 +void dd_cycles_add(gmx_domdec_t *dd,float cycles,int ddCycl)
 +{
 +    dd->comm->cycl[ddCycl] += cycles;
 +    dd->comm->cycl_n[ddCycl]++;
 +    if (cycles > dd->comm->cycl_max[ddCycl])
 +    {
 +        dd->comm->cycl_max[ddCycl] = cycles;
 +    }
 +}
 +
 +static double force_flop_count(t_nrnb *nrnb)
 +{
 +    int i;
 +    double sum;
 +    const char *name;
 +
 +    sum = 0;
++    for(i=0; i<eNR_NBKERNEL_FREE_ENERGY; i++)
 +    {
 +        /* To get closer to the real timings, we halve the count
 +         * for the normal loops and halve it again for the water loops.
 +         */
 +        name = nrnb_str(i);
 +        if (strstr(name,"W3") != NULL || strstr(name,"W4") != NULL)
 +        {
 +            sum += nrnb->n[i]*0.25*cost_nrnb(i);
 +        }
 +        else
 +        {
 +            sum += nrnb->n[i]*0.50*cost_nrnb(i);
 +        }
 +    }
 +    for(i=eNR_NBKERNEL_FREE_ENERGY; i<=eNR_NB14; i++)
 +    {
 +        name = nrnb_str(i);
 +        if (strstr(name,"W3") != NULL || strstr(name,"W4") != NULL)
 +        {
 +            sum += nrnb->n[i]*cost_nrnb(i);
 +        }
 +    }
 +    for(i=eNR_BONDS; i<=eNR_WALLS; i++)
 +    {
 +        sum += nrnb->n[i]*cost_nrnb(i);
 +    }
 +
 +    return sum;
 +}
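 +
 +/* A minimal worked example of the weighting above, using hypothetical
 + * numbers: for a counter with nrnb->n[i] = 1000 and cost_nrnb(i) = 30,
 + * a water-loop kernel (with "W3" or "W4" in its name) contributes
 + *     1000 * 0.25 * 30 = 7500 flops,
 + * while any other nonbonded kernel contributes
 + *     1000 * 0.50 * 30 = 15000 flops,
 + * and the bonded terms in the last loop are counted at full cost.
 + */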
 +
 +void dd_force_flop_start(gmx_domdec_t *dd,t_nrnb *nrnb)
 +{
 +    if (dd->comm->eFlop)
 +    {
 +        dd->comm->flop -= force_flop_count(nrnb);
 +    }
 +}
 +
 +void dd_force_flop_stop(gmx_domdec_t *dd,t_nrnb *nrnb)
 +{
 +    if (dd->comm->eFlop)
 +    {
 +        dd->comm->flop += force_flop_count(nrnb);
 +        dd->comm->flop_n++;
 +    }
 +}  
 +
 +static void clear_dd_cycle_counts(gmx_domdec_t *dd)
 +{
 +    int i;
 +    
 +    for(i=0; i<ddCyclNr; i++)
 +    {
 +        dd->comm->cycl[i] = 0;
 +        dd->comm->cycl_n[i] = 0;
 +        dd->comm->cycl_max[i] = 0;
 +    }
 +    dd->comm->flop = 0;
 +    dd->comm->flop_n = 0;
 +}
 +
 +static void get_load_distribution(gmx_domdec_t *dd,gmx_wallcycle_t wcycle)
 +{
 +    gmx_domdec_comm_t *comm;
 +    gmx_domdec_load_t *load;
 +    gmx_domdec_root_t *root=NULL;
 +    int  d,dim,cid,i,pos;
 +    float cell_frac=0,sbuf[DD_NLOAD_MAX];
 +    gmx_bool bSepPME;
 +    
 +    if (debug)
 +    {
 +        fprintf(debug,"get_load_distribution start\n");
 +    }
 +
 +    wallcycle_start(wcycle,ewcDDCOMMLOAD);
 +    
 +    comm = dd->comm;
 +    
 +    bSepPME = (dd->pme_nodeid >= 0);
 +    
 +    for(d=dd->ndim-1; d>=0; d--)
 +    {
 +        dim = dd->dim[d];
 +        /* Check if we participate in the communication in this dimension */
 +        if (d == dd->ndim-1 || 
 +            (dd->ci[dd->dim[d+1]]==0 && dd->ci[dd->dim[dd->ndim-1]]==0))
 +        {
 +            load = &comm->load[d];
 +            if (dd->bGridJump)
 +            {
 +                cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
 +            }
 +            pos = 0;
 +            if (d == dd->ndim-1)
 +            {
 +                sbuf[pos++] = dd_force_load(comm);
 +                sbuf[pos++] = sbuf[0];
 +                if (dd->bGridJump)
 +                {
 +                    sbuf[pos++] = sbuf[0];
 +                    sbuf[pos++] = cell_frac;
 +                    if (d > 0)
 +                    {
 +                        sbuf[pos++] = comm->cell_f_max0[d];
 +                        sbuf[pos++] = comm->cell_f_min1[d];
 +                    }
 +                }
 +                if (bSepPME)
 +                {
 +                    sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
 +                    sbuf[pos++] = comm->cycl[ddCyclPME];
 +                }
 +            }
 +            else
 +            {
 +                sbuf[pos++] = comm->load[d+1].sum;
 +                sbuf[pos++] = comm->load[d+1].max;
 +                if (dd->bGridJump)
 +                {
 +                    sbuf[pos++] = comm->load[d+1].sum_m;
 +                    sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
 +                    sbuf[pos++] = comm->load[d+1].flags;
 +                    if (d > 0)
 +                    {
 +                        sbuf[pos++] = comm->cell_f_max0[d];
 +                        sbuf[pos++] = comm->cell_f_min1[d];
 +                    }
 +                }
 +                if (bSepPME)
 +                {
 +                    sbuf[pos++] = comm->load[d+1].mdf;
 +                    sbuf[pos++] = comm->load[d+1].pme;
 +                }
 +            }
 +            load->nload = pos;
 +            /* Communicate a row in DD direction d.
 +             * The communicators are set up such that the root always has rank 0.
 +             */
 +#ifdef GMX_MPI
 +            MPI_Gather(sbuf      ,load->nload*sizeof(float),MPI_BYTE,
 +                       load->load,load->nload*sizeof(float),MPI_BYTE,
 +                       0,comm->mpi_comm_load[d]);
 +#endif
 +            if (dd->ci[dim] == dd->master_ci[dim])
 +            {
 +                /* We are the root, process this row */
 +                if (comm->bDynLoadBal)
 +                {
 +                    root = comm->root[d];
 +                }
 +                load->sum = 0;
 +                load->max = 0;
 +                load->sum_m = 0;
 +                load->cvol_min = 1;
 +                load->flags = 0;
 +                load->mdf = 0;
 +                load->pme = 0;
 +                pos = 0;
 +                for(i=0; i<dd->nc[dim]; i++)
 +                {
 +                    load->sum += load->load[pos++];
 +                    load->max = max(load->max,load->load[pos]);
 +                    pos++;
 +                    if (dd->bGridJump)
 +                    {
 +                        if (root->bLimited)
 +                        {
 +                            /* This direction could not be load balanced
 +                             * properly, therefore we need to use the maximum
 +                             * instead of the average load.
 +                             */
 +                            load->sum_m = max(load->sum_m,load->load[pos]);
 +                        }
 +                        else
 +                        {
 +                            load->sum_m += load->load[pos];
 +                        }
 +                        pos++;
 +                        load->cvol_min = min(load->cvol_min,load->load[pos]);
 +                        pos++;
 +                        if (d < dd->ndim-1)
 +                        {
 +                            load->flags = (int)(load->load[pos++] + 0.5);
 +                        }
 +                        if (d > 0)
 +                        {
 +                            root->cell_f_max0[i] = load->load[pos++];
 +                            root->cell_f_min1[i] = load->load[pos++];
 +                        }
 +                    }
 +                    if (bSepPME)
 +                    {
 +                        load->mdf = max(load->mdf,load->load[pos]);
 +                        pos++;
 +                        load->pme = max(load->pme,load->load[pos]);
 +                        pos++;
 +                    }
 +                }
 +                if (comm->bDynLoadBal && root->bLimited)
 +                {
 +                    load->sum_m *= dd->nc[dim];
 +                    load->flags |= (1<<d);
 +                }
 +            }
 +        }
 +    }
 +
 +    if (DDMASTER(dd))
 +    {
 +        comm->nload      += dd_load_count(comm);
 +        comm->load_step  += comm->cycl[ddCyclStep];
 +        comm->load_sum   += comm->load[0].sum;
 +        comm->load_max   += comm->load[0].max;
 +        if (comm->bDynLoadBal)
 +        {
 +            for(d=0; d<dd->ndim; d++)
 +            {
 +                if (comm->load[0].flags & (1<<d))
 +                {
 +                    comm->load_lim[d]++;
 +                }
 +            }
 +        }
 +        if (bSepPME)
 +        {
 +            comm->load_mdf += comm->load[0].mdf;
 +            comm->load_pme += comm->load[0].pme;
 +        }
 +    }
 +
 +    wallcycle_stop(wcycle,ewcDDCOMMLOAD);
 +    
 +    if (debug)
 +    {
 +        fprintf(debug,"get_load_distribution finished\n");
 +    }
 +}
 +
 +static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
 +{
 +    /* Return the relative performance loss on the total run time
 +     * due to the force calculation load imbalance.
 +     */
 +    if (dd->comm->nload > 0)
 +    {
 +        return
 +            (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
 +            (dd->comm->load_step*dd->nnodes);
 +    }
 +    else
 +    {
 +        return 0;
 +    }
 +}
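 +
 +/* A worked example of the formula above, with hypothetical numbers:
 + * on 4 PP nodes with load_max = 120, load_sum = 400 and load_step = 150
 + * (all in cycles), the estimated loss is
 + *     (120*4 - 400)/(150*4) = 80/600 ~= 0.13,
 + * i.e. about 13% of the total run time is spent waiting.
 + */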
 +
 +static void print_dd_load_av(FILE *fplog,gmx_domdec_t *dd)
 +{
 +    char  buf[STRLEN];
 +    int   npp,npme,nnodes,d,limp;
 +    float imbal,pme_f_ratio,lossf,lossp=0;
 +    gmx_bool  bLim;
 +    gmx_domdec_comm_t *comm;
 +
 +    comm = dd->comm;
 +    if (DDMASTER(dd) && comm->nload > 0)
 +    {
 +        npp    = dd->nnodes;
 +        npme   = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
 +        nnodes = npp + npme;
 +        imbal = comm->load_max*npp/comm->load_sum - 1;
 +        lossf = dd_force_imb_perf_loss(dd);
 +        sprintf(buf," Average load imbalance: %.1f %%\n",imbal*100);
 +        fprintf(fplog,"%s",buf);
 +        fprintf(stderr,"\n");
 +        fprintf(stderr,"%s",buf);
 +        sprintf(buf," Part of the total run time spent waiting due to load imbalance: %.1f %%\n",lossf*100);
 +        fprintf(fplog,"%s",buf);
 +        fprintf(stderr,"%s",buf);
 +        bLim = FALSE;
 +        if (comm->bDynLoadBal)
 +        {
 +            sprintf(buf," Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
 +            for(d=0; d<dd->ndim; d++)
 +            {
 +                limp = (200*comm->load_lim[d]+1)/(2*comm->nload);
 +                sprintf(buf+strlen(buf)," %c %d %%",dim2char(dd->dim[d]),limp);
 +                if (limp >= 50)
 +                {
 +                    bLim = TRUE;
 +                }
 +            }
 +            sprintf(buf+strlen(buf),"\n");
 +            fprintf(fplog,"%s",buf);
 +            fprintf(stderr,"%s",buf);
 +        }
 +        if (npme > 0)
 +        {
 +            pme_f_ratio = comm->load_pme/comm->load_mdf;
 +            lossp = (comm->load_pme -comm->load_mdf)/comm->load_step;
 +            if (lossp <= 0)
 +            {
 +                lossp *= (float)npme/(float)nnodes;
 +            }
 +            else
 +            {
 +                lossp *= (float)npp/(float)nnodes;
 +            }
 +            sprintf(buf," Average PME mesh/force load: %5.3f\n",pme_f_ratio);
 +            fprintf(fplog,"%s",buf);
 +            fprintf(stderr,"%s",buf);
 +            sprintf(buf," Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n",fabs(lossp)*100);
 +            fprintf(fplog,"%s",buf);
 +            fprintf(stderr,"%s",buf);
 +        }
 +        fprintf(fplog,"\n");
 +        fprintf(stderr,"\n");
 +        
 +        if (lossf >= DD_PERF_LOSS)
 +        {
 +            sprintf(buf,
 +                    "NOTE: %.1f %% performance was lost due to load imbalance\n"
 +                    "      in the domain decomposition.\n",lossf*100);
 +            if (!comm->bDynLoadBal)
 +            {
 +                sprintf(buf+strlen(buf),"      You might want to use dynamic load balancing (option -dlb).\n");
 +            }
 +            else if (bLim)
 +            {
 +                sprintf(buf+strlen(buf),"      You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
 +            }
 +            fprintf(fplog,"%s\n",buf);
 +            fprintf(stderr,"%s\n",buf);
 +        }
 +        if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS)
 +        {
 +            sprintf(buf,
 +                    "NOTE: %.1f %% performance was lost because the PME nodes\n"
 +                    "      had %s work to do than the PP nodes.\n"
 +                    "      You might want to %s the number of PME nodes\n"
 +                    "      or %s the cut-off and the grid spacing.\n",
 +                    fabs(lossp*100),
 +                    (lossp < 0) ? "less"     : "more",
 +                    (lossp < 0) ? "decrease" : "increase",
 +                    (lossp < 0) ? "decrease" : "increase");
 +            fprintf(fplog,"%s\n",buf);
 +            fprintf(stderr,"%s\n",buf);
 +        }
 +    }
 +}
 +
 +static float dd_vol_min(gmx_domdec_t *dd)
 +{
 +    return dd->comm->load[0].cvol_min*dd->nnodes;
 +}
 +
 +static gmx_bool dd_load_flags(gmx_domdec_t *dd)
 +{
 +    return dd->comm->load[0].flags;
 +}
 +
 +static float dd_f_imbal(gmx_domdec_t *dd)
 +{
 +    return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1;
 +}
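 +
 +/* With the same hypothetical numbers as above (load max 120, sum 400,
 + * 4 nodes), the value returned by dd_f_imbal() would be
 + *     120*4/400 - 1 = 0.2,
 + * i.e. a 20% force load imbalance.
 + */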
 +
 +float dd_pme_f_ratio(gmx_domdec_t *dd)
 +{
 +    if (dd->comm->cycl_n[ddCyclPME] > 0)
 +    {
 +        return dd->comm->load[0].pme/dd->comm->load[0].mdf;
 +    }
 +    else
 +    {
 +        return -1.0;
 +    }
 +}
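 +
 +/* Hypothetical numbers for the ratio above: load[0].pme = 60 and
 + * load[0].mdf = 80 (cycles) give a PME mesh/force ratio of 0.75;
 + * -1 is returned when no PME cycles have been counted yet.
 + */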
 +
 +static void dd_print_load(FILE *fplog,gmx_domdec_t *dd,gmx_large_int_t step)
 +{
 +    int flags,d;
 +    char buf[22];
 +    
 +    flags = dd_load_flags(dd);
 +    if (flags)
 +    {
 +        fprintf(fplog,
 +                "DD  load balancing is limited by minimum cell size in dimension");
 +        for(d=0; d<dd->ndim; d++)
 +        {
 +            if (flags & (1<<d))
 +            {
 +                fprintf(fplog," %c",dim2char(dd->dim[d]));
 +            }
 +        }
 +        fprintf(fplog,"\n");
 +    }
 +    fprintf(fplog,"DD  step %s",gmx_step_str(step,buf));
 +    if (dd->comm->bDynLoadBal)
 +    {
 +        fprintf(fplog,"  vol min/aver %5.3f%c",
 +                dd_vol_min(dd),flags ? '!' : ' ');
 +    }
 +    fprintf(fplog," load imb.: force %4.1f%%",dd_f_imbal(dd)*100);
 +    if (dd->comm->cycl_n[ddCyclPME])
 +    {
 +        fprintf(fplog,"  pme mesh/force %5.3f",dd_pme_f_ratio(dd));
 +    }
 +    fprintf(fplog,"\n\n");
 +}
 +
 +static void dd_print_load_verbose(gmx_domdec_t *dd)
 +{
 +    if (dd->comm->bDynLoadBal)
 +    {
 +        fprintf(stderr,"vol %4.2f%c ",
 +                dd_vol_min(dd),dd_load_flags(dd) ? '!' : ' ');
 +    }
 +    fprintf(stderr,"imb F %2d%% ",(int)(dd_f_imbal(dd)*100+0.5));
 +    if (dd->comm->cycl_n[ddCyclPME])
 +    {
 +        fprintf(stderr,"pme/F %4.2f ",dd_pme_f_ratio(dd));
 +    }
 +}
 +
 +#ifdef GMX_MPI
 +static void make_load_communicator(gmx_domdec_t *dd, int dim_ind,ivec loc)
 +{
 +    MPI_Comm  c_row;
 +    int  dim, i, rank;
 +    ivec loc_c;
 +    gmx_domdec_root_t *root;
 +    gmx_bool bPartOfGroup = FALSE;
 +    
 +    dim = dd->dim[dim_ind];
 +    copy_ivec(loc,loc_c);
 +    for(i=0; i<dd->nc[dim]; i++)
 +    {
 +        loc_c[dim] = i;
 +        rank = dd_index(dd->nc,loc_c);
 +        if (rank == dd->rank)
 +        {
 +            /* This process is part of the group */
 +            bPartOfGroup = TRUE;
 +        }
 +    }
 +    MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup?0:MPI_UNDEFINED, dd->rank,
 +                   &c_row);
 +    if (bPartOfGroup)
 +    {
 +        dd->comm->mpi_comm_load[dim_ind] = c_row;
 +        if (dd->comm->eDLB != edlbNO)
 +        {
 +            if (dd->ci[dim] == dd->master_ci[dim])
 +            {
 +                /* This is the root process of this row */
 +                snew(dd->comm->root[dim_ind],1);
 +                root = dd->comm->root[dim_ind];
 +                snew(root->cell_f,DD_CELL_F_SIZE(dd,dim_ind));
 +                snew(root->old_cell_f,dd->nc[dim]+1);
 +                snew(root->bCellMin,dd->nc[dim]);
 +                if (dim_ind > 0)
 +                {
 +                    snew(root->cell_f_max0,dd->nc[dim]);
 +                    snew(root->cell_f_min1,dd->nc[dim]);
 +                    snew(root->bound_min,dd->nc[dim]);
 +                    snew(root->bound_max,dd->nc[dim]);
 +                }
 +                snew(root->buf_ncd,dd->nc[dim]);
 +            }
 +            else
 +            {
 +                /* This is not a root process, we only need to receive cell_f */
 +                snew(dd->comm->cell_f_row,DD_CELL_F_SIZE(dd,dim_ind));
 +            }
 +        }
 +        if (dd->ci[dim] == dd->master_ci[dim])
 +        {
 +            snew(dd->comm->load[dim_ind].load,dd->nc[dim]*DD_NLOAD_MAX);
 +        }
 +    }
 +}
 +#endif
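 +
 +/* Sketch of what the split above produces: all DD ranks that share the
 + * coordinates in loc for every dimension except dd->dim[dim_ind] pass
 + * color 0 and end up together in one "row" communicator c_row; every
 + * other rank passes MPI_UNDEFINED and receives MPI_COMM_NULL.
 + */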
 +
 +static void make_load_communicators(gmx_domdec_t *dd)
 +{
 +#ifdef GMX_MPI
 +    int  dim0,dim1,i,j;
 +    ivec loc;
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"Making load communicators\n");
 +    }
 +
 +    snew(dd->comm->load,dd->ndim);
 +    snew(dd->comm->mpi_comm_load,dd->ndim);
 +
 +    clear_ivec(loc);
 +    make_load_communicator(dd,0,loc);
 +    if (dd->ndim > 1)
 +    {
 +        dim0 = dd->dim[0];
 +        for(i=0; i<dd->nc[dim0]; i++)
 +        {
 +            loc[dim0] = i;
 +            make_load_communicator(dd,1,loc);
 +        }
 +    }
 +    if (dd->ndim > 2)
 +    {
 +        dim0 = dd->dim[0];
 +        for(i=0; i<dd->nc[dim0]; i++)
 +        {
 +            loc[dim0] = i;
 +            dim1 = dd->dim[1];
 +            for(j=0; j<dd->nc[dim1]; j++)
 +            {
 +                loc[dim1] = j;
 +                make_load_communicator(dd,2,loc);
 +            }
 +        }
 +    }
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"Finished making load communicators\n");
 +    }
 +#endif
 +}
 +
 +void setup_dd_grid(FILE *fplog,gmx_domdec_t *dd)
 +{
 +    gmx_bool bZYX;
 +    int  d,dim,i,j,m;
 +    ivec tmp,s;
 +    int  nzone,nzonep;
 +    ivec dd_zp[DD_MAXIZONE];
 +    gmx_domdec_zones_t *zones;
 +    gmx_domdec_ns_ranges_t *izone;
 +    
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        dim = dd->dim[d];
 +        copy_ivec(dd->ci,tmp);
 +        tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
 +        dd->neighbor[d][0] = ddcoord2ddnodeid(dd,tmp);
 +        copy_ivec(dd->ci,tmp);
 +        tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
 +        dd->neighbor[d][1] = ddcoord2ddnodeid(dd,tmp);
 +        if (debug)
 +        {
 +            fprintf(debug,"DD rank %d neighbor ranks in dir %d are + %d - %d\n",
 +                    dd->rank,dim,
 +                    dd->neighbor[d][0],
 +                    dd->neighbor[d][1]);
 +        }
 +    }
 +    
 +    if (DDMASTER(dd))
 +    {
 +        fprintf(stderr,"Making %dD domain decomposition %d x %d x %d\n",
 +          dd->ndim,dd->nc[XX],dd->nc[YY],dd->nc[ZZ]);
 +    }
 +    if (fplog)
 +    {
 +        fprintf(fplog,"\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
 +                dd->ndim,
 +                dd->nc[XX],dd->nc[YY],dd->nc[ZZ],
 +                dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
 +    }
 +    switch (dd->ndim)
 +    {
 +    case 3:
 +        nzone  = dd_z3n;
 +        nzonep = dd_zp3n;
 +        for(i=0; i<nzonep; i++)
 +        {
 +            copy_ivec(dd_zp3[i],dd_zp[i]);
 +        }
 +        break;
 +    case 2:
 +        nzone  = dd_z2n;
 +        nzonep = dd_zp2n;
 +        for(i=0; i<nzonep; i++)
 +        {
 +            copy_ivec(dd_zp2[i],dd_zp[i]);
 +        }
 +        break;
 +    case 1:
 +        nzone  = dd_z1n;
 +        nzonep = dd_zp1n;
 +        for(i=0; i<nzonep; i++)
 +        {
 +            copy_ivec(dd_zp1[i],dd_zp[i]);
 +        }
 +        break;
 +    default:
 +        gmx_fatal(FARGS,"Can only do 1, 2 or 3D domain decomposition");
 +        nzone = 0;
 +        nzonep = 0;
 +    }
 +
 +    zones = &dd->comm->zones;
 +
 +    for(i=0; i<nzone; i++)
 +    {
 +        m = 0;
 +        clear_ivec(zones->shift[i]);
 +        for(d=0; d<dd->ndim; d++)
 +        {
 +            zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
 +        }
 +    }
 +    
 +    zones->n = nzone;
 +    for(i=0; i<nzone; i++)
 +    {
 +        for(d=0; d<DIM; d++)
 +        {
 +            s[d] = dd->ci[d] - zones->shift[i][d];
 +            if (s[d] < 0)
 +            {
 +                s[d] += dd->nc[d];
 +            }
 +            else if (s[d] >= dd->nc[d])
 +            {
 +                s[d] -= dd->nc[d];
 +            }
 +        }
 +    }
 +    zones->nizone = nzonep;
 +    for(i=0; i<zones->nizone; i++)
 +    {
 +        if (dd_zp[i][0] != i)
 +        {
 +            gmx_fatal(FARGS,"Internal inconsistency in the dd grid setup");
 +        }
 +        izone = &zones->izone[i];
 +        izone->j0 = dd_zp[i][1];
 +        izone->j1 = dd_zp[i][2];
 +        for(dim=0; dim<DIM; dim++)
 +        {
 +            if (dd->nc[dim] == 1)
 +            {
 +                /* All shifts should be allowed */
 +                izone->shift0[dim] = -1;
 +                izone->shift1[dim] = 1;
 +            }
 +            else
 +            {
 +                /*
 +                  izone->shift0[d] = 0;
 +                  izone->shift1[d] = 0;
 +                  for(j=izone->j0; j<izone->j1; j++) {
 +                  if (dd->shift[j][d] > dd->shift[i][d])
 +                  izone->shift0[d] = -1;
 +                  if (dd->shift[j][d] < dd->shift[i][d])
 +                  izone->shift1[d] = 1;
 +                  }
 +                */
 +                
 +                int shift_diff;
 +                
 +                /* Assume the shifts are not more than one cell */
 +                izone->shift0[dim] = 1;
 +                izone->shift1[dim] = -1;
 +                for(j=izone->j0; j<izone->j1; j++)
 +                {
 +                    shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
 +                    if (shift_diff < izone->shift0[dim])
 +                    {
 +                        izone->shift0[dim] = shift_diff;
 +                    }
 +                    if (shift_diff > izone->shift1[dim])
 +                    {
 +                        izone->shift1[dim] = shift_diff;
 +                    }
 +                }
 +            }
 +        }
 +    }
 +    
 +    if (dd->comm->eDLB != edlbNO)
 +    {
 +        snew(dd->comm->root,dd->ndim);
 +    }
 +    
 +    if (dd->comm->bRecordLoad)
 +    {
 +        make_load_communicators(dd);
 +    }
 +}
 +
 +static void make_pp_communicator(FILE *fplog,t_commrec *cr,int reorder)
 +{
 +    gmx_domdec_t *dd;
 +    gmx_domdec_comm_t *comm;
 +    int  i,rank,*buf;
 +    ivec periods;
 +#ifdef GMX_MPI
 +    MPI_Comm comm_cart;
 +#endif
 +    
 +    dd = cr->dd;
 +    comm = dd->comm;
 +    
 +#ifdef GMX_MPI
 +    if (comm->bCartesianPP)
 +    {
 +        /* Set up cartesian communication for the particle-particle part */
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Will use a Cartesian communicator: %d x %d x %d\n",
 +                    dd->nc[XX],dd->nc[YY],dd->nc[ZZ]);
 +        }
 +        
 +        for(i=0; i<DIM; i++)
 +        {
 +            periods[i] = TRUE;
 +        }
 +        MPI_Cart_create(cr->mpi_comm_mygroup,DIM,dd->nc,periods,reorder,
 +                        &comm_cart);
 +        /* We overwrite the old communicator with the new cartesian one */
 +        cr->mpi_comm_mygroup = comm_cart;
 +    }
 +    
 +    dd->mpi_comm_all = cr->mpi_comm_mygroup;
 +    MPI_Comm_rank(dd->mpi_comm_all,&dd->rank);
 +    
 +    if (comm->bCartesianPP_PME)
 +    {
 +        /* Since we want to use the original Cartesian setup for the
 +         * simulation, and not the one after the split, we need to make
 +         * an index.
 +         */
 +        snew(comm->ddindex2ddnodeid,dd->nnodes);
 +        comm->ddindex2ddnodeid[dd_index(dd->nc,dd->ci)] = dd->rank;
 +        gmx_sumi(dd->nnodes,comm->ddindex2ddnodeid,cr);
 +        /* Get the rank of the DD master,
 +         * above we made sure that the master node is a PP node.
 +         */
 +        if (MASTER(cr))
 +        {
 +            rank = dd->rank;
 +        }
 +        else
 +        {
 +            rank = 0;
 +        }
 +        MPI_Allreduce(&rank,&dd->masterrank,1,MPI_INT,MPI_SUM,dd->mpi_comm_all);
 +    }
 +    else if (comm->bCartesianPP)
 +    {
 +        if (cr->npmenodes == 0)
 +        {
 +            /* The PP communicator is also
 +             * the communicator for this simulation
 +             */
 +            cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
 +        }
 +        cr->nodeid = dd->rank;
 +        
 +        MPI_Cart_coords(dd->mpi_comm_all,dd->rank,DIM,dd->ci);
 +        
 +        /* We need to make an index to go from the coordinates
 +         * to the nodeid of this simulation.
 +         */
 +        snew(comm->ddindex2simnodeid,dd->nnodes);
 +        snew(buf,dd->nnodes);
 +        if (cr->duty & DUTY_PP)
 +        {
 +            buf[dd_index(dd->nc,dd->ci)] = cr->sim_nodeid;
 +        }
 +        /* Communicate the ddindex to simulation nodeid index */
 +        MPI_Allreduce(buf,comm->ddindex2simnodeid,dd->nnodes,MPI_INT,MPI_SUM,
 +                      cr->mpi_comm_mysim);
 +        sfree(buf);
 +        
 +        /* Determine the master coordinates and rank.
 +         * The DD master should be the same node as the master of this sim.
 +         */
 +        for(i=0; i<dd->nnodes; i++)
 +        {
 +            if (comm->ddindex2simnodeid[i] == 0)
 +            {
 +                ddindex2xyz(dd->nc,i,dd->master_ci);
 +                MPI_Cart_rank(dd->mpi_comm_all,dd->master_ci,&dd->masterrank);
 +            }
 +        }
 +        if (debug)
 +        {
 +            fprintf(debug,"The master rank is %d\n",dd->masterrank);
 +        }
 +    }
 +    else
 +    {
 +        /* No Cartesian communicators */
 +        /* We use the rank in dd->mpi_comm_all as the DD index */
 +        ddindex2xyz(dd->nc,dd->rank,dd->ci);
 +        /* The simulation master nodeid is 0, so the DD master rank is also 0 */
 +        dd->masterrank = 0;
 +        clear_ivec(dd->master_ci);
 +    }
 +#endif
 +  
 +    if (fplog)
 +    {
 +        fprintf(fplog,
 +                "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
 +                dd->rank,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
 +    }
 +    if (debug)
 +    {
 +        fprintf(debug,
 +                "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
 +                dd->rank,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
 +    }
 +}
 +
 +static void receive_ddindex2simnodeid(t_commrec *cr)
 +{
 +    gmx_domdec_t *dd;
 +    
 +    gmx_domdec_comm_t *comm;
 +    int  *buf;
 +    
 +    dd = cr->dd;
 +    comm = dd->comm;
 +    
 +#ifdef GMX_MPI
 +    if (!comm->bCartesianPP_PME && comm->bCartesianPP)
 +    {
 +        snew(comm->ddindex2simnodeid,dd->nnodes);
 +        snew(buf,dd->nnodes);
 +        if (cr->duty & DUTY_PP)
 +        {
 +            buf[dd_index(dd->nc,dd->ci)] = cr->sim_nodeid;
 +        }
 +#ifdef GMX_MPI
 +        /* Communicate the ddindex to simulation nodeid index */
 +        MPI_Allreduce(buf,comm->ddindex2simnodeid,dd->nnodes,MPI_INT,MPI_SUM,
 +                      cr->mpi_comm_mysim);
 +#endif
 +        sfree(buf);
 +    }
 +#endif
 +}
 +
 +static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
 +                                                     int ncg,int natoms)
 +{
 +    gmx_domdec_master_t *ma;
 +    int i;
 +
 +    snew(ma,1);
 +    
 +    snew(ma->ncg,dd->nnodes);
 +    snew(ma->index,dd->nnodes+1);
 +    snew(ma->cg,ncg);
 +    snew(ma->nat,dd->nnodes);
 +    snew(ma->ibuf,dd->nnodes*2);
 +    snew(ma->cell_x,DIM);
 +    for(i=0; i<DIM; i++)
 +    {
 +        snew(ma->cell_x[i],dd->nc[i]+1);
 +    }
 +
 +    if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
 +    {
 +        ma->vbuf = NULL;
 +    }
 +    else
 +    {
 +        snew(ma->vbuf,natoms);
 +    }
 +
 +    return ma;
 +}
 +
 +static void split_communicator(FILE *fplog,t_commrec *cr,int dd_node_order,
 +                               int reorder)
 +{
 +    gmx_domdec_t *dd;
 +    gmx_domdec_comm_t *comm;
 +    int  i,rank;
 +    gmx_bool bDiv[DIM];
 +    ivec periods;
 +#ifdef GMX_MPI
 +    MPI_Comm comm_cart;
 +#endif
 +    
 +    dd = cr->dd;
 +    comm = dd->comm;
 +    
 +    if (comm->bCartesianPP)
 +    {
 +        for(i=1; i<DIM; i++)
 +        {
 +            bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
 +        }
 +        if (bDiv[YY] || bDiv[ZZ])
 +        {
 +            comm->bCartesianPP_PME = TRUE;
 +            /* If we have 2D PME decomposition, which is always in x+y,
 +             * we stack the PME only nodes in z.
 +             * Otherwise we choose the direction that provides the thinnest slab
 +             * of PME only nodes as this will have the least effect
 +             * on the PP communication.
 +             * But for the PME communication the opposite might be better.
 +             */
 +            if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
 +                             !bDiv[YY] ||
 +                             dd->nc[YY] > dd->nc[ZZ]))
 +            {
 +                comm->cartpmedim = ZZ;
 +            }
 +            else
 +            {
 +                comm->cartpmedim = YY;
 +            }
 +            comm->ntot[comm->cartpmedim]
 +                += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
 +        }
 +        else if (fplog)
 +        {
 +            fprintf(fplog,"#pmenodes (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n",cr->npmenodes,dd->nc[XX],dd->nc[YY],dd->nc[XX],dd->nc[ZZ]);
 +            fprintf(fplog,
 +                    "Will not use a Cartesian communicator for PP <-> PME\n\n");
 +        }
 +    }
 +    
 +#ifdef GMX_MPI
 +    if (comm->bCartesianPP_PME)
 +    {
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n",comm->ntot[XX],comm->ntot[YY],comm->ntot[ZZ]);
 +        }
 +        
 +        for(i=0; i<DIM; i++)
 +        {
 +            periods[i] = TRUE;
 +        }
 +        MPI_Cart_create(cr->mpi_comm_mysim,DIM,comm->ntot,periods,reorder,
 +                        &comm_cart);
 +        
 +        MPI_Comm_rank(comm_cart,&rank);
 +        if (MASTERNODE(cr) && rank != 0)
 +        {
 +            gmx_fatal(FARGS,"MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
 +        }
 +        
 +        /* With this assignment we lose the link to the original communicator,
 +         * which will usually be MPI_COMM_WORLD, unless we have multisim.
 +         */
 +        cr->mpi_comm_mysim = comm_cart;
 +        cr->sim_nodeid = rank;
 +        
 +        MPI_Cart_coords(cr->mpi_comm_mysim,cr->sim_nodeid,DIM,dd->ci);
 +        
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Cartesian nodeid %d, coordinates %d %d %d\n\n",
 +                    cr->sim_nodeid,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
 +        }
 +        
 +        if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
 +        {
 +            cr->duty = DUTY_PP;
 +        }
 +        if (cr->npmenodes == 0 ||
 +            dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
 +        {
 +            cr->duty = DUTY_PME;
 +        }
 +        
 +        /* Split the sim communicator into PP and PME only nodes */
 +        MPI_Comm_split(cr->mpi_comm_mysim,
 +                       cr->duty,
 +                       dd_index(comm->ntot,dd->ci),
 +                       &cr->mpi_comm_mygroup);
 +    }
 +    else
 +    {
 +        switch (dd_node_order)
 +        {
 +        case ddnoPP_PME:
 +            if (fplog)
 +            {
 +                fprintf(fplog,"Order of the nodes: PP first, PME last\n");
 +            }
 +            break;
 +        case ddnoINTERLEAVE:
 +            /* Interleave the PP-only and PME-only nodes,
 +             * as on clusters with dual-core machines this will double
 +             * the communication bandwidth of the PME processes
 +             * and thus speed up the PP <-> PME and inter PME communication.
 +             */
 +            if (fplog)
 +            {
 +                fprintf(fplog,"Interleaving PP and PME nodes\n");
 +            }
 +            comm->pmenodes = dd_pmenodes(cr);
 +            break;
 +        case ddnoCARTESIAN:
 +            break;
 +        default:
 +            gmx_fatal(FARGS,"Unknown dd_node_order=%d",dd_node_order);
 +        }
 +    
 +        if (dd_simnode2pmenode(cr,cr->sim_nodeid) == -1)
 +        {
 +            cr->duty = DUTY_PME;
 +        }
 +        else
 +        {
 +            cr->duty = DUTY_PP;
 +        }
 +        
 +        /* Split the sim communicator into PP and PME only nodes */
 +        MPI_Comm_split(cr->mpi_comm_mysim,
 +                       cr->duty,
 +                       cr->nodeid,
 +                       &cr->mpi_comm_mygroup);
 +        MPI_Comm_rank(cr->mpi_comm_mygroup,&cr->nodeid);
 +    }
 +#endif
 +
 +    if (fplog)
 +    {
 +        fprintf(fplog,"This is a %s only node\n\n",
 +                (cr->duty & DUTY_PP) ? "particle-particle" : "PME-mesh");
 +    }
 +}
 +
 +void make_dd_communicators(FILE *fplog,t_commrec *cr,int dd_node_order)
 +{
 +    gmx_domdec_t *dd;
 +    gmx_domdec_comm_t *comm;
 +    int CartReorder;
 +    
 +    dd = cr->dd;
 +    comm = dd->comm;
 +    
 +    copy_ivec(dd->nc,comm->ntot);
 +    
 +    comm->bCartesianPP = (dd_node_order == ddnoCARTESIAN);
 +    comm->bCartesianPP_PME = FALSE;
 +    
 +    /* Reorder the nodes by default. This might change the MPI ranks.
 +     * Real reordering is only supported on very few architectures,
 +     * Blue Gene being one of them.
 +     */
 +    CartReorder = (getenv("GMX_NO_CART_REORDER") == NULL);
 +    
 +    if (cr->npmenodes > 0)
 +    {
 +        /* Split the communicator into a PP and PME part */
 +        split_communicator(fplog,cr,dd_node_order,CartReorder);
 +        if (comm->bCartesianPP_PME)
 +        {
 +            /* We (possibly) reordered the nodes in split_communicator,
 +             * so it is no longer required in make_pp_communicator.
 +             */
 +            CartReorder = FALSE;
 +        }
 +    }
 +    else
 +    {
 +        /* All nodes do PP and PME */
 +#ifdef GMX_MPI    
 +        /* We do not require separate communicators */
 +        cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
 +#endif
 +    }
 +    
 +    if (cr->duty & DUTY_PP)
 +    {
 +        /* Copy or make a new PP communicator */
 +        make_pp_communicator(fplog,cr,CartReorder);
 +    }
 +    else
 +    {
 +        receive_ddindex2simnodeid(cr);
 +    }
 +    
 +    if (!(cr->duty & DUTY_PME))
 +    {
 +        /* Set up the communication to our PME node */
 +        dd->pme_nodeid = dd_simnode2pmenode(cr,cr->sim_nodeid);
 +        dd->pme_receive_vir_ener = receive_vir_ener(cr);
 +        if (debug)
 +        {
 +            fprintf(debug,"My pme_nodeid %d receive ener %d\n",
 +                    dd->pme_nodeid,dd->pme_receive_vir_ener);
 +        }
 +    }
 +    else
 +    {
 +        dd->pme_nodeid = -1;
 +    }
 +
 +    if (DDMASTER(dd))
 +    {
 +        dd->ma = init_gmx_domdec_master_t(dd,
 +                                          comm->cgs_gl.nr,
 +                                          comm->cgs_gl.index[comm->cgs_gl.nr]);
 +    }
 +}
 +
 +static real *get_slb_frac(FILE *fplog,const char *dir,int nc,const char *size_string)
 +{
 +    real *slb_frac,tot;
 +    int  i,n;
 +    double dbl;
 +    
 +    slb_frac = NULL;
 +    if (nc > 1 && size_string != NULL)
 +    {
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Using static load balancing for the %s direction\n",
 +                    dir);
 +        }
 +        snew(slb_frac,nc);
 +        tot = 0;
 +        for (i=0; i<nc; i++)
 +        {
 +            dbl = 0;
 +            sscanf(size_string,"%lf%n",&dbl,&n);
 +            if (dbl == 0)
 +            {
 +                gmx_fatal(FARGS,"Incorrect or not enough DD cell size entries for direction %s: '%s'",dir,size_string);
 +            }
 +            slb_frac[i] = dbl;
 +            size_string += n;
 +            tot += slb_frac[i];
 +        }
 +        /* Normalize */
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Relative cell sizes:");
 +        }
 +        for (i=0; i<nc; i++)
 +        {
 +            slb_frac[i] /= tot;
 +            if (fplog)
 +            {
 +                fprintf(fplog," %5.3f",slb_frac[i]);
 +            }
 +        }
 +        if (fplog)
 +        {
 +            fprintf(fplog,"\n");
 +        }
 +    }
 +    
 +    return slb_frac;
 +}
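 +
 +/* A hypothetical usage sketch of the parsing above: with nc = 4 and a
 + * cell size string "1 2 2 1" (the sizex/sizey/sizez strings supplied on
 + * the command line), the entries sum to 6 and are normalized to the
 + * relative cell sizes
 + *     0.167  0.333  0.333  0.167
 + * along that direction.
 + */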
 +
 +static int multi_body_bondeds_count(gmx_mtop_t *mtop)
 +{
 +    int n,nmol,ftype;
 +    gmx_mtop_ilistloop_t iloop;
 +    t_ilist *il;
 +    
 +    n = 0;
 +    iloop = gmx_mtop_ilistloop_init(mtop);
 +    while (gmx_mtop_ilistloop_next(iloop,&il,&nmol))
 +    {
 +        for(ftype=0; ftype<F_NRE; ftype++)
 +        {
 +            if ((interaction_function[ftype].flags & IF_BOND) &&
 +                NRAL(ftype) >  2)
 +            {
 +                n += nmol*il[ftype].nr/(1 + NRAL(ftype));
 +            }
 +        }
 +    }
 +
 +    return n;
 +}
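 +
 +/* A worked example of the count above, with hypothetical numbers:
 + * for a molecule type with nmol = 100 copies and an angle ilist
 + * (NRAL = 3, so 1 + 3 = 4 ints per entry) of il[ftype].nr = 400,
 + * this adds 100*400/4 = 10000 multi-body bonded interactions to n.
 + */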
 +
 +static int dd_nst_env(FILE *fplog,const char *env_var,int def)
 +{
 +    char *val;
 +    int  nst;
 +    
 +    nst = def;
 +    val = getenv(env_var);
 +    if (val)
 +    {
 +        if (sscanf(val,"%d",&nst) <= 0)
 +        {
 +            nst = 1;
 +        }
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Found env.var. %s = %s, using value %d\n",
 +                    env_var,val,nst);
 +        }
 +    }
 +    
 +    return nst;
 +}
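 +
 +/* Hypothetical usage: with GMX_DD_DEBUG=1 set in the environment,
 + * dd_nst_env(fplog,"GMX_DD_DEBUG",0) returns 1 and logs the value;
 + * an unset variable returns the default, and an unparsable value
 + * falls back to 1.
 + */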
 +
 +static void dd_warning(t_commrec *cr,FILE *fplog,const char *warn_string)
 +{
 +    if (MASTER(cr))
 +    {
 +        fprintf(stderr,"\n%s\n",warn_string);
 +    }
 +    if (fplog)
 +    {
 +        fprintf(fplog,"\n%s\n",warn_string);
 +    }
 +}
 +
 +static void check_dd_restrictions(t_commrec *cr,gmx_domdec_t *dd,
 +                                  t_inputrec *ir,FILE *fplog)
 +{
 +    if (ir->ePBC == epbcSCREW &&
 +        (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
 +    {
 +        gmx_fatal(FARGS,"With pbc=%s can only do domain decomposition in the x-direction",epbc_names[ir->ePBC]);
 +    }
 +
 +    if (ir->ns_type == ensSIMPLE)
 +    {
 +        gmx_fatal(FARGS,"Domain decomposition does not support simple neighbor searching, use grid searching or use particle decomposition");
 +    }
 +
 +    if (ir->nstlist == 0)
 +    {
 +        gmx_fatal(FARGS,"Domain decomposition does not work with nstlist=0");
 +    }
 +
 +    if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
 +    {
 +        dd_warning(cr,fplog,"comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
 +    }
 +}
 +
 +static real average_cellsize_min(gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
 +{
 +    int  di,d;
 +    real r;
 +
 +    r = ddbox->box_size[XX];
 +    for(di=0; di<dd->ndim; di++)
 +    {
 +        d = dd->dim[di];
 +        /* Check using the initial average cell size */
 +        r = min(r,ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
 +    }
 +
 +    return r;
 +}
 +
 +static int check_dlb_support(FILE *fplog,t_commrec *cr,
 +                             const char *dlb_opt,gmx_bool bRecordLoad,
 +                             unsigned long Flags,t_inputrec *ir)
 +{
 +    gmx_domdec_t *dd;
 +    int  eDLB=-1;
 +    char buf[STRLEN];
 +
 +    switch (dlb_opt[0])
 +    {
 +    case 'a': eDLB = edlbAUTO; break;
 +    case 'n': eDLB = edlbNO;   break;
 +    case 'y': eDLB = edlbYES;  break;
 +    default: gmx_incons("Unknown dlb_opt");
 +    }
 +
 +    if (Flags & MD_RERUN)
 +    {
 +        return edlbNO;
 +    }
 +
 +    if (!EI_DYNAMICS(ir->eI))
 +    {
 +        if (eDLB == edlbYES)
 +        {
 +            sprintf(buf,"NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n",EI(ir->eI));
 +            dd_warning(cr,fplog,buf);
 +        }
 +            
 +        return edlbNO;
 +    }
 +
 +    if (!bRecordLoad)
 +    {
 +        dd_warning(cr,fplog,"NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
 +
 +        return edlbNO;
 +    }
 +
 +    if (Flags & MD_REPRODUCIBLE)
 +    {
 +        switch (eDLB)
 +        {
 +        case edlbNO:
 +            break;
 +        case edlbAUTO:
 +            dd_warning(cr,fplog,"NOTE: reproducibility requested, will not use dynamic load balancing\n");
 +            eDLB = edlbNO;
 +            break;
 +        case edlbYES:
 +            dd_warning(cr,fplog,"WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
 +            break;
 +        default:
 +            gmx_fatal(FARGS,"Death horror: undefined case (%d) for load balancing choice",eDLB);
 +            break;
 +        }
 +    }
 +
 +    return eDLB;
 +}
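 +
 +/* Illustrative example of the decision logic above: dlb_opt = "auto"
 + * with a dynamical integrator, cycle counting available and neither
 + * MD_RERUN nor MD_REPRODUCIBLE set yields edlbAUTO; the same "auto"
 + * with reproducibility requested is demoted to edlbNO, while an
 + * explicit "yes" only triggers a warning.
 + */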
 +
 +static void set_dd_dim(FILE *fplog,gmx_domdec_t *dd)
 +{
 +    int dim;
 +
 +    dd->ndim = 0;
 +    if (getenv("GMX_DD_ORDER_ZYX") != NULL)
 +    {
 +        /* Decomposition order z,y,x */
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Using domain decomposition order z, y, x\n");
 +        }
 +        for(dim=DIM-1; dim>=0; dim--)
 +        {
 +            if (dd->nc[dim] > 1)
 +            {
 +                dd->dim[dd->ndim++] = dim;
 +            }
 +        }
 +    }
 +    else
 +    {
 +        /* Decomposition order x,y,z */
 +        for(dim=0; dim<DIM; dim++)
 +        {
 +            if (dd->nc[dim] > 1)
 +            {
 +                dd->dim[dd->ndim++] = dim;
 +            }
 +        }
 +    }
 +}
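 +
 +/* Example of the ordering above, with a hypothetical grid nc = {4,2,1}:
 + * the default order gives ndim = 2 with dim[0] = XX and dim[1] = YY,
 + * while GMX_DD_ORDER_ZYX gives dim[0] = YY and dim[1] = XX
 + * (z is skipped in both cases because nc[ZZ] = 1).
 + */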
 +
 +static gmx_domdec_comm_t *init_dd_comm()
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  i;
 +
 +    snew(comm,1);
 +    snew(comm->cggl_flag,DIM*2);
 +    snew(comm->cgcm_state,DIM*2);
 +    for(i=0; i<DIM*2; i++)
 +    {
 +        comm->cggl_flag_nalloc[i]  = 0;
 +        comm->cgcm_state_nalloc[i] = 0;
 +    }
 +    
 +    comm->nalloc_int = 0;
 +    comm->buf_int    = NULL;
 +
 +    vec_rvec_init(&comm->vbuf);
 +
 +    comm->n_load_have    = 0;
 +    comm->n_load_collect = 0;
 +
 +    for(i=0; i<ddnatNR-ddnatZONE; i++)
 +    {
 +        comm->sum_nat[i] = 0;
 +    }
 +    comm->ndecomp = 0;
 +    comm->nload   = 0;
 +    comm->load_step = 0;
 +    comm->load_sum  = 0;
 +    comm->load_max  = 0;
 +    clear_ivec(comm->load_lim);
 +    comm->load_mdf  = 0;
 +    comm->load_pme  = 0;
 +
 +    return comm;
 +}
 +
 +gmx_domdec_t *init_domain_decomposition(FILE *fplog,t_commrec *cr,
 +                                        unsigned long Flags,
 +                                        ivec nc,
 +                                        real comm_distance_min,real rconstr,
 +                                        const char *dlb_opt,real dlb_scale,
 +                                        const char *sizex,const char *sizey,const char *sizez,
 +                                        gmx_mtop_t *mtop,t_inputrec *ir,
 +                                        matrix box,rvec *x,
 +                                        gmx_ddbox_t *ddbox,
 +                                        int *npme_x,int *npme_y)
 +{
 +    gmx_domdec_t *dd;
 +    gmx_domdec_comm_t *comm;
 +    int  recload;
 +    int  d,i,j;
 +    real r_2b,r_mb,r_bonded=-1,r_bonded_limit=-1,limit,acs;
 +    gmx_bool bC;
 +    char buf[STRLEN];
 +    
 +    if (fplog)
 +    {
 +        fprintf(fplog,
 +                "\nInitializing Domain Decomposition on %d nodes\n",cr->nnodes);
 +    }
 +    
 +    snew(dd,1);
 +
 +    dd->comm = init_dd_comm();
 +    comm = dd->comm;
 +
 +    dd->npbcdim   = ePBC2npbcdim(ir->ePBC);
 +    dd->bScrewPBC = (ir->ePBC == epbcSCREW);
 +    
 +    dd->bSendRecv2      = dd_nst_env(fplog,"GMX_DD_SENDRECV2",0);
 +    comm->dlb_scale_lim = dd_nst_env(fplog,"GMX_DLB_MAX",10);
 +    comm->eFlop         = dd_nst_env(fplog,"GMX_DLB_FLOP",0);
 +    recload             = dd_nst_env(fplog,"GMX_DD_LOAD",1);
 +    comm->nstSortCG     = dd_nst_env(fplog,"GMX_DD_SORT",1);
 +    comm->nstDDDump     = dd_nst_env(fplog,"GMX_DD_DUMP",0);
 +    comm->nstDDDumpGrid = dd_nst_env(fplog,"GMX_DD_DUMP_GRID",0);
 +    comm->DD_debug      = dd_nst_env(fplog,"GMX_DD_DEBUG",0);
 +
 +    dd->pme_recv_f_alloc = 0;
 +    dd->pme_recv_f_buf = NULL;
 +
 +    if (dd->bSendRecv2 && fplog)
 +    {
 +        fprintf(fplog,"Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
 +    }
 +    if (comm->eFlop)
 +    {
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Will load balance based on FLOP count\n");
 +        }
 +        if (comm->eFlop > 1)
 +        {
 +            srand(1+cr->nodeid);
 +        }
 +        comm->bRecordLoad = TRUE;
 +    }
 +    else
 +    {
 +        comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
 +    }
 +    
 +    comm->eDLB = check_dlb_support(fplog,cr,dlb_opt,comm->bRecordLoad,Flags,ir);
 +    
 +    comm->bDynLoadBal = (comm->eDLB == edlbYES);
 +    if (fplog)
 +    {
 +        fprintf(fplog,"Dynamic load balancing: %s\n",edlb_names[comm->eDLB]);
 +    }
 +    dd->bGridJump = comm->bDynLoadBal;
 +    
 +    if (comm->nstSortCG)
 +    {
 +        if (fplog)
 +        {
 +            if (comm->nstSortCG == 1)
 +            {
 +                fprintf(fplog,"Will sort the charge groups at every domain (re)decomposition\n");
 +            }
 +            else
 +            {
 +                fprintf(fplog,"Will sort the charge groups every %d steps\n",
 +                        comm->nstSortCG);
 +            }
 +        }
 +        snew(comm->sort,1);
 +    }
 +    else
 +    {
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Will not sort the charge groups\n");
 +        }
 +    }
 +
 +    comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
 +    
 +    comm->bInterCGBondeds = (ncg_mtop(mtop) > mtop->mols.nr);
 +    if (comm->bInterCGBondeds)
 +    {
 +        comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
 +    }
 +    else
 +    {
 +        comm->bInterCGMultiBody = FALSE;
 +    }
 +    
 +    dd->bInterCGcons    = inter_charge_group_constraints(mtop);
 +    dd->bInterCGsettles = inter_charge_group_settles(mtop);
 +
 +    if (ir->rlistlong == 0)
 +    {
 +        /* Set the cut-off to some very large value,
 +         * so we don't need if statements everywhere in the code.
 +         * We use sqrt, since the cut-off is squared in some places.
 +         */
 +        comm->cutoff   = GMX_CUTOFF_INF;
 +    }
 +    else
 +    {
 +        comm->cutoff   = ir->rlistlong;
 +    }
 +    comm->cutoff_mbody = 0;
 +    
 +    comm->cellsize_limit = 0;
 +    comm->bBondComm = FALSE;
 +
 +    if (comm->bInterCGBondeds)
 +    {
 +        if (comm_distance_min > 0)
 +        {
 +            comm->cutoff_mbody = comm_distance_min;
 +            if (Flags & MD_DDBONDCOMM)
 +            {
 +                comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
 +            }
 +            else
 +            {
 +                comm->cutoff = max(comm->cutoff,comm->cutoff_mbody);
 +            }
 +            r_bonded_limit = comm->cutoff_mbody;
 +        }
 +        else if (ir->bPeriodicMols)
 +        {
 +            /* Can not easily determine the required cut-off */
 +            dd_warning(cr,fplog,"NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
 +            comm->cutoff_mbody = comm->cutoff/2;
 +            r_bonded_limit = comm->cutoff_mbody;
 +        }
 +        else
 +        {
 +            if (MASTER(cr))
 +            {
 +                dd_bonded_cg_distance(fplog,dd,mtop,ir,x,box,
 +                                      Flags & MD_DDBONDCHECK,&r_2b,&r_mb);
 +            }
 +            gmx_bcast(sizeof(r_2b),&r_2b,cr);
 +            gmx_bcast(sizeof(r_mb),&r_mb,cr);
 +
 +            /* We use an initial margin of 10% for the minimum cell size,
 +             * except when we are just below the non-bonded cut-off.
 +             */
 +            if (Flags & MD_DDBONDCOMM)
 +            {
 +                if (max(r_2b,r_mb) > comm->cutoff)
 +                {
 +                    r_bonded       = max(r_2b,r_mb);
 +                    r_bonded_limit = 1.1*r_bonded;
 +                    comm->bBondComm = TRUE;
 +                }
 +                else
 +                {
 +                    r_bonded       = r_mb;
 +                    r_bonded_limit = min(1.1*r_bonded,comm->cutoff);
 +                }
 +                /* We determine cutoff_mbody later */
 +            }
 +            else
 +            {
 +                /* No special bonded communication,
 +                 * simply increase the DD cut-off.
 +                 */
 +                r_bonded_limit     = 1.1*max(r_2b,r_mb);
 +                comm->cutoff_mbody = r_bonded_limit;
 +                comm->cutoff       = max(comm->cutoff,comm->cutoff_mbody);
 +            }
 +        }
 +        comm->cellsize_limit = max(comm->cellsize_limit,r_bonded_limit);
 +        if (fplog)
 +        {
 +            fprintf(fplog,
 +                    "Minimum cell size due to bonded interactions: %.3f nm\n",
 +                    comm->cellsize_limit);
 +        }
 +    }
 +
 +    if (dd->bInterCGcons && rconstr <= 0)
 +    {
 +        /* There is a cell size limit due to the constraints (P-LINCS) */
 +        rconstr = constr_r_max(fplog,mtop,ir);
 +        if (fplog)
 +        {
 +            fprintf(fplog,
 +                    "Estimated maximum distance required for P-LINCS: %.3f nm\n",
 +                    rconstr);
 +            if (rconstr > comm->cellsize_limit)
 +            {
 +                fprintf(fplog,"This distance will limit the DD cell size, you can override this with -rcon\n");
 +            }
 +        }
 +    }
 +    else if (rconstr > 0 && fplog)
 +    {
 +        /* Here we do not check for dd->bInterCGcons,
 +         * because one can also set a cell size limit for virtual sites only
 +         * and at this point we don't know yet if there are intercg v-sites.
 +         */
 +        fprintf(fplog,
 +                "User supplied maximum distance required for P-LINCS: %.3f nm\n",
 +                rconstr);
 +    }
 +    comm->cellsize_limit = max(comm->cellsize_limit,rconstr);
 +
 +    comm->cgs_gl = gmx_mtop_global_cgs(mtop);
 +
 +    if (nc[XX] > 0)
 +    {
 +        copy_ivec(nc,dd->nc);
 +        set_dd_dim(fplog,dd);
 +        set_ddbox_cr(cr,&dd->nc,ir,box,&comm->cgs_gl,x,ddbox);
 +
 +        if (cr->npmenodes == -1)
 +        {
 +            cr->npmenodes = 0;
 +        }
 +        acs = average_cellsize_min(dd,ddbox);
 +        if (acs < comm->cellsize_limit)
 +        {
 +            if (fplog)
 +            {
 +                fprintf(fplog,"ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n",acs,comm->cellsize_limit);
 +            }
 +            gmx_fatal_collective(FARGS,cr,NULL,
 +                                 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
 +                                 acs,comm->cellsize_limit);
 +        }
 +    }
 +    else
 +    {
 +        set_ddbox_cr(cr,NULL,ir,box,&comm->cgs_gl,x,ddbox);
 +
 +        /* We need to choose the optimal DD grid and possibly PME nodes */
 +        limit = dd_choose_grid(fplog,cr,dd,ir,mtop,box,ddbox,
 +                               comm->eDLB!=edlbNO,dlb_scale,
 +                               comm->cellsize_limit,comm->cutoff,
 +                               comm->bInterCGBondeds,comm->bInterCGMultiBody);
 +        
 +        if (dd->nc[XX] == 0)
 +        {
 +            bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
 +            sprintf(buf,"Change the number of nodes or mdrun option %s%s%s",
 +                    !bC ? "-rdd" : "-rcon",
 +                    comm->eDLB!=edlbNO ? " or -dds" : "",
 +                    bC ? " or your LINCS settings" : "");
 +
 +            gmx_fatal_collective(FARGS,cr,NULL,
 +                                 "There is no domain decomposition for %d nodes that is compatible with the given box and a minimum cell size of %g nm\n"
 +                                 "%s\n"
 +                                 "Look in the log file for details on the domain decomposition",
 +                                 cr->nnodes-cr->npmenodes,limit,buf);
 +        }
 +        set_dd_dim(fplog,dd);
 +    }
 +
 +    if (fplog)
 +    {
 +        fprintf(fplog,
 +                "Domain decomposition grid %d x %d x %d, separate PME nodes %d\n",
 +                dd->nc[XX],dd->nc[YY],dd->nc[ZZ],cr->npmenodes);
 +    }
 +    
 +    dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
 +    if (cr->nnodes - dd->nnodes != cr->npmenodes)
 +    {
 +        gmx_fatal_collective(FARGS,cr,NULL,
 +                             "The size of the domain decomposition grid (%d) does not match the number of nodes (%d). The total number of nodes is %d",
 +                             dd->nnodes,cr->nnodes - cr->npmenodes,cr->nnodes);
 +    }
 +    if (cr->npmenodes > dd->nnodes)
 +    {
 +        gmx_fatal_collective(FARGS,cr,NULL,
 +                             "The number of separate PME nodes (%d) is larger than the number of PP nodes (%d), this is not supported.",cr->npmenodes,dd->nnodes);
 +    }
 +    if (cr->npmenodes > 0)
 +    {
 +        comm->npmenodes = cr->npmenodes;
 +    }
 +    else
 +    {
 +        comm->npmenodes = dd->nnodes;
 +    }
 +
 +    if (EEL_PME(ir->coulombtype))
 +    {
 +        /* The following choices should match those
 +         * in comm_cost_est in domdec_setup.c.
 +         * Note that here the checks have to take into account
 +         * that the decomposition might occur in a different order than xyz
 +         * (for instance through the env.var. GMX_DD_ORDER_ZYX),
 +         * in which case they will not match those in comm_cost_est,
 +         * but since that is mainly for testing purposes that's fine.
 +         */
 +        if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
 +            comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
 +            getenv("GMX_PMEONEDD") == NULL)
 +        {
 +            comm->npmedecompdim = 2;
 +            comm->npmenodes_x   = dd->nc[XX];
 +            comm->npmenodes_y   = comm->npmenodes/comm->npmenodes_x;
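 +            /* For example, 12 PME nodes with dd->nc[XX] = 4 give
 +             * a 4 x 3 PME node grid here.
 +             */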
 +        }
 +        else
 +        {
 +            /* In case nc is 1 in both x and y we could still choose to
 +             * decompose pme in y instead of x, but we use x for simplicity.
 +             */
 +            comm->npmedecompdim = 1;
 +            if (dd->dim[0] == YY)
 +            {
 +                comm->npmenodes_x = 1;
 +                comm->npmenodes_y = comm->npmenodes;
 +            }
 +            else
 +            {
 +                comm->npmenodes_x = comm->npmenodes;
 +                comm->npmenodes_y = 1;
 +            }
 +        }    
 +        if (fplog)
 +        {
 +            fprintf(fplog,"PME domain decomposition: %d x %d x %d\n",
 +                    comm->npmenodes_x,comm->npmenodes_y,1);
 +        }
 +    }
 +    else
 +    {
 +        comm->npmedecompdim = 0;
 +        comm->npmenodes_x   = 0;
 +        comm->npmenodes_y   = 0;
 +    }
 +    
 +    /* Technically we don't need both of these,
 +     * but it simplifies the code not having to recalculate them.
 +     */
 +    *npme_x = comm->npmenodes_x;
 +    *npme_y = comm->npmenodes_y;
 +        
 +    snew(comm->slb_frac,DIM);
 +    if (comm->eDLB == edlbNO)
 +    {
 +        comm->slb_frac[XX] = get_slb_frac(fplog,"x",dd->nc[XX],sizex);
 +        comm->slb_frac[YY] = get_slb_frac(fplog,"y",dd->nc[YY],sizey);
 +        comm->slb_frac[ZZ] = get_slb_frac(fplog,"z",dd->nc[ZZ],sizez);
 +    }
 +
 +    if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
 +    {
 +        if (comm->bBondComm || comm->eDLB != edlbNO)
 +        {
 +            /* Set the bonded communication distance to halfway between
 +             * the minimum and the maximum,
 +             * since the extra communication cost is nearly zero.
 +             */
 +            acs = average_cellsize_min(dd,ddbox);
 +            comm->cutoff_mbody = 0.5*(r_bonded + acs);
 +            if (comm->eDLB != edlbNO)
 +            {
 +                /* Check if this does not limit the scaling */
 +                comm->cutoff_mbody = min(comm->cutoff_mbody,dlb_scale*acs);
 +            }
 +            if (!comm->bBondComm)
 +            {
 +                /* Without bBondComm do not go beyond the n.b. cut-off */
 +                comm->cutoff_mbody = min(comm->cutoff_mbody,comm->cutoff);
 +                if (comm->cellsize_limit >= comm->cutoff)
 +                {
 +                    /* We don't lose a lot of efficiency
 +                     * when increasing it to the n.b. cut-off.
 +                     * It can even be slightly faster, because we need
 +                     * fewer checks for the communication setup.
 +                     */
 +                    comm->cutoff_mbody = comm->cutoff;
 +                }
 +            }
 +            /* Check if we did not end up below our original limit */
 +            comm->cutoff_mbody = max(comm->cutoff_mbody,r_bonded_limit);
 +
 +            if (comm->cutoff_mbody > comm->cellsize_limit)
 +            {
 +                comm->cellsize_limit = comm->cutoff_mbody;
 +            }
 +        }
 +        /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
 +    }
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"Bonded atom communication beyond the cut-off: %d\n"
 +                "cellsize limit %f\n",
 +                comm->bBondComm,comm->cellsize_limit);
 +    }
 +    
 +    if (MASTER(cr))
 +    {
 +        check_dd_restrictions(cr,dd,ir,fplog);
 +    }
 +
 +    comm->partition_step = INT_MIN;
 +    dd->ddp_count = 0;
 +
 +    clear_dd_cycle_counts(dd);
 +
 +    return dd;
 +}
 +
 +static void set_dlb_limits(gmx_domdec_t *dd)
 +{
 +    int d;
 +
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        dd->comm->cd[d].np = dd->comm->cd[d].np_dlb;
 +        dd->comm->cellsize_min[dd->dim[d]] =
 +            dd->comm->cellsize_min_dlb[dd->dim[d]];
 +    }
 +}
 +
 +
 +static void turn_on_dlb(FILE *fplog,t_commrec *cr,gmx_large_int_t step)
 +{
 +    gmx_domdec_t *dd;
 +    gmx_domdec_comm_t *comm;
 +    real cellsize_min;
 +    int  d,nc,i;
 +    char buf[STRLEN];
 +    
 +    dd = cr->dd;
 +    comm = dd->comm;
 +    
 +    if (fplog)
 +    {
 +        fprintf(fplog,"At step %s the performance loss due to force load imbalance is %.1f %%\n",gmx_step_str(step,buf),dd_force_imb_perf_loss(dd)*100);
 +    }
 +
 +    cellsize_min = comm->cellsize_min[dd->dim[0]];
 +    for(d=1; d<dd->ndim; d++)
 +    {
 +        cellsize_min = min(cellsize_min,comm->cellsize_min[dd->dim[d]]);
 +    }
 +
 +    if (cellsize_min < comm->cellsize_limit*1.05)
 +    {
 +        dd_warning(cr,fplog,"NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
 +
 +        /* Change DLB from "auto" to "no". */
 +        comm->eDLB = edlbNO;
 +
 +        return;
 +    }
 +
 +    dd_warning(cr,fplog,"NOTE: Turning on dynamic load balancing\n");
 +    comm->bDynLoadBal = TRUE;
 +    dd->bGridJump = TRUE;
 +    
 +    set_dlb_limits(dd);
 +
 +    /* We can set the required cell size info here,
 +     * so we do not need to communicate this.
 +     * The grid is completely uniform.
 +     */
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        if (comm->root[d])
 +        {
 +            comm->load[d].sum_m = comm->load[d].sum;
 +
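 +            /* Set uniform cell boundaries, e.g. nc = 4 gives
 +             * cell_f = {0, 0.25, 0.5, 0.75, 1.0}.
 +             */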
 +            nc = dd->nc[dd->dim[d]];
 +            for(i=0; i<nc; i++)
 +            {
 +                comm->root[d]->cell_f[i]    = i/(real)nc;
 +                if (d > 0)
 +                {
 +                    comm->root[d]->cell_f_max0[i] =  i   /(real)nc;
 +                    comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
 +                }
 +            }
 +            comm->root[d]->cell_f[nc] = 1.0;
 +        }
 +    }
 +}
 +
 +static char *init_bLocalCG(gmx_mtop_t *mtop)
 +{
 +    int  ncg,cg;
 +    char *bLocalCG;
 +    
 +    ncg = ncg_mtop(mtop);
 +    snew(bLocalCG,ncg);
 +    for(cg=0; cg<ncg; cg++)
 +    {
 +        bLocalCG[cg] = FALSE;
 +    }
 +
 +    return bLocalCG;
 +}
 +
 +void dd_init_bondeds(FILE *fplog,
 +                     gmx_domdec_t *dd,gmx_mtop_t *mtop,
 +                     gmx_vsite_t *vsite,gmx_constr_t constr,
 +                     t_inputrec *ir,gmx_bool bBCheck,cginfo_mb_t *cginfo_mb)
 +{
 +    gmx_domdec_comm_t *comm;
 +    gmx_bool bBondComm;
 +    int  d;
 +
 +    dd_make_reverse_top(fplog,dd,mtop,vsite,constr,ir,bBCheck);
 +
 +    comm = dd->comm;
 +
 +    if (comm->bBondComm)
 +    {
 +        /* Communicate atoms beyond the cut-off for bonded interactions */
 +
 +        comm->cglink = make_charge_group_links(mtop,dd,cginfo_mb);
 +
 +        comm->bLocalCG = init_bLocalCG(mtop);
 +    }
 +    else
 +    {
 +        /* Only communicate atoms based on cut-off */
 +        comm->cglink   = NULL;
 +        comm->bLocalCG = NULL;
 +    }
 +}
 +
 +static void print_dd_settings(FILE *fplog,gmx_domdec_t *dd,
 +                              t_inputrec *ir,
 +                              gmx_bool bDynLoadBal,real dlb_scale,
 +                              gmx_ddbox_t *ddbox)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  d;
 +    ivec np;
 +    real limit,shrink;
 +    char buf[64];
 +
 +    if (fplog == NULL)
 +    {
 +        return;
 +    }
 +
 +    comm = dd->comm;
 +
 +    if (bDynLoadBal)
 +    {
 +        fprintf(fplog,"The maximum number of communication pulses is:");
 +        for(d=0; d<dd->ndim; d++)
 +        {
 +            fprintf(fplog," %c %d",dim2char(dd->dim[d]),comm->cd[d].np_dlb);
 +        }
 +        fprintf(fplog,"\n");
 +        fprintf(fplog,"The minimum size for domain decomposition cells is %.3f nm\n",comm->cellsize_limit);
 +        fprintf(fplog,"The requested allowed shrink of DD cells (option -dds) is: %.2f\n",dlb_scale);
 +        fprintf(fplog,"The allowed shrink of domain decomposition cells is:");
 +        for(d=0; d<DIM; d++)
 +        {
 +            if (dd->nc[d] > 1)
 +            {
 +                if (d >= ddbox->npbcdim && dd->nc[d] == 2)
 +                {
 +                    shrink = 0;
 +                }
 +                else
 +                {
 +                    shrink =
 +                        comm->cellsize_min_dlb[d]/
 +                        (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
 +                }
 +                fprintf(fplog," %c %.2f",dim2char(d),shrink);
 +            }
 +        }
 +        fprintf(fplog,"\n");
 +    }
 +    else
 +    {
 +        set_dd_cell_sizes_slb(dd,ddbox,FALSE,np);
 +        fprintf(fplog,"The initial number of communication pulses is:");
 +        for(d=0; d<dd->ndim; d++)
 +        {
 +            fprintf(fplog," %c %d",dim2char(dd->dim[d]),np[dd->dim[d]]);
 +        }
 +        fprintf(fplog,"\n");
 +        fprintf(fplog,"The initial domain decomposition cell size is:");
 +        for(d=0; d<DIM; d++)
 +        {
 +            if (dd->nc[d] > 1)
 +            {
 +                fprintf(fplog," %c %.2f nm",
 +                        dim2char(d),dd->comm->cellsize_min[d]);
 +            }
 +        }
 +        fprintf(fplog,"\n\n");
 +    }
 +    
 +    if (comm->bInterCGBondeds || dd->vsite_comm || dd->constraint_comm)
 +    {
 +        fprintf(fplog,"The maximum allowed distance for charge groups involved in interactions is:\n");
 +        fprintf(fplog,"%40s  %-7s %6.3f nm\n",
 +                "non-bonded interactions","",comm->cutoff);
 +
 +        if (bDynLoadBal)
 +        {
 +            limit = dd->comm->cellsize_limit;
 +        }
 +        else
 +        {
 +            if (dynamic_dd_box(ddbox,ir))
 +            {
 +                fprintf(fplog,"(the following are initial values, they could change due to box deformation)\n");
 +            }
 +            limit = dd->comm->cellsize_min[XX];
 +            for(d=1; d<DIM; d++)
 +            {
 +                limit = min(limit,dd->comm->cellsize_min[d]);
 +            }
 +        }
 +
 +        if (comm->bInterCGBondeds)
 +        {
 +            fprintf(fplog,"%40s  %-7s %6.3f nm\n",
 +                    "two-body bonded interactions","(-rdd)",
 +                    max(comm->cutoff,comm->cutoff_mbody));
 +            fprintf(fplog,"%40s  %-7s %6.3f nm\n",
 +                    "multi-body bonded interactions","(-rdd)",
 +                    (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : min(comm->cutoff,limit));
 +        }
 +        if (dd->vsite_comm)
 +        {
 +            fprintf(fplog,"%40s  %-7s %6.3f nm\n",
 +                    "virtual site constructions","(-rcon)",limit);
 +        }
 +        if (dd->constraint_comm)
 +        {
 +            sprintf(buf,"atoms separated by up to %d constraints",
 +                    1+ir->nProjOrder);
 +            fprintf(fplog,"%40s  %-7s %6.3f nm\n",
 +                    buf,"(-rcon)",limit);
 +        }
 +        fprintf(fplog,"\n");
 +    }
 +    
 +    fflush(fplog);
 +}
 +
 +static void set_cell_limits_dlb(gmx_domdec_t *dd,
 +                                real dlb_scale,
 +                                const t_inputrec *ir,
 +                                const gmx_ddbox_t *ddbox)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  d,dim,npulse,npulse_d_max,npulse_d;
 +    gmx_bool bNoCutOff;
 +
 +    comm = dd->comm;
 +
 +    bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
 +
 +    /* Determine the maximum number of comm. pulses in one dimension */
 +        
 +    comm->cellsize_limit = max(comm->cellsize_limit,comm->cutoff_mbody);
 +        
 +    /* Determine the maximum required number of grid pulses */
 +    if (comm->cellsize_limit >= comm->cutoff)
 +    {
 +        /* Only a single pulse is required */
 +        npulse = 1;
 +    }
 +    else if (!bNoCutOff && comm->cellsize_limit > 0)
 +    {
 +        /* We round down slightly here to avoid overhead due to the latency
 +         * of extra communication calls when the cut-off
 +         * would be only slightly longer than the cell size.
 +         * Later cellsize_limit is redetermined,
 +         * so we can not miss interactions due to this rounding.
 +         */
 +        npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
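 +        /* For example, cutoff = 1.2 nm and cellsize_limit = 0.5 nm
 +         * give npulse = (int)(0.96 + 2.4) = 3.
 +         */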
 +    }
 +    else
 +    {
 +        /* There is no cell size limit */
 +        npulse = max(dd->nc[XX]-1,max(dd->nc[YY]-1,dd->nc[ZZ]-1));
 +    }
 +
 +    if (!bNoCutOff && npulse > 1)
 +    {
 +        /* See if we can do with fewer pulses, based on dlb_scale */
 +        npulse_d_max = 0;
 +        for(d=0; d<dd->ndim; d++)
 +        {
 +            dim = dd->dim[d];
 +            npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
 +                             /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
 +            npulse_d_max = max(npulse_d_max,npulse_d);
 +        }
 +        npulse = min(npulse,npulse_d_max);
 +    }
 +
 +    /* This env var can override npulse */
 +    d = dd_nst_env(debug,"GMX_DD_NPULSE",0);
 +    if (d > 0)
 +    {
 +        npulse = d;
 +    }
 +
 +    comm->maxpulse = 1;
 +    comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        comm->cd[d].np_dlb = min(npulse,dd->nc[dd->dim[d]]-1);
 +        comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
 +        snew(comm->cd[d].ind,comm->cd[d].np_nalloc);
 +        comm->maxpulse = max(comm->maxpulse,comm->cd[d].np_dlb);
 +        if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
 +        {
 +            comm->bVacDLBNoLimit = FALSE;
 +        }
 +    }
 +
 +    /* cellsize_limit is set for LINCS in init_domain_decomposition */
 +    if (!comm->bVacDLBNoLimit)
 +    {
 +        comm->cellsize_limit = max(comm->cellsize_limit,
 +                                   comm->cutoff/comm->maxpulse);
 +    }
 +    comm->cellsize_limit = max(comm->cellsize_limit,comm->cutoff_mbody);
 +    /* Set the minimum cell size for each DD dimension */
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        if (comm->bVacDLBNoLimit ||
 +            comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
 +        {
 +            comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
 +        }
 +        else
 +        {
 +            comm->cellsize_min_dlb[dd->dim[d]] =
 +                comm->cutoff/comm->cd[d].np_dlb;
 +        }
 +    }
 +    if (comm->cutoff_mbody <= 0)
 +    {
 +        comm->cutoff_mbody = min(comm->cutoff,comm->cellsize_limit);
 +    }
 +    if (comm->bDynLoadBal)
 +    {
 +        set_dlb_limits(dd);
 +    }
 +}
 +
 +gmx_bool dd_bonded_molpbc(gmx_domdec_t *dd,int ePBC)
 +{
 +    /* If each molecule is a single charge group
 +     * or we use domain decomposition for each periodic dimension,
 +     * we do not need to take pbc into account for the bonded interactions.
 +     */
 +    return (ePBC != epbcNONE && dd->comm->bInterCGBondeds &&
 +            !(dd->nc[XX]>1 &&
 +              dd->nc[YY]>1 &&
 +              (dd->nc[ZZ]>1 || ePBC==epbcXY)));
 +}
 +
 +void set_dd_parameters(FILE *fplog,gmx_domdec_t *dd,real dlb_scale,
 +                       t_inputrec *ir,t_forcerec *fr,
 +                       gmx_ddbox_t *ddbox)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  natoms_tot;
 +    real vol_frac;
 +
 +    comm = dd->comm;
 +
 +    /* Initialize the thread data.
 +     * This cannot be done in init_domain_decomposition,
 +     * as the number of threads is determined later.
 +     */
 +    comm->nth = gmx_omp_nthreads_get(emntDomdec);
 +    if (comm->nth > 1)
 +    {
 +        snew(comm->dth,comm->nth);
 +    }
 +
 +    if (EEL_PME(ir->coulombtype))
 +    {
 +        init_ddpme(dd,&comm->ddpme[0],0);
 +        if (comm->npmedecompdim >= 2)
 +        {
 +            init_ddpme(dd,&comm->ddpme[1],1);
 +        }
 +    }
 +    else
 +    {
 +        comm->npmenodes = 0;
 +        if (dd->pme_nodeid >= 0)
 +        {
 +            gmx_fatal_collective(FARGS,NULL,dd,
 +                                 "Can not have separate PME nodes without PME electrostatics");
 +        }
 +    }
 +        
 +    if (debug)
 +    {
 +        fprintf(debug,"The DD cut-off is %f\n",comm->cutoff);
 +    }
 +    if (comm->eDLB != edlbNO)
 +    {
 +        set_cell_limits_dlb(dd,dlb_scale,ir,ddbox);
 +    }
 +    
 +    print_dd_settings(fplog,dd,ir,comm->bDynLoadBal,dlb_scale,ddbox);
 +    if (comm->eDLB == edlbAUTO)
 +    {
 +        if (fplog)
 +        {
 +            fprintf(fplog,"When dynamic load balancing gets turned on, these settings will change to:\n");
 +        }
 +        print_dd_settings(fplog,dd,ir,TRUE,dlb_scale,ddbox);
 +    }
 +
 +    if (ir->ePBC == epbcNONE)
 +    {
 +        vol_frac = 1 - 1/(double)dd->nnodes;
 +    }
 +    else
 +    {
 +        vol_frac =
 +            (1 + comm_box_frac(dd->nc,comm->cutoff,ddbox))/(double)dd->nnodes;
 +    }
 +    if (debug)
 +    {
 +        fprintf(debug,"Volume fraction for all DD zones: %f\n",vol_frac);
 +    }
 +    natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
 +   
 +    dd->ga2la = ga2la_init(natoms_tot,vol_frac*natoms_tot);
 +}
 +
 +gmx_bool change_dd_cutoff(t_commrec *cr,t_state *state,t_inputrec *ir,
 +                          real cutoff_req)
 +{
 +    gmx_domdec_t *dd;
 +    gmx_ddbox_t ddbox;
 +    int d,dim,np;
 +    real inv_cell_size;
 +    int LocallyLimited;
 +
 +    dd = cr->dd;
 +
 +    set_ddbox(dd,FALSE,cr,ir,state->box,
 +              TRUE,&dd->comm->cgs_gl,state->x,&ddbox);
 +
 +    LocallyLimited = 0;
 +
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        dim = dd->dim[d];
 +
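 +        /* The inverse of the average cell size, including a safety margin */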
 +        inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
 +        if (dynamic_dd_box(&ddbox,ir))
 +        {
 +            inv_cell_size *= DD_PRES_SCALE_MARGIN;
 +        }
 +
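 +        /* The number of communication pulses needed along this dimension
 +         * for the requested cut-off.
 +         */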
 +        np = 1 + (int)(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
 +
 +        if (dd->comm->eDLB != edlbNO && dim < ddbox.npbcdim &&
 +            dd->comm->cd[d].np_dlb > 0)
 +        {
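 +            /* With DLB the number of pulses per dimension is fixed,
 +             * so the new cut-off must not require more pulses than that.
 +             */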
 +            if (np > dd->comm->cd[d].np_dlb)
 +            {
 +                return FALSE;
 +            }
 +
 +            /* If a current local cell size is smaller than the requested
 +             * cut-off, we could still fix it, but this gets very complicated.
 +             * Without fixing it here, we might actually need more checks.
 +             */
 +            if ((dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim]*dd->comm->cd[d].np_dlb < cutoff_req)
 +            {
 +                LocallyLimited = 1;
 +            }
 +        }
 +    }
 +
 +    if (dd->comm->eDLB != edlbNO)
 +    {
 +        /* If DLB is not active yet, we don't need to check the grid jumps.
 +         * Actually we shouldn't, because then the grid jump data is not set.
 +         */
 +        if (dd->comm->bDynLoadBal &&
 +            check_grid_jump(0,dd,cutoff_req,&ddbox,FALSE))
 +        {
 +            LocallyLimited = 1; 
 +        }
 +
 +        gmx_sumi(1,&LocallyLimited,cr);
 +
 +        if (LocallyLimited > 0)
 +        {
 +            return FALSE;
 +        }
 +    }
 +
 +    dd->comm->cutoff = cutoff_req;
 +
 +    return TRUE;
 +}
 +
 +static void merge_cg_buffers(int ncell,
 +                             gmx_domdec_comm_dim_t *cd, int pulse,
 +                             int  *ncg_cell,
 +                             int  *index_gl, int  *recv_i,
 +                             rvec *cg_cm,    rvec *recv_vr,
 +                             int *cgindex,
 +                             cginfo_mb_t *cginfo_mb,int *cginfo)
 +{
 +    gmx_domdec_ind_t *ind,*ind_p;
 +    int p,cell,c,cg,cg0,cg1,cg_gl,nat;
 +    int shift,shift_at;
 +    
 +    ind = &cd->ind[pulse];
 +    
 +    /* First correct the already stored data */
 +    shift = ind->nrecv[ncell];
 +    for(cell=ncell-1; cell>=0; cell--)
 +    {
 +        shift -= ind->nrecv[cell];
 +        if (shift > 0)
 +        {
 +            /* Move the cg's present from previous grid pulses */
 +            cg0 = ncg_cell[ncell+cell];
 +            cg1 = ncg_cell[ncell+cell+1];
 +            cgindex[cg1+shift] = cgindex[cg1];
 +            for(cg=cg1-1; cg>=cg0; cg--)
 +            {
 +                index_gl[cg+shift] = index_gl[cg];
 +                copy_rvec(cg_cm[cg],cg_cm[cg+shift]);
 +                cgindex[cg+shift] = cgindex[cg];
 +                cginfo[cg+shift] = cginfo[cg];
 +            }
 +            /* Correct the already stored send indices for the shift */
 +            for(p=1; p<=pulse; p++)
 +            {
 +                ind_p = &cd->ind[p];
 +                cg0 = 0;
 +                for(c=0; c<cell; c++)
 +                {
 +                    cg0 += ind_p->nsend[c];
 +                }
 +                cg1 = cg0 + ind_p->nsend[cell];
 +                for(cg=cg0; cg<cg1; cg++)
 +                {
 +                    ind_p->index[cg] += shift;
 +                }
 +            }
 +        }
 +    }
 +
 +    /* Merge in the communicated buffers */
 +    shift = 0;
 +    shift_at = 0;
 +    cg0 = 0;
 +    for(cell=0; cell<ncell; cell++)
 +    {
 +        cg1 = ncg_cell[ncell+cell+1] + shift;
 +        if (shift_at > 0)
 +        {
 +            /* Correct the old cg indices */
 +            for(cg=ncg_cell[ncell+cell]; cg<cg1; cg++)
 +            {
 +                cgindex[cg+1] += shift_at;
 +            }
 +        }
 +        for(cg=0; cg<ind->nrecv[cell]; cg++)
 +        {
 +            /* Copy this charge group from the buffer */
 +            index_gl[cg1] = recv_i[cg0];
 +            copy_rvec(recv_vr[cg0],cg_cm[cg1]);
 +            /* Add it to the cgindex */
 +            cg_gl = index_gl[cg1];
 +            cginfo[cg1] = ddcginfo(cginfo_mb,cg_gl);
 +            nat = GET_CGINFO_NATOMS(cginfo[cg1]);
 +            cgindex[cg1+1] = cgindex[cg1] + nat;
 +            cg0++;
 +            cg1++;
 +            shift_at += nat;
 +        }
 +        shift += ind->nrecv[cell];
 +        ncg_cell[ncell+cell+1] = cg1;
 +    }
 +}
 +
 +static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
 +                               int nzone,int cg0,const int *cgindex)
 +{
 +    int cg,zone,p;
 +    
 +    /* Store the atom block boundaries for easy copying of communication buffers
 +     */
 +    cg = cg0;
 +    for(zone=0; zone<nzone; zone++)
 +    {
 +        for(p=0; p<cd->np; p++)
 +        {
 +            cd->ind[p].cell2at0[zone] = cgindex[cg];
 +            cg += cd->ind[p].nrecv[zone];
 +            cd->ind[p].cell2at1[zone] = cgindex[cg];
 +        }
 +    }
 +}
 +
 +static gmx_bool missing_link(t_blocka *link,int cg_gl,char *bLocalCG)
 +{
 +    int  i;
 +    gmx_bool bMiss;
 +
 +    bMiss = FALSE;
 +    for(i=link->index[cg_gl]; i<link->index[cg_gl+1]; i++)
 +    {
 +        if (!bLocalCG[link->a[i]])
 +        {
 +            bMiss = TRUE;
 +        }
 +    }
 +
 +    return bMiss;
 +}
 +
 +/* Domain corners for communication, a maximum of 4 i-zones see a j domain */
 +typedef struct {
 +    real c[DIM][4]; /* the corners for the non-bonded communication */
 +    real cr0;       /* corner for rounding */
 +    real cr1[4];    /* corners for rounding */
 +    real bc[DIM];   /* corners for bounded communication */
 +    real bcr1;      /* corner for rounding for bonded communication */
 +} dd_corners_t;
 +
 +/* Determine the corners of the domain(s) we are communicating with */
 +static void
 +set_dd_corners(const gmx_domdec_t *dd,
 +               int dim0, int dim1, int dim2,
 +               gmx_bool bDistMB,
 +               dd_corners_t *c)
 +{
 +    const gmx_domdec_comm_t *comm;
 +    const gmx_domdec_zones_t *zones;
 +    int i,j;
 +
 +    comm = dd->comm;
 +
 +    zones = &comm->zones;
 +
 +    /* Keep the compiler happy */
 +    c->cr0  = 0;
 +    c->bcr1 = 0;
 +
 +    /* The first dimension is equal for all cells */
 +    c->c[0][0] = comm->cell_x0[dim0];
 +    if (bDistMB)
 +    {
 +        c->bc[0] = c->c[0][0];
 +    }
 +    if (dd->ndim >= 2)
 +    {
 +        dim1 = dd->dim[1];
 +        /* This cell row is only seen from the first row */
 +        c->c[1][0] = comm->cell_x0[dim1];
 +        /* All rows can see this row */
 +        c->c[1][1] = comm->cell_x0[dim1];
 +        if (dd->bGridJump)
 +        {
 +            c->c[1][1] = max(comm->cell_x0[dim1],comm->zone_d1[1].mch0);
 +            if (bDistMB)
 +            {
 +                /* For the multi-body distance we need the maximum */
 +                c->bc[1] = max(comm->cell_x0[dim1],comm->zone_d1[1].p1_0);
 +            }
 +        }
 +        /* Set the upper-right corner for rounding */
 +        c->cr0 = comm->cell_x1[dim0];
 +        
 +        if (dd->ndim >= 3)
 +        {
 +            dim2 = dd->dim[2];
 +            for(j=0; j<4; j++)
 +            {
 +                c->c[2][j] = comm->cell_x0[dim2];
 +            }
 +            if (dd->bGridJump)
 +            {
 +                /* Use the maximum of the i-cells that see a j-cell */
 +                for(i=0; i<zones->nizone; i++)
 +                {
 +                    for(j=zones->izone[i].j0; j<zones->izone[i].j1; j++)
 +                    {
 +                        if (j >= 4)
 +                        {
 +                            c->c[2][j-4] =
 +                                max(c->c[2][j-4],
 +                                    comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
 +                        }
 +                    }
 +                }
 +                if (bDistMB)
 +                {
 +                    /* For the multi-body distance we need the maximum */
 +                    c->bc[2] = comm->cell_x0[dim2];
 +                    for(i=0; i<2; i++)
 +                    {
 +                        for(j=0; j<2; j++)
 +                        {
 +                            c->bc[2] = max(c->bc[2],comm->zone_d2[i][j].p1_0);
 +                        }
 +                    }
 +                }
 +            }
 +            
 +            /* Set the upper-right corner for rounding */
 +            /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
 +             * Only cell (0,0,0) can see cell 7 (1,1,1)
 +             */
 +            c->cr1[0] = comm->cell_x1[dim1];
 +            c->cr1[3] = comm->cell_x1[dim1];
 +            if (dd->bGridJump)
 +            {
 +                c->cr1[0] = max(comm->cell_x1[dim1],comm->zone_d1[1].mch1);
 +                if (bDistMB)
 +                {
 +                    /* For the multi-body distance we need the maximum */
 +                    c->bcr1 = max(comm->cell_x1[dim1],comm->zone_d1[1].p1_1);
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +/* Determine which cg's we need to send in this pulse from this zone */
 +static void
 +get_zone_pulse_cgs(gmx_domdec_t *dd,
 +                   int zonei, int zone,
 +                   int cg0, int cg1,
 +                   const int *index_gl,
 +                   const int *cgindex,
 +                   int dim, int dim_ind,
 +                   int dim0, int dim1, int dim2,
 +                   real r_comm2, real r_bcomm2,
 +                   matrix box,
 +                   ivec tric_dist,
 +                   rvec *normal,
 +                   real skew_fac2_d, real skew_fac_01,
 +                   rvec *v_d, rvec *v_0, rvec *v_1,
 +                   const dd_corners_t *c,
 +                   rvec sf2_round,
 +                   gmx_bool bDistBonded,
 +                   gmx_bool bBondComm,
 +                   gmx_bool bDist2B,
 +                   gmx_bool bDistMB,
 +                   rvec *cg_cm,
 +                   int *cginfo,
 +                   gmx_domdec_ind_t *ind,
 +                   int **ibuf, int *ibuf_nalloc,
 +                   vec_rvec_t *vbuf,
 +                   int *nsend_ptr,
 +                   int *nat_ptr,
 +                   int *nsend_z_ptr)
 +{
 +    gmx_domdec_comm_t *comm;
 +    gmx_bool bScrew;
 +    gmx_bool bDistMB_pulse;
 +    int  cg,i;
 +    real r2,rb2,r,tric_sh;
 +    rvec rn,rb;
 +    int  dimd;
 +    int  nsend_z,nsend,nat;
 +
 +    comm = dd->comm;
 +
 +    bScrew = (dd->bScrewPBC && dim == XX);
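 +    /* With screw pbc, coordinates sent across x are mirrored in y and z,
 +     * see the pbc correction below.
 +     */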
 +
 +    bDistMB_pulse = (bDistMB && bDistBonded);
 +
 +    nsend_z = 0;
 +    nsend   = *nsend_ptr;
 +    nat     = *nat_ptr;
 +
 +    for(cg=cg0; cg<cg1; cg++)
 +    {
 +        r2  = 0;
 +        rb2 = 0;
 +        if (tric_dist[dim_ind] == 0)
 +        {
 +            /* Rectangular direction, easy */
 +            r = cg_cm[cg][dim] - c->c[dim_ind][zone];
 +            if (r > 0)
 +            {
 +                r2 += r*r;
 +            }
 +            if (bDistMB_pulse)
 +            {
 +                r = cg_cm[cg][dim] - c->bc[dim_ind];
 +                if (r > 0)
 +                {
 +                    rb2 += r*r;
 +                }
 +            }
 +            /* Rounding gives at most a 16% reduction
 +             * in communicated atoms
 +             */
 +            if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
 +            {
 +                r = cg_cm[cg][dim0] - c->cr0;
 +                /* This is the first dimension, so always r >= 0 */
 +                r2 += r*r;
 +                if (bDistMB_pulse)
 +                {
 +                    rb2 += r*r;
 +                }
 +            }
 +            if (dim_ind == 2 && (zonei == 2 || zonei == 3))
 +            {
 +                r = cg_cm[cg][dim1] - c->cr1[zone];
 +                if (r > 0)
 +                {
 +                    r2 += r*r;
 +                }
 +                if (bDistMB_pulse)
 +                {
 +                    r = cg_cm[cg][dim1] - c->bcr1;
 +                    if (r > 0)
 +                    {
 +                        rb2 += r*r;
 +                    }
 +                }
 +            }
 +        }
 +        else
 +        {
 +            /* Triclinic direction, more complicated */
 +            clear_rvec(rn);
 +            clear_rvec(rb);
 +            /* Rounding, conservative as the skew_fac multiplication
 +             * will slightly underestimate the distance.
 +             */
 +            if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
 +            {
 +                rn[dim0] = cg_cm[cg][dim0] - c->cr0;
 +                for(i=dim0+1; i<DIM; i++)
 +                {
 +                    rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
 +                }
 +                r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
 +                if (bDistMB_pulse)
 +                {
 +                    rb[dim0] = rn[dim0];
 +                    rb2 = r2;
 +                }
 +                /* Take care that the cell planes along dim0 might not
 +                 * be orthogonal to those along dim1 and dim2.
 +                 */
 +                for(i=1; i<=dim_ind; i++)
 +                {
 +                    dimd = dd->dim[i];
 +                    if (normal[dim0][dimd] > 0)
 +                    {
 +                        rn[dimd] -= rn[dim0]*normal[dim0][dimd];
 +                        if (bDistMB_pulse)
 +                        {
 +                            rb[dimd] -= rb[dim0]*normal[dim0][dimd];
 +                        }
 +                    }
 +                }
 +            }
 +            if (dim_ind == 2 && (zonei == 2 || zonei == 3))
 +            {
 +                rn[dim1] += cg_cm[cg][dim1] - c->cr1[zone];
 +                tric_sh = 0;
 +                for(i=dim1+1; i<DIM; i++)
 +                {
 +                    tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
 +                }
 +                rn[dim1] += tric_sh;
 +                if (rn[dim1] > 0)
 +                {
 +                    r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
 +                    /* Take care of coupling of the distances
 +                     * to the planes along dim0 and dim1 through dim2.
 +                     */
 +                    r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
 +                    /* Take care that the cell planes along dim1
 +                     * might not be orthogonal to that along dim2.
 +                     */
 +                    if (normal[dim1][dim2] > 0)
 +                    {
 +                        rn[dim2] -= rn[dim1]*normal[dim1][dim2];
 +                    }
 +                }
 +                if (bDistMB_pulse)
 +                {
 +                    rb[dim1] +=
 +                        cg_cm[cg][dim1] - c->bcr1 + tric_sh;
 +                    if (rb[dim1] > 0)
 +                    {
 +                        rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
 +                        /* Take care of coupling of the distances
 +                         * to the planes along dim0 and dim1 through dim2.
 +                         */
 +                        rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
 +                        /* Take care that the cell planes along dim1
 +                         * might not be orthogonal to that along dim2.
 +                         */
 +                        if (normal[dim1][dim2] > 0)
 +                        {
 +                            rb[dim2] -= rb[dim1]*normal[dim1][dim2];
 +                        }
 +                    }
 +                }
 +            }
 +            /* The distance along the communication direction */
 +            rn[dim] += cg_cm[cg][dim] - c->c[dim_ind][zone];
 +            tric_sh = 0;
 +            for(i=dim+1; i<DIM; i++)
 +            {
 +                tric_sh -= cg_cm[cg][i]*v_d[i][dim];
 +            }
 +            rn[dim] += tric_sh;
 +            if (rn[dim] > 0)
 +            {
 +                r2 += rn[dim]*rn[dim]*skew_fac2_d;
 +                /* Take care of coupling of the distances
 +                 * to the planes along dim0 and dim1 through dim2.
 +                 */
 +                if (dim_ind == 1 && zonei == 1)
 +                {
 +                    r2 -= rn[dim0]*rn[dim]*skew_fac_01;
 +                }
 +            }
 +            if (bDistMB_pulse)
 +            {
 +                clear_rvec(rb);
 +                rb[dim] += cg_cm[cg][dim] - c->bc[dim_ind] + tric_sh;
 +                if (rb[dim] > 0)
 +                {
 +                    rb2 += rb[dim]*rb[dim]*skew_fac2_d;
 +                    /* Take care of coupling of the distances
 +                     * to the planes along dim0 and dim1 through dim2.
 +                     */
 +                    if (dim_ind == 1 && zonei == 1)
 +                    {
 +                        rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
 +                    }
 +                }
 +            }
 +        }
 +        
 +        if (r2 < r_comm2 ||
 +            (bDistBonded &&
 +             ((bDistMB && rb2 < r_bcomm2) ||
 +              (bDist2B && r2  < r_bcomm2)) &&
 +             (!bBondComm ||
 +              (GET_CGINFO_BOND_INTER(cginfo[cg]) &&
 +               missing_link(comm->cglink,index_gl[cg],
 +                            comm->bLocalCG)))))
 +        {
 +            /* Make an index to the local charge groups */
 +            if (nsend+1 > ind->nalloc)
 +            {
 +                ind->nalloc = over_alloc_large(nsend+1);
 +                srenew(ind->index,ind->nalloc);
 +            }
 +            if (nsend+1 > *ibuf_nalloc)
 +            {
 +                *ibuf_nalloc = over_alloc_large(nsend+1);
 +                srenew(*ibuf,*ibuf_nalloc);
 +            }
 +            ind->index[nsend] = cg;
 +            (*ibuf)[nsend] = index_gl[cg];
 +            nsend_z++;
 +            vec_rvec_check_alloc(vbuf,nsend+1);
 +            
 +            if (dd->ci[dim] == 0)
 +            {
 +                /* Correct cg_cm for pbc */
 +                rvec_add(cg_cm[cg],box[dim],vbuf->v[nsend]);
 +                if (bScrew)
 +                {
 +                    vbuf->v[nsend][YY] = box[YY][YY] - vbuf->v[nsend][YY];
 +                    vbuf->v[nsend][ZZ] = box[ZZ][ZZ] - vbuf->v[nsend][ZZ];
 +                }
 +            }
 +            else
 +            {
 +                copy_rvec(cg_cm[cg],vbuf->v[nsend]);
 +            }
 +            nsend++;
 +            nat += cgindex[cg+1] - cgindex[cg];
 +        }
 +    }
 +
 +    *nsend_ptr   = nsend;
 +    *nat_ptr     = nat;
 +    *nsend_z_ptr = nsend_z;
 +}
 +
 +static void setup_dd_communication(gmx_domdec_t *dd,
 +                                   matrix box,gmx_ddbox_t *ddbox,
 +                                   t_forcerec *fr,t_state *state,rvec **f)
 +{
 +    int dim_ind,dim,dim0,dim1,dim2,dimd,p,nat_tot;
 +    int nzone,nzone_send,zone,zonei,cg0,cg1;
 +    int c,i,j,cg,cg_gl,nrcg;
 +    int *zone_cg_range,pos_cg,*index_gl,*cgindex,*recv_i;
 +    gmx_domdec_comm_t *comm;
 +    gmx_domdec_zones_t *zones;
 +    gmx_domdec_comm_dim_t *cd;
 +    gmx_domdec_ind_t *ind;
 +    cginfo_mb_t *cginfo_mb;
 +    gmx_bool bBondComm,bDist2B,bDistMB,bDistBonded;
 +    real r_mb,r_comm2,r_scomm2,r_bcomm2,r_0,r_1,r2inc,inv_ncg;
 +    dd_corners_t corners;
 +    ivec tric_dist;
 +    rvec *cg_cm,*normal,*v_d,*v_0=NULL,*v_1=NULL,*recv_vr;
 +    real skew_fac2_d,skew_fac_01;
 +    rvec sf2_round;
 +    int  nsend,nat;
 +    int  th;
 +    
 +    if (debug)
 +    {
 +        fprintf(debug,"Setting up DD communication\n");
 +    }
 +    
 +    comm  = dd->comm;
 +
 +    switch (fr->cutoff_scheme)
 +    {
 +    case ecutsGROUP:
 +        cg_cm = fr->cg_cm;
 +        break;
 +    case ecutsVERLET:
 +        cg_cm = state->x;
 +        break;
 +    default:
 +        gmx_incons("unimplemented");
 +        cg_cm = NULL;
 +    }
 +
 +    for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
 +    {
 +        dim = dd->dim[dim_ind];
 +
 +        /* Check if we need to use triclinic distances */
 +        tric_dist[dim_ind] = 0;
 +        for(i=0; i<=dim_ind; i++)
 +        {
 +            if (ddbox->tric_dir[dd->dim[i]])
 +            {
 +                tric_dist[dim_ind] = 1;
 +            }
 +        }
 +    }
 +
 +    bBondComm = comm->bBondComm;
 +
 +    /* Do we need to determine extra distances for multi-body bondeds? */
 +    bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
 +    
 +    /* Do we need to determine extra distances for only two-body bondeds? */
 +    bDist2B = (bBondComm && !bDistMB);
 +
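 +    /* Squared cut-offs for non-bonded and bonded communication */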
 +    r_comm2  = sqr(comm->cutoff);
 +    r_bcomm2 = sqr(comm->cutoff_mbody);
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"bBondComm %d, r_bc %f\n",bBondComm,sqrt(r_bcomm2));
 +    }
 +
 +    zones = &comm->zones;
 +    
 +    dim0 = dd->dim[0];
 +    dim1 = (dd->ndim >= 2 ? dd->dim[1] : -1);
 +    dim2 = (dd->ndim >= 3 ? dd->dim[2] : -1);
 +
 +    set_dd_corners(dd,dim0,dim1,dim2,bDistMB,&corners);
 +    
 +    /* Triclinic stuff */
 +    normal = ddbox->normal;
 +    skew_fac_01 = 0;
 +    if (dd->ndim >= 2)
 +    {
 +        v_0 = ddbox->v[dim0];
 +        if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
 +        {
 +            /* Determine the coupling coefficient for the distances
 +             * to the cell planes along dim0 and dim1 through dim2.
 +             * This is required for correct rounding.
 +             */
 +            skew_fac_01 =
 +                ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
 +            if (debug)
 +            {
 +                fprintf(debug,"\nskew_fac_01 %f\n",skew_fac_01);
 +            }
 +        }
 +    }
 +    if (dd->ndim >= 3)
 +    {
 +        v_1 = ddbox->v[dim1];
 +    }
 +    
 +    zone_cg_range = zones->cg_range;
 +    index_gl = dd->index_gl;
 +    cgindex  = dd->cgindex;
 +    cginfo_mb = fr->cginfo_mb;
 +    
 +    zone_cg_range[0]   = 0;
 +    zone_cg_range[1]   = dd->ncg_home;
 +    comm->zone_ncg1[0] = dd->ncg_home;
 +    pos_cg             = dd->ncg_home;
 +    
 +    nat_tot = dd->nat_home;
 +    nzone = 1;
 +    for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
 +    {
 +        dim = dd->dim[dim_ind];
 +        cd = &comm->cd[dim_ind];
 +        
 +        if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
 +        {
 +            /* No pbc in this dimension, the first node should not comm. */
 +            nzone_send = 0;
 +        }
 +        else
 +        {
 +            nzone_send = nzone;
 +        }
 +
 +        v_d = ddbox->v[dim];
 +        skew_fac2_d = sqr(ddbox->skew_fac[dim]);
 +
 +        cd->bInPlace = TRUE;
 +        for(p=0; p<cd->np; p++)
 +        {
 +            /* Only atoms communicated in the first pulse are used
 +             * for multi-body bonded interactions or for bBondComm.
 +             */
 +            bDistBonded = ((bDistMB || bDist2B) && p == 0);
 +
 +            ind = &cd->ind[p];
 +            nsend = 0;
 +            nat = 0;
 +            for(zone=0; zone<nzone_send; zone++)
 +            {
 +                if (tric_dist[dim_ind] && dim_ind > 0)
 +                {
 +                    /* Determine slightly more optimized skew_fac's
 +                     * for rounding.
 +                     * This reduces the number of communicated atoms
 +                     * by about 10% for 3D DD of rhombic dodecahedra.
 +                     */
 +                    for(dimd=0; dimd<dim; dimd++)
 +                    {
 +                        sf2_round[dimd] = 1;
 +                        if (ddbox->tric_dir[dimd])
 +                        {
 +                            for(i=dd->dim[dimd]+1; i<DIM; i++)
 +                            {
 +                                /* If we are shifted in dimension i
 +                                 * and the cell plane is tilted forward
 +                                 * in dimension i, skip this coupling.
 +                                 */
 +                                if (!(zones->shift[nzone+zone][i] &&
 +                                      ddbox->v[dimd][i][dimd] >= 0))
 +                                {
 +                                    sf2_round[dimd] +=
 +                                        sqr(ddbox->v[dimd][i][dimd]);
 +                                }
 +                            }
 +                            sf2_round[dimd] = 1/sf2_round[dimd];
 +                        }
 +                    }
 +                }
 +
 +                zonei = zone_perm[dim_ind][zone];
 +                if (p == 0)
 +                {
 +                    /* Here we permute the zones to obtain a convenient order
 +                     * for neighbor searching
 +                     */
 +                    cg0 = zone_cg_range[zonei];
 +                    cg1 = zone_cg_range[zonei+1];
 +                }
 +                else
 +                {
 +                    /* Look only at the cg's received in the previous grid pulse
 +                     */
 +                    cg1 = zone_cg_range[nzone+zone+1];
 +                    cg0 = cg1 - cd->ind[p-1].nrecv[zone];
 +                }
 +
 +#pragma omp parallel for num_threads(comm->nth) schedule(static)
 +                for(th=0; th<comm->nth; th++)
 +                {
 +                    gmx_domdec_ind_t *ind_p;
 +                    int **ibuf_p,*ibuf_nalloc_p;
 +                    vec_rvec_t *vbuf_p;
 +                    int *nsend_p,*nat_p;
 +                    int *nsend_zone_p;
 +                    int cg0_th,cg1_th;
 +
 +                    if (th == 0)
 +                    {
 +                        /* Thread 0 writes in the comm buffers */
 +                        ind_p         = ind;
 +                        ibuf_p        = &comm->buf_int;
 +                        ibuf_nalloc_p = &comm->nalloc_int;
 +                        vbuf_p        = &comm->vbuf;
 +                        nsend_p       = &nsend;
 +                        nat_p         = &nat;
 +                        nsend_zone_p  = &ind->nsend[zone];
 +                    }
 +                    else
 +                    {
 +                        /* Other threads write into temp buffers */
 +                        ind_p         = &comm->dth[th].ind;
 +                        ibuf_p        = &comm->dth[th].ibuf;
 +                        ibuf_nalloc_p = &comm->dth[th].ibuf_nalloc;
 +                        vbuf_p        = &comm->dth[th].vbuf;
 +                        nsend_p       = &comm->dth[th].nsend;
 +                        nat_p         = &comm->dth[th].nat;
 +                        nsend_zone_p  = &comm->dth[th].nsend_zone;
 +
 +                        comm->dth[th].nsend      = 0;
 +                        comm->dth[th].nat        = 0;
 +                        comm->dth[th].nsend_zone = 0;
 +                    }
 +
 +                    if (comm->nth == 1)
 +                    {
 +                        cg0_th = cg0;
 +                        cg1_th = cg1;
 +                    }
 +                    else
 +                    {
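 +                        /* Divide the cg range evenly over the threads */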
 +                        cg0_th = cg0 + ((cg1 - cg0)* th   )/comm->nth;
 +                        cg1_th = cg0 + ((cg1 - cg0)*(th+1))/comm->nth;
 +                    }
 +                    
 +                    /* Get the cg's for this pulse in this zone */
 +                    get_zone_pulse_cgs(dd,zonei,zone,cg0_th,cg1_th,
 +                                       index_gl,cgindex,
 +                                       dim,dim_ind,dim0,dim1,dim2,
 +                                       r_comm2,r_bcomm2,
 +                                       box,tric_dist,
 +                                       normal,skew_fac2_d,skew_fac_01,
 +                                       v_d,v_0,v_1,&corners,sf2_round,
 +                                       bDistBonded,bBondComm,
 +                                       bDist2B,bDistMB,
 +                                       cg_cm,fr->cginfo,
 +                                       ind_p,
 +                                       ibuf_p,ibuf_nalloc_p,
 +                                       vbuf_p,
 +                                       nsend_p,nat_p,
 +                                       nsend_zone_p);
 +                }
 +
 +                /* Append data of threads>=1 to the communication buffers */
 +                for(th=1; th<comm->nth; th++)
 +                {
 +                    dd_comm_setup_work_t *dth;
 +                    int i,ns1;
 +
 +                    dth = &comm->dth[th];
 +
 +                    ns1 = nsend + dth->nsend_zone;
 +                    if (ns1 > ind->nalloc)
 +                    {
 +                        ind->nalloc = over_alloc_dd(ns1);
 +                        srenew(ind->index,ind->nalloc);
 +                    }
 +                    if (ns1 > comm->nalloc_int)
 +                    {
 +                        comm->nalloc_int = over_alloc_dd(ns1);
 +                        srenew(comm->buf_int,comm->nalloc_int);
 +                    }
 +                    if (ns1 > comm->vbuf.nalloc)
 +                    {
 +                        comm->vbuf.nalloc = over_alloc_dd(ns1);
 +                        srenew(comm->vbuf.v,comm->vbuf.nalloc);
 +                    }
 +
 +                    for(i=0; i<dth->nsend_zone; i++)
 +                    {
 +                        ind->index[nsend] = dth->ind.index[i];
 +                        comm->buf_int[nsend] = dth->ibuf[i];
 +                        copy_rvec(dth->vbuf.v[i],
 +                                  comm->vbuf.v[nsend]);
 +                        nsend++;
 +                    }
 +                    nat              += dth->nat;
 +                    ind->nsend[zone] += dth->nsend_zone;
 +                }
 +            }
 +            /* Clear the counts in case we do not have pbc */
 +            for(zone=nzone_send; zone<nzone; zone++)
 +            {
 +                ind->nsend[zone] = 0;
 +            }
 +            ind->nsend[nzone]   = nsend;
 +            ind->nsend[nzone+1] = nat;
 +            /* Communicate the number of cg's and atoms to receive */
 +            dd_sendrecv_int(dd, dim_ind, dddirBackward,
 +                            ind->nsend, nzone+2,
 +                            ind->nrecv, nzone+2);
 +            
 +            /* The rvec buffer is also required for atom buffers of size nsend
 +             * in dd_move_x and dd_move_f.
 +             */
 +            vec_rvec_check_alloc(&comm->vbuf,ind->nsend[nzone+1]);
 +
 +            if (p > 0)
 +            {
 +                /* We can receive in place if only the last zone is not empty */
 +                for(zone=0; zone<nzone-1; zone++)
 +                {
 +                    if (ind->nrecv[zone] > 0)
 +                    {
 +                        cd->bInPlace = FALSE;
 +                    }
 +                }
 +                if (!cd->bInPlace)
 +                {
 +                    /* The int buffer is only required here for the cg indices */
 +                    if (ind->nrecv[nzone] > comm->nalloc_int2)
 +                    {
 +                        comm->nalloc_int2 = over_alloc_dd(ind->nrecv[nzone]);
 +                        srenew(comm->buf_int2,comm->nalloc_int2);
 +                    }
 +                    /* The rvec buffer is also required for atom buffers
 +                     * of size nrecv in dd_move_x and dd_move_f.
 +                     */
 +                    i = max(cd->ind[0].nrecv[nzone+1],ind->nrecv[nzone+1]);
 +                    vec_rvec_check_alloc(&comm->vbuf2,i);
 +                }
 +            }
 +            
 +            /* Make space for the global cg indices */
 +            if (pos_cg + ind->nrecv[nzone] > dd->cg_nalloc
 +                || dd->cg_nalloc == 0)
 +            {
 +                dd->cg_nalloc = over_alloc_dd(pos_cg + ind->nrecv[nzone]);
 +                srenew(index_gl,dd->cg_nalloc);
 +                srenew(cgindex,dd->cg_nalloc+1);
 +            }
 +            /* Communicate the global cg indices */
 +            if (cd->bInPlace)
 +            {
 +                recv_i = index_gl + pos_cg;
 +            }
 +            else
 +            {
 +                recv_i = comm->buf_int2;
 +            }
 +            dd_sendrecv_int(dd, dim_ind, dddirBackward,
 +                            comm->buf_int, nsend,
 +                            recv_i,        ind->nrecv[nzone]);
 +
 +            /* Make space for cg_cm */
 +            dd_check_alloc_ncg(fr,state,f,pos_cg + ind->nrecv[nzone]);
 +            if (fr->cutoff_scheme == ecutsGROUP)
 +            {
 +                cg_cm = fr->cg_cm;
 +            }
 +            else
 +            {
 +                cg_cm = state->x;
 +            }
 +            /* Communicate cg_cm */
 +            if (cd->bInPlace)
 +            {
 +                recv_vr = cg_cm + pos_cg;
 +            }
 +            else
 +            {
 +                recv_vr = comm->vbuf2.v;
 +            }
 +            dd_sendrecv_rvec(dd, dim_ind, dddirBackward,
 +                             comm->vbuf.v, nsend,
 +                             recv_vr,      ind->nrecv[nzone]);
 +            
 +            /* Make the charge group index */
 +            if (cd->bInPlace)
 +            {
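 +                /* With in-place reception, only the last zone can have
 +                 * received cg's for pulses after the first.
 +                 */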
 +                zone = (p == 0 ? 0 : nzone - 1);
 +                while (zone < nzone)
 +                {
 +                    for(cg=0; cg<ind->nrecv[zone]; cg++)
 +                    {
 +                        cg_gl = index_gl[pos_cg];
 +                        fr->cginfo[pos_cg] = ddcginfo(cginfo_mb,cg_gl);
 +                        nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
 +                        cgindex[pos_cg+1] = cgindex[pos_cg] + nrcg;
 +                        if (bBondComm)
 +                        {
 +                            /* Update the charge group presence,
 +                             * so we can use it in the next pass of the loop.
 +                             */
 +                            comm->bLocalCG[cg_gl] = TRUE;
 +                        }
 +                        pos_cg++;
 +                    }
 +                    if (p == 0)
 +                    {
 +                        comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
 +                    }
 +                    zone++;
 +                    zone_cg_range[nzone+zone] = pos_cg;
 +                }
 +            }
 +            else
 +            {
 +                /* This part of the code is never executed with bBondComm. */
 +                merge_cg_buffers(nzone,cd,p,zone_cg_range,
 +                                 index_gl,recv_i,cg_cm,recv_vr,
 +                                 cgindex,fr->cginfo_mb,fr->cginfo);
 +                pos_cg += ind->nrecv[nzone];
 +            }
 +            nat_tot += ind->nrecv[nzone+1];
 +        }
 +        if (!cd->bInPlace)
 +        {
 +            /* Store the atom block for easy copying of communication buffers */
 +            make_cell2at_index(cd,nzone,zone_cg_range[nzone],cgindex);
 +        }
 +        nzone += nzone;
 +    }
 +    dd->index_gl = index_gl;
 +    dd->cgindex  = cgindex;
 +    
 +    dd->ncg_tot = zone_cg_range[zones->n];
 +    dd->nat_tot = nat_tot;
 +    comm->nat[ddnatHOME] = dd->nat_home;
 +    for(i=ddnatZONE; i<ddnatNR; i++)
 +    {
 +        comm->nat[i] = dd->nat_tot;
 +    }
 +
 +    if (!bBondComm)
 +    {
 +        /* We don't need to update cginfo, since that was already done above.
 +         * So we pass NULL for the forcerec.
 +         */
 +        dd_set_cginfo(dd->index_gl,dd->ncg_home,dd->ncg_tot,
 +                      NULL,comm->bLocalCG);
 +    }
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"Finished setting up DD communication, zones:");
 +        for(c=0; c<zones->n; c++)
 +        {
 +            fprintf(debug," %d",zones->cg_range[c+1]-zones->cg_range[c]);
 +        }
 +        fprintf(debug,"\n");
 +    }
 +}
 +
 +static void set_cg_boundaries(gmx_domdec_zones_t *zones)
 +{
 +    int c;
 +    
 +    for(c=0; c<zones->nizone; c++)
 +    {
 +        zones->izone[c].cg1  = zones->cg_range[c+1];
 +        zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
 +        zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
 +    }
 +}
 +
 +static void set_zones_size(gmx_domdec_t *dd,
 +                           matrix box,const gmx_ddbox_t *ddbox,
 +                           int zone_start,int zone_end)
 +{
 +    gmx_domdec_comm_t *comm;
 +    gmx_domdec_zones_t *zones;
 +    gmx_bool bDistMB;
 +    int  z,zi,zj0,zj1,d,dim;
 +    real rcs,rcmbs;
 +    int  i,j;
 +    real size_j,add_tric;
 +    real vol;
 +
 +    comm = dd->comm;
 +
 +    zones = &comm->zones;
 +
 +    /* Do we need to determine extra distances for multi-body bondeds? */
 +    bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
 +
 +    for(z=zone_start; z<zone_end; z++)
 +    {
 +        /* Copy cell limits to zone limits.
 +         * Valid for non-DD dims and non-shifted dims.
 +         */
 +        copy_rvec(comm->cell_x0,zones->size[z].x0);
 +        copy_rvec(comm->cell_x1,zones->size[z].x1);
 +    }
 +
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        dim = dd->dim[d];
 +
 +        for(z=0; z<zones->n; z++)
 +        {
 +            /* With a staggered grid we have different sizes
 +             * for non-shifted dimensions.
 +             */
 +            if (dd->bGridJump && zones->shift[z][dim] == 0)
 +            {
 +                if (d == 1)
 +                {
 +                    zones->size[z].x0[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min0;
 +                    zones->size[z].x1[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].max1;
 +                }
 +                else if (d == 2)
 +                {
 +                    zones->size[z].x0[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min0;
 +                    zones->size[z].x1[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].max1;
 +                }
 +            }
 +        }
 +
 +        rcs   = comm->cutoff;
 +        rcmbs = comm->cutoff_mbody;
 +        if (ddbox->tric_dir[dim])
 +        {
 +            rcs   /= ddbox->skew_fac[dim];
 +            rcmbs /= ddbox->skew_fac[dim];
 +        }
 +
 +        /* Set the lower limit for the shifted zone dimensions */
 +        for(z=zone_start; z<zone_end; z++)
 +        {
 +            if (zones->shift[z][dim] > 0)
 +            {
 +                dim = dd->dim[d];
 +                if (!dd->bGridJump || d == 0)
 +                {
 +                    zones->size[z].x0[dim] = comm->cell_x1[dim];
 +                    zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
 +                }
 +                else
 +                {
 +                    /* Here we take the lower limit of the zone from
 +                     * the lowest domain of the zone below.
 +                     */
 +                    if (z < 4)
 +                    {
 +                        zones->size[z].x0[dim] =
 +                             comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min1;
 +                    }
 +                    else
 +                    {
 +                        if (d == 1)
 +                        {
 +                            zones->size[z].x0[dim] =
 +                                zones->size[zone_perm[2][z-4]].x0[dim];
 +                        }
 +                        else
 +                        {
 +                            zones->size[z].x0[dim] =
 +                                comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min1;
 +                        }
 +                    }
 +                    /* A temporary limit, which is updated below */
 +                    zones->size[z].x1[dim] = zones->size[z].x0[dim];
 +
 +                    if (bDistMB)
 +                    {
 +                        for(zi=0; zi<zones->nizone; zi++)
 +                        {
 +                            if (zones->shift[zi][dim] == 0)
 +                            {
 +                                /* This takes the whole zone into account.
 +                                 * With multiple pulses this will lead
 +                                 * to a larger zone than strictly necessary.
 +                                 */
 +                                zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
 +                                                             zones->size[zi].x1[dim]+rcmbs);
 +                            }
 +                        }
 +                    }
 +                }
 +            }
 +        }
 +
 +        /* Loop over the i-zones to set the upper limit of each
 +         * j-zone they see.
 +         */
 +        for(zi=0; zi<zones->nizone; zi++)
 +        {
 +            if (zones->shift[zi][dim] == 0)
 +            {
 +                for(z=zones->izone[zi].j0; z<zones->izone[zi].j1; z++)
 +                {
 +                    if (zones->shift[z][dim] > 0)
 +                    {
 +                        zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
 +                                                     zones->size[zi].x1[dim]+rcs);
 +                    }
 +                }
 +            }
 +        }
 +    }
 +
 +    for(z=zone_start; z<zone_end; z++)
 +    {
 +        for(i=0; i<DIM; i++)
 +        {
 +            zones->size[z].bb_x0[i] = zones->size[z].x0[i];
 +            zones->size[z].bb_x1[i] = zones->size[z].x1[i];
 +
 +            for(j=i+1; j<ddbox->npbcdim; j++)
 +            {
 +                /* With 1D domain decomposition the cg's are not in
 +                 * the triclinic box, but triclinic x-y and rectangular y-z.
 +                 */
 +                if (box[j][i] != 0 &&
 +                    !(dd->ndim == 1 && i == YY && j == ZZ))
 +                {
 +                    /* Correct for triclinic offset of the lower corner */
 +                    add_tric = zones->size[z].x0[j]*box[j][i]/box[j][j];
 +                    zones->size[z].bb_x0[i] += add_tric;
 +                    zones->size[z].bb_x1[i] += add_tric;
 +
 +                    /* Correct for triclinic offset of the upper corner */
 +                    size_j = zones->size[z].x1[j] - zones->size[z].x0[j];
 +                    add_tric = size_j*box[j][i]/box[j][j];
 +
 +                    if (box[j][i] < 0)
 +                    {
 +                        zones->size[z].bb_x0[i] += add_tric;
 +                    }
 +                    else
 +                    {
 +                        zones->size[z].bb_x1[i] += add_tric;
 +                    }
 +                }
 +            }
 +        }
 +    }
 +
 +    if (zone_start == 0)
 +    {
 +        vol = 1;
 +        for(dim=0; dim<DIM; dim++)
 +        {
 +            vol *= zones->size[0].x1[dim] - zones->size[0].x0[dim];
 +        }
 +        zones->dens_zone0 = (zones->cg_range[1] - zones->cg_range[0])/vol;
 +    }
 +
 +    if (debug)
 +    {
 +        for(z=zone_start; z<zone_end; z++)
 +        {
 +            fprintf(debug,"zone %d    %6.3f - %6.3f  %6.3f - %6.3f  %6.3f - %6.3f\n",
 +                    z,
 +                    zones->size[z].x0[XX],zones->size[z].x1[XX],
 +                    zones->size[z].x0[YY],zones->size[z].x1[YY],
 +                    zones->size[z].x0[ZZ],zones->size[z].x1[ZZ]);
 +            fprintf(debug,"zone %d bb %6.3f - %6.3f  %6.3f - %6.3f  %6.3f - %6.3f\n",
 +                    z,
 +                    zones->size[z].bb_x0[XX],zones->size[z].bb_x1[XX],
 +                    zones->size[z].bb_x0[YY],zones->size[z].bb_x1[YY],
 +                    zones->size[z].bb_x0[ZZ],zones->size[z].bb_x1[ZZ]);
 +        }
 +    }
 +}
 +
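 +/* qsort comparator for charge groups: sort primarily on the ns grid cell
 + * index (nsc) and secondarily on the global topology index (ind_gl).
 + */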
 +static int comp_cgsort(const void *a,const void *b)
 +{
 +    int comp;
 +    
 +    gmx_cgsort_t *cga,*cgb;
 +    cga = (gmx_cgsort_t *)a;
 +    cgb = (gmx_cgsort_t *)b;
 +    
 +    comp = cga->nsc - cgb->nsc;
 +    if (comp == 0)
 +    {
 +        comp = cga->ind_gl - cgb->ind_gl;
 +    }
 +    
 +    return comp;
 +}
 +
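 +/* Apply the permutation given by sort to an integer array a of length n,
 + * using buf as scratch space of at least n elements.
 + */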
 +static void order_int_cg(int n,const gmx_cgsort_t *sort,
 +                         int *a,int *buf)
 +{
 +    int i;
 +    
 +    /* Order the data */
 +    for(i=0; i<n; i++)
 +    {
 +        buf[i] = a[sort[i].ind];
 +    }
 +    
 +    /* Copy back to the original array */
 +    for(i=0; i<n; i++)
 +    {
 +        a[i] = buf[i];
 +    }
 +}
 +
 +static void order_vec_cg(int n,const gmx_cgsort_t *sort,
 +                         rvec *v,rvec *buf)
 +{
 +    int i;
 +    
 +    /* Order the data */
 +    for(i=0; i<n; i++)
 +    {
 +        copy_rvec(v[sort[i].ind],buf[i]);
 +    }
 +    
 +    /* Copy back to the original array */
 +    for(i=0; i<n; i++)
 +    {
 +        copy_rvec(buf[i],v[i]);
 +    }
 +}
 +
 +static void order_vec_atom(int ncg,const int *cgindex,const gmx_cgsort_t *sort,
 +                           rvec *v,rvec *buf)
 +{
 +    int a,atot,cg,cg0,cg1,i;
 +    
 +    if (cgindex == NULL)
 +    {
 +        /* Avoid the useless loop over the atoms within a cg */
 +        order_vec_cg(ncg,sort,v,buf);
 +
 +        return;
 +    }
 +
 +    /* Order the data */
 +    a = 0;
 +    for(cg=0; cg<ncg; cg++)
 +    {
 +        cg0 = cgindex[sort[cg].ind];
 +        cg1 = cgindex[sort[cg].ind+1];
 +        for(i=cg0; i<cg1; i++)
 +        {
 +            copy_rvec(v[i],buf[a]);
 +            a++;
 +        }
 +    }
 +    atot = a;
 +    
 +    /* Copy back to the original array */
 +    for(a=0; a<atot; a++)
 +    {
 +        copy_rvec(buf[a],v[a]);
 +    }
 +}
 +
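 +/* Merge two lists of charge groups into sort1: sort2 is already ordered,
 + * sort_new is qsorted here first, then both lists are merged in one pass.
 + */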
 +static void ordered_sort(int nsort2,gmx_cgsort_t *sort2,
 +                         int nsort_new,gmx_cgsort_t *sort_new,
 +                         gmx_cgsort_t *sort1)
 +{
 +    int i1,i2,i_new;
 +    
 +    /* The new indices are not very ordered, so we qsort them */
 +    qsort_threadsafe(sort_new,nsort_new,sizeof(sort_new[0]),comp_cgsort);
 +    
 +    /* sort2 is already ordered, so now we can merge the two arrays */
 +    i1 = 0;
 +    i2 = 0;
 +    i_new = 0;
 +    while(i2 < nsort2 || i_new < nsort_new)
 +    {
 +        if (i2 == nsort2)
 +        {
 +            sort1[i1++] = sort_new[i_new++];
 +        }
 +        else if (i_new == nsort_new)
 +        {
 +            sort1[i1++] = sort2[i2++];
 +        }
 +        else if (sort2[i2].nsc < sort_new[i_new].nsc ||
 +                 (sort2[i2].nsc == sort_new[i_new].nsc &&
 +                  sort2[i2].ind_gl < sort_new[i_new].ind_gl))
 +        {
 +            sort1[i1++] = sort2[i2++];
 +        }
 +        else
 +        {
 +            sort1[i1++] = sort_new[i_new++];
 +        }
 +    }
 +}
 +
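 +/* Determine the new charge group order for the group cut-off scheme.
 + * When the previous order is available (ncg_home_old >= 0), only the moved
 + * charge groups are qsorted and then merged with the stationary ones;
 + * otherwise all home charge groups are qsorted. Returns the number of
 + * charge groups that remain on this node.
 + */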
 +static int dd_sort_order(gmx_domdec_t *dd,t_forcerec *fr,int ncg_home_old)
 +{
 +    gmx_domdec_sort_t *sort;
 +    gmx_cgsort_t *cgsort,*sort_i;
 +    int  ncg_new,nsort2,nsort_new,i,*a,moved,*ibuf;
 +    int  sort_last,sort_skip;
 +
 +    sort = dd->comm->sort;
 +
 +    a = fr->ns.grid->cell_index;
 +
 +    moved = NSGRID_SIGNAL_MOVED_FAC*fr->ns.grid->ncells;
 +
 +    if (ncg_home_old >= 0)
 +    {
 +        /* The charge groups that remained in the same ns grid cell
 +         * are already completely ordered. So we can sort efficiently
 +         * by sorting only the charge groups that moved and merging
 +         * them back into the stationary list.
 +         */
 +        ncg_new = 0;
 +        nsort2 = 0;
 +        nsort_new = 0;
 +        for(i=0; i<dd->ncg_home; i++)
 +        {
 +            /* Check if this cg did not move to another node */
 +            if (a[i] < moved)
 +            {
 +                if (i >= ncg_home_old || a[i] != sort->sort[i].nsc)
 +                {
 +                    /* This cg is new on this node or moved to another ns grid cell */
 +                    if (nsort_new >= sort->sort_new_nalloc)
 +                    {
 +                        sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
 +                        srenew(sort->sort_new,sort->sort_new_nalloc);
 +                    }
 +                    sort_i = &(sort->sort_new[nsort_new++]);
 +                }
 +                else
 +                {
 +                    /* This cg did not move */
 +                    sort_i = &(sort->sort2[nsort2++]);
 +                }
 +                /* Sort on the ns grid cell indices
 +                 * and the global topology index.
 +                 * index_gl is irrelevant with cell ns,
 +                 * but we set it here anyhow to avoid a conditional.
 +                 */
 +                sort_i->nsc    = a[i];
 +                sort_i->ind_gl = dd->index_gl[i];
 +                sort_i->ind    = i;
 +                ncg_new++;
 +            }
 +        }
 +        if (debug)
 +        {
 +            fprintf(debug,"ordered sort cgs: stationary %d moved %d\n",
 +                    nsort2,nsort_new);
 +        }
 +        /* Sort efficiently */
 +        ordered_sort(nsort2,sort->sort2,nsort_new,sort->sort_new,
 +                     sort->sort);
 +    }
 +    else
 +    {
 +        cgsort = sort->sort;
 +        ncg_new = 0;
 +        for(i=0; i<dd->ncg_home; i++)
 +        {
 +            /* Sort on the ns grid cell indices
 +             * and the global topology index
 +             */
 +            cgsort[i].nsc    = a[i];
 +            cgsort[i].ind_gl = dd->index_gl[i];
 +            cgsort[i].ind    = i;
 +            if (cgsort[i].nsc < moved)
 +            {
 +                ncg_new++;
 +            }
 +        }
 +        if (debug)
 +        {
 +            fprintf(debug,"qsort cgs: %d new home %d\n",dd->ncg_home,ncg_new);
 +        }
 +        /* Determine the order of the charge groups using qsort */
 +        qsort_threadsafe(cgsort,dd->ncg_home,sizeof(cgsort[0]),comp_cgsort);
 +    }
 +
 +    return ncg_new;
 +}
 +
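 +/* Determine the atom order for the Verlet scheme directly from the nbnxn
 + * grid; entries with a negative index do not correspond to home atoms and
 + * are skipped. Returns the new number of home atoms.
 + */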
 +static int dd_sort_order_nbnxn(gmx_domdec_t *dd,t_forcerec *fr)
 +{
 +    gmx_cgsort_t *sort;
 +    int  ncg_new,i,*a,na;
 +
 +    sort = dd->comm->sort->sort;
 +
 +    nbnxn_get_atomorder(fr->nbv->nbs,&a,&na);
 +
 +    ncg_new = 0;
 +    for(i=0; i<na; i++)
 +    {
 +        if (a[i] >= 0)
 +        {
 +            sort[ncg_new].ind = a[i];
 +            ncg_new++;
 +        }
 +    }
 +
 +    return ncg_new;
 +}
 +
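 +/* Sort the local state (coordinates, velocities, etc.), cg_cm, the global
 + * and local charge group indices and cginfo according to the new charge
 + * group order, and update the home charge group and atom counts.
 + */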
 +static void dd_sort_state(gmx_domdec_t *dd,int ePBC,
 +                          rvec *cgcm,t_forcerec *fr,t_state *state,
 +                          int ncg_home_old)
 +{
 +    gmx_domdec_sort_t *sort;
 +    gmx_cgsort_t *cgsort,*sort_i;
 +    int  *cgindex;
 +    int  ncg_new,i,*ibuf,cgsize;
 +    rvec *vbuf;
 +    
 +    sort = dd->comm->sort;
 +    
 +    if (dd->ncg_home > sort->sort_nalloc)
 +    {
 +        sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
 +        srenew(sort->sort,sort->sort_nalloc);
 +        srenew(sort->sort2,sort->sort_nalloc);
 +    }
 +    cgsort = sort->sort;
 +
 +    switch (fr->cutoff_scheme)
 +    {
 +    case ecutsGROUP:
 +        ncg_new = dd_sort_order(dd,fr,ncg_home_old);
 +        break;
 +    case ecutsVERLET:
 +        ncg_new = dd_sort_order_nbnxn(dd,fr);
 +        break;
 +    default:
 +        gmx_incons("unimplemented");
 +        ncg_new = 0;
 +    }
 +
 +    /* We alloc with the old size, since cgindex is still old */
 +    vec_rvec_check_alloc(&dd->comm->vbuf,dd->cgindex[dd->ncg_home]);
 +    vbuf = dd->comm->vbuf.v;
 +    
 +    if (dd->comm->bCGs)
 +    {
 +        cgindex = dd->cgindex;
 +    }
 +    else
 +    {
 +        cgindex = NULL;
 +    }
 +
 +    /* Remove the charge groups which are no longer at home here */
 +    dd->ncg_home = ncg_new;
 +    if (debug)
 +    {
 +        fprintf(debug,"Set the new home charge group count to %d\n",
 +                dd->ncg_home);
 +    }
 +    
 +    /* Reorder the state */
 +    for(i=0; i<estNR; i++)
 +    {
 +        if (EST_DISTR(i) && (state->flags & (1<<i)))
 +        {
 +            switch (i)
 +            {
 +            case estX:
 +                order_vec_atom(dd->ncg_home,cgindex,cgsort,state->x,vbuf);
 +                break;
 +            case estV:
 +                order_vec_atom(dd->ncg_home,cgindex,cgsort,state->v,vbuf);
 +                break;
 +            case estSDX:
 +                order_vec_atom(dd->ncg_home,cgindex,cgsort,state->sd_X,vbuf);
 +                break;
 +            case estCGP:
 +                order_vec_atom(dd->ncg_home,cgindex,cgsort,state->cg_p,vbuf);
 +                break;
 +            case estLD_RNG:
 +            case estLD_RNGI:
 +            case estDISRE_INITF:
 +            case estDISRE_RM3TAV:
 +            case estORIRE_INITF:
 +            case estORIRE_DTAV:
 +                /* No ordering required */
 +                break;
 +            default:
 +                gmx_incons("Unknown state entry encountered in dd_sort_state");
 +                break;
 +            }
 +        }
 +    }
 +    if (fr->cutoff_scheme == ecutsGROUP)
 +    {
 +        /* Reorder cgcm */
 +        order_vec_cg(dd->ncg_home,cgsort,cgcm,vbuf);
 +    }
 +    
 +    if (dd->ncg_home+1 > sort->ibuf_nalloc)
 +    {
 +        sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
 +        srenew(sort->ibuf,sort->ibuf_nalloc);
 +    }
 +    ibuf = sort->ibuf;
 +    /* Reorder the global cg index */
 +    order_int_cg(dd->ncg_home,cgsort,dd->index_gl,ibuf);
 +    /* Reorder the cginfo */
 +    order_int_cg(dd->ncg_home,cgsort,fr->cginfo,ibuf);
 +    /* Rebuild the local cg index */
 +    if (dd->comm->bCGs)
 +    {
 +        ibuf[0] = 0;
 +        for(i=0; i<dd->ncg_home; i++)
 +        {
 +            cgsize = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
 +            ibuf[i+1] = ibuf[i] + cgsize;
 +        }
 +        for(i=0; i<dd->ncg_home+1; i++)
 +        {
 +            dd->cgindex[i] = ibuf[i];
 +        }
 +    }
 +    else
 +    {
 +        for(i=0; i<dd->ncg_home+1; i++)
 +        {
 +            dd->cgindex[i] = i;
 +        }
 +    }
 +    /* Set the home atom number */
 +    dd->nat_home = dd->cgindex[dd->ncg_home];
 +
 +    if (fr->cutoff_scheme == ecutsVERLET)
 +    {
 +        /* The atoms are now exactly in grid order, update the grid order */
 +        nbnxn_set_atomorder(fr->nbv->nbs);
 +    }
 +    else
 +    {
 +        /* Copy the sorted ns cell indices back to the ns grid struct */
 +        for(i=0; i<dd->ncg_home; i++)
 +        {
 +            fr->ns.grid->cell_index[i] = cgsort[i].nsc;
 +        }
 +        fr->ns.grid->nr = dd->ncg_home;
 +    }
 +}
 +
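 +/* Accumulate the number of atoms communicated for each category
 + * (zones, vsites, constraints) into the run statistics.
 + */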
 +static void add_dd_statistics(gmx_domdec_t *dd)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int ddnat;
 +    
 +    comm = dd->comm;
 +    
 +    for(ddnat=ddnatZONE; ddnat<ddnatNR; ddnat++)
 +    {
 +        comm->sum_nat[ddnat-ddnatZONE] +=
 +            comm->nat[ddnat] - comm->nat[ddnat-1];
 +    }
 +    comm->ndecomp++;
 +}
 +
 +void reset_dd_statistics_counters(gmx_domdec_t *dd)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int ddnat;
 +    
 +    comm = dd->comm;
 +
 +    /* Reset all the statistics and counters for total run counting */
 +    for(ddnat=ddnatZONE; ddnat<ddnatNR; ddnat++)
 +    {
 +        comm->sum_nat[ddnat-ddnatZONE] = 0;
 +    }
 +    comm->ndecomp = 0;
 +    comm->nload = 0;
 +    comm->load_step = 0;
 +    comm->load_sum = 0;
 +    comm->load_max = 0;
 +    clear_ivec(comm->load_lim);
 +    comm->load_mdf = 0;
 +    comm->load_pme = 0;
 +}
 +
 +void print_dd_statistics(t_commrec *cr,t_inputrec *ir,FILE *fplog)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int ddnat;
 +    double av;
 +   
 +    comm = cr->dd->comm;
 +    
 +    gmx_sumd(ddnatNR-ddnatZONE,comm->sum_nat,cr);
 +    
 +    if (fplog == NULL)
 +    {
 +        return;
 +    }
 +    
 +    fprintf(fplog,"\n    D O M A I N   D E C O M P O S I T I O N   S T A T I S T I C S\n\n");
 +            
 +    for(ddnat=ddnatZONE; ddnat<ddnatNR; ddnat++)
 +    {
 +        av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
 +        switch(ddnat)
 +        {
 +        case ddnatZONE:
 +            fprintf(fplog,
 +                    " av. #atoms communicated per step for force:  %d x %.1f\n",
 +                    2,av);
 +            break;
 +        case ddnatVSITE:
 +            if (cr->dd->vsite_comm)
 +            {
 +                fprintf(fplog,
 +                        " av. #atoms communicated per step for vsites: %d x %.1f\n",
 +                        (EEL_PME(ir->coulombtype) || ir->coulombtype==eelEWALD) ? 3 : 2,
 +                        av);
 +            }
 +            break;
 +        case ddnatCON:
 +            if (cr->dd->constraint_comm)
 +            {
 +                fprintf(fplog,
 +                        " av. #atoms communicated per step for LINCS:  %d x %.1f\n",
 +                        1 + ir->nLincsIter,av);
 +            }
 +            break;
 +        default:
 +            gmx_incons(" Unknown type for DD statistics");
 +        }
 +    }
 +    fprintf(fplog,"\n");
 +    
 +    if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
 +    {
 +        print_dd_load_av(fplog,cr->dd);
 +    }
 +}
 +
 +void dd_partition_system(FILE            *fplog,
 +                         gmx_large_int_t      step,
 +                         t_commrec       *cr,
 +                         gmx_bool            bMasterState,
 +                         int             nstglobalcomm,
 +                         t_state         *state_global,
 +                         gmx_mtop_t      *top_global,
 +                         t_inputrec      *ir,
 +                         t_state         *state_local,
 +                         rvec            **f,
 +                         t_mdatoms       *mdatoms,
 +                         gmx_localtop_t  *top_local,
 +                         t_forcerec      *fr,
 +                         gmx_vsite_t     *vsite,
 +                         gmx_shellfc_t   shellfc,
 +                         gmx_constr_t    constr,
 +                         t_nrnb          *nrnb,
 +                         gmx_wallcycle_t wcycle,
 +                         gmx_bool            bVerbose)
 +{
 +    gmx_domdec_t *dd;
 +    gmx_domdec_comm_t *comm;
 +    gmx_ddbox_t ddbox={0};
 +    t_block *cgs_gl;
 +    gmx_large_int_t step_pcoupl;
 +    rvec cell_ns_x0,cell_ns_x1;
 +    int  i,j,n,cg0=0,ncg_home_old=-1,ncg_moved,nat_f_novirsum;
 +    gmx_bool bBoxChanged,bNStGlobalComm,bDoDLB,bCheckDLB,bTurnOnDLB,bLogLoad;
 +    gmx_bool bRedist,bSortCG,bResortAll;
 +    ivec ncells_old={0,0,0},ncells_new={0,0,0},np;
 +    real grid_density;
 +    char sbuf[22];
 +      
 +    dd = cr->dd;
 +    comm = dd->comm;
 +
 +    bBoxChanged = (bMasterState || DEFORM(*ir));
 +    if (ir->epc != epcNO)
 +    {
 +        /* With nstpcouple > 1 pressure coupling happens
 +         * one step after calculating the pressure.
 +         * Box scaling happens at the end of the MD step,
 +         * after the DD partitioning.
 +         * We therefore have to do DLB in the first partitioning
 +         * after an MD step where P-coupling occurred.
 +         * We need to determine the last step in which p-coupling occurred.
 +         * MRS -- need to validate this for vv?
 +         */
 +        n = ir->nstpcouple;
 +        if (n == 1)
 +        {
 +            step_pcoupl = step - 1;
 +        }
 +        else
 +        {
 +            step_pcoupl = ((step - 1)/n)*n + 1;
 +        }
 +        if (step_pcoupl >= comm->partition_step)
 +        {
 +            bBoxChanged = TRUE;
 +        }
 +    }
 +
 +    bNStGlobalComm = (step % nstglobalcomm == 0);
 +
 +    if (!comm->bDynLoadBal)
 +    {
 +        bDoDLB = FALSE;
 +    }
 +    else
 +    {
 +        /* Should we do dynamic load balancing this step?
 +         * Since it requires (possibly expensive) global communication,
 +         * we might want to do DLB less frequently.
 +         */
 +        if (bBoxChanged || ir->epc != epcNO)
 +        {
 +            bDoDLB = bBoxChanged;
 +        }
 +        else
 +        {
 +            bDoDLB = bNStGlobalComm;
 +        }
 +    }
 +
 +    /* Check if we have recorded loads on the nodes */
 +    if (comm->bRecordLoad && dd_load_count(comm))
 +    {
 +        if (comm->eDLB == edlbAUTO && !comm->bDynLoadBal)
 +        {
 +            /* Check if we should use DLB at the second partitioning
 +             * and every 100 partitionings,
 +             * so the extra communication cost is negligible.
 +             */
 +            n = max(100,nstglobalcomm);
 +            bCheckDLB = (comm->n_load_collect == 0 ||
 +                         comm->n_load_have % n == n-1);
 +        }
 +        else
 +        {
 +            bCheckDLB = FALSE;
 +        }
 +        
 +        /* Print the load to the log file every nstlog steps, and at the first and last step */
 +        bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
 +                    comm->n_load_collect == 0 ||
 +                    (ir->nsteps >= 0 &&
 +                     (step + ir->nstlist > ir->init_step + ir->nsteps)));
 +
 +        /* Avoid extra communication due to verbose screen output
 +         * when nstglobalcomm is set.
 +         */
 +        if (bDoDLB || bLogLoad || bCheckDLB ||
 +            (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
 +        {
 +            get_load_distribution(dd,wcycle);
 +            if (DDMASTER(dd))
 +            {
 +                if (bLogLoad)
 +                {
 +                    dd_print_load(fplog,dd,step-1);
 +                }
 +                if (bVerbose)
 +                {
 +                    dd_print_load_verbose(dd);
 +                }
 +            }
 +            comm->n_load_collect++;
 +
 +            if (bCheckDLB)
 +            {
 +                /* Since the timings are node dependent, the master decides */
 +                if (DDMASTER(dd))
 +                {
 +                    bTurnOnDLB =
 +                        (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS);
 +                    if (debug)
 +                    {
 +                        fprintf(debug,"step %s, imb loss %f\n",
 +                                gmx_step_str(step,sbuf),
 +                                dd_force_imb_perf_loss(dd));
 +                    }
 +                }
 +                dd_bcast(dd,sizeof(bTurnOnDLB),&bTurnOnDLB);
 +                if (bTurnOnDLB)
 +                {
 +                    turn_on_dlb(fplog,cr,step);
 +                    bDoDLB = TRUE;
 +                }
 +            }
 +        }
 +        comm->n_load_have++;
 +    }
 +
 +    cgs_gl = &comm->cgs_gl;
 +
 +    bRedist = FALSE;
 +    if (bMasterState)
 +    {
 +        /* Clear the old state */
 +        clear_dd_indices(dd,0,0);
 +
 +        set_ddbox(dd,bMasterState,cr,ir,state_global->box,
 +                  TRUE,cgs_gl,state_global->x,&ddbox);
 +    
 +        get_cg_distribution(fplog,step,dd,cgs_gl,
 +                            state_global->box,&ddbox,state_global->x);
 +        
 +        dd_distribute_state(dd,cgs_gl,
 +                            state_global,state_local,f);
 +        
 +        dd_make_local_cgs(dd,&top_local->cgs);
 +        
 +        /* Ensure that we have space for the new distribution */
 +        dd_check_alloc_ncg(fr,state_local,f,dd->ncg_home);
 +
 +        if (fr->cutoff_scheme == ecutsGROUP)
 +        {
 +            calc_cgcm(fplog,0,dd->ncg_home,
 +                      &top_local->cgs,state_local->x,fr->cg_cm);
 +        }
 +        
 +        inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);
 +        
 +        dd_set_cginfo(dd->index_gl,0,dd->ncg_home,fr,comm->bLocalCG);
 +
 +        cg0 = 0;
 +    }
 +    else if (state_local->ddp_count != dd->ddp_count)
 +    {
 +        if (state_local->ddp_count > dd->ddp_count)
 +        {
 +            gmx_fatal(FARGS,"Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)",state_local->ddp_count,dd->ddp_count);
 +        }
 +        
 +        if (state_local->ddp_count_cg_gl != state_local->ddp_count)
 +        {
 +            gmx_fatal(FARGS,"Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)",state_local->ddp_count_cg_gl,state_local->ddp_count);
 +        }
 +        
 +        /* Clear the old state */
 +        clear_dd_indices(dd,0,0);
 +        
 +        /* Build the new indices */
 +        rebuild_cgindex(dd,cgs_gl->index,state_local);
 +        make_dd_indices(dd,cgs_gl->index,0);
 +
 +        if (fr->cutoff_scheme == ecutsGROUP)
 +        {
 +            /* Redetermine the cg COMs */
 +            calc_cgcm(fplog,0,dd->ncg_home,
 +                      &top_local->cgs,state_local->x,fr->cg_cm);
 +        }
 +        
 +        inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);
 +
 +        dd_set_cginfo(dd->index_gl,0,dd->ncg_home,fr,comm->bLocalCG);
 +
 +        set_ddbox(dd,bMasterState,cr,ir,state_local->box,
 +                  TRUE,&top_local->cgs,state_local->x,&ddbox);
 +
 +        bRedist = comm->bDynLoadBal;
 +    }
 +    else
 +    {
 +        /* We have the full state, only redistribute the cgs */
 +
 +        /* Clear the non-home indices */
 +        clear_dd_indices(dd,dd->ncg_home,dd->nat_home);
 +
 +        /* Avoid global communication for dim's without pbc and -gcom */
 +        if (!bNStGlobalComm)
 +        {
 +            copy_rvec(comm->box0    ,ddbox.box0    );
 +            copy_rvec(comm->box_size,ddbox.box_size);
 +        }
 +        set_ddbox(dd,bMasterState,cr,ir,state_local->box,
 +                  bNStGlobalComm,&top_local->cgs,state_local->x,&ddbox);
 +
 +        bBoxChanged = TRUE;
 +        bRedist = TRUE;
 +    }
 +    /* For dim's without pbc and -gcom */
 +    copy_rvec(ddbox.box0    ,comm->box0    );
 +    copy_rvec(ddbox.box_size,comm->box_size);
 +    
 +    set_dd_cell_sizes(dd,&ddbox,dynamic_dd_box(&ddbox,ir),bMasterState,bDoDLB,
 +                      step,wcycle);
 +    
 +    if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
 +    {
 +        write_dd_grid_pdb("dd_grid",step,dd,state_local->box,&ddbox);
 +    }
 +    
 +    /* Check if we should sort the charge groups */
 +    if (comm->nstSortCG > 0)
 +    {
 +        bSortCG = (bMasterState ||
 +                   (bRedist && (step % comm->nstSortCG == 0)));
 +    }
 +    else
 +    {
 +        bSortCG = FALSE;
 +    }
 +
 +    ncg_home_old = dd->ncg_home;
 +
 +    ncg_moved = 0;
 +    if (bRedist)
 +    {
 +        wallcycle_sub_start(wcycle,ewcsDD_REDIST);
 +
 +        dd_redistribute_cg(fplog,step,dd,ddbox.tric_dir,
 +                           state_local,f,fr,mdatoms,
 +                           !bSortCG,nrnb,&cg0,&ncg_moved);
 +
 +        wallcycle_sub_stop(wcycle,ewcsDD_REDIST);
 +    }
 +    
 +    get_nsgrid_boundaries(ddbox.nboundeddim,state_local->box,
 +                          dd,&ddbox,
 +                          &comm->cell_x0,&comm->cell_x1,
 +                          dd->ncg_home,fr->cg_cm,
 +                          cell_ns_x0,cell_ns_x1,&grid_density);
 +
 +    if (bBoxChanged)
 +    {
 +        comm_dd_ns_cell_sizes(dd,&ddbox,cell_ns_x0,cell_ns_x1,step);
 +    }
 +
 +    switch (fr->cutoff_scheme)
 +    {
 +    case ecutsGROUP:
 +        copy_ivec(fr->ns.grid->n,ncells_old);
 +        grid_first(fplog,fr->ns.grid,dd,&ddbox,fr->ePBC,
 +                   state_local->box,cell_ns_x0,cell_ns_x1,
 +                   fr->rlistlong,grid_density);
 +        break;
 +    case ecutsVERLET:
 +        nbnxn_get_ncells(fr->nbv->nbs,&ncells_old[XX],&ncells_old[YY]);
 +        break;
 +    default:
 +        gmx_incons("unimplemented");
 +    }
 +    /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
 +    copy_ivec(ddbox.tric_dir,comm->tric_dir);
 +
 +    if (bSortCG)
 +    {
 +        wallcycle_sub_start(wcycle,ewcsDD_GRID);
 +
 +        /* Sort the state on charge group position.
 +         * This enables exact restarts from this step.
 +         * It also improves performance by about 15% with larger numbers
 +         * of atoms per node.
 +         */
 +        
 +        /* Fill the ns grid with the home cell,
 +         * so we can sort with the indices.
 +         */
 +        set_zones_ncg_home(dd);
 +
 +        switch (fr->cutoff_scheme)
 +        {
 +        case ecutsVERLET:
 +            set_zones_size(dd,state_local->box,&ddbox,0,1);
 +
 +            nbnxn_put_on_grid(fr->nbv->nbs,fr->ePBC,state_local->box,
 +                              0,
 +                              comm->zones.size[0].bb_x0,
 +                              comm->zones.size[0].bb_x1,
 +                              0,dd->ncg_home,
 +                              comm->zones.dens_zone0,
 +                              fr->cginfo,
 +                              state_local->x,
 +                              ncg_moved,comm->moved,
 +                              fr->nbv->grp[eintLocal].kernel_type,
 +                              fr->nbv->grp[eintLocal].nbat);
 +
 +            nbnxn_get_ncells(fr->nbv->nbs,&ncells_new[XX],&ncells_new[YY]);
 +            break;
 +        case ecutsGROUP:
 +            fill_grid(fplog,&comm->zones,fr->ns.grid,dd->ncg_home,
 +                      0,dd->ncg_home,fr->cg_cm);
 +            
 +            copy_ivec(fr->ns.grid->n,ncells_new);
 +            break;
 +        default:
 +            gmx_incons("unimplemented");
 +        }
 +
 +        bResortAll = bMasterState;
 +   
 +        /* Check if we can use the old order and ns grid cell indices
 +         * of the charge groups to sort the charge groups efficiently.
 +         */
 +        if (ncells_new[XX] != ncells_old[XX] ||
 +            ncells_new[YY] != ncells_old[YY] ||
 +            ncells_new[ZZ] != ncells_old[ZZ])
 +        {
 +            bResortAll = TRUE;
 +        }
 +
 +        if (debug)
 +        {
 +            fprintf(debug,"Step %s, sorting the %d home charge groups\n",
 +                    gmx_step_str(step,sbuf),dd->ncg_home);
 +        }
 +        dd_sort_state(dd,ir->ePBC,fr->cg_cm,fr,state_local,
 +                      bResortAll ? -1 : ncg_home_old);
 +        /* Rebuild all the indices */
 +        cg0 = 0;
 +        ga2la_clear(dd->ga2la);
 +
 +        wallcycle_sub_stop(wcycle,ewcsDD_GRID);
 +    }
 +
 +    wallcycle_sub_start(wcycle,ewcsDD_SETUPCOMM);
 +    
 +    /* Set up the communication and communicate the coordinates */
 +    setup_dd_communication(dd,state_local->box,&ddbox,fr,state_local,f);
 +    
 +    /* Set the indices */
 +    make_dd_indices(dd,cgs_gl->index,cg0);
 +
 +    /* Set the charge group boundaries for neighbor searching */
 +    set_cg_boundaries(&comm->zones);
 +
 +    if (fr->cutoff_scheme == ecutsVERLET)
 +    {
 +        set_zones_size(dd,state_local->box,&ddbox,
 +                       bSortCG ? 1 : 0,comm->zones.n);
 +    }
 +
 +    wallcycle_sub_stop(wcycle,ewcsDD_SETUPCOMM);
 +
 +    /*
 +    write_dd_pdb("dd_home",step,"dump",top_global,cr,
 +                 -1,state_local->x,state_local->box);
 +    */
 +
 +    wallcycle_sub_start(wcycle,ewcsDD_MAKETOP);
 +    
 +    /* Extract a local topology from the global topology */
 +    for(i=0; i<dd->ndim; i++)
 +    {
 +        np[dd->dim[i]] = comm->cd[i].np;
 +    }
 +    dd_make_local_top(fplog,dd,&comm->zones,dd->npbcdim,state_local->box,
 +                      comm->cellsize_min,np,
 +                      fr,
 +                      fr->cutoff_scheme==ecutsGROUP ? fr->cg_cm : state_local->x,
 +                      vsite,top_global,top_local);
 +
 +    wallcycle_sub_stop(wcycle,ewcsDD_MAKETOP);
 +
 +    wallcycle_sub_start(wcycle,ewcsDD_MAKECONSTR);
 +    
 +    /* Set up the special atom communication */
 +    n = comm->nat[ddnatZONE];
 +    for(i=ddnatZONE+1; i<ddnatNR; i++)
 +    {
 +        switch(i)
 +        {
 +        case ddnatVSITE:
 +            if (vsite && vsite->n_intercg_vsite)
 +            {
 +                n = dd_make_local_vsites(dd,n,top_local->idef.il);
 +            }
 +            break;
 +        case ddnatCON:
 +            if (dd->bInterCGcons || dd->bInterCGsettles)
 +            {
 +                /* Only for inter-cg constraints do we need special code */
 +                n = dd_make_local_constraints(dd,n,top_global,fr->cginfo,
 +                                              constr,ir->nProjOrder,
 +                                              top_local->idef.il);
 +            }
 +            break;
 +        default:
 +            gmx_incons("Unknown special atom type setup");
 +        }
 +        comm->nat[i] = n;
 +    }
 +
 +    wallcycle_sub_stop(wcycle,ewcsDD_MAKECONSTR);
 +
 +    wallcycle_sub_start(wcycle,ewcsDD_TOPOTHER);
 +
 +    /* Make space for the extra coordinates for virtual site
 +     * or constraint communication.
 +     */
 +    state_local->natoms = comm->nat[ddnatNR-1];
 +    if (state_local->natoms > state_local->nalloc)
 +    {
 +        dd_realloc_state(state_local,f,state_local->natoms);
 +    }
 +
 +    if (fr->bF_NoVirSum)
 +    {
 +        if (vsite && vsite->n_intercg_vsite)
 +        {
 +            nat_f_novirsum = comm->nat[ddnatVSITE];
 +        }
 +        else
 +        {
 +            if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
 +            {
 +                nat_f_novirsum = dd->nat_tot;
 +            }
 +            else
 +            {
 +                nat_f_novirsum = dd->nat_home;
 +            }
 +        }
 +    }
 +    else
 +    {
 +        nat_f_novirsum = 0;
 +    }
 +
 +    /* Set the number of atoms required for the force calculation.
 +     * Forces need to be constrained when using a twin-range setup
 +     * or with energy minimization. For simple simulations we could
 +     * avoid some allocation, zeroing and copying, but this is
 +     * probably not worth the complications and checking.
 +     */
 +    forcerec_set_ranges(fr,dd->ncg_home,dd->ncg_tot,
 +                        dd->nat_tot,comm->nat[ddnatCON],nat_f_novirsum);
 +
 +    /* We make all the mdatoms up to nat_tot_con.
 +     * We could save some work by only setting invmass
 +     * between nat_tot and nat_tot_con.
 +     */
 +    /* This call also sets the new number of home particles to dd->nat_home */
 +    atoms2md(top_global,ir,
 +             comm->nat[ddnatCON],dd->gatindex,0,dd->nat_home,mdatoms);
 +
 +    /* Now we have the charges we can sort the FE interactions */
 +    dd_sort_local_top(dd,mdatoms,top_local);
 +
 +    if (vsite != NULL)
 +    {
 +        /* Now we have updated mdatoms, we can do the last vsite bookkeeping */
 +        split_vsites_over_threads(top_local->idef.il,mdatoms,FALSE,vsite);
 +    }
 +
 +    if (shellfc)
 +    {
 +        /* Make the local shell stuff, currently no communication is done */
 +        make_local_shells(cr,mdatoms,shellfc);
 +    }
 +    
 +    if (ir->implicit_solvent)
 +    {
 +        make_local_gb(cr,fr->born,ir->gb_algorithm);
 +    }
 +
 +    init_bonded_thread_force_reduction(fr,&top_local->idef);
 +
 +    if (!(cr->duty & DUTY_PME))
 +    {
 +        /* Send the charges to our PME-only node */
 +        gmx_pme_send_q(cr,mdatoms->nChargePerturbed,
 +                       mdatoms->chargeA,mdatoms->chargeB,
 +                       dd_pme_maxshift_x(dd),dd_pme_maxshift_y(dd));
 +    }
 +    
 +    if (constr)
 +    {
 +        set_constraints(constr,top_local,ir,mdatoms,cr);
 +    }
 +    
 +    if (ir->ePull != epullNO)
 +    {
 +        /* Update the local pull groups */
 +        dd_make_local_pull_groups(dd,ir->pull,mdatoms);
 +    }
 +    
 +    if (ir->bRot)
 +    {
 +        /* Update the local rotation groups */
 +        dd_make_local_rotation_groups(dd,ir->rot);
 +    }
 +
 +
 +    add_dd_statistics(dd);
 +    
 +    /* Make sure we only count the cycles for this DD partitioning */
 +    clear_dd_cycle_counts(dd);
 +    
 +    /* Because the order of the atoms might have changed since
 +     * the last vsite construction, we need to communicate the constructing
 +     * atom coordinates again (for spreading the forces this MD step).
 +     */
 +    dd_move_x_vsites(dd,state_local->box,state_local->x);
 +
 +    wallcycle_sub_stop(wcycle,ewcsDD_TOPOTHER);
 +    
 +    if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
 +    {
 +        dd_move_x(dd,state_local->box,state_local->x);
 +        write_dd_pdb("dd_dump",step,"dump",top_global,cr,
 +                     -1,state_local->x,state_local->box);
 +    }
 +
 +    /* Store the partitioning step */
 +    comm->partition_step = step;
 +    
 +    /* Increase the DD partitioning counter */
 +    dd->ddp_count++;
 +    /* The state currently matches this DD partitioning count, store it */
 +    state_local->ddp_count = dd->ddp_count;
 +    if (bMasterState)
 +    {
 +        /* The DD master node knows the complete cg distribution,
 +         * store the count so we can possibly skip the cg info communication.
 +         */
 +        comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
 +    }
 +
 +    if (comm->DD_debug > 0)
 +    {
 +        /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
 +        check_index_consistency(dd,top_global->natoms,ncg_mtop(top_global),
 +                                "after partitioning");
 +    }
 +}
index 2ca0c7766063202eb7bfbc232f3e13e53c193bc2,0000000000000000000000000000000000000000..4fc5f794c59c25face22fad5608d642da262d39c
mode 100644,000000..100644
--- /dev/null
@@@ -1,922 -1,0 +1,923 @@@
-         gmx_bool       bDoLongRange,
-         gmx_bool       bDoForces,
-         rvec       *f)
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <math.h>
 +#include <string.h>
 +#include <assert.h>
 +#include "sysstuff.h"
 +#include "typedefs.h"
 +#include "macros.h"
 +#include "smalloc.h"
 +#include "macros.h"
 +#include "physics.h"
 +#include "force.h"
 +#include "nonbonded.h"
 +#include "names.h"
 +#include "network.h"
 +#include "pbc.h"
 +#include "ns.h"
 +#include "nrnb.h"
 +#include "bondf.h"
 +#include "mshift.h"
 +#include "txtdump.h"
 +#include "coulomb.h"
 +#include "pme.h"
 +#include "mdrun.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "qmmm.h"
 +#include "gmx_omp_nthreads.h"
 +
 +
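 +/* Perform neighbor searching for the group cut-off scheme: initialize the
 + * neighbor lists if needed, call search_neighbours and optionally dump the
 + * resulting lists for debugging.
 + */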
 +void ns(FILE *fp,
 +        t_forcerec *fr,
 +        rvec       x[],
 +        matrix     box,
 +        gmx_groups_t *groups,
 +        t_grpopts  *opts,
 +        gmx_localtop_t *top,
 +        t_mdatoms  *md,
 +        t_commrec  *cr,
 +        t_nrnb     *nrnb,
 +        real       *lambda,
 +        real       *dvdlambda,
 +        gmx_grppairener_t *grppener,
 +        gmx_bool       bFillGrid,
-                                 bFillGrid,bDoLongRange,
-                                 bDoForces,f);
++        gmx_bool       bDoLongRangeNS)
 +{
 +  char   *ptr;
 +  int    nsearch;
 +
 +
 +  if (!fr->ns.nblist_initialized)
 +  {
 +      init_neighbor_list(fp, fr, md->homenr);
 +  }
 +
 +  if (fr->bTwinRange)
 +    fr->nlr=0;
 +
 +    nsearch = search_neighbours(fp,fr,x,box,top,groups,cr,nrnb,md,
 +                                lambda,dvdlambda,grppener,
-             donb_flags |= GMX_DONB_FORCES;
++                                bFillGrid,bDoLongRangeNS);
 +  if (debug)
 +    fprintf(debug,"nsearch = %d\n",nsearch);
 +
 +  /* Check whether we have to do dynamic load balancing */
 +  /*if ((nsb->nstDlb > 0) && (mod(step,nsb->nstDlb) == 0))
 +    count_nb(cr,nsb,&(top->blocks[ebCGS]),nns,fr->nlr,
 +    &(top->idef),opts->ngener);
 +  */
 +  if (fr->ns.dump_nl > 0)
 +    dump_nblist(fp,cr,fr,fr->ns.dump_nl);
 +}
 +
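 +/* Reduce the per-thread force buffers, Ewald correction energies,
 + * dV/dlambda contributions and virials of threads 1..nthreads-1 into
 + * the thread-0 output arrays.
 + */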
 +static void reduce_thread_forces(int n,rvec *f,
 +                                 tensor vir,
 +                                 real *Vcorr,
 +                                 int efpt_ind,real *dvdl,
 +                                 int nthreads,f_thread_t *f_t)
 +{
 +    int t,i;
 +
 +    /* This reduction can run over any number of threads */
 +#pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntBonded)) private(t) schedule(static)
 +    for(i=0; i<n; i++)
 +    {
 +        for(t=1; t<nthreads; t++)
 +        {
 +            rvec_inc(f[i],f_t[t].f[i]);
 +        }
 +    }
 +    for(t=1; t<nthreads; t++)
 +    {
 +        *Vcorr += f_t[t].Vcorr;
 +        *dvdl  += f_t[t].dvdl[efpt_ind];
 +        m_add(vir,f_t[t].vir,vir);
 +    }
 +}
 +
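 +/* Compute the "low-level" force contributions for one MD step: QM/MM,
 + * walls, implicit solvent, short-range non-bonded kernels, bonded
 + * interactions, Ewald corrections and the PME mesh part, accumulating
 + * the energies and dV/dlambda contributions into enerd.
 + */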
 +void do_force_lowlevel(FILE       *fplog,   gmx_large_int_t step,
 +                       t_forcerec *fr,      t_inputrec *ir,
 +                       t_idef     *idef,    t_commrec  *cr,
 +                       t_nrnb     *nrnb,    gmx_wallcycle_t wcycle,
 +                       t_mdatoms  *md,
 +                       t_grpopts  *opts,
 +                       rvec       x[],      history_t  *hist,
 +                       rvec       f[],
++                       rvec       f_longrange[],
 +                       gmx_enerdata_t *enerd,
 +                       t_fcdata   *fcd,
 +                       gmx_mtop_t     *mtop,
 +                       gmx_localtop_t *top,
 +                       gmx_genborn_t *born,
 +                       t_atomtypes *atype,
 +                       gmx_bool       bBornRadii,
 +                       matrix     box,
 +                       t_lambda   *fepvals,
 +                       real       *lambda,
 +                       t_graph    *graph,
 +                       t_blocka   *excl,
 +                       rvec       mu_tot[],
 +                       int        flags,
 +                       float      *cycles_pme)
 +{
 +    int     i,j,status;
 +    int     donb_flags;
 +    gmx_bool    bDoEpot,bSepDVDL,bSB;
 +    int     pme_flags;
 +    matrix  boxs;
 +    rvec    box_size;
 +    real    Vsr,Vlr,Vcorr=0;
 +    t_pbc   pbc;
 +    real    dvdgb;
 +    char    buf[22];
 +    gmx_enerdata_t ed_lam;
 +    double  clam_i,vlam_i;
 +    real    dvdl_dum[efptNR], dvdl, dvdl_nb[efptNR], lam_i[efptNR];
 +    real    dvdlsum;
 +
 +#ifdef GMX_MPI
 +    double  t0=0.0,t1,t2,t3; /* time measurement for coarse load balancing */
 +#endif
 +
 +#define PRINT_SEPDVDL(s,v,dvdlambda) if (bSepDVDL) fprintf(fplog,sepdvdlformat,s,v,dvdlambda);
 +
 +
 +    set_pbc(&pbc,fr->ePBC,box);
 +
 +    /* reset free energy components */
 +    for (i=0;i<efptNR;i++)
 +    {
 +        dvdl_nb[i]  = 0;
 +        dvdl_dum[i] = 0;
 +    }
 +
 +    /* Reset box */
 +    for(i=0; (i<DIM); i++)
 +    {
 +        box_size[i]=box[i][i];
 +    }
 +
 +    bSepDVDL=(fr->bSepDVDL && do_per_step(step,ir->nstlog));
 +    debug_gmx();
 +
 +    /* do QMMM first if requested */
 +    if(fr->bQMMM)
 +    {
 +        enerd->term[F_EQM] = calculate_QMMM(cr,x,f,fr,md);
 +    }
 +
 +    if (bSepDVDL)
 +    {
 +        fprintf(fplog,"Step %s: non-bonded V and dVdl for node %d:\n",
 +                gmx_step_str(step,buf),cr->nodeid);
 +    }
 +
 +    /* Call the short range functions all in one go. */
 +
 +#ifdef GMX_MPI
 +    /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/
 +#define TAKETIME FALSE
 +    if (TAKETIME)
 +    {
 +        MPI_Barrier(cr->mpi_comm_mygroup);
 +        t0=MPI_Wtime();
 +    }
 +#endif
 +
 +    if (ir->nwall)
 +    {
 +        /* foreign lambda component for walls */
 +        dvdl = do_walls(ir,fr,box,md,x,f,lambda[efptVDW],
 +                        enerd->grpp.ener[egLJSR],nrnb);
 +        PRINT_SEPDVDL("Walls",0.0,dvdl);
 +        enerd->dvdl_lin[efptVDW] += dvdl;
 +    }
 +
 +    /* If doing GB, reset dvda and calculate the Born radii */
 +    if (ir->implicit_solvent)
 +    {
 +        wallcycle_sub_start(wcycle, ewcsNONBONDED);
 +
 +        for(i=0; i<born->nr; i++)
 +        {
 +            fr->dvda[i] = 0;
 +        }
 +
 +        if (bBornRadii)
 +        {
 +            calc_gb_rad(cr,fr,ir,top,atype,x,&(fr->gblist),born,md,nrnb);
 +        }
 +
 +        wallcycle_sub_stop(wcycle, ewcsNONBONDED);
 +    }
 +
 +    where();
 +    if (flags & GMX_FORCE_NONBONDED)
 +    {
 +        donb_flags = 0;
++        /* Add short-range interactions */
++        donb_flags |= GMX_NONBONDED_DO_SR;
++
 +        if (flags & GMX_FORCE_FORCES)
 +        {
-         do_nonbonded(cr,fr,x,f,md,excl,
-                     fr->bBHAM ?
-                     enerd->grpp.ener[egBHAMSR] :
-                     enerd->grpp.ener[egLJSR],
-                     enerd->grpp.ener[egCOULSR],
-                     enerd->grpp.ener[egGB],box_size,nrnb,
++            donb_flags |= GMX_NONBONDED_DO_FORCE;
++        }
++        if (flags & GMX_FORCE_ENERGY)
++        {
++            donb_flags |= GMX_NONBONDED_DO_POTENTIAL;
++        }
++        if (flags & GMX_FORCE_DO_LR)
++        {
++            donb_flags |= GMX_NONBONDED_DO_LR;
 +        }
 +
 +        wallcycle_sub_start(wcycle, ewcsNONBONDED);
-             do_nonbonded(cr,fr,x,f,md,excl,
-                          fr->bBHAM ?
-                          ed_lam.grpp.ener[egBHAMSR] :
-                          ed_lam.grpp.ener[egLJSR],
-                          ed_lam.grpp.ener[egCOULSR],
-                          enerd->grpp.ener[egGB], box_size,nrnb,
++        do_nonbonded(cr,fr,x,f,f_longrange,md,excl,
++                    &enerd->grpp,box_size,nrnb,
 +                    lambda,dvdl_nb,-1,-1,donb_flags);
 +        wallcycle_sub_stop(wcycle, ewcsNONBONDED);
 +    }
 +
 +    /* If we do foreign lambda and we have soft-core interactions
 +     * we have to recalculate the (non-linear) energies contributions.
 +     */
 +    if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0)
 +    {
 +        wallcycle_sub_start(wcycle, ewcsNONBONDED);
 +        init_enerdata(mtop->groups.grps[egcENER].nr,fepvals->n_lambda,&ed_lam);
 +
 +        for(i=0; i<enerd->n_lambda; i++)
 +        {
 +            for (j=0;j<efptNR;j++)
 +            {
 +                lam_i[j] = (i==0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
 +            }
 +            reset_enerdata(&ir->opts,fr,TRUE,&ed_lam,FALSE);
-                          GMX_DONB_FOREIGNLAMBDA);
++            do_nonbonded(cr,fr,x,f,f_longrange,md,excl,
++                         &(ed_lam.grpp), box_size,nrnb,
 +                         lam_i,dvdl_dum,-1,-1,
++                         GMX_NONBONDED_DO_FOREIGNLAMBDA | GMX_NONBONDED_DO_SR);
 +            sum_epot(&ir->opts,&ed_lam);
 +            enerd->enerpart_lambda[i] += ed_lam.term[F_EPOT];
 +        }
 +        destroy_enerdata(&ed_lam);
 +        wallcycle_sub_stop(wcycle, ewcsNONBONDED);
 +    }
 +    where();
 +
 +    /* If we are doing GB, calculate bonded forces and apply corrections
 +     * to the solvation forces */
 +    /* MRS: Eventually, we may need to include the free energy contribution here! */
 +    if (ir->implicit_solvent)
 +    {
 +        calc_gb_forces(cr,md,born,top,atype,x,f,fr,idef,
 +                       ir->gb_algorithm,ir->sa_algorithm,nrnb,bBornRadii,&pbc,graph,enerd);
 +        wallcycle_sub_stop(wcycle, ewcsBONDED);
 +    }
 +
 +#ifdef GMX_MPI
 +    if (TAKETIME)
 +    {
 +        t1=MPI_Wtime();
 +        fr->t_fnbf += t1-t0;
 +    }
 +#endif
 +
 +    if (fepvals->sc_alpha!=0)
 +    {
 +        enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW];
 +    }
 +    else
 +    {
 +        enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW];
 +    }
 +
 +    if (fepvals->sc_alpha!=0)
 +
 +        /* even though the Coulomb part is linear, we already added it, because we
 +           need to go through the vdw calculation anyway */
 +    {
 +        enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL];
 +    }
 +    else
 +    {
 +        enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL];
 +    }
 +
 +    Vsr = 0;
 +    if (bSepDVDL)
 +    {
 +        for(i=0; i<enerd->grpp.nener; i++)
 +        {
 +            Vsr +=
 +                (fr->bBHAM ?
 +                 enerd->grpp.ener[egBHAMSR][i] :
 +                 enerd->grpp.ener[egLJSR][i])
 +                + enerd->grpp.ener[egCOULSR][i] + enerd->grpp.ener[egGB][i];
 +        }
 +        dvdlsum = dvdl_nb[efptVDW] + dvdl_nb[efptCOUL];
 +        PRINT_SEPDVDL("VdW and Coulomb SR particle-p.",Vsr,dvdlsum);
 +    }
 +    debug_gmx();
 +
 +
 +    if (debug)
 +    {
 +        pr_rvecs(debug,0,"fshift after SR",fr->fshift,SHIFTS);
 +    }
 +
 +    /* Shift the coordinates. Must be done before bonded forces and PPPM,
 +     * but is also necessary for SHAKE and update, therefore it can NOT
 +     * go when no bonded forces have to be evaluated.
 +     */
 +
 +    /* Here sometimes we would not need to shift with NBFonly,
 +     * but we do so anyhow for consistency of the returned coordinates.
 +     */
 +    if (graph)
 +    {
 +        shift_self(graph,box,x);
 +        if (TRICLINIC(box))
 +        {
 +            inc_nrnb(nrnb,eNR_SHIFTX,2*graph->nnodes);
 +        }
 +        else
 +        {
 +            inc_nrnb(nrnb,eNR_SHIFTX,graph->nnodes);
 +        }
 +    }
 +    /* Check whether we need to do bondeds or correct for exclusions */
 +    if (fr->bMolPBC &&
 +        ((flags & GMX_FORCE_BONDED)
 +         || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype)))
 +    {
 +        /* Since all atoms are in the rectangular or triclinic unit-cell,
 +         * only single box vector shifts (2 in x) are required.
 +         */
 +        set_pbc_dd(&pbc,fr->ePBC,cr->dd,TRUE,box);
 +    }
 +    debug_gmx();
 +
 +    if (flags & GMX_FORCE_BONDED)
 +    {
 +        wallcycle_sub_start(wcycle, ewcsBONDED);
 +        calc_bonds(fplog,cr->ms,
 +                   idef,x,hist,f,fr,&pbc,graph,enerd,nrnb,lambda,md,fcd,
 +                   DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL, atype, born,
 +                   flags,
 +                   fr->bSepDVDL && do_per_step(step,ir->nstlog),step);
 +
 +        /* Check if we have to determine energy differences
 +         * at foreign lambda's.
 +         */
 +        if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) &&
 +            idef->ilsort != ilsortNO_FE)
 +        {
 +            if (idef->ilsort != ilsortFE_SORTED)
 +            {
 +                gmx_incons("The bonded interactions are not sorted for free energy");
 +            }
 +            init_enerdata(mtop->groups.grps[egcENER].nr,fepvals->n_lambda,&ed_lam);
 +
 +            for(i=0; i<enerd->n_lambda; i++)
 +            {
 +                reset_enerdata(&ir->opts,fr,TRUE,&ed_lam,FALSE);
 +                for (j=0;j<efptNR;j++)
 +                {
 +                    lam_i[j] = (i==0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
 +                }
 +                calc_bonds_lambda(fplog,idef,x,fr,&pbc,graph,&ed_lam,nrnb,lam_i,md,
 +                                  fcd,DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
 +                sum_epot(&ir->opts,&ed_lam);
 +                enerd->enerpart_lambda[i] += ed_lam.term[F_EPOT];
 +            }
 +            destroy_enerdata(&ed_lam);
 +        }
 +        debug_gmx();
 +
 +        wallcycle_sub_stop(wcycle, ewcsBONDED);
 +    }
 +
 +    where();
 +
 +    *cycles_pme = 0;
 +    if (EEL_FULL(fr->eeltype))
 +    {
 +        bSB = (ir->nwall == 2);
 +        if (bSB)
 +        {
 +            copy_mat(box,boxs);
 +            svmul(ir->wall_ewald_zfac,boxs[ZZ],boxs[ZZ]);
 +            box_size[ZZ] *= ir->wall_ewald_zfac;
 +        }
 +
 +        clear_mat(fr->vir_el_recip);
 +
 +        if (fr->bEwald)
 +        {
 +            Vcorr = 0;
 +            dvdl  = 0;
 +
 +            /* With the Verlet scheme exclusion forces are calculated
 +             * in the non-bonded kernel.
 +             */
 +            /* The TPI molecule does not have exclusions with the rest
 +             * of the system and no intra-molecular PME grid contributions
 +             * will be calculated in gmx_pme_calc_energy.
 +             */
 +            if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) ||
 +                ir->ewald_geometry != eewg3D ||
 +                ir->epsilon_surface != 0)
 +            {
 +                int nthreads,t;
 +
 +                wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION);
 +
 +                if (fr->n_tpi > 0)
 +                {
 +                    gmx_fatal(FARGS,"TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions");
 +                }
 +
 +                nthreads = gmx_omp_nthreads_get(emntBonded);
 +#pragma omp parallel for num_threads(nthreads) schedule(static)
 +                for(t=0; t<nthreads; t++)
 +                {
 +                    int s,e,i;
 +                    rvec *fnv;
 +                    tensor *vir;
 +                    real *Vcorrt,*dvdlt;
 +                    if (t == 0)
 +                    {
 +                        fnv    = fr->f_novirsum;
 +                        vir    = &fr->vir_el_recip;
 +                        Vcorrt = &Vcorr;
 +                        dvdlt  = &dvdl;
 +                    }
 +                    else
 +                    {
 +                        fnv    = fr->f_t[t].f;
 +                        vir    = &fr->f_t[t].vir;
 +                        Vcorrt = &fr->f_t[t].Vcorr;
 +                        dvdlt  = &fr->f_t[t].dvdl[efptCOUL];
 +                        for(i=0; i<fr->natoms_force; i++)
 +                        {
 +                            clear_rvec(fnv[i]);
 +                        }
 +                        clear_mat(*vir);
 +                    }
 +                    *dvdlt = 0;
 +                    *Vcorrt =
 +                        ewald_LRcorrection(fplog,
 +                                           fr->excl_load[t],fr->excl_load[t+1],
 +                                           cr,t,fr,
 +                                           md->chargeA,
 +                                           md->nChargePerturbed ? md->chargeB : NULL,
 +                                           ir->cutoff_scheme != ecutsVERLET,
 +                                           excl,x,bSB ? boxs : box,mu_tot,
 +                                           ir->ewald_geometry,
 +                                           ir->epsilon_surface,
 +                                           fnv,*vir,
 +                                           lambda[efptCOUL],dvdlt);
 +                }
 +                if (nthreads > 1)
 +                {
 +                    reduce_thread_forces(fr->natoms_force,fr->f_novirsum,
 +                                         fr->vir_el_recip,
 +                                         &Vcorr,efptCOUL,&dvdl,
 +                                         nthreads,fr->f_t);
 +                }
 +
 +                wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
 +            }
 +
 +            if (fr->n_tpi == 0)
 +            {
 +                Vcorr += ewald_charge_correction(cr,fr,lambda[efptCOUL],box,
 +                                                 &dvdl,fr->vir_el_recip);
 +            }
 +
 +            PRINT_SEPDVDL("Ewald excl./charge/dip. corr.",Vcorr,dvdl);
 +            enerd->dvdl_lin[efptCOUL] += dvdl;
 +        }
 +
 +        status = 0;
 +        Vlr  = 0;
 +        dvdl = 0;
 +        switch (fr->eeltype)
 +        {
 +        case eelPME:
 +        case eelPMESWITCH:
 +        case eelPMEUSER:
 +        case eelPMEUSERSWITCH:
 +        case eelP3M_AD:
 +            if (cr->duty & DUTY_PME)
 +            {
 +                assert(fr->n_tpi >= 0);
 +                if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED))
 +                {
 +                    pme_flags = GMX_PME_SPREAD_Q | GMX_PME_SOLVE;
 +                    if (flags & GMX_FORCE_FORCES)
 +                    {
 +                        pme_flags |= GMX_PME_CALC_F;
 +                    }
 +                    if (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY))
 +                    {
 +                        pme_flags |= GMX_PME_CALC_ENER_VIR;
 +                    }
 +                    if (fr->n_tpi > 0)
 +                    {
 +                        /* We don't calculate f, but we do want the potential */
 +                        pme_flags |= GMX_PME_CALC_POT;
 +                    }
 +                    wallcycle_start(wcycle,ewcPMEMESH);
 +                    status = gmx_pme_do(fr->pmedata,
 +                                        md->start,md->homenr - fr->n_tpi,
 +                                        x,fr->f_novirsum,
 +                                        md->chargeA,md->chargeB,
 +                                        bSB ? boxs : box,cr,
 +                                        DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0,
 +                                        DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0,
 +                                        nrnb,wcycle,
 +                                        fr->vir_el_recip,fr->ewaldcoeff,
 +                                        &Vlr,lambda[efptCOUL],&dvdl,
 +                                        pme_flags);
 +                    *cycles_pme = wallcycle_stop(wcycle,ewcPMEMESH);
 +
 +                    /* We should try to do as little computation after
 +                     * this as possible, because parallel PME synchronizes
 +                     * the nodes, so we want all load imbalance of the rest
 +                     * of the force calculation to be before the PME call.
 +                     * DD load balancing is done on the whole time of
 +                     * the force call (without PME).
 +                     */
 +                }
 +                if (fr->n_tpi > 0)
 +                {
 +                    /* Determine the PME grid energy of the test molecule
 +                     * with the PME grid potential of the other charges.
 +                     */
 +                    gmx_pme_calc_energy(fr->pmedata,fr->n_tpi,
 +                                        x + md->homenr - fr->n_tpi,
 +                                        md->chargeA + md->homenr - fr->n_tpi,
 +                                        &Vlr);
 +                }
 +                PRINT_SEPDVDL("PME mesh",Vlr,dvdl);
 +            }
 +            break;
 +        case eelEWALD:
 +            Vlr = do_ewald(fplog,FALSE,ir,x,fr->f_novirsum,
 +                           md->chargeA,md->chargeB,
 +                           box_size,cr,md->homenr,
 +                           fr->vir_el_recip,fr->ewaldcoeff,
 +                           lambda[efptCOUL],&dvdl,fr->ewald_table);
 +            PRINT_SEPDVDL("Ewald long-range",Vlr,dvdl);
 +            break;
 +        default:
 +            gmx_fatal(FARGS,"No such electrostatics method implemented %s",
 +                      eel_names[fr->eeltype]);
 +        }
 +        if (status != 0)
 +        {
 +            gmx_fatal(FARGS,"Error %d in long range electrostatics routine %s",
 +                      status,EELTYPE(fr->eeltype));
 +        }
 +        /* Note that with separate PME nodes we get the real energies later */
 +        enerd->dvdl_lin[efptCOUL] += dvdl;
 +        enerd->term[F_COUL_RECIP] = Vlr + Vcorr;
 +        if (debug)
 +        {
 +            fprintf(debug,"Vlr = %g, Vcorr = %g, Vlr_corr = %g\n",
 +                    Vlr,Vcorr,enerd->term[F_COUL_RECIP]);
 +            pr_rvecs(debug,0,"vir_el_recip after corr",fr->vir_el_recip,DIM);
 +            pr_rvecs(debug,0,"fshift after LR Corrections",fr->fshift,SHIFTS);
 +        }
 +    }
 +    else
 +    {
 +        if (EEL_RF(fr->eeltype))
 +        {
 +            /* With the Verlet scheme exclusion forces are calculated
 +             * in the non-bonded kernel.
 +             */
 +            if (ir->cutoff_scheme != ecutsVERLET && fr->eeltype != eelRF_NEC)
 +            {
 +                dvdl = 0;
 +                enerd->term[F_RF_EXCL] =
 +                    RF_excl_correction(fplog,fr,graph,md,excl,x,f,
 +                                       fr->fshift,&pbc,lambda[efptCOUL],&dvdl);
 +            }
 +
 +            enerd->dvdl_lin[efptCOUL] += dvdl;
 +            PRINT_SEPDVDL("RF exclusion correction",
 +                          enerd->term[F_RF_EXCL],dvdl);
 +        }
 +    }
 +    where();
 +    debug_gmx();
 +
 +    if (debug)
 +    {
 +        print_nrnb(debug,nrnb);
 +    }
 +    debug_gmx();
 +
 +#ifdef GMX_MPI
 +    if (TAKETIME)
 +    {
 +        t2=MPI_Wtime();
 +        MPI_Barrier(cr->mpi_comm_mygroup);
 +        t3=MPI_Wtime();
 +        fr->t_wait += t3-t2;
 +        if (fr->timesteps == 11)
 +        {
 +            fprintf(stderr,"* PP load balancing info: node %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n",
 +                    cr->nodeid, gmx_step_str(fr->timesteps,buf),
 +                    100*fr->t_wait/(fr->t_wait+fr->t_fnbf),
 +                    (fr->t_fnbf+fr->t_wait)/fr->t_fnbf);
 +        }
 +        fr->timesteps++;
 +    }
 +#endif
 +
 +    if (debug)
 +    {
 +        pr_rvecs(debug,0,"fshift after bondeds",fr->fshift,SHIFTS);
 +    }
 +
 +}
 +
 +void init_enerdata(int ngener,int n_lambda,gmx_enerdata_t *enerd)
 +{
 +    int i,n2;
 +
 +    for(i=0; i<F_NRE; i++)
 +    {
 +        enerd->term[i] = 0;
 +    }
 +
 +
 +    for(i=0; i<efptNR; i++) {
 +        enerd->dvdl_lin[i]  = 0;
 +        enerd->dvdl_nonlin[i]  = 0;
 +    }
 +
 +    n2=ngener*ngener;
 +    if (debug)
 +    {
 +        fprintf(debug,"Creating %d sized group matrix for energies\n",n2);
 +    }
 +    enerd->grpp.nener = n2;
 +    for(i=0; (i<egNR); i++)
 +    {
 +        snew(enerd->grpp.ener[i],n2);
 +    }
 +
 +    if (n_lambda)
 +    {
 +        enerd->n_lambda = 1 + n_lambda;
 +        snew(enerd->enerpart_lambda,enerd->n_lambda);
 +    }
 +    else
 +    {
 +        enerd->n_lambda = 0;
 +    }
 +}
 +
 +void destroy_enerdata(gmx_enerdata_t *enerd)
 +{
 +    int i;
 +
 +    for(i=0; (i<egNR); i++)
 +    {
 +        sfree(enerd->grpp.ener[i]);
 +    }
 +
 +    if (enerd->n_lambda)
 +    {
 +        sfree(enerd->enerpart_lambda);
 +    }
 +}
 +
 +static real sum_v(int n,real v[])
 +{
 +  real t;
 +  int  i;
 +
 +  t = 0.0;
 +  for(i=0; (i<n); i++)
 +    t = t + v[i];
 +
 +  return t;
 +}
 +
 +void sum_epot(t_grpopts *opts,gmx_enerdata_t *enerd)
 +{
 +  gmx_grppairener_t *grpp;
 +  real *epot;
 +  int i;
 +
 +  grpp = &enerd->grpp;
 +  epot = enerd->term;
 +
 +  /* Accumulate energies */
 +  epot[F_COUL_SR]  = sum_v(grpp->nener,grpp->ener[egCOULSR]);
 +  epot[F_LJ]       = sum_v(grpp->nener,grpp->ener[egLJSR]);
 +  epot[F_LJ14]     = sum_v(grpp->nener,grpp->ener[egLJ14]);
 +  epot[F_COUL14]   = sum_v(grpp->nener,grpp->ener[egCOUL14]);
 +  epot[F_COUL_LR]  = sum_v(grpp->nener,grpp->ener[egCOULLR]);
 +  epot[F_LJ_LR]    = sum_v(grpp->nener,grpp->ener[egLJLR]);
 +  /* We have already added 1-2,1-3, and 1-4 terms to F_GBPOL */
 +  epot[F_GBPOL]   += sum_v(grpp->nener,grpp->ener[egGB]);
 +
 +/* lattice part of LR doesn't belong to any group
 + * and has been added earlier
 + */
 +  epot[F_BHAM]     = sum_v(grpp->nener,grpp->ener[egBHAMSR]);
 +  epot[F_BHAM_LR]  = sum_v(grpp->nener,grpp->ener[egBHAMLR]);
 +
 +  epot[F_EPOT] = 0;
 +  for(i=0; (i<F_EPOT); i++)
 +  {
 +      if (i != F_DISRESVIOL && i != F_ORIRESDEV)
 +      {
 +          epot[F_EPOT] += epot[i];
 +      }
 +  }
 +}
 +
 +void sum_dhdl(gmx_enerdata_t *enerd, real *lambda, t_lambda *fepvals)
 +{
 +    int i,j,index;
 +    double dlam;
 +
 +    enerd->dvdl_lin[efptVDW] += enerd->term[F_DVDL_VDW];  /* include dispersion correction */
 +    enerd->term[F_DVDL] = 0.0;
 +    for (i=0;i<efptNR;i++)
 +    {
 +        if (fepvals->separate_dvdl[i])
 +        {
 +            /* could this be done more readably/compactly? */
 +            switch (i) {
 +            case (efptCOUL):
 +                index = F_DVDL_COUL;
 +                break;
 +            case (efptVDW):
 +                index = F_DVDL_VDW;
 +                break;
 +            case (efptBONDED):
 +                index = F_DVDL_BONDED;
 +                break;
 +            case (efptRESTRAINT):
 +                index = F_DVDL_RESTRAINT;
 +                break;
 +            case (efptMASS):
 +                index = F_DKDL;
 +                break;
 +            default:
 +                index = F_DVDL;
 +                break;
 +            }
 +            enerd->term[index] = enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i];
 +            if (debug)
 +            {
 +                fprintf(debug,"dvdl-%s[%2d]: %f: non-linear %f + linear %f\n",
 +                        efpt_names[i],i,enerd->term[index],enerd->dvdl_nonlin[i],enerd->dvdl_lin[i]);
 +            }
 +        }
 +        else
 +        {
 +            enerd->term[F_DVDL] += enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i];
 +            if (debug)
 +            {
 +                fprintf(debug,"dvdl-%s[%2d]: %f: non-linear %f + linear %f\n",
 +                        efpt_names[0],i,enerd->term[F_DVDL],enerd->dvdl_nonlin[i],enerd->dvdl_lin[i]);
 +            }
 +        }
 +    }
 +
 +    /* Notes on the foreign lambda free energy difference evaluation:
 +     * Adding the potential and ekin terms that depend linearly on lambda
 +     * as delta lam * dvdl to the energy differences is exact.
 +     * For the constraints this is not exact, but we have no other option
 +     * without literally changing the lengths and reevaluating the energies at each step.
 +     * (try to remedy this post 4.6 - MRS)
 +     * For the non-bonded LR term we assume that the soft-core (if present)
 +     * no longer affects the energy beyond the short-range cut-off,
 +     * which is a very good approximation (except for exotic settings).
 +     * (investigate how to overcome this post 4.6 - MRS)
 +     */
 +
 +    for(i=0; i<fepvals->n_lambda; i++)
 +    {                                         /* note we are iterating over fepvals here!
 +                                                 For the current lam, dlam = 0 automatically,
 +                                                 so we don't need to add anything to the
 +                                                 enerd->enerpart_lambda[0] */
 +
 +        /* we don't need to worry about dvdl contributions to the current lambda, because
 +           it's automatically zero */
 +
 +        /* first kinetic energy term */
 +        dlam = (fepvals->all_lambda[efptMASS][i] - lambda[efptMASS]);
 +
 +        enerd->enerpart_lambda[i+1] += enerd->term[F_DKDL]*dlam;
 +
 +        for (j=0;j<efptNR;j++)
 +        {
 +            if (j==efptMASS) {continue;} /* no other mass term to worry about */
 +
 +            dlam = (fepvals->all_lambda[j][i]-lambda[j]);
 +            enerd->enerpart_lambda[i+1] += dlam*enerd->dvdl_lin[j];
 +            if (debug)
 +            {
 +                fprintf(debug,"enerdiff lam %g: (%15s), non-linear %f linear %f*%f\n",
 +                        fepvals->all_lambda[j][i],efpt_names[j],
 +                        (enerd->enerpart_lambda[i+1] - enerd->enerpart_lambda[0]),
 +                        dlam,enerd->dvdl_lin[j]);
 +            }
 +        }
 +    }
 +}
 +
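
The loop above adds only the linear dependence on lambda to each foreign-lambda energy difference, i.e. dE_i is approximated as sum_j (lambda_j(i) - lambda_j) * dV/dlambda_j, with the kinetic F_DKDL term handled first. Below is a minimal standalone sketch of that accumulation; the function name, the flat row-major all_lambda layout and the plain arrays are illustrative assumptions, not the gmx_enerdata_t/t_lambda API.

#include <stdio.h>

/* Sketch: add the linear part of the foreign-lambda energy differences,
 * dE[i] ~= sum_j (all_lambda[i][j] - lambda[j]) * dvdl_lin[j].
 * ncomp plays the role of efptNR and nfep of fepvals->n_lambda;
 * enerpart_lambda[0] is the current state and stays untouched.
 */
static void sum_foreign_lambda(int ncomp, int nfep,
                               const double *lambda,
                               const double *all_lambda,   /* nfep x ncomp, row-major */
                               const double *dvdl_lin,
                               double *enerpart_lambda)    /* nfep+1 entries */
{
    int i, j;

    for (i = 0; i < nfep; i++)
    {
        for (j = 0; j < ncomp; j++)
        {
            double dlam = all_lambda[i*ncomp + j] - lambda[j];

            enerpart_lambda[i + 1] += dlam*dvdl_lin[j];
        }
    }
}

int main(void)
{
    double lambda[2]     = { 0.5, 0.5 };            /* e.g. coul, vdw       */
    double all_lambda[4] = { 0.0, 0.0, 1.0, 1.0 };  /* two foreign states   */
    double dvdl_lin[2]   = { 12.0, -3.0 };
    double epl[3]        = { 0.0, 0.0, 0.0 };       /* [0] = current state  */

    sum_foreign_lambda(2, 2, lambda, all_lambda, dvdl_lin, epl);
    printf("dE(0) = %g, dE(1) = %g\n", epl[1], epl[2]);   /* -4.5 and 4.5 */
    return 0;
}

With lambda = (0.5, 0.5), dvdl = (12, -3) and foreign states (0,0) and (1,1), the sketch prints dE(0) = -4.5 and dE(1) = 4.5, i.e. the two end states shift symmetrically around the current one, as expected for a purely linear dependence.
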
 +void reset_enerdata(t_grpopts *opts,
 +                    t_forcerec *fr,gmx_bool bNS,
 +                    gmx_enerdata_t *enerd,
 +                    gmx_bool bMaster)
 +{
 +    gmx_bool bKeepLR;
 +    int  i,j;
 +
 +    /* First reset all energy components, except for the long range terms
 +     * on the master at non-neighbor-search steps, since the long range
 +     * terms have already been summed at the last neighbor search step.
 +     */
 +    bKeepLR = (fr->bTwinRange && !bNS);
 +    for(i=0; (i<egNR); i++) {
 +        if (!(bKeepLR && bMaster && (i == egCOULLR || i == egLJLR))) {
 +            for(j=0; (j<enerd->grpp.nener); j++)
 +                enerd->grpp.ener[i][j] = 0.0;
 +        }
 +    }
 +    for (i=0;i<efptNR;i++)
 +    {
 +        enerd->dvdl_lin[i]    = 0.0;
 +        enerd->dvdl_nonlin[i] = 0.0;
 +    }
 +
 +    /* Normal potential energy components */
 +    for(i=0; (i<=F_EPOT); i++) {
 +        enerd->term[i] = 0.0;
 +    }
 +    /* Initialize the dVdlambda term with the long range contribution */
 +    enerd->term[F_DVDL]            = 0.0;
 +    enerd->term[F_DVDL_COUL]       = 0.0;
 +    enerd->term[F_DVDL_VDW]        = 0.0;
 +    enerd->term[F_DVDL_BONDED]     = 0.0;
 +    enerd->term[F_DVDL_RESTRAINT]  = 0.0;
 +    enerd->term[F_DKDL]            = 0.0;
 +    if (enerd->n_lambda > 0)
 +    {
 +        for(i=0; i<enerd->n_lambda; i++)
 +        {
 +            enerd->enerpart_lambda[i] = 0.0;
 +        }
 +    }
 +}
index 2dfe5f9ea4cf39ecec6c181f6dbdfc126f2faeb6,0000000000000000000000000000000000000000..97eb67675e5dce653504ed6ada54f9ce234abc95
mode 100644,000000..100644
--- /dev/null
@@@ -1,2545 -1,0 +1,2692 @@@
-  * 
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
-               BHAMB(nbfp,atnr,i,j),BHAMC(nbfp,atnr,i,j));
++ *
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <math.h>
 +#include <string.h>
 +#include <assert.h>
 +#include "sysstuff.h"
 +#include "typedefs.h"
 +#include "vec.h"
 +#include "maths.h"
 +#include "macros.h"
 +#include "smalloc.h"
 +#include "macros.h"
 +#include "gmx_fatal.h"
 +#include "gmx_fatal_collective.h"
 +#include "physics.h"
 +#include "force.h"
 +#include "tables.h"
 +#include "nonbonded.h"
 +#include "invblock.h"
 +#include "names.h"
 +#include "network.h"
 +#include "pbc.h"
 +#include "ns.h"
 +#include "mshift.h"
 +#include "txtdump.h"
 +#include "coulomb.h"
 +#include "md_support.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "qmmm.h"
 +#include "copyrite.h"
 +#include "mtop_util.h"
 +#include "nbnxn_search.h"
 +#include "nbnxn_atomdata.h"
 +#include "nbnxn_consts.h"
 +#include "statutil.h"
 +#include "gmx_omp_nthreads.h"
 +
 +#ifdef _MSC_VER
 +/* MSVC definition for __cpuid() */
 +#include <intrin.h>
 +#endif
 +
 +#include "types/nbnxn_cuda_types_ext.h"
 +#include "gpu_utils.h"
 +#include "nbnxn_cuda_data_mgmt.h"
 +#include "pmalloc_cuda.h"
 +
 +t_forcerec *mk_forcerec(void)
 +{
 +  t_forcerec *fr;
 +  
 +  snew(fr,1);
 +  
 +  return fr;
 +}
 +
 +#ifdef DEBUG
 +static void pr_nbfp(FILE *fp,real *nbfp,gmx_bool bBHAM,int atnr)
 +{
 +  int i,j;
 +  
 +  for(i=0; (i<atnr); i++) {
 +    for(j=0; (j<atnr); j++) {
 +      fprintf(fp,"%2d - %2d",i,j);
 +      if (bBHAM)
 +      fprintf(fp,"  a=%10g, b=%10g, c=%10g\n",BHAMA(nbfp,atnr,i,j),
-       fprintf(fp,"  c6=%10g, c12=%10g\n",C6(nbfp,atnr,i,j),
-               C12(nbfp,atnr,i,j));
++              BHAMB(nbfp,atnr,i,j),BHAMC(nbfp,atnr,i,j)/6.0);
 +      else
-       BHAMA(nbfp,atnr,i,j) = idef->iparams[k].bham.a;
-       BHAMB(nbfp,atnr,i,j) = idef->iparams[k].bham.b;
-       BHAMC(nbfp,atnr,i,j) = idef->iparams[k].bham.c;
++      fprintf(fp,"  c6=%10g, c12=%10g\n",C6(nbfp,atnr,i,j)/6.0,
++            C12(nbfp,atnr,i,j)/12.0);
 +    }
 +  }
 +}
 +#endif
 +
 +static real *mk_nbfp(const gmx_ffparams_t *idef,gmx_bool bBHAM)
 +{
 +  real *nbfp;
 +  int  i,j,k,atnr;
 +  
 +  atnr=idef->atnr;
 +  if (bBHAM) {
 +    snew(nbfp,3*atnr*atnr);
 +    for(i=k=0; (i<atnr); i++) {
 +      for(j=0; (j<atnr); j++,k++) {
-       C6(nbfp,atnr,i,j)   = idef->iparams[k].lj.c6;
-       C12(nbfp,atnr,i,j)  = idef->iparams[k].lj.c12;
++          BHAMA(nbfp,atnr,i,j) = idef->iparams[k].bham.a;
++          BHAMB(nbfp,atnr,i,j) = idef->iparams[k].bham.b;
++          /* nbfp now includes the 6.0 derivative prefactor */
++          BHAMC(nbfp,atnr,i,j) = idef->iparams[k].bham.c*6.0;
 +      }
 +    }
 +  }
 +  else {
 +    snew(nbfp,2*atnr*atnr);
 +    for(i=k=0; (i<atnr); i++) {
 +      for(j=0; (j<atnr); j++,k++) {
-          * atom 1 should have VdW.
++          /* nbfp now includes the 6.0/12.0 derivative prefactors */
++          C6(nbfp,atnr,i,j)   = idef->iparams[k].lj.c6*6.0;
++          C12(nbfp,atnr,i,j)  = idef->iparams[k].lj.c12*12.0;
 +      }
 +    }
 +  }
++
 +  return nbfp;
 +}
 +
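
As the comments above note, nbfp now stores the non-bonded parameters with their derivative prefactors (6.0*c6 and 12.0*c12 for Lennard-Jones, 6.0*c for the Buckingham dispersion term), so the kernels get the force factors without extra multiplications, and any reader that wants the plain coefficients divides the factors back out. A tiny sketch of that convention follows; SK_C6/SK_C12 are simplified stand-ins, not the real C6/C12 macros.

#include <stdio.h>

/* Simplified stand-ins for the C6/C12 accessors: two reals per type pair. */
#define SK_C6(nbfp, ntp, i, j)  (nbfp)[2*((ntp)*(i) + (j))]
#define SK_C12(nbfp, ntp, i, j) (nbfp)[2*((ntp)*(i) + (j)) + 1]

int main(void)
{
    double nbfp[2];                     /* one atom type -> one pair        */
    double c6 = 1.0e-3, c12 = 1.0e-6;   /* plain LJ coefficients            */

    /* Store with the derivative prefactors, as mk_nbfp now does. */
    SK_C6(nbfp, 1, 0, 0)  = 6.0*c6;
    SK_C12(nbfp, 1, 0, 0) = 12.0*c12;

    /* Readers that want the plain coefficients divide the factors back out,
     * as the dispersion-correction code further down does. */
    printf("c6  = %g\n", SK_C6(nbfp, 1, 0, 0)/6.0);
    printf("c12 = %g\n", SK_C12(nbfp, 1, 0, 0)/12.0);
    return 0;
}
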
 +/* This routine sets fr->solvent_opt to the most common solvent in the 
 + * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in 
 + * the fr->solvent_type array with the correct type (or esolNO).
 + *
 + * Charge groups that fulfill the conditions but are not identical to the
 + * most common one will be marked as esolNO in the solvent_type array. 
 + *
 + * TIP3P is identical to SPC for these purposes, so we call it
 + * SPC in the arrays (Apologies to Bill Jorgensen ;-)
 + * 
 + * NOTE: a QM particle should not
 + * become an optimized solvent, not even if there is only one charge
 + * group in the QM subsystem.
 + */
 +
 +typedef struct 
 +{
 +    int    model;          
 +    int    count;
 +    int    vdwtype[4];
 +    real   charge[4];
 +} solvent_parameters_t;
 +
 +static void
 +check_solvent_cg(const gmx_moltype_t   *molt,
 +                 int                   cg0,
 +                 int                   nmol,
 +                 const unsigned char   *qm_grpnr,
 +                 const t_grps          *qm_grps,
 +                 t_forcerec *          fr,
 +                 int                   *n_solvent_parameters,
 +                 solvent_parameters_t  **solvent_parameters_p,
 +                 int                   cginfo,
 +                 int                   *cg_sp)
 +{
 +    const t_blocka *  excl;
 +    t_atom            *atom;
 +    int               j,k;
 +    int               j0,j1,nj;
 +    gmx_bool              perturbed;
 +    gmx_bool              has_vdw[4];
 +    gmx_bool              match;
 +    real              tmp_charge[4];
 +    int               tmp_vdwtype[4];
 +    int               tjA;
 +    gmx_bool              qm;
 +    solvent_parameters_t *solvent_parameters;
 +
 +    /* We use a list with parameters for each solvent type. 
 +     * Every time we discover a new molecule that fulfills the basic 
 +     * conditions for a solvent we compare with the previous entries
 +     * in these lists. If the parameters are the same we just increment
 +     * the counter for that type, and otherwise we create a new type
 +     * based on the current molecule.
 +     *
 +     * Once we've finished going through all molecules we check which
 +     * solvent is most common, and mark all those molecules while we
 +     * clear the flag on all others.
 +     */   
 +
 +    solvent_parameters = *solvent_parameters_p;
 +
 +    /* Mark the cg first as non-optimized */
 +    *cg_sp = -1;
 +    
 +    /* Check if this cg has no exclusions with atoms in other charge groups
 +     * and that all atom pairs inside the charge group are excluded.
 +     * We only have 3- or 4-atom solvent loops.
 +     */
 +    if (GET_CGINFO_EXCL_INTER(cginfo) ||
 +        !GET_CGINFO_EXCL_INTRA(cginfo))
 +    {
 +        return;
 +    }
 +
 +    /* Get the indices of the first atom in this charge group */
 +    j0     = molt->cgs.index[cg0];
 +    j1     = molt->cgs.index[cg0+1];
 +    
 +    /* Number of atoms in our molecule */
 +    nj     = j1 - j0;
 +
 +    if (debug) {
 +        fprintf(debug,
 +                "Moltype '%s': there are %d atoms in this charge group\n",
 +                *molt->name,nj);
 +    }
 +    
 +    /* Check if it could be an SPC (3 atoms) or TIP4P (4 atoms) water,
 +     * otherwise skip it.
 +     */
 +    if (nj<3 || nj>4)
 +    {
 +        return;
 +    }
 +    
 +    /* Check if we are doing QM on this group */
 +    qm = FALSE; 
 +    if (qm_grpnr != NULL)
 +    {
 +        for(j=j0 ; j<j1 && !qm; j++)
 +        {
 +            qm = (qm_grpnr[j] < qm_grps->nr - 1);
 +        }
 +    }
 +    /* Cannot use solvent optimization with QM */
 +    if (qm)
 +    {
 +        return;
 +    }
 +    
 +    atom = molt->atoms.atom;
 +
 +    /* Still looks like a solvent, time to check parameters */
 +    
 +    /* If it is perturbed (free energy) we can't use the solvent loops,
 +     * so then we just skip to the next molecule.
 +     */   
 +    perturbed = FALSE; 
 +    
 +    for(j=j0; j<j1 && !perturbed; j++)
 +    {
 +        perturbed = PERTURBED(atom[j]);
 +    }
 +    
 +    if (perturbed)
 +    {
 +        return;
 +    }
 +    
 +    /* Now it is only a question of whether the VdW and charge parameters
 +     * are OK. Before doing the check we see whether they are
 +     * identical to a previously found solvent type.
 +     * First we assign the current types and charges.    
 +     */
 +    for(j=0; j<nj; j++)
 +    {
 +        tmp_vdwtype[j] = atom[j0+j].type;
 +        tmp_charge[j]  = atom[j0+j].q;
 +    } 
 +    
 +    /* Does it match any previous solvent type? */
 +    for(k=0 ; k<*n_solvent_parameters; k++)
 +    {
 +        match = TRUE;
 +        
 +        
 +        /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
 +        if( (solvent_parameters[k].model==esolSPC   && nj!=3)  ||
 +            (solvent_parameters[k].model==esolTIP4P && nj!=4) )
 +            match = FALSE;
 +        
 +        /* Check that types & charges match for all atoms in molecule */
 +        for(j=0 ; j<nj && match==TRUE; j++)
 +        {                     
 +            if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
 +            {
 +                match = FALSE;
 +            }
 +            if(tmp_charge[j] != solvent_parameters[k].charge[j])
 +            {
 +                match = FALSE;
 +            }
 +        }
 +        if (match == TRUE)
 +        {
 +            /* Congratulations! We have a matched solvent.
 +             * Flag it with this type for later processing.
 +             */
 +            *cg_sp = k;
 +            solvent_parameters[k].count += nmol;
 +
 +            /* We are done with this charge group */
 +            return;
 +        }
 +    }
 +    
 +    /* If we get here, we have a tentative new solvent type.
 +     * Before we add it we must check that it fulfills the requirements
 +     * of the solvent optimized loops. First determine which atoms have
 +     * VdW interactions.   
 +     */
 +    for(j=0; j<nj; j++) 
 +    {
 +        has_vdw[j] = FALSE;
 +        tjA        = tmp_vdwtype[j];
 +        
 +        /* Go through all other types and see if any have non-zero
 +         * VdW parameters when combined with this one.
 +         */   
 +        for(k=0; k<fr->ntype && (has_vdw[j]==FALSE); k++)
 +        {
 +            /* We already checked that the atoms weren't perturbed,
 +             * so we only need to check state A now.
 +             */ 
 +            if (fr->bBHAM) 
 +            {
 +                has_vdw[j] = (has_vdw[j] || 
 +                              (BHAMA(fr->nbfp,fr->ntype,tjA,k) != 0.0) ||
 +                              (BHAMB(fr->nbfp,fr->ntype,tjA,k) != 0.0) ||
 +                              (BHAMC(fr->nbfp,fr->ntype,tjA,k) != 0.0));
 +            }
 +            else
 +            {
 +                /* Standard LJ */
 +                has_vdw[j] = (has_vdw[j] || 
 +                              (C6(fr->nbfp,fr->ntype,tjA,k)  != 0.0) ||
 +                              (C12(fr->nbfp,fr->ntype,tjA,k) != 0.0));
 +            }
 +        }
 +    }
 +    
 +    /* Now we know all we need to make the final check and assignment. */
 +    if (nj == 3)
 +    {
 +        /* So, is it an SPC?
 +         * For this we require that all atoms have charge,
 +         * the charges on atoms 2 & 3 should be the same, and only
-         if (has_vdw[0] == TRUE && 
-             has_vdw[1] == FALSE &&
++         * atom 1 might have VdW.
 +         */
-          * Only atom 1 should have VdW.
++        if (has_vdw[1] == FALSE &&
 +            has_vdw[2] == FALSE &&
 +            tmp_charge[0]  != 0 &&
 +            tmp_charge[1]  != 0 &&
 +            tmp_charge[2]  == tmp_charge[1])
 +        {
 +            srenew(solvent_parameters,*n_solvent_parameters+1);
 +            solvent_parameters[*n_solvent_parameters].model = esolSPC;
 +            solvent_parameters[*n_solvent_parameters].count = nmol;
 +            for(k=0;k<3;k++)
 +            {
 +                solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
 +                solvent_parameters[*n_solvent_parameters].charge[k]  = tmp_charge[k];
 +            }
 +
 +            *cg_sp = *n_solvent_parameters;
 +            (*n_solvent_parameters)++;
 +        }
 +    }
 +    else if (nj==4)
 +    {
 +        /* Or could it be a TIP4P?
 +         * For this we require that atoms 2,3,4 have charge, but not atom 1.
-         if(has_vdw[0] == TRUE && 
-            has_vdw[1] == FALSE &&
++         * Only atom 1 might have VdW.
 +         */
-                         csix    += npair_ij*BHAMC(nbfp,ntp,tpi,tpj);
++        if(has_vdw[1] == FALSE &&
 +           has_vdw[2] == FALSE &&
 +           has_vdw[3] == FALSE &&
 +           tmp_charge[0]  == 0 &&
 +           tmp_charge[1]  != 0 &&
 +           tmp_charge[2]  == tmp_charge[1] &&
 +           tmp_charge[3]  != 0)
 +        {
 +            srenew(solvent_parameters,*n_solvent_parameters+1);
 +            solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
 +            solvent_parameters[*n_solvent_parameters].count = nmol;
 +            for(k=0;k<4;k++)
 +            {
 +                solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
 +                solvent_parameters[*n_solvent_parameters].charge[k]  = tmp_charge[k];
 +            }
 +            
 +            *cg_sp = *n_solvent_parameters;
 +            (*n_solvent_parameters)++;
 +        }
 +    }
 +
 +    *solvent_parameters_p = solvent_parameters;
 +}
 +
 +static void
 +check_solvent(FILE *                fp,
 +              const gmx_mtop_t *    mtop,
 +              t_forcerec *          fr,
 +              cginfo_mb_t           *cginfo_mb)
 +{
 +    const t_block *   cgs;
 +    const t_block *   mols;
 +    const gmx_moltype_t *molt;
 +    int               mb,mol,cg_mol,at_offset,cg_offset,am,cgm,i,nmol_ch,nmol;
 +    int               n_solvent_parameters;
 +    solvent_parameters_t *solvent_parameters;
 +    int               **cg_sp;
 +    int               bestsp,bestsol;
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"Going to determine what solvent types we have.\n");
 +    }
 +
 +    mols = &mtop->mols;
 +
 +    n_solvent_parameters = 0;
 +    solvent_parameters = NULL;
 +    /* Allocate temporary array for solvent type */
 +    snew(cg_sp,mtop->nmolblock);
 +
 +    cg_offset = 0;
 +    at_offset = 0;
 +    for(mb=0; mb<mtop->nmolblock; mb++)
 +    {
 +        molt = &mtop->moltype[mtop->molblock[mb].type];
 +        cgs  = &molt->cgs;
 +        /* Here we have to loop over all individual molecules
 +         * because we need to check for QMMM particles.
 +         */
 +        snew(cg_sp[mb],cginfo_mb[mb].cg_mod);
 +        nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
 +        nmol    = mtop->molblock[mb].nmol/nmol_ch;
 +        for(mol=0; mol<nmol_ch; mol++)
 +        {
 +            cgm = mol*cgs->nr;
 +            am  = mol*cgs->index[cgs->nr];
 +            for(cg_mol=0; cg_mol<cgs->nr; cg_mol++)
 +            {
 +                check_solvent_cg(molt,cg_mol,nmol,
 +                                 mtop->groups.grpnr[egcQMMM] ?
 +                                 mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
 +                                 &mtop->groups.grps[egcQMMM],
 +                                 fr,
 +                                 &n_solvent_parameters,&solvent_parameters,
 +                                 cginfo_mb[mb].cginfo[cgm+cg_mol],
 +                                 &cg_sp[mb][cgm+cg_mol]);
 +            }
 +        }
 +        cg_offset += cgs->nr;
 +        at_offset += cgs->index[cgs->nr];
 +    }
 +
 +    /* Puh! We finished going through all charge groups.
 +     * Now find the most common solvent model.
 +     */   
 +    
 +    /* Most common solvent so far */
 +    bestsp = -2;
 +    for(i=0;i<n_solvent_parameters;i++)
 +    {
 +        if (bestsp == -2 ||
 +            solvent_parameters[i].count > solvent_parameters[bestsp].count)
 +        {
 +            bestsp = i;
 +        }
 +    }
 +    
 +    if (bestsp >= 0)
 +    {
 +        bestsol = solvent_parameters[bestsp].model;
 +    }
 +    else
 +    {
 +        bestsol = esolNO;
 +    }
 +    
 +#ifdef DISABLE_WATER_NLIST
 +      bestsol = esolNO;
 +#endif
 +
 +    fr->nWatMol = 0;
 +    for(mb=0; mb<mtop->nmolblock; mb++)
 +    {
 +        cgs = &mtop->moltype[mtop->molblock[mb].type].cgs;
 +        nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
 +        for(i=0; i<cginfo_mb[mb].cg_mod; i++)
 +        {
 +            if (cg_sp[mb][i] == bestsp)
 +            {
 +                SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i],bestsol);
 +                fr->nWatMol += nmol;
 +            }
 +            else
 +            {
 +                SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i],esolNO);
 +            }
 +        }
 +        sfree(cg_sp[mb]);
 +    }
 +    sfree(cg_sp);
 +    
 +    if (bestsol != esolNO && fp!=NULL)
 +    {
 +        fprintf(fp,"\nEnabling %s-like water optimization for %d molecules.\n\n",
 +                esol_names[bestsol],
 +                solvent_parameters[bestsp].count);
 +    }
 +
 +    sfree(solvent_parameters);
 +    fr->solvent_opt = bestsol;
 +}
 +
 +enum { acNONE=0, acCONSTRAINT, acSETTLE };
 +
 +static cginfo_mb_t *init_cginfo_mb(FILE *fplog,const gmx_mtop_t *mtop,
 +                                   t_forcerec *fr,gmx_bool bNoSolvOpt,
 +                                   gmx_bool *bExcl_IntraCGAll_InterCGNone)
 +{
 +    const t_block *cgs;
 +    const t_blocka *excl;
 +    const gmx_moltype_t *molt;
 +    const gmx_molblock_t *molb;
 +    cginfo_mb_t *cginfo_mb;
 +    gmx_bool *type_VDW;
 +    int  *cginfo;
 +    int  cg_offset,a_offset,cgm,am;
 +    int  mb,m,ncg_tot,cg,a0,a1,gid,ai,j,aj,excl_nalloc;
 +    int  *a_con;
 +    int  ftype;
 +    int  ia;
 +    gmx_bool bId,*bExcl,bExclIntraAll,bExclInter,bHaveVDW,bHaveQ;
 +
 +    ncg_tot = ncg_mtop(mtop);
 +    snew(cginfo_mb,mtop->nmolblock);
 +
 +    snew(type_VDW,fr->ntype);
 +    for(ai=0; ai<fr->ntype; ai++)
 +    {
 +        type_VDW[ai] = FALSE;
 +        for(j=0; j<fr->ntype; j++)
 +        {
 +            type_VDW[ai] = type_VDW[ai] ||
 +                fr->bBHAM ||
 +                C6(fr->nbfp,fr->ntype,ai,j) != 0 ||
 +                C12(fr->nbfp,fr->ntype,ai,j) != 0;
 +        }
 +    }
 +
 +    *bExcl_IntraCGAll_InterCGNone = TRUE;
 +
 +    excl_nalloc = 10;
 +    snew(bExcl,excl_nalloc);
 +    cg_offset = 0;
 +    a_offset  = 0;
 +    for(mb=0; mb<mtop->nmolblock; mb++)
 +    {
 +        molb = &mtop->molblock[mb];
 +        molt = &mtop->moltype[molb->type];
 +        cgs  = &molt->cgs;
 +        excl = &molt->excls;
 +
 +        /* Check if the cginfo is identical for all molecules in this block.
 +         * If so, we only need an array of the size of one molecule.
 +         * Otherwise we make an array of #mol times #cgs per molecule.
 +         */
 +        bId = TRUE;
 +        am = 0;
 +        for(m=0; m<molb->nmol; m++)
 +        {
 +            am = m*cgs->index[cgs->nr];
 +            for(cg=0; cg<cgs->nr; cg++)
 +            {
 +                a0 = cgs->index[cg];
 +                a1 = cgs->index[cg+1];
 +                if (ggrpnr(&mtop->groups,egcENER,a_offset+am+a0) !=
 +                    ggrpnr(&mtop->groups,egcENER,a_offset   +a0))
 +                {
 +                    bId = FALSE;
 +                }
 +                if (mtop->groups.grpnr[egcQMMM] != NULL)
 +                {
 +                    for(ai=a0; ai<a1; ai++)
 +                    {
 +                        if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
 +                            mtop->groups.grpnr[egcQMMM][a_offset   +ai])
 +                        {
 +                            bId = FALSE;
 +                        }
 +                    }
 +                }
 +            }
 +        }
 +
 +        cginfo_mb[mb].cg_start = cg_offset;
 +        cginfo_mb[mb].cg_end   = cg_offset + molb->nmol*cgs->nr;
 +        cginfo_mb[mb].cg_mod   = (bId ? 1 : molb->nmol)*cgs->nr;
 +        snew(cginfo_mb[mb].cginfo,cginfo_mb[mb].cg_mod);
 +        cginfo = cginfo_mb[mb].cginfo;
 +
 +        /* Set constraints flags for constrained atoms */
 +        snew(a_con,molt->atoms.nr);
 +        for(ftype=0; ftype<F_NRE; ftype++)
 +        {
 +            if (interaction_function[ftype].flags & IF_CONSTRAINT)
 +            {
 +                int nral;
 +
 +                nral = NRAL(ftype);
 +                for(ia=0; ia<molt->ilist[ftype].nr; ia+=1+nral)
 +                {
 +                    int a;
 +
 +                    for(a=0; a<nral; a++)
 +                    {
 +                        a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
 +                            (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
 +                    }
 +                }
 +            }
 +        }
 +
 +        for(m=0; m<(bId ? 1 : molb->nmol); m++)
 +        {
 +            cgm = m*cgs->nr;
 +            am  = m*cgs->index[cgs->nr];
 +            for(cg=0; cg<cgs->nr; cg++)
 +            {
 +                a0 = cgs->index[cg];
 +                a1 = cgs->index[cg+1];
 +
 +                /* Store the energy group in cginfo */
 +                gid = ggrpnr(&mtop->groups,egcENER,a_offset+am+a0);
 +                SET_CGINFO_GID(cginfo[cgm+cg],gid);
 +                
 +                /* Check the intra/inter charge group exclusions */
 +                if (a1-a0 > excl_nalloc) {
 +                    excl_nalloc = a1 - a0;
 +                    srenew(bExcl,excl_nalloc);
 +                }
 +                /* bExclIntraAll: all intra cg interactions excluded
 +                 * bExclInter:    any inter cg interactions excluded
 +                 */
 +                bExclIntraAll = TRUE;
 +                bExclInter    = FALSE;
 +                bHaveVDW      = FALSE;
 +                bHaveQ        = FALSE;
 +                for(ai=a0; ai<a1; ai++)
 +                {
 +                    /* Check VDW and electrostatic interactions */
 +                    bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
 +                                            type_VDW[molt->atoms.atom[ai].typeB]);
 +                    bHaveQ  = bHaveQ    || (molt->atoms.atom[ai].q != 0 ||
 +                                            molt->atoms.atom[ai].qB != 0);
 +
 +                    /* Clear the exclusion list for atom ai */
 +                    for(aj=a0; aj<a1; aj++)
 +                    {
 +                        bExcl[aj-a0] = FALSE;
 +                    }
 +                    /* Loop over all the exclusions of atom ai */
 +                    for(j=excl->index[ai]; j<excl->index[ai+1]; j++)
 +                    {
 +                        aj = excl->a[j];
 +                        if (aj < a0 || aj >= a1)
 +                        {
 +                            bExclInter = TRUE;
 +                        }
 +                        else
 +                        {
 +                            bExcl[aj-a0] = TRUE;
 +                        }
 +                    }
 +                    /* Check if ai excludes a0 to a1 */
 +                    for(aj=a0; aj<a1; aj++)
 +                    {
 +                        if (!bExcl[aj-a0])
 +                        {
 +                            bExclIntraAll = FALSE;
 +                        }
 +                    }
 +
 +                    switch (a_con[ai])
 +                    {
 +                    case acCONSTRAINT:
 +                        SET_CGINFO_CONSTR(cginfo[cgm+cg]);
 +                        break;
 +                    case acSETTLE:
 +                        SET_CGINFO_SETTLE(cginfo[cgm+cg]);
 +                        break;
 +                    default:
 +                        break;
 +                    }
 +                }
 +                if (bExclIntraAll)
 +                {
 +                    SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
 +                }
 +                if (bExclInter)
 +                {
 +                    SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
 +                }
 +                if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
 +                {
 +                    /* The size in cginfo is currently only read with DD */
 +                    gmx_fatal(FARGS,"A charge group has size %d which is larger than the limit of %d atoms",a1-a0,MAX_CHARGEGROUP_SIZE);
 +                }
 +                if (bHaveVDW)
 +                {
 +                    SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
 +                }
 +                if (bHaveQ)
 +                {
 +                    SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
 +                }
 +                /* Store the charge group size */
 +                SET_CGINFO_NATOMS(cginfo[cgm+cg],a1-a0);
 +
 +                if (!bExclIntraAll || bExclInter)
 +                {
 +                    *bExcl_IntraCGAll_InterCGNone = FALSE;
 +                }
 +            }
 +        }
 +
 +        sfree(a_con);
 +
 +        cg_offset += molb->nmol*cgs->nr;
 +        a_offset  += molb->nmol*cgs->index[cgs->nr];
 +    }
 +    sfree(bExcl);
 +    
 +    /* The solvent optimizer is called after the QM is initialized,
 +     * because we don't want the QM subsystem to become an
 +     * optimized solvent.
 +     */
 +
 +    check_solvent(fplog,mtop,fr,cginfo_mb);
 +    
 +    if (getenv("GMX_NO_SOLV_OPT"))
 +    {
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Found environment variable GMX_NO_SOLV_OPT.\n"
 +                    "Disabling all solvent optimization\n");
 +        }
 +        fr->solvent_opt = esolNO;
 +    }
 +    if (bNoSolvOpt)
 +    {
 +        fr->solvent_opt = esolNO;
 +    }
 +    if (!fr->solvent_opt)
 +    {
 +        for(mb=0; mb<mtop->nmolblock; mb++)
 +        {
 +            for(cg=0; cg<cginfo_mb[mb].cg_mod; cg++)
 +            {
 +                SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg],esolNO);
 +            }
 +        }
 +    }
 +    
 +    return cginfo_mb;
 +}
 +
 +static int *cginfo_expand(int nmb,cginfo_mb_t *cgi_mb)
 +{
 +    int ncg,mb,cg;
 +    int *cginfo;
 +
 +    ncg = cgi_mb[nmb-1].cg_end;
 +    snew(cginfo,ncg);
 +    mb = 0;
 +    for(cg=0; cg<ncg; cg++)
 +    {
 +        while (cg >= cgi_mb[mb].cg_end)
 +        {
 +            mb++;
 +        }
 +        cginfo[cg] =
 +            cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
 +    }
 +
 +    return cginfo;
 +}
 +
 +static void set_chargesum(FILE *log,t_forcerec *fr,const gmx_mtop_t *mtop)
 +{
 +    double qsum,q2sum,q;
 +    int    mb,nmol,i;
 +    const t_atoms *atoms;
 +    
 +    qsum  = 0;
 +    q2sum = 0;
 +    for(mb=0; mb<mtop->nmolblock; mb++)
 +    {
 +        nmol  = mtop->molblock[mb].nmol;
 +        atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 +        for(i=0; i<atoms->nr; i++)
 +        {
 +            q = atoms->atom[i].q;
 +            qsum  += nmol*q;
 +            q2sum += nmol*q*q;
 +        }
 +    }
 +    fr->qsum[0]  = qsum;
 +    fr->q2sum[0] = q2sum;
 +    if (fr->efep != efepNO)
 +    {
 +        qsum  = 0;
 +        q2sum = 0;
 +        for(mb=0; mb<mtop->nmolblock; mb++)
 +        {
 +            nmol  = mtop->molblock[mb].nmol;
 +            atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 +            for(i=0; i<atoms->nr; i++)
 +            {
 +                q = atoms->atom[i].qB;
 +                qsum  += nmol*q;
 +                q2sum += nmol*q*q;
 +            }
 +            fr->qsum[1]  = qsum;
 +            fr->q2sum[1] = q2sum;
 +        }
 +    }
 +    else
 +    {
 +        fr->qsum[1]  = fr->qsum[0];
 +        fr->q2sum[1] = fr->q2sum[0];
 +    }
 +    if (log) {
 +        if (fr->efep == efepNO)
 +            fprintf(log,"System total charge: %.3f\n",fr->qsum[0]);
 +        else
 +            fprintf(log,"System total charge, top. A: %.3f top. B: %.3f\n",
 +                    fr->qsum[0],fr->qsum[1]);
 +    }
 +}
 +
 +void update_forcerec(FILE *log,t_forcerec *fr,matrix box)
 +{
 +    if (fr->eeltype == eelGRF)
 +    {
 +        calc_rffac(NULL,fr->eeltype,fr->epsilon_r,fr->epsilon_rf,
 +                   fr->rcoulomb,fr->temp,fr->zsquare,box,
 +                   &fr->kappa,&fr->k_rf,&fr->c_rf);
 +    }
 +}
 +
 +void set_avcsixtwelve(FILE *fplog,t_forcerec *fr,const gmx_mtop_t *mtop)
 +{
 +    const t_atoms *atoms,*atoms_tpi;
 +    const t_blocka *excl;
 +    int    mb,nmol,nmolc,i,j,tpi,tpj,j1,j2,k,n,nexcl,q;
 +#if (defined SIZEOF_LONG_LONG_INT) && (SIZEOF_LONG_LONG_INT >= 8)    
 +    long long int  npair,npair_ij,tmpi,tmpj;
 +#else
 +    double npair, npair_ij,tmpi,tmpj;
 +#endif
 +    double csix,ctwelve;
 +    int    ntp,*typecount;
 +    gmx_bool   bBHAM;
 +    real   *nbfp;
 +
 +    ntp = fr->ntype;
 +    bBHAM = fr->bBHAM;
 +    nbfp = fr->nbfp;
 +    
 +    for(q=0; q<(fr->efep==efepNO ? 1 : 2); q++) {
 +        csix = 0;
 +        ctwelve = 0;
 +        npair = 0;
 +        nexcl = 0;
 +        if (!fr->n_tpi) {
 +            /* Count the types so we avoid natoms^2 operations */
 +            snew(typecount,ntp);
 +            for(mb=0; mb<mtop->nmolblock; mb++) {
 +                nmol  = mtop->molblock[mb].nmol;
 +                atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 +                for(i=0; i<atoms->nr; i++) {
 +                    if (q == 0)
 +                    {
 +                        tpi = atoms->atom[i].type;
 +                    }
 +                    else
 +                    {
 +                        tpi = atoms->atom[i].typeB;
 +                    }
 +                    typecount[tpi] += nmol;
 +                }
 +            }
 +            for(tpi=0; tpi<ntp; tpi++) {
 +                for(tpj=tpi; tpj<ntp; tpj++) {
 +                    tmpi = typecount[tpi];
 +                    tmpj = typecount[tpj];
 +                    if (tpi != tpj)
 +                    {
 +                        npair_ij = tmpi*tmpj;
 +                    }
 +                    else
 +                    {
 +                        npair_ij = tmpi*(tmpi - 1)/2;
 +                    }
 +                    if (bBHAM) {
-                         csix    += npair_ij*   C6(nbfp,ntp,tpi,tpj);
-                         ctwelve += npair_ij*  C12(nbfp,ntp,tpi,tpj);
++                        /* nbfp now includes the 6.0 derivative prefactor */
++                        csix    += npair_ij*BHAMC(nbfp,ntp,tpi,tpj)/6.0;
 +                    } else {
-                                csix -= nmol*BHAMC(nbfp,ntp,tpi,tpj);
++                        /* nbfp now includes the 6.0/12.0 derivative prefactors */
++                        csix    += npair_ij*   C6(nbfp,ntp,tpi,tpj)/6.0;
++                        ctwelve += npair_ij*  C12(nbfp,ntp,tpi,tpj)/12.0;
 +                    }
 +                    npair += npair_ij;
 +                }
 +            }
 +            sfree(typecount);
 +            /* Subtract the excluded pairs.
 +             * The main reason for subtracting exclusions is that in some cases
 +             * some combinations might never occur and the parameters could have
 +             * any value. These unused values should not influence the dispersion
 +             * correction.
 +             */
 +            for(mb=0; mb<mtop->nmolblock; mb++) {
 +                nmol  = mtop->molblock[mb].nmol;
 +                atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 +                excl  = &mtop->moltype[mtop->molblock[mb].type].excls;
 +                for(i=0; (i<atoms->nr); i++) {
 +                    if (q == 0)
 +                    {
 +                        tpi = atoms->atom[i].type;
 +                    }
 +                    else
 +                    {
 +                        tpi = atoms->atom[i].typeB;
 +                    }
 +                    j1  = excl->index[i];
 +                    j2  = excl->index[i+1];
 +                    for(j=j1; j<j2; j++) {
 +                        k = excl->a[j];
 +                        if (k > i)
 +                        {
 +                            if (q == 0)
 +                            {
 +                                tpj = atoms->atom[k].type;
 +                            }
 +                            else
 +                            {
 +                                tpj = atoms->atom[k].typeB;
 +                            }
 +                            if (bBHAM) {
-                                 csix    -= nmol*C6 (nbfp,ntp,tpi,tpj);
-                                 ctwelve -= nmol*C12(nbfp,ntp,tpi,tpj);
++                                /* nbfp now includes the 6.0 derivative prefactor */
++                               csix -= nmol*BHAMC(nbfp,ntp,tpi,tpj)/6.0;
 +                            } else {
-                             csix    += nmolc*BHAMC(nbfp,ntp,tpi,tpj);
++                                /* nbfp now includes the 6.0/12.0 derivative prefactors */
++                                csix    -= nmol*C6 (nbfp,ntp,tpi,tpj)/6.0;
++                                ctwelve -= nmol*C12(nbfp,ntp,tpi,tpj)/12.0;
 +                            }
 +                            nexcl += nmol;
 +                        }
 +                    }
 +                }
 +            }
 +        } else {
 +            /* Only correct for the interaction of the test particle
 +             * with the rest of the system.
 +             */
 +            atoms_tpi =
 +                &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
 +
 +            npair = 0;
 +            for(mb=0; mb<mtop->nmolblock; mb++) {
 +                nmol  = mtop->molblock[mb].nmol;
 +                atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 +                for(j=0; j<atoms->nr; j++) {
 +                    nmolc = nmol;
 +                    /* Remove the interaction of the test charge group
 +                     * with itself.
 +                     */
 +                    if (mb == mtop->nmolblock-1)
 +                    {
 +                        nmolc--;
 +                        
 +                        if (mb == 0 && nmol == 1)
 +                        {
 +                            gmx_fatal(FARGS,"Old format tpr with TPI, please generate a new tpr file");
 +                        }
 +                    }
 +                    if (q == 0)
 +                    {
 +                        tpj = atoms->atom[j].type;
 +                    }
 +                    else
 +                    {
 +                        tpj = atoms->atom[j].typeB;
 +                    }
 +                    for(i=0; i<fr->n_tpi; i++)
 +                    {
 +                        if (q == 0)
 +                        {
 +                            tpi = atoms_tpi->atom[i].type;
 +                        }
 +                        else
 +                        {
 +                            tpi = atoms_tpi->atom[i].typeB;
 +                        }
 +                        if (bBHAM)
 +                        {
-                             csix    += nmolc*C6 (nbfp,ntp,tpi,tpj);
-                             ctwelve += nmolc*C12(nbfp,ntp,tpi,tpj);
++                            /* nbfp now includes the 6.0 derivative prefactor */
++                            csix    += nmolc*BHAMC(nbfp,ntp,tpi,tpj)/6.0;
 +                        }
 +                        else
 +                        {
-                           const t_commrec *cr,
-                           const char *tabfn,char *eg1,char *eg2,
-                           t_nblists *nbl)
++                            /* nbfp now includes the 6.0/12.0 derivative prefactors */
++                            csix    += nmolc*C6 (nbfp,ntp,tpi,tpj)/6.0;
++                            ctwelve += nmolc*C12(nbfp,ntp,tpi,tpj)/12.0;
 +                        }
 +                        npair += nmolc;
 +                    }
 +                }
 +            }
 +        }
 +        if (npair - nexcl <= 0 && fplog) {
 +            fprintf(fplog,"\nWARNING: There are no atom pairs for dispersion correction\n\n");
 +            csix     = 0;
 +            ctwelve  = 0;
 +        } else {
 +            csix    /= npair - nexcl;
 +            ctwelve /= npair - nexcl;
 +        }
 +        if (debug) {
 +            fprintf(debug,"Counted %d exclusions\n",nexcl);
 +            fprintf(debug,"Average C6 parameter is: %10g\n",(double)csix);
 +            fprintf(debug,"Average C12 parameter is: %10g\n",(double)ctwelve);
 +        }
 +        fr->avcsix[q]    = csix;
 +        fr->avctwelve[q] = ctwelve;
 +    }
 +    if (fplog != NULL)
 +    {
 +        if (fr->eDispCorr == edispcAllEner ||
 +            fr->eDispCorr == edispcAllEnerPres)
 +        {
 +            fprintf(fplog,"Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
 +                    fr->avcsix[0],fr->avctwelve[0]);
 +        }
 +        else
 +        {
 +            fprintf(fplog,"Long Range LJ corr.: <C6> %10.4e\n",fr->avcsix[0]);
 +        }
 +    }
 +}
 +
 +
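
The averages computed in set_avcsixtwelve reduce to <C6> = sum over type pairs of npair_ij*c6_ij divided by (npair - nexcl), and likewise for <C12>, with excluded intra-molecular pairs removed from both the sum and the pair count. A toy two-type example of that average follows; it ignores exclusions and the 6.0/12.0 prefactor bookkeeping, and the numbers are made up for illustration only.

#include <stdio.h>

int main(void)
{
    /* Two atom types with counts count[i] and pair coefficients c6[i][j]. */
    int    count[2] = { 3, 2 };
    double c6[2][2] = { { 1.0, 2.0 }, { 2.0, 4.0 } };
    double csix  = 0.0;
    double npair = 0.0;
    double nexcl = 0.0;            /* no exclusions in this toy system */
    int    i, j;

    for (i = 0; i < 2; i++)
    {
        for (j = i; j < 2; j++)
        {
            /* Same-type pairs: n*(n-1)/2; mixed pairs: n_i*n_j. */
            double nij = (i == j) ? count[i]*(count[i] - 1)/2.0
                                  : (double)count[i]*count[j];

            csix  += nij*c6[i][j];
            npair += nij;
        }
    }
    csix /= npair - nexcl;
    printf("<C6> = %g over %g pairs\n", csix, npair);   /* 1.9 over 10 */
    return 0;
}
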
 +static void set_bham_b_max(FILE *fplog,t_forcerec *fr,
 +                           const gmx_mtop_t *mtop)
 +{
 +    const t_atoms *at1,*at2;
 +    int  mt1,mt2,i,j,tpi,tpj,ntypes;
 +    real b,bmin;
 +    real *nbfp;
 +
 +    if (fplog)
 +    {
 +        fprintf(fplog,"Determining largest Buckingham b parameter for table\n");
 +    }
 +    nbfp   = fr->nbfp;
 +    ntypes = fr->ntype;
 +    
 +    bmin           = -1;
 +    fr->bham_b_max = 0;
 +    for(mt1=0; mt1<mtop->nmoltype; mt1++)
 +    {
 +        at1 = &mtop->moltype[mt1].atoms;
 +        for(i=0; (i<at1->nr); i++)
 +        {
 +            tpi = at1->atom[i].type;
 +            if (tpi >= ntypes)
 +                gmx_fatal(FARGS,"Atomtype[%d] = %d, maximum = %d",i,tpi,ntypes);
 +            
 +            for(mt2=mt1; mt2<mtop->nmoltype; mt2++)
 +            {
 +                at2 = &mtop->moltype[mt2].atoms;
 +                for(j=0; (j<at2->nr); j++) {
 +                    tpj = at2->atom[j].type;
 +                    if (tpj >= ntypes)
 +                    {
 +                        gmx_fatal(FARGS,"Atomtype[%d] = %d, maximum = %d",j,tpj,ntypes);
 +                    }
 +                    b = BHAMB(nbfp,ntypes,tpi,tpj);
 +                    if (b > fr->bham_b_max)
 +                    {
 +                        fr->bham_b_max = b;
 +                    }
 +                    if ((b < bmin) || (bmin==-1))
 +                    {
 +                        bmin = b;
 +                    }
 +                }
 +            }
 +        }
 +    }
 +    if (fplog)
 +    {
 +        fprintf(fplog,"Buckingham b parameters, min: %g, max: %g\n",
 +                bmin,fr->bham_b_max);
 +    }
 +}
 +
 +static void make_nbf_tables(FILE *fp,const output_env_t oenv,
 +                            t_forcerec *fr,real rtab,
-   char buf[STRLEN];
-   int i,j;
++                            const t_commrec *cr,
++                            const char *tabfn,char *eg1,char *eg2,
++                            t_nblists *nbl)
 +{
-   if (tabfn == NULL) {
-     if (debug)
-       fprintf(debug,"No table file name passed, can not read table, can not do non-bonded interactions\n");
-     return;
-   }
-     
-   sprintf(buf,"%s",tabfn);
-   if (eg1 && eg2)
++    char buf[STRLEN];
++    int i,j;
 +
-     sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1,"_%s_%s.%s",
-           eg1,eg2,ftp2ext(efXVG));
-   nbl->tab = make_tables(fp,oenv,fr,MASTER(cr),buf,rtab,0);
-   /* Copy the contents of the table to separate coulomb and LJ tables too,
-    * to improve cache performance.
-    */
-   /* For performance reasons we want
-    * the table data to be aligned to 16-byte. The pointer could be freed
-    * but currently isn't.
-    */
-   snew_aligned(nbl->vdwtab,8*(nbl->tab.n+1),16);
-   snew_aligned(nbl->coultab,4*(nbl->tab.n+1),16);
-   
-   for(i=0; i<=nbl->tab.n; i++) {
-     for(j=0; j<4; j++)
-       nbl->coultab[4*i+j] = nbl->tab.tab[12*i+j];
-     for(j=0; j<8; j++)
-       nbl->vdwtab [8*i+j] = nbl->tab.tab[12*i+4+j];
-   }
++    if (tabfn == NULL) {
++        if (debug)
++            fprintf(debug,"No table file name passed, can not read table, can not do non-bonded interactions\n");
++        return;
++    }
++
++    sprintf(buf,"%s",tabfn);
++    if (eg1 && eg2)
 +    /* Append the two energy group names */
- static void init_verlet_ewald_f_table(interaction_const_t *ic,
-                                       int verlet_kernel_type)
++        sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1,"_%s_%s.%s",
++                eg1,eg2,ftp2ext(efXVG));
++    nbl->table_elec_vdw = make_tables(fp,oenv,fr,MASTER(cr),buf,rtab,0);
++    /* Copy the contents of the table to separate coulomb and LJ tables too,
++     * to improve cache performance.
++     */
++    /* For performance reasons we want
++     * the table data to be 16-byte aligned. The pointers could be freed
++     * but currently aren't.
++     */
++    nbl->table_elec.interaction = GMX_TABLE_INTERACTION_ELEC;
++    nbl->table_elec.format = nbl->table_elec_vdw.format;
++    nbl->table_elec.r = nbl->table_elec_vdw.r;
++    nbl->table_elec.n = nbl->table_elec_vdw.n;
++    nbl->table_elec.scale = nbl->table_elec_vdw.scale;
++    nbl->table_elec.scale_exp = nbl->table_elec_vdw.scale_exp;
++    nbl->table_elec.formatsize = nbl->table_elec_vdw.formatsize;
++    nbl->table_elec.ninteractions = 1;
++    nbl->table_elec.stride = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
++    snew_aligned(nbl->table_elec.data,nbl->table_elec.stride*(nbl->table_elec.n+1),16);
++
++    nbl->table_vdw.interaction = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
++    nbl->table_vdw.format = nbl->table_elec_vdw.format;
++    nbl->table_vdw.r = nbl->table_elec_vdw.r;
++    nbl->table_vdw.n = nbl->table_elec_vdw.n;
++    nbl->table_vdw.scale = nbl->table_elec_vdw.scale;
++    nbl->table_vdw.scale_exp = nbl->table_elec_vdw.scale_exp;
++    nbl->table_vdw.formatsize = nbl->table_elec_vdw.formatsize;
++    nbl->table_vdw.ninteractions = 2;
++    nbl->table_vdw.stride = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
++    snew_aligned(nbl->table_vdw.data,nbl->table_vdw.stride*(nbl->table_vdw.n+1),16);
++
++    for(i=0; i<=nbl->table_elec_vdw.n; i++)
++    {
++        for(j=0; j<4; j++)
++            nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
++        for(j=0; j<8; j++)
++            nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
++    }
 +}
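/* Editor's note -- illustrative only, not part of this change: the copy loop above
 * assumes the combined table stores formatsize (4) reals per interaction per point,
 * with ninteractions = 3 (Coulomb plus the two VdW terms), i.e. 12 reals per point:
 *
 *     combined point i : data[12*i + 0..11]
 *     table_elec point : the first 4 of these  -> table_elec.data[4*i + 0..3]
 *     table_vdw  point : the remaining 8       -> table_vdw.data [8*i + 0..7]
 *
 * which is exactly what the 4*i / 8*i / 12*i index arithmetic implements.
 */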
 +
 +static void count_tables(int ftype1,int ftype2,const gmx_mtop_t *mtop,
 +                         int *ncount,int **count)
 +{
 +    const gmx_moltype_t *molt;
 +    const t_ilist *il;
 +    int mt,ftype,stride,i,j,tabnr;
 +    
 +    for(mt=0; mt<mtop->nmoltype; mt++)
 +    {
 +        molt = &mtop->moltype[mt];
 +        for(ftype=0; ftype<F_NRE; ftype++)
 +        {
 +            if (ftype == ftype1 || ftype == ftype2) {
 +                il = &molt->ilist[ftype];
 +                stride = 1 + NRAL(ftype);
 +                for(i=0; i<il->nr; i+=stride) {
 +                    tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
 +                    if (tabnr < 0)
 +                        gmx_fatal(FARGS,"A bonded table number is smaller than 0: %d\n",tabnr);
 +                    if (tabnr >= *ncount) {
 +                        srenew(*count,tabnr+1);
 +                        for(j=*ncount; j<tabnr+1; j++)
 +                            (*count)[j] = 0;
 +                        *ncount = tabnr+1;
 +                    }
 +                    (*count)[tabnr]++;
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +static bondedtable_t *make_bonded_tables(FILE *fplog,
 +                                         int ftype1,int ftype2,
 +                                         const gmx_mtop_t *mtop,
 +                                         const char *basefn,const char *tabext)
 +{
 +    int  i,ncount,*count;
 +    char tabfn[STRLEN];
 +    bondedtable_t *tab;
 +    
 +    tab = NULL;
 +    
 +    ncount = 0;
 +    count = NULL;
 +    count_tables(ftype1,ftype2,mtop,&ncount,&count);
 +    
 +    if (ncount > 0) {
 +        snew(tab,ncount);
 +        for(i=0; i<ncount; i++) {
 +            if (count[i] > 0) {
 +                sprintf(tabfn,"%s",basefn);
 +                sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1,"_%s%d.%s",
 +                        tabext,i,ftp2ext(efXVG));
 +                tab[i] = make_bonded_table(fplog,tabfn,NRAL(ftype1)-2);
 +            }
 +        }
 +        sfree(count);
 +    }
 +  
 +    return tab;
 +}
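/* Editor's example -- assumption for illustration, not part of this change: with
 * basefn "table.xvg" and tabext "b", a bonded interaction that references table
 * number 0 makes this routine read "table_b0.xvg"; table number 3 would read
 * "table_b3.xvg", and the angle/dihedral variants use "a"/"d" in the same way.
 */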
 +
 +void forcerec_set_ranges(t_forcerec *fr,
 +                         int ncg_home,int ncg_force,
 +                         int natoms_force,
 +                         int natoms_force_constr,int natoms_f_novirsum)
 +{
 +    fr->cg0 = 0;
 +    fr->hcg = ncg_home;
 +
 +    /* fr->ncg_force is unused in the standard code,
 +     * but it can be useful for modified code dealing with charge groups.
 +     */
 +    fr->ncg_force           = ncg_force;
 +    fr->natoms_force        = natoms_force;
 +    fr->natoms_force_constr = natoms_force_constr;
 +
 +    if (fr->natoms_force_constr > fr->nalloc_force)
 +    {
 +        fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
 +
 +        if (fr->bTwinRange)
 +        {
 +            srenew(fr->f_twin,fr->nalloc_force);
 +        }
 +    }
 +
 +    if (fr->bF_NoVirSum)
 +    {
 +        fr->f_novirsum_n = natoms_f_novirsum;
 +        if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
 +        {
 +            fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
 +            srenew(fr->f_novirsum_alloc,fr->f_novirsum_nalloc);
 +        }
 +    }
 +    else
 +    {
 +        fr->f_novirsum_n = 0;
 +    }
 +}
 +
 +static real cutoff_inf(real cutoff)
 +{
 +    if (cutoff == 0)
 +    {
 +        cutoff = GMX_CUTOFF_INF;
 +    }
 +
 +    return cutoff;
 +}
 +
 +static void make_adress_tf_tables(FILE *fp,const output_env_t oenv,
 +                            t_forcerec *fr,const t_inputrec *ir,
 +                          const char *tabfn, const gmx_mtop_t *mtop,
 +                            matrix     box)
 +{
 +  char buf[STRLEN];
 +  int i,j;
 +
 +  if (tabfn == NULL) {
 +    gmx_fatal(FARGS,"No thermoforce table file given. Use -tabletf to specify a file\n");
 +    return;
 +  }
 +
 +  snew(fr->atf_tabs, ir->adress->n_tf_grps);
 +
 +  for (i=0; i<ir->adress->n_tf_grps; i++){
 +    j = ir->adress->tf_table_index[i]; /* get energy group index */
 +    /* Start from the base table file name, then splice in the group name */
 +    sprintf(buf,"%s",tabfn);
 +    sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1,"tf_%s.%s",
 +        *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]) ,ftp2ext(efXVG));
 +    printf("loading tf table for energygrp index %d from %s\n", j, buf);
 +    fr->atf_tabs[i] = make_atf_table(fp,oenv,fr,buf, box);
 +  }
 +
 +}
 +
 +gmx_bool can_use_allvsall(const t_inputrec *ir, const gmx_mtop_t *mtop,
 +                      gmx_bool bPrintNote,t_commrec *cr,FILE *fp)
 +{
 +    gmx_bool bAllvsAll;
 +
 +    bAllvsAll =
 +        (
 +         ir->rlist==0            &&
 +         ir->rcoulomb==0         &&
 +         ir->rvdw==0             &&
 +         ir->ePBC==epbcNONE      &&
 +         ir->vdwtype==evdwCUT    &&
 +         ir->coulombtype==eelCUT &&
 +         ir->efep==efepNO        &&
 +         (ir->implicit_solvent == eisNO || 
 +          (ir->implicit_solvent==eisGBSA && (ir->gb_algorithm==egbSTILL || 
 +                                             ir->gb_algorithm==egbHCT   || 
 +                                             ir->gb_algorithm==egbOBC))) &&
 +         getenv("GMX_NO_ALLVSALL") == NULL
 +            );
 +    
 +    if (bAllvsAll && ir->opts.ngener > 1)
 +    {
 +        const char *note="NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
 +
 +        if (bPrintNote)
 +        {
 +            if (MASTER(cr))
 +            {
 +                fprintf(stderr,"\n%s\n",note);
 +            }
 +            if (fp != NULL)
 +            {
 +                fprintf(fp,"\n%s\n",note);
 +            }
 +        }
 +        bAllvsAll = FALSE;
 +    }
 +
 +    if(bAllvsAll && fp && MASTER(cr))
 +    {
 +        fprintf(fp,"\nUsing accelerated all-vs-all kernels.\n\n");
 +    }
 +    
 +    return bAllvsAll;
 +}
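/* Editor's sketch -- hedged illustration, not part of this change: the conditions
 * above roughly correspond to an .mdp setup such as
 *
 *     pbc         = no
 *     rlist       = 0
 *     rcoulomb    = 0
 *     rvdw        = 0
 *     coulombtype = cut-off
 *     vdwtype     = cut-off
 *
 * with free-energy off, vacuum or GBSA implicit solvent, and (for the accelerated
 * loops to actually be used) a single energy monitor group and GMX_NO_ALLVSALL unset.
 */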
 +
 +
 +static void init_forcerec_f_threads(t_forcerec *fr,int grpp_nener)
 +{
 +    int t,i;
 +
 +    fr->nthreads = gmx_omp_nthreads_get(emntBonded);
 +
 +    if (fr->nthreads > 1)
 +    {
 +        snew(fr->f_t,fr->nthreads);
 +        /* Thread 0 uses the global force and energy arrays */
 +        for(t=1; t<fr->nthreads; t++)
 +        {
 +            fr->f_t[t].f = NULL;
 +            fr->f_t[t].f_nalloc = 0;
 +            snew(fr->f_t[t].fshift,SHIFTS);
 +            /* snew(fr->f_t[t].ener,F_NRE); */
 +            fr->f_t[t].grpp.nener = grpp_nener;
 +            for(i=0; i<egNR; i++)
 +            {
 +                snew(fr->f_t[t].grpp.ener[i],grpp_nener);
 +            }
 +        }
 +    }
 +}
 +
 +
 +static void pick_nbnxn_kernel_cpu(FILE *fp,
 +                                  const t_commrec *cr,
 +                                  const gmx_cpuid_t cpuid_info,
 +                                  int *kernel_type)
 +{
 +    *kernel_type = nbk4x4_PlainC;
 +
 +#ifdef GMX_X86_SSE2
 +    {
 +        /* On Intel Sandy-Bridge AVX-256 kernels are always faster.
 +         * On AMD Bulldozer AVX-256 is much slower than AVX-128.
 +         */
 +        if(gmx_cpuid_feature(cpuid_info, GMX_CPUID_FEATURE_X86_AVX) == 1 &&
 +           gmx_cpuid_vendor(cpuid_info) != GMX_CPUID_VENDOR_AMD)
 +        {
 +#ifdef GMX_X86_AVX_256
 +            *kernel_type = nbk4xN_X86_SIMD256;
 +#else
 +            *kernel_type = nbk4xN_X86_SIMD128;
 +#endif
 +        }
 +        else
 +        {
 +            *kernel_type = nbk4xN_X86_SIMD128;
 +        }
 +
 +        if (getenv("GMX_NBNXN_AVX128") != NULL)
 +        {
 +            *kernel_type = nbk4xN_X86_SIMD128;
 +        }
 +        if (getenv("GMX_NBNXN_AVX256") != NULL)
 +        {
 +#ifdef GMX_X86_AVX_256
 +            *kernel_type = nbk4xN_X86_SIMD256;
 +#else
 +            gmx_fatal(FARGS,"You requested AVX-256 nbnxn kernels, but GROMACS was built without AVX support");
 +#endif
 +        }
 +    }
 +#endif /* GMX_X86_SSE2 */
 +}
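/* Editor's summary of the selection above -- not part of this change:
 *   no GMX_X86_SSE2 support built in          -> plain C 4x4 kernels
 *   SSE2/SSE4.1 build, or AVX on an AMD CPU   -> 128-bit x86 SIMD kernels
 *   AVX on a non-AMD CPU in an AVX-256 build  -> 256-bit x86 SIMD kernels
 * GMX_NBNXN_AVX128 / GMX_NBNXN_AVX256 override the automatic choice, and asking
 * for AVX-256 kernels in a build without AVX-256 support is a fatal error.
 */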
 +
 +
 +/* Note that _mm_... intrinsics can be converted to either SSE or AVX
 + * depending on compiler flags.
 + * For gcc we check for __AVX__.
 + * At least a check for icc should be added (if there is a macro).
 + */
 +static const char *nbk_name[] =
 +  { "not set", "plain C 4x4",
 +#if !(defined GMX_X86_AVX_256 || defined GMX_X86_AVX128_FMA || defined __AVX__)
 +#ifndef GMX_X86_SSE4_1
 +#ifndef GMX_DOUBLE
 +    "SSE2 4x4",
 +#else
 +    "SSE2 4x2",
 +#endif
 +#else
 +#ifndef GMX_DOUBLE
 +    "SSE4.1 4x4",
 +#else
 +    "SSE4.1 4x2",
 +#endif
 +#endif
 +#else
 +#ifndef GMX_DOUBLE
 +    "AVX-128 4x4",
 +#else
 +    "AVX-128 4x2",
 +#endif
 +#endif
 +#ifndef GMX_DOUBLE
 +    "AVX-256 4x8",
 +#else
 +    "AVX-256 4x4",
 +#endif
 +    "CUDA 8x8x8", "plain C 8x8x8" };
 +
 +static void pick_nbnxn_kernel(FILE *fp,
 +                              const t_commrec *cr,
 +                              const gmx_hw_info_t *hwinfo,
 +                              gmx_bool use_cpu_acceleration,
 +                              gmx_bool *bUseGPU,
 +                              int *kernel_type)
 +{
 +    gmx_bool bEmulateGPU, bGPU;
 +    char gpu_err_str[STRLEN];
 +
 +    assert(kernel_type);
 +
 +    *kernel_type = nbkNotSet;
 +    /* if bUseGPU == NULL we don't want a GPU (e.g. hybrid mode kernel selection) */
 +    bGPU = (bUseGPU != NULL) && hwinfo->bCanUseGPU;
 +
 +    /* Run GPU emulation mode if GMX_EMULATE_GPU is defined or if non-bonded
 +       calculations are turned off via GMX_NO_NONBONDED -- this is the simplest way
 +       to turn off GPU/CUDA initializations as well. */
 +    bEmulateGPU = ((getenv("GMX_EMULATE_GPU") != NULL) ||
 +                   (getenv("GMX_NO_NONBONDED") != NULL));
 +
 +    if (bGPU)
 +    {
 +        if (bEmulateGPU)
 +        {
 +            bGPU = FALSE;
 +        }
 +        else
 +        {
 +            /* Each PP node will use the intra-node id-th device from the
 +             * list of detected/selected GPUs. */ 
 +            if (!init_gpu(cr->nodeid_group_intra, gpu_err_str, &hwinfo->gpu_info))
 +            {
 +                /* At this point the init should never fail as we made sure that 
 +                 * we have all the GPUs we need. If it still does, we'll bail. */
 +                gmx_fatal(FARGS, "On node %d failed to initialize GPU #%d: %s",
 +                          cr->nodeid,
 +                          get_gpu_device_id(&hwinfo->gpu_info, cr->nodeid_group_intra),
 +                          gpu_err_str);
 +            }
 +        }
 +        *bUseGPU = bGPU;
 +    }
 +
 +    if (bEmulateGPU)
 +    {
 +        *kernel_type = nbk8x8x8_PlainC;
 +
 +        md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
 +    }
 +    else if (bGPU)
 +    {
 +        *kernel_type = nbk8x8x8_CUDA;
 +    }
 +
 +    if (*kernel_type == nbkNotSet)
 +    {
 +        if (use_cpu_acceleration)
 +        {
 +            pick_nbnxn_kernel_cpu(fp,cr,hwinfo->cpuid_info,kernel_type);
 +        }
 +        else
 +        {
 +            *kernel_type = nbk4x4_PlainC;
 +        }
 +    }
 +
 +    if (fp != NULL)
 +    {
 +        if (MASTER(cr))
 +        {
 +            fprintf(stderr,"Using %s non-bonded kernels\n",
 +                    nbk_name[*kernel_type]);
 +        }
 +        fprintf(fp,"\nUsing %s non-bonded kernels\n\n",
 +                nbk_name[*kernel_type]);
 +    }
 +}
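/* Editor's summary -- not part of this change: kernel precedence in this routine is
 *   GMX_EMULATE_GPU or GMX_NO_NONBONDED set  -> plain C 8x8x8 (GPU emulation on the CPU),
 *   usable GPU requested via bUseGPU         -> CUDA 8x8x8 kernels,
 *   otherwise                                -> CPU kernel from pick_nbnxn_kernel_cpu(),
 *                                               or plain C 4x4 without CPU acceleration.
 */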
 +
++gmx_bool uses_simple_tables(int cutoff_scheme,
++                            nonbonded_verlet_t *nbv,
++                            int group)
++{
++    gmx_bool bUsesSimpleTables = TRUE;
++    int grp_index;
 +
-     if (nbnxn_kernel_pairlist_simple(verlet_kernel_type))
++    switch(cutoff_scheme)
++    {
++    case ecutsGROUP:
++        bUsesSimpleTables = TRUE;
++        break;
++    case ecutsVERLET:
++        assert(NULL != nbv && NULL != nbv->grp);
++        grp_index = (group < 0) ? 0 : (nbv->ngrp - 1);
++        bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
++        break;
++    default:
++        gmx_incons("unimplemented");
++    }
++    return bUsesSimpleTables;
++}
++
++static void init_ewald_f_table(interaction_const_t *ic,
++                               gmx_bool bUsesSimpleTables,
++                               real rtab)
 +{
-         ic->tabq_size  = (int)(ic->rcoulomb*ic->tabq_scale) + 2;
- #ifndef GMX_DOUBLE
-         ic->tabq_format = tableformatFDV0;
- #else
-         ic->tabq_format = tableformatF;
- #endif
++    real maxr;
++
++    if (bUsesSimpleTables)
 +    {
 +        /* With a spacing of 0.0005 we are at the force summation accuracy
 +         * for the SSE kernels for "normal" atomistic simulations.
 +         */
 +        ic->tabq_scale = ewald_spline3_table_scale(ic->ewaldcoeff,
 +                                                   ic->rcoulomb);
-         if (verlet_kernel_type == nbk8x8x8_CUDA)
-         {
-             /* This case is handled in the nbnxn CUDA module */
-             ic->tabq_format = tableformatNONE;
-         }
-         else
-         {
-             ic->tabq_format = tableformatF;
-         }
++        
++        maxr = (rtab>ic->rcoulomb) ? rtab : ic->rcoulomb;
++        ic->tabq_size  = (int)(maxr*ic->tabq_scale) + 2;
 +    }
 +    else
 +    {
 +        ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
 +        /* Subtract 2 instead of 1 to avoid out-of-range access due to rounding */
 +        ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
-     switch (ic->tabq_format)
-     {
-     case tableformatNONE:
-         break;
-     case tableformatF:
-         sfree_aligned(ic->tabq_coul_F);
-         sfree_aligned(ic->tabq_coul_V);
-         snew_aligned(ic->tabq_coul_F,ic->tabq_size,16);
-         snew_aligned(ic->tabq_coul_V,ic->tabq_size,16);
-         table_spline3_fill_ewald_lr(ic->tabq_coul_F,ic->tabq_coul_V,
-                                     ic->tabq_size,ic->tabq_format,
-                                     1/ic->tabq_scale,ic->ewaldcoeff);
-         break;
-     case tableformatFDV0:
-         sfree_aligned(ic->tabq_coul_F);
-         snew_aligned(ic->tabq_coul_FDV0,ic->tabq_size*4,16);
-         table_spline3_fill_ewald_lr(ic->tabq_coul_FDV0,NULL,
-                                     ic->tabq_size,ic->tabq_format,
-                                     1/ic->tabq_scale,ic->ewaldcoeff);
-         break;
-     default:
-         gmx_incons("Unknown table format");
-     }
 +    }
 +
-                                    int verlet_kernel_type)
++    sfree_aligned(ic->tabq_coul_FDV0);
++    sfree_aligned(ic->tabq_coul_F);
++    sfree_aligned(ic->tabq_coul_V);
++
++    /* Create the original table data in FDV0 */
++    snew_aligned(ic->tabq_coul_FDV0,ic->tabq_size*4,16);
++    snew_aligned(ic->tabq_coul_F,ic->tabq_size,16);
++    snew_aligned(ic->tabq_coul_V,ic->tabq_size,16);
++    table_spline3_fill_ewald_lr(ic->tabq_coul_F,ic->tabq_coul_V,ic->tabq_coul_FDV0,
++                                ic->tabq_size,1/ic->tabq_scale,ic->ewaldcoeff);
 +}
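/* Editor's rough numbers -- hedged illustration, not part of this change: with the
 * ~0.0005 nm spacing mentioned above the scale is on the order of 2000 points/nm,
 * so for rcoulomb = 1.0 nm and rtab = 1.2 nm the simple-table branch allocates about
 * 1.2*2000 + 2 ~ 2400 points, whereas the GPU branch always uses the fixed
 * GPU_EWALD_COULOMB_FORCE_TABLE_SIZE and derives the scale from rcoulomb instead.
 */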
 +
 +void init_interaction_const_tables(FILE *fp, 
 +                                   interaction_const_t *ic,
-         init_verlet_ewald_f_table(ic,verlet_kernel_type);
++                                   gmx_bool bUsesSimpleTables,
++                                   real rtab)
 +{
 +    real spacing;
 +
 +    if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
 +    {
-                             const t_forcerec *fr)
++        init_ewald_f_table(ic,bUsesSimpleTables,rtab);
 +
 +        if (fp != NULL)
 +        {
 +            fprintf(fp,"Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
 +                    1/ic->tabq_scale,ic->tabq_size);
 +        }
 +    }
 +}
 +
 +void init_interaction_const(FILE *fp, 
 +                            interaction_const_t **interaction_const,
-     ic->rlist       = fr->rlist;
++                            const t_forcerec *fr,
++                            real  rtab)
 +{
 +    interaction_const_t *ic;
++    gmx_bool bUsesSimpleTables = TRUE;
 +
 +    snew(ic, 1);
 +
-     if (fr->vdw_pot_shift)
++    /* Just allocate something so we can free it */
++    snew_aligned(ic->tabq_coul_FDV0,16,16);
++    snew_aligned(ic->tabq_coul_F,16,16);
++    snew_aligned(ic->tabq_coul_V,16,16);
 +
++    ic->rlist       = fr->rlist;
++    ic->rlistlong   = fr->rlistlong;
++    
 +    /* Lennard-Jones */
 +    ic->rvdw        = fr->rvdw;
-     if (fr->coul_pot_shift)
++    if (fr->vdw_modifier==eintmodPOTSHIFT)
 +    {
 +        ic->sh_invrc6 = pow(ic->rvdw,-6.0);
 +    }
 +    else
 +    {
 +        ic->sh_invrc6 = 0;
 +    }
 +
 +    /* Electrostatics */
 +    ic->eeltype     = fr->eeltype;
 +    ic->rcoulomb    = fr->rcoulomb;
 +    ic->epsilon_r   = fr->epsilon_r;
 +    ic->epsfac      = fr->epsfac;
 +
 +    /* Ewald */
 +    ic->ewaldcoeff  = fr->ewaldcoeff;
-         if (fr->coul_pot_shift)
++    if (fr->coulomb_modifier==eintmodPOTSHIFT)
 +    {
 +        ic->sh_ewald = gmx_erfc(ic->ewaldcoeff*ic->rcoulomb);
 +    }
 +    else
 +    {
 +        ic->sh_ewald = 0;
 +    }
 +
 +    /* Reaction-field */
 +    if (EEL_RF(ic->eeltype))
 +    {
 +        ic->epsilon_rf = fr->epsilon_rf;
 +        ic->k_rf       = fr->k_rf;
 +        ic->c_rf       = fr->c_rf;
 +    }
 +    else
 +    {
 +        /* For plain cut-off we might use the reaction-field kernels */
 +        ic->epsilon_rf = ic->epsilon_r;
 +        ic->k_rf       = 0;
-     if (fr->cutoff_scheme == ecutsVERLET)
-     {
-         assert(fr->nbv != NULL && fr->nbv->grp != NULL);
-         init_interaction_const_tables(fp,ic,fr->nbv->grp[fr->nbv->ngrp-1].kernel_type);
-     }
++        if (fr->coulomb_modifier==eintmodPOTSHIFT)
 +        {
 +            ic->c_rf   = 1/ic->rcoulomb;
 +        }
 +        else
 +        {
 +            ic->c_rf   = 0;
 +        }
 +    }
 +
 +    if (fp != NULL)
 +    {
 +        fprintf(fp,"Potential shift: LJ r^-12: %.3f r^-6 %.3f",
 +                sqr(ic->sh_invrc6),ic->sh_invrc6);
 +        if (ic->eeltype == eelCUT)
 +        {
 +            fprintf(fp,", Coulomb %.3f",ic->c_rf);
 +        }
 +        else if (EEL_PME(ic->eeltype))
 +        {
 +            fprintf(fp,", Ewald %.3e",ic->sh_ewald);
 +        }
 +        fprintf(fp,"\n");
 +    }
 +
 +    *interaction_const = ic;
 +
 +    if (fr->nbv != NULL && fr->nbv->bUseGPU)
 +    {
 +        nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv);
 +    }
 +
-                     "Disabling interaction-specific nonbonded kernels.\n\n");
++    bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
++    init_interaction_const_tables(fp,ic,bUsesSimpleTables,rtab);
 +}
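/* Editor's worked example -- illustration only, not part of this change: with a
 * potential-shift VdW modifier and rvdw = 1.0 nm, sh_invrc6 = 1.0^-6 = 1.0, so the
 * LJ potential is shifted by C12*sh_invrc6^2 - C6*sh_invrc6 at the cut-off; for
 * rvdw = 0.9 nm, sh_invrc6 ~ 1.88.  Likewise sh_ewald = erfc(ewaldcoeff*rcoulomb)
 * is the short-range Ewald potential value subtracted at the cut-off.
 */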
 +
 +static void init_nb_verlet(FILE *fp,
 +                           nonbonded_verlet_t **nb_verlet,
 +                           const t_inputrec *ir,
 +                           const t_forcerec *fr,
 +                           const t_commrec *cr,
 +                           const char *nbpu_opt)
 +{
 +    nonbonded_verlet_t *nbv;
 +    int  i;
 +    char *env;
 +    gmx_bool bHybridGPURun = FALSE;
 +
 +    nbnxn_alloc_t *nb_alloc;
 +    nbnxn_free_t  *nb_free;
 +
 +    snew(nbv, 1);
 +
 +    nbv->nbs = NULL;
 +
 +    nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
 +    for(i=0; i<nbv->ngrp; i++)
 +    {
 +        nbv->grp[i].nbl_lists.nnbl = 0;
 +        nbv->grp[i].nbat           = NULL;
 +        nbv->grp[i].kernel_type    = nbkNotSet;
 +
 +        if (i == 0) /* local */
 +        {
 +            pick_nbnxn_kernel(fp, cr, fr->hwinfo, fr->use_cpu_acceleration,
 +                              &nbv->bUseGPU,
 +                              &nbv->grp[i].kernel_type);
 +        }
 +        else /* non-local */
 +        {
 +            if (nbpu_opt != NULL && strcmp(nbpu_opt,"gpu_cpu") == 0)
 +            {
 +                /* Use GPU for local, select a CPU kernel for non-local */
 +                pick_nbnxn_kernel(fp, cr, fr->hwinfo, fr->use_cpu_acceleration,
 +                                  NULL,
 +                                  &nbv->grp[i].kernel_type);
 +
 +                bHybridGPURun = TRUE;
 +            }
 +            else
 +            {
 +                /* Use the same kernel for local and non-local interactions */
 +                nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
 +            }
 +        }
 +    }
 +
 +    if (nbv->bUseGPU)
 +    {
 +        /* init the NxN GPU data; the last argument tells whether we'll have
 +         * both local and non-local NB calculation on GPU */
 +        nbnxn_cuda_init(fp, &nbv->cu_nbv,
 +                        &fr->hwinfo->gpu_info, cr->nodeid_group_intra,
 +                        (nbv->ngrp > 1) && !bHybridGPURun);
 +
 +        if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
 +        {
 +            char *end;
 +
 +            nbv->min_ci_balanced = strtol(env, &end, 10);
 +            if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
 +            {
 +                gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
 +            }
 +
 +            if (debug)
 +            {
 +                fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n", 
 +                        nbv->min_ci_balanced);
 +            }
 +        }
 +        else
 +        {
 +            nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
 +            if (debug)
 +            {
 +                fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
 +                        nbv->min_ci_balanced);
 +            }
 +        }
 +    }
 +    else
 +    {
 +        nbv->min_ci_balanced = 0;
 +    }
 +
 +    *nb_verlet = nbv;
 +
 +    nbnxn_init_search(&nbv->nbs,
 +                      DOMAINDECOMP(cr) ? & cr->dd->nc : NULL,
 +                      DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
 +                      gmx_omp_nthreads_get(emntNonbonded));
 +
 +    for(i=0; i<nbv->ngrp; i++)
 +    {
 +        if (nbv->grp[0].kernel_type == nbk8x8x8_CUDA)
 +        {
 +            nb_alloc = &pmalloc;
 +            nb_free  = &pfree;
 +        }
 +        else
 +        {
 +            nb_alloc = NULL;
 +            nb_free  = NULL;
 +        }
 +
 +        nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
 +                                nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
 +                                /* 8x8x8 "non-simple" lists are ATM always combined */
 +                                !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
 +                                nb_alloc, nb_free);
 +
 +        if (i == 0 ||
 +            nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
 +        {
 +            snew(nbv->grp[i].nbat,1);
 +            nbnxn_atomdata_init(fp,
 +                                nbv->grp[i].nbat,
 +                                nbv->grp[i].kernel_type,
 +                                fr->ntype,fr->nbfp,
 +                                ir->opts.ngener,
 +                                nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type) ? gmx_omp_nthreads_get(emntNonbonded) : 1,
 +                                nb_alloc, nb_free);
 +        }
 +        else
 +        {
 +            nbv->grp[i].nbat = nbv->grp[0].nbat;
 +        }
 +    }
 +}
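/* Editor's usage note -- assumption for illustration, not part of this change: the
 * "gpu_cpu" value of nbpu_opt gives the hybrid setup above (local interactions on the
 * GPU, non-local on a CPU kernel), and setting e.g. GMX_NB_MIN_CI=64 in the
 * environment overrides the automatic pair-list balancing threshold for GPU runs.
 */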
 +
 +void init_forcerec(FILE *fp,
 +                   const output_env_t oenv,
 +                   t_forcerec *fr,
 +                   t_fcdata   *fcd,
 +                   const t_inputrec *ir,
 +                   const gmx_mtop_t *mtop,
 +                   const t_commrec  *cr,
 +                   matrix     box,
 +                   gmx_bool       bMolEpot,
 +                   const char *tabfn,
 +                   const char *tabafn,
 +                   const char *tabpfn,
 +                   const char *tabbfn,
 +                   const char *nbpu_opt,
 +                   gmx_bool   bNoSolvOpt,
 +                   real       print_force)
 +{
 +    int     i,j,m,natoms,ngrp,negp_pp,negptable,egi,egj;
 +    real    rtab;
 +    char    *env;
 +    double  dbl;
 +    rvec    box_size;
 +    const t_block *cgs;
 +    gmx_bool    bGenericKernelOnly;
 +    gmx_bool    bTab,bSep14tab,bNormalnblists;
 +    t_nblists *nbl;
 +    int     *nm_ind,egp_flags;
 +    
 +    /* By default we turn acceleration on, but it might be turned off further down... */
 +    fr->use_cpu_acceleration = TRUE;
 +
 +    fr->bDomDec = DOMAINDECOMP(cr);
 +
 +    natoms = mtop->natoms;
 +
 +    if (check_box(ir->ePBC,box))
 +    {
 +        gmx_fatal(FARGS,"%s",check_box(ir->ePBC,box));
 +    }
 +    
 +    /* Test particle insertion ? */
 +    if (EI_TPI(ir->eI)) {
 +        /* Set to the size of the molecule to be inserted (the last one) */
 +        /* Because of old style topologies, we have to use the last cg
 +         * instead of the last molecule type.
 +         */
 +        cgs = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
 +        fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
 +        if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1]) {
 +            gmx_fatal(FARGS,"The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
 +        }
 +    } else {
 +        fr->n_tpi = 0;
 +    }
 +    
 +    /* Copy AdResS parameters */
 +    if (ir->bAdress) {
 +      fr->adress_type     = ir->adress->type;
 +      fr->adress_const_wf = ir->adress->const_wf;
 +      fr->adress_ex_width = ir->adress->ex_width;
 +      fr->adress_hy_width = ir->adress->hy_width;
 +      fr->adress_icor     = ir->adress->icor;
 +      fr->adress_site     = ir->adress->site;
 +      fr->adress_ex_forcecap = ir->adress->ex_forcecap;
 +      fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
 +
 +
 +      snew(fr->adress_group_explicit , ir->adress->n_energy_grps);
 +      for (i=0; i< ir->adress->n_energy_grps; i++){
 +          fr->adress_group_explicit[i]= ir->adress->group_explicit[i];
 +      }
 +
 +      fr->n_adress_tf_grps = ir->adress->n_tf_grps;
 +      snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
 +      for (i=0; i< fr->n_adress_tf_grps; i++){
 +          fr->adress_tf_table_index[i]= ir->adress->tf_table_index[i];
 +      }
 +      copy_rvec(ir->adress->refs,fr->adress_refs);
 +    } else {
 +      fr->adress_type = eAdressOff;
 +      fr->adress_do_hybridpairs = FALSE;
 +    }
 +    
 +    /* Copy the user determined parameters */
 +    fr->userint1 = ir->userint1;
 +    fr->userint2 = ir->userint2;
 +    fr->userint3 = ir->userint3;
 +    fr->userint4 = ir->userint4;
 +    fr->userreal1 = ir->userreal1;
 +    fr->userreal2 = ir->userreal2;
 +    fr->userreal3 = ir->userreal3;
 +    fr->userreal4 = ir->userreal4;
 +    
 +    /* Shell stuff */
 +    fr->fc_stepsize = ir->fc_stepsize;
 +    
 +    /* Free energy */
 +    fr->efep       = ir->efep;
 +    fr->sc_alphavdw = ir->fepvals->sc_alpha;
 +    if (ir->fepvals->bScCoul)
 +    {
 +        fr->sc_alphacoul = ir->fepvals->sc_alpha;
 +        fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min,6);
 +    }
 +    else
 +    {
 +        fr->sc_alphacoul = 0;
 +        fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
 +    }
 +    fr->sc_power   = ir->fepvals->sc_power;
 +    fr->sc_r_power   = ir->fepvals->sc_r_power;
 +    fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma,6);
 +
 +    env = getenv("GMX_SCSIGMA_MIN");
 +    if (env != NULL)
 +    {
 +        dbl = 0;
 +        sscanf(env,"%lf",&dbl);
 +        fr->sc_sigma6_min = pow(dbl,6);
 +        if (fp)
 +        {
 +            fprintf(fp,"Setting the minimum soft core sigma to %g nm\n",dbl);
 +        }
 +    }
 +
 +    fr->bNonbonded = TRUE;
 +    if (getenv("GMX_NO_NONBONDED") != NULL)
 +    {
 +        /* turn off non-bonded calculations */
 +        fr->bNonbonded = FALSE;
 +        md_print_warn(cr,fp,
 +                      "Found environment variable GMX_NO_NONBONDED.\n"
 +                      "Disabling nonbonded calculations.\n");
 +    }
 +
 +    bGenericKernelOnly = FALSE;
++
++    /* We now check in the NS code whether a particular combination of interactions
++     * can be used with water optimization, and disable it if that is not the case.
++     */
++
 +    if (getenv("GMX_NB_GENERIC") != NULL)
 +    {
 +        if (fp != NULL)
 +        {
 +            fprintf(fp,
 +                    "Found environment variable GMX_NB_GENERIC.\n"
-     fr->coul_pot_shift = (ir->coulomb_modifier == eintmodPOTSHIFT);
-     fr->vdw_pot_shift  = (ir->vdw_modifier     == eintmodPOTSHIFT);
-     
++                    "Disabling all interaction-specific nonbonded kernels, will only\n"
++                    "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
 +        }
 +        bGenericKernelOnly = TRUE;
++    }
++
++    if (bGenericKernelOnly==TRUE)
++    {
 +        bNoSolvOpt         = TRUE;
 +    }
 +
 +    if( (getenv("GMX_DISABLE_CPU_ACCELERATION") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
 +    {
 +        fr->use_cpu_acceleration = FALSE;
 +        if (fp != NULL)
 +        {
 +            fprintf(fp,
 +                    "\nFound environment variable GMX_DISABLE_CPU_ACCELERATION.\n"
 +                    "Disabling all CPU architecture-specific (e.g. SSE2/SSE4/AVX) routines.\n\n");
 +        }
 +    }
 +
++    fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
++
 +    /* Check if we can/should do all-vs-all kernels */
 +    fr->bAllvsAll       = can_use_allvsall(ir,mtop,FALSE,NULL,NULL);
 +    fr->AllvsAll_work   = NULL;
 +    fr->AllvsAll_workgb = NULL;
 +
 +
 +    /* Neighbour searching stuff */
 +    fr->cutoff_scheme = ir->cutoff_scheme;
 +    fr->bGrid         = (ir->ns_type == ensGRID);
 +    fr->ePBC          = ir->ePBC;
 +
 +    /* Determine if we will do PBC for distances in bonded interactions */
 +    if (fr->ePBC == epbcNONE)
 +    {
 +        fr->bMolPBC = FALSE;
 +    }
 +    else
 +    {
 +        if (!DOMAINDECOMP(cr))
 +        {
 +            /* The group cut-off scheme and SHAKE assume charge groups
 +             * are whole, but not using molpbc is faster in most cases.
 +             */
 +            if (fr->cutoff_scheme == ecutsGROUP ||
 +                (ir->eConstrAlg == econtSHAKE &&
 +                 (gmx_mtop_ftype_count(mtop,F_CONSTR) > 0 ||
 +                  gmx_mtop_ftype_count(mtop,F_CONSTRNC) > 0)))
 +            {
 +                fr->bMolPBC = ir->bPeriodicMols;
 +            }
 +            else
 +            {
 +                fr->bMolPBC = TRUE;
 +                if (getenv("GMX_USE_GRAPH") != NULL)
 +                {
 +                    fr->bMolPBC = FALSE;
 +                    if (fp)
 +                    {
 +                        fprintf(fp,"\nGMX_USE_GRAPH is set, using the graph for bonded interactions\n\n");
 +                    }
 +                }
 +            }
 +        }
 +        else
 +        {
 +            fr->bMolPBC = dd_bonded_molpbc(cr->dd,fr->ePBC);
 +        }
 +    }
++
 +    fr->rc_scaling = ir->refcoord_scaling;
 +    copy_rvec(ir->posres_com,fr->posres_com);
 +    copy_rvec(ir->posres_comB,fr->posres_comB);
 +    fr->rlist      = cutoff_inf(ir->rlist);
 +    fr->rlistlong  = cutoff_inf(ir->rlistlong);
 +    fr->eeltype    = ir->coulombtype;
 +    fr->vdwtype    = ir->vdwtype;
 +
-         fr->bcoultab   = (!(fr->eeltype == eelCUT || EEL_RF(fr->eeltype)) ||
-                           fr->eeltype == eelRF_ZERO);
++    fr->coulomb_modifier = ir->coulomb_modifier;
++    fr->vdw_modifier     = ir->vdw_modifier;
++
++    /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
++    switch(fr->eeltype)
++    {
++        case eelCUT:
++            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_COULOMB;
++            break;
++
++        case eelRF:
++        case eelGRF:
++        case eelRF_NEC:
++            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
++            break;
++
++        case eelRF_ZERO:
++            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
++            fr->coulomb_modifier          = eintmodEXACTCUTOFF;
++            break;
++
++        case eelSWITCH:
++        case eelSHIFT:
++        case eelUSER:
++        case eelENCADSHIFT:
++        case eelPMESWITCH:
++        case eelPMEUSER:
++        case eelPMEUSERSWITCH:
++            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
++            break;
++
++        case eelPME:
++        case eelEWALD:
++            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
++            break;
++
++        default:
++            gmx_fatal(FARGS,"Unsupported electrostatic interaction: %s",eel_names[fr->eeltype]);
++            break;
++    }
++
++    /* Vdw: Translate from mdp settings to kernel format */
++    switch(fr->vdwtype)
++    {
++        case evdwCUT:
++            if(fr->bBHAM)
++            {
++                fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
++            }
++            else
++            {
++                fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
++            }
++            break;
++
++        case evdwSWITCH:
++        case evdwSHIFT:
++        case evdwUSER:
++        case evdwENCADSHIFT:
++            fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
++            break;
++
++        default:
++            gmx_fatal(FARGS,"Unsupported vdw interaction: %s",evdw_names[fr->vdwtype]);
++            break;
++    }
++
++    /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
++    fr->nbkernel_elec_modifier    = fr->coulomb_modifier;
++    fr->nbkernel_vdw_modifier     = fr->vdw_modifier;
++
 +    fr->bTwinRange = fr->rlistlong > fr->rlist;
 +    fr->bEwald     = (EEL_PME(fr->eeltype) || fr->eeltype==eelEWALD);
 +    
 +    fr->reppow     = mtop->ffparams.reppow;
 +
 +    if (ir->cutoff_scheme == ecutsGROUP)
 +    {
 +        fr->bvdwtab    = (fr->vdwtype != evdwCUT ||
 +                          !gmx_within_tol(fr->reppow,12.0,10*GMX_DOUBLE_EPS));
-         fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
++        /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
++        fr->bcoultab   = !(fr->eeltype == eelCUT ||
++                           fr->eeltype == eelEWALD ||
++                           fr->eeltype == eelPME ||
++                           fr->eeltype == eelRF ||
++                           fr->eeltype == eelRF_ZERO);
++
++        /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
++         * going to be faster to tabulate the interaction than to call the generic kernel.
++         */
++        if(fr->nbkernel_elec_modifier==eintmodPOTSWITCH && fr->nbkernel_vdw_modifier==eintmodPOTSWITCH)
++        {
++            if((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
++            {
++                fr->bcoultab = TRUE;
++            }
++        }
++        else if((fr->nbkernel_elec_modifier==eintmodPOTSHIFT && fr->nbkernel_vdw_modifier==eintmodPOTSHIFT) ||
++                ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
++                  fr->nbkernel_elec_modifier==eintmodEXACTCUTOFF &&
++                  (fr->nbkernel_vdw_modifier==eintmodPOTSWITCH || fr->nbkernel_vdw_modifier==eintmodPOTSHIFT))))
++        {
++            if(fr->rcoulomb != fr->rvdw)
++            {
++                fr->bcoultab = TRUE;
++            }
++        }
 +
 +        if (getenv("GMX_REQUIRE_TABLES"))
 +        {
 +            fr->bvdwtab  = TRUE;
 +            fr->bcoultab = TRUE;
 +        }
 +
 +        if (fp)
 +        {
 +            fprintf(fp,"Table routines are used for coulomb: %s\n",bool_names[fr->bcoultab]);
 +            fprintf(fp,"Table routines are used for vdw:     %s\n",bool_names[fr->bvdwtab ]);
 +        }
++
++        if(fr->bvdwtab==TRUE)
++        {
++            fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
++            fr->nbkernel_vdw_modifier    = eintmodNONE;
++        }
++        if(fr->bcoultab==TRUE)
++        {
++            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
++            fr->nbkernel_elec_modifier    = eintmodNONE;
++        }
 +    }
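/* Editor's note -- illustrative, not part of this change: for example, a group-scheme
 * run with coulombtype = PME, vdwtype = cut-off, rcoulomb = rvdw and potential-shift
 * modifiers keeps the analytical kernels (bcoultab = bvdwtab = FALSE), whereas using
 * rcoulomb != rvdw, or different switch radii with potential-switch on both, falls
 * back to tabulated Coulomb (bcoultab = TRUE) and hence the cubic-spline kernel flavour.
 */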
 +
 +    if (ir->cutoff_scheme == ecutsVERLET)
 +    {
 +        if (!gmx_within_tol(fr->reppow,12.0,10*GMX_DOUBLE_EPS))
 +        {
 +            gmx_fatal(FARGS,"Cut-off scheme %S only supports LJ repulsion power 12",ecutscheme_names[ir->cutoff_scheme]);
 +        }
 +        fr->bvdwtab  = FALSE;
 +        fr->bcoultab = FALSE;
 +    }
 +    
 +    /* Tables are used for direct ewald sum */
 +    if(fr->bEwald)
 +    {
 +        if (EEL_PME(ir->coulombtype))
 +        {
 +            if (fp)
 +                fprintf(fp,"Will do PME sum in reciprocal space.\n");
 +            if (ir->coulombtype == eelP3M_AD)
 +            {
 +                please_cite(fp,"Hockney1988");
 +                please_cite(fp,"Ballenegger2012");
 +            }
 +            else
 +            {
 +                please_cite(fp,"Essmann95a");
 +            }
 +            
 +            if (ir->ewald_geometry == eewg3DC)
 +            {
 +                if (fp)
 +                {
 +                    fprintf(fp,"Using the Ewald3DC correction for systems with a slab geometry.\n");
 +                }
 +                please_cite(fp,"In-Chul99a");
 +            }
 +        }
 +        fr->ewaldcoeff=calc_ewaldcoeff(ir->rcoulomb, ir->ewald_rtol);
 +        init_ewald_tab(&(fr->ewald_table), cr, ir, fp);
 +        if (fp)
 +        {
 +            fprintf(fp,"Using a Gaussian width (1/beta) of %g nm for Ewald\n",
 +                    1/fr->ewaldcoeff);
 +        }
 +    }
 +    
 +    /* Electrostatics */
 +    fr->epsilon_r  = ir->epsilon_r;
 +    fr->epsilon_rf = ir->epsilon_rf;
 +    fr->fudgeQQ    = mtop->ffparams.fudgeQQ;
 +    fr->rcoulomb_switch = ir->rcoulomb_switch;
 +    fr->rcoulomb        = cutoff_inf(ir->rcoulomb);
 +    
 +    /* Parameters for generalized RF */
 +    fr->zsquare = 0.0;
 +    fr->temp    = 0.0;
 +    
 +    if (fr->eeltype == eelGRF)
 +    {
 +        init_generalized_rf(fp,mtop,ir,fr);
 +    }
 +    else if (fr->eeltype == eelSHIFT)
 +    {
 +        for(m=0; (m<DIM); m++)
 +            box_size[m]=box[m][m];
 +        
 +        if ((fr->eeltype == eelSHIFT && fr->rcoulomb > fr->rcoulomb_switch))
 +            set_shift_consts(fp,fr->rcoulomb_switch,fr->rcoulomb,box_size,fr);
 +    }
 +    
 +    fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) ||
 +                       gmx_mtop_ftype_count(mtop,F_POSRES) > 0 ||
 +                       gmx_mtop_ftype_count(mtop,F_FBPOSRES) > 0 ||
 +                       IR_ELEC_FIELD(*ir) ||
 +                       (fr->adress_icor != eAdressICOff)
 +                      );
 +    
 +    if (fr->cutoff_scheme == ecutsGROUP &&
 +        ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr)) {
 +        /* Count the total number of charge groups */
 +        fr->cg_nalloc = ncg_mtop(mtop);
 +        srenew(fr->cg_cm,fr->cg_nalloc);
 +    }
 +    if (fr->shift_vec == NULL)
 +        snew(fr->shift_vec,SHIFTS);
 +    
 +    if (fr->fshift == NULL)
 +        snew(fr->fshift,SHIFTS);
 +    
 +    if (fr->nbfp == NULL) {
 +        fr->ntype = mtop->ffparams.atnr;
-     bTab = fr->bcoultab || fr->bvdwtab;
 +        fr->nbfp  = mk_nbfp(&mtop->ffparams,fr->bBHAM);
 +    }
 +    
 +    /* Copy the energy group exclusions */
 +    fr->egp_flags = ir->opts.egp_flags;
 +    
 +    /* Van der Waals stuff */
 +    fr->rvdw        = cutoff_inf(ir->rvdw);
 +    fr->rvdw_switch = ir->rvdw_switch;
 +    if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM) {
 +        if (fr->rvdw_switch >= fr->rvdw)
 +            gmx_fatal(FARGS,"rvdw_switch (%f) must be < rvdw (%f)",
 +                      fr->rvdw_switch,fr->rvdw);
 +        if (fp)
 +            fprintf(fp,"Using %s Lennard-Jones, switch between %g and %g nm\n",
 +                    (fr->vdwtype==evdwSWITCH) ? "switched":"shifted",
 +                    fr->rvdw_switch,fr->rvdw);
 +    } 
 +    
 +    if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
 +        gmx_fatal(FARGS,"Switch/shift interaction not supported with Buckingham");
 +    
 +    if (fp)
 +        fprintf(fp,"Cut-off's:   NS: %g   Coulomb: %g   %s: %g\n",
 +                fr->rlist,fr->rcoulomb,fr->bBHAM ? "BHAM":"LJ",fr->rvdw);
 +    
 +    fr->eDispCorr = ir->eDispCorr;
 +    if (ir->eDispCorr != edispcNO)
 +    {
 +        set_avcsixtwelve(fp,fr,mtop);
 +    }
 +    
 +    if (fr->bBHAM)
 +    {
 +        set_bham_b_max(fp,fr,mtop);
 +    }
 +
 +    fr->bGB = (ir->implicit_solvent == eisGBSA);
 +      fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
 +
 +    /* Copy the GBSA data (radius, volume and surftens for each
 +     * atomtype) from the topology atomtype section to forcerec.
 +     */
 +    snew(fr->atype_radius,fr->ntype);
 +    snew(fr->atype_vol,fr->ntype);
 +    snew(fr->atype_surftens,fr->ntype);
 +    snew(fr->atype_gb_radius,fr->ntype);
 +    snew(fr->atype_S_hct,fr->ntype);
 +
 +    if (mtop->atomtypes.nr > 0)
 +    {
 +        for(i=0;i<fr->ntype;i++)
 +            fr->atype_radius[i] =mtop->atomtypes.radius[i];
 +        for(i=0;i<fr->ntype;i++)
 +            fr->atype_vol[i] = mtop->atomtypes.vol[i];
 +        for(i=0;i<fr->ntype;i++)
 +            fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
 +        for(i=0;i<fr->ntype;i++)
 +            fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
 +        for(i=0;i<fr->ntype;i++)
 +            fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
 +    }  
 +      
 +    /* Generate the GB table if needed */
 +    if(fr->bGB)
 +    {
 +#ifdef GMX_DOUBLE
 +        fr->gbtabscale=2000;
 +#else
 +        fr->gbtabscale=500;
 +#endif
 +
 +        fr->gbtabr=100;
 +        fr->gbtab=make_gb_table(fp,oenv,fr,tabpfn,fr->gbtabscale);
 +
 +        init_gb(&fr->born,cr,fr,ir,mtop,ir->rgbradii,ir->gb_algorithm);
 +
 +        /* Copy local gb data (for dd, this is done in dd_partition_system) */
 +        if (!DOMAINDECOMP(cr))
 +        {
 +            make_local_gb(cr,fr->born,ir->gb_algorithm);
 +        }
 +    }
 +
 +    /* Set the charge scaling */
 +    if (fr->epsilon_r != 0)
 +        fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
 +    else
 +        /* eps = 0 is infinite dielectric: no coulomb interactions */
 +        fr->epsfac = 0;
 +    
 +    /* Reaction field constants */
 +    if (EEL_RF(fr->eeltype))
 +        calc_rffac(fp,fr->eeltype,fr->epsilon_r,fr->epsilon_rf,
 +                   fr->rcoulomb,fr->temp,fr->zsquare,box,
 +                   &fr->kappa,&fr->k_rf,&fr->c_rf);
 +    
 +    set_chargesum(fp,fr,mtop);
 +    
 +    /* if we are using LR electrostatics, and they are tabulated,
 +     * the tables will contain modified coulomb interactions.
 +     * Since we want to use the non-shifted ones for 1-4
 +     * coulombic interactions, we must have an extra set of tables.
 +     */
 +    
 +    /* Construct tables.
 +     * A little unnecessary to make both vdw and coul tables sometimes,
 +     * but what the heck... */
 +    
-                   fr->bBHAM) &&
++    bTab = fr->bcoultab || fr->bvdwtab || fr->bEwald;
 +
 +    bSep14tab = ((!bTab || fr->eeltype!=eelCUT || fr->vdwtype!=evdwCUT ||
-                 fr->tab14 = fr->nblists[0].tab;
++                  fr->bBHAM || fr->bEwald) &&
 +                 (gmx_mtop_ftype_count(mtop,F_LJ14) > 0 ||
 +                  gmx_mtop_ftype_count(mtop,F_LJC14_Q) > 0 ||
 +                  gmx_mtop_ftype_count(mtop,F_LJC_PAIRS_NB) > 0));
 +
 +    negp_pp = ir->opts.ngener - ir->nwall;
 +    negptable = 0;
 +    if (!bTab) {
 +        bNormalnblists = TRUE;
 +        fr->nnblists = 1;
 +    } else {
 +        bNormalnblists = (ir->eDispCorr != edispcNO);
 +        for(egi=0; egi<negp_pp; egi++) {
 +            for(egj=egi;  egj<negp_pp; egj++) {
 +                egp_flags = ir->opts.egp_flags[GID(egi,egj,ir->opts.ngener)];
 +                if (!(egp_flags & EGP_EXCL)) {
 +                    if (egp_flags & EGP_TABLE) {
 +                        negptable++;
 +                    } else {
 +                        bNormalnblists = TRUE;
 +                    }
 +                }
 +            }
 +        }
 +        if (bNormalnblists) {
 +            fr->nnblists = negptable + 1;
 +        } else {
 +            fr->nnblists = negptable;
 +        }
 +        if (fr->nnblists > 1)
 +            snew(fr->gid2nblists,ir->opts.ngener*ir->opts.ngener);
 +    }
 +    snew(fr->nblists,fr->nnblists);
 +    
 +    /* This code automatically gives table length tabext without cut-off's,
 +     * in that case grompp should already have checked that we do not need
 +     * normal tables and we only generate tables for 1-4 interactions.
 +     */
 +    rtab = ir->rlistlong + ir->tabext;
 +
 +    if (bTab) {
 +        /* make tables for ordinary interactions */
 +        if (bNormalnblists) {
 +            make_nbf_tables(fp,oenv,fr,rtab,cr,tabfn,NULL,NULL,&fr->nblists[0]);
 +            if (!bSep14tab)
-     
++                fr->tab14 = fr->nblists[0].table_elec_vdw;
 +            m = 1;
 +        } else {
 +            m = 0;
 +        }
 +        if (negptable > 0) {
 +            /* Read the special tables for certain energy group pairs */
 +            nm_ind = mtop->groups.grps[egcENER].nm_ind;
 +            for(egi=0; egi<negp_pp; egi++) {
 +                for(egj=egi;  egj<negp_pp; egj++) {
 +                    egp_flags = ir->opts.egp_flags[GID(egi,egj,ir->opts.ngener)];
 +                    if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL)) {
 +                        nbl = &(fr->nblists[m]);
 +                        if (fr->nnblists > 1) {
 +                            fr->gid2nblists[GID(egi,egj,ir->opts.ngener)] = m;
 +                        }
 +                        /* Read the table file with the two energy groups names appended */
 +                        make_nbf_tables(fp,oenv,fr,rtab,cr,tabfn,
 +                                        *mtop->groups.grpname[nm_ind[egi]],
 +                                        *mtop->groups.grpname[nm_ind[egj]],
 +                                        &fr->nblists[m]);
 +                        m++;
 +                    } else if (fr->nnblists > 1) {
 +                        fr->gid2nblists[GID(egi,egj,ir->opts.ngener)] = 0;
 +                    }
 +                }
 +            }
 +        }
 +    }
 +    if (bSep14tab)
 +    {
 +        /* generate extra tables with plain Coulomb for 1-4 interactions only */
 +        fr->tab14 = make_tables(fp,oenv,fr,MASTER(cr),tabpfn,rtab,
 +                                GMX_MAKETABLES_14ONLY);
 +    }
 +
 +    /* Read AdResS Thermo Force table if needed */
 +    if(fr->adress_icor == eAdressICThermoForce)
 +    {
 +        /* old todo replace */ 
 +        
 +        if (ir->adress->n_tf_grps > 0){
 +            make_adress_tf_tables(fp,oenv,fr,ir,tabfn, mtop, box);
 +
 +        }else{
 +            /* load the default table */
 +            snew(fr->atf_tabs, 1);
 +            fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp,oenv,fr,tabafn, box);
 +        }
 +    }
 +    
 +    /* Wall stuff */
 +    fr->nwall = ir->nwall;
 +    if (ir->nwall && ir->wall_type==ewtTABLE)
 +    {
 +        make_wall_tables(fp,oenv,ir,tabfn,&mtop->groups,fr);
 +    }
 +    
 +    if (fcd && tabbfn) {
 +        fcd->bondtab  = make_bonded_tables(fp,
 +                                           F_TABBONDS,F_TABBONDSNC,
 +                                           mtop,tabbfn,"b");
 +        fcd->angletab = make_bonded_tables(fp,
 +                                           F_TABANGLES,-1,
 +                                           mtop,tabbfn,"a");
 +        fcd->dihtab   = make_bonded_tables(fp,
 +                                           F_TABDIHS,-1,
 +                                           mtop,tabbfn,"d");
 +    } else {
 +        if (debug)
 +            fprintf(debug,"No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
 +    }
 +    
 +    /* QM/MM initialization if requested
 +     */
 +    if (ir->bQMMM)
 +    {
 +        fprintf(stderr,"QM/MM calculation requested.\n");
 +    }
 +    
 +    fr->bQMMM      = ir->bQMMM;   
 +    fr->qr         = mk_QMMMrec();
 +    
 +    /* Set all the static charge group info */
 +    fr->cginfo_mb = init_cginfo_mb(fp,mtop,fr,bNoSolvOpt,
 +                                   &fr->bExcl_IntraCGAll_InterCGNone);
 +    if (DOMAINDECOMP(cr)) {
 +        fr->cginfo = NULL;
 +    } else {
 +        fr->cginfo = cginfo_expand(mtop->nmolblock,fr->cginfo_mb);
 +    }
 +    
 +    if (!DOMAINDECOMP(cr))
 +    {
 +        /* When using particle decomposition, the effect of the second argument,
 +         * which sets fr->hcg, is corrected later in do_md and init_em.
 +         */
 +        forcerec_set_ranges(fr,ncg_mtop(mtop),ncg_mtop(mtop),
 +                            mtop->natoms,mtop->natoms,mtop->natoms);
 +    }
 +    
 +    fr->print_force = print_force;
 +
 +
 +    /* coarse load balancing vars */
 +    fr->t_fnbf=0.;
 +    fr->t_wait=0.;
 +    fr->timesteps=0;
 +    
 +    /* Initialize neighbor search */
 +    init_ns(fp,cr,&fr->ns,fr,mtop,box);
-         gmx_setup_kernels(fp,fr,bGenericKernelOnly);
-         if (ir->bAdress)
++
 +    if (cr->duty & DUTY_PP)
 +    {
-         /* initialize interaction constants
-          * TODO should be moved out during modularization.
-          */
-         init_interaction_const(fp, &fr->ic, fr);
++        gmx_nonbonded_setup(fp,fr,bGenericKernelOnly);
++    /*
++     if (ir->bAdress)
 +        {
 +            gmx_setup_adress_kernels(fp,bGenericKernelOnly);
 +        }
++     */
 +    }
 +
 +    /* Initialize the thread working data for bonded interactions */
 +    init_forcerec_f_threads(fr,mtop->groups.grps[egcENER].nr);
 +    
 +    snew(fr->excl_load,fr->nthreads+1);
 +
 +    if (fr->cutoff_scheme == ecutsVERLET)
 +    {
 +        if (ir->rcoulomb != ir->rvdw)
 +        {
 +            gmx_fatal(FARGS,"With Verlet lists rcoulomb and rvdw should be identical");
 +        }
 +
 +        init_nb_verlet(fp, &fr->nbv, ir, fr, cr, nbpu_opt);
-     pr_int(fp,fr->nblists[i].tab.n);
 +    }
 +
++    /* fr->ic is used both by verlet and group kernels (to some extent) now */
++    init_interaction_const(fp, &fr->ic, fr, rtab);
 +    if (ir->eDispCorr != edispcNO)
 +    {
 +        calc_enervirdiff(fp,ir->eDispCorr,fr);
 +    }
 +}
 +
 +#define pr_real(fp,r) fprintf(fp,"%s: %e\n",#r,r)
 +#define pr_int(fp,i)  fprintf((fp),"%s: %d\n",#i,i)
 +#define pr_bool(fp,b) fprintf((fp),"%s: %s\n",#b,bool_names[b])
 +
 +void pr_forcerec(FILE *fp,t_forcerec *fr,t_commrec *cr)
 +{
 +  int i;
 +
 +  pr_real(fp,fr->rlist);
 +  pr_real(fp,fr->rcoulomb);
 +  pr_real(fp,fr->fudgeQQ);
 +  pr_bool(fp,fr->bGrid);
 +  pr_bool(fp,fr->bTwinRange);
 +  /*pr_int(fp,fr->cg0);
 +    pr_int(fp,fr->hcg);*/
 +  for(i=0; i<fr->nnblists; i++)
++    pr_int(fp,fr->nblists[i].table_elec_vdw.n);
 +  pr_real(fp,fr->rcoulomb_switch);
 +  pr_real(fp,fr->rcoulomb);
 +  
 +  fflush(fp);
 +}
 +
 +void forcerec_set_excl_load(t_forcerec *fr,
 +                            const gmx_localtop_t *top,const t_commrec *cr)
 +{
 +    const int *ind,*a;
 +    int t,i,j,ntot,n,ntarget;
 +
 +    if (cr != NULL && PARTDECOMP(cr))
 +    {
 +        /* No OpenMP with particle decomposition */
 +        pd_at_range(cr,
 +                    &fr->excl_load[0],
 +                    &fr->excl_load[1]);
 +
 +        return;
 +    }
 +
 +    ind = top->excls.index;
 +    a   = top->excls.a;
 +
 +    ntot = 0;
 +    for(i=0; i<top->excls.nr; i++)
 +    {
 +        for(j=ind[i]; j<ind[i+1]; j++)
 +        {
 +            if (a[j] > i)
 +            {
 +                ntot++;
 +            }
 +        }
 +    }
 +
 +    fr->excl_load[0] = 0;
 +    n = 0;
 +    i = 0;
 +    for(t=1; t<=fr->nthreads; t++)
 +    {
 +        ntarget = (ntot*t)/fr->nthreads;
 +        while(i < top->excls.nr && n < ntarget)
 +        {
 +            for(j=ind[i]; j<ind[i+1]; j++)
 +            {
 +                if (a[j] > i)
 +                {
 +                    n++;
 +                }
 +            }
 +            i++;
 +        }
 +        fr->excl_load[t] = i;
 +    }
 +}
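/* Editor's note -- illustrative, not part of this change: excl_load[0..nthreads] ends
 * up holding boundaries into the exclusion list, chosen so that each of the
 * fr->nthreads bonded/exclusion threads handles roughly ntot/nthreads of the
 * "upper-triangle" exclusions (those with a[j] > i); e.g. with 4 threads and ~1000
 * such exclusions the boundaries fall near the 250th, 500th and 750th of them.
 */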
 +
Simple merge
index 7acf363ed32587f98df2ce38b27a0a96bf6ea25a,0000000000000000000000000000000000000000..196c439947268084a5befe6d5e45dc7b99f6856f
mode 100644,000000..100644
--- /dev/null
@@@ -1,2544 -1,0 +1,2544 @@@
-              (bNS ? GMX_FORCE_NS | GMX_FORCE_DOLR : 0));
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <string.h>
 +#include <time.h>
 +#include <math.h>
 +#include "sysstuff.h"
 +#include "string2.h"
 +#include "network.h"
 +#include "confio.h"
 +#include "copyrite.h"
 +#include "smalloc.h"
 +#include "nrnb.h"
 +#include "main.h"
 +#include "force.h"
 +#include "macros.h"
 +#include "random.h"
 +#include "names.h"
 +#include "gmx_fatal.h"
 +#include "txtdump.h"
 +#include "typedefs.h"
 +#include "update.h"
 +#include "constr.h"
 +#include "vec.h"
 +#include "statutil.h"
 +#include "tgroup.h"
 +#include "mdebin.h"
 +#include "vsite.h"
 +#include "force.h"
 +#include "mdrun.h"
 +#include "md_support.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "trnio.h"
 +#include "mdatoms.h"
 +#include "ns.h"
 +#include "gmx_wallcycle.h"
 +#include "mtop_util.h"
 +#include "gmxfio.h"
 +#include "pme.h"
 +#include "bondf.h"
 +#include "gmx_omp_nthreads.h"
 +
 +
 +#include "gromacs/linearalgebra/mtxio.h"
 +#include "gromacs/linearalgebra/sparsematrix.h"
 +
 +typedef struct {
 +  t_state s;
 +  rvec    *f;
 +  real    epot;
 +  real    fnorm;
 +  real    fmax;
 +  int     a_fmax;
 +} em_state_t;
 +
 +static em_state_t *init_em_state()
 +{
 +  em_state_t *ems;
 +
 +  snew(ems,1);
 +
 +  /* does this need to be here?  Should the array be declared differently (statically) in the state definition? */
 +  snew(ems->s.lambda,efptNR);
 +
 +  return ems;
 +}
 +
 +static void print_em_start(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
 +                           gmx_wallcycle_t wcycle,
 +                           const char *name)
 +{
 +    char buf[STRLEN];
 +
 +    runtime_start(runtime);
 +
 +    sprintf(buf,"Started %s",name);
 +    print_date_and_time(fplog,cr->nodeid,buf,NULL);
 +
 +    wallcycle_start(wcycle,ewcRUN);
 +}
 +static void em_time_end(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
 +                        gmx_wallcycle_t wcycle)
 +{
 +    wallcycle_stop(wcycle,ewcRUN);
 +
 +    runtime_end(runtime);
 +}
 +
 +static void sp_header(FILE *out,const char *minimizer,real ftol,int nsteps)
 +{
 +    fprintf(out,"\n");
 +    fprintf(out,"%s:\n",minimizer);
 +    fprintf(out,"   Tolerance (Fmax)   = %12.5e\n",ftol);
 +    fprintf(out,"   Number of steps    = %12d\n",nsteps);
 +}
 +
 +static void warn_step(FILE *fp,real ftol,gmx_bool bLastStep,gmx_bool bConstrain)
 +{
 +    char buffer[2048];
 +    if (bLastStep)
 +    {
 +        sprintf(buffer,
 +                "\nEnergy minimization reached the maximum number"
 +                " of steps before the forces reached the requested"
 +                " precision Fmax < %g.\n",ftol);
 +    }
 +    else
 +    {
 +        sprintf(buffer,
 +                "\nEnergy minimization has stopped, but the forces have"
 +                " not converged to the requested precision Fmax < %g (which"
 +                " may not be possible for your system). It stopped"
 +                " because the algorithm tried to make a new step whose size"
 +                " was too small, or there was no change in the energy since"
 +                " the last step. Either way, we regard the minimization as"
 +                " converged to within the available machine precision,"
 +                " given your starting configuration and EM parameters.\n%s%s",
 +                ftol,
 +                sizeof(real)<sizeof(double) ?
 +                "\nDouble precision normally gives you higher accuracy, but"
 +                " this is often not needed for preparing to run molecular"
 +                " dynamics.\n" :
 +                "",
 +                bConstrain ?
 +                "You might need to increase your constraint accuracy, or turn\n"
 +                "off constraints altogether (set constraints = none in mdp file)\n" :
 +                "");
 +    }
 +    fputs(wrap_lines(buffer, 78, 0, FALSE), fp);
 +}
 +
 +
 +
 +static void print_converged(FILE *fp,const char *alg,real ftol,
 +                          gmx_large_int_t count,gmx_bool bDone,gmx_large_int_t nsteps,
 +                          real epot,real fmax, int nfmax, real fnorm)
 +{
 +  char buf[STEPSTRSIZE];
 +
 +  if (bDone)
 +    fprintf(fp,"\n%s converged to Fmax < %g in %s steps\n",
 +          alg,ftol,gmx_step_str(count,buf));
 +  else if(count<nsteps)
 +    fprintf(fp,"\n%s converged to machine precision in %s steps,\n"
 +               "but did not reach the requested Fmax < %g.\n",
 +          alg,gmx_step_str(count,buf),ftol);
 +  else
 +    fprintf(fp,"\n%s did not converge to Fmax < %g in %s steps.\n",
 +          alg,ftol,gmx_step_str(count,buf));
 +
 +#ifdef GMX_DOUBLE
 +  fprintf(fp,"Potential Energy  = %21.14e\n",epot);
 +  fprintf(fp,"Maximum force     = %21.14e on atom %d\n",fmax,nfmax+1);
 +  fprintf(fp,"Norm of force     = %21.14e\n",fnorm);
 +#else
 +  fprintf(fp,"Potential Energy  = %14.7e\n",epot);
 +  fprintf(fp,"Maximum force     = %14.7e on atom %d\n",fmax,nfmax+1);
 +  fprintf(fp,"Norm of force     = %14.7e\n",fnorm);
 +#endif
 +}
 +
 +static void get_f_norm_max(t_commrec *cr,
 +                         t_grpopts *opts,t_mdatoms *mdatoms,rvec *f,
 +                         real *fnorm,real *fmax,int *a_fmax)
 +{
 +  double fnorm2,*sum;
 +  real fmax2,fmax2_0,fam;
 +  int  la_max,a_max,start,end,i,m,gf;
 +
 +  /* This routine finds the norm of the force, the largest atomic force and
 +   * the atom it acts on, and returns them through the pointer arguments
 +   * (each of which may be NULL). On parallel machines the global max is taken.
 +   */
 +  fnorm2 = 0;
 +  fmax2 = 0;
 +  la_max = -1;
 +  gf = 0;
 +  start = mdatoms->start;
 +  end   = mdatoms->homenr + start;
 +  if (mdatoms->cFREEZE) {
 +    for(i=start; i<end; i++) {
 +      gf = mdatoms->cFREEZE[i];
 +      fam = 0;
 +      for(m=0; m<DIM; m++)
 +      if (!opts->nFreeze[gf][m])
 +        fam += sqr(f[i][m]);
 +      fnorm2 += fam;
 +      if (fam > fmax2) {
 +      fmax2  = fam;
 +      la_max = i;
 +      }
 +    }
 +  } else {
 +    for(i=start; i<end; i++) {
 +      fam = norm2(f[i]);
 +      fnorm2 += fam;
 +      if (fam > fmax2) {
 +      fmax2  = fam;
 +      la_max = i;
 +      }
 +    }
 +  }
 +
 +  if (la_max >= 0 && DOMAINDECOMP(cr)) {
 +    a_max = cr->dd->gatindex[la_max];
 +  } else {
 +    a_max = la_max;
 +  }
 +  if (PAR(cr)) {
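 +    /* Pack the per-node data into one buffer so a single gmx_sumd suffices:
 +     * sum[2*i] holds the squared maximum force of node i, sum[2*i+1] the
 +     * corresponding global atom index and sum[2*nnodes] the squared norm.
 +     */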
 +    snew(sum,2*cr->nnodes+1);
 +    sum[2*cr->nodeid]   = fmax2;
 +    sum[2*cr->nodeid+1] = a_max;
 +    sum[2*cr->nnodes]   = fnorm2;
 +    gmx_sumd(2*cr->nnodes+1,sum,cr);
 +    fnorm2 = sum[2*cr->nnodes];
 +    /* Determine the global maximum */
 +    for(i=0; i<cr->nnodes; i++) {
 +      if (sum[2*i] > fmax2) {
 +      fmax2 = sum[2*i];
 +      a_max = (int)(sum[2*i+1] + 0.5);
 +      }
 +    }
 +    sfree(sum);
 +  }
 +
 +  if (fnorm)
 +    *fnorm = sqrt(fnorm2);
 +  if (fmax)
 +    *fmax  = sqrt(fmax2);
 +  if (a_fmax)
 +    *a_fmax = a_max;
 +}
 +
 +static void get_state_f_norm_max(t_commrec *cr,
 +                         t_grpopts *opts,t_mdatoms *mdatoms,
 +                         em_state_t *ems)
 +{
 +  get_f_norm_max(cr,opts,mdatoms,ems->f,&ems->fnorm,&ems->fmax,&ems->a_fmax);
 +}
 +
 +void init_em(FILE *fplog,const char *title,
 +             t_commrec *cr,t_inputrec *ir,
 +             t_state *state_global,gmx_mtop_t *top_global,
 +             em_state_t *ems,gmx_localtop_t **top,
 +             rvec **f,rvec **f_global,
 +             t_nrnb *nrnb,rvec mu_tot,
 +             t_forcerec *fr,gmx_enerdata_t **enerd,
 +             t_graph **graph,t_mdatoms *mdatoms,gmx_global_stat_t *gstat,
 +             gmx_vsite_t *vsite,gmx_constr_t constr,
 +             int nfile,const t_filenm fnm[],
 +             gmx_mdoutf_t **outf,t_mdebin **mdebin)
 +{
 +    int  start,homenr,i;
 +    real dvdlambda;
 +
 +    if (fplog)
 +    {
 +        fprintf(fplog,"Initiating %s\n",title);
 +    }
 +
 +    state_global->ngtc = 0;
 +
 +    /* Initialize lambda variables */
 +    initialize_lambdas(fplog,ir,&(state_global->fep_state),state_global->lambda,NULL);
 +
 +    init_nrnb(nrnb);
 +
 +    if (DOMAINDECOMP(cr))
 +    {
 +        *top = dd_init_local_top(top_global);
 +
 +        dd_init_local_state(cr->dd,state_global,&ems->s);
 +
 +        *f = NULL;
 +
 +        /* Distribute the charge groups over the nodes from the master node */
 +        dd_partition_system(fplog,ir->init_step,cr,TRUE,1,
 +                            state_global,top_global,ir,
 +                            &ems->s,&ems->f,mdatoms,*top,
 +                            fr,vsite,NULL,constr,
 +                            nrnb,NULL,FALSE);
 +        dd_store_state(cr->dd,&ems->s);
 +
 +        if (ir->nstfout)
 +        {
 +            snew(*f_global,top_global->natoms);
 +        }
 +        else
 +        {
 +            *f_global = NULL;
 +        }
 +        *graph = NULL;
 +    }
 +    else
 +    {
 +        snew(*f,top_global->natoms);
 +
 +        /* Just copy the state */
 +        ems->s = *state_global;
 +        snew(ems->s.x,ems->s.nalloc);
 +        snew(ems->f,ems->s.nalloc);
 +        for(i=0; i<state_global->natoms; i++)
 +        {
 +            copy_rvec(state_global->x[i],ems->s.x[i]);
 +        }
 +        copy_mat(state_global->box,ems->s.box);
 +
 +        if (PAR(cr) && ir->eI != eiNM)
 +        {
 +            /* Initialize the particle decomposition and split the topology */
 +            *top = split_system(fplog,top_global,ir,cr);
 +
 +            pd_cg_range(cr,&fr->cg0,&fr->hcg);
 +        }
 +        else
 +        {
 +            *top = gmx_mtop_generate_local_top(top_global,ir);
 +        }
 +        *f_global = *f;
 +
 +        forcerec_set_excl_load(fr,*top,cr);
 +
 +        init_bonded_thread_force_reduction(fr,&(*top)->idef);      
 +        
 +        if (ir->ePBC != epbcNONE && !fr->bMolPBC)
 +        {
 +            *graph = mk_graph(fplog,&((*top)->idef),0,top_global->natoms,FALSE,FALSE);
 +        }
 +        else
 +        {
 +            *graph = NULL;
 +        }
 +
 +        if (PARTDECOMP(cr))
 +        {
 +            pd_at_range(cr,&start,&homenr);
 +            homenr -= start;
 +        }
 +        else
 +        {
 +            start  = 0;
 +            homenr = top_global->natoms;
 +        }
 +        atoms2md(top_global,ir,0,NULL,start,homenr,mdatoms);
 +        update_mdatoms(mdatoms,state_global->lambda[efptFEP]);
 +
 +        if (vsite)
 +        {
 +            set_vsite_top(vsite,*top,mdatoms,cr);
 +        }
 +    }
 +
 +    if (constr)
 +    {
 +        if (ir->eConstrAlg == econtSHAKE &&
 +            gmx_mtop_ftype_count(top_global,F_CONSTR) > 0)
 +        {
 +            gmx_fatal(FARGS,"Can not do energy minimization with %s, use %s\n",
 +                      econstr_names[econtSHAKE],econstr_names[econtLINCS]);
 +        }
 +
 +        if (!DOMAINDECOMP(cr))
 +        {
 +            set_constraints(constr,*top,ir,mdatoms,cr);
 +        }
 +
 +        if (!ir->bContinuation)
 +        {
 +            /* Constrain the starting coordinates */
 +            dvdlambda=0;
 +            constrain(PAR(cr) ? NULL : fplog,TRUE,TRUE,constr,&(*top)->idef,
 +                      ir,NULL,cr,-1,0,mdatoms,
 +                      ems->s.x,ems->s.x,NULL,fr->bMolPBC,ems->s.box,
 +                      ems->s.lambda[efptFEP],&dvdlambda,
 +                      NULL,NULL,nrnb,econqCoord,FALSE,0,0);
 +        }
 +    }
 +
 +    if (PAR(cr))
 +    {
 +        *gstat = global_stat_init(ir);
 +    }
 +
 +    *outf = init_mdoutf(nfile,fnm,0,cr,ir,NULL);
 +
 +    snew(*enerd,1);
 +    init_enerdata(top_global->groups.grps[egcENER].nr,ir->fepvals->n_lambda,
 +                  *enerd);
 +
 +    if (mdebin != NULL)
 +    {
 +        /* Init bin for energy stuff */
 +        *mdebin = init_mdebin((*outf)->fp_ene,top_global,ir,NULL);
 +    }
 +
 +    clear_rvec(mu_tot);
 +    calc_shifts(ems->s.box,fr->shift_vec);
 +}
 +
 +static void finish_em(FILE *fplog,t_commrec *cr,gmx_mdoutf_t *outf,
 +                      gmx_runtime_t *runtime,gmx_wallcycle_t wcycle)
 +{
 +  if (!(cr->duty & DUTY_PME)) {
 +    /* Tell the PME only node to finish */
 +    gmx_pme_send_finish(cr);
 +  }
 +
 +  done_mdoutf(outf);
 +
 +  em_time_end(fplog,cr,runtime,wcycle);
 +}
 +
 +static void swap_em_state(em_state_t *ems1,em_state_t *ems2)
 +{
 +  em_state_t tmp;
 +
 +  tmp   = *ems1;
 +  *ems1 = *ems2;
 +  *ems2 = tmp;
 +}
 +
 +static void copy_em_coords(em_state_t *ems,t_state *state)
 +{
 +    int i;
 +
 +    for(i=0; (i<state->natoms); i++)
 +    {
 +        copy_rvec(ems->s.x[i],state->x[i]);
 +    }
 +}
 +
 +static void write_em_traj(FILE *fplog,t_commrec *cr,
 +                          gmx_mdoutf_t *outf,
 +                          gmx_bool bX,gmx_bool bF,const char *confout,
 +                          gmx_mtop_t *top_global,
 +                          t_inputrec *ir,gmx_large_int_t step,
 +                          em_state_t *state,
 +                          t_state *state_global,rvec *f_global)
 +{
 +    int mdof_flags;
 +
 +    if ((bX || bF || confout != NULL) && !DOMAINDECOMP(cr))
 +    {
 +        copy_em_coords(state,state_global);
 +        f_global = state->f;
 +    }
 +
 +    mdof_flags = 0;
 +    if (bX) { mdof_flags |= MDOF_X; }
 +    if (bF) { mdof_flags |= MDOF_F; }
 +    write_traj(fplog,cr,outf,mdof_flags,
 +               top_global,step,(double)step,
 +               &state->s,state_global,state->f,f_global,NULL,NULL);
 +
 +    if (confout != NULL && MASTER(cr))
 +    {
 +        if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr))
 +        {
 +            /* Make molecules whole only for confout writing */
 +            do_pbc_mtop(fplog,ir->ePBC,state_global->box,top_global,
 +                        state_global->x);
 +        }
 +
 +        write_sto_conf_mtop(confout,
 +                            *top_global->name,top_global,
 +                            state_global->x,NULL,ir->ePBC,state_global->box);
 +    }
 +}
 +
 +static void do_em_step(t_commrec *cr,t_inputrec *ir,t_mdatoms *md,
 +                       gmx_bool bMolPBC,
 +                       em_state_t *ems1,real a,rvec *f,em_state_t *ems2,
 +                       gmx_constr_t constr,gmx_localtop_t *top,
 +                       t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +                       gmx_large_int_t count)
 +
 +{
 +    t_state *s1,*s2;
 +    int  i;
 +    int  start,end;
 +    rvec *x1,*x2;
 +    real dvdlambda;
 +
 +    s1 = &ems1->s;
 +    s2 = &ems2->s;
 +
 +    if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
 +    {
 +        gmx_incons("state mismatch in do_em_step");
 +    }
 +
 +    s2->flags = s1->flags;
 +
 +    if (s2->nalloc != s1->nalloc)
 +    {
 +        s2->nalloc = s1->nalloc;
 +        srenew(s2->x,s1->nalloc);
 +        srenew(ems2->f,  s1->nalloc);
 +        if (s2->flags & (1<<estCGP))
 +        {
 +            srenew(s2->cg_p,  s1->nalloc);
 +        }
 +    }
 +  
 +    s2->natoms = s1->natoms;
 +    copy_mat(s1->box,s2->box);
 +    /* Copy free energy state */
 +    for (i=0;i<efptNR;i++)
 +    {
 +        s2->lambda[i] = s1->lambda[i];
 +    }
 +
 +    start = md->start;
 +    end   = md->start + md->homenr;
 +
 +    x1 = s1->x;
 +    x2 = s2->x;
 +
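 +    /* The new trial positions are x2 = x1 + a*f, i.e. a step of length a
 +     * along the direction passed in as f (do_cg passes the conjugate
 +     * direction cg_p); frozen dimensions keep their old coordinates.
 +     */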
 +#pragma omp parallel num_threads(gmx_omp_nthreads_get(emntUpdate))
 +    {
 +        int gf,i,m;
 +
 +        gf = 0;
 +#pragma omp for schedule(static) nowait
 +        for(i=start; i<end; i++)
 +        {
 +            if (md->cFREEZE)
 +            {
 +                gf = md->cFREEZE[i];
 +            }
 +            for(m=0; m<DIM; m++)
 +            {
 +                if (ir->opts.nFreeze[gf][m])
 +                {
 +                    x2[i][m] = x1[i][m];
 +                }
 +                else
 +                {
 +                    x2[i][m] = x1[i][m] + a*f[i][m];
 +                }
 +            }
 +        }
 +
 +        if (s2->flags & (1<<estCGP))
 +        {
 +            /* Copy the CG p vector */
 +            x1 = s1->cg_p;
 +            x2 = s2->cg_p;
 +#pragma omp for schedule(static) nowait
 +            for(i=start; i<end; i++)
 +            {
 +                copy_rvec(x1[i],x2[i]);
 +            }
 +        }
 +        
 +        if (DOMAINDECOMP(cr))
 +        {
 +            s2->ddp_count = s1->ddp_count;
 +            if (s2->cg_gl_nalloc < s1->cg_gl_nalloc)
 +            {
 +#pragma omp barrier
 +                s2->cg_gl_nalloc = s1->cg_gl_nalloc;
 +                srenew(s2->cg_gl,s2->cg_gl_nalloc);
 +#pragma omp barrier
 +            }
 +            s2->ncg_gl = s1->ncg_gl;
 +#pragma omp for schedule(static) nowait
 +            for(i=0; i<s2->ncg_gl; i++)
 +            {
 +                s2->cg_gl[i] = s1->cg_gl[i];
 +            }
 +            s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
 +        }
 +    }
 +    
 +    if (constr)
 +    {
 +        wallcycle_start(wcycle,ewcCONSTR);
 +        dvdlambda = 0;
 +        constrain(NULL,TRUE,TRUE,constr,&top->idef,   
 +                  ir,NULL,cr,count,0,md,
 +                  s1->x,s2->x,NULL,bMolPBC,s2->box,
 +                  s2->lambda[efptBONDED],&dvdlambda,
 +                  NULL,NULL,nrnb,econqCoord,FALSE,0,0);
 +        wallcycle_stop(wcycle,ewcCONSTR);
 +    }
 +}
 +
 +static void em_dd_partition_system(FILE *fplog,int step,t_commrec *cr,
 +                                   gmx_mtop_t *top_global,t_inputrec *ir,
 +                                   em_state_t *ems,gmx_localtop_t *top,
 +                                   t_mdatoms *mdatoms,t_forcerec *fr,
 +                                   gmx_vsite_t *vsite,gmx_constr_t constr,
 +                                   t_nrnb *nrnb,gmx_wallcycle_t wcycle)
 +{
 +    /* Repartition the domain decomposition */
 +    wallcycle_start(wcycle,ewcDOMDEC);
 +    dd_partition_system(fplog,step,cr,FALSE,1,
 +                        NULL,top_global,ir,
 +                        &ems->s,&ems->f,
 +                        mdatoms,top,fr,vsite,NULL,constr,
 +                        nrnb,wcycle,FALSE);
 +    dd_store_state(cr->dd,&ems->s);
 +    wallcycle_stop(wcycle,ewcDOMDEC);
 +}
 +
 +static void evaluate_energy(FILE *fplog,gmx_bool bVerbose,t_commrec *cr,
 +                            t_state *state_global,gmx_mtop_t *top_global,
 +                            em_state_t *ems,gmx_localtop_t *top,
 +                            t_inputrec *inputrec,
 +                            t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +                            gmx_global_stat_t gstat,
 +                            gmx_vsite_t *vsite,gmx_constr_t constr,
 +                            t_fcdata *fcd,
 +                            t_graph *graph,t_mdatoms *mdatoms,
 +                            t_forcerec *fr,rvec mu_tot,
 +                            gmx_enerdata_t *enerd,tensor vir,tensor pres,
 +                            gmx_large_int_t count,gmx_bool bFirst)
 +{
 +  real t;
 +  gmx_bool bNS;
 +  int  nabnsb;
 +  tensor force_vir,shake_vir,ekin;
 +  real dvdlambda,prescorr,enercorr,dvdlcorr;
 +  real terminate=0;
 +
 +  /* Set the time to the initial time; the time does not change during EM */
 +  t = inputrec->init_t;
 +
 +  if (bFirst ||
 +      (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count)) {
 +    /* This is the first state or an old state used before the last neighbour search */
 +    bNS = TRUE;
 +  } else {
 +    bNS = FALSE;
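 +    /* With nstlist > 0 we neighbour search at every energy evaluation,
 +     * with nstlist == -1 only when atoms have moved beyond the list buffer.
 +     */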
 +    if (inputrec->nstlist > 0) {
 +      bNS = TRUE;
 +    } else if (inputrec->nstlist == -1) {
 +      nabnsb = natoms_beyond_ns_buffer(inputrec,fr,&top->cgs,NULL,ems->s.x);
 +      if (PAR(cr))
 +      gmx_sumi(1,&nabnsb,cr);
 +      bNS = (nabnsb > 0);
 +    }
 +  }
 +
 +  if (vsite)
 +    construct_vsites(fplog,vsite,ems->s.x,nrnb,1,NULL,
 +                   top->idef.iparams,top->idef.il,
 +                   fr->ePBC,fr->bMolPBC,graph,cr,ems->s.box);
 +
 +  if (DOMAINDECOMP(cr)) {
 +    if (bNS) {
 +      /* Repartition the domain decomposition */
 +      em_dd_partition_system(fplog,count,cr,top_global,inputrec,
 +                           ems,top,mdatoms,fr,vsite,constr,
 +                           nrnb,wcycle);
 +    }
 +  }
 +
 +    /* Calc force & energy on new trial position  */
 +    /* do_force always puts the charge groups in the box and shifts again
 +     * We do not unshift, so molecules are always whole in congrad.c
 +     */
 +    do_force(fplog,cr,inputrec,
 +             count,nrnb,wcycle,top,top_global,&top_global->groups,
 +             ems->s.box,ems->s.x,&ems->s.hist,
 +             ems->f,force_vir,mdatoms,enerd,fcd,
 +             ems->s.lambda,graph,fr,vsite,mu_tot,t,NULL,NULL,TRUE,
 +             GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES |
 +             GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY |
++             (bNS ? GMX_FORCE_NS | GMX_FORCE_DO_LR : 0));
 +
 +    /* Clear the unused shake virial and pressure */
 +    clear_mat(shake_vir);
 +    clear_mat(pres);
 +
 +    /* Communicate stuff when parallel */
 +    if (PAR(cr) && inputrec->eI != eiNM)
 +    {
 +        wallcycle_start(wcycle,ewcMoveE);
 +
 +        global_stat(fplog,gstat,cr,enerd,force_vir,shake_vir,mu_tot,
 +                    inputrec,NULL,NULL,NULL,1,&terminate,
 +                    top_global,&ems->s,FALSE,
 +                    CGLO_ENERGY |
 +                    CGLO_PRESSURE |
 +                    CGLO_CONSTRAINT |
 +                    CGLO_FIRSTITERATE);
 +
 +        wallcycle_stop(wcycle,ewcMoveE);
 +    }
 +
 +    /* Calculate long range corrections to pressure and energy */
 +    calc_dispcorr(fplog,inputrec,fr,count,top_global->natoms,ems->s.box,ems->s.lambda[efptVDW],
 +                  pres,force_vir,&prescorr,&enercorr,&dvdlcorr);
 +    enerd->term[F_DISPCORR] = enercorr;
 +    enerd->term[F_EPOT] += enercorr;
 +    enerd->term[F_PRES] += prescorr;
 +    enerd->term[F_DVDL] += dvdlcorr;
 +
 +  ems->epot = enerd->term[F_EPOT];
 +
 +  if (constr) {
 +    /* Project out the constraint components of the force */
 +    wallcycle_start(wcycle,ewcCONSTR);
 +    dvdlambda = 0;
 +    constrain(NULL,FALSE,FALSE,constr,&top->idef,
 +              inputrec,NULL,cr,count,0,mdatoms,
 +              ems->s.x,ems->f,ems->f,fr->bMolPBC,ems->s.box,
 +              ems->s.lambda[efptBONDED],&dvdlambda,
 +              NULL,&shake_vir,nrnb,econqForceDispl,FALSE,0,0);
 +    if (fr->bSepDVDL && fplog)
 +      fprintf(fplog,sepdvdlformat,"Constraints",t,dvdlambda);
 +    enerd->term[F_DVDL_BONDED] += dvdlambda;
 +    m_add(force_vir,shake_vir,vir);
 +    wallcycle_stop(wcycle,ewcCONSTR);
 +  } else {
 +    copy_mat(force_vir,vir);
 +  }
 +
 +  clear_mat(ekin);
 +  enerd->term[F_PRES] =
 +    calc_pres(fr->ePBC,inputrec->nwall,ems->s.box,ekin,vir,pres);
 +
 +  sum_dhdl(enerd,ems->s.lambda,inputrec->fepvals);
 +
 +    if (EI_ENERGY_MINIMIZATION(inputrec->eI))
 +    {
 +        get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,ems);
 +    }
 +}
 +
 +static double reorder_partsum(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
 +                            gmx_mtop_t *mtop,
 +                            em_state_t *s_min,em_state_t *s_b)
 +{
 +  rvec *fm,*fb,*fmg;
 +  t_block *cgs_gl;
 +  int ncg,*cg_gl,*index,c,cg,i,a0,a1,a,gf,m;
 +  double partsum;
 +  unsigned char *grpnrFREEZE;
 +
 +  if (debug)
 +    fprintf(debug,"Doing reorder_partsum\n");
 +
 +  fm = s_min->f;
 +  fb = s_b->f;
 +
 +  cgs_gl = dd_charge_groups_global(cr->dd);
 +  index = cgs_gl->index;
 +
 +  /* Collect fm in a global vector fmg.
 +   * This conflicts with the spirit of domain decomposition,
 +   * but to fully optimize this a much more complicated algorithm is required.
 +   */
 +  snew(fmg,mtop->natoms);
 +
 +  ncg   = s_min->s.ncg_gl;
 +  cg_gl = s_min->s.cg_gl;
 +  i = 0;
 +  for(c=0; c<ncg; c++) {
 +    cg = cg_gl[c];
 +    a0 = index[cg];
 +    a1 = index[cg+1];
 +    for(a=a0; a<a1; a++) {
 +      copy_rvec(fm[i],fmg[a]);
 +      i++;
 +    }
 +  }
 +  gmx_sum(mtop->natoms*3,fmg[0],cr);
 +
 +  /* Now we will determine the part of the sum for the cgs in state s_b */
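 +  /* For each home atom of s_b the matching s_min force is looked up through
 +   * the global atom index, so the Polak-Ribiere numerator (fb - fm).fb is
 +   * accumulated with a consistent atom ordering.
 +   */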
 +  ncg   = s_b->s.ncg_gl;
 +  cg_gl = s_b->s.cg_gl;
 +  partsum = 0;
 +  i = 0;
 +  gf = 0;
 +  grpnrFREEZE = mtop->groups.grpnr[egcFREEZE];
 +  for(c=0; c<ncg; c++) {
 +    cg = cg_gl[c];
 +    a0 = index[cg];
 +    a1 = index[cg+1];
 +    for(a=a0; a<a1; a++) {
 +      if (mdatoms->cFREEZE && grpnrFREEZE) {
 +      gf = grpnrFREEZE[i];
 +      }
 +      for(m=0; m<DIM; m++) {
 +      if (!opts->nFreeze[gf][m]) {
 +        partsum += (fb[i][m] - fmg[a][m])*fb[i][m];
 +      }
 +      }
 +      i++;
 +    }
 +  }
 +
 +  sfree(fmg);
 +
 +  return partsum;
 +}
 +
 +static real pr_beta(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
 +                  gmx_mtop_t *mtop,
 +                  em_state_t *s_min,em_state_t *s_b)
 +{
 +  rvec *fm,*fb;
 +  double sum;
 +  int  gf,i,m;
 +
 +  /* This is just the classical Polak-Ribiere calculation of beta;
 +   * it looks a bit complicated since we take freeze groups into account,
 +   * and might have to sum it in parallel runs.
 +   */
 +
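 +  /* Polak-Ribiere: beta = f_b.(f_b - f_min) / |f_min|^2, where f_min is the
 +   * force of the current best state s_min and f_b that of the new state s_b;
 +   * frozen dimensions are excluded.
 +   */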
 +  if (!DOMAINDECOMP(cr) ||
 +      (s_min->s.ddp_count == cr->dd->ddp_count &&
 +       s_b->s.ddp_count   == cr->dd->ddp_count)) {
 +    fm = s_min->f;
 +    fb = s_b->f;
 +    sum = 0;
 +    gf = 0;
 +    /* This part of code can be incorrect with DD,
 +     * since the atom ordering in s_b and s_min might differ.
 +     */
 +    for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 +      if (mdatoms->cFREEZE)
 +      gf = mdatoms->cFREEZE[i];
 +      for(m=0; m<DIM; m++)
 +      if (!opts->nFreeze[gf][m]) {
 +        sum += (fb[i][m] - fm[i][m])*fb[i][m];
 +      }
 +    }
 +  } else {
 +    /* We need to reorder cgs while summing */
 +    sum = reorder_partsum(cr,opts,mdatoms,mtop,s_min,s_b);
 +  }
 +  if (PAR(cr))
 +    gmx_sumd(1,&sum,cr);
 +
 +  return sum/sqr(s_min->fnorm);
 +}
 +
 +double do_cg(FILE *fplog,t_commrec *cr,
 +             int nfile,const t_filenm fnm[],
 +             const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
 +             int nstglobalcomm,
 +             gmx_vsite_t *vsite,gmx_constr_t constr,
 +             int stepout,
 +             t_inputrec *inputrec,
 +             gmx_mtop_t *top_global,t_fcdata *fcd,
 +             t_state *state_global,
 +             t_mdatoms *mdatoms,
 +             t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +             gmx_edsam_t ed,
 +             t_forcerec *fr,
 +             int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
 +             gmx_membed_t membed,
 +             real cpt_period,real max_hours,
 +             const char *deviceOptions,
 +             unsigned long Flags,
 +             gmx_runtime_t *runtime)
 +{
 +  const char *CG="Polak-Ribiere Conjugate Gradients";
 +
 +  em_state_t *s_min,*s_a,*s_b,*s_c;
 +  gmx_localtop_t *top;
 +  gmx_enerdata_t *enerd;
 +  rvec   *f;
 +  gmx_global_stat_t gstat;
 +  t_graph    *graph;
 +  rvec   *f_global,*p,*sf,*sfm;
 +  double gpa,gpb,gpc,tmp,sum[2],minstep;
 +  real   fnormn;
 +  real   stepsize;
 +  real   a,b,c,beta=0.0;
 +  real   epot_repl=0;
 +  real   pnorm;
 +  t_mdebin   *mdebin;
 +  gmx_bool   converged,foundlower;
 +  rvec   mu_tot;
 +  gmx_bool   do_log=FALSE,do_ene=FALSE,do_x,do_f;
 +  tensor vir,pres;
 +  int    number_steps,neval=0,nstcg=inputrec->nstcgsteep;
 +  gmx_mdoutf_t *outf;
 +  int    i,m,gf,step,nminstep;
 +  real   terminate=0;
 +
 +  step=0;
 +
 +  s_min = init_em_state();
 +  s_a   = init_em_state();
 +  s_b   = init_em_state();
 +  s_c   = init_em_state();
 +
 +  /* Init em and store the local state in s_min */
 +  init_em(fplog,CG,cr,inputrec,
 +          state_global,top_global,s_min,&top,&f,&f_global,
 +          nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
 +          nfile,fnm,&outf,&mdebin);
 +
 +  /* Print to log file */
 +  print_em_start(fplog,cr,runtime,wcycle,CG);
 +
 +  /* Max number of steps */
 +  number_steps=inputrec->nsteps;
 +
 +  if (MASTER(cr))
 +    sp_header(stderr,CG,inputrec->em_tol,number_steps);
 +  if (fplog)
 +    sp_header(fplog,CG,inputrec->em_tol,number_steps);
 +
 +  /* Call the force routine and some auxiliary routines (neighbour searching etc.) */
 +  /* do_force always puts the charge groups in the box and shifts again
 +   * We do not unshift, so molecules are always whole in congrad.c
 +   */
 +  evaluate_energy(fplog,bVerbose,cr,
 +                state_global,top_global,s_min,top,
 +                inputrec,nrnb,wcycle,gstat,
 +                vsite,constr,fcd,graph,mdatoms,fr,
 +                mu_tot,enerd,vir,pres,-1,TRUE);
 +  where();
 +
 +  if (MASTER(cr)) {
 +    /* Copy stuff to the energy bin for easy printing etc. */
 +    upd_mdebin(mdebin,FALSE,FALSE,(double)step,
 +               mdatoms->tmass,enerd,&s_min->s,inputrec->fepvals,inputrec->expandedvals,s_min->s.box,
 +               NULL,NULL,vir,pres,NULL,mu_tot,constr);
 +
 +    print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
 +    print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
 +               TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 +  }
 +  where();
 +
 +  /* Estimate/guess the initial stepsize */
 +  stepsize = inputrec->em_stepsize/s_min->fnorm;
 +
 +  if (MASTER(cr)) {
 +    fprintf(stderr,"   F-max             = %12.5e on atom %d\n",
 +          s_min->fmax,s_min->a_fmax+1);
 +    fprintf(stderr,"   F-Norm            = %12.5e\n",
 +          s_min->fnorm/sqrt(state_global->natoms));
 +    fprintf(stderr,"\n");
 +    /* and copy to the log file too... */
 +    fprintf(fplog,"   F-max             = %12.5e on atom %d\n",
 +          s_min->fmax,s_min->a_fmax+1);
 +    fprintf(fplog,"   F-Norm            = %12.5e\n",
 +          s_min->fnorm/sqrt(state_global->natoms));
 +    fprintf(fplog,"\n");
 +  }
 +  /* Start the loop over CG steps.
 +   * Each successful step is counted, and we continue until
 +   * we either converge or reach the max number of steps.
 +   */
 +  converged = FALSE;
 +  for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged;step++) {
 +
 +    /* start taking steps in a new direction
 +     * First time we enter the routine, beta=0, and the direction is
 +     * simply the negative gradient.
 +     */
 +
 +    /* Calculate the new direction in p, and the gradient in this direction, gpa */
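 +    /* The conjugate direction is p = f + beta*p_old (zero for frozen
 +     * dimensions). Since f is the negative gradient, gpa = -p.f is the
 +     * directional derivative of the potential along p; gpa < 0 means
 +     * p still points downhill.
 +     */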
 +    p  = s_min->s.cg_p;
 +    sf = s_min->f;
 +    gpa = 0;
 +    gf = 0;
 +    for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 +      if (mdatoms->cFREEZE)
 +      gf = mdatoms->cFREEZE[i];
 +      for(m=0; m<DIM; m++) {
 +      if (!inputrec->opts.nFreeze[gf][m]) {
 +        p[i][m] = sf[i][m] + beta*p[i][m];
 +        gpa -= p[i][m]*sf[i][m];
 +        /* f is negative gradient, thus the sign */
 +      } else {
 +          p[i][m] = 0;
 +      }
 +      }
 +    }
 +
 +    /* Sum the gradient along the line across CPUs */
 +    if (PAR(cr))
 +      gmx_sumd(1,&gpa,cr);
 +
 +    /* Calculate the norm of the search vector */
 +    get_f_norm_max(cr,&(inputrec->opts),mdatoms,p,&pnorm,NULL,NULL);
 +
 +    /* Just in case stepsize reaches zero due to numerical precision... */
 +    if(stepsize<=0)
 +      stepsize = inputrec->em_stepsize/pnorm;
 +
 +    /*
 +     * Double check the value of the derivative in the search direction.
 +     * If it is positive it must be due to the old information in the
 +     * CG formula, so just remove that and start over with beta=0.
 +     * This corresponds to a steepest descent step.
 +     */
 +    if(gpa>0) {
 +      beta = 0;
 +      step--; /* Don't count this step since we are restarting */
 +      continue; /* Go back to the beginning of the big for-loop */
 +    }
 +
 +    /* Calculate minimum allowed stepsize, before the average (norm)
 +     * relative change in coordinate is smaller than precision
 +     */
 +    minstep=0;
 +    for (i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 +      for(m=0; m<DIM; m++) {
 +      tmp = fabs(s_min->s.x[i][m]);
 +      if(tmp < 1.0)
 +        tmp = 1.0;
 +      tmp = p[i][m]/tmp;
 +      minstep += tmp*tmp;
 +      }
 +    }
 +    /* Add up from all CPUs */
 +    if(PAR(cr))
 +      gmx_sumd(1,&minstep,cr);
 +
 +    minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms));
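 +    /* At stepsize == minstep the RMS relative change of the coordinates
 +     * equals GMX_REAL_EPS, so any smaller step is lost to round-off.
 +     */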
 +
 +    if(stepsize<minstep) {
 +      converged=TRUE;
 +      break;
 +    }
 +
 +    /* Write coordinates if necessary */
 +    do_x = do_per_step(step,inputrec->nstxout);
 +    do_f = do_per_step(step,inputrec->nstfout);
 +
 +    write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
 +                  top_global,inputrec,step,
 +                  s_min,state_global,f_global);
 +
 +    /* Take a step downhill.
 +     * In theory, we should minimize the function along this direction.
 +     * That is quite possible, but it turns out to take 5-10 function evaluations
 +     * for each line. However, we don't really need to find the exact minimum -
 +     * it is much better to start a new CG step in a modified direction as soon
 +     * as we are close to it. This will save a lot of energy evaluations.
 +     *
 +     * In practice, we just try to take a single step.
 +     * If it worked (i.e. lowered the energy), we increase the stepsize but
 +     * then continue straight to the next CG step without trying to find any minimum.
 +     * If it didn't work (higher energy), there must be a minimum somewhere between
 +     * the old position and the new one.
 +     *
 +     * Due to the finite numerical accuracy, it turns out that it is a good idea
 +     * to even accept a SMALL increase in energy, if the derivative is still downhill.
 +     * This leads to lower final energies in the tests I've done. / Erik
 +     */
 +    s_a->epot = s_min->epot;
 +    a = 0.0;
 +    c = a + stepsize; /* reference position along line is zero */
 +
 +    if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count) {
 +      em_dd_partition_system(fplog,step,cr,top_global,inputrec,
 +                           s_min,top,mdatoms,fr,vsite,constr,
 +                           nrnb,wcycle);
 +    }
 +
 +    /* Take a trial step (new coords in s_c) */
 +    do_em_step(cr,inputrec,mdatoms,fr->bMolPBC,s_min,c,s_min->s.cg_p,s_c,
 +               constr,top,nrnb,wcycle,-1);
 +
 +    neval++;
 +    /* Calculate energy for the trial step */
 +    evaluate_energy(fplog,bVerbose,cr,
 +                  state_global,top_global,s_c,top,
 +                  inputrec,nrnb,wcycle,gstat,
 +                  vsite,constr,fcd,graph,mdatoms,fr,
 +                  mu_tot,enerd,vir,pres,-1,FALSE);
 +
 +    /* Calc derivative along line */
 +    p  = s_c->s.cg_p;
 +    sf = s_c->f;
 +    gpc=0;
 +    for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 +      for(m=0; m<DIM; m++)
 +        gpc -= p[i][m]*sf[i][m];  /* f is negative gradient, thus the sign */
 +    }
 +    /* Sum the gradient along the line across CPUs */
 +    if (PAR(cr))
 +      gmx_sumd(1,&gpc,cr);
 +
 +    /* This is the max amount of increase in energy we tolerate */
 +    tmp=sqrt(GMX_REAL_EPS)*fabs(s_a->epot);
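 +    /* sqrt(GMX_REAL_EPS) is on the order of 1e-4 in single and 1e-8 in
 +     * double precision, so only an increase that is insignificant relative
 +     * to |Epot| is accepted.
 +     */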
 +
 +    /* Accept the step if the energy is lower, or if it is not significantly higher
 +     * and the line derivative is still negative.
 +     */
 +    if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp))) {
 +      foundlower = TRUE;
 +      /* Great, we found a better energy. Increase step for next iteration
 +       * if we are still going down, decrease it otherwise
 +       */
 +      if(gpc<0)
 +      stepsize *= 1.618034;  /* The golden section */
 +      else
 +      stepsize *= 0.618034;  /* 1/golden section */
 +    } else {
 +      /* New energy is the same or higher. We will have to do some work
 +       * to find a smaller value in the interval. Take smaller step next time!
 +       */
 +      foundlower = FALSE;
 +      stepsize *= 0.618034;
 +    }
 +
 +
 +
 +
 +    /* OK, if we didn't find a lower value we will have to locate one now - there must
 +     * be one in the interval [a=0,c].
 +     * The same thing is valid here, though: Don't spend dozens of iterations to find
 +     * the line minimum. We try to interpolate based on the derivative at the endpoints,
 +     * and only continue until we find a lower value. In most cases this means 1-2 iterations.
 +     *
 +     * I also have a safeguard for potentially really pathological functions so we never
 +     * take more than 20 steps before we give up ...
 +     *
 +     * If we already found a lower value we just skip this step and continue to the update.
 +     */
 +    if (!foundlower) {
 +      nminstep=0;
 +
 +      do {
 +      /* Select a new trial point.
 +       * If the derivatives at points a & c have different sign we interpolate to zero,
 +       * otherwise just do a bisection.
 +       */
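 +      /* This is a secant step towards the zero of the interpolated
 +       * derivative: e.g. gpa = -2 at a = 0 and gpc = +1 at c = 0.3 gives
 +       * b = 0 + (-2)*(0 - 0.3)/(1 - (-2)) = 0.2.
 +       */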
 +      if(gpa<0 && gpc>0)
 +        b = a + gpa*(a-c)/(gpc-gpa);
 +      else
 +        b = 0.5*(a+c);
 +
 +      /* safeguard if interpolation close to machine accuracy causes errors:
 +       * never go outside the interval
 +       */
 +      if(b<=a || b>=c)
 +        b = 0.5*(a+c);
 +
 +      if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
 +        /* Reload the old state */
 +        em_dd_partition_system(fplog,-1,cr,top_global,inputrec,
 +                               s_min,top,mdatoms,fr,vsite,constr,
 +                               nrnb,wcycle);
 +      }
 +
 +      /* Take a trial step to this new point - new coords in s_b */
 +      do_em_step(cr,inputrec,mdatoms,fr->bMolPBC,s_min,b,s_min->s.cg_p,s_b,
 +               constr,top,nrnb,wcycle,-1);
 +
 +      neval++;
 +      /* Calculate energy for the trial step */
 +      evaluate_energy(fplog,bVerbose,cr,
 +                      state_global,top_global,s_b,top,
 +                      inputrec,nrnb,wcycle,gstat,
 +                      vsite,constr,fcd,graph,mdatoms,fr,
 +                      mu_tot,enerd,vir,pres,-1,FALSE);
 +
 +      /* p does not change within a step, but since the domain decomposition
 +       * might change, we have to use cg_p of s_b here.
 +       */
 +      p  = s_b->s.cg_p;
 +      sf = s_b->f;
 +      gpb=0;
 +      for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 +        for(m=0; m<DIM; m++)
 +            gpb -= p[i][m]*sf[i][m];   /* f is negative gradient, thus the sign */
 +      }
 +      /* Sum the gradient along the line across CPUs */
 +      if (PAR(cr))
 +        gmx_sumd(1,&gpb,cr);
 +
 +      if (debug)
 +        fprintf(debug,"CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
 +                s_a->epot,s_b->epot,s_c->epot,gpb);
 +
 +      epot_repl = s_b->epot;
 +
 +      /* Keep one of the intervals based on the value of the derivative at the new point */
 +      if (gpb > 0) {
 +        /* Replace c endpoint with b */
 +        swap_em_state(s_b,s_c);
 +        c = b;
 +        gpc = gpb;
 +      } else {
 +        /* Replace a endpoint with b */
 +        swap_em_state(s_b,s_a);
 +        a = b;
 +        gpa = gpb;
 +      }
 +
 +      /*
 +       * Stop search as soon as we find a value smaller than the endpoints.
 +       * Never run more than 20 steps, no matter what.
 +       */
 +      nminstep++;
 +      } while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
 +             (nminstep < 20));
 +
 +      if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
 +        nminstep >= 20) {
 +      /* OK. We couldn't find a significantly lower energy.
 +       * If beta==0 this was steepest descent, and then we give up.
 +       * If not, set beta=0 and restart with steepest descent before quitting.
 +         */
 +      if (beta == 0.0) {
 +        /* Converged */
 +        converged = TRUE;
 +        break;
 +      } else {
 +        /* Reset memory before giving up */
 +        beta = 0.0;
 +        continue;
 +      }
 +      }
 +
 +      /* Select min energy state of A & C, put the best in B.
 +       */
 +      if (s_c->epot < s_a->epot) {
 +      if (debug)
 +        fprintf(debug,"CGE: C (%f) is lower than A (%f), moving C to B\n",
 +                s_c->epot,s_a->epot);
 +      swap_em_state(s_b,s_c);
 +      gpb = gpc;
 +      b = c;
 +      } else {
 +      if (debug)
 +        fprintf(debug,"CGE: A (%f) is lower than C (%f), moving A to B\n",
 +                s_a->epot,s_c->epot);
 +      swap_em_state(s_b,s_a);
 +      gpb = gpa;
 +      b = a;
 +      }
 +
 +    } else {
 +      if (debug)
 +      fprintf(debug,"CGE: Found a lower energy %f, moving C to B\n",
 +              s_c->epot);
 +      swap_em_state(s_b,s_c);
 +      gpb = gpc;
 +      b = c;
 +    }
 +
 +    /* new search direction */
 +    /* beta = 0 means forget all memory and restart with steepest descents. */
 +    if (nstcg && ((step % nstcg)==0))
 +      beta = 0.0;
 +    else {
 +      /* s_min->fnorm cannot be zero, because then we would have converged
 +       * and broken out.
 +       */
 +
 +      /* Polak-Ribiere update.
 +       * Change to fnorm2/fnorm2_old for Fletcher-Reeves
 +       */
 +      beta = pr_beta(cr,&inputrec->opts,mdatoms,top_global,s_min,s_b);
 +    }
 +    /* Limit beta to prevent oscillations */
 +    if (fabs(beta) > 5.0)
 +      beta = 0.0;
 +
 +
 +    /* update positions */
 +    swap_em_state(s_min,s_b);
 +    gpa = gpb;
 +
 +    /* Print it if necessary */
 +    if (MASTER(cr)) {
 +      if(bVerbose)
 +      fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
 +              step,s_min->epot,s_min->fnorm/sqrt(state_global->natoms),
 +              s_min->fmax,s_min->a_fmax+1);
 +      /* Store the new (lower) energies */
 +      upd_mdebin(mdebin,FALSE,FALSE,(double)step,
 +                 mdatoms->tmass,enerd,&s_min->s,inputrec->fepvals,inputrec->expandedvals,s_min->s.box,
 +                 NULL,NULL,vir,pres,NULL,mu_tot,constr);
 +
 +      do_log = do_per_step(step,inputrec->nstlog);
 +      do_ene = do_per_step(step,inputrec->nstenergy);
 +      if(do_log)
 +          print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
 +      print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
 +               do_log ? fplog : NULL,step,step,eprNORMAL,
 +               TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 +    }
 +
 +    /* Stop when the maximum force lies below tolerance.
 +     * If we have reached machine precision, converged is already set to true.
 +     */
 +    converged = converged || (s_min->fmax < inputrec->em_tol);
 +
 +  } /* End of the loop */
 +
 +  if (converged)
 +    step--; /* we never took that last step in this case */
 +
 +    if (s_min->fmax > inputrec->em_tol)
 +    {
 +        if (MASTER(cr))
 +        {
 +            warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
 +            warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
 +        }
 +        converged = FALSE;
 +    }
 +
 +  if (MASTER(cr)) {
 +    /* If we already printed the energy and/or log output at the final step
 +     * we don't have to do it again, but otherwise print the final values.
 +     */
 +    if(!do_log) {
 +      /* Write final value to log since we didn't do anything the last step */
 +      print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
 +    }
 +    if (!do_ene || !do_log) {
 +      /* Write final energy file entries */
 +      print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
 +               !do_log ? fplog : NULL,step,step,eprNORMAL,
 +               TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 +    }
 +  }
 +
 +  /* Print some stuff... */
 +  if (MASTER(cr))
 +    fprintf(stderr,"\nwriting lowest energy coordinates.\n");
 +
 +  /* IMPORTANT!
 +   * For accurate normal mode calculation it is imperative that we
 +   * store the last conformation into the full precision binary trajectory.
 +   *
 +   * However, we should only do it if we did NOT already write this step
 +   * above (which we did if do_x or do_f was true).
 +   */
 +  do_x = !do_per_step(step,inputrec->nstxout);
 +  do_f = (inputrec->nstfout > 0 && !do_per_step(step,inputrec->nstfout));
 +
 +  write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
 +                top_global,inputrec,step,
 +                s_min,state_global,f_global);
 +
 +  fnormn = s_min->fnorm/sqrt(state_global->natoms);
 +
 +  if (MASTER(cr)) {
 +    print_converged(stderr,CG,inputrec->em_tol,step,converged,number_steps,
 +                  s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
 +    print_converged(fplog,CG,inputrec->em_tol,step,converged,number_steps,
 +                  s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
 +
 +    fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
 +  }
 +
 +  finish_em(fplog,cr,outf,runtime,wcycle);
 +
 +  /* To print the actual number of steps we needed somewhere */
 +  runtime->nsteps_done = step;
 +
 +  return 0;
 +} /* That's all folks */
 +
 +
 +double do_lbfgs(FILE *fplog,t_commrec *cr,
 +                int nfile,const t_filenm fnm[],
 +                const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
 +                int nstglobalcomm,
 +                gmx_vsite_t *vsite,gmx_constr_t constr,
 +                int stepout,
 +                t_inputrec *inputrec,
 +                gmx_mtop_t *top_global,t_fcdata *fcd,
 +                t_state *state,
 +                t_mdatoms *mdatoms,
 +                t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +                gmx_edsam_t ed,
 +                t_forcerec *fr,
 +                int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
 +                gmx_membed_t membed,
 +                real cpt_period,real max_hours,
 +                const char *deviceOptions,
 +                unsigned long Flags,
 +                gmx_runtime_t *runtime)
 +{
 +  static const char *LBFGS="Low-Memory BFGS Minimizer";
 +  em_state_t ems;
 +  gmx_localtop_t *top;
 +  gmx_enerdata_t *enerd;
 +  rvec   *f;
 +  gmx_global_stat_t gstat;
 +  t_graph    *graph;
 +  rvec   *f_global;
 +  int    ncorr,nmaxcorr,point,cp,neval,nminstep;
 +  double stepsize,gpa,gpb,gpc,tmp,minstep;
 +  real   *rho,*alpha,*ff,*xx,*p,*s,*lastx,*lastf,**dx,**dg;
 +  real   *xa,*xb,*xc,*fa,*fb,*fc,*xtmp,*ftmp;
 +  real   a,b,c,maxdelta,delta;
 +  real   diag,Epot0,Epot,EpotA,EpotB,EpotC;
 +  real   dgdx,dgdg,sq,yr,beta;
 +  t_mdebin   *mdebin;
 +  gmx_bool   converged,first;
 +  rvec   mu_tot;
 +  real   fnorm,fmax;
 +  gmx_bool   do_log,do_ene,do_x,do_f,foundlower,*frozen;
 +  tensor vir,pres;
 +  int    start,end,number_steps;
 +  gmx_mdoutf_t *outf;
 +  int    i,k,m,n,nfmax,gf,step;
 +  int    mdof_flags;
 +  /* not used */
 +  real   terminate;
 +
 +  if (PAR(cr))
 +    gmx_fatal(FARGS,"Cannot do parallel L-BFGS Minimization - yet.\n");
 +
 +  n = 3*state->natoms;
 +  nmaxcorr = inputrec->nbfgscorr;
 +
 +  /* Allocate memory */
 +  /* Use pointers to real so we don't have to loop over both atoms and
 +   * dimensions all the time...
 +   * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real
 +   * that point to the same memory.
 +   */
 +  snew(xa,n);
 +  snew(xb,n);
 +  snew(xc,n);
 +  snew(fa,n);
 +  snew(fb,n);
 +  snew(fc,n);
 +  snew(frozen,n);
 +
 +  snew(p,n);
 +  snew(lastx,n);
 +  snew(lastf,n);
 +  snew(rho,nmaxcorr);
 +  snew(alpha,nmaxcorr);
 +
 +  snew(dx,nmaxcorr);
 +  for(i=0;i<nmaxcorr;i++)
 +    snew(dx[i],n);
 +
 +  snew(dg,nmaxcorr);
 +  for(i=0;i<nmaxcorr;i++)
 +    snew(dg[i],n);
 +
 +  step = 0;
 +  neval = 0;
 +
 +  /* Init em */
 +  init_em(fplog,LBFGS,cr,inputrec,
 +          state,top_global,&ems,&top,&f,&f_global,
 +          nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
 +          nfile,fnm,&outf,&mdebin);
 +  /* Do_lbfgs is not completely updated like do_steep and do_cg,
 +   * so we free some memory again.
 +   */
 +  sfree(ems.s.x);
 +  sfree(ems.f);
 +
 +  xx = (real *)state->x;
 +  ff = (real *)f;
 +
 +  start = mdatoms->start;
 +  end   = mdatoms->homenr + start;
 +
 +  /* Print to log file */
 +  print_em_start(fplog,cr,runtime,wcycle,LBFGS);
 +
 +  do_log = do_ene = do_x = do_f = TRUE;
 +
 +  /* Max number of steps */
 +  number_steps=inputrec->nsteps;
 +
 +  /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
 +  gf = 0;
 +  for(i=start; i<end; i++) {
 +    if (mdatoms->cFREEZE)
 +      gf = mdatoms->cFREEZE[i];
 +     for(m=0; m<DIM; m++)
 +       frozen[3*i+m]=inputrec->opts.nFreeze[gf][m];
 +  }
 +  if (MASTER(cr))
 +    sp_header(stderr,LBFGS,inputrec->em_tol,number_steps);
 +  if (fplog)
 +    sp_header(fplog,LBFGS,inputrec->em_tol,number_steps);
 +
 +  if (vsite)
 +    construct_vsites(fplog,vsite,state->x,nrnb,1,NULL,
 +                   top->idef.iparams,top->idef.il,
 +                   fr->ePBC,fr->bMolPBC,graph,cr,state->box);
 +
 +  /* Call the force routine and some auxiliary routines (neighbour searching etc.) */
 +  /* do_force always puts the charge groups in the box and shifts again
 +   * We do not unshift, so molecules are always whole
 +   */
 +  neval++;
 +  ems.s.x = state->x;
 +  ems.f = f;
 +  evaluate_energy(fplog,bVerbose,cr,
 +                state,top_global,&ems,top,
 +                inputrec,nrnb,wcycle,gstat,
 +                vsite,constr,fcd,graph,mdatoms,fr,
 +                mu_tot,enerd,vir,pres,-1,TRUE);
 +  where();
 +
 +  if (MASTER(cr)) {
 +    /* Copy stuff to the energy bin for easy printing etc. */
 +    upd_mdebin(mdebin,FALSE,FALSE,(double)step,
 +               mdatoms->tmass,enerd,state,inputrec->fepvals,inputrec->expandedvals,state->box,
 +               NULL,NULL,vir,pres,NULL,mu_tot,constr);
 +
 +    print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
 +    print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
 +               TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 +  }
 +  where();
 +
 +  /* This is the starting energy */
 +  Epot = enerd->term[F_EPOT];
 +
 +  fnorm = ems.fnorm;
 +  fmax  = ems.fmax;
 +  nfmax = ems.a_fmax;
 +
 +  /* Set the initial step.
 +   * Since it will be multiplied by the non-normalized search direction
 +   * vector (force vector the first time), we scale it by the
 +   * norm of the force.
 +   */
 +
 +  if (MASTER(cr)) {
 +    fprintf(stderr,"Using %d BFGS correction steps.\n\n",nmaxcorr);
 +    fprintf(stderr,"   F-max             = %12.5e on atom %d\n",fmax,nfmax+1);
 +    fprintf(stderr,"   F-Norm            = %12.5e\n",fnorm/sqrt(state->natoms));
 +    fprintf(stderr,"\n");
 +    /* and copy to the log file too... */
 +    fprintf(fplog,"Using %d BFGS correction steps.\n\n",nmaxcorr);
 +    fprintf(fplog,"   F-max             = %12.5e on atom %d\n",fmax,nfmax+1);
 +    fprintf(fplog,"   F-Norm            = %12.5e\n",fnorm/sqrt(state->natoms));
 +    fprintf(fplog,"\n");
 +  }
 +
 +  point=0;
 +  for(i=0;i<n;i++)
 +    if(!frozen[i])
 +      dx[point][i] = ff[i];  /* Initial search direction */
 +    else
 +      dx[point][i] = 0;
 +
 +  stepsize = 1.0/fnorm;
 +  converged = FALSE;
 +
 +  /* Start the loop over BFGS steps.
 +   * Each successful step is counted, and we continue until
 +   * we either converge or reach the max number of steps.
 +   */
 +
 +  ncorr=0;
 +
 +  /* Set the gradient from the force */
 +  converged = FALSE;
 +  for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged; step++) {
 +
 +    /* Write coordinates if necessary */
 +    do_x = do_per_step(step,inputrec->nstxout);
 +    do_f = do_per_step(step,inputrec->nstfout);
 +
 +    mdof_flags = 0;
 +    if (do_x)
 +    {
 +        mdof_flags |= MDOF_X;
 +    }
 +
 +    if (do_f)
 +    {
 +        mdof_flags |= MDOF_F;
 +    }
 +
 +    write_traj(fplog,cr,outf,mdof_flags,
 +               top_global,step,(real)step,state,state,f,f,NULL,NULL);
 +
 +    /* Do the linesearching in the direction dx[point][0..(n-1)] */
 +
 +    /* pointer to current direction - point=0 first time here */
 +    s=dx[point];
 +
 +    /* calculate line gradient */
 +    for(gpa=0,i=0;i<n;i++)
 +      gpa-=s[i]*ff[i];
 +
 +    /* Calculate minimum allowed stepsize, before the average (norm)
 +     * relative change in coordinate is smaller than precision
 +     */
 +    for(minstep=0,i=0;i<n;i++) {
 +      tmp=fabs(xx[i]);
 +      if(tmp<1.0)
 +      tmp=1.0;
 +      tmp = s[i]/tmp;
 +      minstep += tmp*tmp;
 +    }
 +    minstep = GMX_REAL_EPS/sqrt(minstep/n);
 +
 +    if(stepsize<minstep) {
 +      converged=TRUE;
 +      break;
 +    }
 +
 +    /* Store old forces and coordinates */
 +    for(i=0;i<n;i++) {
 +      lastx[i]=xx[i];
 +      lastf[i]=ff[i];
 +    }
 +    Epot0=Epot;
 +
 +    first=TRUE;
 +
 +    for(i=0;i<n;i++)
 +      xa[i]=xx[i];
 +
 +    /* Take a step downhill.
 +     * In theory, we should minimize the function along this direction.
 +     * That is quite possible, but it turns out to take 5-10 function evaluations
 +     * for each line. However, we don't really need to find the exact minimum -
 +     * it is much better to start a new BFGS step in a modified direction as soon
 +     * as we are close to it. This will save a lot of energy evaluations.
 +     *
 +     * In practice, we just try to take a single step.
 +     * If it worked (i.e. lowered the energy), we increase the stepsize but
 +     * then continue straight to the next BFGS step without trying to find any minimum.
 +     * If it didn't work (higher energy), there must be a minimum somewhere between
 +     * the old position and the new one.
 +     *
 +     * Due to the finite numerical accuracy, it turns out that it is a good idea
 +     * to even accept a SMALL increase in energy, if the derivative is still downhill.
 +     * This leads to lower final energies in the tests I've done. / Erik
 +     */
 +    foundlower=FALSE;
 +    EpotA = Epot0;
 +    a = 0.0;
 +    c = a + stepsize; /* reference position along line is zero */
 +
 +    /* Check stepsize first. We do not allow displacements
 +     * larger than emstep.
 +     */
 +    do {
 +      c = a + stepsize;
 +      maxdelta=0;
 +      for(i=0;i<n;i++) {
 +      delta=c*s[i];
 +      if(delta>maxdelta)
 +        maxdelta=delta;
 +      }
 +      if(maxdelta>inputrec->em_stepsize)
 +      stepsize*=0.1;
 +    } while(maxdelta>inputrec->em_stepsize);
 +
 +    /* Take a trial step */
 +    for (i=0; i<n; i++)
 +      xc[i] = lastx[i] + c*s[i];
 +
 +    neval++;
 +    /* Calculate energy for the trial step */
 +    ems.s.x = (rvec *)xc;
 +    ems.f   = (rvec *)fc;
 +    evaluate_energy(fplog,bVerbose,cr,
 +                  state,top_global,&ems,top,
 +                  inputrec,nrnb,wcycle,gstat,
 +                  vsite,constr,fcd,graph,mdatoms,fr,
 +                  mu_tot,enerd,vir,pres,step,FALSE);
 +    EpotC = ems.epot;
 +
 +    /* Calc derivative along line */
 +    for(gpc=0,i=0; i<n; i++) {
 +      gpc -= s[i]*fc[i];   /* f is negative gradient, thus the sign */
 +    }
 +    /* Sum the gradient along the line across CPUs */
 +    if (PAR(cr))
 +      gmx_sumd(1,&gpc,cr);
 +
 +    /* This is the max amount of increase in energy we tolerate */
 +    tmp=sqrt(GMX_REAL_EPS)*fabs(EpotA);
 +
 +    /* Accept the step if the energy is lower, or if it is not significantly higher
 +     * and the line derivative is still negative.
 +     */
 +    if(EpotC<EpotA || (gpc<0 && EpotC<(EpotA+tmp))) {
 +      foundlower = TRUE;
 +      /* Great, we found a better energy. Increase step for next iteration
 +       * if we are still going down, decrease it otherwise
 +       */
 +      if(gpc<0)
 +      stepsize *= 1.618034;  /* The golden section */
 +      else
 +      stepsize *= 0.618034;  /* 1/golden section */
 +    } else {
 +      /* New energy is the same or higher. We will have to do some work
 +       * to find a smaller value in the interval. Take smaller step next time!
 +       */
 +      foundlower = FALSE;
 +      stepsize *= 0.618034;
 +    }
 +
 +    /* OK, if we didn't find a lower value we will have to locate one now - there must
 +     * be one in the interval [a=0,c].
 +     * The same thing is valid here, though: Don't spend dozens of iterations to find
 +     * the line minimum. We try to interpolate based on the derivative at the endpoints,
 +     * and only continue until we find a lower value. In most cases this means 1-2 iterations.
 +     *
 +     * I also have a safeguard for potentially really pathological functions so we never
 +     * take more than 20 steps before we give up ...
 +     *
 +     * If we already found a lower value we just skip this step and continue to the update.
 +     */
 +
 +    if(!foundlower) {
 +
 +      nminstep=0;
 +      do {
 +      /* Select a new trial point.
 +       * If the derivatives at points a & c have different sign we interpolate to zero,
 +       * otherwise just do a bisection.
 +       */
 +
 +      if(gpa<0 && gpc>0)
 +        b = a + gpa*(a-c)/(gpc-gpa);
 +      else
 +        b = 0.5*(a+c);
 +
 +      /* safeguard if interpolation close to machine accuracy causes errors:
 +       * never go outside the interval
 +       */
 +      if(b<=a || b>=c)
 +        b = 0.5*(a+c);
 +
 +      /* Take a trial step */
 +      for (i=0; i<n; i++)
 +        xb[i] = lastx[i] + b*s[i];
 +
 +      neval++;
 +      /* Calculate energy for the trial step */
 +      ems.s.x = (rvec *)xb;
 +      ems.f   = (rvec *)fb;
 +      evaluate_energy(fplog,bVerbose,cr,
 +                      state,top_global,&ems,top,
 +                      inputrec,nrnb,wcycle,gstat,
 +                      vsite,constr,fcd,graph,mdatoms,fr,
 +                      mu_tot,enerd,vir,pres,step,FALSE);
 +      EpotB = ems.epot;
 +
 +      fnorm = ems.fnorm;
 +
 +      for(gpb=0,i=0; i<n; i++)
 +        gpb -= s[i]*fb[i];   /* f is negative gradient, thus the sign */
 +
 +      /* Sum the gradient along the line across CPUs */
 +      if (PAR(cr))
 +        gmx_sumd(1,&gpb,cr);
 +
 +      /* Keep one of the intervals based on the value of the derivative at the new point */
 +      if(gpb>0) {
 +        /* Replace c endpoint with b */
 +        EpotC = EpotB;
 +        c = b;
 +        gpc = gpb;
 +        /* swap coord pointers b/c */
 +        xtmp = xb;
 +        ftmp = fb;
 +        xb = xc;
 +        fb = fc;
 +        xc = xtmp;
 +        fc = ftmp;
 +      } else {
 +        /* Replace a endpoint with b */
 +        EpotA = EpotB;
 +        a = b;
 +        gpa = gpb;
 +        /* swap coord pointers a/b */
 +        xtmp = xb;
 +        ftmp = fb;
 +        xb = xa;
 +        fb = fa;
 +        xa = xtmp;
 +        fa = ftmp;
 +      }
 +
 +      /*
 +       * Stop search as soon as we find a value smaller than the endpoints,
 +       * or if the tolerance is below machine precision.
 +       * Never run more than 20 steps, no matter what.
 +       */
 +      nminstep++;
 +      } while((EpotB>EpotA || EpotB>EpotC) && (nminstep<20));
 +
 +      if(fabs(EpotB-Epot0)<GMX_REAL_EPS || nminstep>=20) {
 +      /* OK. We couldn't find a significantly lower energy.
 +       * If ncorr==0 this was steepest descent, and then we give up.
 +       * If not, reset memory to restart as steepest descent before quitting.
 +         */
 +      if(ncorr==0) {
 +      /* Converged */
 +        converged=TRUE;
 +        break;
 +      } else {
 +        /* Reset memory */
 +        ncorr=0;
 +        /* Search in gradient direction */
 +        for(i=0;i<n;i++)
 +          dx[point][i]=ff[i];
 +        /* Reset stepsize */
 +        stepsize = 1.0/fnorm;
 +        continue;
 +      }
 +      }
 +
 +      /* Select min energy state of A & C, put the best in xx/ff/Epot
 +       */
 +      if(EpotC<EpotA) {
 +      Epot = EpotC;
 +      /* Use state C */
 +      for(i=0;i<n;i++) {
 +        xx[i]=xc[i];
 +        ff[i]=fc[i];
 +      }
 +      stepsize=c;
 +      } else {
 +      Epot = EpotA;
 +      /* Use state A */
 +      for(i=0;i<n;i++) {
 +        xx[i]=xa[i];
 +        ff[i]=fa[i];
 +      }
 +      stepsize=a;
 +      }
 +
 +    } else {
 +      /* found lower */
 +      Epot = EpotC;
 +      /* Use state C */
 +      for(i=0;i<n;i++) {
 +      xx[i]=xc[i];
 +      ff[i]=fc[i];
 +      }
 +      stepsize=c;
 +    }
 +
 +    /* Update the memory information, and calculate a new
 +     * approximation of the inverse hessian
 +     */
 +
 +    /* Have new data in Epot, xx, ff */
 +    if(ncorr<nmaxcorr)
 +      ncorr++;
 +
 +    for(i=0;i<n;i++) {
 +      dg[point][i]=lastf[i]-ff[i];
 +      dx[point][i]*=stepsize;
 +    }
 +
 +    dgdg=0;
 +    dgdx=0;
 +    for(i=0;i<n;i++) {
 +      dgdg+=dg[point][i]*dg[point][i];
 +      dgdx+=dg[point][i]*dx[point][i];
 +    }
 +
 +    diag=dgdx/dgdg;
 +
 +    rho[point]=1.0/dgdx;
 +    point++;
 +
 +    if(point>=nmaxcorr)
 +      point=0;
 +
 +    /* Update */
 +    for(i=0;i<n;i++)
 +      p[i]=ff[i];
 +
 +    cp=point;
 +
 +    /* Recursive update. First go back over the memory points */
 +    for(k=0;k<ncorr;k++) {
 +      cp--;
 +      if(cp<0)
 +      cp=ncorr-1;
 +
 +      sq=0;
 +      for(i=0;i<n;i++)
 +      sq+=dx[cp][i]*p[i];
 +
 +      alpha[cp]=rho[cp]*sq;
 +
 +      for(i=0;i<n;i++)
 +      p[i] -= alpha[cp]*dg[cp][i];
 +    }
 +
 +    for(i=0;i<n;i++)
 +      p[i] *= diag;
 +
 +    /* And then go forward again */
 +    for(k=0;k<ncorr;k++) {
 +      yr = 0;
 +      for(i=0;i<n;i++)
 +      yr += p[i]*dg[cp][i];
 +
 +      beta = rho[cp]*yr;
 +      beta = alpha[cp]-beta;
 +
 +      for(i=0;i<n;i++)
 +      p[i] += beta*dx[cp][i];
 +
 +      cp++;
 +      if(cp>=ncorr)
 +      cp=0;
 +    }
 +
 +    for(i=0;i<n;i++)
 +      if(!frozen[i])
 +      dx[point][i] = p[i];
 +      else
 +      dx[point][i] = 0;
 +
 +    stepsize=1.0;
 +
 +    /* Test whether the convergence criterion is met */
 +    get_f_norm_max(cr,&(inputrec->opts),mdatoms,f,&fnorm,&fmax,&nfmax);
 +
 +    /* Print it if necessary */
 +    if (MASTER(cr)) {
 +      if(bVerbose)
 +      fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
 +              step,Epot,fnorm/sqrt(state->natoms),fmax,nfmax+1);
 +      /* Store the new (lower) energies */
 +      upd_mdebin(mdebin,FALSE,FALSE,(double)step,
 +                 mdatoms->tmass,enerd,state,inputrec->fepvals,inputrec->expandedvals,state->box,
 +                 NULL,NULL,vir,pres,NULL,mu_tot,constr);
 +      do_log = do_per_step(step,inputrec->nstlog);
 +      do_ene = do_per_step(step,inputrec->nstenergy);
 +      if(do_log)
 +          print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
 +      print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
 +               do_log ? fplog : NULL,step,step,eprNORMAL,
 +               TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 +    }
 +
 +    /* Stop when the maximum force lies below tolerance.
 +     * If we have reached machine precision, converged is already set to true.
 +     */
 +
 +    converged = converged || (fmax < inputrec->em_tol);
 +
 +  } /* End of the loop */
 +
 +  if(converged)
 +    step--; /* we never took that last step in this case */
 +
 +    if(fmax>inputrec->em_tol)
 +    {
 +        if (MASTER(cr))
 +        {
 +            warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
 +            warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
 +        }
 +        converged = FALSE;
 +    }
 +
 +  /* If we printed energy and/or logfile last step (which was the last step)
 +   * we don't have to do it again, but otherwise print the final values.
 +   */
 +  if(!do_log) /* Write final value to log since we didn't do anything last step */
 +    print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
 +  if(!do_ene || !do_log) /* Write final energy file entries */
 +    print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
 +             !do_log ? fplog : NULL,step,step,eprNORMAL,
 +             TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 +
 +  /* Print some stuff... */
 +  if (MASTER(cr))
 +    fprintf(stderr,"\nwriting lowest energy coordinates.\n");
 +
 +  /* IMPORTANT!
 +   * For accurate normal mode calculation it is imperative that we
 +   * store the last conformation into the full precision binary trajectory.
 +   *
 +   * However, we should only do it if we did NOT already write this step
 +   * above (which we did if do_x or do_f was true).
 +   */
 +  do_x = !do_per_step(step,inputrec->nstxout);
 +  do_f = !do_per_step(step,inputrec->nstfout);
 +  write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
 +                top_global,inputrec,step,
 +                &ems,state,f);
 +
 +  if (MASTER(cr)) {
 +    print_converged(stderr,LBFGS,inputrec->em_tol,step,converged,
 +                  number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
 +    print_converged(fplog,LBFGS,inputrec->em_tol,step,converged,
 +                  number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
 +
 +    fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
 +  }
 +
 +  finish_em(fplog,cr,outf,runtime,wcycle);
 +
 +  /* To print the actual number of steps we needed somewhere */
 +  runtime->nsteps_done = step;
 +
 +  return 0;
 +} /* That's all folks */
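Note: the two-loop recursion above (the backward pass over the stored dx/dg pairs with rho and alpha, the diag scaling, and the forward pass) is the core of the L-BFGS update, but it is easy to lose among the pointer-swap and reallocation bookkeeping. The following is a minimal standalone sketch of that recursion only; the function name and the plain double arrays are illustrative assumptions, not part of the GROMACS sources being merged.

    /* Sketch: turn the current negative gradient p into an L-BFGS search
     * direction using ncorr stored correction pairs (dx, dg), their rho
     * values and a scratch alpha array. 'point' is the next slot to be
     * overwritten, exactly as in the loop above.
     */
    static void lbfgs_two_loop(int n, int ncorr, int point,
                               double **dx, double **dg, double *rho,
                               double *alpha, double diag, double *p)
    {
        int i, k, cp = point;

        /* Backward pass over the stored pairs */
        for (k = 0; k < ncorr; k++)
        {
            double sq = 0;
            if (--cp < 0)
            {
                cp = ncorr - 1;
            }
            for (i = 0; i < n; i++)
            {
                sq += dx[cp][i]*p[i];
            }
            alpha[cp] = rho[cp]*sq;
            for (i = 0; i < n; i++)
            {
                p[i] -= alpha[cp]*dg[cp][i];
            }
        }

        /* Scale with the diagonal Hessian estimate diag = dgdx/dgdg */
        for (i = 0; i < n; i++)
        {
            p[i] *= diag;
        }

        /* Forward pass, restoring the curvature information */
        for (k = 0; k < ncorr; k++)
        {
            double yr = 0;
            for (i = 0; i < n; i++)
            {
                yr += p[i]*dg[cp][i];
            }
            for (i = 0; i < n; i++)
            {
                p[i] += (alpha[cp] - rho[cp]*yr)*dx[cp][i];
            }
            if (++cp >= ncorr)
            {
                cp = 0;
            }
        }
    }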
 +
 +
 +double do_steep(FILE *fplog,t_commrec *cr,
 +                int nfile, const t_filenm fnm[],
 +                const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
 +                int nstglobalcomm,
 +                gmx_vsite_t *vsite,gmx_constr_t constr,
 +                int stepout,
 +                t_inputrec *inputrec,
 +                gmx_mtop_t *top_global,t_fcdata *fcd,
 +                t_state *state_global,
 +                t_mdatoms *mdatoms,
 +                t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +                gmx_edsam_t ed,
 +                t_forcerec *fr,
 +                int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
 +                gmx_membed_t membed,
 +                real cpt_period,real max_hours,
 +                const char *deviceOptions,
 +                unsigned long Flags,
 +                gmx_runtime_t *runtime)
 +{
 +  const char *SD="Steepest Descents";
 +  em_state_t *s_min,*s_try;
 +  rvec       *f_global;
 +  gmx_localtop_t *top;
 +  gmx_enerdata_t *enerd;
 +  rvec   *f;
 +  gmx_global_stat_t gstat;
 +  t_graph    *graph;
 +  real   stepsize,constepsize;
 +  real   ustep,dvdlambda,fnormn;
 +  gmx_mdoutf_t *outf;
 +  t_mdebin   *mdebin;
 +  gmx_bool   bDone,bAbort,do_x,do_f;
 +  tensor vir,pres;
 +  rvec   mu_tot;
 +  int    nsteps;
 +  int    count=0;
 +  int    steps_accepted=0;
 +  /* not used */
 +  real   terminate=0;
 +
 +  s_min = init_em_state();
 +  s_try = init_em_state();
 +
 +  /* Init em and store the local state in s_try */
 +  init_em(fplog,SD,cr,inputrec,
 +          state_global,top_global,s_try,&top,&f,&f_global,
 +          nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
 +          nfile,fnm,&outf,&mdebin);
 +
 +  /* Print to log file  */
 +  print_em_start(fplog,cr,runtime,wcycle,SD);
 +
 +  /* Set variables for stepsize (in nm). This is the largest
 +   * step that we are going to make in any direction.
 +   */
 +  ustep = inputrec->em_stepsize;
 +  stepsize = 0;
 +
 +  /* Max number of steps  */
 +  nsteps = inputrec->nsteps;
 +
 +  if (MASTER(cr))
 +    /* Print to the screen  */
 +    sp_header(stderr,SD,inputrec->em_tol,nsteps);
 +  if (fplog)
 +    sp_header(fplog,SD,inputrec->em_tol,nsteps);
 +
 +  /**** HERE STARTS THE LOOP ****
 +   * count is the counter for the number of steps
 +   * bDone will be TRUE when the minimization has converged
 +   * bAbort will be TRUE when nsteps steps have been performed or when
 +   * the stepsize becomes smaller than is reasonable for machine precision
 +   */
 +  count  = 0;
 +  bDone  = FALSE;
 +  bAbort = FALSE;
 +  while( !bDone && !bAbort ) {
 +    bAbort = (nsteps >= 0) && (count == nsteps);
 +
 +    /* set new coordinates, except for first step */
 +    if (count > 0) {
 +        do_em_step(cr,inputrec,mdatoms,fr->bMolPBC,
 +                   s_min,stepsize,s_min->f,s_try,
 +                   constr,top,nrnb,wcycle,count);
 +    }
 +
 +    evaluate_energy(fplog,bVerbose,cr,
 +                  state_global,top_global,s_try,top,
 +                  inputrec,nrnb,wcycle,gstat,
 +                  vsite,constr,fcd,graph,mdatoms,fr,
 +                  mu_tot,enerd,vir,pres,count,count==0);
 +
 +    if (MASTER(cr))
 +      print_ebin_header(fplog,count,count,s_try->s.lambda[efptFEP]);
 +
 +    if (count == 0)
 +      s_min->epot = s_try->epot + 1;
 +
 +    /* Print it if necessary  */
 +    if (MASTER(cr)) {
 +      if (bVerbose) {
 +      fprintf(stderr,"Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
 +              count,ustep,s_try->epot,s_try->fmax,s_try->a_fmax+1,
 +              (s_try->epot < s_min->epot) ? '\n' : '\r');
 +      }
 +
 +      if (s_try->epot < s_min->epot) {
 +      /* Store the new (lower) energies  */
 +      upd_mdebin(mdebin,FALSE,FALSE,(double)count,
 +                 mdatoms->tmass,enerd,&s_try->s,inputrec->fepvals,inputrec->expandedvals,
 +                   s_try->s.box, NULL,NULL,vir,pres,NULL,mu_tot,constr);
 +      print_ebin(outf->fp_ene,TRUE,
 +                 do_per_step(steps_accepted,inputrec->nstdisreout),
 +                 do_per_step(steps_accepted,inputrec->nstorireout),
 +                 fplog,count,count,eprNORMAL,TRUE,
 +                 mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 +      fflush(fplog);
 +      }
 +    }
 +
 +    /* Now if the new energy is smaller than the previous...
 +     * or if this is the first step!
 +     * or if we did random steps!
 +     */
 +
 +    if ( (count==0) || (s_try->epot < s_min->epot) ) {
 +      steps_accepted++;
 +
 +      /* Test whether the convergence criterion is met...  */
 +      bDone = (s_try->fmax < inputrec->em_tol);
 +
 +      /* Copy the arrays for force, positions and energy  */
 +      /* The 'Min' array always holds the coords and forces of the minimal
 +       sampled energy  */
 +      swap_em_state(s_min,s_try);
 +      if (count > 0)
 +      ustep *= 1.2;
 +
 +      /* Write to trn, if necessary */
 +      do_x = do_per_step(steps_accepted,inputrec->nstxout);
 +      do_f = do_per_step(steps_accepted,inputrec->nstfout);
 +      write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
 +                    top_global,inputrec,count,
 +                    s_min,state_global,f_global);
 +    }
 +    else {
 +      /* If energy is not smaller make the step smaller...  */
 +      ustep *= 0.5;
 +
 +      if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
 +      /* Reload the old state */
 +      em_dd_partition_system(fplog,count,cr,top_global,inputrec,
 +                             s_min,top,mdatoms,fr,vsite,constr,
 +                             nrnb,wcycle);
 +      }
 +    }
 +
 +    /* Determine new step  */
 +    stepsize = ustep/s_min->fmax;
 +
 +    /* Check if stepsize is too small, with 1 nm as a characteristic length */
 +#ifdef GMX_DOUBLE
 +        if (count == nsteps || ustep < 1e-12)
 +#else
 +        if (count == nsteps || ustep < 1e-6)
 +#endif
 +        {
 +            if (MASTER(cr))
 +            {
 +                warn_step(stderr,inputrec->em_tol,count==nsteps,constr!=NULL);
 +                warn_step(fplog ,inputrec->em_tol,count==nsteps,constr!=NULL);
 +            }
 +            bAbort=TRUE;
 +        }
 +
 +    count++;
 +  } /* End of the loop  */
 +
 +    /* Print some stuff...  */
 +  if (MASTER(cr))
 +    fprintf(stderr,"\nwriting lowest energy coordinates.\n");
 +  write_em_traj(fplog,cr,outf,TRUE,inputrec->nstfout,ftp2fn(efSTO,nfile,fnm),
 +              top_global,inputrec,count,
 +              s_min,state_global,f_global);
 +
 +  fnormn = s_min->fnorm/sqrt(state_global->natoms);
 +
 +  if (MASTER(cr)) {
 +    print_converged(stderr,SD,inputrec->em_tol,count,bDone,nsteps,
 +                  s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
 +    print_converged(fplog,SD,inputrec->em_tol,count,bDone,nsteps,
 +                  s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
 +  }
 +
 +  finish_em(fplog,cr,outf,runtime,wcycle);
 +
 +  /* To print the actual number of steps we needed somewhere */
 +  inputrec->nsteps=count;
 +
 +  runtime->nsteps_done = count;
 +
 +  return 0;
 +} /* That's all folks */
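Note: the accept/reject logic of do_steep() reduces to a very small rule: take a trial step of at most ustep along the force direction, keep it only if the energy drops, and grow or shrink ustep accordingly. The sketch below shows just that rule in isolation; the callback type and function name are illustrative assumptions and not GROMACS API.

    /* Sketch: one steepest-descent trial step with adaptive maximum
     * displacement, mirroring the ustep *= 1.2 / ustep *= 0.5 logic above.
     */
    typedef double (*energy_fn)(const double *x, int n);

    static int steep_trial(energy_fn energy, int n,
                           double *x, double *x_try,
                           const double *step_dir, /* force/fmax, so the largest component is 1 */
                           double *ustep, double *epot_min)
    {
        int    i;
        double epot_try;

        for (i = 0; i < n; i++)
        {
            x_try[i] = x[i] + (*ustep)*step_dir[i];
        }
        epot_try = energy(x_try, n);

        if (epot_try < *epot_min)
        {
            /* Accept: keep the new state and allow a larger step next time */
            for (i = 0; i < n; i++)
            {
                x[i] = x_try[i];
            }
            *epot_min = epot_try;
            *ustep   *= 1.2;
            return 1;
        }

        /* Reject: keep the old state and halve the maximum displacement */
        *ustep *= 0.5;
        return 0;
    }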
 +
 +
 +double do_nm(FILE *fplog,t_commrec *cr,
 +             int nfile,const t_filenm fnm[],
 +             const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
 +             int nstglobalcomm,
 +             gmx_vsite_t *vsite,gmx_constr_t constr,
 +             int stepout,
 +             t_inputrec *inputrec,
 +             gmx_mtop_t *top_global,t_fcdata *fcd,
 +             t_state *state_global,
 +             t_mdatoms *mdatoms,
 +             t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +             gmx_edsam_t ed,
 +             t_forcerec *fr,
 +             int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
 +             gmx_membed_t membed,
 +             real cpt_period,real max_hours,
 +             const char *deviceOptions,
 +             unsigned long Flags,
 +             gmx_runtime_t *runtime)
 +{
 +    const char *NM = "Normal Mode Analysis";
 +    gmx_mdoutf_t *outf;
 +    int        natoms,atom,d;
 +    int        nnodes,node;
 +    rvec       *f_global;
 +    gmx_localtop_t *top;
 +    gmx_enerdata_t *enerd;
 +    rvec       *f;
 +    gmx_global_stat_t gstat;
 +    t_graph    *graph;
 +    real       t,t0,lambda,lam0;
 +    gmx_bool       bNS;
 +    tensor     vir,pres;
 +    rvec       mu_tot;
 +    rvec       *fneg,*dfdx;
 +    gmx_bool       bSparse; /* use sparse matrix storage format */
 +    size_t     sz;
 +    gmx_sparsematrix_t * sparse_matrix = NULL;
 +    real *     full_matrix             = NULL;
 +    em_state_t *   state_work;
 +
 +    /* added with respect to mdrun */
 +    int        i,j,k,row,col;
 +    real       der_range=10.0*sqrt(GMX_REAL_EPS);
 +    real       x_min;
 +    real       fnorm,fmax;
 +
 +    if (constr != NULL)
 +    {
 +        gmx_fatal(FARGS,"Constraints present with Normal Mode Analysis, this combination is not supported");
 +    }
 +
 +    state_work = init_em_state();
 +
 +    /* Init em and store the local state in state_minimum */
 +    init_em(fplog,NM,cr,inputrec,
 +            state_global,top_global,state_work,&top,
 +            &f,&f_global,
 +            nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
 +            nfile,fnm,&outf,NULL);
 +
 +    natoms = top_global->natoms;
 +    snew(fneg,natoms);
 +    snew(dfdx,natoms);
 +
 +#ifndef GMX_DOUBLE
 +    if (MASTER(cr))
 +    {
 +        fprintf(stderr,
 +                "NOTE: This version of Gromacs has been compiled in single precision,\n"
 +                "      which MIGHT not be accurate enough for normal mode analysis.\n"
 +                "      Gromacs now uses sparse matrix storage, so the memory requirements\n"
 +                "      are fairly modest even if you recompile in double precision.\n\n");
 +    }
 +#endif
 +
 +    /* Check if we can/should use sparse storage format.
 +     *
 +     * Sparse format is only useful when the Hessian itself is sparse, which it
 +     * will be when we use a cutoff.
 +     * For small systems (n<1000) it is easier to always use full matrix format, though.
 +     */
 +    if(EEL_FULL(fr->eeltype) || fr->rlist==0.0)
 +    {
 +        fprintf(stderr,"Non-cutoff electrostatics used, forcing full Hessian format.\n");
 +        bSparse = FALSE;
 +    }
 +    else if(top_global->natoms < 1000)
 +    {
 +        fprintf(stderr,"Small system size (N=%d), using full Hessian format.\n",top_global->natoms);
 +        bSparse = FALSE;
 +    }
 +    else
 +    {
 +        fprintf(stderr,"Using compressed symmetric sparse Hessian format.\n");
 +        bSparse = TRUE;
 +    }
 +
 +    sz = DIM*top_global->natoms;
 +
 +    fprintf(stderr,"Allocating Hessian memory...\n\n");
 +
 +    if(bSparse)
 +    {
 +        sparse_matrix=gmx_sparsematrix_init(sz);
 +        sparse_matrix->compressed_symmetric = TRUE;
 +    }
 +    else
 +    {
 +        snew(full_matrix,sz*sz);
 +    }
 +
 +    /* Initial values */
 +    t0           = inputrec->init_t;
 +    lam0         = inputrec->fepvals->init_lambda;
 +    t            = t0;
 +    lambda       = lam0;
 +
 +    init_nrnb(nrnb);
 +
 +    where();
 +
 +    /* Write start time and temperature */
 +    print_em_start(fplog,cr,runtime,wcycle,NM);
 +
 +    /* fudge nr of steps to nr of atoms */
 +    inputrec->nsteps = natoms*2;
 +
 +    if (MASTER(cr))
 +    {
 +        fprintf(stderr,"starting normal mode calculation '%s'\n%d steps.\n\n",
 +                *(top_global->name),(int)inputrec->nsteps);
 +    }
 +
 +    nnodes = cr->nnodes;
 +
 +    /* Make evaluate_energy do a single node force calculation */
 +    cr->nnodes = 1;
 +    evaluate_energy(fplog,bVerbose,cr,
 +                    state_global,top_global,state_work,top,
 +                    inputrec,nrnb,wcycle,gstat,
 +                    vsite,constr,fcd,graph,mdatoms,fr,
 +                    mu_tot,enerd,vir,pres,-1,TRUE);
 +    cr->nnodes = nnodes;
 +
 +    /* if forces are not small, warn user */
 +    get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,state_work);
 +
 +    if (MASTER(cr))
 +    {
 +        fprintf(stderr,"Maximum force:%12.5e\n",state_work->fmax);
 +        if (state_work->fmax > 1.0e-3)
 +        {
 +            fprintf(stderr,"Maximum force probably not small enough to");
 +            fprintf(stderr," ensure that you are in an \nenergy well. ");
 +            fprintf(stderr,"Be aware that negative eigenvalues may occur");
 +            fprintf(stderr," when the\nresulting matrix is diagonalized.\n");
 +        }
 +    }
 +
 +    /***********************************************************
 +     *
 +     *      Loop over all pairs in matrix
 +     *
 +     *      do_force called twice. Once with positive and
 +     *      once with negative displacement
 +     *
 +     ************************************************************/
 +
 +    /* Steps are divided one by one over the nodes */
 +    for(atom=cr->nodeid; atom<natoms; atom+=nnodes)
 +    {
 +
 +        for (d=0; d<DIM; d++)
 +        {
 +            x_min = state_work->s.x[atom][d];
 +
 +            state_work->s.x[atom][d] = x_min - der_range;
 +
 +            /* Make evaluate_energy do a single node force calculation */
 +            cr->nnodes = 1;
 +            evaluate_energy(fplog,bVerbose,cr,
 +                            state_global,top_global,state_work,top,
 +                            inputrec,nrnb,wcycle,gstat,
 +                            vsite,constr,fcd,graph,mdatoms,fr,
 +                            mu_tot,enerd,vir,pres,atom*2,FALSE);
 +
 +            for(i=0; i<natoms; i++)
 +            {
 +                copy_rvec(state_work->f[i], fneg[i]);
 +            }
 +
 +            state_work->s.x[atom][d] = x_min + der_range;
 +
 +            evaluate_energy(fplog,bVerbose,cr,
 +                            state_global,top_global,state_work,top,
 +                            inputrec,nrnb,wcycle,gstat,
 +                            vsite,constr,fcd,graph,mdatoms,fr,
 +                            mu_tot,enerd,vir,pres,atom*2+1,FALSE);
 +            cr->nnodes = nnodes;
 +
 +            /* x is restored to original */
 +            state_work->s.x[atom][d] = x_min;
 +
 +            for(j=0; j<natoms; j++)
 +            {
 +                for (k=0; (k<DIM); k++)
 +                {
 +                    dfdx[j][k] =
 +                        -(state_work->f[j][k] - fneg[j][k])/(2*der_range);
 +                }
 +            }
 +
 +            if (!MASTER(cr))
 +            {
 +#ifdef GMX_MPI
 +#ifdef GMX_DOUBLE
 +#define mpi_type MPI_DOUBLE
 +#else
 +#define mpi_type MPI_FLOAT
 +#endif
 +                MPI_Send(dfdx[0],natoms*DIM,mpi_type,MASTERNODE(cr),cr->nodeid,
 +                         cr->mpi_comm_mygroup);
 +#endif
 +            }
 +            else
 +            {
 +                for(node=0; (node<nnodes && atom+node<natoms); node++)
 +                {
 +                    if (node > 0)
 +                    {
 +#ifdef GMX_MPI
 +                        MPI_Status stat;
 +                        MPI_Recv(dfdx[0],natoms*DIM,mpi_type,node,node,
 +                                 cr->mpi_comm_mygroup,&stat);
 +#undef mpi_type
 +#endif
 +                    }
 +
 +                    row = (atom + node)*DIM + d;
 +
 +                    for(j=0; j<natoms; j++)
 +                    {
 +                        for(k=0; k<DIM; k++)
 +                        {
 +                            col = j*DIM + k;
 +
 +                            if (bSparse)
 +                            {
 +                                if (col >= row && dfdx[j][k] != 0.0)
 +                                {
 +                                    gmx_sparsematrix_increment_value(sparse_matrix,
 +                                                                     row,col,dfdx[j][k]);
 +                                }
 +                            }
 +                            else
 +                            {
 +                                full_matrix[row*sz+col] = dfdx[j][k];
 +                            }
 +                        }
 +                    }
 +                }
 +            }
 +
 +            if (bVerbose && fplog)
 +            {
 +                fflush(fplog);
 +            }
 +        }
 +        /* write progress */
 +        if (MASTER(cr) && bVerbose)
 +        {
 +            fprintf(stderr,"\rFinished step %d out of %d",
 +                    min(atom+nnodes,natoms),natoms);
 +            fflush(stderr);
 +        }
 +    }
 +
 +    if (MASTER(cr))
 +    {
 +        fprintf(stderr,"\n\nWriting Hessian...\n");
 +        gmx_mtxio_write(ftp2fn(efMTX,nfile,fnm),sz,sz,full_matrix,sparse_matrix);
 +    }
 +
 +    finish_em(fplog,cr,outf,runtime,wcycle);
 +
 +    runtime->nsteps_done = natoms*2;
 +
 +    return 0;
 +}
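Note: the inner loop of do_nm() is a plain central-difference rule: each coordinate is displaced by +/- der_range, the forces are evaluated twice, and one Hessian column follows from the force difference (force is minus the gradient, hence the sign). A minimal sketch of that rule with illustrative names, standing in for the GROMACS evaluate_energy() and matrix machinery:

    /* Sketch: fill Hessian column 'col' by central differences of the forces.
     * forces(x, f, n) is an assumed callback writing n force components.
     */
    static void hessian_column(int n, int col, double h,
                               void (*forces)(const double *x, double *f, int n),
                               double *x, double *f_minus, double *f_plus,
                               double *column)
    {
        int    i;
        double x0 = x[col];

        x[col] = x0 - h;
        forces(x, f_minus, n);        /* forces at the negative displacement */

        x[col] = x0 + h;
        forces(x, f_plus, n);         /* forces at the positive displacement */

        x[col] = x0;                  /* restore the original coordinate */

        for (i = 0; i < n; i++)
        {
            /* H[i][col] ~ d^2 E/(dx_i dx_col) = -(f_plus - f_minus)/(2 h) */
            column[i] = -(f_plus[i] - f_minus[i])/(2.0*h);
        }
    }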
 +
Simple merge
index 811a92f26ed737af1780c712458d7196c174a990,0000000000000000000000000000000000000000..208773dc99a827fc13bb492cb7f90aed6e9caf2d
mode 100644,000000..100644
--- /dev/null
@@@ -1,2802 -1,0 +1,2649 @@@
-  * 
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
-         fprintf(debug,"reallocating neigborlist il_code=%d, maxnri=%d\n",
-                 nl->il_code,nl->maxnri); 
++ *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#ifdef GMX_THREAD_SHM_FDECOMP
 +#include <pthread.h> 
 +#endif
 +
 +#include <math.h>
 +#include <string.h>
 +#include "sysstuff.h"
 +#include "smalloc.h"
 +#include "macros.h"
 +#include "maths.h"
 +#include "vec.h"
 +#include "network.h"
 +#include "nsgrid.h"
 +#include "force.h"
 +#include "nonbonded.h"
 +#include "ns.h"
 +#include "pbc.h"
 +#include "names.h"
 +#include "gmx_fatal.h"
 +#include "nrnb.h"
 +#include "txtdump.h"
 +#include "mtop_util.h"
 +
 +#include "domdec.h"
 +#include "adress.h"
 +
 +
 +/* 
 + *    E X C L U S I O N   H A N D L I N G
 + */
 +
 +#ifdef DEBUG
 +static void SETEXCL_(t_excl e[],atom_id i,atom_id j)
 +{   e[j] = e[j] | (1<<i); }
 +static void RMEXCL_(t_excl e[],atom_id i,atom_id j) 
 +{ e[j]=e[j] & ~(1<<i); }
 +static gmx_bool ISEXCL_(t_excl e[],atom_id i,atom_id j) 
 +{ return (gmx_bool)(e[j] & (1<<i)); }
 +static gmx_bool NOTEXCL_(t_excl e[],atom_id i,atom_id j)
 +{  return !(ISEXCL(e,i,j)); }
 +#else
 +#define SETEXCL(e,i,j) (e)[((atom_id) (j))] |= (1<<((atom_id) (i)))
 +#define RMEXCL(e,i,j)  (e)[((atom_id) (j))] &= (~(1<<((atom_id) (i))))
 +#define ISEXCL(e,i,j)  (gmx_bool) ((e)[((atom_id) (j))] & (1<<((atom_id) (i))))
 +#define NOTEXCL(e,i,j) !(ISEXCL(e,i,j))
 +#endif
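Note: the macros above pack the i-j exclusions of one charge-group window into per-j bitmasks: bit i of e[j] is set when the pair must be skipped. A tiny self-contained demo of the same encoding, using plain unsigned int instead of t_excl/atom_id, so an illustration only:

    #include <stdio.h>

    int main(void)
    {
        unsigned int e[8] = {0};        /* one mask per j particle            */
        unsigned int i = 2, j = 5;

        e[j] |= (1u << i);              /* SETEXCL: exclude pair (i=2, j=5)   */
        printf("excluded: %u\n", (e[j] >> i) & 1u);   /* ISEXCL -> 1          */

        e[j] &= ~(1u << i);             /* RMEXCL: clear the exclusion again  */
        printf("excluded: %u\n", (e[j] >> i) & 1u);   /* -> 0                 */

        return 0;
    }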
 +
 +/************************************************
 + *
 + *  U T I L I T I E S    F O R    N S
 + *
 + ************************************************/
 +
 +static void reallocate_nblist(t_nblist *nl)
 +{
 +    if (gmx_debug_at)
 +    {
-     if (nl->enlist == enlistCG_CG)
++        fprintf(debug,"reallocating neighborlist (ielec=%d, ivdw=%d, igeometry=%d, free_energy=%d), maxnri=%d\n",
++                nl->ielec,nl->ivdw,nl->igeometry,nl->free_energy,nl->maxnri);
 +    }
 +    srenew(nl->iinr,   nl->maxnri);
- /* ivdw/icoul are used to determine the type of interaction, so we
-  * can set an innerloop index here. The obvious choice for this would have
-  * been the vdwtype/coultype values in the forcerecord, but unfortunately 
-  * those types are braindead - for instance both Buckingham and normal 
-  * Lennard-Jones use the same value (evdwCUT), and a separate gmx_boolean variable
-  * to determine which interaction is used. There is further no special value
-  * for 'no interaction'. For backward compatibility with old TPR files we won't
-  * change this in the 3.x series, so when calling this routine you should use:
-  *
-  * icoul=0 no coulomb interaction
-  * icoul=1 cutoff standard coulomb
-  * icoul=2 reaction-field coulomb
-  * icoul=3 tabulated coulomb
-  *
-  * ivdw=0 no vdw interaction
-  * ivdw=1 standard L-J interaction
-  * ivdw=2 Buckingham
-  * ivdw=3 tabulated vdw.
-  *
-  * Kind of ugly, but it works.
-  */
- static void init_nblist(t_nblist *nl_sr,t_nblist *nl_lr,
++    if (nl->igeometry == GMX_NBLIST_GEOMETRY_CG_CG)
 +    {
 +        srenew(nl->iinr_end,nl->maxnri);
 +    }
 +    srenew(nl->gid,    nl->maxnri);
 +    srenew(nl->shift,  nl->maxnri);
 +    srenew(nl->jindex, nl->maxnri+1);
 +}
 +
-                         int ivdw, int icoul, 
-                         gmx_bool bfree, int enlist)
++
++static void init_nblist(FILE *log, t_nblist *nl_sr,t_nblist *nl_lr,
 +                        int maxsr,int maxlr,
-     int inloop[20] =
-     { 
-         eNR_NBKERNEL_NONE,
-         eNR_NBKERNEL010,
-         eNR_NBKERNEL020,
-         eNR_NBKERNEL030,
-         eNR_NBKERNEL100,
-         eNR_NBKERNEL110,
-         eNR_NBKERNEL120,
-         eNR_NBKERNEL130,
-         eNR_NBKERNEL200,
-         eNR_NBKERNEL210,
-         eNR_NBKERNEL220,
-         eNR_NBKERNEL230,
-         eNR_NBKERNEL300,
-         eNR_NBKERNEL310,
-         eNR_NBKERNEL320,
-         eNR_NBKERNEL330,
-         eNR_NBKERNEL400,
-         eNR_NBKERNEL410,
-         eNR_NBKERNEL_NONE,
-         eNR_NBKERNEL430
-     };
-   
++                        int ivdw, int ivdwmod,
++                        int ielec, int ielecmod,
++                        gmx_bool bfree, int igeometry)
 +{
 +    t_nblist *nl;
 +    int      homenr;
 +    int      i,nn;
 +    
-         
 +    for(i=0; (i<2); i++)
 +    {
 +        nl     = (i == 0) ? nl_sr : nl_lr;
 +        homenr = (i == 0) ? maxsr : maxlr;
 +
 +        if (nl == NULL)
 +        {
 +            continue;
 +        }
-         nl->ivdw  = ivdw;
-         nl->icoul = icoul;
++
++
 +        /* Set coul/vdw in neighborlist, and for the normal loops we determine
 +         * an index of which one to call.
 +         */
-     
++        nl->ivdw        = ivdw;
++        nl->ivdwmod     = ivdwmod;
++        nl->ielec       = ielec;
++        nl->ielecmod    = ielecmod;
 +        nl->free_energy = bfree;
-             nl->enlist  = enlistATOM_ATOM;
-             nl->il_code = eNR_NBKERNEL_FREE_ENERGY;
++        nl->igeometry   = igeometry;
++
 +        if (bfree)
 +        {
-         else
-         {
-             nl->enlist = enlist;
-             nn = inloop[4*icoul + ivdw];
-             
-             /* solvent loops follow directly after the corresponding
-             * ordinary loops, in the order:
-             *
-             * SPC, SPC-SPC, TIP4p, TIP4p-TIP4p
-             *   
-             */
-             switch (enlist) {
-             case enlistATOM_ATOM:
-             case enlistCG_CG:
-                 break;
-             case enlistSPC_ATOM:     nn += 1; break;
-             case enlistSPC_SPC:      nn += 2; break;
-             case enlistTIP4P_ATOM:   nn += 3; break;
-             case enlistTIP4P_TIP4P:  nn += 4; break;
-             }
-             
-             nl->il_code = nn;
-         }
-         if (debug)
-             fprintf(debug,"Initiating neighbourlist type %d for %s interactions,\nwith %d SR, %d LR atoms.\n",
-                     nl->il_code,ENLISTTYPE(enlist),maxsr,maxlr);
++            nl->igeometry  = GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE;
 +        }
-    int icoul,icoulf,ivdw;
++        
++        gmx_nonbonded_set_kernel_pointers( (i==0) ? log : NULL,nl);
 +        
 +        /* maxnri is influenced by the number of shifts (maximum is 8)
 +         * and the number of energy groups.
 +         * If it is not enough, nl memory will be reallocated during the run.
 +         * 4 seems to be a reasonable factor, which only causes reallocation
 +         * during runs with tiny systems and many energy groups.
 +         */
 +        nl->maxnri      = homenr*4;
 +        nl->maxnrj      = 0;
 +        nl->maxlen      = 0;
 +        nl->nri         = -1;
 +        nl->nrj         = 0;
 +        nl->iinr        = NULL;
 +        nl->gid         = NULL;
 +        nl->shift       = NULL;
 +        nl->jindex      = NULL;
 +        reallocate_nblist(nl);
 +        nl->jindex[0] = 0;
++
++        if(debug)
++        {
++            fprintf(debug,"Initiating neighbourlist (ielec=%d, ivdw=%d, free=%d) for %s interactions,\nwith %d SR, %d LR atoms.\n",
++                    nl->ielec,nl->ivdw,nl->free_energy,gmx_nblist_geometry_names[nl->igeometry],maxsr,maxlr);
++        }
++
 +#ifdef GMX_THREAD_SHM_FDECOMP
 +        nl->counter = 0;
 +        snew(nl->mtx,1);
 +        pthread_mutex_init(nl->mtx,NULL);
 +#endif
 +    }
 +}
 +
 +void init_neighbor_list(FILE *log,t_forcerec *fr,int homenr)
 +{
 +   /* Make maxlr tunable! (does not seem to be a big difference though) 
 +    * This parameter determines the number of i particles in a long range 
 +    * neighbourlist. Too few means many function calls, too many means
 +    * cache thrashing.
 +    */
 +   int maxsr,maxsr_wat,maxlr,maxlr_wat;
-    int enlist_def,enlist_w,enlist_ww;
++   int ielec,ielecf,ivdw,ielecmod,ielecmodf,ivdwmod;
 +   int solvent;
-    /* Determine the values for icoul/ivdw. */
-    /* Start with GB */
-    if(fr->bGB)
-    {
-        icoul=enbcoulGB;
-    }
-    else if (fr->bcoultab)
-    {
-        icoul = enbcoulTAB;
-    }
-    else if (EEL_RF(fr->eeltype))
-    {
-        icoul = enbcoulRF;
-    }
-    else 
-    {
-        icoul = enbcoulOOR;
-    }
-    
-    if (fr->bvdwtab)
-    {
-        ivdw = enbvdwTAB;
-    }
-    else if (fr->bBHAM)
-    {
-        ivdw = enbvdwBHAM;
-    }
-    else 
-    {
-        ivdw = enbvdwLJ;
-    }
++   int igeometry_def,igeometry_w,igeometry_ww;
 +   int i;
 +   t_nblists *nbl;
 +
 +   /* maxsr     = homenr-fr->nWatMol*3; */
 +   maxsr     = homenr;
 +
 +   if (maxsr < 0)
 +   {
 +     gmx_fatal(FARGS,"%s, %d: Negative number of short range atoms.\n"
 +               "Call your Gromacs dealer for assistance.",__FILE__,__LINE__);
 +   }
 +   /* This is just for initial allocation, so we do not reallocate
 +    * all the nlist arrays many times in a row.
 +    * The numbers seem very accurate, but they are uncritical.
 +    */
 +   maxsr_wat = min(fr->nWatMol,(homenr+2)/3); 
 +   if (fr->bTwinRange) 
 +   {
 +       maxlr     = 50;
 +       maxlr_wat = min(maxsr_wat,maxlr);
 +   }
 +   else
 +   {
 +     maxlr = maxlr_wat = 0;
 +   }  
 +
-        enlist_def = enlistATOM_ATOM;
++   /* Determine the values for ielec/ivdw. */
++   ielec = fr->nbkernel_elec_interaction;
++   ivdw  = fr->nbkernel_vdw_interaction;
++   ielecmod = fr->nbkernel_elec_modifier;
++   ivdwmod  = fr->nbkernel_vdw_modifier;
 +
 +   fr->ns.bCGlist = (getenv("GMX_NBLISTCG") != 0);
 +   if (!fr->ns.bCGlist)
 +   {
-        enlist_def = enlistCG_CG;
++       igeometry_def = GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE;
 +   }
 +   else
 +   {
-        enlist_w  = enlistTIP4P_ATOM;
-        enlist_ww = enlistTIP4P_TIP4P;
++       igeometry_def = GMX_NBLIST_GEOMETRY_CG_CG;
 +       if (log != NULL)
 +       {
 +           fprintf(log,"\nUsing charge-group - charge-group neighbor lists and kernels\n\n");
 +       }
 +   }
 +   
 +   if (fr->solvent_opt == esolTIP4P) {
-        enlist_w  = enlistSPC_ATOM;
-        enlist_ww = enlistSPC_SPC;
++       igeometry_w  = GMX_NBLIST_GEOMETRY_WATER4_PARTICLE;
++       igeometry_ww = GMX_NBLIST_GEOMETRY_WATER4_WATER4;
 +   } else {
-        init_nblist(&nbl->nlist_sr[eNL_VDWQQ],&nbl->nlist_lr[eNL_VDWQQ],
-                    maxsr,maxlr,ivdw,icoul,FALSE,enlist_def);
-        init_nblist(&nbl->nlist_sr[eNL_VDW],&nbl->nlist_lr[eNL_VDW],
-                    maxsr,maxlr,ivdw,0,FALSE,enlist_def);
-        init_nblist(&nbl->nlist_sr[eNL_QQ],&nbl->nlist_lr[eNL_QQ],
-                    maxsr,maxlr,0,icoul,FALSE,enlist_def);
-        init_nblist(&nbl->nlist_sr[eNL_VDWQQ_WATER],&nbl->nlist_lr[eNL_VDWQQ_WATER],
-                    maxsr_wat,maxlr_wat,ivdw,icoul, FALSE,enlist_w);
-        init_nblist(&nbl->nlist_sr[eNL_QQ_WATER],&nbl->nlist_lr[eNL_QQ_WATER],
-                    maxsr_wat,maxlr_wat,0,icoul, FALSE,enlist_w);
-        init_nblist(&nbl->nlist_sr[eNL_VDWQQ_WATERWATER],&nbl->nlist_lr[eNL_VDWQQ_WATERWATER],
-                    maxsr_wat,maxlr_wat,ivdw,icoul, FALSE,enlist_ww);
-        init_nblist(&nbl->nlist_sr[eNL_QQ_WATERWATER],&nbl->nlist_lr[eNL_QQ_WATERWATER],
-                    maxsr_wat,maxlr_wat,0,icoul, FALSE,enlist_ww);
++       igeometry_w  = GMX_NBLIST_GEOMETRY_WATER3_PARTICLE;
++       igeometry_ww = GMX_NBLIST_GEOMETRY_WATER3_WATER3;
 +   }
 +
 +   for(i=0; i<fr->nnblists; i++) 
 +   {
 +       nbl = &(fr->nblists[i]);
-                icoulf = enbcoulFEWALD;
++       init_nblist(log,&nbl->nlist_sr[eNL_VDWQQ],&nbl->nlist_lr[eNL_VDWQQ],
++                   maxsr,maxlr,ivdw,ivdwmod,ielec,ielecmod,FALSE,igeometry_def);
++       init_nblist(log,&nbl->nlist_sr[eNL_VDW],&nbl->nlist_lr[eNL_VDW],
++                   maxsr,maxlr,ivdw,ivdwmod,GMX_NBKERNEL_ELEC_NONE,eintmodNONE,FALSE,igeometry_def);
++       init_nblist(log,&nbl->nlist_sr[eNL_QQ],&nbl->nlist_lr[eNL_QQ],
++                   maxsr,maxlr,GMX_NBKERNEL_VDW_NONE,eintmodNONE,ielec,ielecmod,FALSE,igeometry_def);
++       init_nblist(log,&nbl->nlist_sr[eNL_VDWQQ_WATER],&nbl->nlist_lr[eNL_VDWQQ_WATER],
++                   maxsr_wat,maxlr_wat,ivdw,ivdwmod,ielec,ielecmod, FALSE,igeometry_w);
++       init_nblist(log,&nbl->nlist_sr[eNL_QQ_WATER],&nbl->nlist_lr[eNL_QQ_WATER],
++                   maxsr_wat,maxlr_wat,GMX_NBKERNEL_VDW_NONE,eintmodNONE,ielec,ielecmod, FALSE,igeometry_w);
++       init_nblist(log,&nbl->nlist_sr[eNL_VDWQQ_WATERWATER],&nbl->nlist_lr[eNL_VDWQQ_WATERWATER],
++                   maxsr_wat,maxlr_wat,ivdw,ivdwmod,ielec,ielecmod, FALSE,igeometry_ww);
++       init_nblist(log,&nbl->nlist_sr[eNL_QQ_WATERWATER],&nbl->nlist_lr[eNL_QQ_WATERWATER],
++                   maxsr_wat,maxlr_wat,GMX_NBKERNEL_VDW_NONE,eintmodNONE,ielec,ielecmod, FALSE,igeometry_ww);
++
++       /* Did we get the solvent loops so we can use optimized water kernels? */
++       if(nbl->nlist_sr[eNL_VDWQQ_WATER].kernelptr_vf==NULL
++          || nbl->nlist_sr[eNL_QQ_WATER].kernelptr_vf==NULL
++#ifndef DISABLE_WATERWATER_NLIST
++          || nbl->nlist_sr[eNL_VDWQQ_WATERWATER].kernelptr_vf==NULL
++          || nbl->nlist_sr[eNL_QQ_WATERWATER].kernelptr_vf==NULL
++#endif
++          )
++       {
++           fr->solvent_opt = esolNO;
++           fprintf(log,"Note: The available nonbonded kernels do not support water optimization - disabling.\n");
++       }
 +       
 +       if (fr->efep != efepNO) 
 +       {
 +           if ((fr->bEwald) && (fr->sc_alphacoul > 0)) /* need to handle long range differently if using softcore */
 +           {
-                icoulf = icoul;
++               ielecf = GMX_NBKERNEL_ELEC_EWALD;
++               ielecmodf = eintmodNONE;
 +           }
 +           else
 +           {
-            init_nblist(&nbl->nlist_sr[eNL_VDWQQ_FREE],&nbl->nlist_lr[eNL_VDWQQ_FREE],
-                        maxsr,maxlr,ivdw,icoulf,TRUE,enlistATOM_ATOM);
-            init_nblist(&nbl->nlist_sr[eNL_VDW_FREE],&nbl->nlist_lr[eNL_VDW_FREE],
-                        maxsr,maxlr,ivdw,0,TRUE,enlistATOM_ATOM);
-            init_nblist(&nbl->nlist_sr[eNL_QQ_FREE],&nbl->nlist_lr[eNL_QQ_FREE],
-                        maxsr,maxlr,0,icoulf,TRUE,enlistATOM_ATOM);
++               ielecf = ielec;
++               ielecmodf = ielecmod;
 +           }
 +
-        init_nblist(&fr->QMMMlist,NULL,
-                    maxsr,maxlr,0,icoul,FALSE,enlistATOM_ATOM);
++           init_nblist(log,&nbl->nlist_sr[eNL_VDWQQ_FREE],&nbl->nlist_lr[eNL_VDWQQ_FREE],
++                       maxsr,maxlr,ivdw,ivdwmod,ielecf,ielecmod,TRUE,GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE);
++           init_nblist(log,&nbl->nlist_sr[eNL_VDW_FREE],&nbl->nlist_lr[eNL_VDW_FREE],
++                       maxsr,maxlr,ivdw,ivdwmod,GMX_NBKERNEL_ELEC_NONE,eintmodNONE,TRUE,GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE);
++           init_nblist(log,&nbl->nlist_sr[eNL_QQ_FREE],&nbl->nlist_lr[eNL_QQ_FREE],
++                       maxsr,maxlr,GMX_NBKERNEL_VDW_NONE,eintmodNONE,ielecf,ielecmod,TRUE,GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE);
 +       }  
 +   }
 +   /* QMMM MM list */
 +   if (fr->bQMMM && fr->qr->QMMMscheme != eQMMMschemeoniom)
 +   {
- static void reset_neighbor_list(t_forcerec *fr,gmx_bool bLR,int nls,int eNL)
++       init_nblist(log,&fr->QMMMlist,NULL,
++                   maxsr,maxlr,0,0,ielec,ielecmod,FALSE,GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE);
++   }
++
++   if(log!=NULL)
++   {
++       fprintf(log,"\n");
 +   }
 +
 +   fr->ns.nblist_initialized=TRUE;
 +}
 +
 +static void reset_nblist(t_nblist *nl)
 +{
 +     nl->nri       = -1;
 +     nl->nrj       = 0;
 +     nl->maxlen    = 0;
 +     if (nl->jindex)
 +     {
 +         nl->jindex[0] = 0;
 +     }
 +}
 +
-     if (bLR) 
++static void reset_neighbor_lists(t_forcerec *fr,gmx_bool bResetSR, gmx_bool bResetLR)
 +{
 +    int n,i;
 +  
-         reset_nblist(&(fr->nblists[nls].nlist_lr[eNL]));
++    if (fr->bQMMM)
 +    {
-     else 
++        /* only reset the short-range nblist */
++        reset_nblist(&(fr->QMMMlist));
 +    }
-         for(n=0; n<fr->nnblists; n++)
++
++    for(n=0; n<fr->nnblists; n++)
 +    {
-             for(i=0; i<eNL_NR; i++)
++        for(i=0; i<eNL_NR; i++)
 +        {
-                 reset_nblist(&(fr->nblists[n].nlist_sr[i]));
++            if(bResetSR)
 +            {
-         }
-         if (fr->bQMMM)
-         { 
-             /* only reset the short-range nblist */
-             reset_nblist(&(fr->QMMMlist));
++                reset_nblist( &(fr->nblists[n].nlist_sr[i]) );
++            }
++            if(bResetLR)
++            {
++                reset_nblist( &(fr->nblists[n].nlist_lr[i]) );
 +            }
- static inline void close_neighbor_list(t_forcerec *fr,gmx_bool bLR,int nls,int eNL, 
-                                        gmx_bool bMakeQMMMnblist)
 +        }
 +    }
 +}
 +
 +
 +
 +
 +static inline void new_i_nblist(t_nblist *nlist,
 +                                gmx_bool bLR,atom_id i_atom,int shift,int gid)
 +{
 +    int    i,k,nri,nshift;
 +    
 +    nri = nlist->nri;
 +    
 +    /* Check whether we have to increase the i counter */
 +    if ((nri == -1) ||
 +        (nlist->iinr[nri]  != i_atom) || 
 +        (nlist->shift[nri] != shift) || 
 +        (nlist->gid[nri]   != gid))
 +    {
 +        /* This is something else. Now see if any entries have 
 +         * been added in the list of the previous atom.
 +         */
 +        if ((nri == -1) ||
 +            ((nlist->jindex[nri+1] > nlist->jindex[nri]) && 
 +             (nlist->gid[nri] != -1)))
 +        {
 +            /* If so increase the counter */
 +            nlist->nri++;
 +            nri++;
 +            if (nlist->nri >= nlist->maxnri)
 +            {
 +                nlist->maxnri += over_alloc_large(nlist->nri);
 +                reallocate_nblist(nlist);
 +            }
 +        }
 +        /* Set the number of neighbours and the atom number */
 +        nlist->jindex[nri+1] = nlist->jindex[nri];
 +        nlist->iinr[nri]     = i_atom;
 +        nlist->gid[nri]      = gid;
 +        nlist->shift[nri]    = shift;
 +    }
 +}
 +
 +static inline void close_i_nblist(t_nblist *nlist) 
 +{
 +    int nri = nlist->nri;
 +    int len;
 +    
 +    if (nri >= 0)
 +    {
 +        nlist->jindex[nri+1] = nlist->nrj;
 +        
 +        len=nlist->nrj -  nlist->jindex[nri];
 +        
 +        /* nlist length for water i molecules is treated statically 
 +         * in the innerloops 
 +         */
 +        if (len > nlist->maxlen)
 +        {
 +            nlist->maxlen = len;
 +        }
 +    }
 +}
 +
 +static inline void close_nblist(t_nblist *nlist)
 +{
 +    /* Only close this nblist when it has been initialized.
 +     * Avoid the creation of i-lists with no j-particles.
 +     */
 +    if (nlist->nrj == 0)
 +    {
 +        /* Some assembly kernels do not support empty lists,
 +         * make sure here that we don't generate any empty lists.
 +         * With the current ns code this branch is taken in two cases:
 +         * No i-particles at all: nri=-1 here
 +         * There are i-particles, but no j-particles; nri=0 here
 +         */
 +        nlist->nri = 0;
 +    }
 +    else
 +    {
 +        /* Close list number nri by incrementing the count */
 +        nlist->nri++;
 +    }
 +}
 +
-     if (bMakeQMMMnblist) {
-         if (!bLR)
-         {
++static inline void close_neighbor_lists(t_forcerec *fr,gmx_bool bMakeQMMMnblist)
 +{
 +    int n,i;
 +    
-         }
++    if (bMakeQMMMnblist)
++    {
 +            close_nblist(&(fr->QMMMlist));
-     else 
 +    }
-         if (bLR)
++
++    for(n=0; n<fr->nnblists; n++)
 +    {
-             close_nblist(&(fr->nblists[nls].nlist_lr[eNL]));
-         }
-         else
-         { 
-             for(n=0; n<fr->nnblists; n++)
-             {
-                 for(i=0; (i<eNL_NR); i++)
-                 {
-                     close_nblist(&(fr->nblists[n].nlist_sr[i]));
-                 }
-             }
++        for(i=0; (i<eNL_NR); i++)
 +        {
-             fprintf(debug,"Increasing %s nblist %s j size to %d\n",
-                     bLR ? "LR" : "SR",nrnb_str(nlist->il_code),nlist->maxnrj);
++            close_nblist(&(fr->nblists[n].nlist_sr[i]));
++            close_nblist(&(fr->nblists[n].nlist_lr[i]));
 +        }
 +    }
 +}
 +
++
 +static inline void add_j_to_nblist(t_nblist *nlist,atom_id j_atom,gmx_bool bLR)
 +{
 +    int nrj=nlist->nrj;
 +    
 +    if (nlist->nrj >= nlist->maxnrj)
 +    {
 +        nlist->maxnrj = over_alloc_small(nlist->nrj + 1);
 +        if (gmx_debug_at)
-             fprintf(debug,"Increasing %s nblist %s j size to %d\n",
-                     bLR ? "LR" : "SR",nrnb_str(nlist->il_code),nlist->maxnrj);
++            fprintf(debug,"Increasing %s nblist (ielec=%d,ivdw=%d,free=%d,igeometry=%d) j size to %d\n",
++                    bLR ? "LR" : "SR",nlist->ielec,nlist->ivdw,nlist->free_energy,nlist->igeometry,nlist->maxnrj);
 +        
 +        srenew(nlist->jjnr,nlist->maxnrj);
 +    }
 +
 +    nlist->jjnr[nrj] = j_atom;
 +    nlist->nrj ++;
 +}
 +
 +static inline void add_j_to_nblist_cg(t_nblist *nlist,
 +                                      atom_id j_start,int j_end,
 +                                      t_excl *bexcl,gmx_bool i_is_j,
 +                                      gmx_bool bLR)
 +{
 +    int nrj=nlist->nrj;
 +    int j;
 +
 +    if (nlist->nrj >= nlist->maxnrj)
 +    {
 +        nlist->maxnrj = over_alloc_small(nlist->nrj + 1);
 +        if (gmx_debug_at)
-               gmx_bool              bLR,
-               gmx_bool              bDoVdW,
-               gmx_bool              bDoCoul);
++            fprintf(debug,"Increasing %s nblist (ielec=%d,ivdw=%d,free=%d,igeometry=%d) j size to %d\n",
++                    bLR ? "LR" : "SR",nlist->ielec,nlist->ivdw,nlist->free_energy,nlist->igeometry,nlist->maxnrj);
 +        
 +        srenew(nlist->jjnr    ,nlist->maxnrj);
 +        srenew(nlist->jjnr_end,nlist->maxnrj);
 +        srenew(nlist->excl    ,nlist->maxnrj*MAX_CGCGSIZE);
 +    }
 +
 +    nlist->jjnr[nrj]     = j_start;
 +    nlist->jjnr_end[nrj] = j_end;
 +
 +    if (j_end - j_start > MAX_CGCGSIZE)
 +    {
 +        gmx_fatal(FARGS,"The charge-group - charge-group neighborlist does not support charge groups larger than %d, found a charge group of size %d",MAX_CGCGSIZE,j_end-j_start);
 +    }
 +
 +    /* Set the exclusions */
 +    for(j=j_start; j<j_end; j++)
 +    {
 +        nlist->excl[nrj*MAX_CGCGSIZE + j - j_start] = bexcl[j];
 +    }
 +    if (i_is_j)
 +    {
 +        /* Avoid double counting of intra-cg interactions */
 +        for(j=1; j<j_end-j_start; j++)
 +        {
 +            nlist->excl[nrj*MAX_CGCGSIZE + j] |= (1<<j) - 1;
 +        }
 +    }
 +
 +    nlist->nrj ++;
 +}
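Note: the (1<<j) - 1 mask at the end of add_j_to_nblist_cg() is what prevents double counting when a charge group is paired with itself: for j-atom j it excludes every partner with a lower index, so each intra-group pair survives exactly once. A small standalone illustration with hypothetical names, not GROMACS code:

    #include <stdio.h>

    int main(void)
    {
        unsigned int excl[4] = {0};     /* per-j exclusion masks within the group */
        int          i, j, cg_size = 4;

        for (j = 1; j < cg_size; j++)
        {
            excl[j] |= (1u << j) - 1;   /* hide all i < j for this j atom */
        }

        for (j = 0; j < cg_size; j++)
        {
            for (i = 0; i < cg_size; i++)
            {
                if (i != j && !((excl[j] >> i) & 1u))
                {
                    printf("pair %d-%d\n", i, j);   /* each pair listed once */
                }
            }
        }
        return 0;
    }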
 +
 +typedef void
 +put_in_list_t(gmx_bool              bHaveVdW[],
 +              int               ngid,
 +              t_mdatoms *       md,
 +              int               icg,
 +              int               jgid,
 +              int               nj,
 +              atom_id           jjcg[],
 +              atom_id           index[],
 +              t_excl            bExcl[],
 +              int               shift,
 +              t_forcerec *      fr,
-                gmx_bool              bLR,
-                gmx_bool              bDoVdW,
-                gmx_bool              bDoCoul)
++              gmx_bool          bLR,
++              gmx_bool          bDoVdW,
++              gmx_bool          bDoCoul,
++              int               solvent_opt);
 +
 +static void 
 +put_in_list_at(gmx_bool              bHaveVdW[],
 +               int               ngid,
 +               t_mdatoms *       md,
 +               int               icg,
 +               int               jgid,
 +               int               nj,
 +               atom_id           jjcg[],
 +               atom_id           index[],
 +               t_excl            bExcl[],
 +               int               shift,
 +               t_forcerec *      fr,
-     iwater = GET_CGINFO_SOLOPT(cginfo[icg]);
++               gmx_bool          bLR,
++               gmx_bool          bDoVdW,
++               gmx_bool          bDoCoul,
++               int               solvent_opt)
 +{
 +    /* The a[] index has been removed,
 +     * to put it back in i_atom should be a[i0] and jj should be a[jj].
 +     */
 +    t_nblist *   vdwc;
 +    t_nblist *   vdw;
 +    t_nblist *   coul;
 +    t_nblist *   vdwc_free  = NULL;
 +    t_nblist *   vdw_free   = NULL;
 +    t_nblist *   coul_free  = NULL;
 +    t_nblist *   vdwc_ww    = NULL;
 +    t_nblist *   coul_ww    = NULL;
 +    
 +    int           i,j,jcg,igid,gid,nbl_ind,ind_ij;
 +    atom_id   jj,jj0,jj1,i_atom;
 +    int       i0,nicg,len;
 +    
 +    int       *cginfo;
 +    int       *type,*typeB;
 +    real      *charge,*chargeB;
 +    real      qi,qiB,qq,rlj;
 +    gmx_bool      bFreeEnergy,bFree,bFreeJ,bNotEx,*bPert;
 +    gmx_bool      bDoVdW_i,bDoCoul_i,bDoCoul_i_sol;
 +    int       iwater,jwater;
 +    t_nblist  *nlist;
 +    
 +    /* Copy some pointers */
 +    cginfo  = fr->cginfo;
 +    charge  = md->chargeA;
 +    chargeB = md->chargeB;
 +    type    = md->typeA;
 +    typeB   = md->typeB;
 +    bPert   = md->bPerturbed;
 +    
 +    /* Get atom range */
 +    i0     = index[icg];
 +    nicg   = index[icg+1]-i0;
 +    
 +    /* Get the i charge group info */
 +    igid   = GET_CGINFO_GID(cginfo[icg]);
-                  gmx_bool              bLR,
-                  gmx_bool              bDoVdW,
-                  gmx_bool              bDoCoul)
++
++    iwater = (solvent_opt!=esolNO) ? GET_CGINFO_SOLOPT(cginfo[icg]) : esolNO;
 +    
 +    bFreeEnergy = FALSE;
 +    if (md->nPerturbed) 
 +    {
 +        /* Check if any of the particles involved are perturbed. 
 +         * If not we can do the cheaper normal put_in_list
 +         * and use more solvent optimization.
 +         */
 +        for(i=0; i<nicg; i++)
 +        {
 +            bFreeEnergy |= bPert[i0+i];
 +        }
 +        /* Loop over the j charge groups */
 +        for(j=0; (j<nj && !bFreeEnergy); j++) 
 +        {
 +            jcg = jjcg[j];
 +            jj0 = index[jcg];
 +            jj1 = index[jcg+1];
 +            /* Finally loop over the atoms in the j-charge group */   
 +            for(jj=jj0; jj<jj1; jj++)
 +            {
 +                bFreeEnergy |= bPert[jj];
 +            }
 +        }
 +    }
 +    
 +    /* Unpack pointers to neighbourlist structs */
 +    if (fr->nnblists == 1)
 +    {
 +        nbl_ind = 0;
 +    }
 +    else
 +    {
 +        nbl_ind = fr->gid2nblists[GID(igid,jgid,ngid)];
 +    }
 +    if (bLR)
 +    {
 +        nlist = fr->nblists[nbl_ind].nlist_lr;
 +    }
 +    else
 +    {
 +        nlist = fr->nblists[nbl_ind].nlist_sr;
 +    }
 +    
 +    if (iwater != esolNO)
 +    {
 +        vdwc = &nlist[eNL_VDWQQ_WATER];
 +        vdw  = &nlist[eNL_VDW];
 +        coul = &nlist[eNL_QQ_WATER];
 +#ifndef DISABLE_WATERWATER_NLIST
 +        vdwc_ww = &nlist[eNL_VDWQQ_WATERWATER];
 +        coul_ww = &nlist[eNL_QQ_WATERWATER];
 +#endif
 +    } 
 +    else 
 +    {
 +        vdwc = &nlist[eNL_VDWQQ];
 +        vdw  = &nlist[eNL_VDW];
 +        coul = &nlist[eNL_QQ];
 +    }
 +    
 +    if (!bFreeEnergy) 
 +    {
 +        if (iwater != esolNO) 
 +        {
 +            /* Loop over the atoms in the i charge group */    
 +            i_atom  = i0;
 +            gid     = GID(igid,jgid,ngid);
 +            /* Create new i_atom for each energy group */
 +            if (bDoCoul && bDoVdW)
 +            {
 +                new_i_nblist(vdwc,bLR,i_atom,shift,gid);
 +#ifndef DISABLE_WATERWATER_NLIST
 +                new_i_nblist(vdwc_ww,bLR,i_atom,shift,gid);
 +#endif
 +            }
 +            if (bDoVdW)
 +            {
 +                new_i_nblist(vdw,bLR,i_atom,shift,gid);
 +            }
 +            if (bDoCoul) 
 +            {
 +                new_i_nblist(coul,bLR,i_atom,shift,gid);
 +#ifndef DISABLE_WATERWATER_NLIST
 +                new_i_nblist(coul_ww,bLR,i_atom,shift,gid);
 +#endif
 +            }      
 +        /* Loop over the j charge groups */
 +            for(j=0; (j<nj); j++) 
 +            {
 +                jcg=jjcg[j];
 +                
 +                if (jcg == icg)
 +                {
 +                    continue;
 +                }
 +                
 +                jj0 = index[jcg];
 +                jwater = GET_CGINFO_SOLOPT(cginfo[jcg]);
 +                
 +                if (iwater == esolSPC && jwater == esolSPC)
 +                {
 +                    /* Interaction between two SPC molecules */
 +                    if (!bDoCoul)
 +                    {
 +                        /* VdW only - only first atoms in each water interact */
 +                        add_j_to_nblist(vdw,jj0,bLR);
 +                    }
 +                    else 
 +                    {
 +#ifdef DISABLE_WATERWATER_NLIST       
 +                        /* Add entries for the three atoms - only do VdW if we need to */
 +                        if (!bDoVdW)
 +                        {
 +                            add_j_to_nblist(coul,jj0,bLR);
 +                        }
 +                        else
 +                        {
 +                            add_j_to_nblist(vdwc,jj0,bLR);
 +                        }
 +                        add_j_to_nblist(coul,jj0+1,bLR);
 +                        add_j_to_nblist(coul,jj0+2,bLR);          
 +#else
 +                        /* One entry for the entire water-water interaction */
 +                        if (!bDoVdW)
 +                        {
 +                            add_j_to_nblist(coul_ww,jj0,bLR);
 +                        }
 +                        else
 +                        {
 +                            add_j_to_nblist(vdwc_ww,jj0,bLR);
 +                        }
 +#endif
 +                    }  
 +                } 
 +                else if (iwater == esolTIP4P && jwater == esolTIP4P) 
 +                {
 +                    /* Interaction between two TIP4p molecules */
 +                    if (!bDoCoul)
 +                    {
 +                        /* VdW only - only first atoms in each water interact */
 +                        add_j_to_nblist(vdw,jj0,bLR);
 +                    }
 +                    else 
 +                    {
 +#ifdef DISABLE_WATERWATER_NLIST       
 +                        /* Add entries for the four atoms - only do VdW if we need to */
 +                        if (bDoVdW)
 +                        {
 +                            add_j_to_nblist(vdw,jj0,bLR);
 +                        }
 +                        add_j_to_nblist(coul,jj0+1,bLR);
 +                        add_j_to_nblist(coul,jj0+2,bLR);          
 +                        add_j_to_nblist(coul,jj0+3,bLR);          
 +#else
 +                        /* One entry for the entire water-water interaction */
 +                        if (!bDoVdW)
 +                        {
 +                            add_j_to_nblist(coul_ww,jj0,bLR);
 +                        }
 +                        else
 +                        {
 +                            add_j_to_nblist(vdwc_ww,jj0,bLR);
 +                        }
 +#endif
 +                    }                                         
 +                }
 +                else 
 +                {
 +                    /* j charge group is not water, but i is.
 +                     * Add entries to the water-other_atom lists; the geometry of the water
 +                     * molecule doesn't matter - that is taken care of in the nonbonded kernel,
 +                     * so we don't care if it is SPC or TIP4P...
 +                     */
 +                    
 +                    jj1 = index[jcg+1];
 +                    
 +                    if (!bDoVdW) 
 +                    {
 +                        for(jj=jj0; (jj<jj1); jj++) 
 +                        {
 +                            if (charge[jj] != 0)
 +                            {
 +                                add_j_to_nblist(coul,jj,bLR);
 +                            }
 +                        }
 +                    }
 +                    else if (!bDoCoul)
 +                    {
 +                        for(jj=jj0; (jj<jj1); jj++)
 +                        {
 +                            if (bHaveVdW[type[jj]])
 +                            {
 +                                add_j_to_nblist(vdw,jj,bLR);
 +                            }
 +                        }
 +                    }
 +                    else 
 +                    {
 +                        /* _charge_ _groups_ interact with both coulomb and LJ */
 +                        /* Check which atoms we should add to the lists!       */
 +                        for(jj=jj0; (jj<jj1); jj++) 
 +                        {
 +                            if (bHaveVdW[type[jj]]) 
 +                            {
 +                                if (charge[jj] != 0)
 +                                {
 +                                    add_j_to_nblist(vdwc,jj,bLR);
 +                                }
 +                                else
 +                                {
 +                                    add_j_to_nblist(vdw,jj,bLR);
 +                                }
 +                            }
 +                            else if (charge[jj] != 0)
 +                            {
 +                                add_j_to_nblist(coul,jj,bLR);
 +                            }
 +                        }
 +                    }
 +                }
 +            }
 +            close_i_nblist(vdw); 
 +            close_i_nblist(coul); 
 +            close_i_nblist(vdwc);  
 +#ifndef DISABLE_WATERWATER_NLIST
 +            close_i_nblist(coul_ww);
 +            close_i_nblist(vdwc_ww); 
 +#endif
 +        } 
 +        else
 +        { 
 +            /* no solvent as i charge group */
 +            /* Loop over the atoms in the i charge group */    
 +            for(i=0; i<nicg; i++) 
 +            {
 +                i_atom  = i0+i;
 +                gid     = GID(igid,jgid,ngid);
 +                qi      = charge[i_atom];
 +                
 +                /* Create new i_atom for each energy group */
 +                if (bDoVdW && bDoCoul)
 +                {
 +                    new_i_nblist(vdwc,bLR,i_atom,shift,gid);
 +                }
 +                if (bDoVdW)
 +                {
 +                    new_i_nblist(vdw,bLR,i_atom,shift,gid);
 +                }
 +                if (bDoCoul)
 +                {
 +                    new_i_nblist(coul,bLR,i_atom,shift,gid);
 +                }
 +                bDoVdW_i  = (bDoVdW  && bHaveVdW[type[i_atom]]);
 +                bDoCoul_i = (bDoCoul && qi!=0);
 +                
 +                if (bDoVdW_i || bDoCoul_i) 
 +                {
 +                    /* Loop over the j charge groups */
 +                    for(j=0; (j<nj); j++) 
 +                    {
 +                        jcg=jjcg[j];
 +                        
 +                        /* Check for large charge groups */
 +                        if (jcg == icg)
 +                        {
 +                            jj0 = i0 + i + 1;
 +                        }
 +                        else
 +                        {
 +                            jj0 = index[jcg];
 +                        }
 +                        
 +                        jj1=index[jcg+1];
 +                        /* Finally loop over the atoms in the j-charge group */       
 +                        for(jj=jj0; jj<jj1; jj++) 
 +                        {
 +                            bNotEx = NOTEXCL(bExcl,i,jj);
 +                            
 +                            if (bNotEx) 
 +                            {
 +                                if (!bDoVdW_i) 
 +                                { 
 +                                    if (charge[jj] != 0)
 +                                    {
 +                                        add_j_to_nblist(coul,jj,bLR);
 +                                    }
 +                                }
 +                                else if (!bDoCoul_i) 
 +                                {
 +                                    if (bHaveVdW[type[jj]])
 +                                    {
 +                                        add_j_to_nblist(vdw,jj,bLR);
 +                                    }
 +                                }
 +                                else 
 +                                {
 +                                    if (bHaveVdW[type[jj]]) 
 +                                    {
 +                                        if (charge[jj] != 0)
 +                                        {
 +                                            add_j_to_nblist(vdwc,jj,bLR);
 +                                        }
 +                                        else
 +                                        {
 +                                            add_j_to_nblist(vdw,jj,bLR);
 +                                        }
 +                                    } 
 +                                    else if (charge[jj] != 0)
 +                                    {
 +                                        add_j_to_nblist(coul,jj,bLR);
 +                                    }
 +                                }
 +                            }
 +                        }
 +                    }
 +                }
 +                close_i_nblist(vdw);
 +                close_i_nblist(coul);
 +                close_i_nblist(vdwc);
 +            }
 +        }
 +    }
 +    else
 +    {
 +        /* we are doing free energy */
 +        vdwc_free = &nlist[eNL_VDWQQ_FREE];
 +        vdw_free  = &nlist[eNL_VDW_FREE];
 +        coul_free = &nlist[eNL_QQ_FREE];
 +        /* Loop over the atoms in the i charge group */    
 +        for(i=0; i<nicg; i++) 
 +        {
 +            i_atom  = i0+i;
 +            gid     = GID(igid,jgid,ngid);
 +            qi      = charge[i_atom];
 +            qiB     = chargeB[i_atom];
 +            
 +            /* Create new i_atom for each energy group */
 +            if (bDoVdW && bDoCoul) 
 +                new_i_nblist(vdwc,bLR,i_atom,shift,gid);
 +            if (bDoVdW)   
 +                new_i_nblist(vdw,bLR,i_atom,shift,gid);
 +            if (bDoCoul) 
 +                new_i_nblist(coul,bLR,i_atom,shift,gid);
 +            
 +            new_i_nblist(vdw_free,bLR,i_atom,shift,gid);
 +            new_i_nblist(coul_free,bLR,i_atom,shift,gid);
 +            new_i_nblist(vdwc_free,bLR,i_atom,shift,gid);
 +            
 +            bDoVdW_i  = (bDoVdW  &&
 +                         (bHaveVdW[type[i_atom]] || bHaveVdW[typeB[i_atom]]));
 +            bDoCoul_i = (bDoCoul && (qi!=0 || qiB!=0));
 +            /* For TIP4P the first atom does not have a charge,
 +             * but the last three do. So we should still put an atom
 +             * without LJ but with charge in the water-atom neighborlist
 +             * for a TIP4P i charge group.
 +             * For SPC type water the first atom has LJ and charge,
 +             * so there is no such problem.
 +             */
 +            if (iwater == esolNO)
 +            {
 +                bDoCoul_i_sol = bDoCoul_i;
 +            }
 +            else
 +            {
 +                bDoCoul_i_sol = bDoCoul;
 +            }
 +            
 +            if (bDoVdW_i || bDoCoul_i_sol) 
 +            {
 +                /* Loop over the j charge groups */
 +                for(j=0; (j<nj); j++)
 +                {
 +                    jcg=jjcg[j];
 +                    
 +                    /* Check for large charge groups */
 +                    if (jcg == icg)
 +                    {
 +                        jj0 = i0 + i + 1;
 +                    }
 +                    else
 +                    {
 +                        jj0 = index[jcg];
 +                    }
 +                    
 +                    jj1=index[jcg+1];
 +                    /* Finally loop over the atoms in the j-charge group */   
 +                    bFree = bPert[i_atom];
 +                    for(jj=jj0; (jj<jj1); jj++) 
 +                    {
 +                        bFreeJ = bFree || bPert[jj];
 +                        /* Complicated if, because the water H's should also
 +                         * see perturbed j-particles
 +                         */
 +                        if (iwater==esolNO || i==0 || bFreeJ) 
 +                        {
 +                            bNotEx = NOTEXCL(bExcl,i,jj);
 +                            
 +                            if (bNotEx) 
 +                            {
 +                                if (bFreeJ)
 +                                {
 +                                    if (!bDoVdW_i) 
 +                                    {
 +                                        if (charge[jj]!=0 || chargeB[jj]!=0)
 +                                        {
 +                                            add_j_to_nblist(coul_free,jj,bLR);
 +                                        }
 +                                    }
 +                                    else if (!bDoCoul_i) 
 +                                    {
 +                                        if (bHaveVdW[type[jj]] || bHaveVdW[typeB[jj]])
 +                                        {
 +                                            add_j_to_nblist(vdw_free,jj,bLR);
 +                                        }
 +                                    }
 +                                    else 
 +                                    {
 +                                        if (bHaveVdW[type[jj]] || bHaveVdW[typeB[jj]]) 
 +                                        {
 +                                            if (charge[jj]!=0 || chargeB[jj]!=0)
 +                                            {
 +                                                add_j_to_nblist(vdwc_free,jj,bLR);
 +                                            }
 +                                            else
 +                                            {
 +                                                add_j_to_nblist(vdw_free,jj,bLR);
 +                                            }
 +                                        }
 +                                        else if (charge[jj]!=0 || chargeB[jj]!=0)
 +                                            add_j_to_nblist(coul_free,jj,bLR);
 +                                    }
 +                                }
 +                                else if (!bDoVdW_i) 
 +                                { 
 +                                    /* This is done whether or not bWater is set */
 +                                    if (charge[jj] != 0)
 +                                    {
 +                                        add_j_to_nblist(coul,jj,bLR);
 +                                    }
 +                                }
 +                                else if (!bDoCoul_i_sol) 
 +                                { 
 +                                    if (bHaveVdW[type[jj]])
 +                                    {
 +                                        add_j_to_nblist(vdw,jj,bLR);
 +                                    }
 +                                }
 +                                else 
 +                                {
 +                                    if (bHaveVdW[type[jj]]) 
 +                                    {
 +                                        if (charge[jj] != 0)
 +                                        {
 +                                            add_j_to_nblist(vdwc,jj,bLR);
 +                                        }
 +                                        else
 +                                        {
 +                                            add_j_to_nblist(vdw,jj,bLR);
 +                                        }
 +                                    } 
 +                                    else if (charge[jj] != 0)
 +                                    {
 +                                        add_j_to_nblist(coul,jj,bLR);
 +                                    }
 +                                }
 +                            }
 +                        }
 +                    }
 +                }
 +            }
 +            close_i_nblist(vdw);
 +            close_i_nblist(coul);
 +            close_i_nblist(vdwc);
 +            close_i_nblist(vdw_free);
 +            close_i_nblist(coul_free);
 +            close_i_nblist(vdwc_free);
 +        }
 +    }
 +}
 +
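 +/* Build the QM/MM Coulomb-only neighbour list (fr->QMMMlist) for charge
 + * group icg. bDoVdW, bDoCoul and solvent_opt are unused here; they are
 + * only present so that the signature matches put_in_list_t.
 + */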
 +static void 
 +put_in_list_qmmm(gmx_bool              bHaveVdW[],
 +                 int               ngid,
 +                 t_mdatoms *       md,
 +                 int               icg,
 +                 int               jgid,
 +                 int               nj,
 +                 atom_id           jjcg[],
 +                 atom_id           index[],
 +                 t_excl            bExcl[],
 +                 int               shift,
 +                 t_forcerec *      fr,
-                gmx_bool              bLR,
-                gmx_bool              bDoVdW,
-                gmx_bool              bDoCoul)
++                 gmx_bool          bLR,
++                 gmx_bool          bDoVdW,
++                 gmx_bool          bDoCoul,
++                 int               solvent_opt)
 +{
 +    t_nblist *   coul;
 +    int         i,j,jcg,igid,gid;
 +    atom_id   jj,jj0,jj1,i_atom;
 +    int       i0,nicg;
 +    gmx_bool      bNotEx;
 +    
 +    /* Get atom range */
 +    i0     = index[icg];
 +    nicg   = index[icg+1]-i0;
 +    
 +    /* Get the i charge group info */
 +    igid   = GET_CGINFO_GID(fr->cginfo[icg]);
 +    
 +    coul = &fr->QMMMlist;
 +    
 +    /* Loop over atoms in the ith charge group */
 +    for (i=0;i<nicg;i++)
 +    {
 +        i_atom = i0+i;
 +        gid    = GID(igid,jgid,ngid);
 +        /* Create new i_atom for each energy group */
 +        new_i_nblist(coul,bLR,i_atom,shift,gid);
 +        
 +        /* Loop over the j charge groups */
 +        for (j=0;j<nj;j++)
 +        {
 +            jcg=jjcg[j];
 +            
 +            /* Charge groups cannot have QM and MM atoms simultaneously */
 +            if (jcg!=icg)
 +            {
 +                jj0 = index[jcg];
 +                jj1 = index[jcg+1];
 +                /* Finally loop over the atoms in the j-charge group */
 +                for(jj=jj0; jj<jj1; jj++)
 +                {
 +                    bNotEx = NOTEXCL(bExcl,i,jj);
 +                    if(bNotEx)
 +                        add_j_to_nblist(coul,jj,bLR);
 +                }
 +            }
 +        }
 +        close_i_nblist(coul);
 +    }
 +}
 +
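 +/* Build a charge-group pair list for icg: one combined LJ+Coulomb list per
 + * i charge group, where whole j charge groups are added together with their
 + * exclusions. bDoVdW, bDoCoul and solvent_opt are unused here; they are
 + * only present so that the signature matches put_in_list_t.
 + */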
 +static void 
 +put_in_list_cg(gmx_bool              bHaveVdW[],
 +               int               ngid,
 +               t_mdatoms *       md,
 +               int               icg,
 +               int               jgid,
 +               int               nj,
 +               atom_id           jjcg[],
 +               atom_id           index[],
 +               t_excl            bExcl[],
 +               int               shift,
 +               t_forcerec *      fr,
-                     cgs->index,bexcl,shift,fr,FALSE,TRUE,TRUE);
++               gmx_bool          bLR,
++               gmx_bool          bDoVdW,
++               gmx_bool          bDoCoul,
++               int               solvent_opt)
 +{
 +    int          cginfo;
 +    int          igid,gid,nbl_ind;
 +    t_nblist *   vdwc;
 +    int          j,jcg;
 +
 +    cginfo = fr->cginfo[icg];
 +
 +    igid = GET_CGINFO_GID(cginfo);
 +    gid  = GID(igid,jgid,ngid);
 +
 +    /* Unpack pointers to neighbourlist structs */
 +    if (fr->nnblists == 1)
 +    {
 +        nbl_ind = 0;
 +    }
 +    else
 +    {
 +        nbl_ind = fr->gid2nblists[gid];
 +    }
 +    if (bLR)
 +    {
 +        vdwc = &fr->nblists[nbl_ind].nlist_lr[eNL_VDWQQ];
 +    }
 +    else
 +    {
 +        vdwc = &fr->nblists[nbl_ind].nlist_sr[eNL_VDWQQ];
 +    }
 +
 +    /* Make a new neighbor list for charge group icg.
 +     * Currently simply one neighbor list is made with LJ and Coulomb.
 +     * If required, zero interactions could be removed here
 +     * or in the force loop.
 +     */
 +    new_i_nblist(vdwc,bLR,index[icg],shift,gid);
 +    vdwc->iinr_end[vdwc->nri] = index[icg+1];
 +
 +    for(j=0; (j<nj); j++) 
 +    {
 +        jcg = jjcg[j];
 +        /* Skip the icg-icg pairs if all self interactions are excluded */
 +        if (!(jcg == icg && GET_CGINFO_EXCL_INTRA(cginfo)))
 +        {
 +            /* Here we add the j charge group jcg to the list,
 +             * exclusions are also added to the list.
 +             */
 +            add_j_to_nblist_cg(vdwc,index[jcg],index[jcg+1],bExcl,icg==jcg,bLR);
 +        }
 +    }
 +
 +    close_i_nblist(vdwc);  
 +}
 +
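 +/* Set (b==TRUE) or clear (b==FALSE) the exclusion bits for the charge group
 + * spanning atoms [start,end): for each atom i in the group, bit (i-start) is
 + * set or cleared in the mask entry of every excluded partner listed in excl.
 + */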
 +static void setexcl(atom_id start,atom_id end,t_blocka *excl,gmx_bool b,
 +                    t_excl bexcl[])
 +{
 +    atom_id i,k;
 +    
 +    if (b)
 +    {
 +        for(i=start; i<end; i++)
 +        {
 +            for(k=excl->index[i]; k<excl->index[i+1]; k++)
 +            {
 +                SETEXCL(bexcl,i-start,excl->a[k]);
 +            }
 +        }
 +    }
 +    else
 +    {
 +        for(i=start; i<end; i++)
 +        {
 +            for(k=excl->index[i]; k<excl->index[i+1]; k++)
 +            {
 +                RMEXCL(bexcl,i-start,excl->a[k]);
 +            }
 +        }
 +    }
 +}
 +
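 +/* Return the number of j charge groups (naaj) that charge group icg should
 + * be paired with when every i charge group only looks "forward" (wrapping
 + * around), so that each pair is visited exactly once. Example: for
 + * cgtot = 5 every icg gets naaj = 3, which covers each of the 10 distinct
 + * pairs exactly once.
 + */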
 +int calc_naaj(int icg,int cgtot)
 +{
 +    int naaj;
 +    
 +    if ((cgtot % 2) == 1)
 +    {
 +        /* Odd number of charge groups, easy */
 +        naaj = 1 + (cgtot/2);
 +    }
 +    else if ((cgtot % 4) == 0)
 +    {
 +        /* Multiple of four is hard */
 +        if (icg < cgtot/2)
 +        {
 +            if ((icg % 2) == 0)
 +            {
 +                naaj=1+(cgtot/2);
 +            }
 +            else
 +            {
 +                naaj=cgtot/2;
 +            }
 +        }
 +        else
 +        {
 +            if ((icg % 2) == 1)
 +            {
 +                naaj=1+(cgtot/2);
 +            }
 +            else
 +            {
 +                naaj=cgtot/2;
 +            }
 +        }
 +    }
 +    else
 +    {
 +        /* cgtot/2 = odd */
 +        if ((icg % 2) == 0)
 +        {
 +            naaj=1+(cgtot/2);
 +        }
 +        else
 +        {
 +            naaj=cgtot/2;
 +        }
 +    }
 +#ifdef DEBUG
 +    fprintf(log,"naaj=%d\n",naaj);
 +#endif
 +
 +    return naaj;
 +}
 +
 +/************************************************
 + *
 + *  S I M P L E      C O R E     S T U F F
 + *
 + ************************************************/
 +
 +static real calc_image_tric(rvec xi,rvec xj,matrix box,
 +                            rvec b_inv,int *shift)
 +{
 +    /* This code assumes that the cut-off is smaller than
 +     * half of the smallest diagonal element of the box.
 +     */
 +    const real h25=2.5;
 +    real dx,dy,dz;
 +    real r2;
 +    int  tx,ty,tz;
 +    
 +    /* Compute diff vector */
 +    dz = xj[ZZ] - xi[ZZ];
 +    dy = xj[YY] - xi[YY];
 +    dx = xj[XX] - xi[XX];
 +    
 +    /* Perform the NINT operation using truncation: we first add 2.5
 +     * and then subtract 2 again.
 +     */
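 +    /* Example: dz*b_inv[ZZ] = -0.6 gives (int)(-0.6 + 2.5) - 2 = 1 - 2 = -1, i.e. NINT(-0.6). */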
 +    tz = dz*b_inv[ZZ] + h25;
 +    tz -= 2;
 +    dz -= tz*box[ZZ][ZZ];
 +    dy -= tz*box[ZZ][YY];
 +    dx -= tz*box[ZZ][XX];
 +
 +    ty = dy*b_inv[YY] + h25;
 +    ty -= 2;
 +    dy -= ty*box[YY][YY];
 +    dx -= ty*box[YY][XX];
 +    
 +    tx = dx*b_inv[XX]+h25;
 +    tx -= 2;
 +    dx -= tx*box[XX][XX];
 +  
 +    /* Distance squared */
 +    r2 = (dx*dx) + (dy*dy) + (dz*dz);
 +
 +    *shift = XYZ2IS(tx,ty,tz);
 +
 +    return r2;
 +}
 +
 +static real calc_image_rect(rvec xi,rvec xj,rvec box_size,
 +                            rvec b_inv,int *shift)
 +{
 +    const real h15=1.5;
 +    real ddx,ddy,ddz;
 +    real dx,dy,dz;
 +    real r2;
 +    int  tx,ty,tz;
 +    
 +    /* Compute diff vector */
 +    dx = xj[XX] - xi[XX];
 +    dy = xj[YY] - xi[YY];
 +    dz = xj[ZZ] - xi[ZZ];
 +  
 +    /* Perform the NINT operation using truncation: we first add 1.5
 +     * and then subtract 1 again.
 +     */
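 +    /* Example: dx*b_inv[XX] = 0.6 gives (int)(0.6 + 1.5) - 1 = 2 - 1 = 1, i.e. NINT(0.6). */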
 +    tx = dx*b_inv[XX] + h15;
 +    ty = dy*b_inv[YY] + h15;
 +    tz = dz*b_inv[ZZ] + h15;
 +    tx--;
 +    ty--;
 +    tz--;
 +    
 +    /* Correct diff vector for translation */
 +    ddx = tx*box_size[XX] - dx;
 +    ddy = ty*box_size[YY] - dy;
 +    ddz = tz*box_size[ZZ] - dz;
 +    
 +    /* Distance squared */
 +    r2 = (ddx*ddx) + (ddy*ddy) + (ddz*ddz);
 +    
 +    *shift = XYZ2IS(tx,ty,tz);
 +    
 +    return r2;
 +}
 +
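 +/* Append charge group cg_j (with nrj atoms) to the neighbour-search buffer
 + * nsbuf; when adding it would exceed MAX_CG buffered atoms, the buffer is
 + * first flushed into the short-range neighbour lists via put_in_list.
 + */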
 +static void add_simple(t_ns_buf *nsbuf,int nrj,atom_id cg_j,
 +                       gmx_bool bHaveVdW[],int ngid,t_mdatoms *md,
 +                       int icg,int jgid,t_block *cgs,t_excl bexcl[],
 +                       int shift,t_forcerec *fr,put_in_list_t *put_in_list)
 +{
 +    if (nsbuf->nj + nrj > MAX_CG)
 +    {
 +        put_in_list(bHaveVdW,ngid,md,icg,jgid,nsbuf->ncg,nsbuf->jcg,
-     } 
++                    cgs->index,bexcl,shift,fr,FALSE,TRUE,TRUE,fr->solvent_opt);
 +        /* Reset buffer contents */
 +        nsbuf->ncg = nsbuf->nj = 0;
 +    }
 +    nsbuf->jcg[nsbuf->ncg++] = cg_j;
 +    nsbuf->nj += nrj;
 +}
 +
 +static void ns_inner_tric(rvec x[],int icg,int *i_egp_flags,
 +                          int njcg,atom_id jcg[],
 +                          matrix box,rvec b_inv,real rcut2,
 +                          t_block *cgs,t_ns_buf **ns_buf,
 +                          gmx_bool bHaveVdW[],int ngid,t_mdatoms *md,
 +                          t_excl bexcl[],t_forcerec *fr,
 +                          put_in_list_t *put_in_list)
 +{
 +    int      shift;
 +    int      j,nrj,jgid;
 +    int      *cginfo=fr->cginfo;
 +    atom_id  cg_j,*cgindex;
 +    t_ns_buf *nsbuf;
 +    
 +    cgindex = cgs->index;
 +    shift   = CENTRAL;
 +    for(j=0; (j<njcg); j++)
 +    {
 +        cg_j   = jcg[j];
 +        nrj    = cgindex[cg_j+1]-cgindex[cg_j];
 +        if (calc_image_tric(x[icg],x[cg_j],box,b_inv,&shift) < rcut2)
 +        {
 +            jgid  = GET_CGINFO_GID(cginfo[cg_j]);
 +            if (!(i_egp_flags[jgid] & EGP_EXCL))
 +            {
 +                add_simple(&ns_buf[jgid][shift],nrj,cg_j,
 +                           bHaveVdW,ngid,md,icg,jgid,cgs,bexcl,shift,fr,
 +                           put_in_list);
 +            }
 +        }
 +    }
 +}
 +
 +static void ns_inner_rect(rvec x[],int icg,int *i_egp_flags,
 +                          int njcg,atom_id jcg[],
 +                          gmx_bool bBox,rvec box_size,rvec b_inv,real rcut2,
 +                          t_block *cgs,t_ns_buf **ns_buf,
 +                          gmx_bool bHaveVdW[],int ngid,t_mdatoms *md,
 +                          t_excl bexcl[],t_forcerec *fr,
 +                          put_in_list_t *put_in_list)
 +{
 +    int      shift;
 +    int      j,nrj,jgid;
 +    int      *cginfo=fr->cginfo;
 +    atom_id  cg_j,*cgindex;
 +    t_ns_buf *nsbuf;
 +
 +    cgindex = cgs->index;
 +    if (bBox)
 +    {
 +        shift = CENTRAL;
 +        for(j=0; (j<njcg); j++)
 +        {
 +            cg_j   = jcg[j];
 +            nrj    = cgindex[cg_j+1]-cgindex[cg_j];
 +            if (calc_image_rect(x[icg],x[cg_j],box_size,b_inv,&shift) < rcut2)
 +            {
 +                jgid  = GET_CGINFO_GID(cginfo[cg_j]);
 +                if (!(i_egp_flags[jgid] & EGP_EXCL))
 +                {
 +                    add_simple(&ns_buf[jgid][shift],nrj,cg_j,
 +                               bHaveVdW,ngid,md,icg,jgid,cgs,bexcl,shift,fr,
 +                               put_in_list);
 +                }
 +            }
 +        }
-                                 cgs->index,bexcl,k,fr,FALSE,TRUE,TRUE);
++    }
 +    else
 +    {
 +        for(j=0; (j<njcg); j++)
 +        {
 +            cg_j   = jcg[j];
 +            nrj    = cgindex[cg_j+1]-cgindex[cg_j];
 +            if ((rcut2 == 0) || (distance2(x[icg],x[cg_j]) < rcut2)) {
 +                jgid  = GET_CGINFO_GID(cginfo[cg_j]);
 +                if (!(i_egp_flags[jgid] & EGP_EXCL))
 +                {
 +                    add_simple(&ns_buf[jgid][CENTRAL],nrj,cg_j,
 +                               bHaveVdW,ngid,md,icg,jgid,cgs,bexcl,CENTRAL,fr,
 +                               put_in_list);
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +/* ns_simple_core still needs to be adapted for QMMM (2005) */
 +
 +static int ns_simple_core(t_forcerec *fr,
 +                          gmx_localtop_t *top,
 +                          t_mdatoms *md,
 +                          matrix box,rvec box_size,
 +                          t_excl bexcl[],atom_id *aaj,
 +                          int ngid,t_ns_buf **ns_buf,
 +                          put_in_list_t *put_in_list,gmx_bool bHaveVdW[])
 +{
 +    int      naaj,k;
 +    real     rlist2;
 +    int      nsearch,icg,jcg,igid,i0,nri,nn;
 +    int      *cginfo;
 +    t_ns_buf *nsbuf;
 +    /* atom_id  *i_atoms; */
 +    t_block  *cgs=&(top->cgs);
 +    t_blocka *excl=&(top->excls);
 +    rvec     b_inv;
 +    int      m;
 +    gmx_bool     bBox,bTriclinic;
 +    int      *i_egp_flags;
 +    
 +    rlist2 = sqr(fr->rlist);
 +    
 +    bBox = (fr->ePBC != epbcNONE);
 +    if (bBox)
 +    {
 +        for(m=0; (m<DIM); m++)
 +        {
 +            b_inv[m] = divide_err(1.0,box_size[m]);
 +        }
 +        bTriclinic = TRICLINIC(box);
 +    }
 +    else
 +    {
 +        bTriclinic = FALSE;
 +    }
 +    
 +    cginfo = fr->cginfo;
 +    
 +    nsearch=0;
 +    for (icg=fr->cg0; (icg<fr->hcg); icg++)
 +    {
 +        /*
 +          i0        = cgs->index[icg];
 +          nri       = cgs->index[icg+1]-i0;
 +          i_atoms   = &(cgs->a[i0]);
 +          i_eg_excl = fr->eg_excl + ngid*md->cENER[*i_atoms];
 +          setexcl(nri,i_atoms,excl,TRUE,bexcl);
 +        */
 +        igid = GET_CGINFO_GID(cginfo[icg]);
 +        i_egp_flags = fr->egp_flags + ngid*igid;
 +        setexcl(cgs->index[icg],cgs->index[icg+1],excl,TRUE,bexcl);
 +        
 +        naaj=calc_naaj(icg,cgs->nr);
 +        if (bTriclinic)
 +        {
 +            ns_inner_tric(fr->cg_cm,icg,i_egp_flags,naaj,&(aaj[icg]),
 +                          box,b_inv,rlist2,cgs,ns_buf,
 +                          bHaveVdW,ngid,md,bexcl,fr,put_in_list);
 +        }
 +        else
 +        {
 +            ns_inner_rect(fr->cg_cm,icg,i_egp_flags,naaj,&(aaj[icg]),
 +                          bBox,box_size,b_inv,rlist2,cgs,ns_buf,
 +                          bHaveVdW,ngid,md,bexcl,fr,put_in_list);
 +        }
 +        nsearch += naaj;
 +        
 +        for(nn=0; (nn<ngid); nn++)
 +        {
 +            for(k=0; (k<SHIFTS); k++)
 +            {
 +                nsbuf = &(ns_buf[nn][k]);
 +                if (nsbuf->ncg > 0)
 +                {
 +                    put_in_list(bHaveVdW,ngid,md,icg,nn,nsbuf->ncg,nsbuf->jcg,
-     close_neighbor_list(fr,FALSE,-1,-1,FALSE);
++                                cgs->index,bexcl,k,fr,FALSE,TRUE,TRUE,fr->solvent_opt);
 +                    nsbuf->ncg=nsbuf->nj=0;
 +                }
 +            }
 +        }
 +        /* setexcl(nri,i_atoms,excl,FALSE,bexcl); */
 +        setexcl(cgs->index[icg],cgs->index[icg+1],excl,FALSE,bexcl);
 +    }
- static void do_longrange(t_commrec *cr,gmx_localtop_t *top,t_forcerec *fr,
-                          int ngid,t_mdatoms *md,int icg,
-                          int jgid,int nlr,
-                          atom_id lr[],t_excl bexcl[],int shift,
-                          rvec x[],rvec box_size,t_nrnb *nrnb,
-                          real *lambda,real *dvdlambda,
-                          gmx_grppairener_t *grppener,
-                          gmx_bool bDoVdW,gmx_bool bDoCoul,
-                          gmx_bool bEvaluateNow,put_in_list_t *put_in_list,
-                          gmx_bool bHaveVdW[],
-                          gmx_bool bDoForces,rvec *f)
- {
-     int n,i;
-     t_nblist *nl;
-     
-     for(n=0; n<fr->nnblists; n++)
-     {
-         for(i=0; (i<eNL_NR); i++)
-         {
-             nl = &fr->nblists[n].nlist_lr[i];
-             if ((nl->nri > nl->maxnri-32) || bEvaluateNow)
-             {
-                 close_neighbor_list(fr,TRUE,n,i,FALSE);
-                 /* Evaluate the energies and forces */
-                 do_nonbonded(cr,fr,x,f,md,NULL,
-                              grppener->ener[fr->bBHAM ? egBHAMLR : egLJLR],
-                              grppener->ener[egCOULLR],
-                                                        grppener->ener[egGB],box_size,
-                              nrnb,lambda,dvdlambda,n,i,
-                              GMX_DONB_LR | GMX_DONB_FORCES);
-                 
-                 reset_neighbor_list(fr,TRUE,n,i);
-             }
-         }
-     }
-     
-     if (!bEvaluateNow)
-     {  
-         /* Put the long range particles in a list */
-         /* do_longrange is never called for QMMM  */
-         put_in_list(bHaveVdW,ngid,md,icg,jgid,nlr,lr,top->cgs.index,
-                     bexcl,shift,fr,TRUE,bDoVdW,bDoCoul);
-     }
- }
++    close_neighbor_lists(fr,FALSE);
 +    
 +    return nsearch;
 +}
 +
 +/************************************************
 + *
 + *    N S 5     G R I D     S T U F F
 + *
 + ************************************************/
 +
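 +/* Determine the range of grid cells [*dx0,*dx1] along one dimension that can
 + * contain neighbours within squared cut-off rc2 of a particle at coordinate x
 + * (grid index xgi), and store the squared distance from x to the near edge
 + * of each such cell in dcx2[].
 + */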
 +static inline void get_dx(int Nx,real gridx,real rc2,int xgi,real x,
 +                          int *dx0,int *dx1,real *dcx2)
 +{
 +    real dcx,tmp;
 +    int  xgi0,xgi1,i;
 +    
 +    if (xgi < 0)
 +    {
 +        *dx0 = 0;
 +        xgi0 = -1;
 +        *dx1 = -1;
 +        xgi1 = 0;
 +    }
 +    else if (xgi >= Nx)
 +    {
 +        *dx0 = Nx;
 +        xgi0 = Nx-1;
 +        *dx1 = Nx-1;
 +        xgi1 = Nx;
 +    }
 +    else
 +    {
 +        dcx2[xgi] = 0;
 +        *dx0 = xgi;
 +        xgi0 = xgi-1;
 +        *dx1 = xgi;
 +        xgi1 = xgi+1;
 +    }
 +    
 +    for(i=xgi0; i>=0; i--)
 +    {
 +        dcx = (i+1)*gridx-x;
 +        tmp = dcx*dcx;
 +        if (tmp >= rc2)
 +            break;
 +        *dx0 = i;
 +        dcx2[i] = tmp;
 +    }
 +    for(i=xgi1; i<Nx; i++)
 +    {
 +        dcx = i*gridx-x;
 +        tmp = dcx*dcx;
 +        if (tmp >= rc2)
 +        {
 +            break;
 +        }
 +        *dx1 = i;
 +        dcx2[i] = tmp;
 +    }
 +}
 +
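 +/* As get_dx, but for domain decomposition: the searched cell range [*g0,*g1]
 + * is additionally restricted by the home-zone split ncpddc and by the
 + * allowed shift range [shift_min,shift_max].
 + */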
 +static inline void get_dx_dd(int Nx,real gridx,real rc2,int xgi,real x,
 +                             int ncpddc,int shift_min,int shift_max,
 +                             int *g0,int *g1,real *dcx2)
 +{
 +    real dcx,tmp;
 +    int  g_min,g_max,shift_home;
 +    
 +    if (xgi < 0)
 +    {
 +        g_min = 0;
 +        g_max = Nx - 1;
 +        *g0   = 0;
 +        *g1   = -1;
 +    }
 +    else if (xgi >= Nx)
 +    {
 +        g_min = 0;
 +        g_max = Nx - 1;
 +        *g0   = Nx;
 +        *g1   = Nx - 1;
 +    }
 +    else
 +    {
 +        if (ncpddc == 0)
 +        {
 +            g_min = 0;
 +            g_max = Nx - 1;
 +        }
 +        else
 +        {
 +            if (xgi < ncpddc)
 +            {
 +                shift_home = 0;
 +            }
 +            else
 +            {
 +                shift_home = -1;
 +            }
 +            g_min = (shift_min == shift_home ? 0          : ncpddc);
 +            g_max = (shift_max == shift_home ? ncpddc - 1 : Nx - 1);
 +        }
 +        if (shift_min > 0)
 +        {
 +            *g0 = g_min;
 +            *g1 = g_min - 1;
 +        }
 +        else if (shift_max < 0)
 +        {
 +            *g0 = g_max + 1;
 +            *g1 = g_max;
 +        }
 +        else
 +        {
 +            *g0 = xgi;
 +            *g1 = xgi;
 +            dcx2[xgi] = 0;
 +        }
 +    }
 +    
 +    while (*g0 > g_min)
 +    {
 +        /* Check one grid cell down */
 +        dcx = ((*g0 - 1) + 1)*gridx - x;
 +        tmp = dcx*dcx;
 +        if (tmp >= rc2)
 +        {
 +            break;
 +        }
 +        (*g0)--;
 +        dcx2[*g0] = tmp;
 +    }
 +    
 +    while (*g1 < g_max)
 +    {
 +        /* Check one grid cell up */
 +        dcx = (*g1 + 1)*gridx - x;
 +        tmp = dcx*dcx;
 +        if (tmp >= rc2)
 +        {
 +            break;
 +        }
 +        (*g1)++;
 +        dcx2[*g1] = tmp;
 +    }
 +}
 +
 +
 +#define sqr(x) ((x)*(x))
 +#define calc_dx2(XI,YI,ZI,y) (sqr(XI-y[XX]) + sqr(YI-y[YY]) + sqr(ZI-y[ZZ]))
 +#define calc_cyl_dx2(XI,YI,y) (sqr(XI-y[XX]) + sqr(YI-y[YY]))
 +/****************************************************
 + *
 + *    F A S T   N E I G H B O R  S E A R C H I N G
 + *
 + *    Optimized neighboursearching routine using grid 
 + *    at least 1x1x1, see GROMACS manual
 + *
 + ****************************************************/
 +
-     if (rm2 > rs2)
-     {
-             /* Long range VdW and Coul buffers */
-         snew(ns->nl_lr_ljc,ngid);
-     }
-     if (rl2 > rm2)
-     {
-         /* Long range VdW or Coul only buffers */
-         snew(ns->nl_lr_one,ngid);
-     }
 +
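 +/* Return the squared cut-offs used for neighbour searching: rs2 for the
 + * short-range list (rlist), rvdw2 and rcoul2 for the VdW and Coulomb
 + * long-range lists, and rm2/rl2 as the smaller/larger of the latter two.
 + */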
 +static void get_cutoff2(t_forcerec *fr,gmx_bool bDoLongRange,
 +                        real *rvdw2,real *rcoul2,
 +                        real *rs2,real *rm2,real *rl2)
 +{
 +    *rs2 = sqr(fr->rlist);
++
 +    if (bDoLongRange && fr->bTwinRange)
 +    {
 +        /* The VdW and electrostatics long-range cut-offs could be different,
 +         * so we cannot simply set them both to rlistlong.
 +         */
 +        if (EVDW_MIGHT_BE_ZERO_AT_CUTOFF(fr->vdwtype) &&
 +            fr->rvdw > fr->rlist)
 +        {
 +            *rvdw2  = sqr(fr->rlistlong);
 +        }
 +        else
 +        {
 +            *rvdw2  = sqr(fr->rvdw);
 +        }
 +        if (EEL_MIGHT_BE_ZERO_AT_CUTOFF(fr->eeltype) &&
 +            fr->rcoulomb > fr->rlist)
 +        {
 +            *rcoul2 = sqr(fr->rlistlong);
 +        }
 +        else
 +        {
 +            *rcoul2 = sqr(fr->rcoulomb);
 +        }
 +    }
 +    else
 +    {
 +        /* Workaround for a gcc -O3 or -ffast-math problem */
 +        *rvdw2  = *rs2;
 +        *rcoul2 = *rs2;
 +    }
 +    *rm2 = min(*rvdw2,*rcoul2);
 +    *rl2 = max(*rvdw2,*rcoul2);
 +}
 +
 +static void init_nsgrid_lists(t_forcerec *fr,int ngid,gmx_ns_t *ns)
 +{
 +    real rvdw2,rcoul2,rs2,rm2,rl2;
 +    int j;
 +
 +    get_cutoff2(fr,TRUE,&rvdw2,&rcoul2,&rs2,&rm2,&rl2);
 +
 +    /* Short range buffers */
 +    snew(ns->nl_sr,ngid);
 +    /* Counters */
 +    snew(ns->nsr,ngid);
 +    snew(ns->nlr_ljc,ngid);
 +    snew(ns->nlr_one,ngid);
 +    
-         if (rm2 > rs2)
-         {
-             snew(ns->nl_lr_ljc[j],MAX_CG);
-         }
-         if (rl2 > rm2)
-         {
-             snew(ns->nl_lr_one[j],MAX_CG);
-         }
++    /* Always allocate both list types, since rcoulomb might now change with PME load balancing */
++    /* Long range VdW and Coul buffers */
++    snew(ns->nl_lr_ljc,ngid);
++    /* Long range VdW or Coul only buffers */
++    snew(ns->nl_lr_one,ngid);
++
 +    for(j=0; (j<ngid); j++) {
 +        snew(ns->nl_sr[j],MAX_CG);
-                        gmx_bool bDoLongRange,gmx_bool bDoForces,rvec *f,
-                        gmx_bool bMakeQMMMnblist)
++        snew(ns->nl_lr_ljc[j],MAX_CG);
++        snew(ns->nl_lr_one[j],MAX_CG);
 +    }
 +    if (debug)
 +    {
 +        fprintf(debug,
 +                "ns5_core: rs2 = %g, rm2 = %g, rl2 = %g (nm^2)\n",
 +                rs2,rm2,rl2);
 +    }
 +}
 +
 +static int nsgrid_core(FILE *log,t_commrec *cr,t_forcerec *fr,
 +                       matrix box,rvec box_size,int ngid,
 +                       gmx_localtop_t *top,
 +                       t_grid *grid,rvec x[],
 +                       t_excl bexcl[],gmx_bool *bExcludeAlleg,
 +                       t_nrnb *nrnb,t_mdatoms *md,
 +                       real *lambda,real *dvdlambda,
 +                       gmx_grppairener_t *grppener,
 +                       put_in_list_t *put_in_list,
 +                       gmx_bool bHaveVdW[],
-                                                                             shift,fr,FALSE,TRUE,TRUE);
++                       gmx_bool bDoLongRange,gmx_bool bMakeQMMMnblist)
 +{
 +    gmx_ns_t *ns;
 +    atom_id **nl_lr_ljc,**nl_lr_one,**nl_sr;
 +    int     *nlr_ljc,*nlr_one,*nsr;
 +    gmx_domdec_t *dd=NULL;
 +    t_block *cgs=&(top->cgs);
 +    int     *cginfo=fr->cginfo;
 +    /* atom_id *i_atoms,*cgsindex=cgs->index; */
 +    ivec    sh0,sh1,shp;
 +    int     cell_x,cell_y,cell_z;
 +    int     d,tx,ty,tz,dx,dy,dz,cj;
 +#ifdef ALLOW_OFFDIAG_LT_HALFDIAG
 +    int     zsh_ty,zsh_tx,ysh_tx;
 +#endif
 +    int     dx0,dx1,dy0,dy1,dz0,dz1;
 +    int     Nx,Ny,Nz,shift=-1,j,nrj,nns,nn=-1;
 +    real    gridx,gridy,gridz,grid_x,grid_y,grid_z;
 +    real    *dcx2,*dcy2,*dcz2;
 +    int     zgi,ygi,xgi;
 +    int     cg0,cg1,icg=-1,cgsnr,i0,igid,nri,naaj,max_jcg;
 +    int     jcg0,jcg1,jjcg,cgj0,jgid;
 +    int     *grida,*gridnra,*gridind;
 +    gmx_bool    rvdw_lt_rcoul,rcoul_lt_rvdw;
 +    rvec    xi,*cgcm,grid_offset;
 +    real    r2,rs2,rvdw2,rcoul2,rm2,rl2,XI,YI,ZI,dcx,dcy,dcz,tmp1,tmp2;
 +    int     *i_egp_flags;
 +    gmx_bool    bDomDec,bTriclinicX,bTriclinicY;
 +    ivec    ncpddc;
 +    
 +    ns = &fr->ns;
 +    
 +    bDomDec = DOMAINDECOMP(cr);
 +    if (bDomDec)
 +    {
 +        dd = cr->dd;
 +    }
 +    
 +    bTriclinicX = ((YY < grid->npbcdim &&
 +                    (!bDomDec || dd->nc[YY]==1) && box[YY][XX] != 0) ||
 +                   (ZZ < grid->npbcdim &&
 +                    (!bDomDec || dd->nc[ZZ]==1) && box[ZZ][XX] != 0));
 +    bTriclinicY =  (ZZ < grid->npbcdim &&
 +                    (!bDomDec || dd->nc[ZZ]==1) && box[ZZ][YY] != 0);
 +    
 +    cgsnr    = cgs->nr;
 +
 +    get_cutoff2(fr,bDoLongRange,&rvdw2,&rcoul2,&rs2,&rm2,&rl2);
 +
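 +    /* For pairs that end up in the "one interaction only" long-range list
 +     * (rm2 < r2 < rl2) we do VdW when the VdW cut-off is the longer of the
 +     * two, and Coulomb when the Coulomb cut-off is the longer one.
 +     */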
 +    rvdw_lt_rcoul = (rvdw2 >= rcoul2);
 +    rcoul_lt_rvdw = (rcoul2 >= rvdw2);
 +    
 +    if (bMakeQMMMnblist)
 +    {
 +        rm2 = rl2;
 +        rs2 = rl2;
 +    }
 +
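 +    /* Per energy group we fill three buffers: nl_sr for pairs within rlist,
 +     * nl_lr_ljc for twin-range pairs that need both LJ and Coulomb, and
 +     * nl_lr_one for twin-range pairs that need only the longer-ranged of the two.
 +     */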
 +    nl_sr     = ns->nl_sr;
 +    nsr       = ns->nsr;
 +    nl_lr_ljc = ns->nl_lr_ljc;
 +    nl_lr_one = ns->nl_lr_one;
 +    nlr_ljc   = ns->nlr_ljc;
 +    nlr_one   = ns->nlr_one;
 +    
 +    /* Unpack arrays */
 +    cgcm    = fr->cg_cm;
 +    Nx      = grid->n[XX];
 +    Ny      = grid->n[YY];
 +    Nz      = grid->n[ZZ];
 +    grida   = grid->a;
 +    gridind = grid->index;
 +    gridnra = grid->nra;
 +    nns     = 0;
 +    
 +    gridx      = grid->cell_size[XX];
 +    gridy      = grid->cell_size[YY];
 +    gridz      = grid->cell_size[ZZ];
 +    grid_x     = 1/gridx;
 +    grid_y     = 1/gridy;
 +    grid_z     = 1/gridz;
 +    copy_rvec(grid->cell_offset,grid_offset);
 +    copy_ivec(grid->ncpddc,ncpddc);
 +    dcx2       = grid->dcx2;
 +    dcy2       = grid->dcy2;
 +    dcz2       = grid->dcz2;
 +    
 +#ifdef ALLOW_OFFDIAG_LT_HALFDIAG
 +    zsh_ty = floor(-box[ZZ][YY]/box[YY][YY]+0.5);
 +    zsh_tx = floor(-box[ZZ][XX]/box[XX][XX]+0.5);
 +    ysh_tx = floor(-box[YY][XX]/box[XX][XX]+0.5);
 +    if (zsh_tx!=0 && ysh_tx!=0)
 +    {
 +        /* This could happen due to rounding, when both ratios are 0.5 */
 +        ysh_tx = 0;
 +    }
 +#endif
 +    
 +    debug_gmx();
 +
 +    if (fr->n_tpi)
 +    {
 +        /* We only want a list for the test particle */
 +        cg0 = cgsnr - 1;
 +    }
 +    else
 +    {
 +        cg0 = grid->icg0;
 +    }
 +    cg1 = grid->icg1;
 +
 +    /* Set the shift range */
 +    for(d=0; d<DIM; d++)
 +    {
 +        sh0[d] = -1;
 +        sh1[d] = 1;
 +        /* Check if we need periodicity shifts.
 +         * Without PBC or with domain decomposition we don't need them.
 +         */
 +        if (d >= ePBC2npbcdim(fr->ePBC) || (bDomDec && dd->nc[d] > 1))
 +        {
 +            shp[d] = 0;
 +        }
 +        else
 +        {
 +            if (d == XX &&
 +                box[XX][XX] - fabs(box[YY][XX]) - fabs(box[ZZ][XX]) < sqrt(rl2))
 +            {
 +                shp[d] = 2;
 +            }
 +            else
 +            {
 +                shp[d] = 1;
 +            }
 +        }
 +    }
 +    
 +    /* Loop over charge groups */
 +    for(icg=cg0; (icg < cg1); icg++)
 +    {
 +        igid = GET_CGINFO_GID(cginfo[icg]);
 +        /* Skip this charge group if all energy groups are excluded! */
 +        if (bExcludeAlleg[igid])
 +        {
 +            continue;
 +        }
 +        
 +        i0   = cgs->index[icg];
 +        
 +        if (bMakeQMMMnblist)
 +        { 
 +            /* Skip this charge group if it is not a QM atom while making a
 +             * QM/MM neighbourlist
 +             */
 +            if (md->bQM[i0]==FALSE)
 +            {
 +                continue; /* MM particle, go to next particle */ 
 +            }
 +            
 +            /* Compute the number of charge groups that fall within the control
 +             * of this one (icg)
 +             */
 +            naaj    = calc_naaj(icg,cgsnr);
 +            jcg0    = icg;
 +            jcg1    = icg + naaj;
 +            max_jcg = cgsnr;       
 +        } 
 +        else
 +        { 
 +            /* make a normal neighbourlist */
 +            
 +            if (bDomDec)
 +            {
 +                /* Get the j charge-group and dd cell shift ranges */
 +                dd_get_ns_ranges(cr->dd,icg,&jcg0,&jcg1,sh0,sh1);
 +                max_jcg = 0;
 +            }
 +            else
 +            {
 +                /* Compute the number of charge groups that fall within the control
 +                 * of this one (icg)
 +                 */
 +                naaj = calc_naaj(icg,cgsnr);
 +                jcg0 = icg;
 +                jcg1 = icg + naaj;
 +                
 +                if (fr->n_tpi)
 +                {
 +                    /* The i-particle is always the test particle,
 +                     * so we want all j-particles
 +                     */
 +                    max_jcg = cgsnr - 1;
 +                }
 +                else
 +                {
 +                    max_jcg  = jcg1 - cgsnr;
 +                }
 +            }
 +        }
 +        
 +        i_egp_flags = fr->egp_flags + igid*ngid;
 +        
 +        /* Set the exclusions for the atoms in charge group icg using a bitmask */
 +        setexcl(i0,cgs->index[icg+1],&top->excls,TRUE,bexcl);
 +        
 +        ci2xyz(grid,icg,&cell_x,&cell_y,&cell_z);
 +        
 +        /* Changed iicg to icg, DvdS 990115 
 +         * (but see consistency check above, DvdS 990330) 
 +         */
 +#ifdef NS5DB
 +        fprintf(log,"icg=%5d, naaj=%5d, cell %d %d %d\n",
 +                icg,naaj,cell_x,cell_y,cell_z);
 +#endif
 +        /* Loop over shift vectors in three dimensions */
 +        for (tz=-shp[ZZ]; tz<=shp[ZZ]; tz++)
 +        {
 +            ZI = cgcm[icg][ZZ]+tz*box[ZZ][ZZ];
 +            /* Calculate range of cells in Z direction that have the shift tz */
 +            zgi = cell_z + tz*Nz;
 +#define FAST_DD_NS
 +#ifndef FAST_DD_NS
 +            get_dx(Nz,gridz,rl2,zgi,ZI,&dz0,&dz1,dcz2);
 +#else
 +            get_dx_dd(Nz,gridz,rl2,zgi,ZI-grid_offset[ZZ],
 +                      ncpddc[ZZ],sh0[ZZ],sh1[ZZ],&dz0,&dz1,dcz2);
 +#endif
 +            if (dz0 > dz1)
 +            {
 +                continue;
 +            }
 +            for (ty=-shp[YY]; ty<=shp[YY]; ty++)
 +            {
 +                YI = cgcm[icg][YY]+ty*box[YY][YY]+tz*box[ZZ][YY];
 +                /* Calculate range of cells in Y direction that have the shift ty */
 +                if (bTriclinicY)
 +                {
 +                    ygi = (int)(Ny + (YI - grid_offset[YY])*grid_y) - Ny;
 +                }
 +                else
 +                {
 +                    ygi = cell_y + ty*Ny;
 +                }
 +#ifndef FAST_DD_NS
 +                get_dx(Ny,gridy,rl2,ygi,YI,&dy0,&dy1,dcy2);
 +#else
 +                get_dx_dd(Ny,gridy,rl2,ygi,YI-grid_offset[YY],
 +                          ncpddc[YY],sh0[YY],sh1[YY],&dy0,&dy1,dcy2);
 +#endif
 +                if (dy0 > dy1)
 +                {
 +                    continue;
 +                }
 +                for (tx=-shp[XX]; tx<=shp[XX]; tx++)
 +                {
 +                    XI = cgcm[icg][XX]+tx*box[XX][XX]+ty*box[YY][XX]+tz*box[ZZ][XX];
 +                    /* Calculate range of cells in X direction that have the shift tx */
 +                    if (bTriclinicX)
 +                    {
 +                        xgi = (int)(Nx + (XI - grid_offset[XX])*grid_x) - Nx;
 +                    }
 +                    else
 +                    {
 +                        xgi = cell_x + tx*Nx;
 +                    }
 +#ifndef FAST_DD_NS
 +                    get_dx(Nx,gridx,rl2,xgi*Nx,XI,&dx0,&dx1,dcx2);
 +#else
 +                    get_dx_dd(Nx,gridx,rl2,xgi,XI-grid_offset[XX],
 +                              ncpddc[XX],sh0[XX],sh1[XX],&dx0,&dx1,dcx2);
 +#endif
 +                    if (dx0 > dx1)
 +                    {
 +                        continue;
 +                    }
 +                    /* AdResS: an explicit cg that has a weighting function of 0 is excluded
 +                     * from the neighbour list as it will not interact */
 +                    if (fr->adress_type != eAdressOff){
 +                        if (md->wf[cgs->index[icg]]==0 && egp_explicit(fr, igid)){
 +                            continue;
 +                        }
 +                    }
 +                    /* Get shift vector */      
 +                    shift=XYZ2IS(tx,ty,tz);
 +#ifdef NS5DB
 +                    range_check(shift,0,SHIFTS);
 +#endif
 +                    for(nn=0; (nn<ngid); nn++)
 +                    {
 +                        nsr[nn]      = 0;
 +                        nlr_ljc[nn]  = 0;
 +                        nlr_one[nn] = 0;
 +                    }
 +#ifdef NS5DB
 +                    fprintf(log,"shift: %2d, dx0,1: %2d,%2d, dy0,1: %2d,%2d, dz0,1: %2d,%2d\n",
 +                            shift,dx0,dx1,dy0,dy1,dz0,dz1);
 +                    fprintf(log,"cgcm: %8.3f  %8.3f  %8.3f\n",cgcm[icg][XX],
 +                            cgcm[icg][YY],cgcm[icg][ZZ]);
 +                    fprintf(log,"xi:   %8.3f  %8.3f  %8.3f\n",XI,YI,ZI);
 +#endif
 +                    for (dx=dx0; (dx<=dx1); dx++)
 +                    {
 +                        tmp1 = rl2 - dcx2[dx];
 +                        for (dy=dy0; (dy<=dy1); dy++)
 +                        {
 +                            tmp2 = tmp1 - dcy2[dy];
 +                            if (tmp2 > 0)
 +                            {
 +                                for (dz=dz0; (dz<=dz1); dz++) {
 +                                    if (tmp2 > dcz2[dz]) {
 +                                        /* Find grid-cell cj in which possible neighbours are */
 +                                        cj   = xyz2ci(Ny,Nz,dx,dy,dz);
 +                                        
 +                                        /* Check how many cgs (nrj) there are in this cell */
 +                                        nrj  = gridnra[cj];
 +                                        
 +                                        /* Find the offset in the cg list */
 +                                        cgj0 = gridind[cj];
 +                                        
 +                                        /* Check if all j's are out of range so we
 +                                         * can skip the whole cell.
 +                                         * Should save some time, especially with DD.
 +                                         */
 +                                        if (nrj == 0 ||
 +                                            (grida[cgj0] >= max_jcg &&
 +                                             (grida[cgj0] >= jcg1 || grida[cgj0+nrj-1] < jcg0)))
 +                                        {
 +                                            continue;
 +                                        }
 +                                        
 +                                        /* Loop over cgs */
 +                                        for (j=0; (j<nrj); j++)
 +                                        {
 +                                            jjcg = grida[cgj0+j];
 +                                            
 +                                            /* check whether this guy is in range! */
 +                                            if ((jjcg >= jcg0 && jjcg < jcg1) ||
 +                                                (jjcg < max_jcg))
 +                                            {
 +                                                r2=calc_dx2(XI,YI,ZI,cgcm[jjcg]);
 +                                                if (r2 < rl2) {
 +                                                    /* jgid = gid[cgsatoms[cgsindex[jjcg]]]; */
 +                                                    jgid = GET_CGINFO_GID(cginfo[jjcg]);
 +                                                    /* check energy group exclusions */
 +                                                    if (!(i_egp_flags[jgid] & EGP_EXCL))
 +                                                    {
 +                                                        if (r2 < rs2)
 +                                                        {
 +                                                            if (nsr[jgid] >= MAX_CG)
 +                                                            {
++                                                                /* Add to short-range list */
 +                                                                put_in_list(bHaveVdW,ngid,md,icg,jgid,
 +                                                                            nsr[jgid],nl_sr[jgid],
 +                                                                            cgs->index,/* cgsatoms, */ bexcl,
-                                                                 do_longrange(cr,top,fr,ngid,md,icg,jgid,
-                                                                              nlr_ljc[jgid],
-                                                                              nl_lr_ljc[jgid],bexcl,shift,x,
-                                                                              box_size,nrnb,
-                                                                              lambda,dvdlambda,
-                                                                              grppener,
-                                                                              TRUE,TRUE,FALSE,
-                                                                              put_in_list,
-                                                                              bHaveVdW,
-                                                                              bDoForces,f);
++                                                                            shift,fr,FALSE,TRUE,TRUE,fr->solvent_opt);
 +                                                                nsr[jgid]=0;
 +                                                            }
 +                                                            nl_sr[jgid][nsr[jgid]++]=jjcg;
 +                                                        } 
 +                                                        else if (r2 < rm2)
 +                                                        {
 +                                                            if (nlr_ljc[jgid] >= MAX_CG)
 +                                                            {
-                                                             if (nlr_one[jgid] >= MAX_CG) {
-                                                                 do_longrange(cr,top,fr,ngid,md,icg,jgid,
-                                                                              nlr_one[jgid],
-                                                                              nl_lr_one[jgid],bexcl,shift,x,
-                                                                              box_size,nrnb,
-                                                                              lambda,dvdlambda,
-                                                                              grppener,
-                                                                              rvdw_lt_rcoul,rcoul_lt_rvdw,FALSE,
-                                                                              put_in_list,
-                                                                              bHaveVdW,
-                                                                              bDoForces,f);
++                                                                /* Add to LJ+coulomb long-range list */
++                                                                put_in_list(bHaveVdW,ngid,md,icg,jgid,
++                                                                            nlr_ljc[jgid],nl_lr_ljc[jgid],top->cgs.index,
++                                                                            bexcl,shift,fr,TRUE,TRUE,TRUE,fr->solvent_opt);
 +                                                                nlr_ljc[jgid]=0;
 +                                                            }
 +                                                            nl_lr_ljc[jgid][nlr_ljc[jgid]++]=jjcg;
 +                                                        }
 +                                                        else
 +                                                        {
-                                         shift,fr,FALSE,TRUE,TRUE);
++                                                            if (nlr_one[jgid] >= MAX_CG)
++                                                            {
++                                                                /* Add to long-range list with only coul, or only LJ */
++                                                                put_in_list(bHaveVdW,ngid,md,icg,jgid,
++                                                                            nlr_one[jgid],nl_lr_one[jgid],top->cgs.index,
++                                                                            bexcl,shift,fr,TRUE,rvdw_lt_rcoul,rcoul_lt_rvdw,fr->solvent_opt);
 +                                                                nlr_one[jgid]=0;
 +                                                            }
 +                                                            nl_lr_one[jgid][nlr_one[jgid]++]=jjcg;
 +                                                        }
 +                                                    }
 +                                                }
 +                                                nns++;
 +                                            }
 +                                        }
 +                                    }
 +                                }
 +                            }
 +                        }
 +                    }
 +                    /* CHECK whether there is anything left in the buffers */
 +                    for(nn=0; (nn<ngid); nn++)
 +                    {
 +                        if (nsr[nn] > 0)
 +                        {
 +                            put_in_list(bHaveVdW,ngid,md,icg,nn,nsr[nn],nl_sr[nn],
 +                                        cgs->index, /* cgsatoms, */ bexcl,
-                             do_longrange(cr,top,fr,ngid,md,icg,nn,nlr_ljc[nn],
-                                          nl_lr_ljc[nn],bexcl,shift,x,box_size,nrnb,
-                                          lambda,dvdlambda,grppener,TRUE,TRUE,FALSE,
-                                          put_in_list,bHaveVdW,bDoForces,f);
++                                        shift,fr,FALSE,TRUE,TRUE,fr->solvent_opt);
 +                        }
 +                        
 +                        if (nlr_ljc[nn] > 0)
 +                        {
-                             do_longrange(cr,top,fr,ngid,md,icg,nn,nlr_one[nn],
-                                          nl_lr_one[nn],bexcl,shift,x,box_size,nrnb,
-                                          lambda,dvdlambda,grppener,
-                                          rvdw_lt_rcoul,rcoul_lt_rvdw,FALSE,
-                                          put_in_list,bHaveVdW,bDoForces,f);
++                            put_in_list(bHaveVdW,ngid,md,icg,nn,nlr_ljc[nn],
++                                        nl_lr_ljc[nn],top->cgs.index,
++                                        bexcl,shift,fr,TRUE,TRUE,TRUE,fr->solvent_opt);
 +                        }
 +                        
 +                        if (nlr_one[nn] > 0)
 +                        {
-     /* Perform any left over force calculations */
-     for (nn=0; (nn<ngid); nn++)
-     {
-         if (rm2 > rs2)
-         {
-             do_longrange(cr,top,fr,0,md,icg,nn,nlr_ljc[nn],
-                          nl_lr_ljc[nn],bexcl,shift,x,box_size,nrnb,
-                          lambda,dvdlambda,grppener,
-                          TRUE,TRUE,TRUE,put_in_list,bHaveVdW,bDoForces,f);
-         }
-         if (rl2 > rm2) {
-             do_longrange(cr,top,fr,0,md,icg,nn,nlr_one[nn],
-                          nl_lr_one[nn],bexcl,shift,x,box_size,nrnb,
-                          lambda,dvdlambda,grppener,
-                          rvdw_lt_rcoul,rcoul_lt_rvdw,
-                          TRUE,put_in_list,bHaveVdW,bDoForces,f);
-         }
-     }
++                            put_in_list(bHaveVdW,ngid,md,icg,nn,nlr_one[nn],
++                                        nl_lr_one[nn],top->cgs.index,
++                                        bexcl,shift,fr,TRUE,rvdw_lt_rcoul,rcoul_lt_rvdw,fr->solvent_opt);
 +                        }
 +                    }
 +                }
 +            }
 +        }
 +        /* setexcl(nri,i_atoms,&top->atoms.excl,FALSE,bexcl); */
 +        setexcl(cgs->index[icg],cgs->index[icg+1],&top->excls,FALSE,bexcl);
 +    }
-     
-     /* Close off short range neighbourlists */
-     close_neighbor_list(fr,FALSE,-1,-1,bMakeQMMMnblist);
++    /* No need to perform any left-over force calculations anymore (as we used to do here)
++     * since we now save the proper long-range lists for later evaluation.
++     */
++
 +    debug_gmx();
-                       gmx_bool bDoLongRange,
-                       gmx_bool bDoForces,rvec *f)
++     
++    /* Close neighbourlists */
++    close_neighbor_lists(fr,bMakeQMMMnblist);
 +    
 +    return nns;
 +}
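
In the merged version above, the removed do_longrange() calls are replaced by buffered put_in_list() calls: j charge groups are collected into per-energy-group buffers, a buffer is flushed into the corresponding long-range neighbour list whenever it reaches MAX_CG, and one final flush handles whatever is left over. A minimal, self-contained sketch of that buffer-and-flush pattern, with a hypothetical flush() standing in for put_in_list():

    #include <stdio.h>

    #define MAX_CG 4                      /* flush threshold; the real code uses a larger value */

    /* hypothetical stand-in for put_in_list(): consume one full or partial buffer */
    static void flush(const int *buf, int n)
    {
        printf("flushing %d charge groups\n", n);
    }

    int main(void)
    {
        int buf[MAX_CG];
        int n = 0;
        int jjcg;

        for (jjcg = 0; jjcg < 10; jjcg++)
        {
            if (n >= MAX_CG)              /* buffer full: hand it to the list and restart */
            {
                flush(buf, n);
                n = 0;
            }
            buf[n++] = jjcg;
        }
        if (n > 0)                        /* check whether anything is left in the buffer */
        {
            flush(buf, n);
        }
        return 0;
    }
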
 +
 +void ns_realloc_natoms(gmx_ns_t *ns,int natoms)
 +{
 +    int i;
 +    
 +    if (natoms > ns->nra_alloc)
 +    {
 +        ns->nra_alloc = over_alloc_dd(natoms);
 +        srenew(ns->bexcl,ns->nra_alloc);
 +        for(i=0; i<ns->nra_alloc; i++)
 +        {
 +            ns->bexcl[i] = 0;
 +        }
 +    }
 +}
 +
 +void init_ns(FILE *fplog,const t_commrec *cr,
 +             gmx_ns_t *ns,t_forcerec *fr,
 +             const gmx_mtop_t *mtop,
 +             matrix box)
 +{
 +    int  mt,icg,nr_in_cg,maxcg,i,j,jcg,ngid,ncg;
 +    t_block *cgs;
 +    char *ptr;
 +    
 +    /* Compute the largest charge group size (# atoms) */
 +    nr_in_cg=1;
 +    for(mt=0; mt<mtop->nmoltype; mt++) {
 +        cgs = &mtop->moltype[mt].cgs;
 +        for (icg=0; (icg < cgs->nr); icg++)
 +        {
 +            nr_in_cg=max(nr_in_cg,(int)(cgs->index[icg+1]-cgs->index[icg]));
 +        }
 +    }
 +
 +    /* Verify that the largest charge group is <= max cg.
 +     * The maximum is set by the width of the local exclusion type,
 +     * since exclusions are stored in bits. (If the type is not large
 +     * enough, enlarge it: unsigned char -> unsigned short -> unsigned long.)
 +     */
 +    maxcg = sizeof(t_excl)*8;
 +    if (nr_in_cg > maxcg)
 +    {
 +        gmx_fatal(FARGS,"Max #atoms in a charge group: %d > %d\n",
 +                  nr_in_cg,maxcg);
 +    }
 +    
 +    ngid = mtop->groups.grps[egcENER].nr;
 +    snew(ns->bExcludeAlleg,ngid);
 +    for(i=0; i<ngid; i++) {
 +        ns->bExcludeAlleg[i] = TRUE;
 +        for(j=0; j<ngid; j++)
 +        {
 +            if (!(fr->egp_flags[i*ngid+j] & EGP_EXCL))
 +            {
 +                ns->bExcludeAlleg[i] = FALSE;
 +            }
 +        }
 +    }
 +    
 +    if (fr->bGrid) {
 +        /* Grid search */
 +        ns->grid = init_grid(fplog,fr);
 +        init_nsgrid_lists(fr,ngid,ns);
 +    }
 +    else
 +    {
 +        /* Simple search */
 +        snew(ns->ns_buf,ngid);
 +        for(i=0; (i<ngid); i++)
 +        {
 +            snew(ns->ns_buf[i],SHIFTS);
 +        }
 +        ncg = ncg_mtop(mtop);
 +        snew(ns->simple_aaj,2*ncg);
 +        for(jcg=0; (jcg<ncg); jcg++)
 +        {
 +            ns->simple_aaj[jcg]     = jcg;
 +            ns->simple_aaj[jcg+ncg] = jcg;
 +        }
 +    }
 +    
 +    /* Create array that determines whether or not atoms have VdW */
 +    snew(ns->bHaveVdW,fr->ntype);
 +    for(i=0; (i<fr->ntype); i++)
 +    {
 +        for(j=0; (j<fr->ntype); j++)
 +        {
 +            ns->bHaveVdW[i] = (ns->bHaveVdW[i] || 
 +                               (fr->bBHAM ? 
 +                                ((BHAMA(fr->nbfp,fr->ntype,i,j) != 0) ||
 +                                 (BHAMB(fr->nbfp,fr->ntype,i,j) != 0) ||
 +                                 (BHAMC(fr->nbfp,fr->ntype,i,j) != 0)) :
 +                                ((C6(fr->nbfp,fr->ntype,i,j) != 0) ||
 +                                 (C12(fr->nbfp,fr->ntype,i,j) != 0))));
 +        }
 +    }
 +    if (debug) 
 +        pr_bvec(debug,0,"bHaveVdW",ns->bHaveVdW,fr->ntype,TRUE);
 +    
 +    ns->nra_alloc = 0;
 +    ns->bexcl = NULL;
 +    if (!DOMAINDECOMP(cr))
 +    {
 +        /* This could be reduced with particle decomposition */
 +        ns_realloc_natoms(ns,mtop->natoms);
 +    }
 +
 +    ns->nblist_initialized=FALSE;
 +
 +    /* nbr list debug dump */
 +    {
 +        char *ptr=getenv("GMX_DUMP_NL");
 +        if (ptr)
 +        {
 +            ns->dump_nl=strtol(ptr,NULL,10);
 +            if (fplog)
 +            {
 +                fprintf(fplog, "GMX_DUMP_NL = %d", ns->dump_nl);
 +            }
 +        }
 +        else
 +        {
 +            ns->dump_nl=0;
 +        }
 +    }
 +}
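
init_ns() above refuses charge groups with more atoms than sizeof(t_excl)*8, because intra-charge-group exclusions are stored as one bit per atom in a t_excl word. A hedged standalone sketch of that capacity check; t_excl_sketch is a hypothetical stand-in for the real t_excl typedef:

    #include <stdio.h>

    typedef unsigned char t_excl_sketch;              /* hypothetical width; GROMACS may use a wider type */

    int main(void)
    {
        int maxcg    = (int)sizeof(t_excl_sketch)*8;  /* one exclusion bit per atom of a charge group */
        int nr_in_cg = 12;                            /* pretend this is the largest group in the topology */

        if (nr_in_cg > maxcg)
        {
            /* same condition that makes init_ns() call gmx_fatal() */
            fprintf(stderr, "Max #atoms in a charge group: %d > %d\n", nr_in_cg, maxcg);
            return 1;
        }
        printf("largest charge group (%d atoms) fits in a %d-bit exclusion mask\n", nr_in_cg, maxcg);
        return 0;
    }
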
 +
 +                       
 +int search_neighbours(FILE *log,t_forcerec *fr,
 +                      rvec x[],matrix box,
 +                      gmx_localtop_t *top,
 +                      gmx_groups_t *groups,
 +                      t_commrec *cr,
 +                      t_nrnb *nrnb,t_mdatoms *md,
 +                      real *lambda,real *dvdlambda,
 +                      gmx_grppairener_t *grppener,
 +                      gmx_bool bFillGrid,
-       
++                      gmx_bool bDoLongRangeNS)
 +{
 +    t_block  *cgs=&(top->cgs);
 +    rvec     box_size,grid_x0,grid_x1;
 +    int      i,j,m,ngid;
 +    real     min_size,grid_dens;
 +    int      nsearch;
 +    gmx_bool     bGrid;
 +    char     *ptr;
 +    gmx_bool     *i_egp_flags;
 +    int      cg_start,cg_end,start,end;
 +    gmx_ns_t *ns;
 +    t_grid   *grid;
 +    gmx_domdec_zones_t *dd_zones;
 +    put_in_list_t *put_in_list;
-     reset_neighbor_list(fr,FALSE,-1,-1);
++
 +    ns = &fr->ns;
 +
 +    /* Set some local variables */
 +    bGrid = fr->bGrid;
 +    ngid = groups->grps[egcENER].nr;
 +    
 +    for(m=0; (m<DIM); m++)
 +    {
 +        box_size[m] = box[m][m];
 +    }
 +  
 +    if (fr->ePBC != epbcNONE)
 +    {
 +        if (sqr(fr->rlistlong) >= max_cutoff2(fr->ePBC,box))
 +        {
 +            gmx_fatal(FARGS,"One of the box vectors has become shorter than twice the cut-off length or box_yy-|box_zy| or box_zz has become smaller than the cut-off.");
 +        }
 +        if (!bGrid)
 +        {
 +            min_size = min(box_size[XX],min(box_size[YY],box_size[ZZ]));
 +            if (2*fr->rlistlong >= min_size)
 +                gmx_fatal(FARGS,"One of the box diagonal elements has become smaller than twice the cut-off length.");
 +        }
 +    }
 +    
 +    if (DOMAINDECOMP(cr))
 +    {
 +        ns_realloc_natoms(ns,cgs->index[cgs->nr]);
 +    }
 +    debug_gmx();
 +    
 +    /* Reset the neighbourlists */
-                               bDoLongRange,bDoForces,f,
-                               FALSE);
++    reset_neighbor_lists(fr,TRUE,TRUE);
 +    
 +    if (bGrid && bFillGrid)
 +    {
 +              
 +        grid = ns->grid;
 +        if (DOMAINDECOMP(cr))
 +        {
 +            dd_zones = domdec_zones(cr->dd);
 +        }
 +        else
 +        {
 +            dd_zones = NULL;
 +
 +            get_nsgrid_boundaries(grid->nboundeddim,box,NULL,NULL,NULL,NULL,
 +                                  cgs->nr,fr->cg_cm,grid_x0,grid_x1,&grid_dens);
 +
 +            grid_first(log,grid,NULL,NULL,fr->ePBC,box,grid_x0,grid_x1,
 +                       fr->rlistlong,grid_dens);
 +        }
 +        debug_gmx();
 +        
 +        /* Don't know why this all is... (DvdS 3/99) */
 +#ifndef SEGV
 +        start = 0;
 +        end   = cgs->nr;
 +#else
 +        start = fr->cg0;
 +        end   = (cgs->nr+1)/2;
 +#endif
 +        
 +        if (DOMAINDECOMP(cr))
 +        {
 +            end = cgs->nr;
 +            fill_grid(log,dd_zones,grid,end,-1,end,fr->cg_cm);
 +            grid->icg0 = 0;
 +            grid->icg1 = dd_zones->izone[dd_zones->nizone-1].cg1;
 +        }
 +        else
 +        {
 +            fill_grid(log,NULL,grid,cgs->nr,fr->cg0,fr->hcg,fr->cg_cm);
 +            grid->icg0 = fr->cg0;
 +            grid->icg1 = fr->hcg;
 +            debug_gmx();
 +            
 +            if (PARTDECOMP(cr))
 +                mv_grid(cr,grid);
 +            debug_gmx();
 +        }
 +        
 +        calc_elemnr(log,grid,start,end,cgs->nr);
 +        calc_ptrs(grid);
 +        grid_last(log,grid,start,end,cgs->nr);
 +        
 +        if (gmx_debug_at)
 +        {
 +            check_grid(debug,grid);
 +            print_grid(debug,grid);
 +        }
 +    }
 +    else if (fr->n_tpi)
 +    {
 +        /* Set the grid cell index for the test particle only.
 +         * The cell to cg index is not corrected, but that does not matter.
 +         */
 +        fill_grid(log,NULL,ns->grid,fr->hcg,fr->hcg-1,fr->hcg,fr->cg_cm);
 +    }
 +    debug_gmx();
 +    
 +    if (!fr->ns.bCGlist)
 +    {
 +        put_in_list = put_in_list_at;
 +    }
 +    else
 +    {
 +        put_in_list = put_in_list_cg;
 +    }
 +
 +    /* Do the core! */
 +    if (bGrid)
 +    {
 +        grid = ns->grid;
 +        nsearch = nsgrid_core(log,cr,fr,box,box_size,ngid,top,
 +                              grid,x,ns->bexcl,ns->bExcludeAlleg,
 +                              nrnb,md,lambda,dvdlambda,grppener,
 +                              put_in_list,ns->bHaveVdW,
-                                    bDoLongRange,bDoForces,f,
-                                    TRUE);
++                              bDoLongRangeNS,FALSE);
 +        
 +        /* neighbour searching without QMMM! QM atoms have zero charge in
 +         * the classical calculation. The charge-charge interaction
 +         * between QM and MM atoms is handled in the QMMM core calculation
 +         * (see QMMM.c). The VDW, however, we would like to compute
 +         * classically, and the QM MM atom pairs have just been put in the
 +         * corresponding neighbourlists. In case of QMMM we still need to
 +         * fill a special QMMM neighbourlist that contains all neighbours
 +         * of the QM atoms. If bQMMM is true, this list will now be made:
 +         */
 +        if (fr->bQMMM && fr->qr->QMMMscheme!=eQMMMschemeoniom)
 +        {
 +            nsearch += nsgrid_core(log,cr,fr,box,box_size,ngid,top,
 +                                   grid,x,ns->bexcl,ns->bExcludeAlleg,
 +                                   nrnb,md,lambda,dvdlambda,grppener,
 +                                   put_in_list_qmmm,ns->bHaveVdW,
-     
++                                   bDoLongRangeNS,TRUE);
 +        }
 +    }
 +    else 
 +    {
 +        nsearch = ns_simple_core(fr,top,md,box,box_size,
 +                                 ns->bexcl,ns->simple_aaj,
 +                                 ngid,ns->ns_buf,put_in_list,ns->bHaveVdW);
 +    }
 +    debug_gmx();
++
 +#ifdef DEBUG
 +    pr_nsblock(log);
 +#endif
 +    
 +    inc_nrnb(nrnb,eNR_NS,nsearch);
 +    /* inc_nrnb(nrnb,eNR_LR,fr->nlr); */
 +    
 +    return nsearch;
 +}
 +
 +int natoms_beyond_ns_buffer(t_inputrec *ir,t_forcerec *fr,t_block *cgs,
 +                            matrix scale_tot,rvec *x)
 +{
 +    int  cg0,cg1,cg,a0,a1,a,i,j;
 +    real rint,hbuf2,scale;
 +    rvec *cg_cm,cgsc;
 +    gmx_bool bIsotropic;
 +    int  nBeyond;
 +    
 +    nBeyond = 0;
 +    
 +    rint = max(ir->rcoulomb,ir->rvdw);
 +    if (ir->rlist < rint)
 +    {
 +        gmx_fatal(FARGS,"The neighbor search buffer has negative size: %f nm",
 +                  ir->rlist - rint);
 +    }
 +    cg_cm = fr->cg_cm;
 +    
 +    cg0 = fr->cg0;
 +    cg1 = fr->hcg;
 +    
 +    if (!EI_DYNAMICS(ir->eI) || !DYNAMIC_BOX(*ir))
 +    {
 +        hbuf2 = sqr(0.5*(ir->rlist - rint));
 +        for(cg=cg0; cg<cg1; cg++)
 +        {
 +            a0 = cgs->index[cg];
 +            a1 = cgs->index[cg+1];
 +            for(a=a0; a<a1; a++)
 +            {
 +                if (distance2(cg_cm[cg],x[a]) > hbuf2)
 +                {
 +                    nBeyond++;
 +                }
 +            }
 +        }
 +    }
 +    else
 +    {
 +        bIsotropic = TRUE;
 +        scale = scale_tot[0][0];
 +        for(i=1; i<DIM; i++)
 +        {
 +            /* With anisotropic scaling, the original spherical ns volumes become
 +             * ellipsoids. To avoid costly transformations we use the minimum
 +             * eigenvalue of the scaling matrix for determining the buffer size.
 +             * Since the lower half is 0, the eigenvalues are the diagonal elements.
 +             */
 +            scale = min(scale,scale_tot[i][i]);
 +            if (scale_tot[i][i] != scale_tot[i-1][i-1])
 +            {
 +                bIsotropic = FALSE;
 +            }
 +            for(j=0; j<i; j++)
 +            {
 +                if (scale_tot[i][j] != 0)
 +                {
 +                    bIsotropic = FALSE;
 +                }
 +            }
 +        }
 +        hbuf2 = sqr(0.5*(scale*ir->rlist - rint));
 +        if (bIsotropic)
 +        {
 +            for(cg=cg0; cg<cg1; cg++)
 +            {
 +                svmul(scale,cg_cm[cg],cgsc);
 +                a0 = cgs->index[cg];
 +                a1 = cgs->index[cg+1];
 +                for(a=a0; a<a1; a++)
 +                {
 +                    if (distance2(cgsc,x[a]) > hbuf2)
 +                    {                    
 +                        nBeyond++;
 +                    }
 +                }
 +            }
 +        }
 +        else
 +        {
 +            /* Anisotropic scaling */
 +            for(cg=cg0; cg<cg1; cg++)
 +            {
 +                /* Since scale_tot contains the transpose of the scaling matrix,
 +                 * we need to multiply with the transpose.
 +                 */
 +                tmvmul_ur0(scale_tot,cg_cm[cg],cgsc);
 +                a0 = cgs->index[cg];
 +                a1 = cgs->index[cg+1];
 +                for(a=a0; a<a1; a++)
 +                {
 +                    if (distance2(cgsc,x[a]) > hbuf2)
 +                    {
 +                        nBeyond++;
 +                    }
 +                }
 +            }
 +        }
 +    }
 +    
 +    return nBeyond;
 +}
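
For the static-box, isotropic case, natoms_beyond_ns_buffer() above flags an atom once it has drifted more than half the buffer rlist - max(rcoulomb, rvdw) away from its charge-group centre. A small numerical illustration of that criterion with made-up cut-offs and displacement:

    #include <stdio.h>

    int main(void)
    {
        double rlist = 1.0, rcoulomb = 0.9, rvdw = 0.9;     /* nm, illustrative values only */
        double rint  = (rcoulomb > rvdw) ? rcoulomb : rvdw;
        double hbuf2 = 0.25*(rlist - rint)*(rlist - rint);  /* sqr(0.5*(rlist - rint)) = 0.0025 nm^2 */

        double d2 = 0.06*0.06;                              /* squared displacement of one atom: 0.0036 nm^2 */

        printf("hbuf2 = %g nm^2, displacement^2 = %g nm^2 -> %s the buffer\n",
               hbuf2, d2, (d2 > hbuf2) ? "beyond" : "within");
        return 0;
    }
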
index 6e808087f72d19a8d104c73ad2f96bb93fe2856e,0000000000000000000000000000000000000000..f03117c3129d784869a554421b81886228c66df5
mode 100644,000000..100644
--- /dev/null
@@@ -1,1115 -1,0 +1,1112 @@@
-       if(qr->qm[j]->bOPT || qr->qm[j]->bTS){
-       for(i=0;i<qm_nr;i++){
-         qr->qm[j]->c6[i]  =  C6(fr->nbfp,mtop->ffparams.atnr,
-                                 atom->type,atom->type)/c6au;
-         qr->qm[j]->c12[i] = C12(fr->nbfp,mtop->ffparams.atnr,
-                                 atom->type,atom->type)/c12au;
-       }
 +/*
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 + 
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <math.h>
 +#include "sysstuff.h"
 +#include "typedefs.h"
 +#include "macros.h"
 +#include "smalloc.h"
 +#include "physics.h"
 +#include "macros.h"
 +#include "vec.h"
 +#include "force.h"
 +#include "invblock.h"
 +#include "confio.h"
 +#include "names.h"
 +#include "network.h"
 +#include "pbc.h"
 +#include "ns.h"
 +#include "nrnb.h"
 +#include "bondf.h"
 +#include "mshift.h"
 +#include "txtdump.h"
 +#include "copyrite.h"
 +#include "qmmm.h"
 +#include <stdio.h>
 +#include <string.h>
 +#include "gmx_fatal.h"
 +#include "typedefs.h"
 +#include <stdlib.h>
 +#include "mtop_util.h"
 +
 +
 +/* declarations of the interfaces to the QM packages. The _SH indicate
 + * the QM interfaces can be used for Surface Hopping simulations 
 + */
 +#ifdef GMX_QMMM_GAMESS
 +/* GAMESS interface */
 +
 +void 
 +init_gamess(t_commrec *cr, t_QMrec *qm, t_MMrec *mm);
 +
 +real 
 +call_gamess(t_commrec *cr,t_forcerec *fr,
 +            t_QMrec *qm, t_MMrec *mm,rvec f[], rvec fshift[]);
 +
 +#elif defined GMX_QMMM_MOPAC
 +/* MOPAC interface */
 +
 +void 
 +init_mopac(t_commrec *cr, t_QMrec *qm, t_MMrec *mm);
 +
 +real 
 +call_mopac(t_commrec *cr,t_forcerec *fr, t_QMrec *qm, 
 +           t_MMrec *mm,rvec f[], rvec fshift[]);
 +
 +real 
 +call_mopac_SH(t_commrec *cr,t_forcerec *fr,t_QMrec *qm, 
 +              t_MMrec *mm,rvec f[], rvec fshift[]);
 +
 +#elif defined GMX_QMMM_GAUSSIAN
 +/* GAUSSIAN interface */
 +
 +void 
 +init_gaussian(t_commrec *cr ,t_QMrec *qm, t_MMrec *mm);
 +
 +real 
 +call_gaussian_SH(t_commrec *cr,t_forcerec *fr,t_QMrec *qm, 
 +                 t_MMrec *mm,rvec f[], rvec fshift[]);
 +
 +real 
 +call_gaussian(t_commrec *cr,t_forcerec *fr, t_QMrec *qm,
 +              t_MMrec *mm,rvec f[], rvec fshift[]);
 +
 +#elif defined GMX_QMMM_ORCA
 +/* ORCA interface */
 +
 +void 
 +init_orca(t_commrec *cr ,t_QMrec *qm, t_MMrec *mm);
 +
 +real 
 +call_orca(t_commrec *cr,t_forcerec *fr, t_QMrec *qm,
 +              t_MMrec *mm,rvec f[], rvec fshift[]);
 +
 +#endif
 +
 +
 +
 +
 +/* this struct and these comparison functions are needed for creating
 + * a QMMM input for the QM routines from the QMMM neighbor list.  
 + */
 +
 +typedef struct {
 +  int      j;
 +  int      shift;
 +} t_j_particle;
 +
 +static int struct_comp(const void *a, const void *b){
 +
 +  return (int)(((t_j_particle *)a)->j)-(int)(((t_j_particle *)b)->j);
 +  
 +} /* struct_comp */
 +
 +static int int_comp(const void *a,const void *b){
 +  
 +  return (*(int *)a) - (*(int *)b);
 +  
 +} /* int_comp */
 +
 +static int QMlayer_comp(const void *a, const void *b){
 +  
 +  return (int)(((t_QMrec *)a)->nrQMatoms)-(int)(((t_QMrec *)b)->nrQMatoms);
 +  
 +} /* QMlayer_comp */
 +
 +real call_QMroutine(t_commrec *cr, t_forcerec *fr, t_QMrec *qm, 
 +                  t_MMrec *mm, rvec f[], rvec fshift[])
 +{
 +  /* makes a call to the requested QM routine (qm->QMmethod) 
 +   * Note that f is actually the gradient, i.e. -f
 +   */
 +  real
 +    QMener=0.0;
 +
 +    /* do a semi-empirical calculation */
 +    
 +    if (qm->QMmethod<eQMmethodRHF && !(mm->nrMMatoms))
 +    {
 +#ifdef GMX_QMMM_MOPAC
 +        if (qm->bSH)
 +            QMener = call_mopac_SH(cr,fr,qm,mm,f,fshift);
 +        else
 +            QMener = call_mopac(cr,fr,qm,mm,f,fshift);
 +#else
 +        gmx_fatal(FARGS,"Semi-empirical QM only supported with Mopac.");
 +#endif
 +    }
 +    else
 +    {
 +        /* do an ab-initio calculation */
 +        if (qm->bSH && qm->QMmethod==eQMmethodCASSCF)
 +        {
 +#ifdef GMX_QMMM_GAUSSIAN            
 +            QMener = call_gaussian_SH(cr,fr,qm,mm,f,fshift);
 +#else
 +            gmx_fatal(FARGS,"Ab-initio Surface-hopping only supported with Gaussian.");
 +#endif
 +        }
 +        else
 +        {
 +#ifdef GMX_QMMM_GAMESS
 +            QMener = call_gamess(cr,fr,qm,mm,f,fshift);
 +#elif defined GMX_QMMM_GAUSSIAN
 +            QMener = call_gaussian(cr,fr,qm,mm,f,fshift);
 +#elif defined GMX_QMMM_ORCA
 +            QMener = call_orca(cr,fr,qm,mm,f,fshift);
 +#else
 +            gmx_fatal(FARGS,"Ab-initio calculation only supported with Gamess, Gaussian or ORCA.");
 +#endif
 +        }
 +    }
 +    return (QMener);
 +}
 +
 +void init_QMroutine(t_commrec *cr, t_QMrec *qm, t_MMrec *mm)
 +{
 +    /* makes a call to the requested QM routine (qm->QMmethod) 
 +     */
 +    if (qm->QMmethod<eQMmethodRHF){
 +#ifdef GMX_QMMM_MOPAC
 +        /* do a semi-empirical calculation */
 +        init_mopac(cr,qm,mm);
 +#else
 +        gmx_fatal(FARGS,"Semi-empirical QM only supported with Mopac.");
 +#endif
 +    }
 +    else 
 +    {
 +        /* do an ab-initio calculation */
 +#ifdef GMX_QMMM_GAMESS
 +        init_gamess(cr,qm,mm);
 +#elif defined GMX_QMMM_GAUSSIAN
 +        init_gaussian(cr,qm,mm);
 +#elif defined GMX_QMMM_ORCA
 +        init_orca(cr,qm,mm);
 +#else
 +        gmx_fatal(FARGS,"Ab-initio calculation only supported with Gamess, Gaussian or ORCA.");   
 +#endif
 +    }
 +} /* init_QMroutine */
 +
 +void update_QMMM_coord(rvec x[],t_forcerec *fr, t_QMrec *qm, t_MMrec *mm)
 +{
 +  /* shifts the QM and MM particles into the central box and stores
 +   * these shifted coordinates in the coordinate arrays of the
 +   * QMMMrec. These coordinates are passed on to the QM subroutines.
 +   */
 +  int
 +    i;
 +
 +  /* shift the QM atoms into the central box 
 +   */
 +  for(i=0;i<qm->nrQMatoms;i++){
 +    rvec_sub(x[qm->indexQM[i]],fr->shift_vec[qm->shiftQM[i]],qm->xQM[i]);
 +  }
 +  /* also shift the MM atoms into the central box, if any 
 +   */
 +  for(i=0;i<mm->nrMMatoms;i++){
 +      rvec_sub(x[mm->indexMM[i]],fr->shift_vec[mm->shiftMM[i]],mm->xMM[i]);   
 +  }
 +} /* update_QMMM_coord */
 +
 +static void punch_QMMM_excl(t_QMrec *qm,t_MMrec *mm,t_blocka *excls)
 +{
 +  /* punch a file containing the bonded interactions of each QM
 +   * atom with MM atoms. These need to be excluded in the QM routines.
 +   * Only needed in case of QM/MM optimizations
 +   */
 +  FILE
 +    *out=NULL;
 +  int
 +    i,j,k,nrexcl=0,*excluded=NULL,max=0;
 +  
 +  
 +  out = fopen("QMMMexcl.dat","w");
 +  
 +  /* this can be done more efficiently I think 
 +   */
 +  for(i=0;i<qm->nrQMatoms;i++){
 +    nrexcl = 0;
 +    for(j=excls->index[qm->indexQM[i]];
 +      j<excls->index[qm->indexQM[i]+1];
 +      j++){
 +      for(k=0;k<mm->nrMMatoms;k++){
 +      if(mm->indexMM[k]==excls->a[j]){/* the excluded MM atom */
 +        if(nrexcl >= max){
 +          max += 1000;
 +          srenew(excluded,max);
 +        }     
 +        excluded[nrexcl++]=k;
 +        continue;
 +      }
 +      }
 +    }
 +    /* write to file: */
 +    fprintf(out,"%5d %5d\n",i+1,nrexcl);
 +    for(j=0;j<nrexcl;j++){
 +      fprintf(out,"%5d ",excluded[j]);
 +    }
 +    fprintf(out,"\n");
 +  }
 +  free(excluded);
 +  fclose(out);
 +} /* punch_QMMM_excl */
 +
 +
 +/* end of QMMM subroutines */
 +
 +/* QMMM core routines */
 +
 +t_QMrec *mk_QMrec(void){
 +  t_QMrec *qm;
 +  snew(qm,1);
 +  return qm;
 +} /* mk_QMrec */
 +
 +t_MMrec *mk_MMrec(void){
 +  t_MMrec *mm;
 +  snew(mm,1);
 +  return mm;
 +} /* mk_MMrec */
 +
 +static void init_QMrec(int grpnr, t_QMrec *qm,int nr, int *atomarray, 
 +                     gmx_mtop_t *mtop, t_inputrec *ir)
 +{
 +  /* fills the t_QMrec struct of QM group grpnr 
 +   */
 +  int i;
 +  gmx_mtop_atomlookup_t alook;
 +  t_atom *atom;
 +
 +
 +  qm->nrQMatoms = nr;
 +  snew(qm->xQM,nr);
 +  snew(qm->indexQM,nr);
 +  snew(qm->shiftQM,nr); /* the shifts */
 +  for(i=0;i<nr;i++){
 +    qm->indexQM[i]=atomarray[i];
 +  }
 +
 +  alook = gmx_mtop_atomlookup_init(mtop);
 +
 +  snew(qm->atomicnumberQM,nr);
 +  for (i=0;i<qm->nrQMatoms;i++){
 +    gmx_mtop_atomnr_to_atom(alook,qm->indexQM[i],&atom);
 +    qm->nelectrons       += mtop->atomtypes.atomnumber[atom->type];
 +    qm->atomicnumberQM[i] = mtop->atomtypes.atomnumber[atom->type];
 +  }
 +
 +  gmx_mtop_atomlookup_destroy(alook);
 +
 +  qm->QMcharge       = ir->opts.QMcharge[grpnr];
 +  qm->multiplicity   = ir->opts.QMmult[grpnr];
 +  qm->nelectrons    -= ir->opts.QMcharge[grpnr];
 +
 +  qm->QMmethod       = ir->opts.QMmethod[grpnr];
 +  qm->QMbasis        = ir->opts.QMbasis[grpnr];
 +  /* trajectory surface hopping setup (Gaussian only) */
 +  qm->bSH            = ir->opts.bSH[grpnr];
 +  qm->CASorbitals    = ir->opts.CASorbitals[grpnr];
 +  qm->CASelectrons   = ir->opts.CASelectrons[grpnr];
 +  qm->SAsteps        = ir->opts.SAsteps[grpnr];
 +  qm->SAon           = ir->opts.SAon[grpnr];
 +  qm->SAoff          = ir->opts.SAoff[grpnr];
 +  /* hack to prevent gaussian from reinitializing all the time */
 +  qm->nQMcpus        = 0; /* number of CPUs to be used by g01; this is set
 +                         * upon initializing gaussian
 +                         * (init_gaussian())
 +                         */
 +  /* print the current layer to allow users to check their input */
 +  fprintf(stderr,"Layer %d\nnr of QM atoms %d\n",grpnr,nr);
 +  fprintf(stderr,"QMlevel: %s/%s\n\n",
 +        eQMmethod_names[qm->QMmethod],eQMbasis_names[qm->QMbasis]);
 +  
 +  /* frontier atoms */
 +  snew(qm->frontatoms,nr);
 +  /* Lennard-Jones coefficients */ 
 +  snew(qm->c6,nr);
 +  snew(qm->c12,nr);
 +  /* do we optimize the QM separately using the algorithms of the QM program??
 +   */
 +  qm->bTS      = ir->opts.bTS[grpnr];
 +  qm->bOPT     = ir->opts.bOPT[grpnr];
 +
 +} /* init_QMrec */  
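
init_QMrec() above determines the electron count by summing the atomic numbers of the QM atoms and subtracting the net QM charge. A toy illustration for a hypothetical QM region consisting of one water molecule:

    #include <stdio.h>

    int main(void)
    {
        int atomicnumberQM[] = { 8, 1, 1 };   /* O, H, H */
        int nrQMatoms        = 3;
        int QMcharge         = 0;             /* net charge of the QM region */
        int nelectrons       = 0;
        int i;

        for (i = 0; i < nrQMatoms; i++)
        {
            nelectrons += atomicnumberQM[i];  /* 8 + 1 + 1 = 10 */
        }
        nelectrons -= QMcharge;               /* subtract the net charge, as in init_QMrec() */

        printf("QM region has %d electrons\n", nelectrons);
        return 0;
    }
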
 +
 +t_QMrec *copy_QMrec(t_QMrec *qm)
 +{
 +  /* copies the contents of qm into a new t_QMrec struct */
 +  t_QMrec
 +    *qmcopy;
 +  int
 +    i;
 +  
 +  qmcopy = mk_QMrec();
 +  qmcopy->nrQMatoms = qm->nrQMatoms;
 +  snew(qmcopy->xQM,qmcopy->nrQMatoms);
 +  snew(qmcopy->indexQM,qmcopy->nrQMatoms);
 +  snew(qmcopy->atomicnumberQM,qm->nrQMatoms);
 +  snew(qmcopy->shiftQM,qmcopy->nrQMatoms); /* the shifts */
 +  for (i=0;i<qmcopy->nrQMatoms;i++){
 +    qmcopy->shiftQM[i]        = qm->shiftQM[i];
 +    qmcopy->indexQM[i]        = qm->indexQM[i];
 +    qmcopy->atomicnumberQM[i] = qm->atomicnumberQM[i];
 +  }
 +  qmcopy->nelectrons   = qm->nelectrons;
 +  qmcopy->multiplicity = qm->multiplicity;
 +  qmcopy->QMcharge     = qm->QMcharge;
 +  qmcopy->nelectrons   = qm->nelectrons;
 +  qmcopy->QMmethod     = qm->QMmethod; 
 +  qmcopy->QMbasis      = qm->QMbasis;  
 +  /* trajectory surface hopping setup (Gaussian only) */
 +  qmcopy->bSH          = qm->bSH;
 +  qmcopy->CASorbitals  = qm->CASorbitals;
 +  qmcopy->CASelectrons = qm->CASelectrons;
 +  qmcopy->SAsteps      = qm->SAsteps;
 +  qmcopy->SAon         = qm->SAon;
 +  qmcopy->SAoff        = qm->SAoff;
 +  qmcopy->bOPT         = qm->bOPT;
 +
 +  /* Gaussian init. variables */
 +  qmcopy->nQMcpus      = qm->nQMcpus;
 +  for(i=0;i<DIM;i++)
 +    qmcopy->SHbasis[i] = qm->SHbasis[i];
 +  qmcopy->QMmem        = qm->QMmem;
 +  qmcopy->accuracy     = qm->accuracy;
 +  qmcopy->cpmcscf      = qm->cpmcscf;
 +  qmcopy->SAstep       = qm->SAstep;
 +  snew(qmcopy->frontatoms,qm->nrQMatoms);
 +  snew(qmcopy->c12,qmcopy->nrQMatoms);
 +  snew(qmcopy->c6,qmcopy->nrQMatoms);
 +  if(qmcopy->bTS||qmcopy->bOPT){
 +    for(i=1;i<qmcopy->nrQMatoms;i++){
 +      qmcopy->frontatoms[i] = qm->frontatoms[i];
 +      qmcopy->c12[i]        = qm->c12[i];
 +      qmcopy->c6[i]         = qm->c6[i];
 +    }
 +  }
 +
 +  return(qmcopy);
 +
 +} /*copy_QMrec */
 +
 +t_QMMMrec *mk_QMMMrec(void)
 +{
 +
 +  t_QMMMrec *qr;
 +
 +  snew(qr,1);
 +
 +  return qr;
 +
 +} /* mk_QMMMrec */
 +
 +void init_QMMMrec(t_commrec *cr,
 +                matrix box,
 +                gmx_mtop_t *mtop,
 +                t_inputrec *ir,
 +                t_forcerec *fr)
 +{
 +  /* we put the atom numbers of atoms that belong to the QMMM group in
 +   * an array that will be copied later to QMMMrec->indexQM[..]. Also
 +   * it will be used to create a QMMMrec->bQMMM index array that
 +   * simply contains true/false for QM and MM (the other) atoms.
 +   */
 +
 +  gmx_groups_t *groups;
 +  atom_id   *qm_arr=NULL,vsite,ai,aj;
 +  int       qm_max=0,qm_nr=0,i,j,jmax,k,l,nrvsite2=0;
 +  t_QMMMrec *qr;
 +  t_MMrec   *mm;
 +  t_iatom   *iatoms;
 +  real      c12au,c6au;
 +  gmx_mtop_atomloop_all_t aloop;
 +  t_atom    *atom;
 +  gmx_mtop_ilistloop_all_t iloop;
 +  int       a_offset;
 +  t_ilist   *ilist_mol;
 +  gmx_mtop_atomlookup_t alook;
 +
 +  c6au  = (HARTREE2KJ*AVOGADRO*pow(BOHR2NM,6)); 
 +  c12au = (HARTREE2KJ*AVOGADRO*pow(BOHR2NM,12)); 
 +  /* issue a fatal if the user wants to run with more than one node */
 +  if ( PAR(cr)) gmx_fatal(FARGS,"QM/MM does not work in parallel, use a single node instead\n");
 +
 +  /* Make a local copy of the QMMMrec */
 +  qr = fr->qr;
 +
 +  /* bQMMM[..] is an array containing TRUE/FALSE for atoms that are
 +   * QM/not QM. We first set all elements to false. Afterwards we use
 +   * the qm_arr (=MMrec->indexQM) to change the elements
 +   * corresponding to the QM atoms to TRUE.  */
 +
 +  qr->QMMMscheme     = ir->QMMMscheme;
 +
 +  /* we take the possibility into account that a user has
 +   * defined more than one QM group:
 +   */
 +  /* an ugly work-around in case there is only one group. In this case
 +   * the whole system is treated as QM. Otherwise the second group is
 +   * always the rest of the total system and is treated as MM.  
 +   */
 +
 +  /* small problem if there is only QM.... so no MM */
 +  
 +  jmax = ir->opts.ngQM;
 +
 +  if(qr->QMMMscheme==eQMMMschemeoniom)
 +    qr->nrQMlayers = jmax;
 +  else
 +    qr->nrQMlayers = 1; 
 +
 +  groups = &mtop->groups;
 +
 +  /* there are jmax groups of QM atoms. In case of multiple QM groups
 +   * I assume that the user wants to do ONIOM. However, maybe it
 +   * should also be possible to define more than one QM subsystem with
 +   * independent neighbourlists. I have to think about
 +   * that.. 11-11-2003 
 +   */
 +  snew(qr->qm,jmax);
 +  for(j=0;j<jmax;j++){
 +    /* new layer */
 +    aloop = gmx_mtop_atomloop_all_init(mtop);
 +    while (gmx_mtop_atomloop_all_next(aloop,&i,&atom)) {
 +      if(qm_nr >= qm_max){
 +      qm_max += 1000;
 +      srenew(qm_arr,qm_max);
 +      }
 +      if (ggrpnr(groups,egcQMMM ,i) == j) {
 +      /* hack for tip4p */
 +      qm_arr[qm_nr++] = i;
 +      }
 +    }
 +    if(qr->QMMMscheme==eQMMMschemeoniom){
 +      /* add the atoms to the bQMMM array
 +       */
 +
 +      /* I assume that users specify the QM groups from small to
 +       * big(ger) in the mdp file 
 +       */
 +      qr->qm[j] = mk_QMrec(); 
 +      /* we need to throw out link atoms that in the previous layer
 +       * existed to separate this QMlayer from the previous
 +       * QMlayer. We use the iatoms array in the idef for that
 +       * purpose. If all atoms defining the current Link Atom (Dummy2)
 +       * are part of the current QM layer it needs to be removed from
 +       * qm_arr[].  */
 +   
 +      iloop = gmx_mtop_ilistloop_all_init(mtop);
 +      while (gmx_mtop_ilistloop_all_next(iloop,&ilist_mol,&a_offset)) {
 +      nrvsite2 = ilist_mol[F_VSITE2].nr;
 +      iatoms   = ilist_mol[F_VSITE2].iatoms;
 +      
 +      for(k=0; k<nrvsite2; k+=4) {
 +        vsite = a_offset + iatoms[k+1]; /* the vsite         */
 +        ai    = a_offset + iatoms[k+2]; /* constructing atom */
 +        aj    = a_offset + iatoms[k+3]; /* constructing atom */
 +        if (ggrpnr(groups, egcQMMM, vsite) == ggrpnr(groups, egcQMMM, ai)
 +            &&
 +            ggrpnr(groups, egcQMMM, vsite) == ggrpnr(groups, egcQMMM, aj)) {
 +          /* this dummy link atom needs to be removed from the qm_arr
 +           * before making the QMrec of this layer!  
 +           */
 +          for(i=0;i<qm_nr;i++){
 +            if(qm_arr[i]==vsite){
 +              /* drop the element */
 +              for(l=i;l<qm_nr;l++){
 +                qm_arr[l]=qm_arr[l+1];
 +              }
 +              qm_nr--;
 +            }
 +          }
 +        }
 +      }
 +      }
 +
 +      /* store QM atoms in this layer in the QMrec and initialise layer 
 +       */
 +      init_QMrec(j,qr->qm[j],qm_nr,qm_arr,mtop,ir);
 +      
 +      /* we now store the LJ C6 and C12 parameters in QM rec in case
 +       * we need to do an optimization 
 +       */
-     if(qr->qm[0]->bOPT || qr->qm[0]->bTS){
-       for(i=0;i<qm_nr;i++){
-       gmx_mtop_atomnr_to_atom(alook,qm_arr[i],&atom);
-       qr->qm[0]->c6[i]  =  C6(fr->nbfp,mtop->ffparams.atnr,
-                               atom->type,atom->type)/c6au;
-       qr->qm[0]->c12[i] = C12(fr->nbfp,mtop->ffparams.atnr,
-                               atom->type,atom->type)/c12au;
-       }
-       
++      if(qr->qm[j]->bOPT || qr->qm[j]->bTS)
++      {
++          for(i=0;i<qm_nr;i++)
++          {
++              /* nbfp now includes the 6.0/12.0 derivative prefactors */
++              qr->qm[j]->c6[i]  =  C6(fr->nbfp,mtop->ffparams.atnr,atom->type,atom->type)/c6au/6.0;
++              qr->qm[j]->c12[i] = C12(fr->nbfp,mtop->ffparams.atnr,atom->type,atom->type)/c12au/12.0;
++          }
 +      }
 +      /* now we check for frontier QM atoms. These occur in pairs that
 +       * construct the vsite
 +       */
 +      iloop = gmx_mtop_ilistloop_all_init(mtop);
 +      while (gmx_mtop_ilistloop_all_next(iloop,&ilist_mol,&a_offset)) {
 +      nrvsite2 = ilist_mol[F_VSITE2].nr;
 +      iatoms   = ilist_mol[F_VSITE2].iatoms;
 +
 +      for(k=0; k<nrvsite2; k+=4){
 +        vsite = a_offset + iatoms[k+1]; /* the vsite         */
 +        ai    = a_offset + iatoms[k+2]; /* constructing atom */
 +        aj    = a_offset + iatoms[k+3]; /* constructing atom */
 +        if(ggrpnr(groups,egcQMMM,ai) < (groups->grps[egcQMMM].nr-1) &&
 +           (ggrpnr(groups,egcQMMM,aj) >= (groups->grps[egcQMMM].nr-1))){
 +            /* mark ai as frontier atom */
 +          for(i=0;i<qm_nr;i++){
 +            if( (qm_arr[i]==ai) || (qm_arr[i]==vsite) ){
 +              qr->qm[j]->frontatoms[i]=TRUE;
 +            }
 +          }
 +        }
 +        else if(ggrpnr(groups,egcQMMM,aj) < (groups->grps[egcQMMM].nr-1) &&
 +                (ggrpnr(groups,egcQMMM,ai) >= (groups->grps[egcQMMM].nr-1))){
 +          /* mark aj as frontier atom */
 +          for(i=0;i<qm_nr;i++){
 +            if( (qm_arr[i]==aj) || (qm_arr[i]==vsite)){
 +              qr->qm[j]->frontatoms[i]=TRUE;
 +            }
 +          }
 +        }
 +      }
 +      }
 +    }
 +  }
 +  if(qr->QMMMscheme!=eQMMMschemeoniom){
 +
 +    /* standard QMMM, all layers are merged together so there is one QM 
 +     * subsystem and one MM subsystem. 
 +     * Also we set the charges to zero in the md->charge arrays to prevent 
 +     * the innerloops from doubly counting the electrostatic QM MM interaction
 +     */
 +
 +    alook = gmx_mtop_atomlookup_init(mtop);
 +
 +    for (k=0;k<qm_nr;k++){
 +      gmx_mtop_atomnr_to_atom(alook,qm_arr[k],&atom);
 +      atom->q  = 0.0;
 +      atom->qB = 0.0;
 +    } 
 +    qr->qm[0] = mk_QMrec();
 +    /* store QM atoms in the QMrec and initialise
 +     */
 +    init_QMrec(0,qr->qm[0],qm_nr,qm_arr,mtop,ir);
-     
++    if(qr->qm[0]->bOPT || qr->qm[0]->bTS)
++    {
++        for(i=0;i<qm_nr;i++)
++        {
++            gmx_mtop_atomnr_to_atom(alook,qm_arr[i],&atom);
++            /* nbfp now includes the 6.0/12.0 derivative prefactors */
++            qr->qm[0]->c6[i]  =  C6(fr->nbfp,mtop->ffparams.atnr,atom->type,atom->type)/c6au/6.0;
++            qr->qm[0]->c12[i] = C12(fr->nbfp,mtop->ffparams.atnr,atom->type,atom->type)/c12au/12.0;
++        }
 +    }
-       for (i=0;i<mm->nrMMatoms;i++){
-       mm->c6[i]  = C6(fr->nbfp,top->idef.atnr,
-                       md->typeA[mm->indexMM[i]],
-                       md->typeA[mm->indexMM[i]])/c6au;
-       mm->c12[i] =C12(fr->nbfp,top->idef.atnr,
-                       md->typeA[mm->indexMM[i]],
-                       md->typeA[mm->indexMM[i]])/c12au;
 +
 +    /* find frontier atoms and mark them true in the frontieratoms array.
 +     */
 +    for(i=0;i<qm_nr;i++) {
 +      gmx_mtop_atomnr_to_ilist(alook,qm_arr[i],&ilist_mol,&a_offset);
 +      nrvsite2 = ilist_mol[F_VSITE2].nr;
 +      iatoms   = ilist_mol[F_VSITE2].iatoms;
 +      
 +      for(k=0;k<nrvsite2;k+=4){
 +      vsite = a_offset + iatoms[k+1]; /* the vsite         */
 +      ai    = a_offset + iatoms[k+2]; /* constructing atom */
 +      aj    = a_offset + iatoms[k+3]; /* constructing atom */
 +      if(ggrpnr(groups,egcQMMM,ai) < (groups->grps[egcQMMM].nr-1) &&
 +         (ggrpnr(groups,egcQMMM,aj) >= (groups->grps[egcQMMM].nr-1))){
 +      /* mark ai as frontier atom */
 +        if ( (qm_arr[i]==ai) || (qm_arr[i]==vsite) ){
 +          qr->qm[0]->frontatoms[i]=TRUE;
 +        }
 +      }
 +      else if (ggrpnr(groups,egcQMMM,aj) < (groups->grps[egcQMMM].nr-1) &&
 +               (ggrpnr(groups,egcQMMM,ai) >=(groups->grps[egcQMMM].nr-1))) {
 +        /* mark aj as frontier atom */
 +        if ( (qm_arr[i]==aj) || (qm_arr[i]==vsite) ){
 +          qr->qm[0]->frontatoms[i]=TRUE;
 +        }
 +      }
 +      }
 +    }
 +
 +    gmx_mtop_atomlookup_destroy(alook);
 +
 +    /* MM rec creation */
 +    mm               = mk_MMrec(); 
 +    mm->scalefactor  = ir->scalefactor;
 +    mm->nrMMatoms    = (mtop->natoms)-(qr->qm[0]->nrQMatoms); /* rest of the atoms */
 +    qr->mm           = mm;
 +  } else {/* ONIOM */
 +    /* MM rec creation */    
 +    mm               = mk_MMrec(); 
 +    mm->scalefactor  = ir->scalefactor;
 +    mm->nrMMatoms    = 0;
 +    qr->mm           = mm;
 +  }
 +  
 +  /* these variables get updated in the update QMMMrec */
 +
 +  if(qr->nrQMlayers==1){
 +    /* with only one layer there is only one initialisation
 +     * needed. Multilayer is a bit more complicated as it requires
 +     * re-initialisation at every step of the simulation. This is due
 +     * to the use of COMMON blocks in the fortran QM subroutines.  
 +     */
 +    if (qr->qm[0]->QMmethod<eQMmethodRHF)
 +    {
 +#ifdef GMX_QMMM_MOPAC
 +        /* semi-empirical 1-layer ONIOM calculation requested (mopac93) */
 +        init_mopac(cr,qr->qm[0],qr->mm);
 +#else
 +        gmx_fatal(FARGS,"Semi-empirical QM only supported with Mopac.");
 +#endif
 +    }
 +    else 
 +    { 
 +        /* ab initio calculation requested (gamess/gaussian/ORCA) */
 +#ifdef GMX_QMMM_GAMESS
 +        init_gamess(cr,qr->qm[0],qr->mm);
 +#elif defined GMX_QMMM_GAUSSIAN
 +        init_gaussian(cr,qr->qm[0],qr->mm);
 +#elif defined GMX_QMMM_ORCA
 +        init_orca(cr,qr->qm[0],qr->mm);
 +#else
 +        gmx_fatal(FARGS,"Ab-initio calculation only supported with Gamess, Gaussian or ORCA.");
 +#endif
 +    }
 +  }
 +} /* init_QMMMrec */
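
The substantive change in this file is the extra division by 6.0 and 12.0 when filling qm->c6/qm->c12: in this branch the nbfp table stores the dispersion parameters premultiplied by those derivative prefactors, so they have to be stripped before converting to atomic units. A hedged sketch of that conversion; the constants are written out with the values used by GROMACS' physics.h of this era to the best of my knowledge, and nbfp_c6/nbfp_c12 are made-up table entries standing in for C6()/C12():

    #include <math.h>
    #include <stdio.h>

    #define HARTREE2KJ 4.3597482e-21
    #define AVOGADRO   6.0221367e23
    #define BOHR2NM    0.0529177249

    int main(void)
    {
        double c6au  = HARTREE2KJ*AVOGADRO*pow(BOHR2NM, 6);
        double c12au = HARTREE2KJ*AVOGADRO*pow(BOHR2NM, 12);

        double nbfp_c6  = 6.0*1.0e-3;    /* table entry = 6*C6  (arbitrary example value) */
        double nbfp_c12 = 12.0*1.0e-6;   /* table entry = 12*C12 (arbitrary example value) */

        /* strip the prefactor, then convert from GROMACS units to atomic units */
        double c6_qm  = nbfp_c6/c6au/6.0;
        double c12_qm = nbfp_c12/c12au/12.0;

        printf("c6 = %g, c12 = %g (atomic units)\n", c6_qm, c12_qm);
        return 0;
    }
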
 +
 +void update_QMMMrec(t_commrec *cr,
 +                  t_forcerec *fr,
 +                  rvec x[],
 +                  t_mdatoms *md,
 +                  matrix box,
 +                  gmx_localtop_t *top)
 +{
 +  /* updates the coordinates of both QM atoms and MM atoms and stores
 +   * them in the QMMMrec.  
 +   *
 +   * NOTE: this is NOT yet working if there are no PBC. Also in ns.c, simple
 +   * ns needs to be fixed!  
 +   */
 +  int 
 +    mm_max=0,mm_nr=0,mm_nr_new,i,j,is,k,shift;
 +  t_j_particle 
 +    *mm_j_particles=NULL,*qm_i_particles=NULL;
 +  t_QMMMrec 
 +    *qr; 
 +  t_nblist 
 +    QMMMlist;
 +  rvec
 +    dx,crd;
 +  int
 +    *MMatoms;
 +  t_QMrec
 +    *qm;
 +  t_MMrec
 +    *mm;
 +  t_pbc
 +    pbc;
 +  int  
 +    *parallelMMarray=NULL;
 +  real
 +    c12au,c6au;
 +
 +  c6au  = (HARTREE2KJ*AVOGADRO*pow(BOHR2NM,6)); 
 +  c12au = (HARTREE2KJ*AVOGADRO*pow(BOHR2NM,12)); 
 +
 +  /* every cpu has this array. On every processor we fill this array
 +   * with 1's and 0's. A 1 indicates the atom is a QM atom on the
 +   * current cpu. In a later stage these arrays are all summed. Indices
 +   * > 0 indicate the atom is a QM atom. Every node therefore knows
 +   * which atoms are part of the QM subsystem.
 +   */
 +  /* copy some pointers */
 +  qr          = fr->qr;
 +  mm          = qr->mm;
 +  QMMMlist    = fr->QMMMlist;
 +
 +  
 +
 +  /*  init_pbc(box);  needs to be called first, see pbc.h */
 +  set_pbc_dd(&pbc,fr->ePBC,DOMAINDECOMP(cr) ? cr->dd : NULL,FALSE,box);
 +  /* only in standard (normal) QMMM do we need the neighbouring MM
 +   * particles to provide an electric field of point charges for the QM
 +   * atoms.
 +   */
 +  if(qr->QMMMscheme==eQMMMschemenormal){ /* also implies 1 QM-layer */
 +    /* we NOW create/update a number of QMMMrec entries:
 +     *
 +     * 1) the shiftQM, containing the shifts of the QM atoms
 +     *
 +     * 2) the indexMM array, containing the index of the MM atoms
 +     * 
 +     * 3) the shiftMM, containing the shifts of the MM atoms
 +     *
 +     * 4) the shifted coordinates of the MM atoms
 +     *
 +     * the shifts are used for computing virial of the QM/MM particles.
 +     */
 +    qm = qr->qm[0]; /* in case of normal QMMM, there is only one group */
 +    snew(qm_i_particles,QMMMlist.nri);
 +    if(QMMMlist.nri){
 +      qm_i_particles[0].shift = XYZ2IS(0,0,0);
 +      for(i=0;i<QMMMlist.nri;i++){
 +      qm_i_particles[i].j     = QMMMlist.iinr[i];
 +      
 +      if(i){
 +        qm_i_particles[i].shift = pbc_dx_aiuc(&pbc,x[QMMMlist.iinr[0]],
 +                                              x[QMMMlist.iinr[i]],dx);
 +        
 +      }
 +      /* However, since nri >= nrQMatoms, we do a quicksort, and throw
 +       * out double, triple, etc. entries later, as we do for the MM
 +       * list too.  
 +       */
 +      
 +      /* compute the shift for the MM j-particles with respect to
 +       * the QM i-particle and store them. 
 +       */
 +      
 +      crd[0] = IS2X(QMMMlist.shift[i]) + IS2X(qm_i_particles[i].shift);
 +      crd[1] = IS2Y(QMMMlist.shift[i]) + IS2Y(qm_i_particles[i].shift);
 +      crd[2] = IS2Z(QMMMlist.shift[i]) + IS2Z(qm_i_particles[i].shift);
 +      is = XYZ2IS(crd[0],crd[1],crd[2]); 
 +      for(j=QMMMlist.jindex[i];
 +          j<QMMMlist.jindex[i+1];
 +          j++){
 +        if(mm_nr >= mm_max){
 +          mm_max += 1000;
 +          srenew(mm_j_particles,mm_max);
 +        }       
 +        
 +        mm_j_particles[mm_nr].j = QMMMlist.jjnr[j];
 +        mm_j_particles[mm_nr].shift = is;
 +        mm_nr++;
 +      }
 +      }
 +      
 +      /* quicksort QM and MM shift arrays and throw away multiple entries */
 +      
 +
 +
 +      qsort(qm_i_particles,QMMMlist.nri,
 +          (size_t)sizeof(qm_i_particles[0]),
 +          struct_comp);
 +      qsort(mm_j_particles,mm_nr,
 +          (size_t)sizeof(mm_j_particles[0]),
 +          struct_comp);
 +      /* remove multiples in the QM shift array. Since in init_QMMM() we
 +       * went through the atom numbers from 0 to md.nr, the order after
 +       * sorting here already matches that of QMindex.
 +       */
 +      j=0;
 +      for(i=0;i<QMMMlist.nri;i++){
 +      if (i==0 || qm_i_particles[i].j!=qm_i_particles[i-1].j){
 +        qm_i_particles[j++] = qm_i_particles[i];
 +      }
 +      }
 +      mm_nr_new = 0;
 +      if(qm->bTS||qm->bOPT){
 +      /* only remove double entries for the MM array */
 +      for(i=0;i<mm_nr;i++){
 +        if((i==0 || mm_j_particles[i].j!=mm_j_particles[i-1].j)
 +           && !md->bQM[mm_j_particles[i].j]){
 +          mm_j_particles[mm_nr_new++] = mm_j_particles[i];
 +        }
 +      }
 +      }      
 +      /* we also remove mm atoms that have no charges! 
 +      * actually this is already done in the ns.c  
 +      */
 +      else{
 +      for(i=0;i<mm_nr;i++){
 +        if((i==0 || mm_j_particles[i].j!=mm_j_particles[i-1].j)
 +           && !md->bQM[mm_j_particles[i].j] 
 +           && (md->chargeA[mm_j_particles[i].j]
 +               || (md->chargeB && md->chargeB[mm_j_particles[i].j]))) {
 +          mm_j_particles[mm_nr_new++] = mm_j_particles[i];
 +        }
 +      }
 +      }
 +      mm_nr = mm_nr_new;
 +      /* store the data retrieved above into the QMMMrec
 +       */    
 +      k=0;
 +      /* Keep the compiler happy,
 +       * shift will always be set in the loop for i=0
 +       */
 +      shift = 0;
 +      for(i=0;i<qm->nrQMatoms;i++){
 +      /* not all qm particles might have appeared as i
 +       * particles. They might have been part of the same charge
 +       * group for instance.
 +       */
 +      if (qm->indexQM[i] == qm_i_particles[k].j) {
 +        shift = qm_i_particles[k++].shift;
 +      }
 +      /* use previous shift, assuming they belong to the same charge
 +       * group anyway,
 +       */
 +      
 +      qm->shiftQM[i] = shift;
 +      }
 +    }
 +    /* parallel execution */
 +    if(PAR(cr)){
 +      snew(parallelMMarray,2*(md->nr)); 
 +      /* only MM particles have a 1 at their atomnumber. The second part
 +       * of the array contains the shifts. Thus:
 +       * p[i]=1/0 depending on whether atomnumber i is a MM particle in the QM
 +       * step or not. p[i+md->nr] is the shift of atomnumber i.
 +       */
 +      for(i=0;i<2*(md->nr);i++){
 +      parallelMMarray[i]=0;
 +      }
 +      
 +      for(i=0;i<mm_nr;i++){
 +      parallelMMarray[mm_j_particles[i].j]=1;
 +      parallelMMarray[mm_j_particles[i].j+(md->nr)]=mm_j_particles[i].shift;
 +      }
 +      gmx_sumi(md->nr,parallelMMarray,cr);
 +      mm_nr=0;
 +      
 +      mm_max = 0;
 +      for(i=0;i<md->nr;i++){
 +      if(parallelMMarray[i]){
 +        if(mm_nr >= mm_max){
 +          mm_max += 1000;
 +          srenew(mm->indexMM,mm_max);
 +          srenew(mm->shiftMM,mm_max);
 +        }
 +        mm->indexMM[mm_nr]  = i;
 +        mm->shiftMM[mm_nr++]= parallelMMarray[i+md->nr]/parallelMMarray[i];
 +      }
 +      }
 +      mm->nrMMatoms=mm_nr;
 +      free(parallelMMarray);
 +    }
 +    /* serial execution */
 +    else{
 +      mm->nrMMatoms = mm_nr;
 +      srenew(mm->shiftMM,mm_nr);
 +      srenew(mm->indexMM,mm_nr);
 +      for(i=0;i<mm_nr;i++){
 +      mm->indexMM[i]=mm_j_particles[i].j;
 +      mm->shiftMM[i]=mm_j_particles[i].shift;
 +      }
 +      
 +    }
 +    /* (re) allocate memory for the MM coordinate array. The QM
 +     * coordinate array was already allocated in init_QMMM, and is
 +     * only (re)filled in the update_QMMM_coordinates routine 
 +     */
 +    srenew(mm->xMM,mm->nrMMatoms);
 +    /* now we (re) fill the array that contains the MM charges with
 +     * the forcefield charges. If requested, these charges will be
 +     * scaled by a factor 
 +     */
 +    srenew(mm->MMcharges,mm->nrMMatoms);
 +    for(i=0;i<mm->nrMMatoms;i++){/* no free energy yet */
 +      mm->MMcharges[i]=md->chargeA[mm->indexMM[i]]*mm->scalefactor; 
 +    }  
 +    if(qm->bTS||qm->bOPT){
 +      /* store (copy) the c6 and c12 parameters into the MMrec struct 
 +       */
 +      srenew(mm->c6,mm->nrMMatoms);
 +      srenew(mm->c12,mm->nrMMatoms);
++      for (i=0;i<mm->nrMMatoms;i++)
++      {
++          /* nbfp now includes the 6.0/12.0 derivative prefactors */
++          mm->c6[i]  = C6(fr->nbfp,top->idef.atnr,md->typeA[mm->indexMM[i]],md->typeA[mm->indexMM[i]])/c6au/6.0;
++          mm->c12[i] =C12(fr->nbfp,top->idef.atnr,md->typeA[mm->indexMM[i]],md->typeA[mm->indexMM[i]])/c12au/12.0;
 +      }
 +      punch_QMMM_excl(qr->qm[0],mm,&(top->excls));
 +    }
 +    /* the next routine fills the coordinate fields in the QMMM rec of
 +     * both the quantum atoms and the MM atoms, using the shifts
 +     * calculated above.  
 +     */
 +
 +    update_QMMM_coord(x,fr,qr->qm[0],qr->mm);
 +    free(qm_i_particles);
 +    free(mm_j_particles);
 +  } 
 +  else { /* ONIOM */ /* ????? */
 +    mm->nrMMatoms=0;
 +    /* do for each layer */
 +    for (j=0;j<qr->nrQMlayers;j++){
 +      qm = qr->qm[j];
 +      qm->shiftQM[0]=XYZ2IS(0,0,0);
 +      for(i=1;i<qm->nrQMatoms;i++){
 +      qm->shiftQM[i] = pbc_dx_aiuc(&pbc,x[qm->indexQM[0]],x[qm->indexQM[i]],
 +                                   dx);
 +      }
 +      update_QMMM_coord(x,fr,qm,mm);    
 +    }
 +  }
 +} /* update_QMMM_rec */
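
update_QMMMrec() above relies on a sort-then-compact idiom: qsort() with struct_comp orders the collected particles by index, after which one pass keeps only the first of each run of equal indices. A standalone sketch of that idiom (t_j_particle and struct_comp are repeated here so the snippet compiles on its own):

    #include <stdio.h>
    #include <stdlib.h>

    typedef struct {
        int j;
        int shift;
    } t_j_particle;

    static int struct_comp(const void *a, const void *b)
    {
        return ((const t_j_particle *)a)->j - ((const t_j_particle *)b)->j;
    }

    int main(void)
    {
        t_j_particle p[] = { {5,0}, {2,0}, {5,0}, {1,0}, {2,0} };
        int n = sizeof(p)/sizeof(p[0]);
        int i, nnew = 0;

        qsort(p, n, sizeof(p[0]), struct_comp);

        /* keep only the first of each run of equal .j values, as done above */
        for (i = 0; i < n; i++)
        {
            if (i == 0 || p[i].j != p[i-1].j)
            {
                p[nnew++] = p[i];
            }
        }
        for (i = 0; i < nnew; i++)
        {
            printf("%d ", p[i].j);   /* prints: 1 2 5 */
        }
        printf("\n");
        return 0;
    }
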
 +
 +
 +real calculate_QMMM(t_commrec *cr,
 +                  rvec x[],rvec f[],
 +                  t_forcerec *fr,
 +                  t_mdatoms *md)
 +{
 +  real
 +    QMener=0.0;
 +  /* a selection for the QM package depending on which is requested
 +   * (Gaussian, GAMESS-UK, MOPAC or ORCA) needs to be implemented here. Now
 +   * it works through defines.... Not so nice yet 
 +   */
 +  t_QMMMrec
 +    *qr;
 +  t_QMrec
 +    *qm,*qm2;
 +  t_MMrec
 +    *mm=NULL;
 +  rvec 
 +    *forces=NULL,*fshift=NULL,    
 +    *forces2=NULL, *fshift2=NULL; /* needed for multilayer ONIOM */
 +  int
 +    i,j,k;
 +  /* make a local copy of the QMMMrec pointer
 +   */
 +  qr = fr->qr;
 +  mm = qr->mm;
 +
 +  /* now different procedures are carried out for one-layer ONIOM and
 +   * normal QMMM on the one hand and multilayer ONIOM on the other
 +   */
 +  if(qr->QMMMscheme==eQMMMschemenormal || qr->nrQMlayers==1){
 +    qm = qr->qm[0];
 +    snew(forces,(qm->nrQMatoms+mm->nrMMatoms));
 +    snew(fshift,(qm->nrQMatoms+mm->nrMMatoms));
 +    QMener = call_QMroutine(cr,fr,qm,mm,forces,fshift);
 +    for(i=0;i<qm->nrQMatoms;i++){
 +      for(j=0;j<DIM;j++){
 +      f[qm->indexQM[i]][j]          -= forces[i][j];
 +      fr->fshift[qm->shiftQM[i]][j] += fshift[i][j];
 +      }
 +    }
 +    for(i=0;i<mm->nrMMatoms;i++){
 +      for(j=0;j<DIM;j++){
 +      f[mm->indexMM[i]][j]          -= forces[qm->nrQMatoms+i][j];
 +      fr->fshift[mm->shiftMM[i]][j] += fshift[qm->nrQMatoms+i][j];
 +      }
 +      
 +    }
 +    free(forces);
 +    free(fshift);
 +  }
 +  else{ /* Multi-layer ONIOM */
 +    for(i=0;i<qr->nrQMlayers-1;i++){ /* last layer is special */
 +      qm  = qr->qm[i];
 +      qm2 = copy_QMrec(qr->qm[i+1]);
 +
 +      qm2->nrQMatoms = qm->nrQMatoms;
 +    
 +      for(j=0;j<qm2->nrQMatoms;j++){
 +      for(k=0;k<DIM;k++)
 +        qm2->xQM[j][k]       = qm->xQM[j][k];
 +      qm2->indexQM[j]        = qm->indexQM[j];
 +      qm2->atomicnumberQM[j] = qm->atomicnumberQM[j];
 +      qm2->shiftQM[j]        = qm->shiftQM[j];
 +      }
 +
 +      qm2->QMcharge = qm->QMcharge;
 +      /* this layer at the higher level of theory */
 +      srenew(forces,qm->nrQMatoms);
 +      srenew(fshift,qm->nrQMatoms);
 +      /* we need to re-initialize the QMroutine every step... */
 +      init_QMroutine(cr,qm,mm);
 +      QMener += call_QMroutine(cr,fr,qm,mm,forces,fshift);
 +
 +      /* this layer at the lower level of theory */
 +      srenew(forces2,qm->nrQMatoms);
 +      srenew(fshift2,qm->nrQMatoms);
 +      init_QMroutine(cr,qm2,mm);
 +      QMener -= call_QMroutine(cr,fr,qm2,mm,forces2,fshift2);
 +      /* E = E1high - E1low. The next layer includes the current layer at
 +       * the lower level of theory, which provides + E2low.
 +       * The same holds for the gradients.
 +       */
 +      for(i=0;i<qm->nrQMatoms;i++){
 +      for(j=0;j<DIM;j++){
 +        f[qm->indexQM[i]][j]          -= (forces[i][j]-forces2[i][j]);
 +        fr->fshift[qm->shiftQM[i]][j] += (fshift[i][j]-fshift2[i][j]);
 +      }
 +      }
 +      free(qm2);
 +    }
 +    /* now the last layer still needs to be done: */
 +    qm      = qr->qm[qr->nrQMlayers-1]; /* C counts from 0 */
 +    init_QMroutine(cr,qm,mm);
 +    srenew(forces,qm->nrQMatoms);
 +    srenew(fshift,qm->nrQMatoms);
 +    QMener += call_QMroutine(cr,fr,qm,mm,forces,fshift);
 +    for(i=0;i<qm->nrQMatoms;i++){
 +      for(j=0;j<DIM;j++){
 +      f[qm->indexQM[i]][j]          -= forces[i][j];
 +      fr->fshift[qm->shiftQM[i]][j] += fshift[i][j];
 +      }
 +    }
 +    free(forces);
 +    free(fshift);
 +    free(forces2);
 +    free(fshift2);
 +  }
 +  if(qm->bTS||qm->bOPT){
 +    /* qm[0] still contains the largest ONIOM QM subsystem.
 +     * We take the optimized coordinates and put them in x[].
 +     */
 +    for(i=0;i<qm->nrQMatoms;i++){
 +      for(j=0;j<DIM;j++){
 +      x[qm->indexQM[i]][j] = qm->xQM[i][j];
 +      }
 +    }
 +  }
 +  return(QMener);
 +} /* calculate_QMMM */
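
The multi-layer branch above accumulates the ONIOM extrapolation: every inner layer adds its energy at the higher level and subtracts the same set of atoms at the lower level, and the outermost layer is added once at its own level. A tiny numerical sketch of that bookkeeping for a hypothetical two-layer setup (all energies made up):

    #include <stdio.h>

    int main(void)
    {
        /* made-up energies (kJ/mol) for illustration only */
        double e_inner_high = -150.0;  /* inner layer at the higher level of theory */
        double e_inner_low  = -140.0;  /* inner layer at the lower level of theory  */
        double e_outer_low  = -400.0;  /* full (outer) layer at the lower level     */

        /* same accumulation as the loop above: += high, -= low, then + last layer */
        double QMener = e_inner_high - e_inner_low + e_outer_low;

        printf("ONIOM energy = %.1f kJ/mol\n", QMener);   /* -410.0 */
        return 0;
    }
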
 +
 +/* end of QMMM core routines */
index 01a1e3b2e8525fc135e9166dd807ac1446ce57de,0000000000000000000000000000000000000000..ccce5f82473a1d9005c94006313796ae7f068b29
mode 100644,000000..100644
--- /dev/null
@@@ -1,2615 -1,0 +1,2634 @@@
-     bDoLongRange  = (fr->bTwinRange && bNS && (flags & GMX_FORCE_DOLR));
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#ifdef GMX_CRAY_XT3
 +#include<catamount/dclock.h>
 +#endif
 +
 +
 +#include <stdio.h>
 +#include <time.h>
 +#ifdef HAVE_SYS_TIME_H
 +#include <sys/time.h>
 +#endif
 +#include <math.h>
 +#include "typedefs.h"
 +#include "string2.h"
 +#include "gmxfio.h"
 +#include "smalloc.h"
 +#include "names.h"
 +#include "confio.h"
 +#include "mvdata.h"
 +#include "txtdump.h"
 +#include "pbc.h"
 +#include "chargegroup.h"
 +#include "vec.h"
 +#include <time.h>
 +#include "nrnb.h"
 +#include "mshift.h"
 +#include "mdrun.h"
 +#include "sim_util.h"
 +#include "update.h"
 +#include "physics.h"
 +#include "main.h"
 +#include "mdatoms.h"
 +#include "force.h"
 +#include "bondf.h"
 +#include "pme.h"
 +#include "disre.h"
 +#include "orires.h"
 +#include "network.h"
 +#include "calcmu.h"
 +#include "constr.h"
 +#include "xvgr.h"
 +#include "trnio.h"
 +#include "xtcio.h"
 +#include "copyrite.h"
 +#include "pull_rotation.h"
 +#include "gmx_random.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "gmx_wallcycle.h"
 +#include "genborn.h"
 +#include "nbnxn_atomdata.h"
 +#include "nbnxn_search.h"
 +#include "nbnxn_kernels/nbnxn_kernel_ref.h"
 +#include "nbnxn_kernels/nbnxn_kernel_x86_simd128.h"
 +#include "nbnxn_kernels/nbnxn_kernel_x86_simd256.h"
 +#include "nbnxn_kernels/nbnxn_kernel_gpu_ref.h"
 +
 +#ifdef GMX_LIB_MPI
 +#include <mpi.h>
 +#endif
 +#ifdef GMX_THREAD_MPI
 +#include "tmpi.h"
 +#endif
 +
 +#include "adress.h"
 +#include "qmmm.h"
 +
 +#include "nbnxn_cuda_data_mgmt.h"
 +#include "nbnxn_cuda/nbnxn_cuda.h"
 +
 +#if 0
 +typedef struct gmx_timeprint {
 +
 +} t_gmx_timeprint;
 +#endif
 +
 +/* Portable version of ctime_r implemented in src/gmxlib/string2.c, but we do not want it declared in public installed headers */
 +char *
 +gmx_ctime_r(const time_t *clock,char *buf, int n);
 +
 +
 +double
 +gmx_gettime()
 +{
 +#ifdef HAVE_GETTIMEOFDAY
 +      struct timeval t;
 +      double seconds;
 +
 +      gettimeofday(&t,NULL);
 +
 +      seconds = (double) t.tv_sec + 1e-6*(double)t.tv_usec;
 +
 +      return seconds;
 +#else
 +      double  seconds;
 +
 +      seconds = time(NULL);
 +
 +      return seconds;
 +#endif
 +}
 +
 +
 +#define difftime(end,start) ((double)(end)-(double)(start))
 +
 +void print_time(FILE *out,gmx_runtime_t *runtime,gmx_large_int_t step,
 +                t_inputrec *ir, t_commrec *cr)
 +{
 +    time_t finish;
 +    char   timebuf[STRLEN];
 +    double dt;
 +    char buf[48];
 +
 +#ifndef GMX_THREAD_MPI
 +    if (!PAR(cr))
 +#endif
 +    {
 +        fprintf(out,"\r");
 +    }
 +    fprintf(out,"step %s",gmx_step_str(step,buf));
 +    if ((step >= ir->nstlist))
 +    {
 +        runtime->last = gmx_gettime();
 +        dt = difftime(runtime->last,runtime->real);
 +        runtime->time_per_step = dt/(step - ir->init_step + 1);
 +
 +        dt = (ir->nsteps + ir->init_step - step)*runtime->time_per_step;
 +
 +        if (ir->nsteps >= 0)
 +        {
 +            if (dt >= 300)
 +            {
 +                finish = (time_t) (runtime->last + dt);
 +                gmx_ctime_r(&finish,timebuf,STRLEN);
 +                sprintf(buf,"%s",timebuf);
 +                buf[strlen(buf)-1]='\0';
 +                fprintf(out,", will finish %s",buf);
 +            }
 +            else
 +                fprintf(out,", remaining runtime: %5d s          ",(int)dt);
 +        }
 +        else
 +        {
 +            fprintf(out," performance: %.1f ns/day    ",
 +                    ir->delta_t/1000*24*60*60/runtime->time_per_step);
 +        }
 +    }
 +#ifndef GMX_THREAD_MPI
 +    if (PAR(cr))
 +    {
 +        fprintf(out,"\n");
 +    }
 +#endif
 +
 +    fflush(out);
 +}
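 +
 +/* Worked example for the performance estimate printed above (illustrative
 + * numbers only): delta_t is in ps and time_per_step in wall seconds, so
 + * ns/day = (delta_t/1000)*24*60*60/time_per_step.
 + */
 +#if 0
 +static double example_ns_per_day(void)
 +{
 +    double delta_t       = 0.002; /* ps per MD step (hypothetical)        */
 +    double time_per_step = 0.01;  /* wall seconds per step (hypothetical) */
 +
 +    /* (0.002/1000)*86400/0.01 = 17.28 ns/day */
 +    return delta_t/1000*24*60*60/time_per_step;
 +}
 +#endif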
 +
 +#ifdef NO_CLOCK
 +#define clock() -1
 +#endif
 +
 +static double set_proctime(gmx_runtime_t *runtime)
 +{
 +    double diff;
 +#ifdef GMX_CRAY_XT3
 +    double prev;
 +
 +    prev = runtime->proc;
 +    runtime->proc = dclock();
 +
 +    diff = runtime->proc - prev;
 +#else
 +    clock_t prev;
 +
 +    prev = runtime->proc;
 +    runtime->proc = clock();
 +
 +    diff = (double)(runtime->proc - prev)/(double)CLOCKS_PER_SEC;
 +#endif
 +    if (diff < 0)
 +    {
 +        /* The counter has probably wrapped around; ignore this measurement */
 +        diff = 0;
 +    }
 +
 +    return diff;
 +}
 +
 +void runtime_start(gmx_runtime_t *runtime)
 +{
 +    runtime->real = gmx_gettime();
 +    runtime->proc          = 0;
 +    set_proctime(runtime);
 +    runtime->realtime      = 0;
 +    runtime->proctime      = 0;
 +    runtime->last          = 0;
 +    runtime->time_per_step = 0;
 +}
 +
 +void runtime_end(gmx_runtime_t *runtime)
 +{
 +    double now;
 +
 +    now = gmx_gettime();
 +
 +    runtime->proctime += set_proctime(runtime);
 +    runtime->realtime  = now - runtime->real;
 +    runtime->real      = now;
 +}
 +
 +void runtime_upd_proc(gmx_runtime_t *runtime)
 +{
 +    runtime->proctime += set_proctime(runtime);
 +}
 +
 +void print_date_and_time(FILE *fplog,int nodeid,const char *title,
 +                         const gmx_runtime_t *runtime)
 +{
 +    int i;
 +    char timebuf[STRLEN];
 +    char time_string[STRLEN];
 +    time_t tmptime;
 +
 +    if (fplog)
 +    {
 +        if (runtime != NULL)
 +        {
 +            tmptime = (time_t) runtime->real;
 +            gmx_ctime_r(&tmptime,timebuf,STRLEN);
 +        }
 +        else
 +        {
 +            tmptime = (time_t) gmx_gettime();
 +            gmx_ctime_r(&tmptime,timebuf,STRLEN);
 +        }
 +        for(i=0; timebuf[i]>=' '; i++)
 +        {
 +            time_string[i]=timebuf[i];
 +        }
 +        time_string[i]='\0';
 +
 +        fprintf(fplog,"%s on node %d %s\n",title,nodeid,time_string);
 +    }
 +}
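 +
 +/* Illustration of the timestamp trimming in print_date_and_time() above:
 + * gmx_ctime_r() produces a ctime()-style string that ends with a newline,
 + * and copying characters only while they are >= ' ' drops that trailing
 + * newline (and any other control characters). A hypothetical standalone
 + * variant:
 + */
 +#if 0
 +static void example_strip_ctime_newline(char *timebuf)
 +{
 +    int i;
 +
 +    for (i = 0; timebuf[i] >= ' '; i++)
 +    {
 +        ;
 +    }
 +    timebuf[i] = '\0';
 +}
 +#endif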
 +
 +static void sum_forces(int start,int end,rvec f[],rvec flr[])
 +{
 +  int i;
 +
 +  if (gmx_debug_at) {
 +    pr_rvecs(debug,0,"fsr",f+start,end-start);
 +    pr_rvecs(debug,0,"flr",flr+start,end-start);
 +  }
 +  for(i=start; (i<end); i++)
 +    rvec_inc(f[i],flr[i]);
 +}
 +
 +/*
 + * calc_f_el calculates forces due to an electric field.
 + *
 + * The force is in kJ mol^-1 nm^-1, i.e. the charge in e times the
 + * field in kJ mol^-1 nm^-1 e^-1 (a field given in V/nm is converted
 + * with FIELDFAC).
 + *
 + * Et[] contains the parameters for the time-dependent part of the
 + * field: with three parameters a Gaussian-enveloped cosine pulse is
 + * applied, otherwise a plain cosine in time is used.
 + * Ex[] contains the parameters for the spatially dependent part of
 + * the field. You can have cool periodic fields in principle, but
 + * only a constant field is supported now.
 + * The function should return the energy due to the electric field
 + * (if any), but for now it does not.
 + *
 + * WARNING:
 + * There can be problems with the virial.
 + * Since the field is not self-consistent this is unavoidable.
 + * For neutral molecules the virial is correct within this approximation.
 + * For neutral systems with many charged molecules the error is small.
 + * But for systems with a net charge or a few charged molecules
 + * the error can be significant when the field is high.
 + * Solution: implement a self-consistent electric field into PME.
 + */
 +static void calc_f_el(FILE *fp,int  start,int homenr,
 +                      real charge[],rvec x[],rvec f[],
 +                      t_cosines Ex[],t_cosines Et[],double t)
 +{
 +    rvec Ext;
 +    real t0;
 +    int  i,m;
 +
 +    for(m=0; (m<DIM); m++)
 +    {
 +        if (Et[m].n > 0)
 +        {
 +            if (Et[m].n == 3)
 +            {
 +                t0 = Et[m].a[1];
 +                Ext[m] = cos(Et[m].a[0]*(t-t0))*exp(-sqr(t-t0)/(2.0*sqr(Et[m].a[2])));
 +            }
 +            else
 +            {
 +                Ext[m] = cos(Et[m].a[0]*t);
 +            }
 +        }
 +        else
 +        {
 +            Ext[m] = 1.0;
 +        }
 +        if (Ex[m].n > 0)
 +        {
 +            /* Convert the field strength from V/nm to MD-units */
 +            Ext[m] *= Ex[m].a[0]*FIELDFAC;
 +            for(i=start; (i<start+homenr); i++)
 +                f[i][m] += charge[i]*Ext[m];
 +        }
 +        else
 +        {
 +            Ext[m] = 0;
 +        }
 +    }
 +    if (fp != NULL)
 +    {
 +        fprintf(fp,"%10g  %10g  %10g  %10g #FIELD\n",t,
 +                Ext[XX]/FIELDFAC,Ext[YY]/FIELDFAC,Ext[ZZ]/FIELDFAC);
 +    }
 +}
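 +
 +/* Worked example for the unit conversion in calc_f_el() (illustration only):
 + * FIELDFAC converts a field in V/nm times a charge in e to a force in
 + * kJ mol^-1 nm^-1, so a constant field of 1 V/nm on a +1 e charge gives
 + * roughly 96.5 kJ mol^-1 nm^-1. The helper below is hypothetical.
 + */
 +#if 0
 +static real example_field_force(real charge_e, real field_V_per_nm)
 +{
 +    /* force on one particle in kJ mol^-1 nm^-1 */
 +    return charge_e*field_V_per_nm*FIELDFAC;
 +}
 +#endif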
 +
 +static void calc_virial(FILE *fplog,int start,int homenr,rvec x[],rvec f[],
 +                      tensor vir_part,t_graph *graph,matrix box,
 +                      t_nrnb *nrnb,const t_forcerec *fr,int ePBC)
 +{
 +  int i,j;
 +  tensor virtest;
 +
 +  /* The short-range virial from surrounding boxes */
 +  clear_mat(vir_part);
 +  calc_vir(fplog,SHIFTS,fr->shift_vec,fr->fshift,vir_part,ePBC==epbcSCREW,box);
 +  inc_nrnb(nrnb,eNR_VIRIAL,SHIFTS);
 +
 +  /* Calculate partial virial, for local atoms only, based on short range.
 +   * Total virial is computed in global_stat, called from do_md
 +   */
 +  f_calc_vir(fplog,start,start+homenr,x,f,vir_part,graph,box);
 +  inc_nrnb(nrnb,eNR_VIRIAL,homenr);
 +
 +  /* Add position restraint contribution */
 +  for(i=0; i<DIM; i++) {
 +    vir_part[i][i] += fr->vir_diag_posres[i];
 +  }
 +
 +  /* Add wall contribution */
 +  for(i=0; i<DIM; i++) {
 +    vir_part[i][ZZ] += fr->vir_wall_z[i];
 +  }
 +
 +  if (debug)
 +    pr_rvecs(debug,0,"vir_part",vir_part,DIM);
 +}
 +
 +static void posres_wrapper(FILE *fplog,
 +                           int flags,
 +                           gmx_bool bSepDVDL,
 +                           t_inputrec *ir,
 +                           t_nrnb *nrnb,
 +                           gmx_localtop_t *top,
 +                           matrix box,rvec x[],
 +                           rvec f[],
 +                           gmx_enerdata_t *enerd,
 +                           real *lambda,
 +                           t_forcerec *fr)
 +{
 +    t_pbc pbc;
 +    real  v,dvdl;
 +    int   i;
 +
 +    /* Position restraints always require full pbc */
 +    set_pbc(&pbc,ir->ePBC,box);
 +    dvdl = 0;
 +    v = posres(top->idef.il[F_POSRES].nr,top->idef.il[F_POSRES].iatoms,
 +               top->idef.iparams_posres,
 +               (const rvec*)x,fr->f_novirsum,fr->vir_diag_posres,
 +               ir->ePBC==epbcNONE ? NULL : &pbc,
 +               lambda[efptRESTRAINT],&dvdl,
 +               fr->rc_scaling,fr->ePBC,fr->posres_com,fr->posres_comB);
 +    if (bSepDVDL)
 +    {
 +        fprintf(fplog,sepdvdlformat,
 +                interaction_function[F_POSRES].longname,v,dvdl);
 +    }
 +    enerd->term[F_POSRES] += v;
 +    /* If just the force constant changes, the FEP term is linear,
 +     * but if the reference positions change, it is not.
 +     */
 +    enerd->dvdl_nonlin[efptRESTRAINT] += dvdl;
 +    inc_nrnb(nrnb,eNR_POSRES,top->idef.il[F_POSRES].nr/2);
 +
 +    if ((ir->fepvals->n_lambda > 0) && (flags & GMX_FORCE_DHDL))
 +    {
 +        for(i=0; i<enerd->n_lambda; i++)
 +        {
 +            real dvdl_dum,lambda_dum;
 +
 +            lambda_dum = (i==0 ? lambda[efptRESTRAINT] : ir->fepvals->all_lambda[efptRESTRAINT][i-1]);
 +            v = posres(top->idef.il[F_POSRES].nr,top->idef.il[F_POSRES].iatoms,
 +                       top->idef.iparams_posres,
 +                       (const rvec*)x,NULL,NULL,
 +                       ir->ePBC==epbcNONE ? NULL : &pbc,lambda_dum,&dvdl,
 +                       fr->rc_scaling,fr->ePBC,fr->posres_com,fr->posres_comB);
 +            enerd->enerpart_lambda[i] += v;
 +        }
 +    }
 +}
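 +
 +/* Sketch of the foreign-lambda selection in posres_wrapper() above
 + * (illustration only, names hypothetical): index 0 re-evaluates at the
 + * current restraint lambda, indices 1..n use the lambda values listed
 + * in the input.
 + */
 +#if 0
 +static real example_foreign_lambda(int i, real lambda_restr,
 +                                   const double *all_lambda_restr)
 +{
 +    return (i == 0) ? lambda_restr : (real)all_lambda_restr[i-1];
 +}
 +#endif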
 +
 +static void pull_potential_wrapper(FILE *fplog,
 +                                   gmx_bool bSepDVDL,
 +                                   t_commrec *cr,
 +                                   t_inputrec *ir,
 +                                   matrix box,rvec x[],
 +                                   rvec f[],
 +                                   tensor vir_force,
 +                                   t_mdatoms *mdatoms,
 +                                   gmx_enerdata_t *enerd,
 +                                   real *lambda,
 +                                   double t)
 +{
 +    t_pbc  pbc;
 +    real   dvdl;
 +
 +    /* Calculate the center of mass forces; this requires communication,
 +     * which is why pull_potential is called close to other communication.
 +     * The virial contribution is calculated directly,
 +     * which is why we call pull_potential after calc_virial.
 +     */
 +    set_pbc(&pbc,ir->ePBC,box);
 +    dvdl = 0; 
 +    enerd->term[F_COM_PULL] +=
 +        pull_potential(ir->ePull,ir->pull,mdatoms,&pbc,
 +                       cr,t,lambda[efptRESTRAINT],x,f,vir_force,&dvdl);
 +    if (bSepDVDL)
 +    {
 +        fprintf(fplog,sepdvdlformat,"Com pull",enerd->term[F_COM_PULL],dvdl);
 +    }
 +    enerd->dvdl_lin[efptRESTRAINT] += dvdl;
 +}
 +
 +static void pme_receive_force_ener(FILE *fplog,
 +                                   gmx_bool bSepDVDL,
 +                                   t_commrec *cr,
 +                                   gmx_wallcycle_t wcycle,
 +                                   gmx_enerdata_t *enerd,
 +                                   t_forcerec *fr)
 +{
 +    real   e,v,dvdl;    
 +    float  cycles_ppdpme,cycles_seppme;
 +
 +    cycles_ppdpme = wallcycle_stop(wcycle,ewcPPDURINGPME);
 +    dd_cycles_add(cr->dd,cycles_ppdpme,ddCyclPPduringPME);
 +
 +    /* In case of node-splitting, the PP nodes receive the long-range 
 +     * forces, virial and energy from the PME nodes here.
 +     */    
 +    wallcycle_start(wcycle,ewcPP_PMEWAITRECVF);
 +    dvdl = 0;
 +    gmx_pme_receive_f(cr,fr->f_novirsum,fr->vir_el_recip,&e,&dvdl,
 +                      &cycles_seppme);
 +    if (bSepDVDL)
 +    {
 +        fprintf(fplog,sepdvdlformat,"PME mesh",e,dvdl);
 +    }
 +    enerd->term[F_COUL_RECIP] += e;
 +    enerd->dvdl_lin[efptCOUL] += dvdl;
 +    if (wcycle)
 +    {
 +        dd_cycles_add(cr->dd,cycles_seppme,ddCyclPME);
 +    }
 +    wallcycle_stop(wcycle,ewcPP_PMEWAITRECVF);
 +}
 +
 +static void print_large_forces(FILE *fp,t_mdatoms *md,t_commrec *cr,
 +                             gmx_large_int_t step,real pforce,rvec *x,rvec *f)
 +{
 +  int  i;
 +  real pf2,fn2;
 +  char buf[STEPSTRSIZE];
 +
 +  pf2 = sqr(pforce);
 +  for(i=md->start; i<md->start+md->homenr; i++) {
 +    fn2 = norm2(f[i]);
 +    /* This also catches NaN, provided the compiler does not optimize the check away. */
 +    if (fn2 >= pf2 || fn2 != fn2) {
 +      fprintf(fp,"step %s  atom %6d  x %8.3f %8.3f %8.3f  force %12.5e\n",
 +            gmx_step_str(step,buf),
 +            ddglatnr(cr->dd,i),x[i][XX],x[i][YY],x[i][ZZ],sqrt(fn2));
 +    }
 +  }
 +}
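 +
 +/* Illustration of the NaN check used in print_large_forces() above: an
 + * IEEE-754 NaN is the only value that compares unequal to itself, so
 + * (fn2 != fn2) is true exactly when fn2 is NaN -- unless the compiler
 + * optimizes the comparison away (e.g. under fast-math style flags).
 + * Hypothetical standalone form:
 + */
 +#if 0
 +static gmx_bool example_is_nan(real v)
 +{
 +    return (v != v);
 +}
 +#endif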
 +
 +static void post_process_forces(FILE *fplog,
 +                                t_commrec *cr,
 +                                gmx_large_int_t step,
 +                                t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +                                gmx_localtop_t *top,
 +                                matrix box,rvec x[],
 +                                rvec f[],
 +                                tensor vir_force,
 +                                t_mdatoms *mdatoms,
 +                                t_graph *graph,
 +                                t_forcerec *fr,gmx_vsite_t *vsite,
 +                                int flags)
 +{
 +    if (fr->bF_NoVirSum)
 +    {
 +        if (vsite)
 +        {
 +            /* Spread the mesh force on virtual sites to the other particles... 
 +             * This is parallelized. MPI communication is performed
 +             * if the constructing atoms aren't local.
 +             */
 +            wallcycle_start(wcycle,ewcVSITESPREAD);
 +            spread_vsite_f(fplog,vsite,x,fr->f_novirsum,NULL,
 +                           (flags & GMX_FORCE_VIRIAL),fr->vir_el_recip,
 +                           nrnb,
 +                           &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr);
 +            wallcycle_stop(wcycle,ewcVSITESPREAD);
 +        }
 +        if (flags & GMX_FORCE_VIRIAL)
 +        {
 +            /* Now add the forces, this is local */
 +            if (fr->bDomDec)
 +            {
 +                sum_forces(0,fr->f_novirsum_n,f,fr->f_novirsum);
 +            }
 +            else
 +            {
 +                sum_forces(mdatoms->start,mdatoms->start+mdatoms->homenr,
 +                           f,fr->f_novirsum);
 +            }
 +            if (EEL_FULL(fr->eeltype))
 +            {
 +                /* Add the mesh contribution to the virial */
 +                m_add(vir_force,fr->vir_el_recip,vir_force);
 +            }
 +            if (debug)
 +            {
 +                pr_rvecs(debug,0,"vir_force",vir_force,DIM);
 +            }
 +        }
 +    }
 +    
 +    if (fr->print_force >= 0)
 +    {
 +        print_large_forces(stderr,mdatoms,cr,step,fr->print_force,x,f);
 +    }
 +}
 +
 +static void do_nb_verlet(t_forcerec *fr,
 +                         interaction_const_t *ic,
 +                         gmx_enerdata_t *enerd,
 +                         int flags, int ilocality,
 +                         int clearF,
 +                         t_nrnb *nrnb,
 +                         gmx_wallcycle_t wcycle)
 +{
 +    int     nnbl, kernel_type, sh_e;
 +    char    *env;
 +    nonbonded_verlet_group_t  *nbvg;
 +
 +    if (!(flags & GMX_FORCE_NONBONDED))
 +    {
 +        /* skip non-bonded calculation */
 +        return;
 +    }
 +
 +    nbvg = &fr->nbv->grp[ilocality];
 +
 +    /* CUDA kernel launch overhead is already timed separately */
 +    if (fr->cutoff_scheme != ecutsVERLET)
 +    {
 +        gmx_incons("Invalid cut-off scheme passed!");
 +    }
 +
 +    if (nbvg->kernel_type != nbk8x8x8_CUDA)
 +    {
 +        wallcycle_sub_start(wcycle, ewcsNONBONDED);
 +    }
 +    switch (nbvg->kernel_type)
 +    {
 +        case nbk4x4_PlainC:
 +            nbnxn_kernel_ref(&nbvg->nbl_lists,
 +                             nbvg->nbat, ic,
 +                             fr->shift_vec,
 +                             flags,
 +                             clearF,
 +                             fr->fshift[0],
 +                             enerd->grpp.ener[egCOULSR],
 +                             fr->bBHAM ?
 +                             enerd->grpp.ener[egBHAMSR] :
 +                             enerd->grpp.ener[egLJSR]);
 +            break;
 +        
 +        case nbk4xN_X86_SIMD128:
 +            nbnxn_kernel_x86_simd128(&nbvg->nbl_lists,
 +                                     nbvg->nbat, ic,
 +                                     fr->shift_vec,
 +                                     flags,
 +                                     clearF,
 +                                     fr->fshift[0],
 +                                     enerd->grpp.ener[egCOULSR],
 +                                     fr->bBHAM ?
 +                                     enerd->grpp.ener[egBHAMSR] :
 +                                     enerd->grpp.ener[egLJSR]);
 +            break;
 +        case nbk4xN_X86_SIMD256:
 +            nbnxn_kernel_x86_simd256(&nbvg->nbl_lists,
 +                                     nbvg->nbat, ic,
 +                                     fr->shift_vec,
 +                                     flags,
 +                                     clearF,
 +                                     fr->fshift[0],
 +                                     enerd->grpp.ener[egCOULSR],
 +                                     fr->bBHAM ?
 +                                     enerd->grpp.ener[egBHAMSR] :
 +                                     enerd->grpp.ener[egLJSR]);
 +            break;
 +
 +        case nbk8x8x8_CUDA:
 +            nbnxn_cuda_launch_kernel(fr->nbv->cu_nbv, nbvg->nbat, flags, ilocality);
 +            break;
 +
 +        case nbk8x8x8_PlainC:
 +            nbnxn_kernel_gpu_ref(nbvg->nbl_lists.nbl[0],
 +                                 nbvg->nbat, ic,
 +                                 fr->shift_vec,
 +                                 flags,
 +                                 clearF,
 +                                 nbvg->nbat->out[0].f,
 +                                 fr->fshift[0],
 +                                 enerd->grpp.ener[egCOULSR],
 +                                 fr->bBHAM ?
 +                                 enerd->grpp.ener[egBHAMSR] :
 +                                 enerd->grpp.ener[egLJSR]);
 +            break;
 +
 +        default:
 +            gmx_incons("Invalid nonbonded kernel type passed!");
 +
 +    }
 +    if (nbvg->kernel_type != nbk8x8x8_CUDA)
 +    {
 +        wallcycle_sub_stop(wcycle, ewcsNONBONDED);
 +    }
 +
 +    /* In eNR_??? the nbnxn F+E kernels are always the F kernel + 1 */
 +    sh_e = ((flags & GMX_FORCE_ENERGY) ? 1 : 0);
 +    inc_nrnb(nrnb,
 +             ((EEL_RF(ic->eeltype) || ic->eeltype == eelCUT) ?
 +              eNR_NBNXN_LJ_RF : eNR_NBNXN_LJ_TAB) + sh_e,
 +             nbvg->nbl_lists.natpair_ljq);
 +    inc_nrnb(nrnb,eNR_NBNXN_LJ+sh_e,nbvg->nbl_lists.natpair_lj);
 +    inc_nrnb(nrnb,
 +             ((EEL_RF(ic->eeltype) || ic->eeltype == eelCUT) ?
 +              eNR_NBNXN_RF : eNR_NBNXN_TAB)+sh_e,
 +             nbvg->nbl_lists.natpair_q);
 +}
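 +
 +/* Sketch of the flop-counter indexing at the end of do_nb_verlet() above
 + * (illustration only): the force+energy variant of each nbnxn counter is
 + * assumed to follow directly after its force-only variant in the eNR_
 + * enumeration, so adding sh_e (0 or 1) selects the right entry.
 + */
 +#if 0
 +static int example_nrnb_counter(int force_only_counter, int force_flags)
 +{
 +    int sh_e = ((force_flags & GMX_FORCE_ENERGY) ? 1 : 0);
 +
 +    return force_only_counter + sh_e;
 +}
 +#endif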
 +
 +void do_force_cutsVERLET(FILE *fplog,t_commrec *cr,
 +              t_inputrec *inputrec,
 +              gmx_large_int_t step,t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +              gmx_localtop_t *top,
 +              gmx_mtop_t *mtop,
 +              gmx_groups_t *groups,
 +              matrix box,rvec x[],history_t *hist,
 +              rvec f[],
 +              tensor vir_force,
 +              t_mdatoms *mdatoms,
 +              gmx_enerdata_t *enerd,t_fcdata *fcd,
 +              real *lambda,t_graph *graph,
 +              t_forcerec *fr, interaction_const_t *ic,
 +              gmx_vsite_t *vsite,rvec mu_tot,
 +              double t,FILE *field,gmx_edsam_t ed,
 +              gmx_bool bBornRadii,
 +              int flags)
 +{
 +    int     cg0,cg1,i,j;
 +    int     start,homenr;
 +    int     nb_kernel_type;
 +    double  mu[2*DIM];
 +    gmx_bool   bSepDVDL,bStateChanged,bNS,bFillGrid,bCalcCGCM,bBS;
 +    gmx_bool   bDoLongRange,bDoForces,bSepLRF,bUseGPU,bUseOrEmulGPU;
 +    gmx_bool   bDiffKernels=FALSE;
 +    matrix  boxs;
 +    rvec    vzero,box_diag;
 +    real    e,v,dvdl;
 +    float  cycles_pme,cycles_force;
 +    nonbonded_verlet_t *nbv;
 +
 +    cycles_force = 0;
 +    nbv = fr->nbv;
 +    nb_kernel_type = fr->nbv->grp[0].kernel_type;
 +
 +    start  = mdatoms->start;
 +    homenr = mdatoms->homenr;
 +
 +    bSepDVDL = (fr->bSepDVDL && do_per_step(step,inputrec->nstlog));
 +
 +    clear_mat(vir_force);
 +
 +    cg0 = 0;
 +    if (DOMAINDECOMP(cr))
 +    {
 +        cg1 = cr->dd->ncg_tot;
 +    }
 +    else
 +    {
 +        cg1 = top->cgs.nr;
 +    }
 +    if (fr->n_tpi > 0)
 +    {
 +        cg1--;
 +    }
 +
 +    bStateChanged = (flags & GMX_FORCE_STATECHANGED);
 +    bNS           = (flags & GMX_FORCE_NS) && (fr->bAllvsAll==FALSE); 
 +    bFillGrid     = (bNS && bStateChanged);
 +    bCalcCGCM     = (bFillGrid && !DOMAINDECOMP(cr));
-         if (bSepLRF)
++    bDoLongRange  = (fr->bTwinRange && bNS && (flags & GMX_FORCE_DO_LR));
 +    bDoForces     = (flags & GMX_FORCE_FORCES);
 +    bSepLRF       = (bDoLongRange && bDoForces && (flags & GMX_FORCE_SEPLRF));
 +    bUseGPU       = fr->nbv->bUseGPU;
 +    bUseOrEmulGPU = bUseGPU || (nbv->grp[0].kernel_type == nbk8x8x8_PlainC);
 +
 +    if (bStateChanged)
 +    {
 +        update_forcerec(fplog,fr,box);
 +
 +        if (NEED_MUTOT(*inputrec))
 +        {
 +            /* Calculate total (local) dipole moment in a temporary common array.
 +             * This makes it possible to sum them over nodes faster.
 +             */
 +            calc_mu(start,homenr,
 +                    x,mdatoms->chargeA,mdatoms->chargeB,mdatoms->nChargePerturbed,
 +                    mu,mu+DIM);
 +        }
 +    }
 +
 +    if (fr->ePBC != epbcNONE) { 
 +        /* Compute shift vectors every step,
 +         * because of pressure coupling or box deformation!
 +         */
 +        if ((flags & GMX_FORCE_DYNAMICBOX) && bStateChanged)
 +            calc_shifts(box,fr->shift_vec);
 +
 +        if (bCalcCGCM) { 
 +            put_atoms_in_box_omp(fr->ePBC,box,homenr,x);
 +            inc_nrnb(nrnb,eNR_SHIFTX,homenr);
 +        } 
 +        else if (EI_ENERGY_MINIMIZATION(inputrec->eI) && graph) {
 +            unshift_self(graph,box,x);
 +        }
 +    } 
 +
 +    nbnxn_atomdata_copy_shiftvec(flags & GMX_FORCE_DYNAMICBOX,
 +                                  fr->shift_vec,nbv->grp[0].nbat);
 +
 +#ifdef GMX_MPI
 +    if (!(cr->duty & DUTY_PME)) {
 +        /* Send particle coordinates to the pme nodes.
 +         * Since this is only implemented for domain decomposition
 +         * and domain decomposition does not use the graph,
 +         * we do not need to worry about shifting.
 +         */    
 +
 +        wallcycle_start(wcycle,ewcPP_PMESENDX);
 +
 +        bBS = (inputrec->nwall == 2);
 +        if (bBS) {
 +            copy_mat(box,boxs);
 +            svmul(inputrec->wall_ewald_zfac,boxs[ZZ],boxs[ZZ]);
 +        }
 +
 +        gmx_pme_send_x(cr,bBS ? boxs : box,x,
 +                       mdatoms->nChargePerturbed,lambda[efptCOUL],
 +                       (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY)),step);
 +
 +        wallcycle_stop(wcycle,ewcPP_PMESENDX);
 +    }
 +#endif /* GMX_MPI */
 +
 +    /* do gridding for pair search */
 +    if (bNS)
 +    {
 +        if (graph && bStateChanged)
 +        {
 +            /* Calculate intramolecular shift vectors to make molecules whole */
 +            mk_mshift(fplog,graph,fr->ePBC,box,x);
 +        }
 +
 +        clear_rvec(vzero);
 +        box_diag[XX] = box[XX][XX];
 +        box_diag[YY] = box[YY][YY];
 +        box_diag[ZZ] = box[ZZ][ZZ];
 +
 +        wallcycle_start(wcycle,ewcNS);
 +        if (!fr->bDomDec)
 +        {
 +            wallcycle_sub_start(wcycle,ewcsNBS_GRID_LOCAL);
 +            nbnxn_put_on_grid(nbv->nbs,fr->ePBC,box,
 +                              0,vzero,box_diag,
 +                              0,mdatoms->homenr,-1,fr->cginfo,x,
 +                              0,NULL,
 +                              nbv->grp[eintLocal].kernel_type,
 +                              nbv->grp[eintLocal].nbat);
 +            wallcycle_sub_stop(wcycle,ewcsNBS_GRID_LOCAL);
 +        }
 +        else
 +        {
 +            wallcycle_sub_start(wcycle,ewcsNBS_GRID_NONLOCAL);
 +            nbnxn_put_on_grid_nonlocal(nbv->nbs,domdec_zones(cr->dd),
 +                                       fr->cginfo,x,
 +                                       nbv->grp[eintNonlocal].kernel_type,
 +                                       nbv->grp[eintNonlocal].nbat);
 +            wallcycle_sub_stop(wcycle,ewcsNBS_GRID_NONLOCAL);
 +        }
 +
 +        if (nbv->ngrp == 1 ||
 +            nbv->grp[eintNonlocal].nbat == nbv->grp[eintLocal].nbat)
 +        {
 +            nbnxn_atomdata_set(nbv->grp[eintLocal].nbat,eatAll,
 +                                nbv->nbs,mdatoms,fr->cginfo);
 +        }
 +        else
 +        {
 +            nbnxn_atomdata_set(nbv->grp[eintLocal].nbat,eatLocal,
 +                                nbv->nbs,mdatoms,fr->cginfo);
 +            nbnxn_atomdata_set(nbv->grp[eintNonlocal].nbat,eatAll,
 +                                nbv->nbs,mdatoms,fr->cginfo);
 +        }
 +        wallcycle_stop(wcycle, ewcNS);
 +    }
 +
 +    /* initialize the GPU atom data and copy shift vector */
 +    if (bUseGPU)
 +    {
 +        if (bNS)
 +        {
 +            wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB);
 +            nbnxn_cuda_init_atomdata(nbv->cu_nbv, nbv->grp[eintLocal].nbat);
 +            wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB);
 +        }
 +
 +        wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB);
 +        nbnxn_cuda_upload_shiftvec(nbv->cu_nbv, nbv->grp[eintLocal].nbat);
 +        wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB);
 +    }
 +
 +    /* do local pair search */
 +    if (bNS)
 +    {
 +        wallcycle_start_nocount(wcycle,ewcNS);
 +        wallcycle_sub_start(wcycle,ewcsNBS_SEARCH_LOCAL);
 +        nbnxn_make_pairlist(nbv->nbs,nbv->grp[eintLocal].nbat,
 +                            &top->excls,
 +                            ic->rlist,
 +                            nbv->min_ci_balanced,
 +                            &nbv->grp[eintLocal].nbl_lists,
 +                            eintLocal,
 +                            nbv->grp[eintLocal].kernel_type,
 +                            nrnb);
 +        wallcycle_sub_stop(wcycle,ewcsNBS_SEARCH_LOCAL);
 +
 +        if (bUseGPU)
 +        {
 +            /* initialize local pair-list on the GPU */
 +            nbnxn_cuda_init_pairlist(nbv->cu_nbv,
 +                                     nbv->grp[eintLocal].nbl_lists.nbl[0],
 +                                     eintLocal);
 +        }
 +        wallcycle_stop(wcycle, ewcNS);
 +    }
 +    else
 +    {
 +        wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
 +        wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS);
 +        nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs,eatLocal,FALSE,x,
 +                                        nbv->grp[eintLocal].nbat);
 +        wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS);
 +        wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
 +    }
 +
 +    if (bUseGPU)
 +    {
 +        wallcycle_start(wcycle,ewcLAUNCH_GPU_NB);
 +        /* launch local nonbonded F on GPU */
 +        do_nb_verlet(fr, ic, enerd, flags, eintLocal, enbvClearFNo,
 +                     nrnb, wcycle);
 +        wallcycle_stop(wcycle,ewcLAUNCH_GPU_NB);
 +    }
 +
 +    /* Communicate coordinates and sum dipole if necessary + 
 +       do non-local pair search */
 +    if (DOMAINDECOMP(cr))
 +    {
 +        bDiffKernels = (nbv->grp[eintNonlocal].kernel_type !=
 +                        nbv->grp[eintLocal].kernel_type);
 +
 +        if (bDiffKernels)
 +        {
 +            /* With GPU+CPU non-bonded calculations we need to copy
 +             * the local coordinates to the non-local nbat struct
 +             * (in CPU format) as the non-local kernel call also
 +             * calculates the local - non-local interactions.
 +             */
 +            wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
 +            wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS);
 +            nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs,eatLocal,TRUE,x,
 +                                             nbv->grp[eintNonlocal].nbat);
 +            wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS);
 +            wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
 +        }
 +
 +        if (bNS)
 +        {
 +            wallcycle_start_nocount(wcycle,ewcNS);
 +            wallcycle_sub_start(wcycle,ewcsNBS_SEARCH_NONLOCAL);
 +
 +            if (bDiffKernels)
 +            {
 +                nbnxn_grid_add_simple(nbv->nbs,nbv->grp[eintNonlocal].nbat);
 +            }
 +
 +            nbnxn_make_pairlist(nbv->nbs,nbv->grp[eintNonlocal].nbat,
 +                                &top->excls,
 +                                ic->rlist,
 +                                nbv->min_ci_balanced,
 +                                &nbv->grp[eintNonlocal].nbl_lists,
 +                                eintNonlocal,
 +                                nbv->grp[eintNonlocal].kernel_type,
 +                                nrnb);
 +
 +            wallcycle_sub_stop(wcycle,ewcsNBS_SEARCH_NONLOCAL);
 +
 +            if (nbv->grp[eintNonlocal].kernel_type == nbk8x8x8_CUDA)
 +            {
 +                /* initialize non-local pair-list on the GPU */
 +                nbnxn_cuda_init_pairlist(nbv->cu_nbv,
 +                                         nbv->grp[eintNonlocal].nbl_lists.nbl[0],
 +                                         eintNonlocal);
 +            }
 +            wallcycle_stop(wcycle,ewcNS);
 +        } 
 +        else
 +        {
 +            wallcycle_start(wcycle,ewcMOVEX);
 +            dd_move_x(cr->dd,box,x);
 +
 +            /* When we don't need the total dipole we sum it in global_stat */
 +            if (bStateChanged && NEED_MUTOT(*inputrec))
 +            {
 +                gmx_sumd(2*DIM,mu,cr);
 +            }
 +            wallcycle_stop(wcycle,ewcMOVEX);
 +
 +            wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
 +            wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS);
 +            nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs,eatNonlocal,FALSE,x,
 +                                            nbv->grp[eintNonlocal].nbat);
 +            wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS);
 +            cycles_force += wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
 +        }
 +
 +        if (bUseGPU && !bDiffKernels)
 +        { 
 +            wallcycle_start(wcycle,ewcLAUNCH_GPU_NB);
 +            /* launch non-local nonbonded F on GPU */
 +            do_nb_verlet(fr, ic, enerd, flags, eintNonlocal, enbvClearFNo,
 +                         nrnb, wcycle);
 +            cycles_force += wallcycle_stop(wcycle,ewcLAUNCH_GPU_NB);
 +        }
 +    }
 +
 +    if (bUseGPU)
 +    {
 +        /* launch D2H copy-back F */
 +        wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB);
 +        if (DOMAINDECOMP(cr) && !bDiffKernels)
 +        {
 +            nbnxn_cuda_launch_cpyback(nbv->cu_nbv, nbv->grp[eintNonlocal].nbat,
 +                                      flags, eatNonlocal);
 +        }
 +        nbnxn_cuda_launch_cpyback(nbv->cu_nbv, nbv->grp[eintLocal].nbat,
 +                                  flags, eatLocal);
 +        cycles_force += wallcycle_stop(wcycle,ewcLAUNCH_GPU_NB);
 +    }
 +
 +    if (bStateChanged && NEED_MUTOT(*inputrec))
 +    {
 +        if (PAR(cr))
 +        {
 +            gmx_sumd(2*DIM,mu,cr);
 +        } 
 +
 +        for(i=0; i<2; i++)
 +        {
 +            for(j=0;j<DIM;j++)
 +            {
 +                fr->mu_tot[i][j] = mu[i*DIM + j];
 +            }
 +        }
 +    }
 +    if (fr->efep == efepNO)
 +    {
 +        copy_rvec(fr->mu_tot[0],mu_tot);
 +    }
 +    else
 +    {
 +        for(j=0; j<DIM; j++)
 +        {
 +            mu_tot[j] =
 +                (1.0 - lambda[efptCOUL])*fr->mu_tot[0][j] +
 +                lambda[efptCOUL]*fr->mu_tot[1][j];
 +        }
 +    }
 +
 +    /* Reset energies */
 +    reset_enerdata(&(inputrec->opts),fr,bNS,enerd,MASTER(cr));
 +    clear_rvecs(SHIFTS,fr->fshift);
 +
 +    if (DOMAINDECOMP(cr))
 +    {
 +        if (!(cr->duty & DUTY_PME))
 +        {
 +            wallcycle_start(wcycle,ewcPPDURINGPME);
 +            dd_force_flop_start(cr->dd,nrnb);
 +        }
 +    }
 +    
 +    /* Start the force cycle counter.
 +     * This counter is stopped in do_force_lowlevel.
 +     * No parallel communication should occur while this counter is running,
 +     * since that will interfere with the dynamic load balancing.
 +     */
 +    wallcycle_start(wcycle,ewcFORCE);
 +    if (bDoForces)
 +    {
 +        /* Reset forces for which the virial is calculated separately:
 +         * PME/Ewald forces if necessary */
 +        if (fr->bF_NoVirSum) 
 +        {
 +            if (flags & GMX_FORCE_VIRIAL)
 +            {
 +                fr->f_novirsum = fr->f_novirsum_alloc;
 +                if (fr->bDomDec)
 +                {
 +                    clear_rvecs(fr->f_novirsum_n,fr->f_novirsum);
 +                }
 +                else
 +                {
 +                    clear_rvecs(homenr,fr->f_novirsum+start);
 +                }
 +            }
 +            else
 +            {
 +                /* We are not calculating the pressure so we do not need
 +                 * a separate array for forces that do not contribute
 +                 * to the pressure.
 +                 */
 +                fr->f_novirsum = f;
 +            }
 +        }
 +
-             /* Add the long range forces to the short range forces */
-             for(i=0; i<fr->natoms_force_constr; i++)
-             {
-                 copy_rvec(fr->f_twin[i],f[i]);
-             }
++        /* Clear the short- and long-range forces */
++        clear_rvecs(fr->natoms_force_constr,f);
++        if(bSepLRF && do_per_step(step,inputrec->nstcalclr))
 +        {
-         else if (!(fr->bTwinRange && bNS))
-         {
-             /* Clear the short-range forces */
-             clear_rvecs(fr->natoms_force_constr,f);
-         }
++            clear_rvecs(fr->natoms_force_constr,fr->f_twin);
 +        }
-                       x,hist,f,enerd,fcd,mtop,top,fr->born,
++        
 +        clear_rvec(fr->vir_diag_posres);
 +    }
 +    if (inputrec->ePull == epullCONSTRAINT)
 +    {
 +        clear_pull_forces(inputrec->pull);
 +    }
 +
 +    /* update QMMMrec, if necessary */
 +    if(fr->bQMMM)
 +    {
 +        update_QMMMrec(cr,fr,x,mdatoms,box,top);
 +    }
 +
 +    if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_POSRES].nr > 0)
 +    {
 +        posres_wrapper(fplog,flags,bSepDVDL,inputrec,nrnb,top,box,x,
 +                       f,enerd,lambda,fr);
 +    }
 +
 +    /* Compute the bonded and non-bonded energies and optionally forces */    
 +    /* if we use the GPU turn off the nonbonded */
 +    do_force_lowlevel(fplog,step,fr,inputrec,&(top->idef),
 +                      cr,nrnb,wcycle,mdatoms,&(inputrec->opts),
-     gmx_bool   bDoLongRange,bDoForces,bSepLRF;
++                      x,hist,f, bSepLRF ? fr->f_twin : f,enerd,fcd,mtop,top,fr->born,
 +                      &(top->atomtypes),bBornRadii,box,
 +                      inputrec->fepvals,lambda,graph,&(top->excls),fr->mu_tot,
 +                      ((nb_kernel_type == nbk8x8x8_CUDA || nb_kernel_type == nbk8x8x8_PlainC) 
 +                        ? flags&~GMX_FORCE_NONBONDED : flags),
 +                      &cycles_pme);
 +
++    if(bSepLRF)
++    {
++        if (do_per_step(step,inputrec->nstcalclr))
++        {
++            /* Add the long range forces to the short range forces */
++            for(i=0; i<fr->natoms_force_constr; i++)
++            {
++                rvec_add(fr->f_twin[i],f[i],f[i]);
++            }
++        }
++    }
++    
 +    if (!bUseOrEmulGPU)
 +    {
 +        /* Maybe we should move this into do_force_lowlevel */
 +        do_nb_verlet(fr, ic, enerd, flags, eintLocal, enbvClearFYes,
 +                     nrnb, wcycle);
 +    }
 +        
 +
 +    if (!bUseOrEmulGPU || bDiffKernels)
 +    {
 +        int aloc;
 +
 +        if (DOMAINDECOMP(cr))
 +        {
 +            do_nb_verlet(fr, ic, enerd, flags, eintNonlocal,
 +                         bDiffKernels ? enbvClearFYes : enbvClearFNo,
 +                         nrnb, wcycle);
 +        }
 +
 +        if (!bUseOrEmulGPU)
 +        {
 +            aloc = eintLocal;
 +        }
 +        else
 +        {
 +            aloc = eintNonlocal;
 +        }
 +
 +        /* Add all the non-bonded forces to the normal force array.
 +         * This can be split into a local and a non-local part when overlapping
 +         * communication with computation under domain decomposition.
 +         */
 +        cycles_force += wallcycle_stop(wcycle,ewcFORCE);
 +        wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
 +        wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
 +        nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs,eatAll,nbv->grp[aloc].nbat,f);
 +        wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
 +        cycles_force += wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
 +        wallcycle_start_nocount(wcycle,ewcFORCE);
 +
 +        /* if there are multiple fshift output buffers reduce them */
 +        if ((flags & GMX_FORCE_VIRIAL) &&
 +            nbv->grp[aloc].nbl_lists.nnbl > 1)
 +        {
 +            nbnxn_atomdata_add_nbat_fshift_to_fshift(nbv->grp[aloc].nbat,
 +                                                      fr->fshift);
 +        }
 +    }
 +    
 +    cycles_force += wallcycle_stop(wcycle,ewcFORCE);
 +    
 +    if (ed)
 +    {
 +        do_flood(fplog,cr,x,f,ed,box,step,bNS);
 +    }
 +
 +    if (bUseOrEmulGPU && !bDiffKernels)
 +    {
 +        /* wait for non-local forces (or calculate in emulation mode) */
 +        if (DOMAINDECOMP(cr))
 +        {
 +            if (bUseGPU)
 +            {
 +                wallcycle_start(wcycle,ewcWAIT_GPU_NB_NL);
 +                nbnxn_cuda_wait_gpu(nbv->cu_nbv,
 +                                    nbv->grp[eintNonlocal].nbat,
 +                                    flags, eatNonlocal,
 +                                    enerd->grpp.ener[egLJSR], enerd->grpp.ener[egCOULSR],
 +                                    fr->fshift);
 +                cycles_force += wallcycle_stop(wcycle,ewcWAIT_GPU_NB_NL);
 +            }
 +            else
 +            {
 +                wallcycle_start_nocount(wcycle,ewcFORCE);
 +                do_nb_verlet(fr, ic, enerd, flags, eintNonlocal, enbvClearFYes,
 +                             nrnb, wcycle);
 +                cycles_force += wallcycle_stop(wcycle,ewcFORCE);
 +            }            
 +            wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
 +            wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
 +            /* skip the reduction if there was no non-local work to do */
 +            if (nbv->grp[eintLocal].nbl_lists.nbl[0]->nsci > 0)
 +            {
 +                nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs,eatNonlocal,
 +                                               nbv->grp[eintNonlocal].nbat,f);
 +            }
 +            wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
 +            cycles_force += wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
 +        }
 +    }
 +
 +    if (bDoForces)
 +    {
 +        /* Communicate the forces */
 +        if (PAR(cr))
 +        {
 +            wallcycle_start(wcycle,ewcMOVEF);
 +            if (DOMAINDECOMP(cr))
 +            {
 +                dd_move_f(cr->dd,f,fr->fshift);
 +                /* Do we need to communicate the separate force array
 +                 * for terms that do not contribute to the single sum virial?
 +                 * Position restraints and electric fields do not introduce
 +                 * inter-cg forces, only full electrostatics methods do.
 +                 * When we do not calculate the virial, fr->f_novirsum = f,
 +                 * so we have already communicated these forces.
 +                 */
 +                if (EEL_FULL(fr->eeltype) && cr->dd->n_intercg_excl &&
 +                    (flags & GMX_FORCE_VIRIAL))
 +                {
 +                    dd_move_f(cr->dd,fr->f_novirsum,NULL);
 +                }
 +                if (bSepLRF)
 +                {
 +                    /* We should not update the shift forces here,
 +                     * since f_twin is already included in f.
 +                     */
 +                    dd_move_f(cr->dd,fr->f_twin,NULL);
 +                }
 +            }
 +            wallcycle_stop(wcycle,ewcMOVEF);
 +        }
 +    }
 + 
 +    if (bUseOrEmulGPU)
 +    {
 +        /* wait for local forces (or calculate in emulation mode) */
 +        if (bUseGPU)
 +        {
 +            wallcycle_start(wcycle,ewcWAIT_GPU_NB_L);
 +            nbnxn_cuda_wait_gpu(nbv->cu_nbv,
 +                                nbv->grp[eintLocal].nbat,
 +                                flags, eatLocal,
 +                                enerd->grpp.ener[egLJSR], enerd->grpp.ener[egCOULSR],
 +                                fr->fshift);
 +            wallcycle_stop(wcycle,ewcWAIT_GPU_NB_L);
 +
 +            /* now clear the GPU outputs while we finish the step on the CPU */
 +            nbnxn_cuda_clear_outputs(nbv->cu_nbv, flags);
 +        }
 +        else
 +        {            
 +            wallcycle_start_nocount(wcycle,ewcFORCE);
 +            do_nb_verlet(fr, ic, enerd, flags, eintLocal,
 +                         DOMAINDECOMP(cr) ? enbvClearFNo : enbvClearFYes,
 +                         nrnb, wcycle);
 +            wallcycle_stop(wcycle,ewcFORCE);
 +        }
 +        wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
 +        wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
 +        if (nbv->grp[eintLocal].nbl_lists.nbl[0]->nsci > 0)
 +        {
 +            /* skip the reduction if there was no local work to do */
 +            nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs,eatLocal,
 +                                           nbv->grp[eintLocal].nbat,f);
 +        }
 +        wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
 +        wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
 +    }
 +    
 +    if (DOMAINDECOMP(cr))
 +    {
 +        dd_force_flop_stop(cr->dd,nrnb);
 +        if (wcycle)
 +        {
 +            dd_cycles_add(cr->dd,cycles_force-cycles_pme,ddCyclF);
 +        }
 +    }
 +
 +    if (bDoForces)
 +    {
 +        if (IR_ELEC_FIELD(*inputrec))
 +        {
 +            /* Compute forces due to electric field */
 +            calc_f_el(MASTER(cr) ? field : NULL,
 +                      start,homenr,mdatoms->chargeA,x,fr->f_novirsum,
 +                      inputrec->ex,inputrec->et,t);
 +        }
 +
 +        /* If we have NoVirSum forces, but we do not calculate the virial,
 +         * we sum fr->f_novirsum = f later.
 +         */
 +        if (vsite && !(fr->bF_NoVirSum && !(flags & GMX_FORCE_VIRIAL)))
 +        {
 +            wallcycle_start(wcycle,ewcVSITESPREAD);
 +            spread_vsite_f(fplog,vsite,x,f,fr->fshift,FALSE,NULL,nrnb,
 +                           &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr);
 +            wallcycle_stop(wcycle,ewcVSITESPREAD);
 +
 +            if (bSepLRF)
 +            {
 +                wallcycle_start(wcycle,ewcVSITESPREAD);
 +                spread_vsite_f(fplog,vsite,x,fr->f_twin,NULL,FALSE,NULL,
 +                               nrnb,
 +                               &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr);
 +                wallcycle_stop(wcycle,ewcVSITESPREAD);
 +            }
 +        }
 +
 +        if (flags & GMX_FORCE_VIRIAL)
 +        {
 +            /* Calculation of the virial must be done after vsites! */
 +            calc_virial(fplog,mdatoms->start,mdatoms->homenr,x,f,
 +                        vir_force,graph,box,nrnb,fr,inputrec->ePBC);
 +        }
 +    }
 +
 +    if (inputrec->ePull == epullUMBRELLA || inputrec->ePull == epullCONST_F)
 +    {
 +        pull_potential_wrapper(fplog,bSepDVDL,cr,inputrec,box,x,
 +                               f,vir_force,mdatoms,enerd,lambda,t);
 +    }
 +
 +    if (PAR(cr) && !(cr->duty & DUTY_PME))
 +    {
 +        /* In case of node-splitting, the PP nodes receive the long-range 
 +         * forces, virial and energy from the PME nodes here.
 +         */    
 +        pme_receive_force_ener(fplog,bSepDVDL,cr,wcycle,enerd,fr);
 +    }
 +
 +    if (bDoForces)
 +    {
 +        post_process_forces(fplog,cr,step,nrnb,wcycle,
 +                            top,box,x,f,vir_force,mdatoms,graph,fr,vsite,
 +                            flags);
 +    }
 +    
 +    /* Sum the potential energy terms from group contributions */
 +    sum_epot(&(inputrec->opts),enerd);
 +}
 +
 +void do_force_cutsGROUP(FILE *fplog,t_commrec *cr,
 +              t_inputrec *inputrec,
 +              gmx_large_int_t step,t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +              gmx_localtop_t *top,
 +              gmx_mtop_t *mtop,
 +              gmx_groups_t *groups,
 +              matrix box,rvec x[],history_t *hist,
 +              rvec f[],
 +              tensor vir_force,
 +              t_mdatoms *mdatoms,
 +              gmx_enerdata_t *enerd,t_fcdata *fcd,
 +              real *lambda,t_graph *graph,
 +              t_forcerec *fr,gmx_vsite_t *vsite,rvec mu_tot,
 +              double t,FILE *field,gmx_edsam_t ed,
 +              gmx_bool bBornRadii,
 +              int flags)
 +{
 +    int    cg0,cg1,i,j;
 +    int    start,homenr;
 +    double mu[2*DIM];
 +    gmx_bool   bSepDVDL,bStateChanged,bNS,bFillGrid,bCalcCGCM,bBS;
-     bStateChanged = (flags & GMX_FORCE_STATECHANGED);
-     bNS           = (flags & GMX_FORCE_NS) && (fr->bAllvsAll==FALSE);
-     bFillGrid     = (bNS && bStateChanged);
-     bCalcCGCM     = (bFillGrid && !DOMAINDECOMP(cr));
-     bDoLongRange  = (fr->bTwinRange && bNS && (flags & GMX_FORCE_DOLR));
-     bDoForces     = (flags & GMX_FORCE_FORCES);
-     bSepLRF       = (bDoLongRange && bDoForces && (flags & GMX_FORCE_SEPLRF));
++    gmx_bool   bDoLongRangeNS,bDoForces,bDoPotential,bSepLRF;
 +    gmx_bool   bDoAdressWF;
 +    matrix boxs;
 +    rvec   vzero,box_diag;
 +    real   e,v,dvdlambda[efptNR];
 +    t_pbc  pbc;
 +    float  cycles_pme,cycles_force;
 +
 +    start  = mdatoms->start;
 +    homenr = mdatoms->homenr;
 +
 +    bSepDVDL = (fr->bSepDVDL && do_per_step(step,inputrec->nstlog));
 +
 +    clear_mat(vir_force);
 +
 +    if (PARTDECOMP(cr))
 +    {
 +        pd_cg_range(cr,&cg0,&cg1);
 +    }
 +    else
 +    {
 +        cg0 = 0;
 +        if (DOMAINDECOMP(cr))
 +        {
 +            cg1 = cr->dd->ncg_tot;
 +        }
 +        else
 +        {
 +            cg1 = top->cgs.nr;
 +        }
 +        if (fr->n_tpi > 0)
 +        {
 +            cg1--;
 +        }
 +    }
 +
-         /* Reset long range forces if necessary */
-         if (fr->bTwinRange)
-         {
-             /* Reset the (long-range) forces if necessary */
-             clear_rvecs(fr->natoms_force_constr,bSepLRF ? fr->f_twin : f);
-         }
++    bStateChanged  = (flags & GMX_FORCE_STATECHANGED);
++    bNS            = (flags & GMX_FORCE_NS) && (fr->bAllvsAll==FALSE);
++    /* Should we update the long-range neighborlists at this step?
++     * (With the group scheme, the long-range nonbonded evaluation is
++     * performed inside the neighborsearching.)
++     */
++    bDoLongRangeNS = fr->bTwinRange && bNS;
++    bFillGrid      = (bNS && bStateChanged);
++    bCalcCGCM      = (bFillGrid && !DOMAINDECOMP(cr));
++    bDoForces      = (flags & GMX_FORCE_FORCES);
++    bDoPotential   = (flags & GMX_FORCE_ENERGY);
++    bSepLRF        = ((inputrec->nstcalclr>1) && bDoForces &&
++                      (flags & GMX_FORCE_SEPLRF) && (flags & GMX_FORCE_DO_LR));
++
 +    /* should probably move this to the forcerec since it doesn't change */
 +    bDoAdressWF   = ((fr->adress_type!=eAdressOff));
 +
 +    if (bStateChanged)
 +    {
 +        update_forcerec(fplog,fr,box);
 +
 +        if (NEED_MUTOT(*inputrec))
 +        {
 +            /* Calculate total (local) dipole moment in a temporary common array.
 +             * This makes it possible to sum them over nodes faster.
 +             */
 +            calc_mu(start,homenr,
 +                    x,mdatoms->chargeA,mdatoms->chargeB,mdatoms->nChargePerturbed,
 +                    mu,mu+DIM);
 +        }
 +    }
 +
 +    if (fr->ePBC != epbcNONE) { 
 +        /* Compute shift vectors every step,
 +         * because of pressure coupling or box deformation!
 +         */
 +        if ((flags & GMX_FORCE_DYNAMICBOX) && bStateChanged)
 +            calc_shifts(box,fr->shift_vec);
 +
 +        if (bCalcCGCM) { 
 +            put_charge_groups_in_box(fplog,cg0,cg1,fr->ePBC,box,
 +                    &(top->cgs),x,fr->cg_cm);
 +            inc_nrnb(nrnb,eNR_CGCM,homenr);
 +            inc_nrnb(nrnb,eNR_RESETX,cg1-cg0);
 +        } 
 +        else if (EI_ENERGY_MINIMIZATION(inputrec->eI) && graph) {
 +            unshift_self(graph,box,x);
 +        }
 +    } 
 +    else if (bCalcCGCM) {
 +        calc_cgcm(fplog,cg0,cg1,&(top->cgs),x,fr->cg_cm);
 +        inc_nrnb(nrnb,eNR_CGCM,homenr);
 +    }
 +
 +    if (bCalcCGCM) {
 +        if (PAR(cr)) {
 +            move_cgcm(fplog,cr,fr->cg_cm);
 +        }
 +        if (gmx_debug_at)
 +            pr_rvecs(debug,0,"cgcm",fr->cg_cm,top->cgs.nr);
 +    }
 +
 +#ifdef GMX_MPI
 +    if (!(cr->duty & DUTY_PME)) {
 +        /* Send particle coordinates to the pme nodes.
 +         * Since this is only implemented for domain decomposition
 +         * and domain decomposition does not use the graph,
 +         * we do not need to worry about shifting.
 +         */    
 +
 +        wallcycle_start(wcycle,ewcPP_PMESENDX);
 +
 +        bBS = (inputrec->nwall == 2);
 +        if (bBS) {
 +            copy_mat(box,boxs);
 +            svmul(inputrec->wall_ewald_zfac,boxs[ZZ],boxs[ZZ]);
 +        }
 +
 +        gmx_pme_send_x(cr,bBS ? boxs : box,x,
 +                       mdatoms->nChargePerturbed,lambda[efptCOUL],
 +                       (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY)),step);
 +
 +        wallcycle_stop(wcycle,ewcPP_PMESENDX);
 +    }
 +#endif /* GMX_MPI */
 +
 +    /* Communicate coordinates and sum dipole if necessary */
 +    if (PAR(cr))
 +    {
 +        wallcycle_start(wcycle,ewcMOVEX);
 +        if (DOMAINDECOMP(cr))
 +        {
 +            dd_move_x(cr->dd,box,x);
 +        }
 +        else
 +        {
 +            move_x(fplog,cr,GMX_LEFT,GMX_RIGHT,x,nrnb);
 +        }
 +        wallcycle_stop(wcycle,ewcMOVEX);
 +    }
 +
 +    /* update adress weight beforehand */
 +    if(bStateChanged && bDoAdressWF)
 +    {
 +        /* need pbc for adress weight calculation with pbc_dx */
 +        set_pbc(&pbc,inputrec->ePBC,box);
 +        if(fr->adress_site == eAdressSITEcog)
 +        {
 +            update_adress_weights_cog(top->idef.iparams,top->idef.il,x,fr,mdatoms,
 +                                      inputrec->ePBC==epbcNONE ? NULL : &pbc);
 +        }
 +        else if (fr->adress_site == eAdressSITEcom)
 +        {
 +            update_adress_weights_com(fplog,cg0,cg1,&(top->cgs),x,fr,mdatoms,
 +                                      inputrec->ePBC==epbcNONE ? NULL : &pbc);
 +        }
 +        else if (fr->adress_site == eAdressSITEatomatom){
 +            update_adress_weights_atom_per_atom(cg0,cg1,&(top->cgs),x,fr,mdatoms,
 +                                                inputrec->ePBC==epbcNONE ? NULL : &pbc);
 +        }
 +        else
 +        {
 +            update_adress_weights_atom(cg0,cg1,&(top->cgs),x,fr,mdatoms,
 +                                       inputrec->ePBC==epbcNONE ? NULL : &pbc);
 +        }
 +    }
 +
 +    if (NEED_MUTOT(*inputrec))
 +    {
 +
 +        if (bStateChanged)
 +        {
 +            if (PAR(cr))
 +            {
 +                gmx_sumd(2*DIM,mu,cr);
 +            }
 +            for(i=0; i<2; i++)
 +            {
 +                for(j=0;j<DIM;j++)
 +                {
 +                    fr->mu_tot[i][j] = mu[i*DIM + j];
 +                }
 +            }
 +        }
 +        if (fr->efep == efepNO)
 +        {
 +            copy_rvec(fr->mu_tot[0],mu_tot);
 +        }
 +        else
 +        {
 +            for(j=0; j<DIM; j++)
 +            {
 +                mu_tot[j] =
 +                    (1.0 - lambda[efptCOUL])*fr->mu_tot[0][j] + lambda[efptCOUL]*fr->mu_tot[1][j];
 +            }
 +        }
 +    }
 +
 +    /* Reset energies */
 +    reset_enerdata(&(inputrec->opts),fr,bNS,enerd,MASTER(cr));
 +    clear_rvecs(SHIFTS,fr->fshift);
 +
 +    if (bNS)
 +    {
 +        wallcycle_start(wcycle,ewcNS);
 +
 +        if (graph && bStateChanged)
 +        {
 +            /* Calculate intramolecular shift vectors to make molecules whole */
 +            mk_mshift(fplog,graph,fr->ePBC,box,x);
 +        }
 +
-            bDoLongRange,bDoForces,bSepLRF ? fr->f_twin : f);
 +        /* Do the actual neighbour searching and, if twin-range electrostatics
 +         * is in use, also calculate the long-range forces and energies.
 +         */
 +        for (i=0;i<efptNR;i++) {dvdlambda[i] = 0;}
 +        ns(fplog,fr,x,box,
 +           groups,&(inputrec->opts),top,mdatoms,
 +           cr,nrnb,lambda,dvdlambda,&enerd->grpp,bFillGrid,
-         if (bSepLRF)
-         {
-             /* Add the long range forces to the short range forces */
-             for(i=0; i<fr->natoms_force_constr; i++)
-             {
-                 copy_rvec(fr->f_twin[i],f[i]);
-             }
-         }
-         else if (!(fr->bTwinRange && bNS))
++           bDoLongRangeNS);
 +        if (bSepDVDL)
 +        {
 +            fprintf(fplog,sepdvdlformat,"LR non-bonded",0.0,dvdlambda);
 +        }
 +        enerd->dvdl_lin[efptVDW] += dvdlambda[efptVDW];
 +        enerd->dvdl_lin[efptCOUL] += dvdlambda[efptCOUL];
 +
 +        wallcycle_stop(wcycle,ewcNS);
 +    }
 +
 +    if (inputrec->implicit_solvent && bNS)
 +    {
 +        make_gb_nblist(cr,inputrec->gb_algorithm,inputrec->rlist,
 +                       x,box,fr,&top->idef,graph,fr->born);
 +    }
 +
 +    if (DOMAINDECOMP(cr))
 +    {
 +        if (!(cr->duty & DUTY_PME))
 +        {
 +            wallcycle_start(wcycle,ewcPPDURINGPME);
 +            dd_force_flop_start(cr->dd,nrnb);
 +        }
 +    }
 +
 +    if (inputrec->bRot)
 +    {
 +        /* Enforced rotation has its own cycle counter that starts after the collective
 +         * coordinates have been communicated. It is added to ddCyclF to allow
 +         * for proper load-balancing */
 +        wallcycle_start(wcycle,ewcROT);
 +        do_rotation(cr,inputrec,box,x,t,step,wcycle,bNS);
 +        wallcycle_stop(wcycle,ewcROT);
 +    }
 +
 +    /* Start the force cycle counter.
 +     * This counter is stopped in do_force_lowlevel.
 +     * No parallel communication should occur while this counter is running,
 +     * since that will interfere with the dynamic load balancing.
 +     */
 +    wallcycle_start(wcycle,ewcFORCE);
 +    
 +    if (bDoForces)
 +    {
 +        /* Reset forces for which the virial is calculated separately:
 +         * PME/Ewald forces if necessary */
 +        if (fr->bF_NoVirSum)
 +        {
 +            if (flags & GMX_FORCE_VIRIAL)
 +            {
 +                fr->f_novirsum = fr->f_novirsum_alloc;
 +                if (fr->bDomDec)
 +                {
 +                    clear_rvecs(fr->f_novirsum_n,fr->f_novirsum);
 +                }
 +                else
 +                {
 +                    clear_rvecs(homenr,fr->f_novirsum+start);
 +                }
 +            }
 +            else
 +            {
 +                /* We are not calculating the pressure so we do not need
 +                 * a separate array for forces that do not contribute
 +                 * to the pressure.
 +                 */
 +                fr->f_novirsum = f;
 +            }
 +        }
 +
-             /* Clear the short-range forces */
-             clear_rvecs(fr->natoms_force_constr,f);
++        /* Clear the short- and long-range forces */
++        clear_rvecs(fr->natoms_force_constr,f);
++        if(bSepLRF && do_per_step(step,inputrec->nstcalclr))
 +        {
++            clear_rvecs(fr->natoms_force_constr,fr->f_twin);
 +        }
-                       x,hist,f,enerd,fcd,mtop,top,fr->born,
++        
 +        clear_rvec(fr->vir_diag_posres);
 +    }
 +    if (inputrec->ePull == epullCONSTRAINT)
 +    {
 +        clear_pull_forces(inputrec->pull);
 +    }
 +
 +    /* update QMMMrec, if necessary */
 +    if(fr->bQMMM)
 +    {
 +        update_QMMMrec(cr,fr,x,mdatoms,box,top);
 +    }
 +
 +    if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_POSRES].nr > 0)
 +    {
 +        posres_wrapper(fplog,flags,bSepDVDL,inputrec,nrnb,top,box,x,
 +                       f,enerd,lambda,fr);
 +    }
 +
 +    if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_FBPOSRES].nr > 0)
 +    {
 +        /* Flat-bottomed position restraints always require full pbc */
 +        if(!(bStateChanged && bDoAdressWF))
 +        {
 +            set_pbc(&pbc,inputrec->ePBC,box);
 +        }
 +        v = fbposres(top->idef.il[F_FBPOSRES].nr,top->idef.il[F_FBPOSRES].iatoms,
 +                     top->idef.iparams_fbposres,
 +                     (const rvec*)x,fr->f_novirsum,fr->vir_diag_posres,
 +                     inputrec->ePBC==epbcNONE ? NULL : &pbc,
 +                     fr->rc_scaling,fr->ePBC,fr->posres_com);
 +        enerd->term[F_FBPOSRES] += v;
 +        inc_nrnb(nrnb,eNR_FBPOSRES,top->idef.il[F_FBPOSRES].nr/2);
 +    }
 +
 +    /* Compute the bonded and non-bonded energies and optionally forces */
 +    do_force_lowlevel(fplog,step,fr,inputrec,&(top->idef),
 +                      cr,nrnb,wcycle,mdatoms,&(inputrec->opts),
-   real   scale,*vdwtab;
++                      x,hist,f, bSepLRF ? fr->f_twin : f,enerd,fcd,mtop,top,fr->born,
 +                      &(top->atomtypes),bBornRadii,box,
 +                      inputrec->fepvals,lambda,
 +                      graph,&(top->excls),fr->mu_tot,
 +                      flags,
 +                      &cycles_pme);
 +
++    if(bSepLRF)
++    {
++        if (do_per_step(step,inputrec->nstcalclr))
++        {
++            /* Add the long range forces to the short range forces */
++            for(i=0; i<fr->natoms_force_constr; i++)
++            {
++                rvec_add(fr->f_twin[i],f[i],f[i]);
++            }
++        }
++    }
++    
 +    cycles_force = wallcycle_stop(wcycle,ewcFORCE);
 +
 +    if (ed)
 +    {
 +        do_flood(fplog,cr,x,f,ed,box,step,bNS);
 +    }
 +
 +    if (DOMAINDECOMP(cr))
 +    {
 +        dd_force_flop_stop(cr->dd,nrnb);
 +        if (wcycle)
 +        {
 +            dd_cycles_add(cr->dd,cycles_force-cycles_pme,ddCyclF);
 +        }
 +    }
 +
 +    if (bDoForces)
 +    {
 +        if (IR_ELEC_FIELD(*inputrec))
 +        {
 +            /* Compute forces due to electric field */
 +            calc_f_el(MASTER(cr) ? field : NULL,
 +                      start,homenr,mdatoms->chargeA,x,fr->f_novirsum,
 +                      inputrec->ex,inputrec->et,t);
 +        }
 +
 +        if (bDoAdressWF && fr->adress_icor == eAdressICThermoForce)
 +        {
 +            /* Compute thermodynamic force in hybrid AdResS region */
 +            adress_thermo_force(start,homenr,&(top->cgs),x,fr->f_novirsum,fr,mdatoms,
 +                                inputrec->ePBC==epbcNONE ? NULL : &pbc);
 +        }
 +
 +        /* Communicate the forces */
 +        if (PAR(cr))
 +        {
 +            wallcycle_start(wcycle,ewcMOVEF);
 +            if (DOMAINDECOMP(cr))
 +            {
 +                dd_move_f(cr->dd,f,fr->fshift);
 +                /* Do we need to communicate the separate force array
 +                 * for terms that do not contribute to the single sum virial?
 +                 * Position restraints and electric fields do not introduce
 +                 * inter-cg forces, only full electrostatics methods do.
 +                 * When we do not calculate the virial, fr->f_novirsum = f,
 +                 * so we have already communicated these forces.
 +                 */
 +                if (EEL_FULL(fr->eeltype) && cr->dd->n_intercg_excl &&
 +                    (flags & GMX_FORCE_VIRIAL))
 +                {
 +                    dd_move_f(cr->dd,fr->f_novirsum,NULL);
 +                }
 +                if (bSepLRF)
 +                {
 +                    /* We should not update the shift forces here,
 +                     * since f_twin is already included in f.
 +                     */
 +                    dd_move_f(cr->dd,fr->f_twin,NULL);
 +                }
 +            }
 +            else
 +            {
 +                pd_move_f(cr,f,nrnb);
 +                if (bSepLRF)
 +                {
 +                    pd_move_f(cr,fr->f_twin,nrnb);
 +                }
 +            }
 +            wallcycle_stop(wcycle,ewcMOVEF);
 +        }
 +
 +        /* If we have NoVirSum forces, but we do not calculate the virial,
 +         * we sum fr->f_novirsum = f later.
 +         */
 +        if (vsite && !(fr->bF_NoVirSum && !(flags & GMX_FORCE_VIRIAL)))
 +        {
 +            wallcycle_start(wcycle,ewcVSITESPREAD);
 +            spread_vsite_f(fplog,vsite,x,f,fr->fshift,FALSE,NULL,nrnb,
 +                           &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr);
 +            wallcycle_stop(wcycle,ewcVSITESPREAD);
 +
 +            if (bSepLRF)
 +            {
 +                wallcycle_start(wcycle,ewcVSITESPREAD);
 +                spread_vsite_f(fplog,vsite,x,fr->f_twin,NULL,FALSE,NULL,
 +                               nrnb,
 +                               &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr);
 +                wallcycle_stop(wcycle,ewcVSITESPREAD);
 +            }
 +        }
 +
 +        if (flags & GMX_FORCE_VIRIAL)
 +        {
 +            /* Calculation of the virial must be done after vsites! */
 +            calc_virial(fplog,mdatoms->start,mdatoms->homenr,x,f,
 +                        vir_force,graph,box,nrnb,fr,inputrec->ePBC);
 +        }
 +    }
 +
 +    if (inputrec->ePull == epullUMBRELLA || inputrec->ePull == epullCONST_F)
 +    {
 +        pull_potential_wrapper(fplog,bSepDVDL,cr,inputrec,box,x,
 +                               f,vir_force,mdatoms,enerd,lambda,t);
 +    }
 +
 +    /* Add the forces from enforced rotation potentials (if any) */
 +    if (inputrec->bRot)
 +    {
 +        wallcycle_start(wcycle,ewcROTadd);
 +        enerd->term[F_COM_PULL] += add_rot_forces(inputrec->rot, f, cr,step,t);
 +        wallcycle_stop(wcycle,ewcROTadd);
 +    }
 +
 +    if (PAR(cr) && !(cr->duty & DUTY_PME))
 +    {
 +        /* In case of node-splitting, the PP nodes receive the long-range 
 +         * forces, virial and energy from the PME nodes here.
 +         */
 +        pme_receive_force_ener(fplog,bSepDVDL,cr,wcycle,enerd,fr);
 +    }
 +
 +    if (bDoForces)
 +    {
 +        post_process_forces(fplog,cr,step,nrnb,wcycle,
 +                            top,box,x,f,vir_force,mdatoms,graph,fr,vsite,
 +                            flags);
 +    }
 +
 +    /* Sum the potential energy terms from group contributions */
 +    sum_epot(&(inputrec->opts),enerd);
 +}
 +
 +void do_force(FILE *fplog,t_commrec *cr,
 +              t_inputrec *inputrec,
 +              gmx_large_int_t step,t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +              gmx_localtop_t *top,
 +              gmx_mtop_t *mtop,
 +              gmx_groups_t *groups,
 +              matrix box,rvec x[],history_t *hist,
 +              rvec f[],
 +              tensor vir_force,
 +              t_mdatoms *mdatoms,
 +              gmx_enerdata_t *enerd,t_fcdata *fcd,
 +              real *lambda,t_graph *graph,
 +              t_forcerec *fr,
 +              gmx_vsite_t *vsite,rvec mu_tot,
 +              double t,FILE *field,gmx_edsam_t ed,
 +              gmx_bool bBornRadii,
 +              int flags)
 +{
 +    /* modify force flag if not doing nonbonded */
 +    if (!fr->bNonbonded)
 +    {
 +        flags &= ~GMX_FORCE_NONBONDED;
 +    }
 +
 +    switch (inputrec->cutoff_scheme)
 +    {
 +        case ecutsVERLET:
 +            do_force_cutsVERLET(fplog, cr, inputrec,
 +                                step, nrnb, wcycle,
 +                                top, mtop,
 +                                groups,
 +                                box, x, hist,
 +                                f, vir_force,
 +                                mdatoms,
 +                                enerd, fcd,
 +                                lambda, graph,
 +                                fr, fr->ic, 
 +                                vsite, mu_tot,
 +                                t, field, ed,
 +                                bBornRadii,
 +                                flags);
 +            break;
 +        case ecutsGROUP:
 +             do_force_cutsGROUP(fplog, cr, inputrec,
 +                                step, nrnb, wcycle,
 +                                top, mtop,
 +                                groups,
 +                                box, x, hist,
 +                                f, vir_force,
 +                                mdatoms,
 +                                enerd, fcd,
 +                                lambda, graph,
 +                                fr, vsite, mu_tot,
 +                                t, field, ed,
 +                                bBornRadii,
 +                                flags);
 +            break;
 +        default:
 +            gmx_incons("Invalid cut-off scheme passed!");
 +    }
 +}
 +
 +
 +void do_constrain_first(FILE *fplog,gmx_constr_t constr,
 +                        t_inputrec *ir,t_mdatoms *md,
 +                        t_state *state,rvec *f,
 +                        t_graph *graph,t_commrec *cr,t_nrnb *nrnb,
 +                        t_forcerec *fr, gmx_localtop_t *top, tensor shake_vir)
 +{
 +    int    i,m,start,end;
 +    gmx_large_int_t step;
 +    real   dt=ir->delta_t;
 +    real   dvdl_dum;
 +    rvec   *savex;
 +
 +    snew(savex,state->natoms);
 +
 +    start = md->start;
 +    end   = md->homenr + start;
 +
 +    if (debug)
 +        fprintf(debug,"vcm: start=%d, homenr=%d, end=%d\n",
 +                start,md->homenr,end);
 +    /* Do a first constraint pass to reset the particles... */
 +    step = ir->init_step;
 +    if (fplog)
 +    {
 +        char buf[STEPSTRSIZE];
 +        fprintf(fplog,"\nConstraining the starting coordinates (step %s)\n",
 +                gmx_step_str(step,buf));
 +    }
 +    dvdl_dum = 0;
 +
 +    /* constrain the current position */
 +    constrain(NULL,TRUE,FALSE,constr,&(top->idef),
 +              ir,NULL,cr,step,0,md,
 +              state->x,state->x,NULL,
 +              fr->bMolPBC,state->box,
 +              state->lambda[efptBONDED],&dvdl_dum,
 +              NULL,NULL,nrnb,econqCoord,
 +              ir->epc==epcMTTK,state->veta,state->veta);
 +    if (EI_VV(ir->eI))
 +    {
 +        /* constrain the initial velocity, and save it */
 +        /* also may be useful if we need the ekin from the halfstep for velocity verlet */
 +        /* might not yet treat veta correctly */
 +        constrain(NULL,TRUE,FALSE,constr,&(top->idef),
 +                  ir,NULL,cr,step,0,md,
 +                  state->x,state->v,state->v,
 +                  fr->bMolPBC,state->box,
 +                  state->lambda[efptBONDED],&dvdl_dum,
 +                  NULL,NULL,nrnb,econqVeloc,
 +                  ir->epc==epcMTTK,state->veta,state->veta);
 +    }
 +    /* constrain the initial velocities at t-dt/2 */
 +    if (EI_STATE_VELOCITY(ir->eI) && ir->eI!=eiVV)
 +    {
 +        for(i=start; (i<end); i++)
 +        {
 +            for(m=0; (m<DIM); m++)
 +            {
 +                /* Reverse the velocity */
 +                state->v[i][m] = -state->v[i][m];
 +                /* Store the position at t-dt in buf */
 +                savex[i][m] = state->x[i][m] + dt*state->v[i][m];
 +            }
 +        }
 +        /* Shake the positions at t=-dt with the positions at t=0
 +         * as reference coordinates.
 +         */
 +        if (fplog)
 +        {
 +            char buf[STEPSTRSIZE];
 +            fprintf(fplog,"\nConstraining the coordinates at t0-dt (step %s)\n",
 +                    gmx_step_str(step,buf));
 +        }
 +        dvdl_dum = 0;
 +        constrain(NULL,TRUE,FALSE,constr,&(top->idef),
 +                  ir,NULL,cr,step,-1,md,
 +                  state->x,savex,NULL,
 +                  fr->bMolPBC,state->box,
 +                  state->lambda[efptBONDED],&dvdl_dum,
 +                  state->v,NULL,nrnb,econqCoord,
 +                  ir->epc==epcMTTK,state->veta,state->veta);
 +        
 +        for(i=start; i<end; i++) {
 +            for(m=0; m<DIM; m++) {
 +                /* Re-reverse the velocities */
 +                state->v[i][m] = -state->v[i][m];
 +            }
 +        }
 +    }
 +    sfree(savex);
 +}
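/*
 * Illustrative note, not part of the patch: because the velocities were just
 * negated, the buffer above holds, in terms of the original velocities v0,
 *
 *   savex = x(0) + dt*(-v0) = x(0) - dt*v0
 *
 * i.e. the (unconstrained) leap-frog position one step back in time. The
 * constrain() call with x(0) as reference then makes the x(-dt), x(0) pair
 * consistent with the constraints, and the final loop restores the velocity
 * sign.
 */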
 +
 +void calc_enervirdiff(FILE *fplog,int eDispCorr,t_forcerec *fr)
 +{
 +  double eners[2],virs[2],enersum,virsum,y0,f,g,h;
 +  double r0,r1,r,rc3,rc9,ea,eb,ec,pa,pb,pc,pd;
 +  double invscale,invscale2,invscale3;
 +  int    ri0,ri1,ri,i,offstart,offset;
-       scale  = fr->nblists[0].tab.scale;
-       vdwtab = fr->nblists[0].vdwtab;
++  real   scale,*vdwtab,tabfactor,tmp;
 +
 +  fr->enershiftsix = 0;
 +  fr->enershifttwelve = 0;
 +  fr->enerdiffsix = 0;
 +  fr->enerdifftwelve = 0;
 +  fr->virdiffsix = 0;
 +  fr->virdifftwelve = 0;
 +
 +  if (eDispCorr != edispcNO) {
 +    for(i=0; i<2; i++) {
 +      eners[i] = 0;
 +      virs[i]  = 0;
 +    }
 +    if ((fr->vdwtype == evdwSWITCH) || (fr->vdwtype == evdwSHIFT)) {
 +      if (fr->rvdw_switch == 0)
 +      gmx_fatal(FARGS,
 +                "With dispersion correction rvdw-switch can not be zero "
 +                "for vdw-type = %s",evdw_names[fr->vdwtype]);
 +
-       if (fr->vdwtype == evdwSHIFT) {
-       /* Determine the constant energy shift below rvdw_switch */
-       fr->enershiftsix    = (real)(-1.0/(rc3*rc3)) - vdwtab[8*ri0];
-       fr->enershifttwelve = (real)( 1.0/(rc9*rc3)) - vdwtab[8*ri0 + 4];
++      scale  = fr->nblists[0].table_elec_vdw.scale;
++      vdwtab = fr->nblists[0].table_vdw.data;
 +
 +      /* Round the cut-offs to exact table values for precision */
 +      ri0 = floor(fr->rvdw_switch*scale);
 +      ri1 = ceil(fr->rvdw*scale);
 +      r0  = ri0/scale;
 +      r1  = ri1/scale;
 +      rc3 = r0*r0*r0;
 +      rc9  = rc3*rc3*rc3;
 +
-         offstart = 0;
-       else
-         offstart = 4;
++      if (fr->vdwtype == evdwSHIFT)
++      {
++          /* Determine the constant energy shift below rvdw_switch.
++           * The table has been scaled down by the derivative factors (6 and 12)
++           * to compensate for the c6/c12 parameters being scaled up to save
++           * flops in the analytical kernels, so we scale the values back up here.
++           */
++          fr->enershiftsix    = (real)(-1.0/(rc3*rc3)) - 6.0*vdwtab[8*ri0];
++          fr->enershifttwelve = (real)( 1.0/(rc9*rc3)) - 12.0*vdwtab[8*ri0 + 4];
 +      }
 +      /* Add the constant part from 0 to rvdw_switch.
 +       * This integration from 0 to rvdw_switch overcounts the number
 +       * of interactions by 1, as it also counts the self interaction.
 +       * We will correct for this later.
 +       */
 +      eners[0] += 4.0*M_PI*fr->enershiftsix*rc3/3.0;
 +      eners[1] += 4.0*M_PI*fr->enershifttwelve*rc3/3.0;
 +
 +      invscale = 1.0/(scale);
 +      invscale2 = invscale*invscale;
 +      invscale3 = invscale*invscale2;
 +
 +      /* following summation derived from cubic spline definition,
 +      Numerical Recipes in C, second edition, p. 113-116.  Exact
 +      for the cubic spline.  We first calculate the negative of
 +      the energy from rvdw to rvdw_switch, assuming that g(r)=1,
 +      and then add the more standard, abrupt cutoff correction to
 +      that result, yielding the long-range correction for a
 +      switched function.  We perform both the pressure and energy
 +      loops at the same time for simplicity, as the computational
 +      cost is low. */
 +
 +      for (i=0;i<2;i++) {
 +        enersum = 0.0; virsum = 0.0;
 +        if (i==0)
-           /* this "8" is from the packing in the vdwtab array - perhaps
-           should be #define'ed? */
++        {
++            offstart = 0;
++            /* Since the dispersion table has been scaled down a factor 6.0 and the repulsion
++             * a factor 12.0 to compensate for the c6/c12 parameters inside nbfp[] being scaled
++             * up (to save flops in kernels), we need to correct for this.
++             */
++            tabfactor = 6.0;
++        }
++        else
++        {
++            offstart = 4;
++            tabfactor = 12.0;
++        }
 +      for (ri=ri0; ri<ri1; ri++) {
 +          r = ri*invscale;
 +          ea = invscale3;
 +          eb = 2.0*invscale2*r;
 +          ec = invscale*r*r;
 +
 +          pa = invscale3;
 +          pb = 3.0*invscale2*r;
 +          pc = 3.0*invscale*r*r;
 +          pd = r*r*r;
 +
-           f = vdwtab[offset+1];
-           g = vdwtab[offset+2];
-           h = vdwtab[offset+3];
-           enersum += y0*(ea/3 + eb/2 + ec) + f*(ea/4 + eb/3 + ec/2)+
-             g*(ea/5 + eb/4 + ec/3) + h*(ea/6 + eb/5 + ec/4);
-           virsum  +=  f*(pa/4 + pb/3 + pc/2 + pd) +
-             2*g*(pa/5 + pb/4 + pc/3 + pd/2) + 3*h*(pa/6 + pb/5 + pc/4 + pd/3);
++          /* this "8" is from the packing in the vdwtab array - perhaps should be #define'ed? */
 +          offset = 8*ri + offstart;
 +          y0 = vdwtab[offset];
-         enersum *= 4.0*M_PI;
-         virsum  *= 4.0*M_PI;
++          f  = vdwtab[offset+1];
++          g  = vdwtab[offset+2];
++          h  = vdwtab[offset+3];
 +
++          enersum += y0*(ea/3 + eb/2 + ec) + f*(ea/4 + eb/3 + ec/2) + g*(ea/5 + eb/4 + ec/3) + h*(ea/6 + eb/5 + ec/4);
++          virsum  += f*(pa/4 + pb/3 + pc/2 + pd) + 2*g*(pa/5 + pb/4 + pc/3 + pd/2) + 3*h*(pa/6 + pb/5 + pc/4 + pd/3);
 +        }
-       if (fr->vdw_pot_shift) {
++          
++        enersum *= 4.0*M_PI*tabfactor;
++        virsum  *= 4.0*M_PI*tabfactor;
 +        eners[i] -= enersum;
 +        virs[i]  -= virsum;
 +      }
 +
 +      /* now add the correction for rvdw_switch to infinity */
 +      eners[0] += -4.0*M_PI/(3.0*rc3);
 +      eners[1] +=  4.0*M_PI/(9.0*rc9);
 +      virs[0]  +=  8.0*M_PI/rc3;
 +      virs[1]  += -16.0*M_PI/(3.0*rc9);
 +    }
 +    else if ((fr->vdwtype == evdwCUT) || (fr->vdwtype == evdwUSER)) {
 +      if (fr->vdwtype == evdwUSER && fplog)
 +      fprintf(fplog,
 +              "WARNING: using dispersion correction with user tables\n");
 +      rc3  = fr->rvdw*fr->rvdw*fr->rvdw;
 +      rc9  = rc3*rc3*rc3;
 +      /* Contribution beyond the cut-off */
 +      eners[0] += -4.0*M_PI/(3.0*rc3);
 +      eners[1] +=  4.0*M_PI/(9.0*rc9);
++      if (fr->vdw_modifier==eintmodPOTSHIFT) {
 +          /* Contribution within the cut-off */
 +          eners[0] += -4.0*M_PI/(3.0*rc3);
 +          eners[1] +=  4.0*M_PI/(3.0*rc9);
 +      }
 +      /* Contribution beyond the cut-off */
 +      virs[0]  +=  8.0*M_PI/rc3;
 +      virs[1]  += -16.0*M_PI/(3.0*rc9);
 +    } else {
 +      gmx_fatal(FARGS,
 +              "Dispersion correction is not implemented for vdw-type = %s",
 +              evdw_names[fr->vdwtype]);
 +    }
 +    fr->enerdiffsix    = eners[0];
 +    fr->enerdifftwelve = eners[1];
 +    /* The 0.5 is due to the Gromacs definition of the virial */
 +    fr->virdiffsix     = 0.5*virs[0];
 +    fr->virdifftwelve  = 0.5*virs[1];
 +  }
 +}
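/*
 * Illustrative sketch, not part of the patch: the ea/eb/ec and pa/pb/pc/pd
 * coefficients in calc_enervirdiff() come from integrating one cubic-spline
 * bin analytically. With eps in [0,1], r(eps) = r + eps/scale and
 * y(eps) = y0 + f*eps + g*eps^2 + h*eps^3, the energy integrand gives
 *
 *   int_0^1 y(eps)*r(eps)^2 dr = y0*(ea/3 + eb/2 + ec) + f*(ea/4 + eb/3 + ec/2)
 *                              + g*(ea/5 + eb/4 + ec/3) + h*(ea/6 + eb/5 + ec/4)
 *
 * The stand-alone check below (all numbers are hypothetical) compares this
 * closed form against a brute-force sum over the same bin.
 */
#include <stdio.h>

int main(void)
{
    double scale = 500.0, invscale, invscale2, invscale3;
    double r, y0 = 1.3, f = -0.7, g = 0.2, h = -0.05;
    double ea, eb, ec, analytic, numeric, eps, rr;
    int    i, n = 100000;

    invscale  = 1.0/scale;
    invscale2 = invscale*invscale;
    invscale3 = invscale*invscale2;
    r         = 400*invscale;   /* left edge of an arbitrary table bin */

    ea = invscale3;
    eb = 2.0*invscale2*r;
    ec = invscale*r*r;

    analytic = y0*(ea/3 + eb/2 + ec) + f*(ea/4 + eb/3 + ec/2)
             + g*(ea/5 + eb/4 + ec/3) + h*(ea/6 + eb/5 + ec/4);

    /* midpoint rule over the same bin; each slice has width dr = invscale/n */
    numeric = 0.0;
    for (i = 0; i < n; i++)
    {
        eps      = (i + 0.5)/n;
        rr       = r + eps*invscale;
        numeric += (y0 + f*eps + g*eps*eps + h*eps*eps*eps)*rr*rr*invscale/n;
    }

    printf("analytic %g  numeric %g\n", analytic, numeric);
    return 0;
}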
 +
 +void calc_dispcorr(FILE *fplog,t_inputrec *ir,t_forcerec *fr,
 +                   gmx_large_int_t step,int natoms,
 +                   matrix box,real lambda,tensor pres,tensor virial,
 +                   real *prescorr, real *enercorr, real *dvdlcorr)
 +{
 +    gmx_bool bCorrAll,bCorrPres;
 +    real dvdlambda,invvol,dens,ninter,avcsix,avctwelve,enerdiff,svir=0,spres=0;
 +    int  m;
 +
 +    *prescorr = 0;
 +    *enercorr = 0;
 +    *dvdlcorr = 0;
 +
 +    clear_mat(virial);
 +    clear_mat(pres);
 +
 +    if (ir->eDispCorr != edispcNO) {
 +        bCorrAll  = (ir->eDispCorr == edispcAllEner ||
 +                     ir->eDispCorr == edispcAllEnerPres);
 +        bCorrPres = (ir->eDispCorr == edispcEnerPres ||
 +                     ir->eDispCorr == edispcAllEnerPres);
 +
 +        invvol = 1/det(box);
 +        if (fr->n_tpi)
 +        {
 +            /* Only correct for the interactions with the inserted molecule */
 +            dens = (natoms - fr->n_tpi)*invvol;
 +            ninter = fr->n_tpi;
 +        }
 +        else
 +        {
 +            dens = natoms*invvol;
 +            ninter = 0.5*natoms;
 +        }
 +
 +        if (ir->efep == efepNO)
 +        {
 +            avcsix    = fr->avcsix[0];
 +            avctwelve = fr->avctwelve[0];
 +        }
 +        else
 +        {
 +            avcsix    = (1 - lambda)*fr->avcsix[0]    + lambda*fr->avcsix[1];
 +            avctwelve = (1 - lambda)*fr->avctwelve[0] + lambda*fr->avctwelve[1];
 +        }
 +
 +        enerdiff = ninter*(dens*fr->enerdiffsix - fr->enershiftsix);
 +        *enercorr += avcsix*enerdiff;
 +        dvdlambda = 0.0;
 +        if (ir->efep != efepNO)
 +        {
 +            dvdlambda += (fr->avcsix[1] - fr->avcsix[0])*enerdiff;
 +        }
 +        if (bCorrAll)
 +        {
 +            enerdiff = ninter*(dens*fr->enerdifftwelve - fr->enershifttwelve);
 +            *enercorr += avctwelve*enerdiff;
 +            if (fr->efep != efepNO)
 +            {
 +                dvdlambda += (fr->avctwelve[1] - fr->avctwelve[0])*enerdiff;
 +            }
 +        }
 +
 +        if (bCorrPres)
 +        {
 +            svir = ninter*dens*avcsix*fr->virdiffsix/3.0;
 +            if (ir->eDispCorr == edispcAllEnerPres)
 +            {
 +                svir += ninter*dens*avctwelve*fr->virdifftwelve/3.0;
 +            }
 +            /* The factor 2 is because of the Gromacs virial definition */
 +            spres = -2.0*invvol*svir*PRESFAC;
 +
 +            for(m=0; m<DIM; m++) {
 +                virial[m][m] += svir;
 +                pres[m][m] += spres;
 +            }
 +            *prescorr += spres;
 +        }
 +
 +        /* Can't currently control when it prints; for now, just print when debugging */
 +        if (debug)
 +        {
 +            if (bCorrAll) {
 +                fprintf(debug,"Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
 +                        avcsix,avctwelve);
 +            }
 +            if (bCorrPres)
 +            {
 +                fprintf(debug,
 +                        "Long Range LJ corr.: Epot %10g, Pres: %10g, Vir: %10g\n",
 +                        *enercorr,spres,svir);
 +            }
 +            else
 +            {
 +                fprintf(debug,"Long Range LJ corr.: Epot %10g\n",*enercorr);
 +            }
 +        }
 +
 +        if (fr->bSepDVDL && do_per_step(step,ir->nstlog))
 +        {
 +            fprintf(fplog,sepdvdlformat,"Dispersion correction",
 +                    *enercorr,dvdlambda);
 +        }
 +        if (fr->efep != efepNO)
 +        {
 +            *dvdlcorr += dvdlambda;
 +        }
 +    }
 +}
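/*
 * Worked example (illustrative, not part of the patch): for a plain LJ
 * cut-off without a potential shift, calc_enervirdiff() above gives
 * enerdiffsix = -4*pi/(3*rc^3) and enershiftsix = 0, so with dens = N/V
 * and ninter = N/2 the six-power energy term reduces to
 *
 *   Ecorr = <C6> * (N/2) * (N/V) * (-4*pi/(3*rc^3))
 *         = -2*pi*N^2*<C6> / (3*V*rc^3)
 *
 * which is the standard analytical dispersion tail correction.
 */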
 +
 +void do_pbc_first(FILE *fplog,matrix box,t_forcerec *fr,
 +                t_graph *graph,rvec x[])
 +{
 +  if (fplog)
 +    fprintf(fplog,"Removing pbc first time\n");
 +  calc_shifts(box,fr->shift_vec);
 +  if (graph) {
 +    mk_mshift(fplog,graph,fr->ePBC,box,x);
 +    if (gmx_debug_at)
 +      p_graph(debug,"do_pbc_first 1",graph);
 +    shift_self(graph,box,x);
 +    /* By doing an extra mk_mshift the molecules that are broken
 +     * because they were e.g. imported from another software
 +     * will be made whole again. Such are the healing powers
 +     * of GROMACS.
 +     */
 +    mk_mshift(fplog,graph,fr->ePBC,box,x);
 +    if (gmx_debug_at)
 +      p_graph(debug,"do_pbc_first 2",graph);
 +  }
 +  if (fplog)
 +    fprintf(fplog,"Done rmpbc\n");
 +}
 +
 +static void low_do_pbc_mtop(FILE *fplog,int ePBC,matrix box,
 +                          gmx_mtop_t *mtop,rvec x[],
 +                          gmx_bool bFirst)
 +{
 +  t_graph *graph;
 +  int mb,as,mol;
 +  gmx_molblock_t *molb;
 +
 +  if (bFirst && fplog)
 +    fprintf(fplog,"Removing pbc first time\n");
 +
 +  snew(graph,1);
 +  as = 0;
 +  for(mb=0; mb<mtop->nmolblock; mb++) {
 +    molb = &mtop->molblock[mb];
 +    if (molb->natoms_mol == 1 ||
 +      (!bFirst && mtop->moltype[molb->type].cgs.nr == 1)) {
 +      /* Just one atom or charge group in the molecule, no PBC required */
 +      as += molb->nmol*molb->natoms_mol;
 +    } else {
 +      /* Pass NULL instead of fplog to avoid graph prints for each molecule type */
 +      mk_graph_ilist(NULL,mtop->moltype[molb->type].ilist,
 +                   0,molb->natoms_mol,FALSE,FALSE,graph);
 +
 +      for(mol=0; mol<molb->nmol; mol++) {
 +      mk_mshift(fplog,graph,ePBC,box,x+as);
 +
 +      shift_self(graph,box,x+as);
 +      /* The molecule is whole now.
 +       * We don't need the second mk_mshift call as in do_pbc_first,
 +       * since we no longer need this graph.
 +       */
 +
 +      as += molb->natoms_mol;
 +      }
 +      done_graph(graph);
 +    }
 +  }
 +  sfree(graph);
 +}
 +
 +void do_pbc_first_mtop(FILE *fplog,int ePBC,matrix box,
 +                     gmx_mtop_t *mtop,rvec x[])
 +{
 +  low_do_pbc_mtop(fplog,ePBC,box,mtop,x,TRUE);
 +}
 +
 +void do_pbc_mtop(FILE *fplog,int ePBC,matrix box,
 +               gmx_mtop_t *mtop,rvec x[])
 +{
 +  low_do_pbc_mtop(fplog,ePBC,box,mtop,x,FALSE);
 +}
 +
 +void finish_run(FILE *fplog,t_commrec *cr,const char *confout,
 +                t_inputrec *inputrec,
 +                t_nrnb nrnb[],gmx_wallcycle_t wcycle,
 +                gmx_runtime_t *runtime,
 +                wallclock_gpu_t *gputimes,
 +                int omp_nth_pp,
 +                gmx_bool bWriteStat)
 +{
 +    int    i,j;
 +    t_nrnb *nrnb_tot=NULL;
 +    real   delta_t;
 +    double nbfs,mflop;
 +
 +    wallcycle_sum(cr,wcycle);
 +
 +    if (cr->nnodes > 1)
 +    {
 +        snew(nrnb_tot,1);
 +#ifdef GMX_MPI
 +        MPI_Allreduce(nrnb->n,nrnb_tot->n,eNRNB,MPI_DOUBLE,MPI_SUM,
 +                      cr->mpi_comm_mysim);
 +#endif
 +    }
 +    else
 +    {
 +        nrnb_tot = nrnb;
 +    }
 +
 +#if defined(GMX_MPI) && !defined(GMX_THREAD_MPI)
 +    if (cr->nnodes > 1)
 +    {
 +        /* reduce nodetime over all MPI processes in the current simulation */
 +        double sum;
 +        MPI_Allreduce(&runtime->proctime,&sum,1,MPI_DOUBLE,MPI_SUM,
 +                      cr->mpi_comm_mysim);
 +        runtime->proctime = sum;
 +    }
 +#endif
 +
 +    if (SIMMASTER(cr))
 +    {
 +        print_flop(fplog,nrnb_tot,&nbfs,&mflop);
 +    }
 +    if (cr->nnodes > 1)
 +    {
 +        sfree(nrnb_tot);
 +    }
 +
 +    if ((cr->duty & DUTY_PP) && DOMAINDECOMP(cr))
 +    {
 +        print_dd_statistics(cr,inputrec,fplog);
 +    }
 +
 +#ifdef GMX_MPI
 +    if (PARTDECOMP(cr))
 +    {
 +        if (MASTER(cr))
 +        {
 +            t_nrnb     *nrnb_all;
 +            int        s;
 +            MPI_Status stat;
 +
 +            snew(nrnb_all,cr->nnodes);
 +            nrnb_all[0] = *nrnb;
 +            for(s=1; s<cr->nnodes; s++)
 +            {
 +                MPI_Recv(nrnb_all[s].n,eNRNB,MPI_DOUBLE,s,0,
 +                         cr->mpi_comm_mysim,&stat);
 +            }
 +            pr_load(fplog,cr,nrnb_all);
 +            sfree(nrnb_all);
 +        }
 +        else
 +        {
 +            MPI_Send(nrnb->n,eNRNB,MPI_DOUBLE,MASTERRANK(cr),0,
 +                     cr->mpi_comm_mysim);
 +        }
 +    }
 +#endif
 +
 +    if (SIMMASTER(cr))
 +    {
 +        wallcycle_print(fplog,cr->nnodes,cr->npmenodes,runtime->realtime,
 +                        wcycle,gputimes);
 +
 +        if (EI_DYNAMICS(inputrec->eI))
 +        {
 +            delta_t = inputrec->delta_t;
 +        }
 +        else
 +        {
 +            delta_t = 0;
 +        }
 +
 +        if (fplog)
 +        {
 +            print_perf(fplog,runtime->proctime,runtime->realtime,
 +                       cr->nnodes-cr->npmenodes,
 +                       runtime->nsteps_done,delta_t,nbfs,mflop,
 +                       omp_nth_pp);
 +        }
 +        if (bWriteStat)
 +        {
 +            print_perf(stderr,runtime->proctime,runtime->realtime,
 +                       cr->nnodes-cr->npmenodes,
 +                       runtime->nsteps_done,delta_t,nbfs,mflop,
 +                       omp_nth_pp);
 +        }
 +    }
 +}
 +
 +extern void initialize_lambdas(FILE *fplog,t_inputrec *ir,int *fep_state,real *lambda,double *lam0)
 +{
 +    /* this function works, but could probably use a logic rewrite to keep all the different
 +       types of efep straight. */
 +
 +    int i;
 +    t_lambda *fep = ir->fepvals;
 +
 +    if ((ir->efep==efepNO) && (ir->bSimTemp == FALSE)) {
 +        for (i=0;i<efptNR;i++)  {
 +            lambda[i] = 0.0;
 +            if (lam0)
 +            {
 +                lam0[i] = 0.0;
 +            }
 +        }
 +        return;
 +    } else {
 +        *fep_state = fep->init_fep_state; /* this might overwrite the checkpoint
 +                                             if checkpoint is set -- a kludge is in for now
 +                                             to prevent this.*/
 +        for (i=0;i<efptNR;i++)
 +        {
 +            /* overwrite lambda state with init_lambda for now for backwards compatibility */
 +            if (fep->init_lambda>=0) /* if it's -1, it was never initialized */
 +            {
 +                lambda[i] = fep->init_lambda;
 +                if (lam0) {
 +                    lam0[i] = lambda[i];
 +                }
 +            }
 +            else
 +            {
 +                lambda[i] = fep->all_lambda[i][*fep_state];
 +                if (lam0) {
 +                    lam0[i] = lambda[i];
 +                }
 +            }
 +        }
 +        if (ir->bSimTemp) {
 +            /* need to rescale control temperatures to match current state */
 +            for (i=0;i<ir->opts.ngtc;i++) {
 +                if (ir->opts.ref_t[i] > 0) {
 +                    ir->opts.ref_t[i] = ir->simtempvals->temperatures[*fep_state];
 +                }
 +            }
 +        }
 +    }
 +
 +    /* Send to the log the information on the current lambdas */
 +    if (fplog != NULL)
 +    {
 +        fprintf(fplog,"Initial vector of lambda components:[ ");
 +        for (i=0;i<efptNR;i++)
 +        {
 +            fprintf(fplog,"%10.4f ",lambda[i]);
 +        }
 +        fprintf(fplog,"]\n");
 +    }
 +    return;
 +}
 +
 +
 +void init_md(FILE *fplog,
 +             t_commrec *cr,t_inputrec *ir,const output_env_t oenv,
 +             double *t,double *t0,
 +             real *lambda, int *fep_state, double *lam0,
 +             t_nrnb *nrnb,gmx_mtop_t *mtop,
 +             gmx_update_t *upd,
 +             int nfile,const t_filenm fnm[],
 +             gmx_mdoutf_t **outf,t_mdebin **mdebin,
 +             tensor force_vir,tensor shake_vir,rvec mu_tot,
 +             gmx_bool *bSimAnn,t_vcm **vcm, t_state *state, unsigned long Flags)
 +{
 +    int  i,j,n;
 +    real tmpt,mod;
 +
 +    /* Initial values */
 +    *t = *t0       = ir->init_t;
 +
 +    *bSimAnn=FALSE;
 +    for(i=0;i<ir->opts.ngtc;i++)
 +    {
 +        /* set bSimAnn if any group is being annealed */
 +        if(ir->opts.annealing[i]!=eannNO)
 +        {
 +            *bSimAnn = TRUE;
 +        }
 +    }
 +    if (*bSimAnn)
 +    {
 +        update_annealing_target_temp(&(ir->opts),ir->init_t);
 +    }
 +
 +    /* Initialize lambda variables */
 +    initialize_lambdas(fplog,ir,fep_state,lambda,lam0);
 +
 +    if (upd)
 +    {
 +        *upd = init_update(fplog,ir);
 +    }
 +
 +
 +    if (vcm != NULL)
 +    {
 +        *vcm = init_vcm(fplog,&mtop->groups,ir);
 +    }
 +
 +    if (EI_DYNAMICS(ir->eI) && !(Flags & MD_APPENDFILES))
 +    {
 +        if (ir->etc == etcBERENDSEN)
 +        {
 +            please_cite(fplog,"Berendsen84a");
 +        }
 +        if (ir->etc == etcVRESCALE)
 +        {
 +            please_cite(fplog,"Bussi2007a");
 +        }
 +    }
 +
 +    init_nrnb(nrnb);
 +
 +    if (nfile != -1)
 +    {
 +        *outf = init_mdoutf(nfile,fnm,Flags,cr,ir,oenv);
 +
 +        *mdebin = init_mdebin((Flags & MD_APPENDFILES) ? NULL : (*outf)->fp_ene,
 +                              mtop,ir, (*outf)->fp_dhdl);
 +    }
 +
 +    if (ir->bAdress)
 +    {
 +      please_cite(fplog,"Fritsch12");
 +      please_cite(fplog,"Junghans10");
 +    }
 +    /* Initiate variables */
 +    clear_mat(force_vir);
 +    clear_mat(shake_vir);
 +    clear_rvec(mu_tot);
 +
 +    debug_gmx();
 +}
 +
index 44a42f7c550b115984e5f371d37ae780abf3bb61,0000000000000000000000000000000000000000..7be0435b4c6a668858c6182ea9c37b532c935a01
mode 100644,000000..100644
--- /dev/null
@@@ -1,1397 -1,0 +1,1429 @@@
- void table_spline3_fill_ewald_lr(real *tabf,real *tabv,
-                                  int ntab,int tableformat,
-                                  real dx,real beta)
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <math.h>
 +#include "maths.h"
 +#include "typedefs.h"
 +#include "names.h"
 +#include "smalloc.h"
 +#include "gmx_fatal.h"
 +#include "futil.h"
 +#include "xvgr.h"
 +#include "vec.h"
 +#include "main.h"
 +#include "network.h"
 +#include "physics.h"
 +#include "force.h"
 +#include "gmxfio.h"
 +#include "macros.h"
 +#include "tables.h"
 +
 +/* All the possible (implemented) table functions */
 +enum { 
 +  etabLJ6,   
 +  etabLJ12, 
 +  etabLJ6Shift, 
 +  etabLJ12Shift, 
 +  etabShift,
 +  etabRF,
 +  etabRF_ZERO,
 +  etabCOUL, 
 +  etabEwald, 
 +  etabEwaldSwitch, 
 +  etabEwaldUser,
 +  etabEwaldUserSwitch,
 +  etabLJ6Switch, 
 +  etabLJ12Switch, 
 +  etabCOULSwitch, 
 +  etabLJ6Encad, 
 +  etabLJ12Encad, 
 +  etabCOULEncad,  
 +  etabEXPMIN, 
 +  etabUSER, 
 +  etabNR 
 +};
 +
 +/** Evaluates to true if the table type contains user data. */
 +#define ETAB_USER(e)  ((e) == etabUSER || \
 +                       (e) == etabEwaldUser || (e) == etabEwaldUserSwitch)
 +
 +typedef struct {
 +  const char *name;
 +  gmx_bool bCoulomb;
 +} t_tab_props;
 +
 +/* This structure holds the name and a flag that tells whether
 +   this is a Coulomb type function */
 +static const t_tab_props tprops[etabNR] = {
 +  { "LJ6",  FALSE },
 +  { "LJ12", FALSE },
 +  { "LJ6Shift", FALSE },
 +  { "LJ12Shift", FALSE },
 +  { "Shift", TRUE },
 +  { "RF", TRUE },
 +  { "RF-zero", TRUE },
 +  { "COUL", TRUE },
 +  { "Ewald", TRUE },
 +  { "Ewald-Switch", TRUE },
 +  { "Ewald-User", TRUE },
 +  { "Ewald-User-Switch", TRUE },
 +  { "LJ6Switch", FALSE },
 +  { "LJ12Switch", FALSE },
 +  { "COULSwitch", TRUE },
 +  { "LJ6-Encad shift", FALSE },
 +  { "LJ12-Encad shift", FALSE },
 +  { "COUL-Encad shift",  TRUE },
 +  { "EXPMIN", FALSE },
 +  { "USER", FALSE }
 +};
 +
 +/* Index in the table that says which function to use */
 +enum { etiCOUL, etiLJ6, etiLJ12, etiNR };
 +
 +typedef struct {
 +  int  nx,nx0;
 +  double tabscale;
 +  double *x,*v,*f;
 +} t_tabledata;
 +
 +#define pow2(x) ((x)*(x))
 +#define pow3(x) ((x)*(x)*(x))
 +#define pow4(x) ((x)*(x)*(x)*(x))
 +#define pow5(x) ((x)*(x)*(x)*(x)*(x))
 +
 +
 +static double v_ewald_lr(double beta,double r)
 +{
 +    if (r == 0)
 +    {
 +        return beta*2/sqrt(M_PI);
 +    }
 +    else
 +    {
 +        return gmx_erfd(beta*r)/r;
 +    }
 +}
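/*
 * Note (illustrative, not part of the patch): the r == 0 branch above is the
 * small-r limit of the general expression, since
 *
 *   erf(beta*r) = (2/sqrt(pi)) * (beta*r - (beta*r)^3/3 + ...)
 *
 * so erf(beta*r)/r -> 2*beta/sqrt(pi) as r -> 0.
 */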
 +
-     int stride=0;
++void table_spline3_fill_ewald_lr(real *table_f,
++                                 real *table_v,
++                                 real *table_fdv0,
++                                 int   ntab,
++                                 real  dx,
++                                 real  beta)
 +{
 +    real tab_max;
-     switch (tableformat)
-     {
-     case tableformatF:    stride = 1; break;
-     case tableformatFDV0: stride = 4; break;
-     default: gmx_incons("Unknown table format");
-     }
 +    int i,i_inrange;
 +    double dc,dc_new;
 +    gmx_bool bOutOfRange;
 +    double v_r0,v_r1,v_inrange,vi,a0,a1,a2dx;
 +    double x_r0;
 +
 +    if (ntab < 2)
 +    {
 +        gmx_fatal(FARGS,"Can not make a spline table with less than 2 points");
 +    }
 +
 +    /* We need some margin to be able to divide table values by r
 +     * in the kernel and also to do the integration arithmetic
 +     * without going out of range. Furthermore, we divide by dx below.
 +     */
 +    tab_max = GMX_REAL_MAX*0.0001;
 +
 +    /* This function produces a table with:
 +     * maximum energy error: V'''/(6*12*sqrt(3))*dx^3
 +     * maximum force error:  V'''/(6*4)*dx^2
 +     * The rms force error is the max error times 1/sqrt(5)=0.45.
 +     */
 +
-         switch (tableformat)
 +    bOutOfRange = FALSE;
 +    i_inrange = ntab;
 +    v_inrange = 0;
 +    dc = 0;
 +    for(i=ntab-1; i>=0; i--)
 +    {
 +        x_r0 = i*dx;
 +
 +        v_r0 = v_ewald_lr(beta,x_r0);
 +
 +        if (!bOutOfRange)
 +        {
 +            i_inrange = i;
 +            v_inrange = v_r0;
 +    
 +            vi = v_r0;
 +        }
 +        else
 +        {
 +            /* Linear continuation for the last point in range */
 +            vi = v_inrange - dc*(i - i_inrange)*dx;
 +        }
 +
-         case tableformatF:
-             if (tabv != NULL)
-             {
-                 tabv[i] = vi;
-             }
-             break;
-         case tableformatFDV0:
-             tabf[i*stride+2] = vi;
-             tabf[i*stride+3] = 0;
-             break;
-         default:
-             gmx_incons("Unknown table format");
++        if(table_v!=NULL)
 +        {
-             tabf[i*stride] = -dc;
++            table_v[i] = vi;
 +        }
 +
 +        if (i == 0)
 +        {
 +            continue;
 +        }
 +
 +        /* Get the potential at table point i-1 */
 +        v_r1 = v_ewald_lr(beta,(i-1)*dx);
 +
 +        if (v_r1 != v_r1 || v_r1 < -tab_max || v_r1 > tab_max)
 +        {
 +            bOutOfRange = TRUE;
 +        }
 +
 +        if (!bOutOfRange)
 +        {
 +            /* Calculate the average second derivative times dx over interval i-1 to i.
 +             * Using the function values at the end points and in the middle.
 +             */
 +            a2dx = (v_r0 + v_r1 - 2*v_ewald_lr(beta,x_r0-0.5*dx))/(0.25*dx);
 +            /* Set the derivative of the spline to match the difference in potential
 +             * over the interval plus the average effect of the quadratic term.
 +             * This is the essential step for minimizing the error in the force.
 +             */
 +            dc = (v_r0 - v_r1)/dx + 0.5*a2dx;
 +        }
 +
 +        if (i == ntab - 1)
 +        {
 +            /* Fill the table with the force, minus the derivative of the spline */
-             tabf[i*stride] += -0.5*dc;
++            table_f[i] = -dc;
 +        }
 +        else
 +        {
 +            /* tab[i] will contain the average of the splines over the two intervals */
-         tabf[(i-1)*stride] = -0.5*dc;
++            table_f[i] += -0.5*dc;
 +        }
 +
 +        if (!bOutOfRange)
 +        {
 +            /* Make spline s(x) = a0 + a1*(x - xr) + 0.5*a2*(x - xr)^2
 +             * matching the potential at the two end points
 +             * and the derivative dc at the end point xr.
 +             */
 +            a0   = v_r0;
 +            a1   = dc;
 +            a2dx = (a1*dx + v_r1 - a0)*2/dx;
 +
 +            /* Set dc to the derivative at the next point */
 +            dc_new = a1 - a2dx;
 +                
 +            if (dc_new != dc_new || dc_new < -tab_max || dc_new > tab_max)
 +            {
 +                bOutOfRange = TRUE;
 +            }
 +            else
 +            {
 +                dc = dc_new;
 +            }
 +        }
 +
-     tabf[0] *= 2;
++        table_f[(i-1)] = -0.5*dc;
 +    }
 +    /* Currently the last value only contains half the force: double it */
-     if (tableformat == tableformatFDV0)
++    table_f[0] *= 2;
 +
-         /* Store the force difference in the second entry */
-         for(i=0; i<ntab-1; i++)
++    if(table_v!=NULL && table_fdv0!=NULL)
 +    {
-             tabf[i*stride+1] = tabf[(i+1)*stride] - tabf[i*stride];
++        /* Copy to FDV0 table too. Allocation occurs in forcerec.c,
++         * init_ewald_f_table().
++         */
++        for(i=0;i<ntab-1;i++)
 +        {
-         tabf[(ntab-1)*stride+1] = -tabf[i*stride];
++            table_fdv0[4*i]     = table_f[i];
++            table_fdv0[4*i+1]   = table_f[i+1]-table_f[i];
++            table_fdv0[4*i+2]   = table_v[i];
++            table_fdv0[4*i+3]   = 0.0;
 +        }
-                      double x[],double Vtab[],double Ftab[],
++        table_fdv0[4*(ntab-1)]    = table_f[(ntab-1)];
++        table_fdv0[4*(ntab-1)+1]  = -table_f[(ntab-1)];
++        table_fdv0[4*(ntab-1)+2]  = table_v[(ntab-1)];
++        table_fdv0[4*(ntab-1)+3]  = 0.0;
 +    }
 +}
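/*
 * Usage sketch (illustrative only, not part of the patch): the real
 * allocation and call live in forcerec.c, init_ewald_f_table(); the sizes
 * and parameter values below are hypothetical.
 */
static void example_fill_ewald_tables(void)
{
    int   ntab = 4096;            /* number of table points        */
    real  dx   = 1.0/1000.0;      /* table spacing                 */
    real  beta = 3.12;            /* Ewald splitting coefficient   */
    real *tab_f, *tab_v, *tab_fdv0;

    snew(tab_f, ntab);
    snew(tab_v, ntab);
    snew(tab_fdv0, 4*ntab);       /* packed F, F(i+1)-F(i), V, 0 quadruplets */

    table_spline3_fill_ewald_lr(tab_f, tab_v, tab_fdv0, ntab, dx, beta);

    sfree(tab_f);
    sfree(tab_v);
    sfree(tab_fdv0);
}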
 +
 +/* The scale (1/spacing) for third order spline interpolation
 + * of the Ewald mesh contribution which needs to be subtracted
 + * from the non-bonded interactions.
 + */
 +real ewald_spline3_table_scale(real ewaldcoeff,real rc)
 +{
 +    double erf_x_d3=1.0522; /* max of (erf(x)/x)''' */
 +    double ftol,etol;
 +    double sc_f,sc_e;
 +
 +    /* Force tolerance: single precision accuracy */
 +    ftol = GMX_FLOAT_EPS;
 +    sc_f = sqrt(erf_x_d3/(6*4*ftol*ewaldcoeff))*ewaldcoeff;
 +
 +    /* Energy tolerance: 10x more accurate than the cut-off jump */
 +    etol = 0.1*gmx_erfc(ewaldcoeff*rc);
 +    etol = max(etol,GMX_REAL_EPS);
 +    sc_e = pow(erf_x_d3/(6*12*sqrt(3)*etol),1.0/3.0)*ewaldcoeff;
 +
 +    return max(sc_f,sc_e);
 +}
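/*
 * Rough worked example (illustrative, not part of the patch; numbers are
 * hypothetical): with ewaldcoeff around 3 nm^-1 and rc = 0.9 nm, the
 * single-precision force criterion dominates and the returned scale is on
 * the order of 1000 points/nm, i.e. a spline spacing of roughly 0.001 nm.
 */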
 +
 +/* Calculate the potential and force for an r value
 + * in exactly the same way it is done in the inner loop.
 + * VFtab is a pointer to the table data, offset is
 + * the point where we should begin and stride is 
 + * 4 if we have a buckingham table, 3 otherwise.
 + * If you want to evaluate table no N, set offset to 4*N.
 + *  
 + * We use normal precision here, since that is what we
 + * will use in the inner loops.
 + */
 +static void evaluate_table(real VFtab[], int offset, int stride, 
 +                         real tabscale, real r, real *y, real *yp)
 +{
 +  int n;
 +  real rt,eps,eps2;
 +  real Y,F,Geps,Heps2,Fp;
 +
 +  rt       =  r*tabscale;
 +  n        =  (int)rt;
 +  eps      =  rt - n;
 +  eps2     =  eps*eps;
 +  n        =  offset+stride*n;
 +  Y        =  VFtab[n];
 +  F        =  VFtab[n+1];
 +  Geps     =  eps*VFtab[n+2];
 +  Heps2    =  eps2*VFtab[n+3];
 +  Fp       =  F+Geps+Heps2;
 +  *y       =  Y+eps*Fp;
 +  *yp      =  (Fp+Geps+2.0*Heps2)*tabscale;
 +}
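/*
 * Worked form (illustrative, not part of the patch): with eps = r*tabscale - n
 * the code above evaluates the cubic
 *
 *   y(r)  = Y + eps*F + eps^2*G + eps^3*H
 *   yp(r) = dy/dr = tabscale*(F + 2*eps*G + 3*eps^2*H)
 *
 * so yp is the analytical derivative of the interpolated value y.
 */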
 +
 +static void copy2table(int n,int offset,int stride,
-     dest[nn0]   = Vtab[i];
-     dest[nn0+1] = F;
-     dest[nn0+2] = G;
-     dest[nn0+3] = H;
++                     double x[],double Vtab[],double Ftab[],real scalefactor,
 +                     real dest[])
 +{
 +/* Use double precision for the intermediate variables
 + * and temporary x/vtab/vtab2 data to avoid unnecessary
 + * loss of precision.
 + */
 +  int  i,nn0;
 +  double F,G,H,h;
 +
 +  h = 0;
 +  for(i=0; (i<n); i++) {
 +    if (i < n-1) {
 +      h   = x[i+1] - x[i];
 +      F   = -Ftab[i]*h;
 +      G   =  3*(Vtab[i+1] - Vtab[i]) + (Ftab[i+1] + 2*Ftab[i])*h;
 +      H   = -2*(Vtab[i+1] - Vtab[i]) - (Ftab[i+1] +   Ftab[i])*h;
 +    } else {
 +      /* Fill the last entry with a linear potential,
 +       * this is mainly for rounding issues with angle and dihedral potentials.
 +       */
 +      F   = -Ftab[i]*h;
 +      G   = 0;
 +      H   = 0;
 +    }
 +    nn0 = offset + i*stride;
-       /* Dispersion */
-       Vtab  = -r6;
-       Ftab  = 6.0*Vtab/r;
-       break;
++    dest[nn0]   = scalefactor*Vtab[i];
++    dest[nn0+1] = scalefactor*F;
++    dest[nn0+2] = scalefactor*G;
++    dest[nn0+3] = scalefactor*H;
 +  }
 +}
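/*
 * Note (illustrative, not part of the patch): F, G and H above are the
 * coefficients of the Hermite cubic in eps = (x - x[i])/h,
 *
 *   V(eps) = Vtab[i] + F*eps + G*eps^2 + H*eps^3
 *
 * chosen so that the cubic reproduces Vtab at both bin edges and has slope
 * dV/dx = -Ftab (minus the tabulated force) at both edges; evaluate_table()
 * above reads these four numbers back as Y, F, G and H.
 */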
 +
 +static void init_table(FILE *fp,int n,int nx0,
 +                     double tabscale,t_tabledata *td,gmx_bool bAlloc)
 +{
 +  int i;
 +  
 +  td->nx  = n;
 +  td->nx0 = nx0;
 +  td->tabscale = tabscale;
 +  if (bAlloc) {
 +    snew(td->x,td->nx);
 +    snew(td->v,td->nx);
 +    snew(td->f,td->nx);
 +  }
 +  for(i=0; (i<td->nx); i++)
 +    td->x[i] = i/tabscale;
 +}
 +
 +static void spline_forces(int nx,double h,double v[],gmx_bool bS3,gmx_bool bE3,
 +                        double f[])
 +{
 +  int    start,end,i;
 +  double v3,b_s,b_e,b;
 +  double beta,*gamma;
 +
 +  /* Formulas can be found in:
 +   * H.J.C. Berendsen, Simulating the Physical World, Cambridge 2007
 +   */
 +
 +  if (nx < 4 && (bS3 || bE3))
 +    gmx_fatal(FARGS,"Can not generate splines with third derivative boundary conditions with less than 4 (%d) points",nx);
 +  
 +  /* To make life easy we initially set the spacing to 1
 +   * and correct for this at the end.
 +   */
 +  beta = 2;
 +  if (bS3) {
 +    /* Fit V''' at the start */
 +    v3  = v[3] - 3*v[2] + 3*v[1] - v[0];
 +    if (debug)
 +      fprintf(debug,"The left third derivative is %g\n",v3/(h*h*h));
 +    b_s = 2*(v[1] - v[0]) + v3/6;
 +    start = 0;
 +    
 +    if (FALSE) {
 +      /* Fit V'' at the start */
 +      real v2;
 +      
 +      v2  = -v[3] + 4*v[2] - 5*v[1] + 2*v[0];
 +      /* v2  = v[2] - 2*v[1] + v[0]; */
 +      if (debug)
 +      fprintf(debug,"The left second derivative is %g\n",v2/(h*h));
 +      b_s = 3*(v[1] - v[0]) - v2/2;
 +      start = 0;
 +    }
 +  } else {
 +    b_s = 3*(v[2] - v[0]) + f[0]*h;
 +    start = 1;
 +  }
 +  if (bE3) {
 +    /* Fit V''' at the end */
 +    v3  = v[nx-1] - 3*v[nx-2] + 3*v[nx-3] - v[nx-4];
 +    if (debug)
 +      fprintf(debug,"The right third derivative is %g\n",v3/(h*h*h));
 +    b_e = 2*(v[nx-1] - v[nx-2]) + v3/6;
 +    end = nx;
 +  } else {
 +    /* V'=0 at the end */
 +    b_e = 3*(v[nx-1] - v[nx-3]) + f[nx-1]*h;
 +    end = nx - 1;
 +  }
 +
 +  snew(gamma,nx);
 +  beta = (bS3 ? 1 : 4);
 +
 +  /* For V'' fitting */
 +  /* beta = (bS3 ? 2 : 4); */
 +
 +  f[start] = b_s/beta;
 +  for(i=start+1; i<end; i++) {
 +    gamma[i] = 1/beta;
 +    beta = 4 - gamma[i];
 +    b    =  3*(v[i+1] - v[i-1]);
 +    f[i] = (b - f[i-1])/beta;
 +  }
 +  gamma[end-1] = 1/beta;
 +  beta = (bE3 ? 1 : 4) - gamma[end-1];
 +  f[end-1] = (b_e - f[end-2])/beta;
 +
 +  for(i=end-2; i>=start; i--)
 +    f[i] -= gamma[i+1]*f[i+1];
 +  sfree(gamma);
 +
 +  /* Correct for the minus sign and the spacing */
 +  for(i=start; i<end; i++)
 +    f[i] = -f[i]/h;
 +}
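/*
 * Note (illustrative, not part of the patch): the beta/gamma sweep above is
 * the Thomas algorithm for the tridiagonal spline system; with unit spacing
 * each interior row reads
 *
 *   f[i-1] + 4*f[i] + f[i+1] = 3*(v[i+1] - v[i-1])
 *
 * while the first and last rows carry the V''' or V'=0 boundary conditions
 * (b_s, b_e). The final loop converts the solved derivatives to forces by
 * flipping the sign and restoring the spacing h.
 */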
 +
 +static void set_forces(FILE *fp,int angle,
 +                     int nx,double h,double v[],double f[],
 +                     int table)
 +{
 +  int start,end;
 +
 +  if (angle == 2)
 +    gmx_fatal(FARGS,
 +            "Force generation for dihedral tables is not (yet) implemented");
 +
 +  start = 0;
 +  while (v[start] == 0)
 +    start++;
 +  
 +  end = nx;
 +  while(v[end-1] == 0)
 +    end--;
 +  if (end > nx - 2)
 +    end = nx;
 +  else
 +    end++;
 +
 +  if (fp)
 +    fprintf(fp,"Generating forces for table %d, boundary conditions: V''' at %g, %s at %g\n",
 +          table+1,start*h,end==nx ? "V'''" : "V'=0",(end-1)*h);
 +  spline_forces(end-start,h,v+start,TRUE,end==nx,f+start);
 +}
 +
 +static void read_tables(FILE *fp,const char *fn,
 +                      int ntab,int angle,t_tabledata td[])
 +{
 +  char *libfn;
 +  char buf[STRLEN];
 +  double **yy=NULL,start,end,dx0,dx1,ssd,vm,vp,f,numf;
 +  int  k,i,nx,nx0=0,ny,nny,ns;
 +  gmx_bool bAllZero,bZeroV,bZeroF;
 +  double tabscale;
 +
 +  nny = 2*ntab+1;  
 +  libfn = gmxlibfn(fn);
 +  nx  = read_xvg(libfn,&yy,&ny);
 +  if (ny != nny)
 +    gmx_fatal(FARGS,"Trying to read file %s, but nr columns = %d, should be %d",
 +              libfn,ny,nny);
 +  if (angle == 0) {
 +    if (yy[0][0] != 0.0)
 +      gmx_fatal(FARGS,
 +              "The first distance in file %s is %f nm instead of %f nm",
 +              libfn,yy[0][0],0.0);
 +  } else {
 +    if (angle == 1)
 +      start = 0.0;
 +    else
 +      start = -180.0;
 +    end = 180.0;
 +    if (yy[0][0] != start || yy[0][nx-1] != end)
 +      gmx_fatal(FARGS,"The angles in file %s should go from %f to %f instead of %f to %f\n",
 +              libfn,start,end,yy[0][0],yy[0][nx-1]);
 +  }
 +
 +  tabscale = (nx-1)/(yy[0][nx-1] - yy[0][0]);
 +  
 +  if (fp) {
 +    fprintf(fp,"Read user tables from %s with %d data points.\n",libfn,nx);
 +    if (angle == 0)
 +      fprintf(fp,"Tabscale = %g points/nm\n",tabscale);
 +  }
 +
 +  bAllZero = TRUE;
 +  for(k=0; k<ntab; k++) {
 +    bZeroV = TRUE;
 +    bZeroF = TRUE;
 +    for(i=0; (i < nx); i++) {
 +      if (i >= 2) {
 +      dx0 = yy[0][i-1] - yy[0][i-2];
 +      dx1 = yy[0][i]   - yy[0][i-1];
 +      /* Check for 1% deviation in spacing */
 +      if (fabs(dx1 - dx0) >= 0.005*(fabs(dx0) + fabs(dx1))) {
 +        gmx_fatal(FARGS,"In table file '%s' the x values are not equally spaced: %f %f %f",fn,yy[0][i-2],yy[0][i-1],yy[0][i]);
 +      }
 +      }
 +      if (yy[1+k*2][i] != 0) {
 +      bZeroV = FALSE;
 +      if (bAllZero) {
 +        bAllZero = FALSE;
 +        nx0 = i;
 +      }
 +      if (yy[1+k*2][i] >  0.01*GMX_REAL_MAX ||
 +          yy[1+k*2][i] < -0.01*GMX_REAL_MAX) {
 +        gmx_fatal(FARGS,"Out of range potential value %g in file '%s'",
 +                  yy[1+k*2][i],fn);
 +      }
 +      }
 +      if (yy[1+k*2+1][i] != 0) {
 +      bZeroF = FALSE;
 +      if (bAllZero) {
 +        bAllZero = FALSE;
 +        nx0 = i;
 +      }
 +      if (yy[1+k*2+1][i] >  0.01*GMX_REAL_MAX ||
 +          yy[1+k*2+1][i] < -0.01*GMX_REAL_MAX) {
 +        gmx_fatal(FARGS,"Out of range force value %g in file '%s'",
 +                  yy[1+k*2+1][i],fn);
 +      }
 +      }
 +    }
 +
 +    if (!bZeroV && bZeroF) {
 +      set_forces(fp,angle,nx,1/tabscale,yy[1+k*2],yy[1+k*2+1],k);
 +    } else {
 +      /* Check if the second column is close to minus the numerical
 +       * derivative of the first column.
 +       */
 +      ssd = 0;
 +      ns = 0;
 +      for(i=1; (i < nx-1); i++) {
 +      vm = yy[1+2*k][i-1];
 +      vp = yy[1+2*k][i+1];
 +      f  = yy[1+2*k+1][i];
 +      if (vm != 0 && vp != 0 && f != 0) {
 +        /* Take the centered difference */
 +        numf = -(vp - vm)*0.5*tabscale;
 +        ssd += fabs(2*(f - numf)/(f + numf));
 +        ns++;
 +      }
 +      }
 +      if (ns > 0) {
 +      ssd /= ns;
 +      sprintf(buf,"For the %d non-zero entries for table %d in %s the forces deviate on average %d%% from minus the numerical derivative of the potential\n",ns,k,libfn,(int)(100*ssd+0.5));
 +      if (debug)
 +        fprintf(debug,"%s",buf);
 +      if (ssd > 0.2) {
 +        if (fp)
 +          fprintf(fp,"\nWARNING: %s\n",buf);
 +        fprintf(stderr,"\nWARNING: %s\n",buf);
 +      }
 +      }
 +    }
 +  }
 +  if (bAllZero && fp) {
 +    fprintf(fp,"\nNOTE: All elements in table %s are zero\n\n",libfn);
 +  }
 +
 +  for(k=0; (k<ntab); k++) {
 +    init_table(fp,nx,nx0,tabscale,&(td[k]),TRUE);
 +    for(i=0; (i<nx); i++) {
 +      td[k].x[i] = yy[0][i];
 +      td[k].v[i] = yy[2*k+1][i];
 +      td[k].f[i] = yy[2*k+2][i];
 +    }
 +  }
 +  for(i=0; (i<ny); i++)
 +    sfree(yy[i]);
 +  sfree(yy);
 +  sfree(libfn);
 +}
 +
 +static void done_tabledata(t_tabledata *td)
 +{
 +  int i;
 +  
 +  if (!td)
 +    return;
 +    
 +  sfree(td->x);
 +  sfree(td->v);
 +  sfree(td->f);
 +}
 +
 +static void fill_table(t_tabledata *td,int tp,const t_forcerec *fr)
 +{
 +  /* Fill the table according to the formulas in the manual.
 +   * In principle, we only need the potential and the second
 +   * derivative, but then we would have to do lots of calculations
 +   * in the inner loop. By precalculating some terms (see manual)
 +   * we get better eventual performance, despite a larger table.
 +   *
 +   * Since some of these higher-order terms are very small,
 +   * we always use double precision to calculate them here, in order
 +   * to avoid unnecessary loss of precision.
 +   */
 +#ifdef DEBUG_SWITCH
 +  FILE *fp;
 +#endif
 +  int  i;
 +  double reppow,p;
 +  double r1,rc,r12,r13;
 +  double r,r2,r6,rc6;
 +  double expr,Vtab,Ftab;
 +  /* Parameters for David's function */
 +  double A=0,B=0,C=0,A_3=0,B_4=0;
 +  /* Parameters for the switching function */
 +  double ksw,swi,swi1;
 +  /* Temporary parameters */
 +  gmx_bool bSwitch,bShift;
 +  double ewc=fr->ewaldcoeff;
 +  double isp= 0.564189583547756;
 +   
 +  bSwitch = ((tp == etabLJ6Switch) || (tp == etabLJ12Switch) || 
 +           (tp == etabCOULSwitch) ||
 +           (tp == etabEwaldSwitch) || (tp == etabEwaldUserSwitch));
 +  bShift  = ((tp == etabLJ6Shift) || (tp == etabLJ12Shift) || 
 +           (tp == etabShift));
 +
 +  reppow = fr->reppow;
 +
 +  if (tprops[tp].bCoulomb) {
 +    r1 = fr->rcoulomb_switch;
 +    rc = fr->rcoulomb;
 +  } 
 +  else {
 +    r1 = fr->rvdw_switch;
 +    rc = fr->rvdw;
 +  }
 +  if (bSwitch)
 +    ksw  = 1.0/(pow5(rc-r1));
 +  else
 +    ksw  = 0.0;
 +  if (bShift) {
 +    if (tp == etabShift)
 +      p = 1;
 +    else if (tp == etabLJ6Shift) 
 +      p = 6; 
 +    else 
 +      p = reppow;
 +    
 +    A = p * ((p+1)*r1-(p+4)*rc)/(pow(rc,p+2)*pow2(rc-r1));
 +    B = -p * ((p+1)*r1-(p+3)*rc)/(pow(rc,p+2)*pow3(rc-r1));
 +    C = 1.0/pow(rc,p)-A/3.0*pow3(rc-r1)-B/4.0*pow4(rc-r1);
 +    if (tp == etabLJ6Shift) {
 +      A=-A;
 +      B=-B;
 +      C=-C;
 +    }
 +    A_3=A/3.0;
 +    B_4=B/4.0;
 +  }
 +  if (debug) { fprintf(debug,"Setting up tables\n"); fflush(debug); }
 +    
 +#ifdef DEBUG_SWITCH
 +  fp=xvgropen("switch.xvg","switch","r","s");
 +#endif
 +  
 +  for(i=td->nx0; (i<td->nx); i++) {
 +    r     = td->x[i];
 +    r2    = r*r;
 +    r6    = 1.0/(r2*r2*r2);
 +    if (gmx_within_tol(reppow,12.0,10*GMX_DOUBLE_EPS)) {
 +      r12 = r6*r6;
 +    } else {
 +      r12 = pow(r,-reppow);   
 +    }
 +    Vtab  = 0.0;
 +    Ftab  = 0.0;
 +    if (bSwitch) {
 +      /* swi is function, swi1 1st derivative and swi2 2nd derivative */
 +      /* swi is the switch function, swi1 its 1st derivative and swi2 its 2nd derivative */
 +       * r1<=r<=rc. The 1st and 2nd derivatives are both zero at
 +       * r1 and rc.
 +       * ksw is just the constant 1/(rc-r1)^5, to save some calculations...
 +       */ 
 +      if(r<=r1) {
 +      swi  = 1.0;
 +      swi1 = 0.0;
 +      } else if (r>=rc) {
 +      swi  = 0.0;
 +      swi1 = 0.0;
 +      } else {
 +      swi      = 1 - 10*pow3(r-r1)*ksw*pow2(rc-r1) 
 +        + 15*pow4(r-r1)*ksw*(rc-r1) - 6*pow5(r-r1)*ksw;
 +      swi1     = -30*pow2(r-r1)*ksw*pow2(rc-r1) 
 +        + 60*pow3(r-r1)*ksw*(rc-r1) - 30*pow4(r-r1)*ksw;
 +      }
 +    }
 +    else { /* not really needed, but avoids compiler warnings... */
 +      swi  = 1.0;
 +      swi1 = 0.0;
 +    }
 +#ifdef DEBUG_SWITCH
 +    fprintf(fp,"%10g  %10g  %10g\n",r,swi,swi1); /* swi2 is not computed in this version */
 +#endif
 +
 +    rc6 = rc*rc*rc;
 +    rc6 = 1.0/(rc6*rc6);
 +
 +    switch (tp) {
 +    case etabLJ6:
-       Vtab  = -r6;
-       Ftab  = 6.0*Vtab/r;
++            /* Dispersion */
++            Vtab = -r6;
++            Ftab = 6.0*Vtab/r;
++            break;
 +    case etabLJ6Switch:
 +    case etabLJ6Shift:
 +      /* Dispersion */
 +      if (r < rc) {      
-       /* Repulsion */
-       Vtab  = r12;
-       Ftab  = reppow*Vtab/r;
++          Vtab = -r6;
++          Ftab = 6.0*Vtab/r;
++          break;
 +      }
 +      break;
 +    case etabLJ12:
-       Vtab  = r12;
-       Ftab  = reppow*Vtab/r;
-       }  
++            /* Repulsion */
++            Vtab  = r12;
++            Ftab  = reppow*Vtab/r;
 +      break;
 +    case etabLJ12Switch:
 +    case etabLJ12Shift:
 +      /* Repulsion */
 +      if (r < rc) {                
-             Vtab  = r12-12.0*(rc-r)*rc6*rc6/rc-1.0*rc6*rc6;
-             Ftab  = 12.0*r12/r-12.0*rc6*rc6/rc;
-         } else { /* r>rc */ 
++          Vtab  = r12;
++          Ftab  = reppow*Vtab/r;
++      }
 +      break;
 +      case etabLJ6Encad:
 +        if(r < rc) {
 +            Vtab  = -(r6-6.0*(rc-r)*rc6/rc-rc6);
 +            Ftab  = -(6.0*r6/r-6.0*rc6/rc);
 +        } else { /* r>rc */ 
 +            Vtab  = 0;
 +            Ftab  = 0;
 +        } 
 +        break;
 +    case etabLJ12Encad:
 +        if(r < rc) {
-     }  
-     
++            Vtab  = -(r6-6.0*(rc-r)*rc6/rc-rc6);
++            Ftab  = -(6.0*r6/r-6.0*rc6/rc);
++        } else { /* r>rc */
 +            Vtab  = 0;
 +            Ftab  = 0;
 +        } 
 +        break;        
 +    case etabCOUL:
 +      Vtab  = 1.0/r;
 +      Ftab  = 1.0/r2;
 +      break;
 +    case etabCOULSwitch:
 +    case etabShift:
 +      if (r < rc) { 
 +      Vtab  = 1.0/r;
 +      Ftab  = 1.0/r2;
 +      }
 +      break;
 +    case etabEwald:
 +    case etabEwaldSwitch:
 +      Vtab  = gmx_erfc(ewc*r)/r;
 +      Ftab  = gmx_erfc(ewc*r)/r2+2*exp(-(ewc*ewc*r2))*ewc*isp/r;
 +      break;
 +    case etabEwaldUser:
 +    case etabEwaldUserSwitch:
 +      /* Only calculate minus the reciprocal space contribution */
 +      Vtab  = -gmx_erf(ewc*r)/r;
 +      Ftab  = -gmx_erf(ewc*r)/r2+2*exp(-(ewc*ewc*r2))*ewc*isp/r;
 +      break;
 +    case etabRF:
 +    case etabRF_ZERO:
 +      Vtab  = 1.0/r      +   fr->k_rf*r2 - fr->c_rf;
 +      Ftab  = 1.0/r2     - 2*fr->k_rf*r;
 +      if (tp == etabRF_ZERO && r >= rc) {
 +      Vtab = 0;
 +      Ftab = 0;
 +      }
 +      break;
 +    case etabEXPMIN:
 +      expr  = exp(-r);
 +      Vtab  = expr;
 +      Ftab  = expr;
 +      break;
 +    case etabCOULEncad:
 +        if(r < rc) {
 +            Vtab  = 1.0/r-(rc-r)/(rc*rc)-1.0/rc;
 +            Ftab  = 1.0/r2-1.0/(rc*rc);
 +        } else { /* r>rc */ 
 +            Vtab  = 0;
 +            Ftab  = 0;
 +        } 
 +        break;
 +    default:
 +      gmx_fatal(FARGS,"Table type %d not implemented yet. (%s,%d)",
 +                tp,__FILE__,__LINE__);
 +    }
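As a consistency check on the entries above (a note, not part of the patch): the Ftab column is -dVtab/dr. For etabCOUL, V = 1/r gives -dV/dr = 1/r^2, which is the Ftab written above. For etabEwald, V = erfc(ewc*r)/r and, assuming isp is 1/sqrt(pi) (that constant is defined outside this hunk), -dV/dr = erfc(ewc*r)/r^2 + 2*ewc*exp(-(ewc*r)^2)/(sqrt(pi)*r), matching the Ftab expression term for term.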
 +    if (bShift) {
 +      /* Normal coulomb with cut-off correction for potential */
 +      if (r < rc) {
 +      Vtab -= C;
 +      /* If in the shifting range, add the smooth shift correction */
 +      if (r > r1) {
 +        r12 = (r-r1)*(r-r1);
 +        r13 = (r-r1)*r12;
 +        Vtab  += - A_3*r13 - B_4*r12*r12;
 +        Ftab  +=   A*r12 + B*r13;
 +      }
 +      }
 +    }
 +
 +    if (ETAB_USER(tp)) {
 +      Vtab += td->v[i];
 +      Ftab += td->f[i];
 +    }
 +
 +    if ((r > r1) && bSwitch) {
 +      Ftab = Ftab*swi - Vtab*swi1;
 +      Vtab = Vtab*swi;
-   
++    }
++
 +    /* Convert to single precision when we store to mem */
 +    td->v[i]  = Vtab;
 +    td->f[i]  = Ftab;
 +  }
 +
 +  /* Continue the table linearly from nx0 to 0.
 +   * These values are only required for energy minimization with overlap or TPI.
 +   */
 +  for(i=td->nx0-1; i>=0; i--) {
 +    td->v[i] = td->v[i+1] + td->f[i+1]*(td->x[i+1] - td->x[i]);
 +    td->f[i] = td->f[i+1];
 +  }
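The sign in this extrapolation follows from f storing -dV/dx (a note, not part of the patch): stepping from x[i+1] down to x[i] gives V(x[i]) ~= V(x[i+1]) - (dV/dx)*(x[i+1]-x[i]) = V(x[i+1]) + f[i+1]*(x[i+1]-x[i]), i.e. the potential is continued linearly with a constant force below nx0.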
 +
 +#ifdef DEBUG_SWITCH
 +  gmx_fio_fclose(fp);
 +#endif
 +}
 +
 +static void set_table_type(int tabsel[],const t_forcerec *fr,gmx_bool b14only)
 +{
 +  int eltype,vdwtype;
 +
 +  /* Set the different table indices.
 +   * Coulomb first.
 +   */
 +
 +
 +  if (b14only) {
 +    switch (fr->eeltype) {
 +    case eelRF_NEC:
 +      eltype = eelRF;
 +      break;
 +    case eelUSER:
 +    case eelPMEUSER:
 +    case eelPMEUSERSWITCH:
 +      eltype = eelUSER;
 +      break;
 +    default:
 +      eltype = eelCUT;
 +    }
 +  } else {
 +    eltype = fr->eeltype;
 +  }
 +  
 +  switch (eltype) {
 +  case eelCUT:
 +    tabsel[etiCOUL] = etabCOUL;
 +    break;
 +  case eelPOISSON:
 +    tabsel[etiCOUL] = etabShift;
 +    break;
 +  case eelSHIFT:
 +    if (fr->rcoulomb > fr->rcoulomb_switch)
 +      tabsel[etiCOUL] = etabShift;
 +    else
 +      tabsel[etiCOUL] = etabCOUL;
 +    break;
 +  case eelEWALD:
 +  case eelPME:
 +  case eelP3M_AD:
 +    tabsel[etiCOUL] = etabEwald;
 +    break;
 +  case eelPMESWITCH:
 +    tabsel[etiCOUL] = etabEwaldSwitch;
 +    break;
 +  case eelPMEUSER:
 +    tabsel[etiCOUL] = etabEwaldUser;
 +    break;
 +  case eelPMEUSERSWITCH:
 +    tabsel[etiCOUL] = etabEwaldUserSwitch;
 +    break;
 +  case eelRF:
 +  case eelGRF:
 +  case eelRF_NEC:
 +    tabsel[etiCOUL] = etabRF;
 +    break;
 +  case eelRF_ZERO:
 +    tabsel[etiCOUL] = etabRF_ZERO;
 +    break;
 +  case eelSWITCH:
 +    tabsel[etiCOUL] = etabCOULSwitch;
 +    break;
 +  case eelUSER:
 +    tabsel[etiCOUL] = etabUSER;
 +    break;
 +  case eelENCADSHIFT:
 +    tabsel[etiCOUL] = etabCOULEncad;
 +    break;      
 +  default:
 +    gmx_fatal(FARGS,"Invalid eeltype %d",eltype);
 +  }
 +  
 +  /* Van der Waals time */
 +  if (fr->bBHAM && !b14only) {
 +    tabsel[etiLJ6]  = etabLJ6;
 +    tabsel[etiLJ12] = etabEXPMIN;
 +  } else {
 +    if (b14only && fr->vdwtype != evdwUSER)
 +      vdwtype = evdwCUT;
 +    else
 +      vdwtype = fr->vdwtype;
 +
 +    switch (vdwtype) {
 +    case evdwSWITCH:
 +      tabsel[etiLJ6]  = etabLJ6Switch;
 +      tabsel[etiLJ12] = etabLJ12Switch;
 +      break;
 +    case evdwSHIFT:
 +      tabsel[etiLJ6]  = etabLJ6Shift;
 +      tabsel[etiLJ12] = etabLJ12Shift;
 +      break;
 +    case evdwUSER:
 +      tabsel[etiLJ6]  = etabUSER;
 +      tabsel[etiLJ12] = etabUSER;
 +      break;
 +    case evdwCUT:
 +      tabsel[etiLJ6]  = etabLJ6;
 +      tabsel[etiLJ12] = etabLJ12;
 +      break;
 +    case evdwENCADSHIFT:
 +      tabsel[etiLJ6]  = etabLJ6Encad;
 +      tabsel[etiLJ12] = etabLJ12Encad;
 +      break;
 +    default:
 +      gmx_fatal(FARGS,"Invalid vdwtype %d in %s line %d",vdwtype,
 +                __FILE__,__LINE__);
 +    } 
 +  }
 +}
 +
 +t_forcetable make_tables(FILE *out,const output_env_t oenv,
 +                         const t_forcerec *fr,
 +                       gmx_bool bVerbose,const char *fn,
 +                       real rtab,int flags)
 +{
 +  const char *fns[3] = { "ctab.xvg", "dtab.xvg", "rtab.xvg" };
 +  const char *fns14[3] = { "ctab14.xvg", "dtab14.xvg", "rtab14.xvg" };
 +  FILE        *fp;
 +  t_tabledata *td;
 +  gmx_bool        b14only,bReadTab,bGenTab;
 +  real        x0,y0,yp;
 +  int         i,j,k,nx,nx0,tabsel[etiNR];
-   snew_aligned(table.tab, 12*(nx+1)*sizeof(real),16);
++  real        scalefactor;
++
 +  t_forcetable table;
 +
 +  b14only = (flags & GMX_MAKETABLES_14ONLY);
 +
 +  if (flags & GMX_MAKETABLES_FORCEUSER) {
 +    tabsel[etiCOUL] = etabUSER;
 +    tabsel[etiLJ6]  = etabUSER;
 +    tabsel[etiLJ12] = etabUSER;
 +  } else {
 +    set_table_type(tabsel,fr,b14only);
 +  }
 +  snew(td,etiNR);
 +  table.r         = rtab;
 +  table.scale     = 0;
 +  table.n         = 0;
 +  table.scale_exp = 0;
 +  nx0             = 10;
 +  nx              = 0;
 +  
++  table.interaction   = GMX_TABLE_INTERACTION_ELEC_VDWREP_VDWDISP;
++  table.format        = GMX_TABLE_FORMAT_CUBICSPLINE_YFGH;
++  table.formatsize    = 4;
++  table.ninteractions = 3;
++  table.stride        = table.formatsize*table.ninteractions;
++
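To make the stride/offset arithmetic used below concrete: in the YFGH cubic-spline format each point stores 4 reals per interaction, and the three interactions (Coulomb, LJ dispersion, LJ repulsion) are interleaved, so the per-point stride is 12 and interaction k starts at offset 4*k. A hypothetical indexing helper, for illustration only (not part of the patch):

static real table_entry(const real *data, int point, int interaction, int coeff)
{
    /* stride = formatsize*ninteractions = 4*3 = 12 reals per table point;
     * coeff selects one of the four spline values (Y, F, G, H). */
    return data[12*point + 4*interaction + coeff];
}

This matches the copy2table(table.n, k*4, 12, ...) and evaluate_table(table.data, 4*k, 12, ...) calls further down.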
 +  /* Check whether we have to read or generate */
 +  bReadTab = FALSE;
 +  bGenTab  = FALSE;
 +  for(i=0; (i<etiNR); i++) {
 +    if (ETAB_USER(tabsel[i]))
 +      bReadTab = TRUE;
 +    if (tabsel[i] != etabUSER)
 +      bGenTab  = TRUE;
 +  }
 +  if (bReadTab) {
 +    read_tables(out,fn,etiNR,0,td);
 +    if (rtab == 0 || (flags & GMX_MAKETABLES_14ONLY)) {
 +      rtab      = td[0].x[td[0].nx-1];
 +      table.n   = td[0].nx;
 +      nx        = table.n;
 +    } else {
 +      if (td[0].x[td[0].nx-1] < rtab) 
 +      gmx_fatal(FARGS,"Tables in file %s not long enough for cut-off:\n"
 +                "\tshould be at least %f nm\n",fn,rtab);
 +      nx        = table.n = (int)(rtab*td[0].tabscale + 0.5);
 +    }
 +    table.scale = td[0].tabscale;
 +    nx0         = td[0].nx0;
 +  }
 +  if (bGenTab) {
 +    if (!bReadTab) {
 +#ifdef GMX_DOUBLE
 +      table.scale = 2000.0;
 +#else
 +      table.scale = 500.0;
 +#endif
 +      nx = table.n = rtab*table.scale;
 +    }
 +  }
 +  if (fr->bBHAM) {
 +    if(fr->bham_b_max!=0)
 +      table.scale_exp = table.scale/fr->bham_b_max;
 +    else
 +      table.scale_exp = table.scale;
 +  }
 +
 +  /* Each table type (e.g. coul,lj6,lj12) requires four 
 +   * numbers for each of the nx+1 data points. For performance reasons we want
 +   * the table data to be 16-byte aligned.
 +   */
-     copy2table(table.n,k*4,12,td[k].x,td[k].v,td[k].f,table.tab);
++  snew_aligned(table.data, 12*(nx+1)*sizeof(real),16);
 +
 +  for(k=0; (k<etiNR); k++) {
 +    if (tabsel[k] != etabUSER) {
 +      init_table(out,nx,nx0,
 +               (tabsel[k] == etabEXPMIN) ? table.scale_exp : table.scale,
 +               &(td[k]),!bReadTab);
 +      fill_table(&(td[k]),tabsel[k],fr);
 +      if (out) 
 +      fprintf(out,"%s table with %d data points for %s%s.\n"
 +              "Tabscale = %g points/nm\n",
 +              ETAB_USER(tabsel[k]) ? "Modified" : "Generated",
 +              td[k].nx,b14only?"1-4 ":"",tprops[tabsel[k]].name,
 +              td[k].tabscale);
 +    }
-       evaluate_table(table.tab,4*k,12,table.scale,x0,&y0,&yp);
++
++    /* Set scalefactor for c6/c12 tables. This is because we save flops in the non-table kernels
++     * by including the derivative constants (6.0 or 12.0) in the parameters, since
++     * we no longer calculate force in most steps. This means the c6/c12 parameters
++     * have been scaled up, so we need to scale down the table interactions too.
++     * This is done here because user tables need to be scaled too.
++     */
++      if(k==etiLJ6)
++      {
++          scalefactor = 1.0/6.0;
++      }
++      else if(k==etiLJ12 && tabsel[k]!=etabEXPMIN)
++      {
++          scalefactor = 1.0/12.0;
++      }
++      else
++      {
++          scalefactor = 1.0;
++      }
++
++    copy2table(table.n,k*4,12,td[k].x,td[k].v,td[k].f,scalefactor,table.data);
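A brief sanity check on the 1/6 and 1/12 factors (not part of the patch): if nbfp now stores c6' = 6*C6 and c12' = 12*C12, as the comment above and the C6(...)/6.0, C12(...)/12.0 reads elsewhere in this merge suggest, then scaling the dispersion and repulsion table entries by 1/6 and 1/12 leaves the tabulated interaction unchanged: c6'*(Vtab/6) == C6*Vtab and c12'*(Vtab/12) == C12*Vtab.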
 +    
 +    if (bDebugMode() && bVerbose) {
 +      if (b14only)
 +      fp=xvgropen(fns14[k],fns14[k],"r","V",oenv);
 +      else
 +      fp=xvgropen(fns[k],fns[k],"r","V",oenv);
 +      /* plot the output 5 times denser than the table data */
 +      for(i=5*((nx0+1)/2); i<5*table.n; i++) {
 +      x0 = i*table.r/(5*(table.n-1));
-       table.r         = fr->gbtabr;
-       table.scale     = fr->gbtabscale;
-       table.scale_exp = 0;
-       table.n         = table.scale*table.r;
-       nx0             = 0;
-       nx              = table.scale*table.r;
++      evaluate_table(table.data,4*k,12,table.scale,x0,&y0,&yp);
 +      fprintf(fp,"%15.10e  %15.10e  %15.10e\n",x0,y0,yp);
 +      }
 +      gmx_fio_fclose(fp);
 +    }
 +    done_tabledata(&(td[k]));
 +  }
 +  sfree(td);
 +
 +  return table;
 +}
 +
 +t_forcetable make_gb_table(FILE *out,const output_env_t oenv,
 +                           const t_forcerec *fr,
 +                           const char *fn,
 +                           real rtab)
 +{
 +      const char *fns[3] = { "gbctab.xvg", "gbdtab.xvg", "gbrtab.xvg" };
 +      const char *fns14[3] = { "gbctab14.xvg", "gbdtab14.xvg", "gbrtab14.xvg" };
 +      FILE        *fp;
 +      t_tabledata *td;
 +      gmx_bool        bReadTab,bGenTab;
 +      real        x0,y0,yp;
 +      int         i,j,k,nx,nx0,tabsel[etiNR];
 +      double      r,r2,Vtab,Ftab,expterm;
 +      
 +      t_forcetable table;
 +      
 +      double abs_error_r, abs_error_r2;
 +      double rel_error_r, rel_error_r2;
 +      double rel_error_r_old=0, rel_error_r2_old=0;
 +      double x0_r_error, x0_r2_error;
 +      
 +      
 +      /* Only set a Coulomb table for GB */
 +      /* 
 +       tabsel[0]=etabGB;
 +       tabsel[1]=-1;
 +       tabsel[2]=-1;
 +      */
 +      
 +      /* Set the table dimensions for GB, not really necessary to
 +       * use etiNR here (we only have one table), but we keep the convention.
 +       */
 +      snew(td,1);
-       snew_aligned(table.tab,4*nx,16);
++    table.interaction   = GMX_TABLE_INTERACTION_ELEC;
++    table.format        = GMX_TABLE_FORMAT_CUBICSPLINE_YFGH;
++      table.r             = fr->gbtabr;
++      table.scale         = fr->gbtabscale;
++      table.scale_exp     = 0;
++      table.n             = table.scale*table.r;
++    table.formatsize    = 4;
++    table.ninteractions = 1;
++    table.stride        = table.formatsize*table.ninteractions;
++      nx0                 = 0;
++      nx                  = table.scale*table.r;
 +      
 +      /* Check whether we have to read or generate 
 +       * We will always generate a table, so remove the read code
 +       * (Compare with original make_table function
 +       */
 +      bReadTab = FALSE;
 +      bGenTab  = TRUE;
 +      
 +      /* Each table type (e.g. coul,lj6,lj12) requires four 
 +       * numbers per datapoint. For performance reasons we want
 +       * the table data to be 16-byte aligned. This is accomplished
 +       * by allocating 16 bytes extra to a temporary pointer, and then
 +       * calculating an aligned pointer. This new pointer must not be
 +       * used in a free() call, but thankfully we're sloppy enough not
 +       * to do this :-)
 +       */
 +      
-       copy2table(table.n,0,4,td[0].x,td[0].v,td[0].f,table.tab);
++      snew_aligned(table.data,4*nx,16);
 +      
 +      init_table(out,nx,nx0,table.scale,&(td[0]),!bReadTab);
 +      
 +      /* Local implementation so we don't have to use the etabGB
 +       * enum above, which will cause problems later when
 +       * making the other tables (right now even though we are using
 +       * GB, the normal Coulomb tables will be created, but this
 +       * will cause a problem since fr->eeltype==etabGB which will not
 +       * be defined in fill_table and set_table_type
 +       */
 +      
 +      for(i=nx0;i<nx;i++)
 +    {
 +              Vtab    = 0.0;
 +              Ftab    = 0.0;
 +              r       = td->x[i];
 +              r2      = r*r;
 +              expterm = exp(-0.25*r2);
 +              
 +              Vtab = 1/sqrt(r2+expterm);
 +              Ftab = (r-0.25*r*expterm)/((r2+expterm)*sqrt(r2+expterm));
 +              
 +              /* Convert to single precision when we store to mem */
 +              td->v[i]  = Vtab;
 +              td->f[i]  = Ftab;
 +              
 +    }
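A quick check of the force entry (not part of the patch): with Vtab(r) = 1/sqrt(r^2 + exp(-r^2/4)), the derivative is dVtab/dr = -(r - 0.25*r*exp(-r^2/4))/(r^2 + exp(-r^2/4))^(3/2), so the Ftab above is exactly -dVtab/dr, consistent with the convention used for the other generated tables.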
 +      
-                       evaluate_table(table.tab,0,4,table.scale,x0,&y0,&yp);
++      copy2table(table.n,0,4,td[0].x,td[0].v,td[0].f,1.0,table.data);
 +      
 +      if(bDebugMode())
 +    {
 +              fp=xvgropen(fns[0],fns[0],"r","V",oenv);
 +              /* plot the output 5 times denser than the table data */
 +              /* for(i=5*nx0;i<5*table.n;i++) */
 +              for(i=nx0;i<table.n;i++)
 +              {
 +                      /* x0=i*table.r/(5*table.n); */
 +                      x0=i*table.r/table.n;
-        evaluate_table(table.tab,0,4,table.scale,r,&y0,&yp);
++                      evaluate_table(table.data,0,4,table.scale,x0,&y0,&yp);
 +                      fprintf(fp,"%15.10e  %15.10e  %15.10e\n",x0,y0,yp);
 +                      
 +              }
 +              gmx_fio_fclose(fp);
 +    }
 +      
 +      /*
 +       for(i=100*nx0;i<99.81*table.n;i++)
 +       {
 +       r = i*table.r/(100*table.n);
 +       r2      = r*r;
 +       expterm = exp(-0.25*r2);
 +       
 +       Vtab = 1/sqrt(r2+expterm);
 +       Ftab = (r-0.25*r*expterm)/((r2+expterm)*sqrt(r2+expterm));
 +       
 +       
-     snew_aligned(table.tab,4*nx,16);
-       
-       copy2table(table.n,0,4,td[0].x,td[0].v,td[0].f,table.tab);
++       evaluate_table(table.data,0,4,table.scale,r,&y0,&yp);
 +       printf("gb: i=%d, x0=%g, y0=%15.15f, Vtab=%15.15f, yp=%15.15f, Ftab=%15.15f\n",i,r, y0, Vtab, yp, Ftab);
 +       
 +       abs_error_r=fabs(y0-Vtab);
 +       abs_error_r2=fabs(yp-(-1)*Ftab);
 +       
 +       rel_error_r=abs_error_r/y0;
 +       rel_error_r2=fabs(abs_error_r2/yp);
 +       
 +       
 +       if(rel_error_r>rel_error_r_old)
 +       {
 +       rel_error_r_old=rel_error_r;
 +       x0_r_error=x0;
 +       }
 +       
 +       if(rel_error_r2>rel_error_r2_old)
 +       {
 +       rel_error_r2_old=rel_error_r2;
 +       x0_r2_error=x0;        
 +       }
 +       }
 +       
 +       printf("gb: MAX REL ERROR IN R=%15.15f, MAX REL ERROR IN R2=%15.15f\n",rel_error_r_old, rel_error_r2_old);
 +       printf("gb: XO_R=%g, X0_R2=%g\n",x0_r_error, x0_r2_error);
 +       
 +       exit(1); */
 +      done_tabledata(&(td[0]));
 +      sfree(td);
 +      
 +      return table;
 +      
 +      
 +}
 +
 +t_forcetable make_atf_table(FILE *out,const output_env_t oenv,
 +                          const t_forcerec *fr,
 +                          const char *fn,
 +                            matrix box)
 +{
 +      const char *fns[3] = { "tf_tab.xvg", "atfdtab.xvg", "atfrtab.xvg" };
 +      FILE        *fp;
 +      t_tabledata *td;
 +      real        x0,y0,yp,rtab;
 +      int         i,nx,nx0;
 +        real        rx, ry, rz, box_r;
 +      
 +      t_forcetable table;
 +      
 +      
 +      /* Set the table dimensions for ATF, not really necessary to
 +       * use etiNR here (we only have one table), but we keep the convention.
 +       */
 +      snew(td,1);
 +        
 +        if (fr->adress_type == eAdressSphere){
 +            /* take half box diagonal direction as tab range */
 +               rx = 0.5*box[0][0]+0.5*box[1][0]+0.5*box[2][0];
 +               ry = 0.5*box[0][1]+0.5*box[1][1]+0.5*box[2][1];
 +               rz = 0.5*box[0][2]+0.5*box[1][2]+0.5*box[2][2];
 +               box_r = sqrt(rx*rx+ry*ry+rz*rz);
 +               
 +        }else{
 +            /* xsplit: take half box x direction as tab range */
 +               box_r        = box[0][0]/2;
 +        }
 +        table.r         = box_r;
 +      table.scale     = 0;
 +      table.n         = 0;
 +      table.scale_exp = 0;
 +      nx0             = 10;
 +      nx              = 0;
 +      
 +        read_tables(out,fn,1,0,td);
 +        rtab      = td[0].x[td[0].nx-1];
 +
 +       if (fr->adress_type == eAdressXSplit && (rtab < box[0][0]/2)){
 +           gmx_fatal(FARGS,"AdResS full box therm force table in file %s extends to %f:\n"
 +                        "\tshould extend to at least half the length of the box in x-direction"
 +                        "%f\n",fn,rtab, box[0][0]/2);
 +       }
 +       if (rtab < box_r){
 +               gmx_fatal(FARGS,"AdResS full box therm force table in file %s extends to %f:\n"
 +                "\tshould extend to at least for spherical adress"
 +                "%f (=distance from center to furthermost point in box \n",fn,rtab, box_r);
 +       }
 +
 +
 +        table.n   = td[0].nx;
 +        nx        = table.n;
 +        table.scale = td[0].tabscale;
 +        nx0         = td[0].nx0;
 +
 +      /* Each table type (e.g. coul,lj6,lj12) requires four 
 +       * numbers per datapoint. For performance reasons we want
 +       * the table data to be 16-byte aligned. This is accomplished
 +       * by allocating 16 bytes extra to a temporary pointer, and then
 +       * calculating an aligned pointer. This new pointer must not be
 +       * used in a free() call, but thankfully we're sloppy enough not
 +       * to do this :-)
 +       */
 +      
-               evaluate_table(table.tab,0,4,table.scale,x0,&y0,&yp);
++    snew_aligned(table.data,4*nx,16);
++
++      copy2table(table.n,0,4,td[0].x,td[0].v,td[0].f,1.0,table.data);
 +      
 +      if(bDebugMode())
 +        {
 +          fp=xvgropen(fns[0],fns[0],"r","V",oenv);
 +          /* plot the output 5 times denser than the table data */
 +          /* for(i=5*nx0;i<5*table.n;i++) */
 +         
 +            for(i=5*((nx0+1)/2); i<5*table.n; i++)
 +            {
 +              /* x0=i*table.r/(5*table.n); */
 +              x0 = i*table.r/(5*(table.n-1));
-   snew(tab.tab,tab.n*4);
-   copy2table(tab.n,0,4,td.x,td.v,td.f,tab.tab);
++              evaluate_table(table.data,0,4,table.scale,x0,&y0,&yp);
 +              fprintf(fp,"%15.10e  %15.10e  %15.10e\n",x0,y0,yp);
 +              
 +            }
 +          ffclose(fp);
 +        }
 +
 +      done_tabledata(&(td[0]));
 +      sfree(td);
++
++    table.interaction   = GMX_TABLE_INTERACTION_ELEC_VDWREP_VDWDISP;
++    table.format        = GMX_TABLE_FORMAT_CUBICSPLINE_YFGH;
++    table.formatsize    = 4;
++    table.ninteractions = 3;
++    table.stride        = table.formatsize*table.ninteractions;
++
 +      
 +      return table;
 +}
 +
 +bondedtable_t make_bonded_table(FILE *fplog,char *fn,int angle)
 +{
 +  t_tabledata td;
 +  double start;
 +  int    i;
 +  bondedtable_t tab;
 +  
 +  if (angle < 2)
 +    start = 0;
 +  else
 +    start = -180.0;
 +  read_tables(fplog,fn,1,angle,&td);
 +  if (angle > 0) {
 +    /* Convert the table from degrees to radians */
 +    for(i=0; i<td.nx; i++) {
 +      td.x[i] *= DEG2RAD;
 +      td.f[i] *= RAD2DEG;
 +    }
 +    td.tabscale *= RAD2DEG;
 +  }
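A note on the unit conversion (not part of the patch): f is a derivative with respect to x, so when x is rescaled by DEG2RAD the derivative must be rescaled by the inverse factor, dV/dx_rad = (dV/dx_deg)*RAD2DEG. tabscale counts points per unit of x, so points per degree likewise becomes points per radian by multiplying with RAD2DEG.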
 +  tab.n = td.nx;
 +  tab.scale = td.tabscale;
++  snew(tab.data,tab.n*4);
++  copy2table(tab.n,0,4,td.x,td.v,td.f,1.0,tab.data);
 +  done_tabledata(&td);
 +
 +  return tab;
 +}
 +
 +
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
index f0936478f75cb12d99430768db8c906b9a4f39f5,0000000000000000000000000000000000000000..08d7c6378fe49d7062c6e4ad36bed4a752b41d0f
mode 100644,000000..100644
--- /dev/null
@@@ -1,503 -1,0 +1,505 @@@
-     tclj->c6  = C6(fr->nbfp,fr->ntype,ati,atj);
-     tclj->c12 = C12(fr->nbfp,fr->ntype,ati,atj);
 +/*
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include "typedefs.h"
 +#include "xmdrun.h"
 +#include "futil.h"
 +#include "xvgr.h"
 +#include "macros.h"
 +#include "physics.h"
 +#include "network.h"
 +#include "smalloc.h"
 +#include "string2.h"
 +#include "readinp.h"
 +#include "filenm.h"
 +#include "names.h"
 +#include "gmxfio.h"
 +
 +const char *eoNames[eoNR] = { 
 +  "Pres", "Epot", "Vir", "Dist", "Mu", "Force", "Fx", "Fy", "Fz",
 +  "Px", "Py", "Pz",
 +  "Polarizability", "Dipole", "Memory", "UseEinter", "UseVirial",
 +  "CombinationRule"
 +};
 +
 +static int Name2eo(char *s)
 +{
 +  int i,res;
 +  
 +  res=-1;
 +  
 +  for(i=0; (i<eoNR); i++)
 +    if (gmx_strcasecmp(s,eoNames[i]) == 0) {
 +      res=i;
 +      fprintf(stderr,"Coupling to observable %d (%s)\n",res,eoNames[res]);
 +      break;
 +    }
 +  
 +  return res;
 +}
 +
 +#define  block_bc(cr,   d) gmx_bcast(     sizeof(d),     &(d),(cr))
 +#define nblock_bc(cr,nr,d) gmx_bcast((nr)*sizeof((d)[0]), (d),(cr))
 +#define   snew_bc(cr,d,nr) { if (!MASTER(cr)) snew((d),(nr)); }
 +
 +static void low_comm_tcr(t_commrec *cr,t_coupl_rec *tcr)
 +{
 +  nblock_bc(cr,eoObsNR,tcr->ref_value);
 +  
 +  block_bc(cr,tcr->nLJ);
 +  snew_bc(cr,tcr->tcLJ,tcr->nLJ);
 +  nblock_bc(cr,tcr->nLJ,tcr->tcLJ);
 +  
 +  block_bc(cr,tcr->nBU);
 +  snew_bc(cr,tcr->tcBU,tcr->nBU);
 +  nblock_bc(cr,tcr->nBU,tcr->tcBU);
 +  
 +  block_bc(cr,tcr->nQ);
 +  snew_bc(cr,tcr->tcQ,tcr->nQ);
 +  nblock_bc(cr,tcr->nQ,tcr->tcQ);
 +  
 +  block_bc(cr,tcr->nmemory);
 +  block_bc(cr,tcr->bInter);
 +  block_bc(cr,tcr->bVirial);
 +  block_bc(cr,tcr->combrule);
 +}
 +
 +void comm_tcr(FILE *log,t_commrec *cr,t_coupl_rec **tcr)
 +{
 +  if (!MASTER(cr))
 +    snew(*tcr,1);
 +  
 +  low_comm_tcr(cr,*tcr);
 +} 
 +
 +static void clear_lj(t_coupl_LJ *tc)
 +{
 +  tc->at_i   = 0;
 +  tc->at_j   = 0;
 +  tc->eObs   = -1;
 +  tc->bPrint = TRUE;
 +  tc->c6     = 0.0;
 +  tc->c12    = 0.0;
 +  tc->xi_6   = 0.0;
 +  tc->xi_12  = 0.0;
 +}
 +
 +static void clear_bu(t_coupl_BU *tc)
 +{
 +  tc->at_i   = 0;
 +  tc->at_j   = 0;
 +  tc->eObs   = -1;
 +  tc->bPrint = TRUE;
 +  tc->a      = 0.0;
 +  tc->b      = 0.0;
 +  tc->c      = 0.0;
 +  tc->xi_a   = 0.0;
 +  tc->xi_b   = 0.0;
 +  tc->xi_c   = 0.0;
 +}
 +
 +static void clear_q(t_coupl_Q *tc)
 +{
 +  tc->at_i   = 0;
 +  tc->eObs   = -1;
 +  tc->bPrint = TRUE;
 +  tc->Q      = 0.0;
 +  tc->xi_Q   = 0.0;
 +}
 +
 +void copy_ff(t_coupl_rec *tcr,t_forcerec *fr,t_mdatoms *md,t_idef *idef)
 +{
 +  int        i,j,ati,atj,type;
 +  t_coupl_LJ *tclj;
 +  t_coupl_BU *tcbu;
 +  t_coupl_Q  *tcq;
 +  
 +  /* Save values for printing */
 +  for(i=0; (i<tcr->nLJ); i++) {
 +    tclj = &(tcr->tcLJ[i]);
 +    
 +    ati  = tclj->at_i;
 +    atj  = tclj->at_j;
 +    if (atj == -1)
 +      atj = ati;
-     tcbu->c = BHAMC(fr->nbfp,fr->ntype,ati,atj);
++    /* nbfp now includes the 6.0/12.0 derivative prefactors */
++    tclj->c6  = C6(fr->nbfp,fr->ntype,ati,atj)/6.0;
++    tclj->c12 = C12(fr->nbfp,fr->ntype,ati,atj)/12.0;
 +  }
 +  
 +  for(i=0; (i<tcr->nBU); i++) {
 +    tcbu = &(tcr->tcBU[i]);
 +    
 +    ati  = tcbu->at_i;
 +    atj  = tcbu->at_j;
 +    if (atj == -1)
 +      atj = ati;
++    /* nbfp now includes the 6.0 derivative prefactor */
 +    tcbu->a = BHAMA(fr->nbfp,fr->ntype,ati,atj);
 +    tcbu->b = BHAMB(fr->nbfp,fr->ntype,ati,atj);
++    tcbu->c = BHAMC(fr->nbfp,fr->ntype,ati,atj)/6.0;
 +  }
 +  
 +  for(i=0; (i<tcr->nQ); i++) {
 +    tcq = &(tcr->tcQ[i]);
 +    for(j=0; (j<md->nr); j++) {
 +      if (md->typeA[j] == tcq->at_i) {
 +      tcr->tcQ[i].Q = md->chargeA[j];
 +      break;
 +      }
 +    }
 +  }
 +  for(i=0; (i<tcr->nIP); i++) {
 +    /* Let's just copy the whole struct !*/
 +    type = tcr->tIP[i].type;
 +    tcr->tIP[i].iprint=idef->iparams[type];
 +  }
 +}
 +
 +void write_gct(const char *fn,t_coupl_rec *tcr,t_idef *idef)
 +{
 +  FILE *fp;
 +  int  i,ftype;
 +  
 +  fp=gmx_fio_fopen(fn,"w");
 +  nice_header(fp,fn);
 +  fprintf(fp,"%-15s = %12g  ; Reference pressure for coupling\n",
 +        eoNames[eoPres],tcr->ref_value[eoPres]);
 +  fprintf(fp,"%-15s = %12g  ; Reference potential energy\n",
 +        eoNames[eoEpot],tcr->ref_value[eoEpot]);
 +  fprintf(fp,"%-15s = %12g  ; Reference distance\n",
 +        eoNames[eoDist],tcr->ref_value[eoDist]);
 +  fprintf(fp,"%-15s = %12g  ; Reference dipole\n",
 +        eoNames[eoMu],tcr->ref_value[eoMu]);
 +  fprintf(fp,"%-15s = %12g  ; Reference force\n",
 +        eoNames[eoForce],tcr->ref_value[eoForce]);
 +  fprintf(fp,"%-15s = %12g  ; Reference force in X dir\n",
 +        eoNames[eoFx],tcr->ref_value[eoFx]);
 +  fprintf(fp,"%-15s = %12g  ; Reference force in Y dir\n",
 +        eoNames[eoFy],tcr->ref_value[eoFy]);
 +  fprintf(fp,"%-15s = %12g  ; Reference force in Z dir\n",
 +        eoNames[eoFz],tcr->ref_value[eoFz]);
 +  fprintf(fp,"%-15s = %12g  ; Reference pres in X dir\n",
 +        eoNames[eoPx],tcr->ref_value[eoPx]);
 +  fprintf(fp,"%-15s = %12g  ; Reference pres in Y dir\n",
 +        eoNames[eoPy],tcr->ref_value[eoPy]);
 +  fprintf(fp,"%-15s = %12g  ; Reference pres in Z dir\n",
 +        eoNames[eoPz],tcr->ref_value[eoPz]);
 +  fprintf(fp,"%-15s = %12g  ; Polarizability used for the Epot correction\n",
 +        eoNames[eoPolarizability],tcr->ref_value[eoPolarizability]);
 +  fprintf(fp,"%-15s = %12g  ; Gas phase dipole moment used for Epot correction\n", 
 +        eoNames[eoDipole],tcr->ref_value[eoDipole]);
 +  fprintf(fp,"%-15s = %12d  ; Memory for coupling. Makes it converge faster.\n",
 +        eoNames[eoMemory],tcr->nmemory);
 +  fprintf(fp,"%-15s = %12s  ; Use intermolecular Epot only (LJ+Coul)\n",
 +        eoNames[eoInter],yesno_names[tcr->bInter]);
 +  fprintf(fp,"%-15s = %12s  ; Use virial iso pressure\n",
 +        eoNames[eoUseVirial],yesno_names[tcr->bVirial]);
 +  fprintf(fp,"%-15s = %12d  ; Combination rule, same coding as in grompp.\n",
 +        eoNames[eoCombRule],tcr->combrule);
 +  
 +  fprintf(fp,"\n; Q-Coupling   %6s  %12s\n","type","xi");
 +  for(i=0; (i<tcr->nQ); i++) {
 +    fprintf(fp,"%-8s = %8s  %6d  %12g\n",
 +          "Q",eoNames[tcr->tcQ[i].eObs],tcr->tcQ[i].at_i,tcr->tcQ[i].xi_Q);
 +  }
 +  
 +  fprintf(fp,"\n; %8s %8s  %6s  %6s  %12s  %12s\n","Couple","To",
 +        "i-type","j-type","xi-c6","xi-c12");
 +  fprintf(fp,"; j-type == -1 means mixing rules will be applied!\n");
 +  for(i=0; (i<tcr->nLJ); i++) {
 +    fprintf(fp,"%-8s = %8s  %6d  %6d  %12g  %12g\n",
 +          "LJ",eoNames[tcr->tcLJ[i].eObs],
 +          tcr->tcLJ[i].at_i,tcr->tcLJ[i].at_j,
 +          tcr->tcLJ[i].xi_6,tcr->tcLJ[i].xi_12);
 +  }
 +  
 +  fprintf(fp,"\n; %8s %8s  %6s  %6s  %12s  %12s  %12s\n","Couple","To",
 +        "i-type","j-type","xi-A","xi-B","xi-C");
 +  fprintf(fp,"; j-type == -1 means mixing rules will be applied!\n");
 +  for(i=0; (i<tcr->nBU); i++) {
 +    fprintf(fp,"%-8s = %8s  %6d  %6d  %12g  %12g  %12g\n",
 +          "BU",eoNames[tcr->tcBU[i].eObs],
 +          tcr->tcBU[i].at_i,tcr->tcBU[i].at_j,
 +          tcr->tcBU[i].xi_a,tcr->tcBU[i].xi_b,tcr->tcBU[i].xi_c);
 +  }
 +  
 +  fprintf(fp,"\n; More Coupling\n");
 +  for(i=0; (i<tcr->nIP); i++) {
 +    ftype=idef->functype[tcr->tIP[i].type];
 +    switch (ftype) {
 +    case F_BONDS:
 +      fprintf(fp,"%-15s = %-8s  %4d  %12g  %12g\n",
 +            "Bonds",eoNames[tcr->tIP[i].eObs],tcr->tIP[i].type,
 +            tcr->tIP[i].xi.harmonic.krA,
 +            tcr->tIP[i].xi.harmonic.rA);
 +      break;
 +    default:
 +      fprintf(stderr,"ftype %s not supported (yet)\n",
 +            interaction_function[ftype].longname);
 +    }
 +  }
 +  gmx_fio_fclose(fp);
 +}
 +
 +static gmx_bool add_lj(int *nLJ,t_coupl_LJ **tcLJ,char *s,gmx_bool bObsUsed[])
 +{
 +  int       j,ati,atj,eo;
 +  char      buf[256];
 +  double    xi6,xi12;
 +  
 +  if (sscanf(s,"%s%d%d%lf%lf",buf,&ati,&atj,&xi6,&xi12) != 5) 
 +    return TRUE;
 +  if ((eo=Name2eo(buf)) == -1)
 +    gmx_fatal(FARGS,"Invalid observable for LJ coupling: %s",buf);
 +  
 +  for(j=0; (j<*nLJ); j++) {
 +    if ((((*tcLJ)[j].at_i == ati) && ((*tcLJ)[j].at_j == atj)) &&
 +      ((*tcLJ)[j].xi_6 || (*tcLJ)[j].xi_12) &&
 +      ((*tcLJ)[j].eObs == eo))
 +      break;
 +  }
 +  if (j == *nLJ) {
 +    ++(*nLJ);
 +    srenew((*tcLJ),*nLJ);
 +  }
 +  else
 +    fprintf(stderr,"\n*** WARNING: overwriting entry for LJ coupling '%s'\n",s);
 +  
 +  clear_lj(&((*tcLJ)[j]));
 +  if (((*tcLJ)[j].eObs = eo) == -1) {
 +    gmx_fatal(FARGS,"Invalid observable for LJ coupling: %s",buf);
 +  }
 +  (*tcLJ)[j].at_i   = ati;
 +  (*tcLJ)[j].at_j   = atj;
 +  (*tcLJ)[j].xi_6   = xi6;
 +  (*tcLJ)[j].xi_12  = xi12;
 +  bObsUsed[eo] = TRUE;
 +  
 +  return FALSE;
 +}
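For reference, add_lj() parses the value part of lines in the same format that write_gct() emits: observable name, i-type, j-type, xi-c6 and xi-c12. A hypothetical input entry (the numbers are made up for illustration):

LJ       =     Epot       1       2          0.1          0.2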
 +
 +static gmx_bool add_bu(int *nBU,t_coupl_BU **tcBU,char *s,gmx_bool bObsUsed[])
 +{
 +  int       j,ati,atj,eo;
 +  char      buf[256];
 +  double    xia,xib,xic;
 +  
 +  if (sscanf(s,"%s%d%d%lf%lf%lf",buf,&ati,&atj,&xia,&xib,&xic) != 6) 
 +    return TRUE;
 +  if ((eo=Name2eo(buf)) == -1)
 +    gmx_fatal(FARGS,"Invalid observable for BU coupling: %s",buf);
 +  
 +  for(j=0; (j<*nBU); j++) {
 +    if ((((*tcBU)[j].at_i == ati) && ((*tcBU)[j].at_j == atj)) &&
 +      ((*tcBU)[j].xi_a || (*tcBU)[j].xi_b || (*tcBU)[j].xi_c ) &&
 +      ((*tcBU)[j].eObs == eo))
 +      break;
 +  }
 +  if (j == *nBU) {
 +    ++(*nBU);
 +    srenew((*tcBU),*nBU);
 +  }
 +  else
 +    fprintf(stderr,"\n*** WARNING: overwriting entry for BU coupling '%s'\n",s);
 +  
 +  clear_bu(&((*tcBU)[j]));
 +  if (((*tcBU)[j].eObs = eo) == -1) {
 +    gmx_fatal(FARGS,"Invalid observable for BU coupling: %s",buf);
 +  }
 +  (*tcBU)[j].at_i   = ati;
 +  (*tcBU)[j].at_j   = atj;
 +  (*tcBU)[j].xi_a   = xia;
 +  (*tcBU)[j].xi_b   = xib;
 +  (*tcBU)[j].xi_c   = xic;
 +  bObsUsed[eo] = TRUE;
 +
 +  return FALSE;
 +}
 +
 +static gmx_bool add_ip(int *nIP,t_coupl_iparams **tIP,char *s,int ftype,gmx_bool bObsUsed[])
 +{
 +  int    i,eo,type;
 +  char   buf[256];
 +  double kb,b0;
 +  
 +  switch (ftype) {
 +  case F_BONDS:
 +    /* Pick out the type */
 +    if (sscanf(s,"%s%d",buf,&type) != 2)
 +      return TRUE;
 +    if ((eo=Name2eo(buf)) == -1)
 +      gmx_fatal(FARGS,"Invalid observable for IP coupling: %s",buf);
 +      
 +    /* Check whether this entry is there already */
 +    for(i=0; (i<*nIP); i++) {
 +      if ((*tIP)[i].type == type)
 +      break;
 +    }
 +    if (i < *nIP) {
 +      fprintf(stderr,"*** WARNING: overwriting entry for type %d\n",type);
 +    }
 +    else {
 +      i=*nIP;
 +      srenew((*tIP),i+1);
 +      (*nIP)++;
 +    }
 +    if (sscanf(s,"%s%d%lf%lf",buf,&type,&kb,&b0) != 4)
 +      return TRUE;
 +    (*tIP)[i].type=type;
 +    (*tIP)[i].eObs=eo;
 +    (*tIP)[i].xi.harmonic.krA = kb;
 +    (*tIP)[i].xi.harmonic.rA  = b0;
 +    bObsUsed[eo] = TRUE;
 +    break;
 +  default:
 +    fprintf(stderr,"ftype %s not supported (yet)\n",
 +          interaction_function[ftype].longname);
 +    return TRUE;
 +  }
 +  return FALSE;
 +}
 +
 +static gmx_bool add_q(int *nQ,t_coupl_Q **tcQ,char *s,gmx_bool bObsUsed[])
 +{
 +  int       j,ati,eo;
 +  char      buf[256];
 +  double    xiQ;
 +  
 +  if (sscanf(s,"%s%d%lf",buf,&ati,&xiQ) != 3) 
 +    return TRUE;
 +  
 +  for(j=0; (j<*nQ); j++) {
 +    if ((*tcQ)[j].at_i == ati)
 +      break;
 +  }
 +  if (j == *nQ) {
 +    ++(*nQ);
 +    srenew((*tcQ),*nQ);
 +  }
 +  else
 +    fprintf(stderr,"\n*** WARNING: overwriting entry for Q coupling '%s'\n",s);
 +  
 +  clear_q(&((*tcQ)[j]));
 +  eo = (*tcQ)[j].eObs = Name2eo(buf);
 +  if ((*tcQ)[j].eObs == -1) {
 +    gmx_fatal(FARGS,"Invalid observable for Q coupling: %s",buf);
 +  }
 +  (*tcQ)[j].at_i   = ati;
 +  (*tcQ)[j].xi_Q  = xiQ;
 +  bObsUsed[eo] = TRUE;
 +  
 +  return FALSE;
 +}
 +
 +void read_gct(const char *fn,t_coupl_rec *tcr)
 +{
 +  warninp_t wi;
 +  t_inpfile *inp;
 +  int       i,j,ninp,nQ,nLJ,nBU,nIP;
 +  gmx_bool      bWrong;
 +  
 +  wi = init_warning(FALSE,0);
 +
 +  inp=read_inpfile(fn,&ninp,NULL,wi);
 +
 +  for(i=0; (i<eoObsNR); i++) {
 +    tcr->bObsUsed[i] = FALSE;
 +    RTYPE (eoNames[i],        tcr->ref_value[i],      0.0);
 +  }
 +  ITYPE (eoNames[eoMemory],     tcr->nmemory,   1);
 +  ETYPE (eoNames[eoInter],      tcr->bInter,    yesno_names);
 +  ETYPE (eoNames[eoUseVirial],  tcr->bVirial,   yesno_names);
 +  ITYPE (eoNames[eoCombRule],   tcr->combrule,  1);
 +  tcr->tcLJ=NULL;
 +  tcr->tcBU=NULL;
 +  tcr->tcQ=NULL;
 +  tcr->tIP=NULL;
 +  nQ=nLJ=nBU=nIP=0;
 +  
 +  for(i=0; (i<ninp); i++) {
 +    bWrong=FALSE;
 +    if (gmx_strcasecmp(inp[i].name,"LJ") == 0) 
 +      bWrong=add_lj(&nLJ,&(tcr->tcLJ),inp[i].value,tcr->bObsUsed);
 +    else if (gmx_strcasecmp(inp[i].name,"BU") == 0) 
 +      bWrong=add_bu(&nBU,&(tcr->tcBU),inp[i].value,tcr->bObsUsed);
 +    else if (gmx_strcasecmp(inp[i].name,"Q") == 0) 
 +      bWrong=add_q(&nQ,&(tcr->tcQ),inp[i].value,tcr->bObsUsed);
 +    else if (gmx_strcasecmp(inp[i].name,"Bonds") == 0)
 +      bWrong=add_ip(&nIP,&(tcr->tIP),inp[i].value,F_BONDS,tcr->bObsUsed);
 +      
 +    if (bWrong)
 +      fprintf(stderr,"Wrong line in %s: '%s = %s'\n",
 +            fn,inp[i].name,inp[i].value);
 +    /*sfree(inp[i].name);
 +      sfree(inp[i].value);*/
 +  }
 +  /* Check which ones have to be printed */
 +  for(i=1; (i<nQ); i++)
 +    for(j=0; (j<i); j++) {
 +      if (tcr->tcQ[i].at_i == tcr->tcQ[j].at_i)
 +      tcr->tcQ[j].bPrint=FALSE;
 +    }
 +  for(i=1; (i<nLJ); i++)
 +    for(j=0; (j<i); j++) {
 +      if (((tcr->tcLJ[i].at_i == tcr->tcLJ[j].at_i) &&
 +         (tcr->tcLJ[i].at_j == tcr->tcLJ[j].at_j)) ||
 +        ((tcr->tcLJ[i].at_i == tcr->tcLJ[j].at_j) &&
 +         (tcr->tcLJ[i].at_j == tcr->tcLJ[j].at_i))) 
 +      tcr->tcLJ[j].bPrint=FALSE;
 +    }
 +  
 +  for(i=1; (i<nBU); i++)
 +    for(j=0; (j<i); j++) {
 +      if (((tcr->tcBU[i].at_i == tcr->tcBU[j].at_i) &&
 +         (tcr->tcBU[i].at_j == tcr->tcBU[j].at_j)) ||
 +        ((tcr->tcBU[i].at_i == tcr->tcBU[j].at_j) &&
 +         (tcr->tcBU[i].at_j == tcr->tcBU[j].at_i))) 
 +      tcr->tcBU[j].bPrint=FALSE;
 +    }
 +  
 +  tcr->nQ  = nQ;
 +  tcr->nLJ = nLJ;
 +  tcr->nBU = nBU;
 +  tcr->nIP = nIP;
 +  
 +  sfree(inp);
 +
 +  done_warning(wi,FARGS);
 +}
 +
index 2a99d5f7ee53cb006df15b76139f621d48556db0,0000000000000000000000000000000000000000..e7704beb89732cb4062257f14992e3b0978eb816
mode 100644,000000..100644
--- /dev/null
@@@ -1,2132 -1,0 +1,2152 @@@
-         fprintf(stderr,
-                 "\n* WARNING * WARNING * WARNING * WARNING * WARNING * WARNING *\n"
-                 "We have just committed the new CPU detection code in this branch,\n"
-                 "and will commit new SSE/AVX kernels in a few days. However, this\n"
-                 "means that currently only the NxN kernels are accelerated!\n"
-                 "In the mean time, you might want to avoid production runs in 4.6.\n\n");
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "sysstuff.h"
 +#include "vec.h"
 +#include "statutil.h"
 +#include "vcm.h"
 +#include "mdebin.h"
 +#include "nrnb.h"
 +#include "calcmu.h"
 +#include "index.h"
 +#include "vsite.h"
 +#include "update.h"
 +#include "ns.h"
 +#include "trnio.h"
 +#include "xtcio.h"
 +#include "mdrun.h"
 +#include "md_support.h"
 +#include "confio.h"
 +#include "network.h"
 +#include "pull.h"
 +#include "xvgr.h"
 +#include "physics.h"
 +#include "names.h"
 +#include "xmdrun.h"
 +#include "ionize.h"
 +#include "disre.h"
 +#include "orires.h"
 +#include "pme.h"
 +#include "mdatoms.h"
 +#include "repl_ex.h"
 +#include "qmmm.h"
 +#include "domdec.h"
 +#include "domdec_network.h"
 +#include "partdec.h"
 +#include "topsort.h"
 +#include "coulomb.h"
 +#include "constr.h"
 +#include "shellfc.h"
 +#include "compute_io.h"
 +#include "mvdata.h"
 +#include "checkpoint.h"
 +#include "mtop_util.h"
 +#include "sighandler.h"
 +#include "txtdump.h"
 +#include "string2.h"
 +#include "pme_loadbal.h"
 +#include "bondf.h"
 +#include "membed.h"
 +#include "types/nlistheuristics.h"
 +#include "types/iteratedconstraints.h"
 +#include "nbnxn_cuda_data_mgmt.h"
 +
 +#ifdef GMX_LIB_MPI
 +#include <mpi.h>
 +#endif
 +#ifdef GMX_THREAD_MPI
 +#include "tmpi.h"
 +#endif
 +
 +#ifdef GMX_FAHCORE
 +#include "corewrap.h"
 +#endif
 +
 +static void reset_all_counters(FILE *fplog,t_commrec *cr,
 +                               gmx_large_int_t step,
 +                               gmx_large_int_t *step_rel,t_inputrec *ir,
 +                               gmx_wallcycle_t wcycle,t_nrnb *nrnb,
 +                               gmx_runtime_t *runtime,
 +                               nbnxn_cuda_ptr_t cu_nbv)
 +{
 +    char sbuf[STEPSTRSIZE];
 +
 +    /* Reset all the counters related to performance over the run */
 +    md_print_warn(cr,fplog,"step %s: resetting all time and cycle counters\n",
 +                  gmx_step_str(step,sbuf));
 +
 +    if (cu_nbv)
 +    {
 +        nbnxn_cuda_reset_timings(cu_nbv);
 +    }
 +
 +    wallcycle_stop(wcycle,ewcRUN);
 +    wallcycle_reset_all(wcycle);
 +    if (DOMAINDECOMP(cr))
 +    {
 +        reset_dd_statistics_counters(cr->dd);
 +    }
 +    init_nrnb(nrnb);
 +    ir->init_step += *step_rel;
 +    ir->nsteps    -= *step_rel;
 +    *step_rel = 0;
 +    wallcycle_start(wcycle,ewcRUN);
 +    runtime_start(runtime);
 +    print_date_and_time(fplog,cr->nodeid,"Restarted time",runtime);
 +}
 +
 +double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
 +             const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
 +             int nstglobalcomm,
 +             gmx_vsite_t *vsite,gmx_constr_t constr,
 +             int stepout,t_inputrec *ir,
 +             gmx_mtop_t *top_global,
 +             t_fcdata *fcd,
 +             t_state *state_global,
 +             t_mdatoms *mdatoms,
 +             t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +             gmx_edsam_t ed,t_forcerec *fr,
 +             int repl_ex_nst,int repl_ex_nex,int repl_ex_seed,gmx_membed_t membed,
 +             real cpt_period,real max_hours,
 +             const char *deviceOptions,
 +             unsigned long Flags,
 +             gmx_runtime_t *runtime)
 +{
 +    gmx_mdoutf_t *outf;
 +    gmx_large_int_t step,step_rel;
 +    double     run_time;
 +    double     t,t0,lam0[efptNR];
 +    gmx_bool       bGStatEveryStep,bGStat,bCalcVir,bCalcEner;
 +    gmx_bool       bNS,bNStList,bSimAnn,bStopCM,bRerunMD,bNotLastFrame=FALSE,
 +               bFirstStep,bStateFromCP,bStateFromTPX,bInitStep,bLastStep,
 +               bBornRadii,bStartingFromCpt;
 +    gmx_bool   bDoDHDL=FALSE,bDoFEP=FALSE,bDoExpanded=FALSE;
 +    gmx_bool       do_ene,do_log,do_verbose,bRerunWarnNoV=TRUE,
 +               bForceUpdate=FALSE,bCPT;
 +    int        mdof_flags;
 +    gmx_bool       bMasterState;
 +    int        force_flags,cglo_flags;
 +    tensor     force_vir,shake_vir,total_vir,tmp_vir,pres;
 +    int        i,m;
 +    t_trxstatus *status;
 +    rvec       mu_tot;
 +    t_vcm      *vcm;
 +    t_state    *bufstate=NULL;   
 +    matrix     *scale_tot,pcoupl_mu,M,ebox;
 +    gmx_nlheur_t nlh;
 +    t_trxframe rerun_fr;
 +    gmx_repl_ex_t repl_ex=NULL;
 +    int        nchkpt=1;
 +    gmx_localtop_t *top;      
 +    t_mdebin *mdebin=NULL;
 +    df_history_t df_history;
 +    t_state    *state=NULL;
 +    rvec       *f_global=NULL;
 +    int        n_xtc=-1;
 +    rvec       *x_xtc=NULL;
 +    gmx_enerdata_t *enerd;
 +    rvec       *f=NULL;
 +    gmx_global_stat_t gstat;
 +    gmx_update_t upd=NULL;
 +    t_graph    *graph=NULL;
 +    globsig_t   gs;
 +    gmx_rng_t mcrng=NULL;
 +    gmx_bool        bFFscan;
 +    gmx_groups_t *groups;
 +    gmx_ekindata_t *ekind, *ekind_save;
 +    gmx_shellfc_t shellfc;
 +    int         count,nconverged=0;
 +    real        timestep=0;
 +    double      tcount=0;
 +    gmx_bool        bIonize=FALSE;
 +    gmx_bool        bTCR=FALSE,bConverged=TRUE,bOK,bSumEkinhOld,bExchanged;
 +    gmx_bool        bAppend;
 +    gmx_bool        bResetCountersHalfMaxH=FALSE;
 +    gmx_bool        bVV,bIterations,bFirstIterate,bTemp,bPres,bTrotter;
++    gmx_bool        bUpdateDoLR;
 +    real        mu_aver=0,dvdl;
 +    int         a0,a1,gnx=0,ii;
 +    atom_id     *grpindex=NULL;
 +    char        *grpname;
 +    t_coupl_rec *tcr=NULL;
 +    rvec        *xcopy=NULL,*vcopy=NULL,*cbuf=NULL;
 +    matrix      boxcopy={{0}},lastbox;
 +      tensor      tmpvir;
 +      real        fom,oldfom,veta_save,pcurr,scalevir,tracevir;
 +      real        vetanew = 0;
 +    int         lamnew=0;
 +    /* for FEP */
 +    int         fep_state=0;
 +    int         nstfep;
 +    real        rate;
 +    double      cycles;
 +      real        saved_conserved_quantity = 0;
 +    real        last_ekin = 0;
 +      int         iter_i;
 +      t_extmass   MassQ;
 +    int         **trotter_seq; 
 +    char        sbuf[STEPSTRSIZE],sbuf2[STEPSTRSIZE];
 +    int         handled_stop_condition=gmx_stop_cond_none; /* compare to get_stop_condition*/
 +    gmx_iterate_t iterate;
 +    gmx_large_int_t multisim_nsteps=-1; /* number of steps to do  before first multisim 
 +                                          simulation stops. If equal to zero, don't
 +                                          communicate any more between multisims.*/
 +    /* PME load balancing data for GPU kernels */
 +    pme_load_balancing_t pme_loadbal=NULL;
 +    double          cycles_pmes;
 +    gmx_bool        bPMETuneTry=FALSE,bPMETuneRunning=FALSE;
 +
 +    if(MASTER(cr))
 +    {
-         fr->cutoff_scheme == ecutsVERLET &&
-         (fr->nbv->bUseGPU || !(cr->duty & DUTY_PME)) &&
++        gmx_warning("New C kernels (and force-only) kernels are now enabled,\n"
++                    "but it will be another couple of days for SSE/AVX kernels.\n\n");
 +    }
 +
 +#ifdef GMX_FAHCORE
 +    /* Temporary addition for FAHCORE checkpointing */
 +    int chkpt_ret;
 +#endif
 +    
 +    /* Check for special mdrun options */
 +    bRerunMD = (Flags & MD_RERUN);
 +    bIonize  = (Flags & MD_IONIZE);
 +    bFFscan  = (Flags & MD_FFSCAN);
 +    bAppend  = (Flags & MD_APPENDFILES);
 +    if (Flags & MD_RESETCOUNTERSHALFWAY)
 +    {
 +        if (ir->nsteps > 0)
 +        {
 +            /* Signal to reset the counters half the simulation steps. */
 +            wcycle_set_reset_counters(wcycle,ir->nsteps/2);
 +        }
 +        /* Signal to reset the counters halfway the simulation time. */
 +        bResetCountersHalfMaxH = (max_hours > 0);
 +    }
 +
 +    /* md-vv uses averaged full step velocities for T-control 
 +       md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
 +       md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
 +    bVV = EI_VV(ir->eI);
 +    if (bVV) /* to store the initial velocities while computing virial */
 +    {
 +        snew(cbuf,top_global->natoms);
 +    }
 +    /* all the iteratative cases - only if there are constraints */ 
 +    bIterations = ((IR_NPH_TROTTER(ir) || IR_NPT_TROTTER(ir)) && (constr) && (!bRerunMD));
 +    bTrotter = (bVV && (IR_NPT_TROTTER(ir) || IR_NPH_TROTTER(ir) || IR_NVT_TROTTER(ir)));
 +    
 +    if (bRerunMD)
 +    {
 +        /* Since we don't know if the frames read are related in any way,
 +         * rebuild the neighborlist at every step.
 +         */
 +        ir->nstlist       = 1;
 +        ir->nstcalcenergy = 1;
 +        nstglobalcomm     = 1;
 +    }
 +
 +    check_ir_old_tpx_versions(cr,fplog,ir,top_global);
 +
 +    nstglobalcomm = check_nstglobalcomm(fplog,cr,nstglobalcomm,ir);
 +    bGStatEveryStep = (nstglobalcomm == 1);
 +
 +    if (!bGStatEveryStep && ir->nstlist == -1 && fplog != NULL)
 +    {
 +        fprintf(fplog,
 +                "To reduce the energy communication with nstlist = -1\n"
 +                "the neighbor list validity should not be checked at every step,\n"
 +                "this means that exact integration is not guaranteed.\n"
 +                "The neighbor list validity is checked after:\n"
 +                "  <n.list life time> - 2*std.dev.(n.list life time)  steps.\n"
 +                "In most cases this will result in exact integration.\n"
 +                "This reduces the energy communication by a factor of 2 to 3.\n"
 +                "If you want less energy communication, set nstlist > 3.\n\n");
 +    }
 +
 +    if (bRerunMD || bFFscan)
 +    {
 +        ir->nstxtcout = 0;
 +    }
 +    groups = &top_global->groups;
 +
 +    /* Initial values */
 +    init_md(fplog,cr,ir,oenv,&t,&t0,state_global->lambda,
 +            &(state_global->fep_state),lam0,
 +            nrnb,top_global,&upd,
 +            nfile,fnm,&outf,&mdebin,
 +            force_vir,shake_vir,mu_tot,&bSimAnn,&vcm,state_global,Flags);
 +
 +    clear_mat(total_vir);
 +    clear_mat(pres);
 +    /* Energy terms and groups */
 +    snew(enerd,1);
 +    init_enerdata(top_global->groups.grps[egcENER].nr,ir->fepvals->n_lambda,
 +                  enerd);
 +    if (DOMAINDECOMP(cr))
 +    {
 +        f = NULL;
 +    }
 +    else
 +    {
 +        snew(f,top_global->natoms);
 +    }
 +
 +    /* lambda Monte carlo random number generator  */
 +    if (ir->bExpanded)
 +    {
 +        mcrng = gmx_rng_init(ir->expandedvals->lmc_seed);
 +    }
 +    /* copy the state into df_history */
 +    copy_df_history(&df_history,&state_global->dfhist);
 +
 +    /* Kinetic energy data */
 +    snew(ekind,1);
 +    init_ekindata(fplog,top_global,&(ir->opts),ekind);
 +    /* needed for iteration of constraints */
 +    snew(ekind_save,1);
 +    init_ekindata(fplog,top_global,&(ir->opts),ekind_save);
 +    /* Copy the cos acceleration to the groups struct */    
 +    ekind->cosacc.cos_accel = ir->cos_accel;
 +
 +    gstat = global_stat_init(ir);
 +    debug_gmx();
 +
 +    /* Check for polarizable models and flexible constraints */
 +    shellfc = init_shell_flexcon(fplog,
 +                                 top_global,n_flexible_constraints(constr),
 +                                 (ir->bContinuation || 
 +                                  (DOMAINDECOMP(cr) && !MASTER(cr))) ?
 +                                 NULL : state_global->x);
 +
 +    if (DEFORM(*ir))
 +    {
 +#ifdef GMX_THREAD_MPI
 +        tMPI_Thread_mutex_lock(&deform_init_box_mutex);
 +#endif
 +        set_deform_reference_box(upd,
 +                                 deform_init_init_step_tpx,
 +                                 deform_init_box_tpx);
 +#ifdef GMX_THREAD_MPI
 +        tMPI_Thread_mutex_unlock(&deform_init_box_mutex);
 +#endif
 +    }
 +
 +    {
 +        double io = compute_io(ir,top_global->natoms,groups,mdebin->ebin->nener,1);
 +        if ((io > 2000) && MASTER(cr))
 +            fprintf(stderr,
 +                    "\nWARNING: This run will generate roughly %.0f Mb of data\n\n",
 +                    io);
 +    }
 +
 +    if (DOMAINDECOMP(cr)) {
 +        top = dd_init_local_top(top_global);
 +
 +        snew(state,1);
 +        dd_init_local_state(cr->dd,state_global,state);
 +
 +        if (DDMASTER(cr->dd) && ir->nstfout) {
 +            snew(f_global,state_global->natoms);
 +        }
 +    } else {
 +        if (PAR(cr)) {
 +            /* Initialize the particle decomposition and split the topology */
 +            top = split_system(fplog,top_global,ir,cr);
 +
 +            pd_cg_range(cr,&fr->cg0,&fr->hcg);
 +            pd_at_range(cr,&a0,&a1);
 +        } else {
 +            top = gmx_mtop_generate_local_top(top_global,ir);
 +
 +            a0 = 0;
 +            a1 = top_global->natoms;
 +        }
 +
 +        forcerec_set_excl_load(fr,top,cr);
 +
 +        state = partdec_init_local_state(cr,state_global);
 +        f_global = f;
 +
 +        atoms2md(top_global,ir,0,NULL,a0,a1-a0,mdatoms);
 +
 +        if (vsite) {
 +            set_vsite_top(vsite,top,mdatoms,cr);
 +        }
 +
 +        if (ir->ePBC != epbcNONE && !fr->bMolPBC) {
 +            graph = mk_graph(fplog,&(top->idef),0,top_global->natoms,FALSE,FALSE);
 +        }
 +
 +        if (shellfc) {
 +            make_local_shells(cr,mdatoms,shellfc);
 +        }
 +
 +        init_bonded_thread_force_reduction(fr,&top->idef);
 +
 +        if (ir->pull && PAR(cr)) {
 +            dd_make_local_pull_groups(NULL,ir->pull,mdatoms);
 +        }
 +    }
 +
 +    if (DOMAINDECOMP(cr))
 +    {
 +        /* Distribute the charge groups over the nodes from the master node */
 +        dd_partition_system(fplog,ir->init_step,cr,TRUE,1,
 +                            state_global,top_global,ir,
 +                            state,&f,mdatoms,top,fr,
 +                            vsite,shellfc,constr,
 +                            nrnb,wcycle,FALSE);
 +
 +    }
 +
 +    update_mdatoms(mdatoms,state->lambda[efptMASS]);
 +
 +    if (opt2bSet("-cpi",nfile,fnm))
 +    {
 +        bStateFromCP = gmx_fexist_master(opt2fn_master("-cpi",nfile,fnm,cr),cr);
 +    }
 +    else
 +    {
 +        bStateFromCP = FALSE;
 +    }
 +
 +    if (MASTER(cr))
 +    {
 +        if (bStateFromCP)
 +        {
 +            /* Update mdebin with energy history if appending to output files */
 +            if ( Flags & MD_APPENDFILES )
 +            {
 +                restore_energyhistory_from_state(mdebin,&state_global->enerhist);
 +            }
 +            else
 +            {
 +                /* We might have read an energy history from checkpoint,
 +                 * free the allocated memory and reset the counts.
 +                 */
 +                done_energyhistory(&state_global->enerhist);
 +                init_energyhistory(&state_global->enerhist);
 +            }
 +        }
 +        /* Set the initial energy history in state by updating once */
 +        update_energyhistory(&state_global->enerhist,mdebin);
 +    } 
 +
 +    if ((state->flags & (1<<estLD_RNG)) && (Flags & MD_READ_RNG)) 
 +    {
 +        /* Set the random state if we read a checkpoint file */
 +        set_stochd_state(upd,state);
 +    }
 +
 +    if (state->flags & (1<<estMC_RNG))
 +    {
 +        set_mc_state(mcrng,state);
 +    }
 +
 +    /* Initialize constraints */
 +    if (constr) {
 +        if (!DOMAINDECOMP(cr))
 +            set_constraints(constr,top,ir,mdatoms,cr);
 +    }
 +
 +    /* Check whether we have to GCT stuff */
 +    bTCR = ftp2bSet(efGCT,nfile,fnm);
 +    if (bTCR) {
 +        if (MASTER(cr)) {
 +            fprintf(stderr,"Will do General Coupling Theory!\n");
 +        }
 +        gnx = top_global->mols.nr;
 +        snew(grpindex,gnx);
 +        for(i=0; (i<gnx); i++) {
 +            grpindex[i] = i;
 +        }
 +    }
 +
 +    if (repl_ex_nst > 0)
 +    {
 +        /* We need to be sure replica exchange can only occur
 +         * when the energies are current */
 +        check_nst_param(fplog,cr,"nstcalcenergy",ir->nstcalcenergy,
 +                        "repl_ex_nst",&repl_ex_nst);
 +        /* This check needs to happen before inter-simulation
 +         * signals are initialized, too */
 +    }
 +    if (repl_ex_nst > 0 && MASTER(cr))
 +    {
 +        repl_ex = init_replica_exchange(fplog,cr->ms,state_global,ir,
 +                                        repl_ex_nst,repl_ex_nex,repl_ex_seed);
 +    }
 +
 +    /* PME tuning is only supported with GPUs or PME nodes and not with rerun */
 +    if ((Flags & MD_TUNEPME) &&
 +        EEL_PME(fr->eeltype) &&
-                        (bNStList ? GMX_FORCE_DOLR : 0) |
++        ( (fr->cutoff_scheme == ecutsVERLET && fr->nbv->bUseGPU) || !(cr->duty & DUTY_PME)) &&
 +        !bRerunMD)
 +    {
 +        pme_loadbal_init(&pme_loadbal,ir,state->box,fr->ic,fr->pmedata);
 +        cycles_pmes = 0;
 +        if (cr->duty & DUTY_PME)
 +        {
 +            /* Start tuning right away, as we can't measure the load */
 +            bPMETuneRunning = TRUE;
 +        }
 +        else
 +        {
 +            /* Separate PME nodes, we can measure the PP/PME load balance */
 +            bPMETuneTry = TRUE;
 +        }
 +    }
 +
 +    if (!ir->bContinuation && !bRerunMD)
 +    {
 +        if (mdatoms->cFREEZE && (state->flags & (1<<estV)))
 +        {
 +            /* Set the velocities of frozen particles to zero */
 +            for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++)
 +            {
 +                for(m=0; m<DIM; m++)
 +                {
 +                    if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
 +                    {
 +                        state->v[i][m] = 0;
 +                    }
 +                }
 +            }
 +        }
 +
 +        if (constr)
 +        {
 +            /* Constrain the initial coordinates and velocities */
 +            do_constrain_first(fplog,constr,ir,mdatoms,state,f,
 +                               graph,cr,nrnb,fr,top,shake_vir);
 +        }
 +        if (vsite)
 +        {
 +            /* Construct the virtual sites for the initial configuration */
 +            construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,NULL,
 +                             top->idef.iparams,top->idef.il,
 +                             fr->ePBC,fr->bMolPBC,graph,cr,state->box);
 +        }
 +    }
 +
 +    debug_gmx();
 +  
 +    /* set free energy calculation frequency as the minimum of nstdhdl, nstexpanded, and nstrepl_ex_nst*/
 +    nstfep = ir->fepvals->nstdhdl;
 +    if (ir->bExpanded && (nstfep > ir->expandedvals->nstexpanded))
 +    {
 +        nstfep = ir->expandedvals->nstexpanded;
 +    }
 +    if (repl_ex_nst > 0 && repl_ex_nst > nstfep)
 +    {
 +        nstfep = repl_ex_nst;
 +    }
 +
 +    /* I'm assuming we need global communication the first time! MRS */
 +    cglo_flags = (CGLO_TEMPERATURE | CGLO_GSTAT
 +                  | ((ir->comm_mode != ecmNO) ? CGLO_STOPCM:0)
 +                  | (bVV ? CGLO_PRESSURE:0)
 +                  | (bVV ? CGLO_CONSTRAINT:0)
 +                  | (bRerunMD ? CGLO_RERUNMD:0)
 +                  | ((Flags & MD_READ_EKIN) ? CGLO_READEKIN:0));
 +    
 +    bSumEkinhOld = FALSE;
 +    compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                    NULL,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
 +                    constr,NULL,FALSE,state->box,
 +                    top_global,&pcurr,top_global->natoms,&bSumEkinhOld,cglo_flags);
 +    if (ir->eI == eiVVAK) {
 +        /* a second call to get the half step temperature initialized as well */ 
 +        /* we do the same call as above, but turn the pressure off -- internally to 
 +           compute_globals, this is recognized as a velocity verlet half-step 
 +           kinetic energy calculation.  This minimizes excess variables, but 
 +           perhaps loses some logic?*/
 +        
 +        compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                        NULL,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
 +                        constr,NULL,FALSE,state->box,
 +                        top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
 +                        cglo_flags &~ (CGLO_STOPCM | CGLO_PRESSURE));
 +    }
 +    
 +    /* Calculate the initial half step temperature, and save the ekinh_old */
 +    if (!(Flags & MD_STARTFROMCPT)) 
 +    {
 +        for(i=0; (i<ir->opts.ngtc); i++) 
 +        {
 +            copy_mat(ekind->tcstat[i].ekinh,ekind->tcstat[i].ekinh_old);
 +        } 
 +    }
 +    if (ir->eI != eiVV) 
 +    {
 +        enerd->term[F_TEMP] *= 2; /* result of averages being done over previous and current step,
 +                                     and there is no previous step */
 +    }
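The factor of 2 above follows from half-step averaging: the reported temperature is the mean of the previous and current half-step values, and at the very first step the previous half step does not exist. A tiny illustration (the names are made up, not GROMACS symbols):

/* Reported T is the average of two half-step temperatures; with no previous
 * half step that average is half the true value, hence the *= 2 above. */
static double reported_temperature(double T_half_prev, double T_half_curr)
{
    return 0.5*(T_half_prev + T_half_curr);
}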
 +    
 +    /* if using an iterative algorithm, we need to create a working directory for the state. */
 +    if (bIterations) 
 +    {
 +            bufstate = init_bufstate(state);
 +    }
 +    if (bFFscan) 
 +    {
 +        snew(xcopy,state->natoms);
 +        snew(vcopy,state->natoms);
 +        copy_rvecn(state->x,xcopy,0,state->natoms);
 +        copy_rvecn(state->v,vcopy,0,state->natoms);
 +        copy_mat(state->box,boxcopy);
 +    } 
 +    
 +    /* need to make an initiation call to get the Trotter variables set, as well as other constants for non-trotter
 +       temperature control */
 +    trotter_seq = init_npt_vars(ir,state,&MassQ,bTrotter);
 +    
 +    if (MASTER(cr))
 +    {
 +        if (constr && !ir->bContinuation && ir->eConstrAlg == econtLINCS)
 +        {
 +            fprintf(fplog,
 +                    "RMS relative constraint deviation after constraining: %.2e\n",
 +                    constr_rmsd(constr,FALSE));
 +        }
 +        if (EI_STATE_VELOCITY(ir->eI))
 +        {
 +            fprintf(fplog,"Initial temperature: %g K\n",enerd->term[F_TEMP]);
 +        }
 +        if (bRerunMD)
 +        {
 +            fprintf(stderr,"starting md rerun '%s', reading coordinates from"
 +                    " input trajectory '%s'\n\n",
 +                    *(top_global->name),opt2fn("-rerun",nfile,fnm));
 +            if (bVerbose)
 +            {
 +                fprintf(stderr,"Calculated time to finish depends on nsteps from "
 +                        "run input file,\nwhich may not correspond to the time "
 +                        "needed to process input trajectory.\n\n");
 +            }
 +        }
 +        else
 +        {
 +            char tbuf[20];
 +            fprintf(stderr,"starting mdrun '%s'\n",
 +                    *(top_global->name));
 +            if (ir->nsteps >= 0)
 +            {
 +                sprintf(tbuf,"%8.1f",(ir->init_step+ir->nsteps)*ir->delta_t);
 +            }
 +            else
 +            {
 +                sprintf(tbuf,"%s","infinite");
 +            }
 +            if (ir->init_step > 0)
 +            {
 +                fprintf(stderr,"%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
 +                        gmx_step_str(ir->init_step+ir->nsteps,sbuf),tbuf,
 +                        gmx_step_str(ir->init_step,sbuf2),
 +                        ir->init_step*ir->delta_t);
 +            }
 +            else
 +            {
 +                fprintf(stderr,"%s steps, %s ps.\n",
 +                        gmx_step_str(ir->nsteps,sbuf),tbuf);
 +            }
 +        }
 +        fprintf(fplog,"\n");
 +    }
 +
 +    /* Set and write start time */
 +    runtime_start(runtime);
 +    print_date_and_time(fplog,cr->nodeid,"Started mdrun",runtime);
 +    wallcycle_start(wcycle,ewcRUN);
 +    if (fplog)
 +    {
 +        fprintf(fplog,"\n");
 +    }
 +
 +    /* safest point to do file checkpointing is here.  More general point would be immediately before integrator call */
 +#ifdef GMX_FAHCORE
 +    chkpt_ret=fcCheckPointParallel( cr->nodeid,
 +                                    NULL,0);
 +    if ( chkpt_ret == 0 ) 
 +        gmx_fatal( 3,__FILE__,__LINE__, "Checkpoint error on step %d\n", 0 );
 +#endif
 +
 +    debug_gmx();
 +    /***********************************************************
 +     *
 +     *             Loop over MD steps 
 +     *
 +     ************************************************************/
 +
 +    /* if rerunMD then read coordinates and velocities from input trajectory */
 +    if (bRerunMD)
 +    {
 +        if (getenv("GMX_FORCE_UPDATE"))
 +        {
 +            bForceUpdate = TRUE;
 +        }
 +
 +        rerun_fr.natoms = 0;
 +        if (MASTER(cr))
 +        {
 +            bNotLastFrame = read_first_frame(oenv,&status,
 +                                             opt2fn("-rerun",nfile,fnm),
 +                                             &rerun_fr,TRX_NEED_X | TRX_READ_V);
 +            if (rerun_fr.natoms != top_global->natoms)
 +            {
 +                gmx_fatal(FARGS,
 +                          "Number of atoms in trajectory (%d) does not match the "
 +                          "run input file (%d)\n",
 +                          rerun_fr.natoms,top_global->natoms);
 +            }
 +            if (ir->ePBC != epbcNONE)
 +            {
 +                if (!rerun_fr.bBox)
 +                {
 +                    gmx_fatal(FARGS,"Rerun trajectory frame step %d time %f does not contain a box, while pbc is used",rerun_fr.step,rerun_fr.time);
 +                }
 +                if (max_cutoff2(ir->ePBC,rerun_fr.box) < sqr(fr->rlistlong))
 +                {
 +                    gmx_fatal(FARGS,"Rerun trajectory frame step %d time %f has too small box dimensions",rerun_fr.step,rerun_fr.time);
 +                }
 +            }
 +        }
 +
 +        if (PAR(cr))
 +        {
 +            rerun_parallel_comm(cr,&rerun_fr,&bNotLastFrame);
 +        }
 +
 +        if (ir->ePBC != epbcNONE)
 +        {
 +            /* Set the shift vectors.
 +             * Necessary here when we have a static box different from the tpr box.
 +             */
 +            calc_shifts(rerun_fr.box,fr->shift_vec);
 +        }
 +    }
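calc_shifts() has to be redone here because the shift vectors depend on the box, and a rerun frame may carry a box different from the one in the tpr. As a reminder of what those vectors are, a generic sketch: each periodic image shift is an integer combination of the (triclinic) box vectors (the real shift table additionally fixes a particular ordering and range of the indices):

/* Illustrative: shift vector for image (i,j,k) of a triclinic box whose rows
 * are the box vectors, s = i*b[0] + j*b[1] + k*b[2]. */
static void image_shift(const double box[3][3], int i, int j, int k, double s[3])
{
    int d;
    for (d = 0; d < 3; d++)
    {
        s[d] = i*box[0][d] + j*box[1][d] + k*box[2][d];
    }
}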
 +
 +    /* loop over MD steps or if rerunMD to end of input trajectory */
 +    bFirstStep = TRUE;
 +    /* Skip the first Nose-Hoover integration when we get the state from tpx */
 +    bStateFromTPX = !bStateFromCP;
 +    bInitStep = bFirstStep && (bStateFromTPX || bVV);
 +    bStartingFromCpt = (Flags & MD_STARTFROMCPT) && bInitStep;
 +    bLastStep    = FALSE;
 +    bSumEkinhOld = FALSE;
 +    bExchanged   = FALSE;
 +
 +    init_global_signals(&gs,cr,ir,repl_ex_nst);
 +
 +    step = ir->init_step;
 +    step_rel = 0;
 +
 +    if (ir->nstlist == -1)
 +    {
 +        init_nlistheuristics(&nlh,bGStatEveryStep,step);
 +    }
 +
 +    if (MULTISIM(cr) && (repl_ex_nst <=0 ))
 +    {
 +        /* check how many steps are left in other sims */
 +        multisim_nsteps=get_multisim_nsteps(cr, ir->nsteps);
 +    }
 +
 +
 +    /* and stop now if we should */
 +    bLastStep = (bRerunMD || (ir->nsteps >= 0 && step_rel > ir->nsteps) ||
 +                 ((multisim_nsteps >= 0) && (step_rel >= multisim_nsteps )));
 +    while (!bLastStep || (bRerunMD && bNotLastFrame)) {
 +
 +        wallcycle_start(wcycle,ewcSTEP);
 +
 +        if (bRerunMD) {
 +            if (rerun_fr.bStep) {
 +                step = rerun_fr.step;
 +                step_rel = step - ir->init_step;
 +            }
 +            if (rerun_fr.bTime) {
 +                t = rerun_fr.time;
 +            }
 +            else
 +            {
 +                t = step;
 +            }
 +        } 
 +        else 
 +        {
 +            bLastStep = (step_rel == ir->nsteps);
 +            t = t0 + step*ir->delta_t;
 +        }
 +
 +        if (ir->efep != efepNO || ir->bSimTemp)
 +        {
 +            /* find and set the current lambdas.  If rerunning, we either read in a state, or a lambda value,
 +               requiring different logic. */
 +            
 +            set_current_lambdas(step,ir->fepvals,bRerunMD,&rerun_fr,state_global,state,lam0);
 +            bDoDHDL = do_per_step(step,ir->fepvals->nstdhdl);
 +            bDoFEP  = (do_per_step(step,nstfep) && (ir->efep != efepNO));
 +            bDoExpanded  = (do_per_step(step,ir->expandedvals->nstexpanded) && (ir->bExpanded) && (step > 0));
 +        }
 +
 +        if (bSimAnn) 
 +        {
 +            update_annealing_target_temp(&(ir->opts),t);
 +        }
 +
 +        if (bRerunMD)
 +        {
 +            if (!(DOMAINDECOMP(cr) && !MASTER(cr)))
 +            {
 +                for(i=0; i<state_global->natoms; i++)
 +                {
 +                    copy_rvec(rerun_fr.x[i],state_global->x[i]);
 +                }
 +                if (rerun_fr.bV)
 +                {
 +                    for(i=0; i<state_global->natoms; i++)
 +                    {
 +                        copy_rvec(rerun_fr.v[i],state_global->v[i]);
 +                    }
 +                }
 +                else
 +                {
 +                    for(i=0; i<state_global->natoms; i++)
 +                    {
 +                        clear_rvec(state_global->v[i]);
 +                    }
 +                    if (bRerunWarnNoV)
 +                    {
 +                        fprintf(stderr,"\nWARNING: Some frames do not contain velocities.\n"
 +                                "         Ekin, temperature and pressure are incorrect,\n"
 +                                "         the virial will be incorrect when constraints are present.\n"
 +                                "\n");
 +                        bRerunWarnNoV = FALSE;
 +                    }
 +                }
 +            }
 +            copy_mat(rerun_fr.box,state_global->box);
 +            copy_mat(state_global->box,state->box);
 +
 +            if (vsite && (Flags & MD_RERUN_VSITE))
 +            {
 +                if (DOMAINDECOMP(cr))
 +                {
 +                    gmx_fatal(FARGS,"Vsite recalculation with -rerun is not implemented for domain decomposition, use particle decomposition");
 +                }
 +                if (graph)
 +                {
 +                    /* Following is necessary because the graph may get out of sync
 +                     * with the coordinates if we only have every N'th coordinate set
 +                     */
 +                    mk_mshift(fplog,graph,fr->ePBC,state->box,state->x);
 +                    shift_self(graph,state->box,state->x);
 +                }
 +                construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,state->v,
 +                                 top->idef.iparams,top->idef.il,
 +                                 fr->ePBC,fr->bMolPBC,graph,cr,state->box);
 +                if (graph)
 +                {
 +                    unshift_self(graph,state->box,state->x);
 +                }
 +            }
 +        }
 +
 +        /* Stop Center of Mass motion */
 +        bStopCM = (ir->comm_mode != ecmNO && do_per_step(step,ir->nstcomm));
 +
 +        /* Copy back starting coordinates in case we're doing a forcefield scan */
 +        if (bFFscan)
 +        {
 +            for(ii=0; (ii<state->natoms); ii++)
 +            {
 +                copy_rvec(xcopy[ii],state->x[ii]);
 +                copy_rvec(vcopy[ii],state->v[ii]);
 +            }
 +            copy_mat(boxcopy,state->box);
 +        }
 +
 +        if (bRerunMD)
 +        {
 +            /* for rerun MD always do Neighbour Searching */
 +            bNS = (bFirstStep || ir->nstlist != 0);
 +            bNStList = bNS;
 +        }
 +        else
 +        {
 +            /* Determine whether or not to do Neighbour Searching and LR */
 +            bNStList = (ir->nstlist > 0  && step % ir->nstlist == 0);
 +            
 +            bNS = (bFirstStep || bExchanged || bNStList || bDoFEP ||
 +                   (ir->nstlist == -1 && nlh.nabnsb > 0));
 +
 +            if (bNS && ir->nstlist == -1)
 +            {
 +                set_nlistheuristics(&nlh,bFirstStep || bExchanged || bDoFEP, step);
 +            }
 +        } 
 +
 +        /* check whether we should stop because another simulation has 
 +           stopped. */
 +        if (MULTISIM(cr))
 +        {
 +            if ( (multisim_nsteps >= 0) &&  (step_rel >= multisim_nsteps)  &&  
 +                 (multisim_nsteps != ir->nsteps) )  
 +            {
 +                if (bNS)
 +                {
 +                    if (MASTER(cr))
 +                    {
 +                        fprintf(stderr, 
 +                                "Stopping simulation %d because another one has finished\n",
 +                                cr->ms->sim);
 +                    }
 +                    bLastStep=TRUE;
 +                    gs.sig[eglsCHKPT] = 1;
 +                }
 +            }
 +        }
 +
 +        /* < 0 means stop at next step, > 0 means stop at next NS step */
 +        if ( (gs.set[eglsSTOPCOND] < 0 ) ||
 +             ( (gs.set[eglsSTOPCOND] > 0 ) && ( bNS || ir->nstlist==0)) )
 +        {
 +            bLastStep = TRUE;
 +        }
 +
 +        /* Determine whether or not to update the Born radii if doing GB */
 +        bBornRadii=bFirstStep;
 +        if (ir->implicit_solvent && (step % ir->nstgbradii==0))
 +        {
 +            bBornRadii=TRUE;
 +        }
 +        
 +        do_log = do_per_step(step,ir->nstlog) || bFirstStep || bLastStep;
 +        do_verbose = bVerbose &&
 +                  (step % stepout == 0 || bFirstStep || bLastStep);
 +
 +        if (bNS && !(bFirstStep && ir->bContinuation && !bRerunMD))
 +        {
 +            if (bRerunMD)
 +            {
 +                bMasterState = TRUE;
 +            }
 +            else
 +            {
 +                bMasterState = FALSE;
 +                /* Correct the new box if it is too skewed */
 +                if (DYNAMIC_BOX(*ir))
 +                {
 +                    if (correct_box(fplog,step,state->box,graph))
 +                    {
 +                        bMasterState = TRUE;
 +                    }
 +                }
 +                if (DOMAINDECOMP(cr) && bMasterState)
 +                {
 +                    dd_collect_state(cr->dd,state,state_global);
 +                }
 +            }
 +
 +            if (DOMAINDECOMP(cr))
 +            {
 +                /* Repartition the domain decomposition */
 +                wallcycle_start(wcycle,ewcDOMDEC);
 +                dd_partition_system(fplog,step,cr,
 +                                    bMasterState,nstglobalcomm,
 +                                    state_global,top_global,ir,
 +                                    state,&f,mdatoms,top,fr,
 +                                    vsite,shellfc,constr,
 +                                    nrnb,wcycle,
 +                                    do_verbose && !bPMETuneRunning);
 +                wallcycle_stop(wcycle,ewcDOMDEC);
 +                /* If using an iterative integrator, reallocate space to match the decomposition */
 +            }
 +        }
 +
 +        if (MASTER(cr) && do_log && !bFFscan)
 +        {
 +            print_ebin_header(fplog,step,t,state->lambda[efptFEP]); /* can we improve the information printed here? */
 +        }
 +
 +        if (ir->efep != efepNO)
 +        {
 +            update_mdatoms(mdatoms,state->lambda[efptMASS]);
 +        }
 +
 +        if ((bRerunMD && rerun_fr.bV) || bExchanged)
 +        {
 +            
 +            /* We need the kinetic energy at minus the half step for determining
 +             * the full step kinetic energy and possibly for T-coupling.*/
 +            /* This may not be quite working correctly yet . . . . */
 +            compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                            wcycle,enerd,NULL,NULL,NULL,NULL,mu_tot,
 +                            constr,NULL,FALSE,state->box,
 +                            top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
 +                            CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE);
 +        }
 +        clear_mat(force_vir);
 +        
 +        /* Ionize the atoms if necessary */
 +        if (bIonize)
 +        {
 +            ionize(fplog,oenv,mdatoms,top_global,t,ir,state->x,state->v,
 +                   mdatoms->start,mdatoms->start+mdatoms->homenr,state->box,cr);
 +        }
 +        
 +        /* Update force field in ffscan program */
 +        if (bFFscan)
 +        {
 +            if (update_forcefield(fplog,
 +                                  nfile,fnm,fr,
 +                                  mdatoms->nr,state->x,state->box))
 +            {
 +                gmx_finalize_par();
 +
 +                exit(0);
 +            }
 +        }
 +
 +        /* We write a checkpoint at this MD step when:
 +         * either at an NS step when we signalled through gs,
 +         * or at the last step (but not when we do not want confout),
 +         * but never at the first step or with rerun.
 +         */
 +        bCPT = (((gs.set[eglsCHKPT] && (bNS || ir->nstlist == 0)) ||
 +                 (bLastStep && (Flags & MD_CONFOUT))) &&
 +                step > ir->init_step && !bRerunMD);
 +        if (bCPT)
 +        {
 +            gs.set[eglsCHKPT] = 0;
 +        }
 +
 +        /* Determine the energy and pressure:
 +         * at nstcalcenergy steps and at energy output steps (set below).
 +         */
 +        if (EI_VV(ir->eI) && (!bInitStep))
 +        {
 +            /* for vv, the first half actually corresponds to the last step */
 +            bCalcEner = do_per_step(step-1,ir->nstcalcenergy);
 +        }
 +        else
 +        {
 +            bCalcEner = do_per_step(step,ir->nstcalcenergy);
 +        }
 +        bCalcVir = bCalcEner ||
 +            (ir->epc != epcNO && do_per_step(step,ir->nstpcouple));
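Most of the scheduling in this loop goes through do_per_step(). A hedged stand-in with the commonly assumed semantics, true on every nst-th step and never when the interval is disabled, is shown below; the real helper lives elsewhere in the tree and may differ in detail:

/* Illustrative scheduling predicate, not the actual do_per_step(). */
static int do_per_step_sketch(long long step, long long nst)
{
    return (nst > 0) && (step % nst == 0);
}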
 +
 +        /* Do we need global communication ? */
 +        bGStat = (bCalcVir || bCalcEner || bStopCM ||
 +                  do_per_step(step,nstglobalcomm) ||
 +                  (ir->nstlist == -1 && !bRerunMD && step >= nlh.step_nscheck));
 +
 +        do_ene = (do_per_step(step,ir->nstenergy) || bLastStep);
 +
 +        if (do_ene || do_log)
 +        {
 +            bCalcVir  = TRUE;
 +            bCalcEner = TRUE;
 +            bGStat    = TRUE;
 +        }
 +        
 +        /* these CGLO_ options remain the same throughout the iteration */
 +        cglo_flags = ((bRerunMD ? CGLO_RERUNMD : 0) |
 +                      (bGStat ? CGLO_GSTAT : 0)
 +            );
 +        
 +        force_flags = (GMX_FORCE_STATECHANGED |
 +                       ((DYNAMIC_BOX(*ir) || bRerunMD) ? GMX_FORCE_DYNAMICBOX : 0) |
 +                       GMX_FORCE_ALLFORCES |
 -                       (bNStList ? GMX_FORCE_DOLR : 0) |
 +                       GMX_FORCE_SEPLRF |
 +                       (bCalcVir ? GMX_FORCE_VIRIAL : 0) |
 +                       (bCalcEner ? GMX_FORCE_ENERGY : 0) |
 +                       (bDoFEP ? GMX_FORCE_DHDL : 0)
 +            );
++
++        if(fr->bTwinRange)
++        {
++            if(do_per_step(step,ir->nstcalclr))
++            {
++                force_flags |= GMX_FORCE_DO_LR;
++            }
++        }
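force_flags is built as a plain bitmask: each condition ORs in a capability bit, and the force code later tests those bits with '&'. A self-contained sketch of the pattern with made-up flag values (the real GMX_FORCE_* constants are defined in the headers):

/* Illustrative flag composition; values are invented for the sketch. */
enum {
    SKETCH_FORCE_ENERGY = 1<<0,
    SKETCH_FORCE_VIRIAL = 1<<1,
    SKETCH_FORCE_DO_LR  = 1<<2
};

static int build_force_flags(int calc_ener, int calc_vir, int do_lr)
{
    return (calc_ener ? SKETCH_FORCE_ENERGY : 0) |
           (calc_vir  ? SKETCH_FORCE_VIRIAL : 0) |
           (do_lr     ? SKETCH_FORCE_DO_LR  : 0);
}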
 +        
 +        if (shellfc)
 +        {
 +            /* Now is the time to relax the shells */
 +            count=relax_shell_flexcon(fplog,cr,bVerbose,bFFscan ? step+1 : step,
 +                                      ir,bNS,force_flags,
 +                                      bStopCM,top,top_global,
 +                                      constr,enerd,fcd,
 +                                      state,f,force_vir,mdatoms,
 +                                      nrnb,wcycle,graph,groups,
 +                                      shellfc,fr,bBornRadii,t,mu_tot,
 +                                      state->natoms,&bConverged,vsite,
 +                                      outf->fp_field);
 +            tcount+=count;
 +
 +            if (bConverged)
 +            {
 +                nconverged++;
 +            }
 +        }
 +        else
 +        {
 +            /* The coordinates (x) are shifted (to get whole molecules)
 +             * in do_force.
 +             * This is parallelized as well, and does communication too.
 +             * Check comments in sim_util.c
 +             */
 +             do_force(fplog,cr,ir,step,nrnb,wcycle,top,top_global,groups,
 +                     state->box,state->x,&state->hist,
 +                     f,force_vir,mdatoms,enerd,fcd,
 +                     state->lambda,graph,
 +                     fr,vsite,mu_tot,t,outf->fp_field,ed,bBornRadii,
 +                     (bNS ? GMX_FORCE_NS : 0) | force_flags);
 +        }
 +        
 +        if (bTCR)
 +        {
 +            mu_aver = calc_mu_aver(cr,state->x,mdatoms->chargeA,
 +                                   mu_tot,&top_global->mols,mdatoms,gnx,grpindex);
 +        }
 +        
 +        if (bTCR && bFirstStep)
 +        {
 +            tcr=init_coupling(fplog,nfile,fnm,cr,fr,mdatoms,&(top->idef));
 +            fprintf(fplog,"Done init_coupling\n"); 
 +            fflush(fplog);
 +        }
 +        
 +        if (bVV && !bStartingFromCpt && !bRerunMD)
 +        /*  ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */
 +        {
 +            if (ir->eI==eiVV && bInitStep) 
 +            {
 +                /* if using velocity verlet with full time step Ekin,
 +                 * take the first half step only to compute the 
 +                 * virial for the first step. From there,
 +                 * revert back to the initial coordinates
 +                 * so that the input is actually the initial step.
 +                 */
 +                copy_rvecn(state->v,cbuf,0,state->natoms); /* should make this better for parallelizing? */
 +            } else {
 +                /* this is for NHC in the Ekin(t+dt/2) version of vv */
 +                trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ1);            
 +            }
 +
++            /* If we are using twin-range interactions where the long-range component
++             * is only evaluated every nstcalclr>1 steps, we should do a special update
++             * step to combine the long-range forces on these steps.
++             * For nstcalclr=1 this is not done, since the forces would have been added
++             * directly to the short-range forces already.
++             */
++            bUpdateDoLR = (fr->bTwinRange && do_per_step(step,ir->nstcalclr));
++            
 +            update_coords(fplog,step,ir,mdatoms,state,fr->bMolPBC,
-                                   fr->bTwinRange && bNStList,fr->f_twin,fcd,
++                          f,bUpdateDoLR,fr->f_twin,fcd,
 +                          ekind,M,wcycle,upd,bInitStep,etrtVELOCITY1,
 +                          cr,nrnb,constr,&top->idef);
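The merge comment above describes the twin-range bookkeeping: the long-range force buffer is only refreshed every nstcalclr steps, and only on those steps is it folded into the update. A hedged sketch of one way to combine the two buffers; whether the long-range part is applied as an nstcalclr-scaled impulse or kept constant between evaluations is a detail of the real update code not shown here:

/* Illustrative only: pick the force used by the update on this step.
 * f_sr/f_lr/nstcalclr mirror the names above, but the scaling is an assumption. */
static void combine_twin_range_forces(long long step, int nstcalclr, int n,
                                      const double f_sr[], const double f_lr[],
                                      double f_out[])
{
    int i;
    int add_lr = (nstcalclr > 0) && (step % nstcalclr == 0);

    for (i = 0; i < n; i++)
    {
        /* impulse-style: the long-range force enters only on nstcalclr steps,
         * scaled so its time-average is preserved (assumed, see lead-in) */
        f_out[i] = f_sr[i] + (add_lr ? nstcalclr*f_lr[i] : 0.0);
    }
}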
 +            
 +            if (bIterations)
 +            {
 +                gmx_iterate_init(&iterate,bIterations && !bInitStep);
 +            }
 +            /* for iterations, we save these vectors, as we will be self-consistently iterating
 +               the calculations */
 +
 +            /*#### UPDATE EXTENDED VARIABLES IN TROTTER FORMULATION */
 +            
 +            /* save the state */
 +            if (bIterations && iterate.bIterate) { 
 +                copy_coupling_state(state,bufstate,ekind,ekind_save,&(ir->opts));
 +            }
 +            
 +            bFirstIterate = TRUE;
 +            while (bFirstIterate || (bIterations && iterate.bIterate))
 +            {
 +                if (bIterations && iterate.bIterate) 
 +                {
 +                    copy_coupling_state(bufstate,state,ekind_save,ekind,&(ir->opts));
 +                    if (bFirstIterate && bTrotter) 
 +                    {
 +                        /* The first time through, we need a decent first estimate
 +                           of veta(t+dt) to compute the constraints.  Do
 +                           this by computing the box volume part of the
 +                           trotter integration at this time. Nothing else
 +                           should be changed by this routine here.  If
 +                           !(first time), we start with the previous value
 +                           of veta.  */
 +                        
 +                        veta_save = state->veta;
 +                        trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ0);
 +                        vetanew = state->veta;
 +                        state->veta = veta_save;
 +                    } 
 +                } 
 +                
 +                bOK = TRUE;
 +                if ( !bRerunMD || rerun_fr.bV || bForceUpdate) {  /* Why is rerun_fr.bV here?  Unclear. */
 +                    dvdl = 0;
 +                    
 +                    update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,
 +                                       state,fr->bMolPBC,graph,f,
 +                                       &top->idef,shake_vir,NULL,
 +                                       cr,nrnb,wcycle,upd,constr,
 +                                       bInitStep,TRUE,bCalcVir,vetanew);
 +                    
 +                    if (!bOK && !bFFscan)
 +                    {
 +                        gmx_fatal(FARGS,"Constraint error: Shake, Lincs or Settle could not solve the constrains");
 +                    }
 +                    
 +                } 
 +                else if (graph)
 +                { /* Need to unshift here if a do_force has been
 +                     called in the previous step */
 +                    unshift_self(graph,state->box,state->x);
 +                }
 +                
 +                
 +                /* if VV, compute the pressure and constraints */
 +                /* For VV2, we strictly only need this if using pressure
 +                 * control, but we really would like to have accurate pressures
 +                 * printed out.
 +                 * Think about ways around this in the future?
 +                 * For now, keep this choice in comments.
 +                 */
 +                /*bPres = (ir->eI==eiVV || IR_NPT_TROTTER(ir)); */
 +                    /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && IR_NPT_TROTTER(ir)));*/
 +                bPres = TRUE;
 +                bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK));
 +                if (bCalcEner && ir->eI==eiVVAK)  /*MRS:  7/9/2010 -- this still doesn't fix it?*/
 +                {
 +                    bSumEkinhOld = TRUE;
 +                }
 +                compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                                wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
 +                                constr,NULL,FALSE,state->box,
 +                                top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
 +                                cglo_flags 
 +                                | CGLO_ENERGY 
 +                                | (bStopCM ? CGLO_STOPCM : 0)
 +                                | (bTemp ? CGLO_TEMPERATURE:0) 
 +                                | (bPres ? CGLO_PRESSURE : 0) 
 +                                | (bPres ? CGLO_CONSTRAINT : 0)
 +                                | ((bIterations && iterate.bIterate) ? CGLO_ITERATE : 0)  
 +                                | (bFirstIterate ? CGLO_FIRSTITERATE : 0)
 +                                | CGLO_SCALEEKIN 
 +                    );
 +                /* explanation of above: 
 +                   a) We compute Ekin at the full time step
 +                   if 1) we are using the AveVel Ekin, and it's not the
 +                   initial step, or 2) if we are using AveEkin, but need the full
 +                   time step kinetic energy for the pressure (always true now, since we want accurate statistics).
 +                   b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in 
 +                   EkinAveVel because it's needed for the pressure */
 +                
 +                /* temperature scaling and pressure scaling to produce the extended variables at t+dt */
 +                if (!bInitStep) 
 +                {
 +                    if (bTrotter)
 +                    {
 +                        trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ2);
 +                    } 
 +                    else 
 +                    {
 +                        if (bExchanged)
 +                        {
 +            
 +                            /* We need the kinetic energy at minus the half step for determining
 +                             * the full step kinetic energy and possibly for T-coupling.*/
 +                            /* This may not be quite working correctly yet . . . . */
 +                            compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                                            wcycle,enerd,NULL,NULL,NULL,NULL,mu_tot,
 +                                            constr,NULL,FALSE,state->box,
 +                                            top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
 +                                            CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE);
 +                        }
 +
 +
 +                        update_tcouple(fplog,step,ir,state,ekind,wcycle,upd,&MassQ,mdatoms);
 +                    }
 +                }
 +                
 +                if (bIterations &&
 +                    done_iterating(cr,fplog,step,&iterate,bFirstIterate,
 +                                   state->veta,&vetanew)) 
 +                {
 +                    break;
 +                }
 +                bFirstIterate = FALSE;
 +            }
 +
 +            if (bTrotter && !bInitStep) {
 +                enerd->term[F_DVDL_BONDED] += dvdl;        /* only add after iterations */
 +                copy_mat(shake_vir,state->svir_prev);
 +                copy_mat(force_vir,state->fvir_prev);
 +                if (IR_NVT_TROTTER(ir) && ir->eI==eiVV) {
 +                    /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */
 +                    enerd->term[F_TEMP] = sum_ekin(&(ir->opts),ekind,NULL,(ir->eI==eiVV),FALSE,FALSE);
 +                    enerd->term[F_EKIN] = trace(ekind->ekin);
 +                }
 +            }
 +            /* if it's the initial step, we performed this first step just to get the constraint virial */
 +            if (bInitStep && ir->eI==eiVV) {
 +                copy_rvecn(cbuf,state->v,0,state->natoms);
 +            }
 +            
 +            if (fr->bSepDVDL && fplog && do_log) 
 +            {
 +                fprintf(fplog,sepdvdlformat,"Constraint",0.0,dvdl);
 +            }
 +            enerd->term[F_DVDL_BONDED] += dvdl;
 +        }
 +    
 +        /* MRS -- now done iterating -- compute the conserved quantity */
 +        if (bVV) {
 +            saved_conserved_quantity = compute_conserved_from_auxiliary(ir,state,&MassQ);
 +            if (ir->eI==eiVV) 
 +            {
 +                last_ekin = enerd->term[F_EKIN];
 +            }
 +            if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres)) 
 +            {
 +                saved_conserved_quantity -= enerd->term[F_DISPCORR];
 +            }
 +            /* sum up the foreign energy and dhdl terms for vv.  currently done every step so that dhdl is correct in the .edr */
 +            sum_dhdl(enerd,state->lambda,ir->fepvals);
 +        }
 +        
 +        /* ########  END FIRST UPDATE STEP  ############## */
 +        /* ########  If doing VV, we now have v(dt) ###### */
 +        if (bDoExpanded) {
 +            /* perform extended ensemble sampling in lambda - we don't
 +               actually move to the new state before outputting
 +               statistics, but if performing simulated tempering, we
 +               do update the velocities and the tau_t. */
 +        
 +            lamnew = ExpandedEnsembleDynamics(fplog,ir,enerd,state,&MassQ,&df_history,step,mcrng,state->v,mdatoms);
 +        }
 +        /* ################## START TRAJECTORY OUTPUT ################# */
 +        
 +        /* Now we have the energies and forces corresponding to the 
 +         * coordinates at time t. We must output all of this before
 +         * the update.
 +         * for RerunMD t is read from input trajectory
 +         */
 +        mdof_flags = 0;
 +        if (do_per_step(step,ir->nstxout)) { mdof_flags |= MDOF_X; }
 +        if (do_per_step(step,ir->nstvout)) { mdof_flags |= MDOF_V; }
 +        if (do_per_step(step,ir->nstfout)) { mdof_flags |= MDOF_F; }
 +        if (do_per_step(step,ir->nstxtcout)) { mdof_flags |= MDOF_XTC; }
 +        if (bCPT) { mdof_flags |= MDOF_CPT; };
 +
 +#if defined(GMX_FAHCORE) || defined(GMX_WRITELASTSTEP)
 +        if (bLastStep)
 +        {
 +            /* Enforce writing positions and velocities at end of run */
 +            mdof_flags |= (MDOF_X | MDOF_V);
 +        }
 +#endif
 +#ifdef GMX_FAHCORE
 +        if (MASTER(cr))
 +            fcReportProgress( ir->nsteps, step );
 +
 +        /* sync bCPT and fc record-keeping */
 +        if (bCPT && MASTER(cr))
 +            fcRequestCheckPoint();
 +#endif
 +        
 +        if (mdof_flags != 0)
 +        {
 +            wallcycle_start(wcycle,ewcTRAJ);
 +            if (bCPT)
 +            {
 +                if (state->flags & (1<<estLD_RNG))
 +                {
 +                    get_stochd_state(upd,state);
 +                }
 +                if (state->flags  & (1<<estMC_RNG))
 +                {
 +                    get_mc_state(mcrng,state);
 +                }
 +                if (MASTER(cr))
 +                {
 +                    if (bSumEkinhOld)
 +                    {
 +                        state_global->ekinstate.bUpToDate = FALSE;
 +                    }
 +                    else
 +                    {
 +                        update_ekinstate(&state_global->ekinstate,ekind);
 +                        state_global->ekinstate.bUpToDate = TRUE;
 +                    }
 +                    update_energyhistory(&state_global->enerhist,mdebin);
 +                    if (ir->efep!=efepNO || ir->bSimTemp) 
 +                    {
 +                        state_global->fep_state = state->fep_state; /* MRS: seems kludgy. The code should be
 +                                                                       structured so this isn't necessary.
 +                                                                       Note this reassignment is only necessary
 +                                                                       for single threads.*/
 +                        copy_df_history(&state_global->dfhist,&df_history);
 +                    }
 +                }
 +            }
 +            write_traj(fplog,cr,outf,mdof_flags,top_global,
 +                       step,t,state,state_global,f,f_global,&n_xtc,&x_xtc);
 +            if (bCPT)
 +            {
 +                nchkpt++;
 +                bCPT = FALSE;
 +            }
 +            debug_gmx();
 +            if (bLastStep && step_rel == ir->nsteps &&
 +                (Flags & MD_CONFOUT) && MASTER(cr) &&
 +                !bRerunMD && !bFFscan)
 +            {
 +                /* x and v have been collected in write_traj,
 +                 * because a checkpoint file will always be written
 +                 * at the last step.
 +                 */
 +                fprintf(stderr,"\nWriting final coordinates.\n");
 +                if (fr->bMolPBC)
 +                {
 +                    /* Make molecules whole only for confout writing */
 +                    do_pbc_mtop(fplog,ir->ePBC,state->box,top_global,state_global->x);
 +                }
 +                write_sto_conf_mtop(ftp2fn(efSTO,nfile,fnm),
 +                                    *top_global->name,top_global,
 +                                    state_global->x,state_global->v,
 +                                    ir->ePBC,state->box);
 +                debug_gmx();
 +            }
 +            wallcycle_stop(wcycle,ewcTRAJ);
 +        }
 +        
 +        /* kludge -- virial is lost with restart for NPT control. Must restart */
 +        if (bStartingFromCpt && bVV) 
 +        {
 +            copy_mat(state->svir_prev,shake_vir);
 +            copy_mat(state->fvir_prev,force_vir);
 +        }
 +        /*  ################## END TRAJECTORY OUTPUT ################ */
 +        
 +        /* Determine the wallclock run time up till now */
 +        run_time = gmx_gettime() - (double)runtime->real;
 +
 +        /* Check whether everything is still all right */
 +        if (((int)gmx_get_stop_condition() > handled_stop_condition)
 +#ifdef GMX_THREAD_MPI
 +            && MASTER(cr)
 +#endif
 +            )
 +        {
 +            /* this is just to make gs.sig compatible with the hack
 +               of sending signals around by MPI_Reduce together with
 +               other floats */
 +            if ( gmx_get_stop_condition() == gmx_stop_cond_next_ns )
 +                gs.sig[eglsSTOPCOND]=1;
 +            if ( gmx_get_stop_condition() == gmx_stop_cond_next )
 +                gs.sig[eglsSTOPCOND]=-1;
 +            /* < 0 means stop at next step, > 0 means stop at next NS step */
 +            if (fplog)
 +            {
 +                fprintf(fplog,
 +                        "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
 +                        gmx_get_signal_name(),
 +                        gs.sig[eglsSTOPCOND]==1 ? "NS " : "");
 +                fflush(fplog);
 +            }
 +            fprintf(stderr,
 +                    "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
 +                    gmx_get_signal_name(),
 +                    gs.sig[eglsSTOPCOND]==1 ? "NS " : "");
 +            fflush(stderr);
 +            handled_stop_condition=(int)gmx_get_stop_condition();
 +        }
 +        else if (MASTER(cr) && (bNS || ir->nstlist <= 0) &&
 +                 (max_hours > 0 && run_time > max_hours*60.0*60.0*0.99) &&
 +                 gs.sig[eglsSTOPCOND] == 0 && gs.set[eglsSTOPCOND] == 0)
 +        {
 +            /* Signal to terminate the run */
 +            gs.sig[eglsSTOPCOND] = 1;
 +            if (fplog)
 +            {
 +                fprintf(fplog,"\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99);
 +            }
 +            fprintf(stderr, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99);
 +        }
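The stop machinery encodes its state in the sign of a number precisely so it can ride along in the floating-point reductions mentioned in the comments above: negative means stop at the very next step, positive means stop at the next neighbour-search step. A hedged sketch of that convention with invented names (not the gs.sig/gs.set machinery itself):

/* Illustrative sign-encoded stop signal; only the sign of the reduced
 * (master-set) value is interpreted. */
enum { STOP_NONE = 0, STOP_NEXT_NS_STEP = 1, STOP_NEXT_STEP = -1 };

static int decode_stop_signal(float reduced)
{
    if (reduced < 0)
    {
        return STOP_NEXT_STEP;
    }
    return (reduced > 0) ? STOP_NEXT_NS_STEP : STOP_NONE;
}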
 +
 +        if (bResetCountersHalfMaxH && MASTER(cr) &&
 +            run_time > max_hours*60.0*60.0*0.495)
 +        {
 +            gs.sig[eglsRESETCOUNTERS] = 1;
 +        }
 +
 +        if (ir->nstlist == -1 && !bRerunMD)
 +        {
 +            /* When bGStatEveryStep=FALSE, global_stat is only called
 +             * when we check the atom displacements, not at NS steps.
 +             * This means that also the bonded interaction count check is not
 +             * performed immediately after NS. Therefore a few MD steps could
 +             * be performed with missing interactions.
 +             * But wrong energies are never written to file,
 +             * since energies are only written after global_stat
 +             * has been called.
 +             */
 +            if (step >= nlh.step_nscheck)
 +            {
 +                nlh.nabnsb = natoms_beyond_ns_buffer(ir,fr,&top->cgs,
 +                                                     nlh.scale_tot,state->x);
 +            }
 +            else
 +            {
 +                /* This is not necessarily true,
 +                 * but step_nscheck is determined quite conservatively.
 +                 */
 +                nlh.nabnsb = 0;
 +            }
 +        }
 +
 +        /* In parallel we only have to check for checkpointing in steps
 +         * where we do global communication,
 +         *  otherwise the other nodes don't know.
 +         */
 +        if (MASTER(cr) && ((bGStat || !PAR(cr)) &&
 +                           cpt_period >= 0 &&
 +                           (cpt_period == 0 || 
 +                            run_time >= nchkpt*cpt_period*60.0)) &&
 +            gs.set[eglsCHKPT] == 0)
 +        {
 +            gs.sig[eglsCHKPT] = 1;
 +        }
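The checkpoint trigger above is purely wall-clock based: with cpt_period in minutes, a checkpoint is requested once the elapsed run time passes nchkpt periods, or on every opportunity when cpt_period is zero. A compact restatement, assuming exactly those units:

/* Illustrative only: mirrors the condition above; run_time in seconds,
 * cpt_period in minutes, nchkpt = checkpoints written so far. */
static int checkpoint_due(double run_time, double cpt_period, int nchkpt)
{
    return (cpt_period >= 0) &&
           (cpt_period == 0 || run_time >= nchkpt*cpt_period*60.0);
}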
 +  
 +
 +        /* at the start of step, randomize the velocities */
 +        if (ETC_ANDERSEN(ir->etc) && EI_VV(ir->eI))
 +        {
 +            gmx_bool bDoAndersenConstr;
 +            bDoAndersenConstr = (constr && update_randomize_velocities(ir,step,mdatoms,state,upd,&top->idef,constr));
 +            /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */
 +            if (bDoAndersenConstr)
 +            {
 +                update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,
 +                                   state,fr->bMolPBC,graph,f,
 +                                   &top->idef,tmp_vir,NULL,
 +                                   cr,nrnb,wcycle,upd,constr,
 +                                   bInitStep,TRUE,bCalcVir,vetanew);
 +            }
 +        }
 +
 +        if (bIterations)
 +        {
 +            gmx_iterate_init(&iterate,bIterations);
 +        }
 +    
 +        /* for iterations, we save these vectors, as we will be redoing the calculations */
 +        if (bIterations && iterate.bIterate) 
 +        {
 +            copy_coupling_state(state,bufstate,ekind,ekind_save,&(ir->opts));
 +        }
 +        bFirstIterate = TRUE;
 +        while (bFirstIterate || (bIterations && iterate.bIterate))
 +        {
 +            /* We now restore these vectors to redo the calculation with improved extended variables */    
 +            if (bIterations) 
 +            { 
 +                copy_coupling_state(bufstate,state,ekind_save,ekind,&(ir->opts));
 +            }
 +
 +            /* We make the decision to break or not -after- the calculation of Ekin and Pressure,
 +               so scroll down for that logic */
 +            
 +            /* #########   START SECOND UPDATE STEP ################# */
 +            /* Box is changed in update() when we do pressure coupling,
 +             * but we should still use the old box for energy corrections and when
 +             * writing it to the energy file, so it matches the trajectory files for
 +             * the same timestep above. Make a copy in a separate array.
 +             */
 +            copy_mat(state->box,lastbox);
 +
 +            bOK = TRUE;
 +            if (!(bRerunMD && !rerun_fr.bV && !bForceUpdate))
 +            {
 +                wallcycle_start(wcycle,ewcUPDATE);
 +                dvdl = 0;
 +                /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
 +                if (bTrotter) 
 +                {
 +                    if (bIterations && iterate.bIterate) 
 +                    {
 +                        if (bFirstIterate) 
 +                        {
 +                            scalevir = 1;
 +                        }
 +                        else 
 +                        {
 +                            /* we use a new value of scalevir to converge the iterations faster */
 +                            scalevir = tracevir/trace(shake_vir);
 +                        }
 +                        msmul(shake_vir,scalevir,shake_vir); 
 +                        m_add(force_vir,shake_vir,total_vir);
 +                        clear_mat(shake_vir);
 +                    }
 +                    trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ3);
 +                /* We can only do Berendsen coupling after we have summed
 +                 * the kinetic energy or virial. Since this happens
 +                 * in global_state after update, we should only do it at
 +                 * step % nstlist = 1 with bGStatEveryStep=FALSE.
 +                 */
 +                }
 +                else 
 +                {
 +                    update_tcouple(fplog,step,ir,state,ekind,wcycle,upd,&MassQ,mdatoms);
 +                    update_pcouple(fplog,step,ir,state,pcoupl_mu,M,wcycle,
 +                                   upd,bInitStep);
 +                }
 +
 +                if (bVV)
 +                {
++                    bUpdateDoLR = (fr->bTwinRange && do_per_step(step,ir->nstcalclr));
++
 +                    /* velocity half-step update */
 +                    update_coords(fplog,step,ir,mdatoms,state,fr->bMolPBC,f,
-                 
++                                  bUpdateDoLR,fr->f_twin,fcd,
 +                                  ekind,M,wcycle,upd,FALSE,etrtVELOCITY2,
 +                                  cr,nrnb,constr,&top->idef);
 +                }
 +
 +                /* Above, initialize just copies ekinh into ekin,
 +                 * it doesn't copy position (for VV),
 +                 * and entire integrator for MD.
 +                 */
 +                
 +                if (ir->eI==eiVVAK) 
 +                {
 +                    copy_rvecn(state->x,cbuf,0,state->natoms);
 +                }
-                               fr->bTwinRange && bNStList,fr->f_twin,fcd,
++                bUpdateDoLR = (fr->bTwinRange && do_per_step(step,ir->nstcalclr));
++
 +                update_coords(fplog,step,ir,mdatoms,state,fr->bMolPBC,f,
-                                   fr->bTwinRange && bNStList,fr->f_twin,fcd,
++                              bUpdateDoLR,fr->f_twin,fcd,
 +                              ekind,M,wcycle,upd,bInitStep,etrtPOSITION,cr,nrnb,constr,&top->idef);
 +                wallcycle_stop(wcycle,ewcUPDATE);
 +
 +                update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,state,
 +                                   fr->bMolPBC,graph,f,
 +                                   &top->idef,shake_vir,force_vir,
 +                                   cr,nrnb,wcycle,upd,constr,
 +                                   bInitStep,FALSE,bCalcVir,state->veta);  
 +                
 +                if (ir->eI==eiVVAK) 
 +                {
 +                    /* erase F_EKIN and F_TEMP here? */
 +                    /* just compute the kinetic energy at the half step to perform a trotter step */
 +                    compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                                    wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
 +                                    constr,NULL,FALSE,lastbox,
 +                                    top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
 +                                    cglo_flags | CGLO_TEMPERATURE    
 +                        );
 +                    wallcycle_start(wcycle,ewcUPDATE);
 +                    trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ4);            
 +                    /* now we know the scaling, we can compute the positions again */
 +                    copy_rvecn(cbuf,state->x,0,state->natoms);
 +
++                    bUpdateDoLR = (fr->bTwinRange && do_per_step(step,ir->nstcalclr));
++
 +                    update_coords(fplog,step,ir,mdatoms,state,fr->bMolPBC,f,
++                                  bUpdateDoLR,fr->f_twin,fcd,
 +                                  ekind,M,wcycle,upd,bInitStep,etrtPOSITION,cr,nrnb,constr,&top->idef);
 +                    wallcycle_stop(wcycle,ewcUPDATE);
 +
 +                    /* do we need an extra constraint here? just need to copy out of state->v to upd->xp? */
 +                    /* are the small terms in the shake_vir here due
 +                     * to numerical errors, or are they important
 +                     * physically? I'm thinking they are just errors, but not completely sure. 
 +                     * For now, will call without actually constraining, constr=NULL*/
 +                    update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,
 +                                       state,fr->bMolPBC,graph,f,
 +                                       &top->idef,tmp_vir,force_vir,
 +                                       cr,nrnb,wcycle,upd,NULL,
 +                                       bInitStep,FALSE,bCalcVir,
 +                                       state->veta);  
 +                }
 +                if (!bOK && !bFFscan) 
 +                {
 +                    gmx_fatal(FARGS,"Constraint error: Shake, Lincs or Settle could not solve the constrains");
 +                }
 +                
 +                if (fr->bSepDVDL && fplog && do_log) 
 +                {
 +                    fprintf(fplog,sepdvdlformat,"Constraint dV/dl",0.0,dvdl);
 +                }
 +                enerd->term[F_DVDL_BONDED] += dvdl;
 +            } 
 +            else if (graph) 
 +            {
 +                /* Need to unshift here */
 +                unshift_self(graph,state->box,state->x);
 +            }
 +
 +            if (vsite != NULL) 
 +            {
 +                wallcycle_start(wcycle,ewcVSITECONSTR);
 +                if (graph != NULL) 
 +                {
 +                    shift_self(graph,state->box,state->x);
 +                }
 +                construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,state->v,
 +                                 top->idef.iparams,top->idef.il,
 +                                 fr->ePBC,fr->bMolPBC,graph,cr,state->box);
 +                
 +                if (graph != NULL) 
 +                {
 +                    unshift_self(graph,state->box,state->x);
 +                }
 +                wallcycle_stop(wcycle,ewcVSITECONSTR);
 +            }
 +            
 +            /* ############## IF NOT VV, Calculate globals HERE, also iterate constraints ############ */
 +            /* With Leap-Frog we can skip compute_globals at
 +             * non-communication steps, but we need to calculate
 +             * the kinetic energy one step before communication.
 +             */
 +            if (bGStat || do_per_step(step+1,nstglobalcomm) ||
 +                EI_VV(ir->eI))
 +            {
 +                if (ir->nstlist == -1 && bFirstIterate)
 +                {
 +                    gs.sig[eglsNABNSB] = nlh.nabnsb;
 +                }
 +                compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                                wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
 +                                constr,
 +                                bFirstIterate ? &gs : NULL, 
 +                                (step_rel % gs.nstms == 0) && 
 +                                (multisim_nsteps<0 || (step_rel<multisim_nsteps)),
 +                                lastbox,
 +                                top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
 +                                cglo_flags 
 +                                | (!EI_VV(ir->eI) ? CGLO_ENERGY : 0)
 +                                | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
 +                                | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0) 
 +                                | (!EI_VV(ir->eI) || bRerunMD ? CGLO_PRESSURE : 0) 
 +                                | (bIterations && iterate.bIterate ? CGLO_ITERATE : 0) 
 +                                | (bFirstIterate ? CGLO_FIRSTITERATE : 0)
 +                                | CGLO_CONSTRAINT 
 +                    );
 +                if (ir->nstlist == -1 && bFirstIterate)
 +                {
 +                    nlh.nabnsb = gs.set[eglsNABNSB];
 +                    gs.set[eglsNABNSB] = 0;
 +                }
 +            }
 +            /* bIterate is set to keep it from eliminating the old ekin kinetic energy terms */
 +            /* #############  END CALC EKIN AND PRESSURE ################# */
 +        
 +            /* Note: this is OK, but there are some numerical precision issues with using the convergence of
 +               the virial that should probably be addressed eventually. state->veta has better properties,
 +               but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
 +               generate the new shake_vir, but test the veta value for convergence.  This will take some thought. */
 +
 +            if (bIterations && 
 +                done_iterating(cr,fplog,step,&iterate,bFirstIterate,
 +                               trace(shake_vir),&tracevir)) 
 +            {
 +                break;
 +            }
 +            bFirstIterate = FALSE;
 +        }
 +
 +        /* only add constraint dvdl after constraints */
 +        enerd->term[F_DVDL_BONDED] += dvdl;
 +        if (!bVV)
 +        {
 +            /* sum up the foreign energy and dhdl terms for md and sd. currently done every step so that dhdl is correct in the .edr */
 +            sum_dhdl(enerd,state->lambda,ir->fepvals);
 +        }
 +        update_box(fplog,step,ir,mdatoms,state,graph,f,
 +                   ir->nstlist==-1 ? &nlh.scale_tot : NULL,pcoupl_mu,nrnb,wcycle,upd,bInitStep,FALSE);
 +        
 +        /* ################# END UPDATE STEP 2 ################# */
 +        /* #### We now have r(t+dt) and v(t+dt/2)  ############# */
 +    
 +        /* The coordinates (x) were unshifted in update */
 +        if (bFFscan && (shellfc==NULL || bConverged))
 +        {
 +            if (print_forcefield(fplog,enerd->term,mdatoms->homenr,
 +                                 f,NULL,xcopy,
 +                                 &(top_global->mols),mdatoms->massT,pres))
 +            {
 +                gmx_finalize_par();
 +
 +                fprintf(stderr,"\n");
 +                exit(0);
 +            }
 +        }
 +        if (!bGStat)
 +        {
 +            /* We will not sum ekinh_old,                                                            
 +             * so signal that we still have to do it.                                                
 +             */
 +            bSumEkinhOld = TRUE;
 +        }
 +        
 +        if (bTCR)
 +        {
 +            /* Only do GCT when the relaxation of shells (minimization) has converged,
 +             * otherwise we might be coupling to bogus energies. 
 +             * In parallel we must always do this, because the other sims might
 +             * update the FF.
 +             */
 +
 +            /* Since this is called with the new coordinates state->x, I assume
 +             * we want the new box state->box too. / EL 20040121
 +             */
 +            do_coupling(fplog,oenv,nfile,fnm,tcr,t,step,enerd->term,fr,
 +                        ir,MASTER(cr),
 +                        mdatoms,&(top->idef),mu_aver,
 +                        top_global->mols.nr,cr,
 +                        state->box,total_vir,pres,
 +                        mu_tot,state->x,f,bConverged);
 +            debug_gmx();
 +        }
 +
 +        /* #########  BEGIN PREPARING EDR OUTPUT  ###########  */
 +        
 +        /* use the directly determined last velocity, not actually the averaged half steps */
 +        if (bTrotter && ir->eI==eiVV) 
 +        {
 +            enerd->term[F_EKIN] = last_ekin;
 +        }
 +        enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
 +        
 +        if (bVV)
 +        {
 +            enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
 +        }
 +        else 
 +        {
 +            enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + compute_conserved_from_auxiliary(ir,state,&MassQ);
 +        }
 +        /* Check for excessively large energies */
 +        if (bIonize) 
 +        {
 +#ifdef GMX_DOUBLE
 +            real etot_max = 1e200;
 +#else
 +            real etot_max = 1e30;
 +#endif
 +            if (fabs(enerd->term[F_ETOT]) > etot_max) 
 +            {
 +                fprintf(stderr,"Energy too large (%g), giving up\n",
 +                        enerd->term[F_ETOT]);
 +            }
 +        }
 +        /* #########  END PREPARING EDR OUTPUT  ###########  */
 +        
 +        /* Time for performance */
 +        if (((step % stepout) == 0) || bLastStep) 
 +        {
 +            runtime_upd_proc(runtime);
 +        }
 +        
 +        /* Output stuff */
 +        if (MASTER(cr))
 +        {
 +            gmx_bool do_dr,do_or;
 +            
 +            if (fplog && do_log && bDoExpanded)
 +            {
 +                /* only needed if doing expanded ensemble */
 +                PrintFreeEnergyInfoToFile(fplog,ir->fepvals,ir->expandedvals,ir->bSimTemp?ir->simtempvals:NULL,
 +                                          &df_history,state->fep_state,ir->nstlog,step);
 +            }
 +            if (!(bStartingFromCpt && (EI_VV(ir->eI)))) 
 +            {
 +                if (bCalcEner)
 +                {
 +                    upd_mdebin(mdebin,bDoDHDL, TRUE,
 +                               t,mdatoms->tmass,enerd,state,
 +                               ir->fepvals,ir->expandedvals,lastbox,
 +                               shake_vir,force_vir,total_vir,pres,
 +                               ekind,mu_tot,constr);
 +                }
 +                else
 +                {
 +                    upd_mdebin_step(mdebin);
 +                }
 +                
 +                do_dr  = do_per_step(step,ir->nstdisreout);
 +                do_or  = do_per_step(step,ir->nstorireout);
 +                
 +                print_ebin(outf->fp_ene,do_ene,do_dr,do_or,do_log?fplog:NULL,
 +                           step,t,
 +                           eprNORMAL,bCompact,mdebin,fcd,groups,&(ir->opts));
 +            }
 +            if (ir->ePull != epullNO)
 +            {
 +                pull_print_output(ir->pull,step,t);
 +            }
 +            
 +            if (do_per_step(step,ir->nstlog))
 +            {
 +                if(fflush(fplog) != 0)
 +                {
 +                    gmx_fatal(FARGS,"Cannot flush logfile - maybe you are out of disk space?");
 +                }
 +            }
 +        }
 +        if (bDoExpanded)
 +        {
 +            /* Have to do this part after outputting the logfile and the edr file */
 +            state->fep_state = lamnew;
 +            for (i=0;i<efptNR;i++)
 +            {
 +                state->lambda[i] = ir->fepvals->all_lambda[i][lamnew];
 +            }
 +        }
 +        /* Remaining runtime */
 +        if (MULTIMASTER(cr) && (do_verbose || gmx_got_usr_signal()) && !bPMETuneRunning)
 +        {
 +            if (shellfc) 
 +            {
 +                fprintf(stderr,"\n");
 +            }
 +            print_time(stderr,runtime,step,ir,cr);
 +        }
 +
 +        /* Replica exchange */
 +        bExchanged = FALSE;
 +        if ((repl_ex_nst > 0) && (step > 0) && !bLastStep &&
 +            do_per_step(step,repl_ex_nst)) 
 +        {
 +            bExchanged = replica_exchange(fplog,cr,repl_ex,
 +                                          state_global,enerd,
 +                                          state,step,t);
 +
 +            if (bExchanged && DOMAINDECOMP(cr)) 
 +            {
 +                dd_partition_system(fplog,step,cr,TRUE,1,
 +                                    state_global,top_global,ir,
 +                                    state,&f,mdatoms,top,fr,
 +                                    vsite,shellfc,constr,
 +                                    nrnb,wcycle,FALSE);
 +            }
 +        }
 +        
 +        bFirstStep = FALSE;
 +        bInitStep = FALSE;
 +        bStartingFromCpt = FALSE;
 +
 +        /* #######  SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
 +        /* With all integrators, except VV, we need to retain the pressure
 +         * at the current step for coupling at the next step.
 +         */
 +        if ((state->flags & (1<<estPRES_PREV)) &&
 +            (bGStatEveryStep ||
 +             (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
 +        {
 +            /* Store the pressure in t_state for pressure coupling
 +             * at the next MD step.
 +             */
 +            copy_mat(pres,state->pres_prev);
 +        }
 +        
 +        /* #######  END SET VARIABLES FOR NEXT ITERATION ###### */
 +
 +        if ( (membed!=NULL) && (!bLastStep) )
 +        {
 +            rescale_membed(step_rel,membed,state_global->x);
 +        }
 +
 +        if (bRerunMD) 
 +        {
 +            if (MASTER(cr))
 +            {
 +                /* read next frame from input trajectory */
 +                bNotLastFrame = read_next_frame(oenv,status,&rerun_fr);
 +            }
 +
 +            if (PAR(cr))
 +            {
 +                rerun_parallel_comm(cr,&rerun_fr,&bNotLastFrame);
 +            }
 +        }
 +        
 +        if (!bRerunMD || !rerun_fr.bStep)
 +        {
 +            /* increase the MD step number */
 +            step++;
 +            step_rel++;
 +        }
 +        
 +        cycles = wallcycle_stop(wcycle,ewcSTEP);
 +        if (DOMAINDECOMP(cr) && wcycle)
 +        {
 +            dd_cycles_add(cr->dd,cycles,ddCyclStep);
 +        }
 +
 +        if (bPMETuneRunning || bPMETuneTry)
 +        {
 +            /* PME grid + cut-off optimization with GPUs or PME nodes */
 +
 +            /* Count the total cycles over the last steps */
 +            cycles_pmes += cycles;
 +
 +            /* We can only switch cut-off at NS steps */
 +            if (step % ir->nstlist == 0)
 +            {
 +                /* PME grid + cut-off optimization with GPUs or PME nodes */
 +                if (bPMETuneTry)
 +                {
 +                    if (DDMASTER(cr->dd))
 +                    {
 +                        /* PME node load is too high, start tuning */
 +                        bPMETuneRunning = (dd_pme_f_ratio(cr->dd) >= 1.05);
 +                    }
 +                    dd_bcast(cr->dd,sizeof(gmx_bool),&bPMETuneRunning);
 +
 +                    if (bPMETuneRunning || step_rel > ir->nstlist*50)
 +                    {
 +                        bPMETuneTry     = FALSE;
 +                    }
 +                }
 +                if (bPMETuneRunning)
 +                {
 +                    /* init_step might not be a multiple of nstlist,
 +                     * but the first cycle is always skipped anyhow.
 +                     */
 +                    bPMETuneRunning =
 +                        pme_load_balance(pme_loadbal,cr,
 +                                         (bVerbose && MASTER(cr)) ? stderr : NULL,
 +                                         fplog,
 +                                         ir,state,cycles_pmes,
 +                                         fr->ic,fr->nbv,&fr->pmedata,
 +                                         step);
 +
++                    /* Update constants in forcerec/inputrec to keep them in sync with fr->ic */
 +                    fr->ewaldcoeff = fr->ic->ewaldcoeff;
++                    fr->rlist      = fr->ic->rlist;
++                    fr->rlistlong  = fr->ic->rlistlong;
++                    fr->rcoulomb   = fr->ic->rcoulomb;
++                    fr->rvdw       = fr->ic->rvdw;
 +                }
-         
 +                cycles_pmes = 0;
 +            }
 +        }
++
 +        if (step_rel == wcycle_get_reset_counters(wcycle) ||
 +            gs.set[eglsRESETCOUNTERS] != 0)
 +        {
 +            /* Reset all the counters related to performance over the run */
 +            reset_all_counters(fplog,cr,step,&step_rel,ir,wcycle,nrnb,runtime,
 +                               fr->nbv != NULL && fr->nbv->bUseGPU ? fr->nbv->cu_nbv : NULL);
 +            wcycle_set_reset_counters(wcycle,-1);
 +            /* Correct max_hours for the elapsed time */
 +            max_hours -= run_time/(60.0*60.0);
 +            bResetCountersHalfMaxH = FALSE;
 +            gs.set[eglsRESETCOUNTERS] = 0;
 +        }
 +
 +    }
 +    /* End of main MD loop */
 +    debug_gmx();
 +    
 +    /* Stop the time */
 +    runtime_end(runtime);
 +    
 +    if (bRerunMD && MASTER(cr))
 +    {
 +        close_trj(status);
 +    }
 +    
 +    if (!(cr->duty & DUTY_PME))
 +    {
 +        /* Tell the PME only node to finish */
 +        gmx_pme_send_finish(cr);
 +    }
 +    
 +    if (MASTER(cr))
 +    {
 +        if (ir->nstcalcenergy > 0 && !bRerunMD) 
 +        {
 +            print_ebin(outf->fp_ene,FALSE,FALSE,FALSE,fplog,step,t,
 +                       eprAVER,FALSE,mdebin,fcd,groups,&(ir->opts));
 +        }
 +    }
 +
 +    done_mdoutf(outf);
 +
 +    debug_gmx();
 +
 +    if (ir->nstlist == -1 && nlh.nns > 0 && fplog)
 +    {
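 +        /* nlh.s1 and nlh.s2 appear to be running sums of the neighborlist lifetime
 +         * and of its square (accumulated by the nstlist=-1 bookkeeping), so the
 +         * values printed below are the sample mean s1/n and the standard
 +         * deviation sqrt(s2/n - (s1/n)^2). */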
 +        fprintf(fplog,"Average neighborlist lifetime: %.1f steps, std.dev.: %.1f steps\n",nlh.s1/nlh.nns,sqrt(nlh.s2/nlh.nns - sqr(nlh.s1/nlh.nns)));
 +        fprintf(fplog,"Average number of atoms that crossed the half buffer length: %.1f\n\n",nlh.ab/nlh.nns);
 +    }
 +
 +    if (pme_loadbal != NULL)
 +    {
 +        pme_loadbal_done(pme_loadbal,fplog);
 +    }
 +
 +    if (shellfc && fplog)
 +    {
 +        fprintf(fplog,"Fraction of iterations that converged:           %.2f %%\n",
 +                (nconverged*100.0)/step_rel);
 +        fprintf(fplog,"Average number of force evaluations per MD step: %.2f\n\n",
 +                tcount/step_rel);
 +    }
 +    
 +    if (repl_ex_nst > 0 && MASTER(cr))
 +    {
 +        print_replica_exchange_statistics(fplog,repl_ex);
 +    }
 +    
 +    runtime->nsteps_done = step_rel;
 +
 +    return 0;
 +}
Simple merge
index 248273e1bec26427ac33f11e3aa1ecece3a07c27,0000000000000000000000000000000000000000..582cedc233aef07127e5236d05557b7cd48c2699
mode 100644,000000..100644
--- /dev/null
@@@ -1,1510 -1,0 +1,1512 @@@
-         c12 = C12(nbfp, natoms, i, i);
-         c6  = C6(nbfp,  natoms, i, i);
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2010, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +
 +/*
 + * Note that parts of this source code originate from the Simtk release 
 + * of OpenMM accelerated Gromacs, for more details see: 
 + * https://simtk.org/project/xml/downloads.xml?group_id=161#package_id600
 + */
 +
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <types/simple.h>
 +#include <cmath>
 +#include <set>
 +#include <iostream>
 +#include <sstream>
 +#include <fstream>
 +#include <map>
 +#include <vector>
 +#include <cctype>
 +#include <algorithm>
 +
 +using namespace std;
 +
 +#include "OpenMM.h"
 +
 +#include "gmx_fatal.h"
 +#include "typedefs.h"
 +#include "mdrun.h"
 +#include "physics.h"
 +#include "string2.h"
 +#include "gpu_utils.h"
 +#include "mtop_util.h"
 +
 +#include "openmm_wrapper.h"
 +
 +using namespace OpenMM;
 +
 +/*! \cond */
 +#define MEM_ERR_MSG(str) \
 +    "The %s-simulation GPU memory test detected errors. As memory errors would cause incorrect " \
 +    "simulation results, gromacs has aborted execution.\n Make sure that your GPU's memory is not " \
 +    "overclocked and that the device is properly cooled.\n", (str)
 +/*! \endcond */
 +
 +#define COMBRULE_CHK_TOL            1e-6
 +#define COMBRULE_SIGMA(sig1, sig2)  (((sig1) + (sig2))/2)
 +#define COMBRULE_EPS(eps1, eps2)    (sqrt((eps1) * (eps2)))
 +
 +/*! 
 + * \brief Convert string to integer type.
 + * \param[in]  s    String to convert from.
 + * \param[in]  f    Basefield format flag that takes any of the following I/O
 + *                  manipulators: dec, hex, oct.
 + * \param[out] t    Destination variable to convert to.
 + */
 +template <class T>
 +static gmx_bool from_string(T& t, const string& s, ios_base& (*f)(ios_base&))
 +{
 +    istringstream iss(s);
 +    return !(iss >> f >> t).fail();
 +}
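 +/* For illustration (this is how the memtest and deviceid option values are
 +   parsed further below): given
 +       int secs;
 +       gmx_bool bOk = from_string<int>(secs, "15", std::dec);
 +   bOk is TRUE and secs == 15; from_string() returns FALSE if the string
 +   cannot be parsed in the requested base. */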
 +
 +/*!
 + * \brief Split string around a given delimiter.
 + * \param[in] s      String to split.
 + * \param[in] delim  Delimiter character.
 + * \returns          Vector of strings found in \p s.
 + */
 +static vector<string> split(const string &s, char delim)
 +{
 +    vector<string> elems;
 +    stringstream ss(s);
 +    string item;
 +    while (getline(ss, item, delim))
 +    {
 +        if (item.length() != 0)
 +            elems.push_back(item);
 +    }
 +    return elems;
 +}
 +
 +/*!
 + * \brief Split a string of the form "option=value" into "option" and "value" strings.
 + * This string corresponds to one option and the associated value from the option list 
 + * in the mdrun -device argument.
 + *
 + * \param[in]  s    A string containing an "option=value" pair that needs to be split up.
 + * \param[out] opt  The name of the option.
 + * \param[out] val  Value of the option. 
 + */
 +static void splitOptionValue(const string &s, string &opt, string &val)
 +{
 +    size_t eqPos = s.find('=');
 +    if (eqPos != string::npos)
 +    {
 +        opt = s.substr(0, eqPos);
 +        if (eqPos != s.length())  val = s.substr(eqPos+1);
 +    }
 +}
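 +/* For illustration: splitOptionValue("memtest=full", opt, val) sets
 +   opt == "memtest" and val == "full"; if the input contains no '=' sign,
 +   both output strings are left untouched. */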
 +
 +/*!
 + * \brief Compare two strings ignoring case.
 + * This function is in fact a wrapper around the gromacs function gmx_strncasecmp().
 + * \param[in] s1 String. 
 + * \param[in] s2 String.
 + * \returns      TRUE if \p s1 and \p s2 compare equal ignoring case,
 + *               FALSE otherwise.
 + */
 +static gmx_bool isStringEqNCase(const string& s1, const string& s2)
 +{
 +    return (gmx_strncasecmp(s1.c_str(), s2.c_str(), max(s1.length(), s2.length())) == 0);
 +}
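 +/* For illustration: isStringEqNCase("CUDA", "cuda") is TRUE, while
 +   isStringEqNCase("mem", "memtest") is FALSE, because the comparison length
 +   is the longer of the two strings. */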
 +
 +/*!
 + * \brief Convert string to upper case.
 + *
 + * \param[in]  s    String to convert to uppercase.
 + * \returns         The given string converted to uppercase.
 + */
 +static string toUpper(const string &s)
 +{
 +    string stmp(s);
 +    std::transform(stmp.begin(), stmp.end(), stmp.begin(), static_cast < int(*)(int) > (toupper));
 +    return stmp;
 +}
 +
 +/*! 
 +  \name Sizes of constant device option arrays GmxOpenMMPlatformOptions#platforms, 
 +  GmxOpenMMPlatformOptions#memtests, GmxOpenMMPlatformOptions#deviceid, 
 +  GmxOpenMMPlatformOptions#force_dev.  */
 +/* {@ */
 +#define SIZEOF_PLATFORMS    2  // 2
 +#define SIZEOF_MEMTESTS     3 
 +#define SIZEOF_DEVICEIDS    1 
 +#define SIZEOF_FORCE_DEV    2 
 +
 +#define SIZEOF_CHECK_COMBRULE 2
 +/* @} */
 +
 +/*! Possible platform options in the mdrun -device option. */
 +static const char *devOptStrings[] = { "platform", "deviceid", "memtest", "force-device", "check-combrule" }; 
 +
 +/*! Enumerated platform options in the mdrun -device option. */
 +enum devOpt
 +{
 +    PLATFORM     = 0,
 +    DEVICEID     = 1,
 +    MEMTEST      = 2,
 +    FORCE_DEVICE = 3
 +};
 +
 +/*!
 + * \brief Class to extract and manage the platform options in the mdrun -device option.
 + * 
 + */
 +class GmxOpenMMPlatformOptions
 +{
 +public:
 +    GmxOpenMMPlatformOptions(const char *opt);
 +    ~GmxOpenMMPlatformOptions() { options.clear(); }
 +    string getOptionValue(const string &opt);
 +    void remOption(const string &opt);
 +    void print();
 +private:
 +    void setOption(const string &opt, const string &val);
 +
 +    map<string, string> options; /*!< Data structure to store the option (name, value) pairs. */
 +
 +    static const char * const platforms[SIZEOF_PLATFORMS];  /*!< Available OpenMM platforms; size #SIZEOF_PLATFORMS */
 +    static const char * const memtests[SIZEOF_MEMTESTS];    /*!< Available types of memory tests; any positive
 +                                                                 integer >= 15 is also valid; size #SIZEOF_MEMTESTS */
 +    static const char * const deviceid[SIZEOF_DEVICEIDS];   /*!< Possible values for the deviceid option;
 +                                                                 any positive integer is also valid; size #SIZEOF_DEVICEIDS */
 +    static const char * const force_dev[SIZEOF_FORCE_DEV];  /*!< Possible values for the force-device option;
 +                                                                 size #SIZEOF_FORCE_DEV */
 +    static const char * const check_combrule[SIZEOF_CHECK_COMBRULE]; /* XXX temporary debug feature to 
 +                                                                      turn off combination rule check */
 +};
 +
 +const char * const GmxOpenMMPlatformOptions::platforms[SIZEOF_PLATFORMS]
 +                    = {"CUDA", "Reference"};
 +                    //= { "Reference", "CUDA" /*,"OpenCL"*/ };
 +const char * const GmxOpenMMPlatformOptions::memtests[SIZEOF_MEMTESTS]
 +                    = { "15", "full", "off" };
 +const char * const GmxOpenMMPlatformOptions::deviceid[SIZEOF_DEVICEIDS]
 +                    = { "0" };
 +const char * const GmxOpenMMPlatformOptions::force_dev[SIZEOF_FORCE_DEV]
 +                    = { "no", "yes" };
 +const char * const GmxOpenMMPlatformOptions::check_combrule[SIZEOF_CHECK_COMBRULE] 
 +                    = { "yes", "no" };
 +
 +/*!
 + * \brief Constructor.
 + * Takes the option list, parses it, and checks the options and their values for validity.
 + * When certain options are not provided by the user, the first item of the
 + * respective constant array is used as the default value (GmxOpenMMPlatformOptions#platforms, 
 + * GmxOpenMMPlatformOptions#memtests, GmxOpenMMPlatformOptions#deviceid, 
 + * GmxOpenMMPlatformOptions#force_dev). 
 + * \param[in] optionString  Option list part of the mdrun -device parameter.
 + */
 +GmxOpenMMPlatformOptions::GmxOpenMMPlatformOptions(const char *optionString)
 +{
 +    // set default values
 +    setOption("platform",       platforms[0]);
 +    setOption("memtest",        memtests[0]);
 +    setOption("deviceid",       deviceid[0]);
 +    setOption("force-device",   force_dev[0]);
 +    setOption("check-combrule", check_combrule[0]);
 +
 +    string opt(optionString);
 +
 +    // remove all whitespaces
 +    opt.erase(remove_if(opt.begin(), opt.end(), ::isspace), opt.end());
 +    // tokenize around ","-s
 +    vector<string> tokens = split(opt, ',');
 +
 +    for (vector<string>::iterator it = tokens.begin(); it != tokens.end(); ++it)
 +    {
 +        string opt = "", val = "";
 +        splitOptionValue(*it, opt, val);
 +
 +        if (isStringEqNCase(opt, "platform"))
 +        {
 +            /* no check, this will fail if platform does not exist when we try to set it */
 +            setOption(opt, val);
 +            continue;
 +        }
 +
 +        if (isStringEqNCase(opt, "memtest"))
 +        {
 +            /* the value has to be an integer >= 15 (seconds), "full", or "off" */
 +            if (!isStringEqNCase(val, "full") && !isStringEqNCase(val, "off")) 
 +            {
 +                int secs;
 +                if (!from_string<int>(secs, val, std::dec))
 +                {
 +                    gmx_fatal(FARGS, "Invalid value for the memtest option: \"%s\"!", val.c_str());
 +                }
 +                if (secs < 15)
 +                {
 +                    gmx_fatal(FARGS, "Incorrect value for memtest option (%d). "
 +                            "Memtest needs to run for at least 15s!", secs);
 +                }
 +            }
 +            setOption(opt, val);
 +            continue;
 +        }
 +
 +        if (isStringEqNCase(opt, "deviceid"))
 +        {
 +            int id;
 +            if (!from_string<int>(id, val, std::dec) )
 +            {
 +                gmx_fatal(FARGS, "Invalid device id: \"%s\"!", val.c_str());
 +            }
 +            setOption(opt, val);
 +            continue;
 +        }
 +
 +        if (isStringEqNCase(opt, "force-device"))
 +        {
 +            /* */
 +            if (!isStringEqNCase(val, "yes") && !isStringEqNCase(val, "no"))
 +            {
 +                gmx_fatal(FARGS, "Invalid OpenMM force option: \"%s\"!", val.c_str());
 +            }
 +            setOption(opt, val);
 +            continue;
 +        }
 +
 +        if (isStringEqNCase(opt, "check-combrule"))
 +        {
 +            /* */
 +            if (!isStringEqNCase(val, "yes") && !isStringEqNCase(val, "no"))
 +            {
 +                gmx_fatal(FARGS, "Invalid value for the check-combrule option: \"%s\"!", val.c_str());
 +            }
 +            setOption(opt, val);
 +            continue;
 +        }
 +
 +
 +        // if we got till here something went wrong
 +        gmx_fatal(FARGS, "Invalid OpenMM platform option: \"%s\"!", (*it).c_str());
 +    }
 +}
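 +/* For illustration, a hypothetical option list as it could be passed in the
 +   mdrun -device argument and parsed by the constructor above:
 +       "platform=CUDA,deviceid=0,memtest=15,force-device=no"
 +   Unknown option names or invalid values abort via gmx_fatal(). */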
 +
 +
 +/*!
 + * \brief Getter function.
 + * \param[in] opt   Name of the option.
 + * \returns         The value associated with the option, or an empty string if the option is not set.
 + */
 +string GmxOpenMMPlatformOptions::getOptionValue(const string &opt)
 +{
 +    map<string, string>::const_iterator it = options.find(toUpper(opt));
 +    if (it != options.end())
 +    {
 +        return it->second;
 +    }
 +    else
 +    {
 +        /* return an empty string rather than NULL to avoid constructing
 +           a std::string from a null pointer */
 +        return "";
 +    }
 +}
 +
 +/*!
 + * \brief Setter function - private, only used from the constructor.
 + * \param[in] opt   Name of the option.
 + * \param[in] val   Value for the option. 
 + */
 +void GmxOpenMMPlatformOptions::setOption(const string &opt, const string &val)
 +{
 +    options[toUpper(opt)] = val;
 +}
 +
 +/*!
 + * \brief Removes an option with its value from the map structure. If the option 
 + * does not exist, returns without any action.
 + * \param[in] opt   Name of the option.
 + */
 +void GmxOpenMMPlatformOptions::remOption(const string &opt) 
 +{ 
 +    options.erase(toUpper(opt)); 
 +}
 +
 +/*!
 + * \brief Print option-value pairs to a file (debugging function). 
 + */
 +void GmxOpenMMPlatformOptions::print()
 +{
 +    cout << ">> Platform options: " << endl 
 +         << ">> platform     = " << getOptionValue("platform") << endl
 +         << ">> deviceID     = " << getOptionValue("deviceid") << endl
 +         << ">> memtest      = " << getOptionValue("memtest") << endl
 +         << ">> force-device = " << getOptionValue("force-device") << endl;
 +}
 +
 +/*!
 + * \brief Container for OpenMM related data structures that represents the bridge 
 + *        between the Gromacs data structures and the OpenMM library; it is 
 + *        only passed through the API functions as a void pointer to prevent direct access. 
 + */
 +class OpenMMData
 +{
 +public:
 +    System* system;      //!< The system to simulate.
 +    Context* context;   //!< The OpenMM context in which the simulation is carried out.
 +    Integrator* integrator; //!< The integrator used in the simulation.
 +    gmx_bool removeCM;          //!< If TRUE, center of mass motion is removed.
 +    GmxOpenMMPlatformOptions *platformOpt; //!< Platform options.
 +};
 +
 +/*!
 + *  \brief Runs memtest on the GPU that has already been initialized by OpenMM.
 + *  \param[in] fplog    Pointer to gromacs log file.
 + *  \param[in] devId    Device id of the GPU to run the test on. 
 +                        Note: as OpenMM creates the context beforehand, for now this is always -1.
 + *  \param[in] pre_post Contains either "Pre" or "Post" just to be able to differentiate in 
 + *                      stdout messages/log between memtest carried out before and after simulation.
 + *  \param[in] opt      Pointer to platform options object.
 + */
 +static void runMemtest(FILE* fplog, int devId, const char* pre_post, GmxOpenMMPlatformOptions *opt)
 +{
 +    char        strout_buf[STRLEN];
 +    int         which_test;
 +    int         res = 0;
 +    string      s = opt->getOptionValue("memtest");
 +    const char  *test_type = s.c_str();
 +
 +    if (!gmx_strcasecmp(test_type, "off"))
 +    {
 +        which_test = 0;
 +    }
 +    else
 +    {
 +        if (!gmx_strcasecmp(test_type, "full"))
 +        {
 +            which_test = 2;
 +        }
 +        else
 +        {
 +            from_string<int>(which_test, test_type, std::dec);
 +        }
 +    }
 +
 +    if (which_test < 0) 
 +    {
 +        gmx_fatal(FARGS, "Amount of seconds for memetest is negative (%d). ", which_test);
 +    }
 +
 +    switch (which_test)
 +    {
 +        case 0: /* no memtest */
 +            sprintf(strout_buf, "%s-simulation GPU memtest skipped. Note that faulty memory can cause "
 +                "incorrect results!", pre_post);
 +            fprintf(fplog, "%s\n", strout_buf);
 +            gmx_warning(strout_buf);
 +            break; /* case 0 */
 +
 +        case 1: /* quick memtest */
 +            fprintf(fplog,  "%s-simulation %s GPU memtest in progress...\n", pre_post, test_type);
 +            fprintf(stdout, "\n%s-simulation %s GPU memtest in progress...", pre_post, test_type);
 +            fflush(fplog);
 +            fflush(stdout);
 +            res = do_quick_memtest(devId);
 +            break; /* case 1 */
 +
 +        case 2: /* full memtest */
 +            fprintf(fplog,  "%s-simulation %s memtest in progress...\n", pre_post, test_type);
 +            fprintf(stdout, "\n%s-simulation %s memtest in progress...", pre_post, test_type);
 +            fflush(fplog);
 +            fflush(stdout);
 +            res = do_full_memtest(devId);
 +            break; /* case 2 */
 +
 +        default: /* timed memtest */
 +            fprintf(fplog,  "%s-simulation ~%ds memtest in progress...\n", pre_post, which_test);
 +            fprintf(stdout, "\n%s-simulation ~%ds memtest in progress...", pre_post, which_test);
 +            fflush(fplog);
 +            fflush(stdout);
 +            res = do_timed_memtest(devId, which_test);
 +    }
 +
 +    if (which_test != 0)
 +    {
 +        if (res != 0)
 +        {
 +            gmx_fatal(FARGS, MEM_ERR_MSG(pre_post));
 +        }
 +        else
 +        {
 +            fprintf(fplog,  "Memory test completed without errors.\n");
 +            fflush(fplog);
 +            fprintf(stdout, "done, no errors detected\n");
 +            fflush(stdout);
 +        }
 +    }
 +}
 +
 +/*!
 + * \brief Convert Lennard-Jones parameters c12 and c6 to sigma and epsilon.
 + * 
 + * \param[in] c12
 + * \param[in] c6
 + * \param[out] sigma 
 + * \param[out] epsilon
 + */
 +static void convert_c_12_6(double c12, double c6, double *sigma, double *epsilon)
 +{
 +    if (c12 == 0 && c6 == 0)
 +    {
 +        *epsilon    = 0.0;        
 +        *sigma      = 1.0;
 +    }
 +    else if (c12 > 0 && c6 > 0)
 +    {
 +        *epsilon    = (c6*c6)/(4.0*c12);
 +        *sigma      = pow(c12/c6, 1.0/6.0);
 +    }
 +    else 
 +    {
 +        gmx_fatal(FARGS,"OpenMM only supports c6 > 0 and c12 > 0 or c6 = c12 = 0.");
 +    } 
 +}
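 +/* A brief sanity check of the conversion above: with the Lennard-Jones form
 +   V(r) = c12/r^12 - c6/r^6 = 4*eps*((sigma/r)^12 - (sigma/r)^6)
 +   one has c6 = 4*eps*sigma^6 and c12 = 4*eps*sigma^12, and therefore
 +   eps = c6^2/(4*c12) and sigma = (c12/c6)^(1/6), as computed in
 +   convert_c_12_6(). */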
 +
 +/*!
 + * \brief Does gromacs option checking.
 + *
 + * Checks the Gromacs mdp options for features unsupported in OpenMM, in which case 
 + * it aborts the execution. It also warns the user about peculiarities of the OpenMM 
 + * implementation.
 + * \param[in] fplog         Gromacs log file pointer.
 + * \param[in] opt           Pointer to the platform options object.
 + * \param[in] ir            Gromacs input parameters, see ::t_inputrec
 + * \param[in] top           Gromacs node local topology, \see gmx_localtop_t
 + * \param[in] fr            \see ::t_forcerec
 + * \param[in] state         Gromacs systems state, \see ::t_state
 + */
 +static void checkGmxOptions(FILE* fplog, GmxOpenMMPlatformOptions *opt,
 +                            t_inputrec *ir, gmx_localtop_t *top,
 +                            t_forcerec *fr, t_state *state)
 +{
 +    int     i, j, natoms;
 +    double  c6, c12;
 +    double  sigma_ij=0, sigma_ji=0, sigma_ii=0, sigma_jj=0, sigma_comb;
 +    double  eps_ij=0, eps_ji=0, eps_ii=0, eps_jj=0, eps_comb;
 +
 +    /* Abort if unsupported critical options are present */
 +
 +    /* Integrator */
 +    if (ir->eI ==  eiMD)
 +    {
 +        gmx_warning( "OpenMM does not support leap-frog, will use velocity-verlet integrator.");
 +    }
 +
 +    if (    (ir->eI !=  eiMD)   &&
 +            (ir->eI !=  eiVV)   &&
 +            (ir->eI !=  eiVVAK) &&
 +            (ir->eI !=  eiSD1)  &&
 +            (ir->eI !=  eiSD2)  &&
 +            (ir->eI !=  eiBD) )
 +    {
 +        gmx_fatal(FARGS, "OpenMM supports only the following integrators: md/md-vv/md-vv-avek, sd/sd1, and bd.");
 +    }
 +
 +    /* Electrostatics */
 +    if (   !(ir->coulombtype == eelPME   ||
 +             EEL_RF(ir->coulombtype)     ||
 +             ir->coulombtype == eelRF    ||
 +             ir->coulombtype == eelEWALD ||
 +             // no-cutoff
 +             (ir->coulombtype == eelCUT && ir->rcoulomb == 0 &&  ir->rvdw == 0) ||
 +             // we could have cut-off combined with GBSA (openmm will use RF)
 +             ir->implicit_solvent == eisGBSA)   )
 +    {
 +        gmx_fatal(FARGS,"OpenMM supports only the following methods for electrostatics: "
 +                "NoCutoff (i.e. rcoulomb = rvdw = 0 ),Reaction-Field, Ewald or PME.");
 +    }
 +
 +    if (EEL_RF(ir->coulombtype) && ir->epsilon_rf != 0)
 +    {
 +        // openmm has epsilon_rf=inf hard-coded
 +        gmx_warning("OpenMM will use a Reaction-Field epsilon of infinity instead of %g.",ir->epsilon_rf);
 +    }
 +
 +    if (ir->etc != etcNO &&
 +        ir->eI  != eiSD1 &&
 +        ir->eI  != eiSD2 &&
 +        ir->eI  != eiBD )
 +    {
 +        gmx_warning("OpenMM supports only Andersen thermostat with the md/md-vv/md-vv-avek integrators.");
 +    }
 +
 +    if (ir->implicit_solvent == eisGBSA &&
 +        ir->gb_algorithm != egbOBC  )
 +    {
 +        gmx_warning("OpenMM does not support the specified algorithm for Generalized Born, will use OBC instead.");
 +    }
 +
 +    if (ir->opts.ngtc > 1)
 +        gmx_fatal(FARGS,"OpenMM does not support multiple temperature coupling groups.");
 +
 +    if (ir->epc != epcNO)
 +        gmx_warning("OpenMM supports only Monte Carlo barostat for pressure coupling.");
 +
 +    if (ir->opts.annealing[0])
 +        gmx_fatal(FARGS,"OpenMM does not support simulated annealing.");
 +    
 +    if (top->idef.il[F_CONSTR].nr > 0 && ir->eConstrAlg != econtSHAKE)
 +        gmx_warning("OpenMM provides contraints as a combination "
 +                    "of SHAKE, SETTLE and CCMA. Accuracy is based on the SHAKE tolerance set "
 +                    "by the \"shake_tol\" option.");
 +
 +    if (ir->nwall != 0)
 +        gmx_fatal(FARGS,"OpenMM does not support walls.");
 +
 +    if (ir->ePull != epullNO)
 +        gmx_fatal(FARGS,"OpenMM does not support pulling.");
 +
 +    /* check for interaction types */
 +    for (i = 0; i < F_EPOT; i++)
 +    {
 +        if (!(i == F_CONSTR ||
 +            i == F_SETTLE   ||
 +            i == F_BONDS    ||            
 +            i == F_HARMONIC ||
 +            i == F_UREY_BRADLEY ||
 +            i == F_ANGLES   ||
 +            i == F_PDIHS    ||
 +            i == F_RBDIHS   ||
 +            i == F_PIDIHS   ||
 +            i == F_IDIHS    ||
 +            i == F_LJ14     ||
 +            i == F_GB12     || /* The GB parameters are hardcoded both in */
 +            i == F_GB13     || /* Gromacs and OpenMM */
 +            i == F_GB14   ) &&
 +            top->idef.il[i].nr > 0)
 +        {
 +            gmx_fatal(FARGS, "OpenMM does not support (some) of the provided interaction " 
 +                    "type(s) (%s) ", interaction_function[i].longname);
 +        }
 +    }
 +
 +    if (ir->efep != efepNO)
 +        gmx_fatal(FARGS,"OpenMM does not support free energy calculations.");
 +
 +    if (ir->opts.ngacc > 1)
 +        gmx_fatal(FARGS,"OpenMM does not support non-equilibrium MD (accelerated groups).");
 +
 +    if (IR_ELEC_FIELD(*ir))
 +        gmx_fatal(FARGS,"OpenMM does not support electric fields.");
 +
 +    if (ir->bQMMM)
 +        gmx_fatal(FARGS,"OpenMM does not support QMMM calculations.");
 +
 +    if (ir->rcoulomb != ir->rvdw)
 +        gmx_fatal(FARGS,"OpenMM uses a single cutoff for both Coulomb "
 +                  "and VdW interactions. Please set rcoulomb equal to rvdw.");
 +    
 +    if (EEL_FULL(ir->coulombtype))
 +    {
 +        if (ir->ewald_geometry == eewg3DC)
 +            gmx_fatal(FARGS,"OpenMM supports only Ewald 3D geometry.");
 +        if (ir->epsilon_surface != 0)
 +            gmx_fatal(FARGS,"OpenMM does not support dipole correction in Ewald summation.");
 +    }
 +
 +    if (TRICLINIC(state->box))        
 +    {
 +        gmx_fatal(FARGS,"OpenMM does not support triclinic unit cells.");
 +    }
 +
 +    /* XXX this is just debugging code to disable the combination rule check */
 +    if ( isStringEqNCase(opt->getOptionValue("check-combrule"), "yes") )
 +    {
 +    /* As OpenMM by default uses hardcoded combination rules 
 +       sigma_ij = (sigma_i + sigma_j)/2, eps_ij = sqrt(eps_i * eps_j)
 +       we need to check whether the force field params obey this 
 +       and if not, we can't use this force field so we exit 
 +       grace-fatal-fully. */
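 +    /* For example (illustrative numbers only): atom types with sigma_i = 0.30 nm,
 +       sigma_j = 0.34 nm, eps_i = 0.5 kJ/mol and eps_j = 0.2 kJ/mol must have
 +       sigma_ij = (0.30 + 0.34)/2 = 0.32 nm and eps_ij = sqrt(0.5*0.2) ~= 0.316 kJ/mol
 +       (within COMBRULE_CHK_TOL) for the check below to pass. */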
 +    real *nbfp = fr->nbfp;
 +    natoms = fr->ntype;
 +    if (debug) 
 +    {   
 +        fprintf(debug, ">> Atom parameters: <<\n%10s%5s %5s %5s %5s COMB\n", 
 +                "", "i-j", "j-i", "i-i", "j-j");
 +    }
 +    /* loop over all i-j atom pairs and verify if 
 +       sigma_ij = sigma_ji = sigma_comb and eps_ij = eps_ji = eps_comb */
 +    for (i = 0; i < natoms; i++)
 +    {
 +        /* i-i */
-             c12 = C12(nbfp, natoms, i, j);
-             c6  = C6(nbfp,  natoms, i, j);
++        /* nbfp now includes the 6.0/12.0 prefactors to save flops in kernels */
++        c12 = C12(nbfp, natoms, i, i)/12.0;
++        c6  = C6(nbfp,  natoms, i, i)/6.0;
 +        convert_c_12_6(c12, c6, &sigma_ii, &eps_ii);
 +
 +        for (j = 0; j < i; j++)
 +        {
 +            /* i-j */
-             c12 = C12(nbfp, natoms, j, i);
-             c6  = C6(nbfp,  natoms, j, i);
++            c12 = C12(nbfp, natoms, i, j)/12.0;
++            c6  = C6(nbfp,  natoms, i, j)/6.0;
 +            convert_c_12_6(c12, c6, &sigma_ij, &eps_ij);
 +            /* j-i */
-             c12 = C12(nbfp, natoms, j, j);
-             c6  = C6(nbfp,  natoms, j, j);
++            c12 = C12(nbfp, natoms, j, i)/12.0;
++            c6  = C6(nbfp,  natoms, j, i)/6.0;
 +            convert_c_12_6(c12, c6, &sigma_ji, &eps_ji);
 +            /* j-j */
-             double c12 = nbfp[types[i]*2*ntypes+types[i]*2+1];
-             double c6 = nbfp[types[i]*2*ntypes+types[i]*2];
++            c12 = C12(nbfp, natoms, j, j)/12.0;
++            c6  = C6(nbfp,  natoms, j, j)/6.0;
 +            convert_c_12_6(c12, c6, &sigma_jj, &eps_jj);
 +            /* OpenMM hardcoded combination rules */
 +            sigma_comb = COMBRULE_SIGMA(sigma_ii, sigma_jj);
 +            eps_comb = COMBRULE_EPS(eps_ii, eps_jj);
 +  
 +            if (debug)
 +            {
 +                fprintf(debug, "i=%-3d j=%-3d", i, j);
 +                fprintf(debug, "%-11s", "sigma");
 +                fprintf(debug, "%5.3f %5.3f %5.3f %5.3f %5.3f\n",  
 +                        sigma_ij, sigma_ji, sigma_ii, sigma_jj, sigma_comb);
 +                fprintf(debug, "%11s%-11s", "", "epsilon");
 +                fprintf(debug, "%5.3f %5.3f %5.3f %5.3f %5.3f\n", 
 +                        eps_ij, eps_ji, eps_ii, eps_jj, eps_comb);
 +            }
 +
 +            /* check the values against the rule used by omm */
 +            if((fabs(eps_ij) > COMBRULE_CHK_TOL && 
 +                fabs(eps_ji) > COMBRULE_CHK_TOL) &&
 +               (fabs(sigma_comb - sigma_ij) > COMBRULE_CHK_TOL ||
 +               fabs(sigma_comb - sigma_ji) > COMBRULE_CHK_TOL ||
 +               fabs(eps_comb - eps_ij) > COMBRULE_CHK_TOL ||
 +               fabs(eps_comb - eps_ji) > COMBRULE_CHK_TOL ))
 +            {
 +                gmx_fatal(FARGS,
 +                        "The combination rules of the used force-field do not "
 +                        "match the one supported by OpenMM:  "
 +                        "sigma_ij = (sigma_i + sigma_j)/2, eps_ij = sqrt(eps_i * eps_j). "
 +                        "Switch to a force-field that uses these rules in order to "
 +                        "simulate this system using OpenMM.\n");                        
 +            }
 +        }
 +    }
 +    if (debug) { fprintf(debug, ">><<\n\n"); }
 +
 +    /* if we got here, log that everything is fine */
 +    if (debug)
 +    {
 +        fprintf(debug, ">> The combination rule of the used force matches the one used by OpenMM.\n");
 +    }
 +    fprintf(fplog, "The combination rule of the used force field matches the one used by OpenMM.\n");   
 +
 +    } /* if (are we checking the combination rules) ... */
 +}
 +
 +
 +/*!
 + * \brief Initialize OpenMM, run sanity/consistency checks, and return a pointer to 
 + * the OpenMMData.
 + * 
 + * Various Gromacs data structures are passed that contain the parameters, state and 
 + * other properties of the system to simulate. These serve as input for initializing 
 + * OpenMM. In addition, a number of other actions are taken:
 + *  - OpenMM plugins are loaded;
 + *  - platform options in \p platformOptStr are parsed and checked; 
 + *  - Gromacs parameters are checked for OpenMM support and consistency;
 + *  - after OpenMM is initialized, a memtest is executed in the same GPU context.
 + * 
 + * \param[in] fplog             Gromacs log file handler.
 + * \param[in] platformOptStr    Platform option string. 
 + * \param[in] ir                The Gromacs input parameters, see ::t_inputrec
 + * \param[in] top_global        Gromacs system toppology, \see ::gmx_mtop_t
 + * \param[in] top               Gromacs node local topology, \see gmx_localtop_t
 + * \param[in] mdatoms           Gromacs atom parameters, \see ::t_mdatoms
 + * \param[in] fr                \see ::t_forcerec
 + * \param[in] state             Gromacs systems state, \see ::t_state
 + * \returns                     Pointer to the initialized OpenMMData structure, returned as void*.
 + * 
 + */
 +void* openmm_init(FILE *fplog, const char *platformOptStr,
 +                  t_inputrec *ir,
 +                  gmx_mtop_t *top_global, gmx_localtop_t *top,
 +                  t_mdatoms *mdatoms, t_forcerec *fr, t_state *state)
 +{
 +
 +    char warn_buf[STRLEN];
 +    static gmx_bool hasLoadedPlugins = false;
 +    string usedPluginDir;
 +    int devId;
 +
 +    try
 +    {
 +        if (!hasLoadedPlugins)
 +        {
 +            vector<string> loadedPlugins;
 +            /*  Look for OpenMM plugins at various locations (listed in order of priority):
 +                - on the path in OPENMM_PLUGIN_DIR environment variable if this is specified
 +                - on the path in the OPENMM_PLUGIN_DIR macro that is set by the build script
 +                - at the default location assumed by OpenMM
 +            */
 +            /* env var */
 +            char *pluginDir = getenv("OPENMM_PLUGIN_DIR");
 +            trim(pluginDir);
 +            /* if the env var is set and not empty, try to load plugins from there */
 +            if (pluginDir != NULL && *pluginDir != '\0')
 +            {
 +                loadedPlugins = Platform::loadPluginsFromDirectory(pluginDir);
 +                if (!loadedPlugins.empty())
 +                {
 +                    hasLoadedPlugins = true;
 +                    usedPluginDir = pluginDir;
 +                }
 +                else
 +                {
 +                    gmx_fatal(FARGS, "The directory provided in the OPENMM_PLUGIN_DIR environment variable "
 +                              "(%s) does not contain valid OpenMM plugins. Check your OpenMM installation!", 
 +                              pluginDir);
 +                }
 +            }
 +
 +            /* macro set at build time  */
 +#ifdef OPENMM_PLUGIN_DIR
 +            if (!hasLoadedPlugins)
 +            {
 +                loadedPlugins = Platform::loadPluginsFromDirectory(OPENMM_PLUGIN_DIR);
 +                if (!loadedPlugins.empty())
 +                {
 +                    hasLoadedPlugins = true;
 +                    usedPluginDir = OPENMM_PLUGIN_DIR;
 +                }
 +            }
 +#endif
 +            /* default location */
 +            if (!hasLoadedPlugins)
 +            {
 +                loadedPlugins = Platform::loadPluginsFromDirectory(Platform::getDefaultPluginsDirectory());
 +                if (!loadedPlugins.empty())
 +                {
 +                    hasLoadedPlugins = true;
 +                    usedPluginDir = Platform::getDefaultPluginsDirectory();
 +                }
 +            }
 +
 +            /* if there are still no plugins loaded there won't be any */
 +            if (!hasLoadedPlugins)
 +            {
 +                gmx_fatal(FARGS, "No OpenMM plugins were found! You can provide the"
 +                          " plugin directory in the OPENMM_PLUGIN_DIR environment variable.");
 +            }
 +
 +            fprintf(fplog, "\nOpenMM plugins loaded from directory %s:\t", usedPluginDir.c_str());
 +            for (int i = 0; i < (int)loadedPlugins.size(); i++)
 +            {
 +                fprintf(fplog, "%s, ", loadedPlugins[i].c_str());
 +            }
 +            fprintf(fplog, "\n");
 +        }
 +
 +        /* parse option string */
 +        GmxOpenMMPlatformOptions *opt = new GmxOpenMMPlatformOptions(platformOptStr);
 +        devId = atoi(opt->getOptionValue("deviceid").c_str());
 +
 +        if (debug)
 +        {
 +            opt->print();
 +        }
 +
 +        /* check whether the Gromacs options are compatible with OpenMM */
 +        checkGmxOptions(fplog, opt, ir, top, fr, state);
 +
 +        /* Create the system. */
 +        const t_idef& idef = top->idef;
 +        const int numAtoms = top_global->natoms;
 +        const int numConstraints = idef.il[F_CONSTR].nr/3;
 +        const int numSettle = idef.il[F_SETTLE].nr/2;
 +        const int numBonds = idef.il[F_BONDS].nr/3;
 +        const int numHarmonic = idef.il[F_HARMONIC].nr/3;
 +        const int numUB = idef.il[F_UREY_BRADLEY].nr/4;
 +        const int numAngles = idef.il[F_ANGLES].nr/4;
 +        const int numPeriodic = idef.il[F_PDIHS].nr/5;
 +        const int numPeriodicImproper = idef.il[F_PIDIHS].nr/5;
 +        const int numRB = idef.il[F_RBDIHS].nr/5;
 +        const int numImproperDih = idef.il[F_IDIHS].nr/5;
 +        const int num14 = idef.il[F_LJ14].nr/3;
 +        System* sys = new System();
 +        if (ir->nstcomm > 0)
 +            sys->addForce(new CMMotionRemover(ir->nstcomm));
 +
 +        /* Set bonded force field terms. */
 +
 +              /* 
 +               * CUDA platform currently doesn't support more than one
 +               * instance of a force object, so we pack all forces that
 +               * use the same form into one.
 +              */
 +
 +        const int* bondAtoms = (int*) idef.il[F_BONDS].iatoms;
 +        HarmonicBondForce* bondForce = new HarmonicBondForce();
 +        sys->addForce(bondForce);
 +        int offset = 0;
 +        for (int i = 0; i < numBonds; ++i)
 +        {
 +            int type = bondAtoms[offset++];
 +            int atom1 = bondAtoms[offset++];
 +            int atom2 = bondAtoms[offset++];
 +            bondForce->addBond(atom1, atom2,
 +                               idef.iparams[type].harmonic.rA, idef.iparams[type].harmonic.krA);
 +        }
 +
 +        const int* harmonicAtoms = (int*) idef.il[F_HARMONIC].iatoms;
 +        offset = 0;
 +        for (int i = 0; i < numHarmonic; ++i)
 +        {
 +            int type = harmonicAtoms[offset++];
 +            int atom1 = harmonicAtoms[offset++];
 +            int atom2 = harmonicAtoms[offset++];
 +            bondForce->addBond(atom1, atom2,
 +                               idef.iparams[type].harmonic.rA, idef.iparams[type].harmonic.krA);
 +        }
 +
 +              /* Set the angle force field terms */
 +        const int* angleAtoms = (int*) idef.il[F_ANGLES].iatoms;
 +        HarmonicAngleForce* angleForce = new HarmonicAngleForce();
 +        sys->addForce(angleForce);
 +        offset = 0;
 +        for (int i = 0; i < numAngles; ++i)
 +        {
 +            int type = angleAtoms[offset++];
 +            int atom1 = angleAtoms[offset++];
 +            int atom2 = angleAtoms[offset++];
 +            int atom3 = angleAtoms[offset++];
 +            angleForce->addAngle(atom1, atom2, atom3, 
 +                    idef.iparams[type].harmonic.rA*M_PI/180.0, idef.iparams[type].harmonic.krA);
 +        }
 +
 +        /* Urey-Bradley includes both the angle and bond potential for 1-3 interactions */
 +        const int* ubAtoms = (int*) idef.il[F_UREY_BRADLEY].iatoms;
 +              /* HarmonicBondForce* ubBondForce = new HarmonicBondForce(); */
 +              /*  HarmonicAngleForce* ubAngleForce = new HarmonicAngleForce(); */
 +        /* sys->addForce(ubBondForce); */
 +        /* sys->addForce(ubAngleForce); */
 +        offset = 0;
 +        for (int i = 0; i < numUB; ++i)
 +        {
 +            int type = ubAtoms[offset++];
 +            int atom1 = ubAtoms[offset++];
 +            int atom2 = ubAtoms[offset++];
 +            int atom3 = ubAtoms[offset++];
 +            /* ubBondForce->addBond(atom1, atom3, */
 +            bondForce->addBond(atom1, atom3,
 +                               idef.iparams[type].u_b.r13A, idef.iparams[type].u_b.kUBA);
 +            /* ubAngleForce->addAngle(atom1, atom2, atom3, */ 
 +            angleForce->addAngle(atom1, atom2, atom3, 
 +                    idef.iparams[type].u_b.thetaA*M_PI/180.0, idef.iparams[type].u_b.kthetaA);
 +        }
 +
 +              /* Set proper dihedral terms */
 +        const int* periodicAtoms = (int*) idef.il[F_PDIHS].iatoms;
 +        PeriodicTorsionForce* periodicForce = new PeriodicTorsionForce();
 +        sys->addForce(periodicForce);
 +        offset = 0;
 +        for (int i = 0; i < numPeriodic; ++i)
 +        {
 +            int type = periodicAtoms[offset++];
 +            int atom1 = periodicAtoms[offset++];
 +            int atom2 = periodicAtoms[offset++];
 +            int atom3 = periodicAtoms[offset++];
 +            int atom4 = periodicAtoms[offset++];
 +            periodicForce->addTorsion(atom1, atom2, atom3, atom4,
 +                                      idef.iparams[type].pdihs.mult,
 +                                      idef.iparams[type].pdihs.phiA*M_PI/180.0, 
 +                                      idef.iparams[type].pdihs.cpA);
 +        }
 +
 +              /* Set improper dihedral terms that are represented by a periodic function (as in AMBER FF) */
 +        const int* periodicImproperAtoms = (int*) idef.il[F_PIDIHS].iatoms;
 +        /* PeriodicTorsionForce* periodicImproperForce = new PeriodicTorsionForce(); */
 +        /* sys->addForce(periodicImproperForce); */
 +        offset = 0;
 +        for (int i = 0; i < numPeriodicImproper; ++i)
 +        {
 +            int type = periodicImproperAtoms[offset++];
 +            int atom1 = periodicImproperAtoms[offset++];
 +            int atom2 = periodicImproperAtoms[offset++];
 +            int atom3 = periodicImproperAtoms[offset++];
 +            int atom4 = periodicImproperAtoms[offset++];
 +            /* periodicImproperForce->addTorsion(atom1, atom2, atom3, atom4, */
 +            periodicForce->addTorsion(atom1, atom2, atom3, atom4,
 +                                      idef.iparams[type].pdihs.mult,
 +                                      idef.iparams[type].pdihs.phiA*M_PI/180.0,
 +                                      idef.iparams[type].pdihs.cpA);
 +        }
 +
 +        /* Ryckaert-Bellemans dihedrals */
 +        const int* rbAtoms = (int*) idef.il[F_RBDIHS].iatoms;
 +        RBTorsionForce* rbForce = new RBTorsionForce();
 +        sys->addForce(rbForce);
 +        offset = 0;
 +        for (int i = 0; i < numRB; ++i)
 +        {
 +            int type = rbAtoms[offset++];
 +            int atom1 = rbAtoms[offset++];
 +            int atom2 = rbAtoms[offset++];
 +            int atom3 = rbAtoms[offset++];
 +            int atom4 = rbAtoms[offset++];
 +            rbForce->addTorsion(atom1, atom2, atom3, atom4,
 +                                idef.iparams[type].rbdihs.rbcA[0], idef.iparams[type].rbdihs.rbcA[1],
 +                                idef.iparams[type].rbdihs.rbcA[2], idef.iparams[type].rbdihs.rbcA[3],
 +                                idef.iparams[type].rbdihs.rbcA[4], idef.iparams[type].rbdihs.rbcA[5]);
 +        }
 +
 +              /* Set improper dihedral terms (as in CHARMM FF) */
 +        const int* improperDihAtoms = (int*) idef.il[F_IDIHS].iatoms;
 +              CustomTorsionForce* improperDihForce = new CustomTorsionForce("2.0*k*asin(sin((theta-theta0)/2))^2");
 +        sys->addForce(improperDihForce);
 +              improperDihForce->addPerTorsionParameter("k");
 +              improperDihForce->addPerTorsionParameter("theta0");
 +              vector<double> improperDihParameters(2);
 +        offset = 0;
 +        for (int i = 0; i < numImproperDih; ++i)
 +        {
 +            int type = improperDihAtoms[offset++];
 +            int atom1 = improperDihAtoms[offset++];
 +            int atom2 = improperDihAtoms[offset++];
 +            int atom3 = improperDihAtoms[offset++];
 +            int atom4 = improperDihAtoms[offset++];
 +                      improperDihParameters[0] = idef.iparams[type].harmonic.krA;
 +                      improperDihParameters[1] = idef.iparams[type].harmonic.rA*M_PI/180.0;
 +            improperDihForce->addTorsion(atom1, atom2, atom3, atom4,
 +                                improperDihParameters);
 +        }
 +
 +        /* Set nonbonded parameters and masses. */
 +        int ntypes = fr->ntype;
 +        int* types = mdatoms->typeA;
 +        real* nbfp = fr->nbfp;
 +        real* charges = mdatoms->chargeA;
 +        real* masses = mdatoms->massT;
 +        NonbondedForce* nonbondedForce = new NonbondedForce();
 +        sys->addForce(nonbondedForce);
 +        
 +        switch (ir->ePBC)
 +        {
 +        case epbcNONE:
 +            if (ir->rcoulomb == 0)
 +            {
 +                nonbondedForce->setNonbondedMethod(NonbondedForce::NoCutoff);
 +            }
 +            else
 +            {
 +                nonbondedForce->setNonbondedMethod(NonbondedForce::CutoffNonPeriodic);
 +            }
 +            break;
 +        case epbcXYZ:
 +            switch (ir->coulombtype)
 +            {
 +            case eelCUT:
 +            case eelRF:
 +            case eelGRF:
 +            case eelRF_NEC:
 +            case eelRF_ZERO:
 +                nonbondedForce->setNonbondedMethod(NonbondedForce::CutoffPeriodic);
 +                break;
 +
 +            case eelEWALD:
 +                nonbondedForce->setNonbondedMethod(NonbondedForce::Ewald);
 +                break;
 +
 +            case eelPME:
 +                nonbondedForce->setNonbondedMethod(NonbondedForce::PME);
 +                break;
 +
 +            default:
 +                gmx_fatal(FARGS,"Internal error: you should not see this message, it means that the"
 +                          "electrosatics option check failed. Please report this error!");
 +            }        
 +            sys->setDefaultPeriodicBoxVectors(Vec3(state->box[0][0], 0, 0),
 +                                       Vec3(0, state->box[1][1], 0), Vec3(0, 0, state->box[2][2]));                    
 +            nonbondedForce->setCutoffDistance(ir->rcoulomb);
 +           
 +            break;
 +        default:            
 +            gmx_fatal(FARGS,"OpenMM supports only full periodic boundary conditions "
 +                              "(pbc = xyz), or none (pbc = no).");
 +        }
 +
 +
 +        /* Fix for the PME and Ewald error tolerance 
 +         *
 +         *  OpenMM uses approximate formulas to calculate the Ewald parameter:
 +         *  alpha = (1.0/cutoff)*sqrt(-log(2.0*tolerance));
 +         *  and the grid spacing for PME:
 +         *  gridX = ceil(2*alpha*box[0][0]/3*(pow(tol, 0.2)));
 +         *  gridY = ceil(2*alpha*box[1][1]/3*(pow(tol, 0.2)));
 +         *  gridZ = ceil(2*alpha*box[2][2]/3*(pow(tol, 0.2)));
 +         *
 +         *  If the default ewald_rtol=1e-5 is used, we silently adjust the value to the 
 +         *  OpenMM default of 5e-4; otherwise a warning is issued about the action taken. 
 +         */
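 +        /* Worked example for the correction below: with the GROMACS default
 +           ewald_rtol = 1e-5, corr_ewald_rtol = 50.0 * 1e-5 = 5e-4, i.e. exactly
 +           the OpenMM default tolerance mentioned above. */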
 +        double corr_ewald_rtol = 50.0 * ir->ewald_rtol;
 +        if ((ir->ePBC == epbcXYZ) && 
 +            (ir->coulombtype == eelEWALD || ir->coulombtype == eelPME))
 +        {
 +            if (debug)
 +            {
 +                fprintf(debug, ">> ewald_rtol = %e (corrected = %e) \n",
 +                    ir->ewald_rtol, corr_ewald_rtol);
 +            }
 +
 +            if (fabs(ir->ewald_rtol - 1e-5) > 1e-10)
 +            {
 +                gmx_warning("OpenMM uses the ewald_rtol parameter with approximate formulas "
 +                        "to calculate the alpha and grid spacing parameters of the Ewald "
 +                        "and PME methods. This tolerance needs to be corrected in order to get "
 +                        "settings close to the ones used in GROMACS. Although the internal correction "
 +                        "should work for any reasonable value of ewald_rtol, using values other than "
 +                        "the default 1e-5 might cause incorrect behavior.");
 +
 +                if (corr_ewald_rtol > 1)
 +                {
 +                    gmx_fatal(FARGS, "The ewald_rtol accuracy term is >1 after the "
 +                            "adjustment for OpenMM (%e)", corr_ewald_rtol);
 +                }
 +            }
 +            nonbondedForce->setEwaldErrorTolerance(corr_ewald_rtol);
 +        }
 +
 +        for (int i = 0; i < numAtoms; ++i)
 +        {
++            /* nbfp now includes the 6.0/12.0 derivative prefactors to save flops in kernels*/
++            double c12 = nbfp[types[i]*2*ntypes+types[i]*2+1]/12.0;
++            double c6 = nbfp[types[i]*2*ntypes+types[i]*2]/6.0;
 +            double sigma=0.0, epsilon=0.0;
 +            convert_c_12_6(c12, c6, &sigma, &epsilon);
 +            nonbondedForce->addParticle(charges[i], sigma, epsilon);
 +            sys->addParticle(masses[i]);
 +        }
 +
 +        // Build a table of all exclusions.
 +        vector<set<int> > exclusions(numAtoms);
 +        for (int i = 0; i < numAtoms; i++)
 +        {
 +            int start = top->excls.index[i];
 +            int end = top->excls.index[i+1];
 +            for (int j = start; j < end; j++)
 +                exclusions[i].insert(top->excls.a[j]);
 +        }
 +
 +        // Record the 1-4 interactions, and remove them from the list of exclusions.
 +        const int* nb14Atoms = (int*) idef.il[F_LJ14].iatoms;
 +        offset = 0;
 +        for (int i = 0; i < num14; ++i)
 +        {
 +            int type = nb14Atoms[offset++];
 +            int atom1 = nb14Atoms[offset++];
 +            int atom2 = nb14Atoms[offset++];
 +            double sigma=0, epsilon=0;
 +            convert_c_12_6(idef.iparams[type].lj14.c12A, 
 +                    idef.iparams[type].lj14.c6A,
 +                    &sigma, &epsilon);
 +            nonbondedForce->addException(atom1, atom2,
 +                                         fr->fudgeQQ*charges[atom1]*charges[atom2], sigma, epsilon);
 +            exclusions[atom1].erase(atom2);
 +            exclusions[atom2].erase(atom1);
 +        }
 +
 +        // Record exclusions.
 +        for (int i = 0; i < numAtoms; i++)
 +        {
 +            for (set<int>::const_iterator iter = exclusions[i].begin(); iter != exclusions[i].end(); ++iter)
 +            {
 +                if (i < *iter)
 +                {
 +                    nonbondedForce->addException(i, *iter, 0.0, 1.0, 0.0);
 +                }
 +            }
 +        }
 +
 +        // Add GBSA if needed.
 +        if (ir->implicit_solvent == eisGBSA)
 +        {
 +            gmx_warning("The OBC scale factors alpha, beta and gamma are hardcoded in OpenMM with the default Gromacs values.");
 +            t_atoms atoms       = gmx_mtop_global_atoms(top_global);
 +            GBSAOBCForce* gbsa  = new GBSAOBCForce();
 +
 +            sys->addForce(gbsa);
 +            gbsa->setSoluteDielectric(ir->epsilon_r);
 +            gbsa->setSolventDielectric(ir->gb_epsilon_solvent);
 +            gbsa->setCutoffDistance(nonbondedForce->getCutoffDistance());
 +            if (nonbondedForce->getNonbondedMethod() == NonbondedForce::NoCutoff)
 +                gbsa->setNonbondedMethod(GBSAOBCForce::NoCutoff);
 +            else if (nonbondedForce->getNonbondedMethod() == NonbondedForce::CutoffNonPeriodic)
 +                gbsa->setNonbondedMethod(GBSAOBCForce::CutoffNonPeriodic);
 +            else if (nonbondedForce->getNonbondedMethod() == NonbondedForce::CutoffPeriodic)
 +                gbsa->setNonbondedMethod(GBSAOBCForce::CutoffPeriodic);
 +            else
 +                gmx_fatal(FARGS,"OpenMM supports only Reaction-Field electrostatics with OBC/GBSA.");
 +
 +            for (int i = 0; i < numAtoms; ++i)
 +            {
 +                gbsa->addParticle(charges[i],
 +                                  top_global->atomtypes.gb_radius[atoms.atom[i].type],
 +                                  top_global->atomtypes.S_hct[atoms.atom[i].type]);
 +            }
 +            free_t_atoms(&atoms, FALSE);
 +        }
 +
 +        // Set constraints.
 +        const int* constraintAtoms = (int*) idef.il[F_CONSTR].iatoms;
 +        offset = 0;
 +        for (int i = 0; i < numConstraints; ++i)
 +        {
 +            int type = constraintAtoms[offset++];
 +            int atom1 = constraintAtoms[offset++];
 +            int atom2 = constraintAtoms[offset++];
 +            sys->addConstraint(atom1, atom2, idef.iparams[type].constr.dA);
 +        }
 +        const int* settleAtoms = (int*) idef.il[F_SETTLE].iatoms;
 +        offset = 0;
 +        for (int i = 0; i < numSettle; ++i)
 +        {
 +            int type = settleAtoms[offset++];
 +            int oxygen = settleAtoms[offset++];
 +            sys->addConstraint(oxygen, oxygen+1, idef.iparams[type].settle.doh);
 +            sys->addConstraint(oxygen, oxygen+2, idef.iparams[type].settle.doh);
 +            sys->addConstraint(oxygen+1, oxygen+2, idef.iparams[type].settle.dhh);
 +        }
 +
 +        // Create an integrator for simulating the system.
 +        double friction = (ir->opts.tau_t[0] == 0.0 ? 0.0 : 1.0/ir->opts.tau_t[0]);
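 +        /* Illustrative note (not in the original source): only the first
 +         * temperature-coupling group is used; e.g. tau_t[0] = 0.1 ps gives a
 +         * friction/collision rate of 1/0.1 = 10 ps^-1 for the stochastic
 +         * integrators and the Andersen thermostat below.
 +         */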
 +        Integrator* integ;
 +        if (ir->eI == eiBD)
 +        {
 +            integ = new BrownianIntegrator(ir->opts.ref_t[0], friction, ir->delta_t);
 +            static_cast<BrownianIntegrator*>(integ)->setRandomNumberSeed(ir->ld_seed); 
 +        }
 +        else if (EI_SD(ir->eI))
 +        {
 +            integ = new LangevinIntegrator(ir->opts.ref_t[0], friction, ir->delta_t);
 +            static_cast<LangevinIntegrator*>(integ)->setRandomNumberSeed(ir->ld_seed); 
 +        }
 +        else 
 +        {
 +            integ = new VerletIntegrator(ir->delta_t);
 +            if ( ir->etc != etcNO)
 +            {
 +                AndersenThermostat* thermostat = new AndersenThermostat(ir->opts.ref_t[0], friction); 
 +                sys->addForce(thermostat);
 +            }           
 +        }
 +
 +        // Add pressure coupling
 +        if (ir->epc != epcNO)
 +        {
 +            // convert the Gromacs pressure tensor to a scalar
 +            double pressure = (ir->ref_p[0][0] + ir->ref_p[1][1] + ir->ref_p[2][2]) / 3.0;
 +            int frequency = int(ir->tau_p / ir->delta_t); // update frequency in time steps
 +            if (frequency < 1) frequency = 1;
 +            double temperature = ir->opts.ref_t[0]; // in kelvin
 +            sys->addForce(new MonteCarloBarostat(pressure, temperature, frequency));
 +        }
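 +        /* Illustrative note (not in the original source): with e.g. tau_p = 1.0 ps
 +         * and delta_t = 0.002 ps the barostat attempts a Monte Carlo volume move
 +         * every int(1.0 / 0.002) = 500 steps, using the isotropic reference pressure
 +         * obtained above as the trace of the pressure tensor divided by 3.
 +         */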
 +
 +        integ->setConstraintTolerance(ir->shake_tol);
 +
 +        // Create a context and initialize it.
 +        Context* context = NULL;
 +
 +        /*      
 +        OpenMM could automatically select the "best" GPU; however, we're not
 +        going to let it do that for now, as the current algorithm is very rudimentary
 +        and we only support CUDA anyway.
 +        if (platformOptStr == NULL || platformOptStr == "")
 +        {
 +            context = new Context(*sys, *integ);
 +        }
 +        else
 +        */        
 +        {
 +            /* which platform should we use */
 +            for (int i = 0; i < (int)Platform::getNumPlatforms() && context == NULL; i++)
 +            {
 +                if (isStringEqNCase(opt->getOptionValue("platform"), Platform::getPlatform(i).getName()))
 +                {
 +                    Platform& platform = Platform::getPlatform(i);
 +                    // set standard properties
 +                    platform.setPropertyDefaultValue("CudaDevice", opt->getOptionValue("deviceid"));
 +                    // TODO add extra properties
 +                    context = new Context(*sys, *integ, platform);
 +                }
 +            }
 +            if (context == NULL)
 +            {
 +                gmx_fatal(FARGS, "The requested platform \"%s\" could not be found.", 
 +                        opt->getOptionValue("platform").c_str());
 +            }
 +        }
 +
 +        Platform& platform = context->getPlatform();
 +        fprintf(fplog, "Gromacs will use the OpenMM platform: %s\n", platform.getName().c_str());
 +
 +        const vector<string>& properties = platform.getPropertyNames();
 +        if (debug)
 +        {
 +            for (int i = 0; i < (int)properties.size(); i++)
 +            {
 +                fprintf(debug, ">> %s: %s\n", properties[i].c_str(), 
 +                        platform.getPropertyValue(*context, properties[i]).c_str());
 +            }
 +        }
 +
 +        /* only for CUDA */
 +        if (isStringEqNCase(opt->getOptionValue("platform"), "CUDA"))
 +        {
 +            int tmp;
 +            if (!from_string<int>(tmp, platform.getPropertyValue(*context, "CudaDevice"), std::dec))
 +            {
 +                gmx_fatal(FARGS, "Internal error: couldn't determine the device selected by OpenMM");
 +
 +            }
 +
 +            /* For now this is just to double-check if OpenMM selected the GPU we wanted,
 +            but once we let OpenMM select the GPU automatically, this will query the deviceId.
 +            */            
 +            if (tmp != devId)
 +            {
 +                gmx_fatal(FARGS, "Internal error: OpenMM is using device #%d "
 +                        "while initialized for device #%d", tmp, devId);
 +            }        
 +            
 +            /* check GPU compatibility */
 +            char gpuname[STRLEN];
 +            devId = atoi(opt->getOptionValue("deviceid").c_str());
 +            if (!is_gmx_openmm_supported_gpu(-1, gpuname))
 +            {
 +                if (!gmx_strcasecmp(opt->getOptionValue("force-device").c_str(), "yes"))
 +                {
 +                    sprintf(warn_buf, "Unsupported GPU selected (#%d, %s), continuing anyway. "
 +                            "Note that the simulation can be slow or it might even crash.",
 +                            devId, gpuname);
 +                    fprintf(fplog, "%s\n", warn_buf);
 +                    gmx_warning(warn_buf);
 +                }
 +                else
 +                {
 +                    gmx_fatal(FARGS, "The selected GPU (#%d, %s) is not supported by Gromacs! "
 +                              "Most probably you have a low-end GPU which would not perform well, " 
 +                              "or new hardware that has not been tested with the current release. "
 +                              "If you still want to try using the device, use the force-device=yes option.", 
 +                              devId, gpuname);
 +                }
 +            }
 +            else
 +            {
 +                fprintf(fplog, "Gromacs will run on the GPU #%d (%s).\n", devId, gpuname);
 +            }
 +        }
 +        
 +        /* only for CUDA */
 +        if (isStringEqNCase(opt->getOptionValue("platform"), "CUDA"))
 +        {
 +            /* pre-simulation memtest */
 +            runMemtest(fplog, -1, "Pre", opt);
 +        }
 +
 +        vector<Vec3> pos(numAtoms);
 +        vector<Vec3> vel(numAtoms);
 +        for (int i = 0; i < numAtoms; ++i)
 +        {
 +            pos[i] = Vec3(state->x[i][0], state->x[i][1], state->x[i][2]);
 +            vel[i] = Vec3(state->v[i][0], state->v[i][1], state->v[i][2]);
 +        }
 +        context->setPositions(pos);
 +        context->setVelocities(vel);
 +
 +        // Return a structure containing the system, integrator, and context.
 +        OpenMMData* data = new OpenMMData();
 +        data->system = sys;
 +        data->integrator = integ;
 +        data->context = context;
 +        data->removeCM = (ir->nstcomm > 0);
 +        data->platformOpt = opt;
 +        return data;
 +    }
 +    catch (std::exception& e)
 +    {
 +        gmx_fatal(FARGS, "OpenMM exception caught while initializing: %s", e.what());
 +    } 
 +    return NULL; /* just to avoid warnings */
 +}
 +
 +/*!
 + * \brief Integrate one step.
 + *
 + * \param[in] data  OpenMMData object created by openmm_init().
 + */
 +void openmm_take_one_step(void* data)
 +{
 +    // static int step = 0; printf("----> taking step #%d\n", step++);
 +    try
 +    {
 +        static_cast<OpenMMData*>(data)->integrator->step(1);
 +    }
 +    catch (std::exception& e)
 +    {
 +        gmx_fatal(FARGS, "OpenMM exception caught while taking a step: %s", e.what());
 +    }
 +}
 +
 +/*!
 + * \brief Integrate n steps.
 + *
 + * \param[in] data   OpenMMData object created by openmm_init().
 + * \param[in] nstep  Number of steps to take.
 + */
 +void openmm_take_steps(void* data, int nstep)
 +{
 +    try
 +    {
 +        static_cast<OpenMMData*>(data)->integrator->step(nstep);
 +    }
 +    catch (std::exception& e)
 +    {
 +        gmx_fatal(FARGS, "OpenMM exception caught while taking a step: %s", e.what());
 +    }
 +}
 +
 +/*!
 + * \brief Clean up the data structures created for OpenMM.
 + *
 + * \param[in] fplog Log file pointer.
 + * \param[in] data  OpenMMData object created by openmm_init().
 + */
 +void openmm_cleanup(FILE* fplog, void* data)
 +{
 +    OpenMMData* d = static_cast<OpenMMData*>(data);
 +    /* only for CUDA */
 +    if (isStringEqNCase(d->platformOpt->getOptionValue("platform"), "CUDA"))
 +    {
 +        /* post-simulation memtest */
 +        runMemtest(fplog, -1, "Post", d->platformOpt);
 +    }
 +    delete d->system;
 +    delete d->integrator;
 +    delete d->context;
 +    delete d->platformOpt;
 +    delete d;
 +}
 +
 +/*!
 + * \brief Copy the current state information from OpenMM into the Gromacs data structures.
 + * 
 + * This function results in the requested properties being copied from the
 + * GPU to the host. As this represents a bottleneck, the frequency of pulling data
 + * should be minimized.
 + *
 + * \param[in]   data        OpenMMData object created by openmm_init().
 + * \param[out]  time        Simulation time for which the state was created.
 + * \param[out]  state       State of the system: coordinates and velocities.
 + * \param[out]  f           Forces.
 + * \param[out]  enerd       Energies.
 + * \param[in]   includePos  True if coordinates are requested.
 + * \param[in]   includeVel  True if velocities are requested. 
 + * \param[in]   includeForce True if forces are requested. 
 + * \param[in]   includeEnergy True if energies are requested. 
 + */
 +void openmm_copy_state(void *data,
 +                       t_state *state, double *time,
 +                       rvec f[], gmx_enerdata_t *enerd,
 +                       gmx_bool includePos, gmx_bool includeVel, gmx_bool includeForce, gmx_bool includeEnergy)
 +{
 +    int types = 0;
 +    if (includePos)
 +        types += State::Positions;
 +    if (includeVel)
 +        types += State::Velocities;
 +    if (includeForce)
 +        types += State::Forces;
 +    if (includeEnergy)
 +        types += State::Energy;
 +    if (types == 0)
 +        return;
 +    try
 +    {
 +        State currentState = static_cast<OpenMMData*>(data)->context->getState(types);
 +        int numAtoms =  static_cast<OpenMMData*>(data)->system->getNumParticles();
 +        if (includePos)
 +        {
 +            for (int i = 0; i < numAtoms; i++)
 +            {
 +                Vec3 x = currentState.getPositions()[i];
 +                state->x[i][0] = x[0];
 +                state->x[i][1] = x[1];
 +                state->x[i][2] = x[2];
 +            }
 +        }
 +        if (includeVel)
 +        {
 +            for (int i = 0; i < numAtoms; i++)
 +            {
 +                Vec3 v = currentState.getVelocities()[i];
 +                state->v[i][0] = v[0];
 +                state->v[i][1] = v[1];
 +                state->v[i][2] = v[2];
 +            }
 +        }
 +        if (includeForce)
 +        {
 +            for (int i = 0; i < numAtoms; i++)
 +            {
 +                Vec3 force = currentState.getForces()[i];
 +                f[i][0] = force[0];
 +                f[i][1] = force[1];
 +                f[i][2] = force[2];
 +            }
 +        }
 +        if (includeEnergy)
 +        {
 +            int numConstraints = static_cast<OpenMMData*>(data)->system->getNumConstraints();
 +            int dof = 3*numAtoms-numConstraints;
 +            if (static_cast<OpenMMData*>(data)->removeCM)
 +                dof -= 3;
 +            enerd->term[F_EPOT] = currentState.getPotentialEnergy();
 +            enerd->term[F_EKIN] = currentState.getKineticEnergy();
 +            enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
 +            enerd->term[F_TEMP] = 2.0*enerd->term[F_EKIN]/dof/BOLTZ;
 +        }
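 +        /* Illustrative note (not in the original source): e.g. 3000 atoms with
 +         * 3000 constraints and COM motion removal give
 +         * dof = 3*3000 - 3000 - 3 = 5997, and the temperature above follows from
 +         * equipartition, T = 2*Ekin/(dof*kB).
 +         */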
 +        *time = currentState.getTime();
 +    }
 +    catch (std::exception& e)
 +    {
 +        gmx_fatal(FARGS, "OpenMM exception caught while retrieving state information: %s", e.what());
 +    }
 +}
index 2962dd803a8b0b5ff3b15fdb4624a31fe38eaedd,0000000000000000000000000000000000000000..3e2ecf3cf3c3b20a2768534aae05bdf9f54a0c0f
mode 100644,000000..100644
--- /dev/null
@@@ -1,650 -1,0 +1,743 @@@
-     real rcut;            /* Coulomb cut-off                              */
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 4.6.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2011, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include "smalloc.h"
 +#include "network.h"
 +#include "calcgrid.h"
 +#include "pme.h"
 +#include "vec.h"
 +#include "domdec.h"
 +#include "nbnxn_cuda_data_mgmt.h"
 +#include "force.h"
 +#include "macros.h"
 +#include "pme_loadbal.h"
 +
 +/* Parameters and setting for one PP-PME setup */
 +typedef struct {
-     real rbuf;          /* the pairlist buffer size */
++    real rcut_coulomb;    /* Coulomb cut-off                              */
 +    real rlist;           /* pair-list cut-off                            */
++    real rlistlong;       /* LR pair-list cut-off                         */
++    int  nstcalclr;       /* frequency of evaluating long-range forces for group scheme */
 +    real spacing;         /* (largest) PME grid spacing                   */
 +    ivec grid;            /* the PME grid dimensions                      */
 +    real grid_efficiency; /* inefficiency factor for non-uniform grids <= 1 */
 +    real ewaldcoeff;      /* the Ewald coefficient                        */
 +    gmx_pme_t pmedata;    /* the data structure used in the PME code      */
 +
 +    int  count;           /* number of times this setup has been timed    */
 +    double cycles;        /* the fastest time for this setup in cycles    */
 +} pme_setup_t;
 +
 +/* In the initial scan, step by grids that are at least a factor 0.8 coarser */
 +#define PME_LB_GRID_SCALE_FAC  0.8
 +/* In the initial scan, try to skip grids with uneven x/y/z spacing,
 + * checking if the "efficiency" is more than 5% worse than the previous grid.
 + */
 +#define PME_LB_GRID_EFFICIENCY_REL_FAC  1.05
 +/* Rerun setups up to 12% slower than the fastest one found so far */
 +#define PME_LB_SLOW_FAC  1.12
 +/* If setups get more than 2% faster, do another round to avoid
 + * choosing a slower setup due to acceleration or fluctuations.
 + */
 +#define PME_LB_ACCEL_TOL 1.02
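 +/* Illustrative note (not in the original source): if the fastest setup so far took
 + * 100 M-cycles, setups slower than 112 M-cycles (PME_LB_SLOW_FAC) are skipped in
 + * later stages, while a setup that runs more than ~2% faster than its previously
 + * recorded time (PME_LB_ACCEL_TOL) triggers an extra balancing stage.
 + */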
 +
 +enum { epmelblimNO, epmelblimBOX, epmelblimDD, epmelblimNR };
 +
 +const char *pmelblim_str[epmelblimNR] =
 +{ "no", "box size", "domain decomposition" };
 +
 +struct pme_load_balancing {
 +    int  nstage;        /* the current maximum number of stages */
 +
 +    real cut_spacing;   /* the minimum cutoff / PME grid spacing ratio */
-     pme_lb->rbuf = ic->rlist - ic->rcoulomb;
++    real rcut_vdw;      /* Vdw cutoff (does not change) */
++    real rcut_coulomb_start; /* Initial electrostatics cutoff */
++    int  nstcalclr_start; /* Initial long-range force evaluation frequency */
++    real rbuf_coulomb;  /* the Coulomb pairlist buffer size */
++    real rbuf_vdw;      /* the VdW pairlist buffer size */
 +    matrix box_start;   /* the initial simulation box */
 +    int n;              /* the count of setup as well as the allocation size */
 +    pme_setup_t *setup; /* the PME+cutoff setups */
 +    int cur;            /* the current setup */
 +    int fastest;        /* fastest setup up till now */
 +    int start;          /* start of setup range to consider in stage>0 */
 +    int end;            /* end   of setup range to consider in stage>0 */
 +    int elimited;       /* was the balancing limited, uses enum above */
++    int cutoff_scheme;  /* Verlet or group cut-offs */
 +
 +    int stage;          /* the current stage */
 +};
 +
 +void pme_loadbal_init(pme_load_balancing_t *pme_lb_p,
 +                      const t_inputrec *ir,matrix box,
 +                      const interaction_const_t *ic,
 +                      gmx_pme_t pmedata)
 +{
 +    pme_load_balancing_t pme_lb;
 +    real spm,sp;
 +    int  d;
 +
 +    snew(pme_lb,1);
 +
 +    /* Any number of stages >= 2 is supported */
 +    pme_lb->nstage   = 2;
 +
-     pme_lb->setup[0].rcut       = ic->rcoulomb;
-     pme_lb->setup[0].rlist      = ic->rlist;
-     pme_lb->setup[0].grid[XX]   = ir->nkx;
-     pme_lb->setup[0].grid[YY]   = ir->nky;
-     pme_lb->setup[0].grid[ZZ]   = ir->nkz;
-     pme_lb->setup[0].ewaldcoeff = ic->ewaldcoeff;
++    pme_lb->cutoff_scheme = ir->cutoff_scheme;
++
++    if(pme_lb->cutoff_scheme == ecutsVERLET)
++    {
++        pme_lb->rbuf_coulomb = ic->rlist - ic->rcoulomb;
++        pme_lb->rbuf_vdw     = pme_lb->rbuf_coulomb;
++    }
++    else
++    {
++        if(ic->rcoulomb > ic->rlist)
++        {
++            pme_lb->rbuf_coulomb = ic->rlistlong - ic->rcoulomb;
++        }
++        else
++        {
++            pme_lb->rbuf_coulomb = ic->rlist - ic->rcoulomb;
++        }
++        if(ic->rvdw > ic->rlist)
++        {
++            pme_lb->rbuf_vdw = ic->rlistlong - ic->rvdw;
++        }
++        else
++        {
++            pme_lb->rbuf_vdw = ic->rlist - ic->rvdw;
++        }
++    }
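++    /* Illustrative note (not in the original source), for hypothetical group-scheme
++     * input values rlist = 0.9, rcoulomb = 1.2, rvdw = 0.9 and rlistlong = 1.4 nm:
++     * the branch above gives rbuf_coulomb = 1.4 - 1.2 = 0.2 nm and
++     * rbuf_vdw = 0.9 - 0.9 = 0.0 nm.
++     */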
 +
 +    copy_mat(box,pme_lb->box_start);
 +    if (ir->ePBC==epbcXY && ir->nwall==2)
 +    {
 +        svmul(ir->wall_ewald_zfac,pme_lb->box_start[ZZ],pme_lb->box_start[ZZ]);
 +    }
 +
 +    pme_lb->n = 1;
 +    snew(pme_lb->setup,pme_lb->n);
 +
++    pme_lb->rcut_vdw              = ic->rvdw;
++    pme_lb->rcut_coulomb_start    = ir->rcoulomb;
++    pme_lb->nstcalclr_start       = ir->nstcalclr;
++    
 +    pme_lb->cur = 0;
-     set->rcut    = pme_lb->cut_spacing*sp;
-     set->rlist   = set->rcut + pme_lb->rbuf;
-     set->spacing = sp;
++    pme_lb->setup[0].rcut_coulomb = ic->rcoulomb;
++    pme_lb->setup[0].rlist        = ic->rlist;
++    pme_lb->setup[0].rlistlong    = ic->rlistlong;
++    pme_lb->setup[0].nstcalclr    = ir->nstcalclr;
++    pme_lb->setup[0].grid[XX]     = ir->nkx;
++    pme_lb->setup[0].grid[YY]     = ir->nky;
++    pme_lb->setup[0].grid[ZZ]     = ir->nkz;
++    pme_lb->setup[0].ewaldcoeff   = ic->ewaldcoeff;
 +
 +    pme_lb->setup[0].pmedata  = pmedata;
 +    
 +    spm = 0;
 +    for(d=0; d<DIM; d++)
 +    {
 +        sp = norm(pme_lb->box_start[d])/pme_lb->setup[0].grid[d];
 +        if (sp > spm)
 +        {
 +            spm = sp;
 +        }
 +    }
 +    pme_lb->setup[0].spacing = spm;
 +
 +    if (ir->fourier_spacing > 0)
 +    {
 +        pme_lb->cut_spacing = ir->rcoulomb/ir->fourier_spacing;
 +    }
 +    else
 +    {
 +        pme_lb->cut_spacing = ir->rcoulomb/pme_lb->setup[0].spacing;
 +    }
 +
 +    pme_lb->stage = 0;
 +
 +    pme_lb->fastest  = 0;
 +    pme_lb->start    = 0;
 +    pme_lb->end      = 0;
 +    pme_lb->elimited = epmelblimNO;
 +
 +    *pme_lb_p = pme_lb;
 +}
 +
 +static gmx_bool pme_loadbal_increase_cutoff(pme_load_balancing_t pme_lb,
 +                                            int pme_order)
 +{
 +    pme_setup_t *set;
 +    real fac,sp;
++    real tmpr_coulomb,tmpr_vdw;
 +    int d;
 +
 +    /* Try to add a new setup with next larger cut-off to the list */
 +    pme_lb->n++;
 +    srenew(pme_lb->setup,pme_lb->n);
 +    set = &pme_lb->setup[pme_lb->n-1];
 +    set->pmedata = NULL;
 +
 +    fac = 1;
 +    do
 +    {
 +        fac *= 1.01;
 +        clear_ivec(set->grid);
 +        sp = calc_grid(NULL,pme_lb->box_start,
 +                       fac*pme_lb->setup[pme_lb->cur].spacing,
 +                       &set->grid[XX],
 +                       &set->grid[YY],
 +                       &set->grid[ZZ]);
 +
 +        /* In parallel we can't have grids smaller than 2*pme_order,
 +         * and we would anyhow not gain much speed at these grid sizes.
 +         */
 +        for(d=0; d<DIM; d++)
 +        {
 +            if (set->grid[d] <= 2*pme_order)
 +            {
 +                pme_lb->n--;
 +
 +                return FALSE;
 +            }
 +        }
 +    }
 +    while (sp <= 1.001*pme_lb->setup[pme_lb->cur].spacing);
 +
-         pme_lb->setup[0].ewaldcoeff*pme_lb->setup[0].rcut/set->rcut;
++    set->rcut_coulomb = pme_lb->cut_spacing*sp;
++
++    if(pme_lb->cutoff_scheme == ecutsVERLET)
++    {
++        set->rlist        = set->rcut_coulomb + pme_lb->rbuf_coulomb;
++        /* We don't use LR lists with Verlet, but this avoids if-statements in further checks */
++        set->rlistlong    = set->rlist;
++    }
++    else
++    {
++        tmpr_coulomb          = set->rcut_coulomb + pme_lb->rbuf_coulomb;
++        tmpr_vdw              = pme_lb->rcut_vdw + pme_lb->rbuf_vdw;
++        set->rlist            = min(tmpr_coulomb,tmpr_vdw);
++        set->rlistlong        = max(tmpr_coulomb,tmpr_vdw);
++        
++        /* Set the long-range update frequency */
++        if(set->rlist == set->rlistlong)
++        {
++            /* No long-range interactions if the short-/long-range cutoffs are identical */
++            set->nstcalclr = 0;
++        }
++        else if(pme_lb->nstcalclr_start==0 || pme_lb->nstcalclr_start==1)
++        {
++            /* We were not doing long-range before, but now we are since rlist!=rlistlong */
++            set->nstcalclr = 1;
++        }
++        else
++        {
++            /* We were already doing long-range interactions from the start */
++            if(pme_lb->rcut_vdw > pme_lb->rcut_coulomb_start)
++            {
++                /* We were originally doing long-range VdW-only interactions.
++                 * If rvdw is still longer than rcoulomb we keep the original nstcalclr,
++                 * but if the coulomb cutoff has become longer we should update the long-range
++                 * part every step.
++                 */
++                set->nstcalclr = (tmpr_vdw > tmpr_coulomb) ? pme_lb->nstcalclr_start : 1;
++            }
++            else
++            {
++                /* We were not doing any long-range interaction from the start,
++                 * since it is not possible to do twin-range coulomb for the PME interaction.
++                 */
++                set->nstcalclr = 1;
++            }
++        }
++    }
++    
++    set->spacing      = sp;
 +    /* The grid efficiency is the size wrt a grid with uniform x/y/z spacing */
 +    set->grid_efficiency = 1;
 +    for(d=0; d<DIM; d++)
 +    {
 +        set->grid_efficiency *= (set->grid[d]*sp)/norm(pme_lb->box_start[d]);
 +    }
 +    /* The Ewald coefficient is inversely proportional to the cut-off */
 +    set->ewaldcoeff =
-         fprintf(debug,"PME loadbal: grid %d %d %d, cutoff %f\n",
-                 set->grid[XX],set->grid[YY],set->grid[ZZ],set->rcut);
++        pme_lb->setup[0].ewaldcoeff*pme_lb->setup[0].rcut_coulomb/set->rcut_coulomb;
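++    /* Illustrative note (not in the original source): keeping erfc(beta*rc) at the
++     * requested tolerance keeps the product beta*rc fixed, so e.g. increasing the
++     * coulomb cut-off from 0.9 nm to 1.08 nm scales the Ewald coefficient by
++     * 0.9/1.08 ~ 0.83.
++     */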
 +
 +    set->count   = 0;
 +    set->cycles  = 0;
 +
 +    if (debug)
 +    {
++        fprintf(debug,"PME loadbal: grid %d %d %d, coulomb cutoff %f\n",
++                set->grid[XX],set->grid[YY],set->grid[ZZ],set->rcut_coulomb);
 +    }
-     
 +    return TRUE;
 +}
 +
 +static void print_grid(FILE *fp_err,FILE *fp_log,
 +                       const char *pre,
 +                       const char *desc,
 +                       const pme_setup_t *set,
 +                       double cycles)
 +{
 +    char buf[STRLEN],buft[STRLEN];
-     sprintf(buf,"%-11s%10s pme grid %d %d %d, cutoff %.3f%s",
++
 +    if (cycles >= 0)
 +    {
 +        sprintf(buft,": %.1f M-cycles",cycles*1e-6);
 +    }
 +    else
 +    {
 +        buft[0] = '\0';
 +    }
-             desc,set->grid[XX],set->grid[YY],set->grid[ZZ],set->rcut,
++    sprintf(buf,"%-11s%10s pme grid %d %d %d, coulomb cutoff %.3f%s",
 +            pre,
-     sprintf(buf,"step %4s: the %s limited the PME load balancing to a cut-off of %.3f",
++            desc,set->grid[XX],set->grid[YY],set->grid[ZZ],set->rcut_coulomb,
 +            buft);
 +    if (fp_err != NULL)
 +    {
 +        fprintf(fp_err,"\r%s\n",buf);
 +    }
 +    if (fp_log != NULL)
 +    {
 +        fprintf(fp_log,"%s\n",buf);
 +    }
 +}
 +
 +static int pme_loadbal_end(pme_load_balancing_t pme_lb)
 +{
 +    /* In the initial stage only n is set; end is not set yet */
 +    if (pme_lb->end > 0)
 +    {
 +        return pme_lb->end;
 +    }
 +    else
 +    {
 +        return pme_lb->n;
 +    }
 +}
 +
 +static void print_loadbal_limited(FILE *fp_err,FILE *fp_log,
 +                                  gmx_large_int_t step,
 +                                  pme_load_balancing_t pme_lb)
 +{
 +    char buf[STRLEN],sbuf[22];
 +
-             pme_lb->setup[pme_loadbal_end(pme_lb)-1].rcut);
++    sprintf(buf,"step %4s: the %s limited the PME load balancing to a coulomb cut-off of %.3f",
 +            gmx_step_str(step,sbuf),
 +            pmelblim_str[pme_lb->elimited],
++            pme_lb->setup[pme_loadbal_end(pme_lb)-1].rcut_coulomb);
 +    if (fp_err != NULL)
 +    {
 +        fprintf(fp_err,"\r%s\n",buf);
 +    }
 +    if (fp_log != NULL)
 +    {
 +        fprintf(fp_log,"%s\n",buf);
 +    }
 +}
 +
 +static void switch_to_stage1(pme_load_balancing_t pme_lb)
 +{
 +    pme_lb->start = 0;
 +    while (pme_lb->start+1 < pme_lb->n &&
 +           (pme_lb->setup[pme_lb->start].count == 0 ||
 +            pme_lb->setup[pme_lb->start].cycles >
 +            pme_lb->setup[pme_lb->fastest].cycles*PME_LB_SLOW_FAC))
 +    {
 +        pme_lb->start++;
 +    }
 +    while (pme_lb->start > 0 && pme_lb->setup[pme_lb->start-1].cycles == 0)
 +    {
 +        pme_lb->start--;
 +    }
 +
 +    pme_lb->end = pme_lb->n;
 +    if (pme_lb->setup[pme_lb->end-1].count > 0 &&
 +        pme_lb->setup[pme_lb->end-1].cycles >
 +        pme_lb->setup[pme_lb->fastest].cycles*PME_LB_SLOW_FAC)
 +    {
 +        pme_lb->end--;
 +    }
 +
 +    pme_lb->stage = 1;
 +
 +    /* Next we want to choose setup pme_lb->start, but as we will increase
 +     * pme_lb->cur by one right after returning, we subtract 1 here.
 +     */
 +    pme_lb->cur = pme_lb->start - 1;
 +}
 +
 +gmx_bool pme_load_balance(pme_load_balancing_t pme_lb,
 +                          t_commrec *cr,
 +                          FILE *fp_err,
 +                          FILE *fp_log,
 +                          t_inputrec *ir,
 +                          t_state *state,
 +                          double cycles,
 +                          interaction_const_t *ic,
 +                          nonbonded_verlet_t *nbv,
 +                          gmx_pme_t *pmedata,
 +                          gmx_large_int_t step)
 +{
 +    gmx_bool OK;
 +    pme_setup_t *set;
 +    double cycles_fast;
 +    char buf[STRLEN],sbuf[22];
++    real rtab;
++    gmx_bool bUsesSimpleTables = TRUE;
 +
 +    if (pme_lb->stage == pme_lb->nstage)
 +    {
 +        return FALSE;
 +    }
 +
 +    if (PAR(cr))
 +    {
 +        gmx_sumd(1,&cycles,cr);
 +        cycles /= cr->nnodes;
 +    }
 +
 +    set = &pme_lb->setup[pme_lb->cur];
-                 
 +    set->count++;
++
++    rtab = ir->rlistlong + ir->tabext;
++
 +    if (set->count % 2 == 1)
 +    {
 +        /* Skip the first cycle, because the first step after a switch
 +         * is much slower due to allocation and/or caching effects.
 +         */
 +        return TRUE;
 +    }
 +
 +    sprintf(buf, "step %4s: ", gmx_step_str(step,sbuf));
 +    print_grid(fp_err,fp_log,buf,"timed with",set,cycles);
 +
 +    if (set->count <= 2)
 +    {
 +        set->cycles = cycles;
 +    }
 +    else
 +    {
 +        if (cycles*PME_LB_ACCEL_TOL < set->cycles &&
 +            pme_lb->stage == pme_lb->nstage - 1)
 +        {
 +            /* The performance went up a lot (due to e.g. DD load balancing).
 +             * Add a stage, keep the minima, but rescan all setups.
 +             */
 +            pme_lb->nstage++;
 +
 +            if (debug)
 +            {
 +                fprintf(debug,"The performance for grid %d %d %d went from %.3f to %.1f M-cycles, this is more than %f\n"
 +                        "Increased the number of stages to %d"
 +                        " and ignoring the previous performance\n",
 +                        set->grid[XX],set->grid[YY],set->grid[ZZ],
 +                        cycles*1e-6,set->cycles*1e-6,PME_LB_ACCEL_TOL,
 +                        pme_lb->nstage);
 +            }
 +        }
 +        set->cycles = min(set->cycles,cycles);
 +    }
 +
 +    if (set->cycles < pme_lb->setup[pme_lb->fastest].cycles)
 +    {
 +        pme_lb->fastest = pme_lb->cur;
 +    }
 +    cycles_fast = pme_lb->setup[pme_lb->fastest].cycles;
 +
 +    /* Check in stage 0 if we should stop scanning grids.
 +     * Stop when the time is more than SLOW_FAC longer than the fastest.
 +     */
 +    if (pme_lb->stage == 0 && pme_lb->cur > 0 &&
 +        cycles > pme_lb->setup[pme_lb->fastest].cycles*PME_LB_SLOW_FAC)
 +    {
 +        pme_lb->n = pme_lb->cur + 1;
 +        /* Done with scanning, go to stage 1 */
 +        switch_to_stage1(pme_lb);
 +    }
 +
 +    if (pme_lb->stage == 0)
 +    {
 +        int gridsize_start;
 +
 +        gridsize_start = set->grid[XX]*set->grid[YY]*set->grid[ZZ];
 +
 +        do
 +        {
 +            if (pme_lb->cur+1 < pme_lb->n)
 +            {
 +                /* We had already generated the next setup */
 +                OK = TRUE;
 +            }
 +            else
 +            {
 +                /* Find the next setup */
 +                OK = pme_loadbal_increase_cutoff(pme_lb,ir->pme_order);
 +            }
-                 OK = (sqr(pme_lb->setup[pme_lb->cur+1].rlist)
++
 +            if (OK && ir->ePBC != epbcNONE)
 +            {
-                                           pme_lb->setup[pme_lb->cur].rlist);
++                OK = (sqr(pme_lb->setup[pme_lb->cur+1].rlistlong)
 +                      <= max_cutoff2(ir->ePBC,state->box));
 +                if (!OK)
 +                {
 +                    pme_lb->elimited = epmelblimBOX;
 +                }
 +            }
 +
 +            if (OK)
 +            {
 +                pme_lb->cur++;
 +
 +                if (DOMAINDECOMP(cr))
 +                {
 +                    OK = change_dd_cutoff(cr,state,ir,
-         OK = change_dd_cutoff(cr,state,ir,pme_lb->setup[pme_lb->cur].rlist);
++                                          pme_lb->setup[pme_lb->cur].rlistlong);
 +                    if (!OK)
 +                    {
 +                        /* Failed: do not use this setup */
 +                        pme_lb->cur--;
 +                        pme_lb->elimited = epmelblimDD;
 +                    }
 +                }
 +            }
 +            if (!OK)
 +            {
 +                /* We hit the upper limit for the cut-off,
 +                 * the setup should not go further than cur.
 +                 */
 +                pme_lb->n = pme_lb->cur + 1;
 +                print_loadbal_limited(fp_err,fp_log,step,pme_lb);
 +                /* Switch to the next stage */
 +                switch_to_stage1(pme_lb);
 +            }
 +        }
 +        while (OK &&
 +               !(pme_lb->setup[pme_lb->cur].grid[XX]*
 +                 pme_lb->setup[pme_lb->cur].grid[YY]*
 +                 pme_lb->setup[pme_lb->cur].grid[ZZ] <
 +                 gridsize_start*PME_LB_GRID_SCALE_FAC
 +                 &&
 +                 pme_lb->setup[pme_lb->cur].grid_efficiency <
 +                 pme_lb->setup[pme_lb->cur-1].grid_efficiency*PME_LB_GRID_EFFICIENCY_REL_FAC));
 +    }
 +
 +    if (pme_lb->stage > 0 && pme_lb->end == 1)
 +    {
 +        pme_lb->cur = 0;
 +        pme_lb->stage = pme_lb->nstage;
 +    }
 +    else if (pme_lb->stage > 0 && pme_lb->end > 1)
 +    {
 +        /* If stage = nstage-1:
 +         *   scan over all setups, rerunning only those setups
 +         *   which are not much slower than the fastest
 +         * else:
 +         *   use the next setup
 +         */
 +        do
 +        {
 +            pme_lb->cur++;
 +            if (pme_lb->cur == pme_lb->end)
 +            {
 +                pme_lb->stage++;
 +                pme_lb->cur = pme_lb->start;
 +            }
 +        }
 +        while (pme_lb->stage == pme_lb->nstage - 1 &&
 +               pme_lb->setup[pme_lb->cur].count > 0 &&
 +               pme_lb->setup[pme_lb->cur].cycles > cycles_fast*PME_LB_SLOW_FAC);
 +
 +        if (pme_lb->stage == pme_lb->nstage)
 +        {
 +            /* We are done optimizing, use the fastest setup we found */
 +            pme_lb->cur = pme_lb->fastest;
 +        }
 +    }
 +
 +    if (DOMAINDECOMP(cr) && pme_lb->stage > 0)
 +    {
-     ic->rcoulomb   = set->rcut;
++        OK = change_dd_cutoff(cr,state,ir,pme_lb->setup[pme_lb->cur].rlistlong);
 +        if (!OK)
 +        {
 +            /* Failsafe solution */
 +            if (pme_lb->cur > 1 && pme_lb->stage == pme_lb->nstage)
 +            {
 +                pme_lb->stage--;
 +            }
 +            pme_lb->fastest  = 0;
 +            pme_lb->start    = 0;
 +            pme_lb->end      = pme_lb->cur;
 +            pme_lb->cur      = pme_lb->start;
 +            pme_lb->elimited = epmelblimDD;
 +            print_loadbal_limited(fp_err,fp_log,step,pme_lb);
 +        }
 +    }
 +
 +    /* Change the Coulomb cut-off and the PME grid */
 +
 +    set = &pme_lb->setup[pme_lb->cur];
 +
-     if (nbv->grp[0].kernel_type == nbk8x8x8_CUDA)
++    ic->rcoulomb   = set->rcut_coulomb;
 +    ic->rlist      = set->rlist;
++    ic->rlistlong  = set->rlistlong;
++    ir->nstcalclr  = set->nstcalclr;
 +    ic->ewaldcoeff = set->ewaldcoeff;
 +
-         init_interaction_const_tables(NULL,ic,nbv->grp[0].kernel_type);
++    bUsesSimpleTables = uses_simple_tables(ir->cutoff_scheme, nbv, 0);
++    if (pme_lb->cutoff_scheme == ecutsVERLET && nbv->grp[0].kernel_type == nbk8x8x8_CUDA)
 +    {
 +        nbnxn_cuda_pme_loadbal_update_param(nbv->cu_nbv,ic);
 +    }
 +    else
 +    {
-     if (nbv->ngrp > 1)
++        init_interaction_const_tables(NULL,ic,bUsesSimpleTables,
++                                      rtab);
 +    }
 +
-         init_interaction_const_tables(NULL,ic,nbv->grp[1].kernel_type);
++    if (pme_lb->cutoff_scheme == ecutsVERLET && nbv->ngrp > 1)
 +    {
-             setup->rcut,setup->rlist,
++        init_interaction_const_tables(NULL,ic,bUsesSimpleTables,
++                                      rtab);
 +    }
 +
 +    if (cr->duty & DUTY_PME)
 +    {
 +        if (pme_lb->setup[pme_lb->cur].pmedata == NULL)
 +        {
 +            /* Generate a new PME data structure,
 +             * copying part of the old pointers.
 +             */
 +            gmx_pme_reinit(&set->pmedata,
 +                           cr,pme_lb->setup[0].pmedata,ir,
 +                           set->grid);
 +        }
 +        *pmedata = set->pmedata;
 +    }
 +    else
 +    {
 +        /* Tell our PME-only node to switch grid */
 +        gmx_pme_send_switch(cr, set->grid, set->ewaldcoeff);
 +    }
 +
 +    if (debug)
 +    {
 +        print_grid(NULL,debug,"","switched to",set,-1);
 +    }
 +
 +    if (pme_lb->stage == pme_lb->nstage)
 +    {
 +        print_grid(fp_err,fp_log,"","optimal",set,-1);
 +    }
 +
 +    return TRUE;
 +}
 +
 +void restart_pme_loadbal(pme_load_balancing_t pme_lb, int n)
 +{
 +    pme_lb->nstage += n;
 +}
 +
 +static int pme_grid_points(const pme_setup_t *setup)
 +{
 +    return setup->grid[XX]*setup->grid[YY]*setup->grid[ZZ];
 +}
 +
 +static void print_pme_loadbal_setting(FILE *fplog,
 +                                     char *name,
 +                                     const pme_setup_t *setup)
 +{
 +    fprintf(fplog,
 +            "   %-7s %6.3f nm %6.3f nm     %3d %3d %3d   %5.3f nm  %5.3f nm\n",
 +            name,
-     pp_ratio   = pow(pme_lb->setup[pme_lb->cur].rlist/pme_lb->setup[0].rlist,3.0);
++            setup->rcut_coulomb,setup->rlist,
 +            setup->grid[XX],setup->grid[YY],setup->grid[ZZ],
 +            setup->spacing,1/setup->ewaldcoeff);
 +}
 +
 +static void print_pme_loadbal_settings(pme_load_balancing_t pme_lb,
 +                                       FILE *fplog)
 +{
 +    double pp_ratio,grid_ratio;
 +
++    pp_ratio   = pow(pme_lb->setup[pme_lb->cur].rlist/pme_lb->setup[0].rlistlong,3.0);
 +    grid_ratio = pme_grid_points(&pme_lb->setup[pme_lb->cur])/
 +        (double)pme_grid_points(&pme_lb->setup[0]);
 +
 +    fprintf(fplog,"\n");
 +    fprintf(fplog,"       P P   -   P M E   L O A D   B A L A N C I N G\n");
 +    fprintf(fplog,"\n");
 +    /* Here we only warn when the optimal setting is the last one */
 +    if (pme_lb->elimited != epmelblimNO &&
 +        pme_lb->cur == pme_loadbal_end(pme_lb)-1)
 +    {
 +        fprintf(fplog," NOTE: The PP/PME load balancing was limited by the %s,\n",
 +                pmelblim_str[pme_lb->elimited]);
 +        fprintf(fplog,"       you might not have reached a good load balance.\n");
 +        if (pme_lb->elimited == epmelblimDD)
 +        {
 +            fprintf(fplog,"       Try different mdrun -dd settings or lower the -dds value.\n");
 +        }
 +        fprintf(fplog,"\n");
 +    }
 +    fprintf(fplog," PP/PME load balancing changed the cut-off and PME settings:\n");
 +    fprintf(fplog,"           particle-particle                    PME\n");
 +    fprintf(fplog,"            rcoulomb  rlist            grid      spacing   1/beta\n");
 +    print_pme_loadbal_setting(fplog,"initial",&pme_lb->setup[0]);
 +    print_pme_loadbal_setting(fplog,"final"  ,&pme_lb->setup[pme_lb->cur]);
 +    fprintf(fplog," cost-ratio           %4.2f             %4.2f\n",
 +            pp_ratio,grid_ratio);
 +    fprintf(fplog," (note that these numbers concern only part of the total PP and PME load)\n");
 +    fprintf(fplog,"\n");
 +}
 +
 +void pme_loadbal_done(pme_load_balancing_t pme_lb, FILE *fplog)
 +{
 +    if (fplog != NULL && (pme_lb->cur > 0 || pme_lb->elimited != epmelblimNO))
 +    {
 +        print_pme_loadbal_settings(pme_lb,fplog);
 +    }
 +
 +    /* TODO: Here we should free all pointers in pme_lb,
 +     * but as it contains pme data structures,
 +     * we need to first make pme.c free all data.
 +     */
 +}
index 937585fdd6866359c72be978125a1c2f8370d2b2,0000000000000000000000000000000000000000..8e0096bb1bbed4c7697c2605ec7ced97d2d411a7
mode 100644,000000..100644
--- /dev/null
@@@ -1,1785 -1,0 +1,1937 @@@
- #ifdef __linux
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
- static int get_tmpi_omp_thread_distribution(const gmx_hw_opt_t *hw_opt,
-                                             int nthreads_tot,
-                                             int ngpu)
++#if defined(HAVE_SCHED_H) && (defined(HAVE_SCHED_GETAFFINITY) || defined(HAVE_SCHED_SETAFFINITY))
 +#define _GNU_SOURCE
 +#include <sched.h>
 +#include <sys/syscall.h>
 +#endif
 +#include <signal.h>
 +#include <stdlib.h>
 +#ifdef HAVE_UNISTD_H
 +#include <unistd.h>
 +#endif
 +#include <string.h>
 +#include <assert.h>
 +
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "sysstuff.h"
 +#include "statutil.h"
 +#include "mdrun.h"
 +#include "md_logging.h"
 +#include "md_support.h"
 +#include "network.h"
 +#include "pull.h"
 +#include "pull_rotation.h"
 +#include "names.h"
 +#include "disre.h"
 +#include "orires.h"
 +#include "pme.h"
 +#include "mdatoms.h"
 +#include "repl_ex.h"
 +#include "qmmm.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "coulomb.h"
 +#include "constr.h"
 +#include "mvdata.h"
 +#include "checkpoint.h"
 +#include "mtop_util.h"
 +#include "sighandler.h"
 +#include "tpxio.h"
 +#include "txtdump.h"
 +#include "gmx_detect_hardware.h"
 +#include "gmx_omp_nthreads.h"
 +#include "pull_rotation.h"
 +#include "calc_verletbuf.h"
 +#include "../mdlib/nbnxn_search.h"
 +#include "../mdlib/nbnxn_consts.h"
 +#include "gmx_fatal_collective.h"
 +#include "membed.h"
 +#include "macros.h"
 +#include "gmx_omp.h"
 +
 +#ifdef GMX_LIB_MPI
 +#include <mpi.h>
 +#endif
 +#ifdef GMX_THREAD_MPI
 +#include "tmpi.h"
 +#endif
 +
 +#ifdef GMX_FAHCORE
 +#include "corewrap.h"
 +#endif
 +
 +#ifdef GMX_OPENMM
 +#include "md_openmm.h"
 +#endif
 +
 +#include "gpu_utils.h"
 +#include "nbnxn_cuda_data_mgmt.h"
 +
 +typedef struct { 
 +    gmx_integrator_t *func;
 +} gmx_intp_t;
 +
 +/* The array should match the eI array in include/types/enums.h */
 +#ifdef GMX_OPENMM  /* FIXME do_md_openmm needs fixing */
 +const gmx_intp_t integrator[eiNR] = { {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm},{do_md_openmm}};
 +#else
 +const gmx_intp_t integrator[eiNR] = { {do_md}, {do_steep}, {do_cg}, {do_md}, {do_md}, {do_nm}, {do_lbfgs}, {do_tpi}, {do_tpi}, {do_md}, {do_md},{do_md}};
 +#endif
 +
 +gmx_large_int_t     deform_init_init_step_tpx;
 +matrix              deform_init_box_tpx;
 +#ifdef GMX_THREAD_MPI
 +tMPI_Thread_mutex_t deform_init_box_mutex=TMPI_THREAD_MUTEX_INITIALIZER;
 +#endif
 +
 +
 +#ifdef GMX_THREAD_MPI
 +struct mdrunner_arglist
 +{
 +    gmx_hw_opt_t *hw_opt;
 +    FILE *fplog;
 +    t_commrec *cr;
 +    int nfile;
 +    const t_filenm *fnm;
 +    output_env_t oenv;
 +    gmx_bool bVerbose;
 +    gmx_bool bCompact;
 +    int nstglobalcomm;
 +    ivec ddxyz;
 +    int dd_node_order;
 +    real rdd;
 +    real rconstr;
 +    const char *dddlb_opt;
 +    real dlb_scale;
 +    const char *ddcsx;
 +    const char *ddcsy;
 +    const char *ddcsz;
 +    const char *nbpu_opt;
 +    int nsteps_cmdline;
 +    int nstepout;
 +    int resetstep;
 +    int nmultisim;
 +    int repl_ex_nst;
 +    int repl_ex_nex;
 +    int repl_ex_seed;
 +    real pforce;
 +    real cpt_period;
 +    real max_hours;
 +    const char *deviceOptions;
 +    unsigned long Flags;
 +    int ret; /* return value */
 +};
 +
 +
 +/* The function used for spawning threads. Extracts the mdrunner() 
 +   arguments from its one argument and calls mdrunner(), after making
 +   a commrec. */
 +static void mdrunner_start_fn(void *arg)
 +{
 +    struct mdrunner_arglist *mda=(struct mdrunner_arglist*)arg;
 +    struct mdrunner_arglist mc=*mda; /* copy the arg list to make sure 
 +                                        that it's thread-local. This doesn't
 +                                        copy pointed-to items, of course,
 +                                        but those are all const. */
 +    t_commrec *cr;  /* we need a local version of this */
 +    FILE *fplog=NULL;
 +    t_filenm *fnm;
 +
 +    fnm = dup_tfn(mc.nfile, mc.fnm);
 +
 +    cr = init_par_threads(mc.cr);
 +
 +    if (MASTER(cr))
 +    {
 +        fplog=mc.fplog;
 +    }
 +
 +    mda->ret=mdrunner(mc.hw_opt, fplog, cr, mc.nfile, fnm, mc.oenv, 
 +                      mc.bVerbose, mc.bCompact, mc.nstglobalcomm, 
 +                      mc.ddxyz, mc.dd_node_order, mc.rdd,
 +                      mc.rconstr, mc.dddlb_opt, mc.dlb_scale, 
 +                      mc.ddcsx, mc.ddcsy, mc.ddcsz,
 +                      mc.nbpu_opt,
 +                      mc.nsteps_cmdline, mc.nstepout, mc.resetstep,
 +                      mc.nmultisim, mc.repl_ex_nst, mc.repl_ex_nex, mc.repl_ex_seed, mc.pforce, 
 +                      mc.cpt_period, mc.max_hours, mc.deviceOptions, mc.Flags);
 +}
 +
 +/* called by mdrunner() to start a specific number of threads (including 
 +   the main thread) for thread-parallel runs. This in turn calls mdrunner()
 +   for each thread. 
 +   All options besides nthreads are the same as for mdrunner(). */
 +static t_commrec *mdrunner_start_threads(gmx_hw_opt_t *hw_opt, 
 +              FILE *fplog,t_commrec *cr,int nfile, 
 +              const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
 +              gmx_bool bCompact, int nstglobalcomm,
 +              ivec ddxyz,int dd_node_order,real rdd,real rconstr,
 +              const char *dddlb_opt,real dlb_scale,
 +              const char *ddcsx,const char *ddcsy,const char *ddcsz,
 +              const char *nbpu_opt,
 +              int nsteps_cmdline, int nstepout,int resetstep,
 +              int nmultisim,int repl_ex_nst,int repl_ex_nex, int repl_ex_seed,
 +              real pforce,real cpt_period, real max_hours, 
 +              const char *deviceOptions, unsigned long Flags)
 +{
 +    int ret;
 +    struct mdrunner_arglist *mda;
 +    t_commrec *crn; /* the new commrec */
 +    t_filenm *fnmn;
 +
 +    /* first check whether we even need to start tMPI */
 +    if (hw_opt->nthreads_tmpi < 2)
 +    {
 +        return cr;
 +    }
 +
 +    /* a few small, one-time, almost unavoidable memory leaks: */
 +    snew(mda,1);
 +    fnmn=dup_tfn(nfile, fnm);
 +
 +    /* fill the data structure to pass as void pointer to thread start fn */
 +    mda->hw_opt=hw_opt;
 +    mda->fplog=fplog;
 +    mda->cr=cr;
 +    mda->nfile=nfile;
 +    mda->fnm=fnmn;
 +    mda->oenv=oenv;
 +    mda->bVerbose=bVerbose;
 +    mda->bCompact=bCompact;
 +    mda->nstglobalcomm=nstglobalcomm;
 +    mda->ddxyz[XX]=ddxyz[XX];
 +    mda->ddxyz[YY]=ddxyz[YY];
 +    mda->ddxyz[ZZ]=ddxyz[ZZ];
 +    mda->dd_node_order=dd_node_order;
 +    mda->rdd=rdd;
 +    mda->rconstr=rconstr;
 +    mda->dddlb_opt=dddlb_opt;
 +    mda->dlb_scale=dlb_scale;
 +    mda->ddcsx=ddcsx;
 +    mda->ddcsy=ddcsy;
 +    mda->ddcsz=ddcsz;
 +    mda->nbpu_opt=nbpu_opt;
 +    mda->nsteps_cmdline=nsteps_cmdline;
 +    mda->nstepout=nstepout;
 +    mda->resetstep=resetstep;
 +    mda->nmultisim=nmultisim;
 +    mda->repl_ex_nst=repl_ex_nst;
 +    mda->repl_ex_nex=repl_ex_nex;
 +    mda->repl_ex_seed=repl_ex_seed;
 +    mda->pforce=pforce;
 +    mda->cpt_period=cpt_period;
 +    mda->max_hours=max_hours;
 +    mda->deviceOptions=deviceOptions;
 +    mda->Flags=Flags;
 +
 +    fprintf(stderr, "Starting %d tMPI threads\n",hw_opt->nthreads_tmpi);
 +    fflush(stderr);
 +    /* now spawn new threads that start mdrunner_start_fn(), while 
 +       the main thread returns */
 +    ret=tMPI_Init_fn(TRUE, hw_opt->nthreads_tmpi,
 +                     (hw_opt->bThreadPinning ? TMPI_AFFINITY_ALL_CORES : TMPI_AFFINITY_NONE),
 +                     mdrunner_start_fn, (void*)(mda) );
 +    if (ret!=TMPI_SUCCESS)
 +        return NULL;
 +
 +    /* make a new comm_rec to reflect the new situation */
 +    crn=init_par_threads(cr);
 +    return crn;
 +}
 +
 +
-         if (hw_opt->nthreads_omp > nthreads_tot)
-         {
-             gmx_fatal(FARGS,"More OpenMP threads requested (%d) than the total number of threads requested (%d)",hw_opt->nthreads_omp,nthreads_tot);
-         }
-         nthreads_tmpi = nthreads_tot/hw_opt->nthreads_omp;
++static int get_tmpi_omp_thread_division(const gmx_hw_info_t *hwinfo,
++                                        const gmx_hw_opt_t *hw_opt,
++                                        int nthreads_tot,
++                                        int ngpu)
 +{
 +    int nthreads_tmpi;
 +
 +    /* There are no separate PME nodes here, as we ensured in
 +     * check_and_update_hw_opt that nthreads_tmpi>0 with PME nodes
 +     * and a conditional ensures we would not have ended up here.
 +     * Note that separate PME nodes might be switched on later.
 +     */
 +    if (ngpu > 0)
 +    {
 +        nthreads_tmpi = ngpu;
 +        if (nthreads_tot > 0 && nthreads_tot < nthreads_tmpi)
 +        {
 +            nthreads_tmpi = nthreads_tot;
 +        }
 +    }
 +    else if (hw_opt->nthreads_omp > 0)
 +    {
-         /* Don't use OpenMP parallelization */
-         nthreads_tmpi = nthreads_tot;
++        /* Here we could oversubscribe; when we do, we issue a warning later */
++        nthreads_tmpi = max(1,nthreads_tot/hw_opt->nthreads_omp);
 +    }
 +    else
 +    {
 +        /* TODO choose nthreads_omp based on hardware topology
 +           when we have a hardware topology detection library */
-     int nthreads_tot_max,nthreads_tmpi,nthreads_new,ngpu;
++        /* In general, when running up to 4 threads, OpenMP should be faster.
++         * Note: on AMD Bulldozer we should avoid running OpenMP over two dies.
++         * On Intel >= Nehalem, running OpenMP on a single CPU is always faster;
++         * even on two CPUs it's usually faster (but with many OpenMP threads
++         * it could be faster not to use HT; currently we always use HT).
++         * On Nehalem/Westmere we want to avoid running 16 threads over
++         * two CPUs with HT, so we need a limit < 16; thus we use 12.
++         * A reasonable limit for Intel Sandy Bridge and Ivy Bridge,
++         * not knowing the topology, is 16 threads.
++         */
++        const int nthreads_omp_always_faster             =  4;
++        const int nthreads_omp_always_faster_Nehalem     = 12;
++        const int nthreads_omp_always_faster_SandyBridge = 16;
++        const int first_model_Nehalem     = 0x1A;
++        const int first_model_SandyBridge = 0x2A;
++        gmx_bool bIntel_Family6;
++
++        bIntel_Family6 =
++            (gmx_cpuid_vendor(hwinfo->cpuid_info) == GMX_CPUID_VENDOR_INTEL &&
++             gmx_cpuid_family(hwinfo->cpuid_info) == 6);
++
++        if (nthreads_tot <= nthreads_omp_always_faster ||
++            (bIntel_Family6 &&
++             ((gmx_cpuid_model(hwinfo->cpuid_info) >= nthreads_omp_always_faster_Nehalem && nthreads_tot <= nthreads_omp_always_faster_Nehalem) ||
++              (gmx_cpuid_model(hwinfo->cpuid_info) >= nthreads_omp_always_faster_SandyBridge && nthreads_tot <= nthreads_omp_always_faster_SandyBridge))))
++        {
++            /* Use pure OpenMP parallelization */
++            nthreads_tmpi = 1;
++        }
++        else
++        {
++            /* Don't use OpenMP parallelization */
++            nthreads_tmpi = nthreads_tot;
++        }
 +    }
 +
 +    return nthreads_tmpi;
 +}
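 +/* Illustrative note (not in the original source): with no GPUs and no explicit
 + * OpenMP thread count, a 16-thread Intel Sandy Bridge machine (family 6,
 + * model 0x2A) satisfies nthreads_tot <= 16 above and thus runs a single tMPI
 + * rank with pure OpenMP, whereas a 16-thread non-Intel machine falls through
 + * to 16 tMPI ranks without OpenMP.
 + */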
 +
 +
 +/* Get the number of threads to use for thread-MPI based on how many
 + * were requested, which algorithms we're using,
 + * and how many particles there are.
 + * At the point we have already called check_and_update_hw_opt.
 + * Thus all options should be internally consistent and consistent
 + * with the hardware, except that ntmpi could be larger than #GPU.
 + */
 +static int get_nthreads_mpi(gmx_hw_info_t *hwinfo,
 +                            gmx_hw_opt_t *hw_opt,
 +                            t_inputrec *inputrec, gmx_mtop_t *mtop,
 +                            const t_commrec *cr,
 +                            FILE *fplog)
 +{
-         nthreads_tot_max = tMPI_Thread_get_hw_number();
++    int nthreads_hw,nthreads_tot_max,nthreads_tmpi,nthreads_new,ngpu;
 +    int min_atoms_per_mpi_thread;
 +    char *env;
 +    char sbuf[STRLEN];
 +    gmx_bool bCanUseGPU;
 +
 +    if (hw_opt->nthreads_tmpi > 0)
 +    {
 +        /* Trivial, return right away */
 +        return hw_opt->nthreads_tmpi;
 +    }
 +
++    nthreads_hw = hwinfo->nthreads_hw_avail;
++
 +    /* How many total (#tMPI*#OpenMP) threads can we start? */ 
 +    if (hw_opt->nthreads_tot > 0)
 +    {
 +        nthreads_tot_max = hw_opt->nthreads_tot;
 +    }
 +    else
 +    {
-         get_tmpi_omp_thread_distribution(hw_opt,nthreads_tot_max,ngpu);
++        nthreads_tot_max = nthreads_hw;
 +    }
 +
 +    bCanUseGPU = (inputrec->cutoff_scheme == ecutsVERLET && hwinfo->bCanUseGPU);
 +    if (bCanUseGPU)
 +    {
 +        ngpu = hwinfo->gpu_info.ncuda_dev_use;
 +    }
 +    else
 +    {
 +        ngpu = 0;
 +    }
 +
 +    nthreads_tmpi =
-         if (nthreads_new > 8 || (nthreads_tmpi == 8 && nthreads_new > 4))
++        get_tmpi_omp_thread_division(hwinfo,hw_opt,nthreads_tot_max,ngpu);
 +
 +    if (inputrec->eI == eiNM || EI_TPI(inputrec->eI))
 +    {
 +        /* Steps are divided over the nodes instead of splitting the atoms */
 +        min_atoms_per_mpi_thread = 0;
 +    }
 +    else
 +    {
 +        if (bCanUseGPU)
 +        {
 +            min_atoms_per_mpi_thread = MIN_ATOMS_PER_GPU;
 +        }
 +        else
 +        {
 +            min_atoms_per_mpi_thread = MIN_ATOMS_PER_MPI_THREAD;
 +        }
 +    }
 +
 +    /* Check if an algorithm does not support parallel simulation.  */
 +    if (nthreads_tmpi != 1 &&
 +        ( inputrec->eI == eiLBFGS ||
 +          inputrec->coulombtype == eelEWALD ) )
 +    {
 +        nthreads_tmpi = 1;
 +
 +        md_print_warn(cr,fplog,"The integration or electrostatics algorithm doesn't support parallel runs. Using a single thread-MPI thread.\n");
 +        if (hw_opt->nthreads_tmpi > nthreads_tmpi)
 +        {
 +            gmx_fatal(FARGS,"You asked for more than 1 thread-MPI thread, but the chosen algorithm doesn't support that");
 +        }
 +    }
 +    else if (mtop->natoms/nthreads_tmpi < min_atoms_per_mpi_thread)
 +    {
 +        /* the thread number was chosen automatically, but there are too many
 +           threads (too few atoms per thread) */
 +        nthreads_new = max(1,mtop->natoms/min_atoms_per_mpi_thread);
 +
-             /* TODO replace this once we have proper HT detection
-              * Use only multiples of 4 above 8 threads
-              * or with an 8-core processor
-              * (to avoid 6 threads on 8 core processors with 4 real cores).
-              */
-             nthreads_new = (nthreads_new/4)*4;
++        /* Avoid partial use of Hyper-Threading */
++        if (gmx_cpuid_x86_smt(hwinfo->cpuid_info) == GMX_CPUID_X86_SMT_ENABLED &&
++            nthreads_new > nthreads_hw/2 && nthreads_new < nthreads_hw)
 +        {
-         else if (nthreads_new > 4)
++            nthreads_new = nthreads_hw/2;
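++            /* E.g. with 12 hardware threads (6 cores with HT), thread counts
++             * of 7-11 are reduced to 6, so HT is either fully used or not used. */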
 +        }
-             /* Avoid 5 or 7 threads */
-             nthreads_new = (nthreads_new/2)*2;
++
++        /* Avoid large prime numbers in the thread count */
++        if (nthreads_new >= 6)
++        {
++            /* Use only 6,8,10 with additional factors of 2 */
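++            /* For example, this turns 7 into 6, 11 into 10 and 22 into 20. */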
++            int fac;
++
++            fac = 2;
++            while (3*fac*2 <= nthreads_new)
++            {
++                fac *= 2;
++            }
++
++            nthreads_new = (nthreads_new/fac)*fac;
++        }
++        else
 +        {
- #ifdef __linux /* TODO: only linux? why not everywhere if sched_setaffinity is available */
++            /* Avoid 5 */
++            if (nthreads_new == 5)
++            {
++                nthreads_new = 4;
++            }
 +        }
 +
 +        nthreads_tmpi = nthreads_new;
 +
 +        fprintf(stderr,"\n");
 +        fprintf(stderr,"NOTE: Parallelization is limited by the small number of atoms,\n");
 +        fprintf(stderr,"      only starting %d thread-MPI threads.\n",nthreads_tmpi);
 +        fprintf(stderr,"      You can use the -nt and/or -ntmpi option to optimize the number of threads.\n\n");
 +    }
 +
 +    return nthreads_tmpi;
 +}
 +#endif /* GMX_THREAD_MPI */
 +
 +
 +/* Environment variable for setting nstlist */
 +static const char*  NSTLIST_ENVVAR          =  "GMX_NSTLIST";
 +/* Try to increase nstlist when using a GPU with nstlist less than this */
 +static const int    NSTLIST_GPU_ENOUGH      = 20;
 +/* Increase nstlist until the non-bonded cost increases more than this factor */
 +static const float  NBNXN_GPU_LIST_OK_FAC   = 1.25;
 +/* Don't increase nstlist beyond a non-bonded cost increase of this factor */
 +static const float  NBNXN_GPU_LIST_MAX_FAC  = 1.40;
 +
 +/* Try to increase nstlist when running on a GPU */
 +static void increase_nstlist(FILE *fp,t_commrec *cr,
 +                             t_inputrec *ir,const gmx_mtop_t *mtop,matrix box)
 +{
 +    char *env;
 +    int  nstlist_orig,nstlist_prev;
 +    verletbuf_list_setup_t ls;
 +    real rlist_inc,rlist_ok,rlist_max,rlist_new,rlist_prev;
 +    int  i;
 +    t_state state_tmp;
 +    gmx_bool bBox,bDD,bCont;
 +    const char *nstl_fmt="\nFor optimal performance with a GPU nstlist (now %d) should be larger.\nThe optimum depends on your CPU and GPU resources.\nYou might want to try several nstlist values.\n";
 +    const char *vbd_err="Can not increase nstlist for GPU run because verlet-buffer-drift is not set or used";
 +    const char *box_err="Can not increase nstlist for GPU run because the box is too small";
 +    const char *dd_err ="Can not increase nstlist for GPU run because of domain decomposition limitations";
 +    char buf[STRLEN];
 +
 +    /* Alternative nstlist values to try when increasing nstlist */
 +    const int nstl[]={ 20, 25, 40, 50 };
 +#define NNSTL  sizeof(nstl)/sizeof(nstl[0])
 +
 +    env = getenv(NSTLIST_ENVVAR);
 +    if (env == NULL)
 +    {
 +        if (fp != NULL)
 +        {
 +            fprintf(fp,nstl_fmt,ir->nstlist);
 +        }
 +    }
 +
 +    if (ir->verletbuf_drift == 0)
 +    {
 +        gmx_fatal(FARGS,"You are using an old tpr file with a GPU; please generate a new tpr file with an up-to-date version of grompp");
 +    }
 +
 +    if (ir->verletbuf_drift < 0)
 +    {
 +        if (MASTER(cr))
 +        {
 +            fprintf(stderr,"%s\n",vbd_err);
 +        }
 +        if (fp != NULL)
 +        {
 +            fprintf(fp,"%s\n",vbd_err);
 +        }
 +
 +        return;
 +    }
 +
 +    nstlist_orig = ir->nstlist;
 +    if (env != NULL)
 +    {
 +        sprintf(buf,"Getting nstlist from environment variable GMX_NSTLIST=%s",env);
 +        if (MASTER(cr))
 +        {
 +            fprintf(stderr,"%s\n",buf);
 +        }
 +        if (fp != NULL)
 +        {
 +            fprintf(fp,"%s\n",buf);
 +        }
 +        sscanf(env,"%d",&ir->nstlist);
 +    }
 +
 +    verletbuf_get_list_setup(TRUE,&ls);
 +
 +    /* Allow rlist to make the list double the size of the cut-off sphere */
 +    rlist_inc = nbnxn_get_rlist_effective_inc(NBNXN_GPU_CLUSTER_SIZE,mtop->natoms/det(box));
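 +    /* The non-bonded cost is assumed to scale with the list volume, i.e. with
 +     * (r_cut + rlist_inc)^3, so an allowed cost factor is applied as its cube
 +     * root to the effective radius below. */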
 +    rlist_ok  = (max(ir->rvdw,ir->rcoulomb) + rlist_inc)*pow(NBNXN_GPU_LIST_OK_FAC,1.0/3.0) - rlist_inc;
 +    rlist_max = (max(ir->rvdw,ir->rcoulomb) + rlist_inc)*pow(NBNXN_GPU_LIST_MAX_FAC,1.0/3.0) - rlist_inc;
 +    if (debug)
 +    {
 +        fprintf(debug,"GPU nstlist tuning: rlist_inc %.3f rlist_max %.3f\n",
 +                rlist_inc,rlist_max);
 +    }
 +
 +    i = 0;
 +    nstlist_prev = nstlist_orig;
 +    rlist_prev   = ir->rlist;
 +    do
 +    {
 +        if (env == NULL)
 +        {
 +            ir->nstlist = nstl[i];
 +        }
 +
 +        /* Set the pair-list buffer size in ir */
 +        calc_verlet_buffer_size(mtop,det(box),ir,ir->verletbuf_drift,&ls,
 +                                NULL,&rlist_new);
 +
 +        /* Does rlist fit in the box? */
 +        bBox = (sqr(rlist_new) < max_cutoff2(ir->ePBC,box));
 +        bDD  = TRUE;
 +        if (bBox && DOMAINDECOMP(cr))
 +        {
 +            /* Check if rlist fits in the domain decomposition */
 +            if (inputrec2nboundeddim(ir) < DIM)
 +            {
 +                gmx_incons("Changing nstlist with domain decomposition and unbounded dimensions is not implemented yet");
 +            }
 +            copy_mat(box,state_tmp.box);
 +            bDD = change_dd_cutoff(cr,&state_tmp,ir,rlist_new);
 +        }
 +
 +        bCont = FALSE;
 +
 +        if (env == NULL)
 +        {
 +            if (bBox && bDD && rlist_new <= rlist_max)
 +            {
 +                /* Increase nstlist */
 +                nstlist_prev = ir->nstlist;
 +                rlist_prev   = rlist_new;
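 +                /* Keep trying while candidate values remain and the list is
 +                 * still within the 'ok' cost increase. */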
 +                bCont = (i+1 < NNSTL && rlist_new < rlist_ok);
 +            }
 +            else
 +            {
 +                /* Stick with the previous nstlist */
 +                ir->nstlist = nstlist_prev;
 +                rlist_new   = rlist_prev;
 +                bBox = TRUE;
 +                bDD  = TRUE;
 +            }
 +        }
 +
 +        i++;
 +    }
 +    while (bCont);
 +
 +    if (!bBox || !bDD)
 +    {
 +        gmx_warning(!bBox ? box_err : dd_err);
 +        if (fp != NULL)
 +        {
 +            fprintf(fp,"\n%s\n",bBox ? box_err : dd_err);
 +        }
 +        ir->nstlist = nstlist_orig;
 +    }
 +    else if (ir->nstlist != nstlist_orig || rlist_new != ir->rlist)
 +    {
 +        sprintf(buf,"Changing nstlist from %d to %d, rlist from %g to %g",
 +                nstlist_orig,ir->nstlist,
 +                ir->rlist,rlist_new);
 +        if (MASTER(cr))
 +        {
 +            fprintf(stderr,"%s\n\n",buf);
 +        }
 +        if (fp != NULL)
 +        {
 +            fprintf(fp,"%s\n\n",buf);
 +        }
 +        ir->rlist     = rlist_new;
 +        ir->rlistlong = rlist_new;
 +    }
 +}
 +
 +static void prepare_verlet_scheme(FILE *fplog,
 +                                  gmx_hw_info_t *hwinfo,
 +                                  t_commrec *cr,
 +                                  gmx_hw_opt_t *hw_opt,
 +                                  const char *nbpu_opt,
 +                                  t_inputrec *ir,
 +                                  const gmx_mtop_t *mtop,
 +                                  matrix box,
 +                                  gmx_bool *bUseGPU)
 +{
 +    /* Here we only check for GPU usage on the MPI master process,
 +     * as here we don't know how many GPUs we will use yet.
 +     * We check for a GPU on all processes later.
 +     */
 +    *bUseGPU = hwinfo->bCanUseGPU || (getenv("GMX_EMULATE_GPU") != NULL);
 +
 +    if (ir->verletbuf_drift > 0)
 +    {
 +        /* Update the Verlet buffer size for the current run setup */
 +        verletbuf_list_setup_t ls;
 +        real rlist_new;
 +
 +        /* Here we assume CPU acceleration is on. But since currently
 +         * calc_verlet_buffer_size gives the same results for 4x8 and 4x4,
 +         * and 4x2 gives a larger buffer than 4x4, this is OK.
 +         */
 +        verletbuf_get_list_setup(*bUseGPU,&ls);
 +
 +        calc_verlet_buffer_size(mtop,det(box),ir,
 +                                ir->verletbuf_drift,&ls,
 +                                NULL,&rlist_new);
 +        if (rlist_new != ir->rlist)
 +        {
 +            if (fplog != NULL)
 +            {
 +                fprintf(fplog,"\nChanging rlist from %g to %g for non-bonded %dx%d atom kernels\n\n",
 +                        ir->rlist,rlist_new,
 +                        ls.cluster_size_i,ls.cluster_size_j);
 +            }
 +            ir->rlist     = rlist_new;
 +            ir->rlistlong = rlist_new;
 +        }
 +    }
 +
 +    /* With GPU or emulation we should check nstlist for performance */
 +    if ((EI_DYNAMICS(ir->eI) &&
 +         *bUseGPU &&
 +         ir->nstlist < NSTLIST_GPU_ENOUGH) ||
 +        getenv(NSTLIST_ENVVAR) != NULL)
 +    {
 +        /* Choose a better nstlist */
 +        increase_nstlist(fplog,cr,ir,mtop,box);
 +    }
 +}
 +
 +static void convert_to_verlet_scheme(FILE *fplog,
 +                                     t_inputrec *ir,
 +                                     gmx_mtop_t *mtop,real box_vol)
 +{
 +    char *conv_mesg="Converting input file with group cut-off scheme to the Verlet cut-off scheme";
 +
 +    md_print_warn(NULL,fplog,"%s\n",conv_mesg);
 +
 +    ir->cutoff_scheme   = ecutsVERLET;
 +    ir->verletbuf_drift = 0.005;
 +
 +    if (ir->rcoulomb != ir->rvdw)
 +    {
 +        gmx_fatal(FARGS,"The VdW and Coulomb cut-offs are different, whereas the Verlet scheme only supports equal cut-offs");
 +    }
 +
 +    if (ir->vdwtype == evdwUSER || EEL_USER(ir->coulombtype))
 +    {
 +        gmx_fatal(FARGS,"User non-bonded potentials are not (yet) supported with the Verlet scheme");
 +    }
 +    else if (EVDW_SWITCHED(ir->vdwtype) || EEL_SWITCHED(ir->coulombtype))
 +    {
 +        md_print_warn(NULL,fplog,"Converting switched or shifted interactions to a shifted potential (without force shift); this will lead to slightly different interaction potentials");
 +
 +        if (EVDW_SWITCHED(ir->vdwtype))
 +        {
 +            ir->vdwtype = evdwCUT;
 +        }
 +        if (EEL_SWITCHED(ir->coulombtype))
 +        {
 +            if (EEL_FULL(ir->coulombtype))
 +            {
 +                /* With full electrostatic only PME can be switched */
 +                ir->coulombtype = eelPME;
 +            }
 +            else
 +            {
 +                md_print_warn(NULL,fplog,"NOTE: Replacing %s electrostatics with reaction-field with epsilon-rf=inf\n",eel_names[ir->coulombtype]);
 +                ir->coulombtype = eelRF;
 +                ir->epsilon_rf  = 0.0;
 +            }
 +        }
 +
 +        /* We set the target energy drift to a small number.
 +         * Note that this is only for testing. For production the user
 +         * should think about this and set the mdp options.
 +         */
 +        ir->verletbuf_drift = 1e-4;
 +    }
 +
 +    if (inputrec2nboundeddim(ir) != 3)
 +    {
 +        gmx_fatal(FARGS,"Can only convert old tpr files to the Verlet cut-off scheme with 3D pbc");
 +    }
 +
 +    if (ir->efep != efepNO || ir->implicit_solvent != eisNO)
 +    {
 +        gmx_fatal(FARGS,"Will not convert old tpr files to the Verlet cut-off scheme with free-energy calculations or implicit solvent");
 +    }
 +
 +    if (EI_DYNAMICS(ir->eI) && !(EI_MD(ir->eI) && ir->etc == etcNO))
 +    {
 +        verletbuf_list_setup_t ls;
 +
 +        verletbuf_get_list_setup(FALSE,&ls);
 +        calc_verlet_buffer_size(mtop,box_vol,ir,ir->verletbuf_drift,&ls,
 +                                NULL,&ir->rlist);
 +    }
 +    else
 +    {
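 +        /* Without a drift-based estimate, simply take the longest cut-off
 +         * and add a fixed 5% buffer on top. */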
 +        ir->verletbuf_drift = -1;
 +        ir->rlist           = 1.05*max(ir->rvdw,ir->rcoulomb);
 +    }
 +
 +    gmx_mtop_remove_chargegroups(mtop);
 +}
 +
++/* Check the process affinity mask and, if it is found to be non-default,
++ * honor it and disable mdrun's internal affinity setting.
++ * This function should be called twice: first before the OpenMP library
++ * gets initialized, with the last argument FALSE (to detect affinity
++ * set by external tools like taskset), and again after the OpenMP
++ * initialization, with the last argument TRUE (to detect affinity changes
++ * made by the OpenMP library).
++ *
++ * Note that this will only work on Linux, as we use a GNU feature. */
++static void check_cpu_affinity_set(FILE *fplog, const t_commrec *cr,
++                                   gmx_hw_opt_t *hw_opt, int ncpus,
++                                   gmx_bool bAfterOpenmpInit)
++{
++#ifdef HAVE_SCHED_GETAFFINITY
++    cpu_set_t mask_current;
++    int       i, ret, cpu_count, cpu_set;
++    gmx_bool  bAllSet;
++
++    assert(hw_opt);
++    if (!hw_opt->bThreadPinning)
++    {
++        /* internal affinity setting is off, don't bother checking process affinity */
++        return;
++    }
++
++    CPU_ZERO(&mask_current);
++    if ((ret = sched_getaffinity(0, sizeof(cpu_set_t), &mask_current)) != 0)
++    {
++        /* failed to query affinity mask, will just return */
++        if (debug)
++        {
++            fprintf(debug, "Failed to query affinity mask (error %d)", ret);
++        }
++        return;
++    }
++
++    /* Before proceeding with the actual check, make sure that the number of
++     * detected CPUs is >= the CPUs in the current set.
++     * We need to check for CPU_COUNT as it was added only in glibc 2.6. */
++#ifdef CPU_COUNT
++    if (ncpus < CPU_COUNT(&mask_current))
++    {
++        if (debug)
++        {
++            fprintf(debug, "%d CPUs detected, but %d was returned by CPU_COUNT",
++                    ncpus, CPU_COUNT(&mask_current));
++        }
++        return;
++    }
++#endif /* CPU_COUNT */
++
++    bAllSet = TRUE;
++    for (i = 0; (i < ncpus && i < CPU_SETSIZE); i++)
++    {
++        bAllSet = bAllSet && (CPU_ISSET(i, &mask_current) != 0);
++    }
++
++    if (!bAllSet)
++    {
++        if (!bAfterOpenmpInit)
++        {
++            md_print_warn(cr, fplog,
++                          "Non-default process affinity set, disabling internal affinity");
++        }
++        else
++        {
++            md_print_warn(cr, fplog,
++                          "Non-default process affinity set probably by the OpenMP library, "
++                          "disabling internal affinity");
++        }
++        hw_opt->bThreadPinning = FALSE;
++
++        if (debug)
++        {
++            fprintf(debug, "Non-default affinity mask found\n");
++        }
++    }
++    else
++    {
++        if (debug)
++        {
++            fprintf(debug, "Default affinity mask found\n");
++        }
++    }
++#endif /* HAVE_SCHED_GETAFFINITY */
++}
 +
 +/* Set CPU affinity. Can be important for performance.
 +   On some systems (e.g. Cray) CPU Affinity is set by default.
 +   But the default assignment doesn't work (well) when only some ranks
 +   have threads, which causes very low performance.
 +   External tools have cumbersome syntax for setting affinity
 +   in the case that only some ranks have threads.
 +   Thus it is important that GROMACS sets the affinity internally
 +   if only PME is using threads.
 +*/
 +static void set_cpu_affinity(FILE *fplog,
 +                             const t_commrec *cr,
 +                             gmx_hw_opt_t *hw_opt,
 +                             int nthreads_pme,
 +                             const gmx_hw_info_t *hwinfo,
 +                             const t_inputrec *inputrec)
 +{
 +#if defined GMX_THREAD_MPI
 +    /* With the number of TMPI threads equal to the number of cores
 +     * we already pinned in thread-MPI, so don't pin again here.
 +     */
 +    if (hw_opt->nthreads_tmpi == tMPI_Thread_get_hw_number())
 +    {
 +        return;
 +    }
 +#endif
 +
 +#ifdef GMX_OPENMP /* TODO: actually we could do this even without OpenMP?! */
- #endif /* __linux    */
++#ifdef HAVE_SCHED_SETAFFINITY
 +    if (hw_opt->bThreadPinning)
 +    {
 +        int thread, nthread_local, nthread_node, nthread_hw_max, nphyscore;
 +        int offset;
 +        char *env;
 +
 +        /* threads on this MPI process or TMPI thread */
 +        if (cr->duty & DUTY_PP)
 +        {
 +            nthread_local = gmx_omp_nthreads_get(emntNonbonded);
 +        }
 +        else
 +        {
 +            nthread_local = gmx_omp_nthreads_get(emntPME);
 +        }
 +
 +        /* map the current process to cores */
 +        thread = 0;
 +        nthread_node = nthread_local;
 +#ifdef GMX_MPI
 +        if (PAR(cr) || MULTISIM(cr))
 +        {
 +            /* We need to determine a scan of the thread counts in this
 +             * compute node.
 +             */
 +            MPI_Comm comm_intra;
 +
 +            MPI_Comm_split(MPI_COMM_WORLD,gmx_hostname_num(),cr->nodeid_intra,
 +                           &comm_intra);
 +            MPI_Scan(&nthread_local,&thread,1,MPI_INT,MPI_SUM,comm_intra);
 +            /* MPI_Scan is inclusive, but here we need exclusive */
 +            thread -= nthread_local;
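 +            /* Example: local thread counts {4,4,8} give inclusive scans
 +             * {4,8,16} and thus starting thread offsets {0,4,8}. */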
 +            /* Get the total number of threads on this physical node */
 +            MPI_Allreduce(&nthread_local,&nthread_node,1,MPI_INT,MPI_SUM,comm_intra);
 +            MPI_Comm_free(&comm_intra);
 +        }
 +#endif
 +
 +        offset = 0;
 +        if (hw_opt->core_pinning_offset > 0)
 +        {
 +            offset = hw_opt->core_pinning_offset;
 +            if (SIMMASTER(cr))
 +            {
 +                fprintf(stderr, "Applying core pinning offset %d\n", offset);
 +            }
 +            if (fplog)
 +            {
 +                fprintf(fplog, "Applying core pinning offset %d\n", offset);
 +            }
 +        }
 +
 +        /* With Intel Hyper-Threading enabled, we want to pin consecutive
 +         * threads to physical cores when using more threads than physical
 +         * cores or when the user requests it.
 +         */
 +        nthread_hw_max = hwinfo->nthreads_hw_avail;
 +        nphyscore = -1;
 +        if (hw_opt->bPinHyperthreading ||
 +            (gmx_cpuid_x86_smt(hwinfo->cpuid_info) == GMX_CPUID_X86_SMT_ENABLED &&
 +             nthread_node > nthread_hw_max/2 && getenv("GMX_DISABLE_PINHT") == NULL))
 +        {
 +            if (gmx_cpuid_x86_smt(hwinfo->cpuid_info) != GMX_CPUID_X86_SMT_ENABLED)
 +            {
 +                /* We print to stderr on all processes, as we might have
 +                 * different settings on different physical nodes.
 +                 */
 +                if (gmx_cpuid_vendor(hwinfo->cpuid_info) != GMX_CPUID_VENDOR_INTEL)
 +                {
 +                    md_print_warn(NULL, fplog, "Pinning for Hyper-Threading layout requested, "
 +                                  "but non-Intel CPU detected (vendor: %s)\n",
 +                                  gmx_cpuid_vendor_string[gmx_cpuid_vendor(hwinfo->cpuid_info)]);
 +                }
 +                else
 +                {
 +                    md_print_warn(NULL, fplog, "Pinning for Hyper-Threading layout requested, "
 +                                  "but the CPU detected does not have Intel Hyper-Threading support "
 +                                  "(or it is turned off)\n");
 +                }
 +            }
 +            nphyscore = nthread_hw_max/2;
 +
 +            if (SIMMASTER(cr))
 +            {
 +                fprintf(stderr, "Pinning to Hyper-Threading cores with %d physical cores in a compute node\n",
 +                        nphyscore);
 +            }
 +            if (fplog)
 +            {
 +                fprintf(fplog, "Pinning to Hyper-Threading cores with %d physical cores in a compute node\n",
 +                        nphyscore);
 +            }
 +        }
 +
 +        /* set the per-thread affinity */
 +#pragma omp parallel firstprivate(thread) num_threads(nthread_local)
 +        {
 +            cpu_set_t mask;
 +            int core;
 +
 +            CPU_ZERO(&mask);
 +            thread += gmx_omp_get_thread_num();
 +            if (nphyscore <= 0)
 +            {
 +                core = offset + thread;
 +            }
 +            else
 +            {
 +                /* Lock pairs of threads to the same hyperthreaded core */
 +                core = offset + thread/2 + (thread % 2)*nphyscore;
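 +                /* Example: with offset 0 and nphyscore 8, threads 0 and 1 map
 +                 * to logical CPUs 0 and 8, assuming the usual enumeration in
 +                 * which CPU i and CPU i+nphyscore share a physical core. */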
 +            }
 +            CPU_SET(core, &mask);
 +            sched_setaffinity((pid_t) syscall (SYS_gettid), sizeof(cpu_set_t), &mask);
 +        }
 +    }
-     int      cutoff_scheme; /* The cutoff-scheme from inputrec_t */
++#endif /* HAVE_SCHED_SETAFFINITY */
 +#endif /* GMX_OPENMP */
 +}
 +
 +
 +static void check_and_update_hw_opt(gmx_hw_opt_t *hw_opt,
 +                                    int cutoff_scheme)
 +{
 +    gmx_omp_nthreads_read_env(&hw_opt->nthreads_omp);
 +
 +#ifndef GMX_THREAD_MPI
 +    if (hw_opt->nthreads_tot > 0)
 +    {
 +        gmx_fatal(FARGS,"Setting the total number of threads is only supported with thread-MPI and Gromacs was compiled without thread-MPI");
 +    }
 +    if (hw_opt->nthreads_tmpi > 0)
 +    {
 +        gmx_fatal(FARGS,"Setting the number of thread-MPI threads is only supported with thread-MPI and Gromacs was compiled without thread-MPI");
 +    }
 +#endif
 +
 +    if (hw_opt->nthreads_tot > 0 && hw_opt->nthreads_omp_pme <= 0)
 +    {
 +        /* We have the same number of OpenMP threads for PP and PME processes,
 +         * thus we can perform several consistency checks.
 +         */
 +        if (hw_opt->nthreads_tmpi > 0 &&
 +            hw_opt->nthreads_omp > 0 &&
 +            hw_opt->nthreads_tot != hw_opt->nthreads_tmpi*hw_opt->nthreads_omp)
 +        {
 +            gmx_fatal(FARGS,"The total number of threads requested (%d) does not match the thread-MPI threads (%d) times the OpenMP threads (%d) requested",
 +                      hw_opt->nthreads_tot,hw_opt->nthreads_tmpi,hw_opt->nthreads_omp);
 +        }
 +
 +        if (hw_opt->nthreads_tmpi > 0 &&
 +            hw_opt->nthreads_tot % hw_opt->nthreads_tmpi != 0)
 +        {
 +            gmx_fatal(FARGS,"The total number of threads requested (%d) is not divisible by the number of thread-MPI threads requested (%d)",
 +                      hw_opt->nthreads_tot,hw_opt->nthreads_tmpi);
 +        }
 +
 +        if (hw_opt->nthreads_omp > 0 &&
 +            hw_opt->nthreads_tot % hw_opt->nthreads_omp != 0)
 +        {
 +            gmx_fatal(FARGS,"The total number of threads requested (%d) is not divisible by the number of OpenMP threads requested (%d)",
 +                      hw_opt->nthreads_tot,hw_opt->nthreads_omp);
 +        }
 +
 +        if (hw_opt->nthreads_tmpi > 0 &&
 +            hw_opt->nthreads_omp <= 0)
 +        {
 +            hw_opt->nthreads_omp = hw_opt->nthreads_tot/hw_opt->nthreads_tmpi;
 +        }
 +    }
 +
 +#ifndef GMX_OPENMP
 +    if (hw_opt->nthreads_omp > 1)
 +    {
 +        gmx_fatal(FARGS,"OpenMP threads are requested, but Gromacs was compiled without OpenMP support");
 +    }
 +#endif
 +
 +    if (cutoff_scheme == ecutsGROUP)
 +    {
 +        /* We only have OpenMP support for PME only nodes */
 +        if (hw_opt->nthreads_omp > 1)
 +        {
 +            gmx_fatal(FARGS,"OpenMP threads have been requested with cut-off scheme %s, but these are only supported with cut-off scheme %s",
 +                      ecutscheme_names[cutoff_scheme],
 +                      ecutscheme_names[ecutsVERLET]);
 +        }
 +        hw_opt->nthreads_omp = 1;
 +    }
 +
 +    if (hw_opt->nthreads_omp_pme > 0 && hw_opt->nthreads_omp <= 0)
 +    {
 +        gmx_fatal(FARGS,"You need to specify -ntomp in addition to -ntomp_pme");
 +    }
 +
 +    if (hw_opt->nthreads_tot == 1)
 +    {
 +        hw_opt->nthreads_tmpi = 1;
 +
 +        if (hw_opt->nthreads_omp > 1)
 +        {
 +            gmx_fatal(FARGS,"You requested %d OpenMP threads with %d total threads",
 +                      hw_opt->nthreads_omp,hw_opt->nthreads_tot);
 +        }
 +        hw_opt->nthreads_omp = 1;
 +    }
 +
 +    if (hw_opt->nthreads_omp_pme <= 0 && hw_opt->nthreads_omp > 0)
 +    {
 +        hw_opt->nthreads_omp_pme = hw_opt->nthreads_omp;
 +    }
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"hw_opt: nt %d ntmpi %d ntomp %d ntomp_pme %d gpu_id '%s'\n",
 +                hw_opt->nthreads_tot,
 +                hw_opt->nthreads_tmpi,
 +                hw_opt->nthreads_omp,
 +                hw_opt->nthreads_omp_pme,
 +                hw_opt->gpu_id!=NULL ? hw_opt->gpu_id : "");
 +                
 +    }
 +}
 +
 +
 +/* Override the value in inputrec with value passed on the command line (if any) */
 +static void override_nsteps_cmdline(FILE *fplog,
 +                                    int nsteps_cmdline,
 +                                    t_inputrec *ir,
 +                                    const t_commrec *cr)
 +{
 +    assert(ir);
 +    assert(cr);
 +
 +    /* override with anything other than the default -2 */
 +    if (nsteps_cmdline > -2)
 +    {
 +        char stmp[STRLEN];
 +
 +        ir->nsteps = nsteps_cmdline;
 +        if (EI_DYNAMICS(ir->eI))
 +        {
 +            sprintf(stmp, "Overriding nsteps with value passed on the command line: %d steps, %.3f ps",
 +                    nsteps_cmdline, nsteps_cmdline*ir->delta_t);
 +        }
 +        else
 +        {
 +            sprintf(stmp, "Overriding nsteps with value passed on the command line: %d steps",
 +                    nsteps_cmdline);
 +        }
 +
 +        md_print_warn(cr, fplog, "%s\n", stmp);
 +    }
 +}
 +
 +/* Data structure set by SIMMASTER which needs to be passed to all nodes
 + * before the other nodes have read the tpx file and called gmx_detect_hardware.
 + */
 +typedef struct {
-     /* Check for externally set OpenMP affinity and turn off internal
-      * pinning if any is found. We need to do this check early to tell
-      * thread-MPI whether it should do pinning when spawning threads.
-      */
-     gmx_omp_check_thread_affinity(fplog, cr, hw_opt);
++    int cutoff_scheme; /* The cutoff scheme from inputrec_t */
 +    gmx_bool bUseGPU;       /* Use GPU or GPU emulation          */
 +} master_inf_t;
 +
 +int mdrunner(gmx_hw_opt_t *hw_opt,
 +             FILE *fplog,t_commrec *cr,int nfile,
 +             const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
 +             gmx_bool bCompact, int nstglobalcomm,
 +             ivec ddxyz,int dd_node_order,real rdd,real rconstr,
 +             const char *dddlb_opt,real dlb_scale,
 +             const char *ddcsx,const char *ddcsy,const char *ddcsz,
 +             const char *nbpu_opt,
 +             int nsteps_cmdline, int nstepout,int resetstep,
 +             int nmultisim,int repl_ex_nst,int repl_ex_nex,
 +             int repl_ex_seed, real pforce,real cpt_period,real max_hours,
 +             const char *deviceOptions, unsigned long Flags)
 +{
 +    gmx_bool   bForceUseGPU,bTryUseGPU;
 +    double     nodetime=0,realtime;
 +    t_inputrec *inputrec;
 +    t_state    *state=NULL;
 +    matrix     box;
 +    gmx_ddbox_t ddbox={0};
 +    int        npme_major,npme_minor;
 +    real       tmpr1,tmpr2;
 +    t_nrnb     *nrnb;
 +    gmx_mtop_t *mtop=NULL;
 +    t_mdatoms  *mdatoms=NULL;
 +    t_forcerec *fr=NULL;
 +    t_fcdata   *fcd=NULL;
 +    real       ewaldcoeff=0;
 +    gmx_pme_t  *pmedata=NULL;
 +    gmx_vsite_t *vsite=NULL;
 +    gmx_constr_t constr;
 +    int        i,m,nChargePerturbed=-1,status,nalloc;
 +    char       *gro;
 +    gmx_wallcycle_t wcycle;
 +    gmx_bool       bReadRNG,bReadEkin;
 +    int        list;
 +    gmx_runtime_t runtime;
 +    int        rc;
 +    gmx_large_int_t reset_counters;
 +    gmx_edsam_t ed=NULL;
 +    t_commrec   *cr_old=cr; 
 +    int         nthreads_pme=1;
 +    int         nthreads_pp=1;
 +    gmx_membed_t membed=NULL;
 +    gmx_hw_info_t *hwinfo=NULL;
 +    master_inf_t minf={-1,FALSE};
 +
 +    /* CAUTION: threads may be started later on in this function, so
 +       cr doesn't reflect the final parallel state right now */
 +    snew(inputrec,1);
 +    snew(mtop,1);
 +    
 +    if (Flags & MD_APPENDFILES) 
 +    {
 +        fplog = NULL;
 +    }
 +
 +    bForceUseGPU = (strncmp(nbpu_opt, "gpu", 3) == 0);
 +    bTryUseGPU   = (strncmp(nbpu_opt, "auto", 4) == 0) || bForceUseGPU;
 +
 +    snew(state,1);
 +    if (SIMMASTER(cr)) 
 +    {
 +        /* Read (nearly) all data required for the simulation */
 +        read_tpx_state(ftp2fn(efTPX,nfile,fnm),inputrec,state,NULL,mtop);
 +
 +        if (inputrec->cutoff_scheme != ecutsVERLET &&
 +            ((Flags & MD_TESTVERLET) || getenv("GMX_VERLET_SCHEME") != NULL))
 +        {
 +            convert_to_verlet_scheme(fplog,inputrec,mtop,det(state->box));
 +        }
 +
 +        /* Detect hardware, gather information. With tMPI only thread 0 does this,
 +         * and after the threads are started it broadcasts hwinfo around. */
 +        snew(hwinfo, 1);
 +        gmx_detect_hardware(fplog, hwinfo, cr,
 +                            bForceUseGPU, bTryUseGPU, hw_opt->gpu_id);
 +
 +        minf.cutoff_scheme = inputrec->cutoff_scheme;
 +        minf.bUseGPU       = FALSE;
 +
 +        if (inputrec->cutoff_scheme == ecutsVERLET)
 +        {
 +            prepare_verlet_scheme(fplog,hwinfo,cr,hw_opt,nbpu_opt,
 +                                  inputrec,mtop,state->box,
 +                                  &minf.bUseGPU);
 +        }
 +        else if (hwinfo->bCanUseGPU)
 +        {
 +            md_print_warn(cr,fplog,
 +                          "NOTE: GPU(s) found, but the current simulation can not use GPUs\n"
 +                          "      To use a GPU, set the mdp option: cutoff-scheme = Verlet\n"
 +                          "      (for quick performance testing you can use the -testverlet option)\n");
 +
 +            if (bForceUseGPU)
 +            {
 +                gmx_fatal(FARGS,"GPU requested, but can't be used without cutoff-scheme=Verlet");
 +            }
 +        }
 +    }
 +#ifndef GMX_THREAD_MPI
 +    if (PAR(cr))
 +    {
 +        gmx_bcast_sim(sizeof(minf),&minf,cr);
 +    }
 +#endif
 +    if (minf.bUseGPU && cr->npmenodes == -1)
 +    {
 +        /* Don't automatically use PME-only nodes with GPUs */
 +        cr->npmenodes = 0;
 +    }
 +
++    /* Check for externally set OpenMP affinity and turn off internal
++     * pinning if any is found. We need to do this check early to tell
++     * thread-MPI whether it should do pinning when spawning threads.
++     */
++    gmx_omp_check_thread_affinity(fplog, cr, hw_opt);
++
 +#ifdef GMX_THREAD_MPI
 +    /* With thread-MPI inputrec is only set here on the master thread */
 +    if (SIMMASTER(cr))
 +#endif
 +    {
 +        check_and_update_hw_opt(hw_opt,minf.cutoff_scheme);
 +
++#ifdef GMX_THREAD_MPI
++        /* Early check for externally set process affinity. We can't do this over
++         * all MPI processes because hwinfo is not available everywhere, but with
++         * thread-MPI it is needed here, as pinning might get turned off, which
++         * needs to be known before starting thread-MPI. */
++        check_cpu_affinity_set(fplog,
++                               NULL,
++                               hw_opt, hwinfo->nthreads_hw_avail, FALSE);
++#endif
++
 +#ifdef GMX_THREAD_MPI
 +        if (cr->npmenodes > 0 && hw_opt->nthreads_tmpi <= 0)
 +        {
 +            gmx_fatal(FARGS,"You need to explicitly specify the number of MPI threads (-ntmpi) when using separate PME nodes");
 +        }
 +#endif
 +
 +        if (hw_opt->nthreads_omp_pme != hw_opt->nthreads_omp &&
 +            cr->npmenodes <= 0)
 +        {
 +            gmx_fatal(FARGS,"You need to explicitly specify the number of PME nodes (-npme) when using different number of OpenMP threads for PP and PME nodes");
 +        }
 +    }
 +
 +#ifdef GMX_THREAD_MPI
 +    if (SIMMASTER(cr))
 +    {
 +        /* NOW the threads will be started: */
 +        hw_opt->nthreads_tmpi = get_nthreads_mpi(hwinfo,
 +                                                 hw_opt,
 +                                                 inputrec, mtop,
 +                                                 cr, fplog);
 +        if (hw_opt->nthreads_tot > 0 && hw_opt->nthreads_omp <= 0)
 +        {
 +            hw_opt->nthreads_omp = hw_opt->nthreads_tot/hw_opt->nthreads_tmpi;
 +        }
 +
 +        if (hw_opt->nthreads_tmpi > 1)
 +        {
 +            /* now start the threads. */
 +            cr=mdrunner_start_threads(hw_opt, fplog, cr_old, nfile, fnm, 
 +                                      oenv, bVerbose, bCompact, nstglobalcomm, 
 +                                      ddxyz, dd_node_order, rdd, rconstr, 
 +                                      dddlb_opt, dlb_scale, ddcsx, ddcsy, ddcsz,
 +                                      nbpu_opt,
 +                                      nsteps_cmdline, nstepout, resetstep, nmultisim, 
 +                                      repl_ex_nst, repl_ex_nex, repl_ex_seed, pforce,
 +                                      cpt_period, max_hours, deviceOptions, 
 +                                      Flags);
 +            /* the main thread continues here with a new cr. We don't deallocate
 +               the old cr because other threads may still be reading it. */
 +            if (cr == NULL)
 +            {
 +                gmx_comm("Failed to spawn threads");
 +            }
 +        }
 +    }
 +#endif
 +    /* END OF CAUTION: cr is now reliable */
 +
 +    /* g_membed initialisation *
 +     * Because we change the mtop, init_membed is called before the init_parallel *
 +     * (in case we ever want to make it run in parallel) */
 +    if (opt2bSet("-membed",nfile,fnm))
 +    {
 +        if (MASTER(cr))
 +        {
 +            fprintf(stderr,"Initializing membed");
 +        }
 +        membed = init_membed(fplog,nfile,fnm,mtop,inputrec,state,cr,&cpt_period);
 +    }
 +
 +    if (PAR(cr))
 +    {
 +        /* now broadcast everything to the non-master nodes/threads: */
 +        init_parallel(fplog, cr, inputrec, mtop);
 +
 +        /* This check needs to happen after get_nthreads_mpi() */
 +        if (inputrec->cutoff_scheme == ecutsVERLET && (Flags & MD_PARTDEC))
 +        {
 +            gmx_fatal_collective(FARGS,cr,NULL,
 +                                 "The Verlet cut-off scheme is not supported with particle decomposition.\n"
 +                                 "You can achieve the same effect as particle decomposition by running in parallel using only OpenMP threads.");
 +        }
 +    }
 +    if (fplog != NULL)
 +    {
 +        pr_inputrec(fplog,0,"Input Parameters",inputrec,FALSE);
 +    }
 +
 +#if defined GMX_THREAD_MPI
 +    /* With tMPI we detected the hardware on thread 0 and we'll just pass the
 +     * hwinfo pointer to the other threads -- slightly uncool, but it works fine;
 +     * we just need to make sure that the data doesn't get freed twice. */
 +    if (cr->nnodes > 1)
 +    {
 +        if (!SIMMASTER(cr))
 +        {
 +            snew(hwinfo, 1);
 +        }
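 +        /* Broadcast only the pointer value (sizeof(&hwinfo) bytes); with
 +         * thread-MPI all ranks share one address space, so that suffices. */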
 +        gmx_bcast(sizeof(&hwinfo), &hwinfo, cr);
 +    }
 +#else
 +    if (PAR(cr) && !SIMMASTER(cr))
 +    {
 +        /* now we have inputrec on all nodes, can run the detection */
 +        /* TODO: perhaps it's better to propagate within a node instead? */
 +        snew(hwinfo, 1);
 +        gmx_detect_hardware(fplog, hwinfo, cr,
 +                                 bForceUseGPU, bTryUseGPU, hw_opt->gpu_id);
 +    }
++
++    /* Now do the affinity check with MPI/no-MPI (done earlier with thread-MPI). */
++    check_cpu_affinity_set(fplog, cr,
++                           hw_opt, hwinfo->nthreads_hw_avail, FALSE);
 +#endif
 +
 +    /* now make sure the state is initialized and propagated */
 +    set_state_entries(state,inputrec,cr->nnodes);
 +
 +    /* remove when vv and rerun work correctly! */
 +    if (PAR(cr) && EI_VV(inputrec->eI) && ((Flags & MD_RERUN) || (Flags & MD_RERUN_VSITE)))
 +    {
 +        gmx_fatal(FARGS,
 +                  "Currently can't do velocity verlet with rerun in parallel.");
 +    }
 +
 +    /* A parallel command line option consistency check that we can
 +       only do after any threads have started. */
 +    if (!PAR(cr) &&
 +        (ddxyz[XX] > 1 || ddxyz[YY] > 1 || ddxyz[ZZ] > 1 || cr->npmenodes > 0))
 +    {
 +        gmx_fatal(FARGS,
 +                  "The -dd or -npme options request a parallel simulation, "
 +#ifndef GMX_MPI
 +                  "but %s was compiled without threads or MPI enabled"
 +#else
 +#ifdef GMX_THREAD_MPI
 +                  "but the number of threads (option -nt) is 1"
 +#else
 +                  "but %s was not started through mpirun/mpiexec or only one process was requested through mpirun/mpiexec"
 +#endif
 +#endif
 +                  , ShortProgram()
 +            );
 +    }
 +
 +    if ((Flags & MD_RERUN) &&
 +        (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI))
 +    {
 +        gmx_fatal(FARGS, "The .mdp file specified an energy minimization or normal mode algorithm, and these are not compatible with mdrun -rerun");
 +    }
 +
 +    if (can_use_allvsall(inputrec,mtop,TRUE,cr,fplog) && PAR(cr))
 +    {
 +        /* All-vs-all loops do not work with domain decomposition */
 +        Flags |= MD_PARTDEC;
 +    }
 +
 +    if (!EEL_PME(inputrec->coulombtype) || (Flags & MD_PARTDEC))
 +    {
 +        if (cr->npmenodes > 0)
 +        {
 +            if (!EEL_PME(inputrec->coulombtype))
 +            {
 +                gmx_fatal_collective(FARGS,cr,NULL,
 +                                     "PME nodes are requested, but the system does not use PME electrostatics");
 +            }
 +            if (Flags & MD_PARTDEC)
 +            {
 +                gmx_fatal_collective(FARGS,cr,NULL,
 +                                     "PME nodes are requested, but particle decomposition does not support separate PME nodes");
 +            }
 +        }
 +
 +        cr->npmenodes = 0;
 +    }
 +
 +#ifdef GMX_FAHCORE
 +    fcRegisterSteps(inputrec->nsteps,inputrec->init_step);
 +#endif
 +
 +    /* NMR restraints must be initialized before load_checkpoint,
 +     * since with time averaging the history is added to t_state.
 +     * For proper consistency check we therefore need to extend
 +     * t_state here.
 +     * So the PME-only nodes (if present) will also initialize
 +     * the distance restraints.
 +     */
 +    snew(fcd,1);
 +
 +    /* This needs to be called before read_checkpoint to extend the state */
 +    init_disres(fplog,mtop,inputrec,cr,Flags & MD_PARTDEC,fcd,state);
 +
 +    if (gmx_mtop_ftype_count(mtop,F_ORIRES) > 0)
 +    {
 +        if (PAR(cr) && !(Flags & MD_PARTDEC))
 +        {
 +            gmx_fatal(FARGS,"Orientation restraints do not work (yet) with domain decomposition, use particle decomposition (mdrun option -pd)");
 +        }
 +        /* Orientation restraints */
 +        if (MASTER(cr))
 +        {
 +            init_orires(fplog,mtop,state->x,inputrec,cr->ms,&(fcd->orires),
 +                        state);
 +        }
 +    }
 +
 +    if (DEFORM(*inputrec))
 +    {
 +        /* Store the deform reference box before reading the checkpoint */
 +        if (SIMMASTER(cr))
 +        {
 +            copy_mat(state->box,box);
 +        }
 +        if (PAR(cr))
 +        {
 +            gmx_bcast(sizeof(box),box,cr);
 +        }
 +        /* Because we do not have the update struct available yet
 +         * in which the reference values should be stored,
 +         * we store them temporarily in static variables.
 +         * This should be thread safe, since they are only written once
 +         * and with identical values.
 +         */
 +#ifdef GMX_THREAD_MPI
 +        tMPI_Thread_mutex_lock(&deform_init_box_mutex);
 +#endif
 +        deform_init_init_step_tpx = inputrec->init_step;
 +        copy_mat(box,deform_init_box_tpx);
 +#ifdef GMX_THREAD_MPI
 +        tMPI_Thread_mutex_unlock(&deform_init_box_mutex);
 +#endif
 +    }
 +
 +    if (opt2bSet("-cpi",nfile,fnm)) 
 +    {
 +        /* Check if checkpoint file exists before doing continuation.
 +         * This way we can use identical input options for the first and subsequent runs...
 +         */
 +        if( gmx_fexist_master(opt2fn_master("-cpi",nfile,fnm,cr),cr) )
 +        {
 +            load_checkpoint(opt2fn_master("-cpi",nfile,fnm,cr),&fplog,
 +                            cr,Flags & MD_PARTDEC,ddxyz,
 +                            inputrec,state,&bReadRNG,&bReadEkin,
 +                            (Flags & MD_APPENDFILES),
 +                            (Flags & MD_APPENDFILESSET));
 +            
 +            if (bReadRNG)
 +            {
 +                Flags |= MD_READ_RNG;
 +            }
 +            if (bReadEkin)
 +            {
 +                Flags |= MD_READ_EKIN;
 +            }
 +        }
 +    }
 +
 +    if (((MASTER(cr) || (Flags & MD_SEPPOT)) && (Flags & MD_APPENDFILES))
 +#ifdef GMX_THREAD_MPI
 +        /* With thread MPI only the master node/thread exists in mdrun.c,
 +         * therefore non-master nodes need to open the "seppot" log file here.
 +         */
 +        || (!MASTER(cr) && (Flags & MD_SEPPOT))
 +#endif
 +        )
 +    {
 +        gmx_log_open(ftp2fn(efLOG,nfile,fnm),cr,!(Flags & MD_SEPPOT),
 +                             Flags,&fplog);
 +    }
 +
 +    /* override nsteps with value from cmdline */
 +    override_nsteps_cmdline(fplog, nsteps_cmdline, inputrec, cr);
 +
 +    if (SIMMASTER(cr)) 
 +    {
 +        copy_mat(state->box,box);
 +    }
 +
 +    if (PAR(cr)) 
 +    {
 +        gmx_bcast(sizeof(box),box,cr);
 +    }
 +
 +    /* Essential dynamics */
 +    if (opt2bSet("-ei",nfile,fnm))
 +    {
 +        /* Open input and output files, allocate space for ED data structure */
 +        ed = ed_open(nfile,fnm,Flags,cr);
 +    }
 +
 +    if (PAR(cr) && !((Flags & MD_PARTDEC) ||
 +                     EI_TPI(inputrec->eI) ||
 +                     inputrec->eI == eiNM))
 +    {
 +        cr->dd = init_domain_decomposition(fplog,cr,Flags,ddxyz,rdd,rconstr,
 +                                           dddlb_opt,dlb_scale,
 +                                           ddcsx,ddcsy,ddcsz,
 +                                           mtop,inputrec,
 +                                           box,state->x,
 +                                           &ddbox,&npme_major,&npme_minor);
 +
 +        make_dd_communicators(fplog,cr,dd_node_order);
 +
 +        /* Set overallocation to avoid frequent reallocation of arrays */
 +        set_over_alloc_dd(TRUE);
 +    }
 +    else
 +    {
 +        /* PME, if used, is done on all nodes with 1D decomposition */
 +        cr->npmenodes = 0;
 +        cr->duty = (DUTY_PP | DUTY_PME);
 +        npme_major = 1;
 +        npme_minor = 1;
 +        if (!EI_TPI(inputrec->eI))
 +        {
 +            npme_major = cr->nnodes;
 +        }
 +        
 +        if (inputrec->ePBC == epbcSCREW)
 +        {
 +            gmx_fatal(FARGS,
 +                      "pbc=%s is only implemented with domain decomposition",
 +                      epbc_names[inputrec->ePBC]);
 +        }
 +    }
 +
 +    if (PAR(cr))
 +    {
 +        /* After possible communicator splitting in make_dd_communicators,
 +         * we can set up the intra/inter node communication.
 +         */
 +        gmx_setup_nodecomm(fplog,cr);
 +    }
 +
 +    /* Initialize per-node process ID and counters. */
 +    gmx_init_intra_counters(cr);
 +
 +#ifdef GMX_MPI
 +    md_print_info(cr,fplog,"Using %d MPI %s\n",
 +                  cr->nnodes,
 +#ifdef GMX_THREAD_MPI
 +                  cr->nnodes==1 ? "thread" : "threads"
 +#else
 +                  cr->nnodes==1 ? "process" : "processes"
 +#endif
 +                  );
 +#endif
 +
 +    gmx_omp_nthreads_init(fplog, cr,
 +                          hwinfo->nthreads_hw_avail,
 +                          hw_opt->nthreads_omp,
 +                          hw_opt->nthreads_omp_pme,
 +                          (cr->duty & DUTY_PP) == 0,
 +                          inputrec->cutoff_scheme == ecutsVERLET);
 +
 +    gmx_check_hw_runconf_consistency(fplog, hwinfo, cr, hw_opt->nthreads_tmpi, minf.bUseGPU);
 +
 +    /* Get the number of PP/PME threads.
 +       The PME env variable should be read only on one node to make sure it is
 +       identical everywhere.
 +     */
 +    /* TODO nthreads_pp is only used for pinning threads.
 +     * This is a temporary solution until we have a hw topology library.
 +     */
 +    nthreads_pp  = gmx_omp_nthreads_get(emntNonbonded);
 +    nthreads_pme = gmx_omp_nthreads_get(emntPME);
 +
 +    wcycle = wallcycle_init(fplog,resetstep,cr,nthreads_pp,nthreads_pme);
 +
 +    if (PAR(cr))
 +    {
 +        /* Master synchronizes its value of reset_counters with all nodes 
 +         * including PME only nodes */
 +        reset_counters = wcycle_get_reset_counters(wcycle);
 +        gmx_bcast_sim(sizeof(reset_counters),&reset_counters,cr);
 +        wcycle_set_reset_counters(wcycle, reset_counters);
 +    }
 +
 +    snew(nrnb,1);
 +    if (cr->duty & DUTY_PP)
 +    {
 +        /* For domain decomposition we allocate dynamically
 +         * in dd_partition_system.
 +         */
 +        if (DOMAINDECOMP(cr))
 +        {
 +            bcast_state_setup(cr,state);
 +        }
 +        else
 +        {
 +            if (PAR(cr))
 +            {
 +                bcast_state(cr,state,TRUE);
 +            }
 +        }
 +
 +        /* Initiate forcerecord */
 +        fr = mk_forcerec();
 +        fr->hwinfo = hwinfo;
 +        init_forcerec(fplog,oenv,fr,fcd,inputrec,mtop,cr,box,FALSE,
 +                      opt2fn("-table",nfile,fnm),
 +                      opt2fn("-tabletf",nfile,fnm),
 +                      opt2fn("-tablep",nfile,fnm),
 +                      opt2fn("-tableb",nfile,fnm),
 +                      nbpu_opt,
 +                      FALSE,pforce);
 +
 +        /* version for PCA_NOT_READ_NODE (see md.c) */
 +        /*init_forcerec(fplog,fr,fcd,inputrec,mtop,cr,box,FALSE,
 +          "nofile","nofile","nofile","nofile",FALSE,pforce);
 +          */        
 +        fr->bSepDVDL = ((Flags & MD_SEPPOT) == MD_SEPPOT);
 +
 +        /* Initialize QM-MM */
 +        if(fr->bQMMM)
 +        {
 +            init_QMMMrec(cr,box,mtop,inputrec,fr);
 +        }
 +
 +        /* Initialize the mdatoms structure.
 +         * mdatoms is not filled with atom data,
 +         * as this can not be done now with domain decomposition.
 +         */
 +        mdatoms = init_mdatoms(fplog,mtop,inputrec->efep!=efepNO);
 +
 +        /* Initialize the virtual site communication */
 +        vsite = init_vsite(mtop,cr,FALSE);
 +
 +        calc_shifts(box,fr->shift_vec);
 +
 +        /* With periodic molecules the charge groups should be whole at start up
 +         * and the virtual sites should not be far from their proper positions.
 +         */
 +        if (!inputrec->bContinuation && MASTER(cr) &&
 +            !(inputrec->ePBC != epbcNONE && inputrec->bPeriodicMols))
 +        {
 +            /* Make molecules whole at start of run */
 +            if (fr->ePBC != epbcNONE)
 +            {
 +                do_pbc_first_mtop(fplog,inputrec->ePBC,box,mtop,state->x);
 +            }
 +            if (vsite)
 +            {
 +                /* Correct initial vsite positions are required
 +                 * for the initial distribution in the domain decomposition
 +                 * and for the initial shell prediction.
 +                 */
 +                construct_vsites_mtop(fplog,vsite,mtop,state->x);
 +            }
 +        }
 +
 +        if (EEL_PME(fr->eeltype))
 +        {
 +            ewaldcoeff = fr->ewaldcoeff;
 +            pmedata = &fr->pmedata;
 +        }
 +        else
 +        {
 +            pmedata = NULL;
 +        }
 +    }
 +    else
 +    {
 +        /* This is a PME only node */
 +
 +        /* We don't need the state */
 +        done_state(state);
 +
 +        ewaldcoeff = calc_ewaldcoeff(inputrec->rcoulomb, inputrec->ewald_rtol);
 +        snew(pmedata,1);
 +    }
 +
++    /* Before setting affinity, check whether the affinity has changed since we
++     * first checked (which would indicate that the OpenMP library has probably
++     * changed it). */
++    check_cpu_affinity_set(fplog, cr, hw_opt, hwinfo->nthreads_hw_avail, TRUE);
++
 +    /* Set the CPU affinity */
 +    set_cpu_affinity(fplog,cr,hw_opt,nthreads_pme,hwinfo,inputrec);
 +
 +    /* Initiate PME if necessary,
 +     * either on all nodes or on dedicated PME nodes only. */
 +    if (EEL_PME(inputrec->coulombtype))
 +    {
 +        if (mdatoms)
 +        {
 +            nChargePerturbed = mdatoms->nChargePerturbed;
 +        }
 +        if (cr->npmenodes > 0)
 +        {
 +            /* The PME only nodes need to know nChargePerturbed */
 +            gmx_bcast_sim(sizeof(nChargePerturbed),&nChargePerturbed,cr);
 +        }
 +
 +        if (cr->duty & DUTY_PME)
 +        {
 +            status = gmx_pme_init(pmedata,cr,npme_major,npme_minor,inputrec,
 +                                  mtop ? mtop->natoms : 0,nChargePerturbed,
 +                                  (Flags & MD_REPRODUCIBLE),nthreads_pme);
 +            if (status != 0) 
 +            {
 +                gmx_fatal(FARGS,"Error %d initializing PME",status);
 +            }
 +        }
 +    }
 +
 +
 +    if (integrator[inputrec->eI].func == do_md
 +#ifdef GMX_OPENMM
 +        ||
 +        integrator[inputrec->eI].func == do_md_openmm
 +#endif
 +        )
 +    {
 +        /* Turn on signal handling on all nodes */
 +        /*
 +         * A user signal from the PME nodes (if any)
 +         * is communicated to the PP nodes.
 +         */
 +        signal_handler_install();
 +    }
 +
 +    if (cr->duty & DUTY_PP)
 +    {
 +        if (inputrec->ePull != epullNO)
 +        {
 +            /* Initialize pull code */
 +            init_pull(fplog,inputrec,nfile,fnm,mtop,cr,oenv, inputrec->fepvals->init_lambda,
 +                      EI_DYNAMICS(inputrec->eI) && MASTER(cr),Flags);
 +        }
 +        
 +        if (inputrec->bRot)
 +        {
 +           /* Initialize enforced rotation code */
 +           init_rot(fplog,inputrec,nfile,fnm,cr,state->x,box,mtop,oenv,
 +                    bVerbose,Flags);
 +        }
 +
 +        constr = init_constraints(fplog,mtop,inputrec,ed,state,cr);
 +
 +        if (DOMAINDECOMP(cr))
 +        {
 +            dd_init_bondeds(fplog,cr->dd,mtop,vsite,constr,inputrec,
 +                            Flags & MD_DDBONDCHECK,fr->cginfo_mb);
 +
 +            set_dd_parameters(fplog,cr->dd,dlb_scale,inputrec,fr,&ddbox);
 +
 +            setup_dd_grid(fplog,cr->dd);
 +        }
 +
 +        /* Now do whatever the user wants us to do (how flexible...) */
 +        integrator[inputrec->eI].func(fplog,cr,nfile,fnm,
 +                                      oenv,bVerbose,bCompact,
 +                                      nstglobalcomm,
 +                                      vsite,constr,
 +                                      nstepout,inputrec,mtop,
 +                                      fcd,state,
 +                                      mdatoms,nrnb,wcycle,ed,fr,
 +                                      repl_ex_nst,repl_ex_nex,repl_ex_seed,
 +                                      membed,
 +                                      cpt_period,max_hours,
 +                                      deviceOptions,
 +                                      Flags,
 +                                      &runtime);
 +
 +        if (inputrec->ePull != epullNO)
 +        {
 +            finish_pull(fplog,inputrec->pull);
 +        }
 +        
 +        if (inputrec->bRot)
 +        {
 +            finish_rot(fplog,inputrec->rot);
 +        }
 +
 +    } 
 +    else 
 +    {
 +        /* do PME only */
 +        gmx_pmeonly(*pmedata,cr,nrnb,wcycle,ewaldcoeff,FALSE,inputrec);
 +    }
 +
 +    if (EI_DYNAMICS(inputrec->eI) || EI_TPI(inputrec->eI))
 +    {
 +        /* Some timing stats */  
 +        if (SIMMASTER(cr))
 +        {
 +            if (runtime.proc == 0)
 +            {
 +                runtime.proc = runtime.real;
 +            }
 +        }
 +        else
 +        {
 +            runtime.real = 0;
 +        }
 +    }
 +
 +    wallcycle_stop(wcycle,ewcRUN);
 +
 +    /* Finish up, write some stuff
 +     * if rerunMD, don't write last frame again 
 +     */
 +    finish_run(fplog,cr,ftp2fn(efSTO,nfile,fnm),
 +               inputrec,nrnb,wcycle,&runtime,
 +               fr != NULL && fr->nbv != NULL && fr->nbv->bUseGPU ?
 +                 nbnxn_cuda_get_timings(fr->nbv->cu_nbv) : NULL,
 +               nthreads_pp, 
 +               EI_DYNAMICS(inputrec->eI) && !MULTISIM(cr));
 +
 +    if ((cr->duty & DUTY_PP) && fr->nbv != NULL && fr->nbv->bUseGPU)
 +    {
 +        char gpu_err_str[STRLEN];
 +
 +        /* free GPU memory and uninitialize GPU (by destroying the context) */
 +        nbnxn_cuda_free(fplog, fr->nbv->cu_nbv);
 +
 +        if (!free_gpu(gpu_err_str))
 +        {
 +            gmx_warning("On node %d failed to free GPU #%d: %s",
 +                        cr->nodeid, get_current_gpu_device_id(), gpu_err_str);
 +        }
 +    }
 +
 +    if (opt2bSet("-membed",nfile,fnm))
 +    {
 +        sfree(membed);
 +    }
 +
 +#ifdef GMX_THREAD_MPI
 +    if (PAR(cr) && SIMMASTER(cr))
 +#endif
 +    {
 +        gmx_hardware_info_free(hwinfo);
 +    }
 +
 +    /* Does what it says */  
 +    print_date_and_time(fplog,cr->nodeid,"Finished mdrun",&runtime);
 +
 +    /* Close logfile already here if we were appending to it */
 +    if (MASTER(cr) && (Flags & MD_APPENDFILES))
 +    {
 +        gmx_log_close(fplog);
 +    } 
 +
 +    rc=(int)gmx_get_stop_condition();
 +
 +#ifdef GMX_THREAD_MPI
 +    /* we need to join all threads. The sub-threads join when they
 +       exit this function, but the master thread needs to be told to 
 +       wait for that. */
 +    if (PAR(cr) && MASTER(cr))
 +    {
 +        tMPI_Finalize();
 +    }
 +#endif
 +
 +    return rc;
 +}
Simple merge