Merge release-4-6 into master
authorRoland Schulz <roland@utk.edu>
Mon, 3 Dec 2012 06:15:40 +0000 (01:15 -0500)
committerRoland Schulz <roland@utk.edu>
Mon, 3 Dec 2012 06:25:01 +0000 (01:25 -0500)
Conflicts (change moved to src/gromacs/CMakeLists.txt):
        src/mdlib/CMakeLists.txt
        src/gmxlib/CMakeLists.txt

deleted:
        src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel400_sse2_double.c
        src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel400_sse2_double.h
        src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel410_sse2_double.c
        src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel410_sse2_double.h
        src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel430_sse2_double.c
        src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel430_sse2_double.h

trivial conflicts:
        CMakeLists.txt
cmake/gmxGetCompilerVersion.cmake
src/config.h.cmakein
src/gmxlib/CMakeLists.txt
src/gromacs/gmxlib/copyrite.c
src/gromacs/legacyheaders/maths.h
src/gromacs/legacyheaders/types/nb_verlet.h

All new nonbonded moved.

This commit reverts 792f6b0. Visibility will be added to 5.0 later (it will be
significantly different because 5.0 only has one library, libgromacs).

Change-Id: Icec02c1cd8992626fbb4b38cdbea935947f9853d

833 files changed:
1  2 
.gitignore
CMakeLists.txt
cmake/ThreadMPI.cmake
src/CMakeLists.txt
src/config.h.cmakein
src/gromacs/CMakeLists.txt
src/gromacs/gmxlib/CMakeLists.txt
src/gromacs/gmxlib/bondfree.c
src/gromacs/gmxlib/checkpoint.c
src/gromacs/gmxlib/copyrite.c
src/gromacs/gmxlib/ewald_util.c
src/gromacs/gmxlib/futil.c
src/gromacs/gmxlib/gmx_cpuid.c
src/gromacs/gmxlib/gmx_detect_hardware.c
src/gromacs/gmxlib/gpu_utils/gpu_utils.cu
src/gromacs/gmxlib/main.c
src/gromacs/gmxlib/nonbonded/CMakeLists.txt
src/gromacs/gmxlib/nonbonded/nb_free_energy.c
src/gromacs/gmxlib/nonbonded/nb_kernel.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/kernelutil_x86_avx_128_fma_double.h
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/make_nb_kernel_avx_128_fma_double.py
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecCSTab_VdwNone_GeomP1P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecCSTab_VdwNone_GeomW3P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecCSTab_VdwNone_GeomW3W3_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecCSTab_VdwNone_GeomW4P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecCSTab_VdwNone_GeomW4W4_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecCoul_VdwLJ_GeomP1P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecCoul_VdwLJ_GeomW3P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecCoul_VdwLJ_GeomW3W3_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecCoul_VdwLJ_GeomW4P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecCoul_VdwLJ_GeomW4W4_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecCoul_VdwNone_GeomP1P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecCoul_VdwNone_GeomW3P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecCoul_VdwNone_GeomW3W3_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecCoul_VdwNone_GeomW4P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecCoul_VdwNone_GeomW4W4_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecEwSh_VdwNone_GeomP1P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecEwSh_VdwNone_GeomW3P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecEwSh_VdwNone_GeomW3W3_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecEwSh_VdwNone_GeomW4P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecEwSh_VdwNone_GeomW4W4_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecEwSw_VdwNone_GeomP1P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecEwSw_VdwNone_GeomW3P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecEwSw_VdwNone_GeomW3W3_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecEwSw_VdwNone_GeomW4P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecEwSw_VdwNone_GeomW4W4_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecEw_VdwCSTab_GeomP1P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecEw_VdwCSTab_GeomW3P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecEw_VdwCSTab_GeomW3W3_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecEw_VdwCSTab_GeomW4P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecEw_VdwCSTab_GeomW4W4_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecEw_VdwLJ_GeomP1P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecEw_VdwLJ_GeomW3P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecEw_VdwLJ_GeomW3W3_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecEw_VdwLJ_GeomW4P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecEw_VdwLJ_GeomW4W4_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecEw_VdwNone_GeomP1P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecEw_VdwNone_GeomW3P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecEw_VdwNone_GeomW3W3_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecEw_VdwNone_GeomW4P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecEw_VdwNone_GeomW4W4_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecGB_VdwLJ_GeomP1P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecGB_VdwNone_GeomP1P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecNone_VdwCSTab_GeomP1P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecNone_VdwLJSh_GeomP1P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecNone_VdwLJSw_GeomP1P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecNone_VdwLJ_GeomP1P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecRFCut_VdwNone_GeomP1P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecRFCut_VdwNone_GeomW3P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecRFCut_VdwNone_GeomW3W3_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecRFCut_VdwNone_GeomW4P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecRFCut_VdwNone_GeomW4W4_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecRF_VdwCSTab_GeomP1P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecRF_VdwCSTab_GeomW3P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecRF_VdwCSTab_GeomW3W3_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecRF_VdwCSTab_GeomW4P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecRF_VdwCSTab_GeomW4W4_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecRF_VdwLJ_GeomP1P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecRF_VdwLJ_GeomW3P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecRF_VdwLJ_GeomW3W3_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecRF_VdwLJ_GeomW4P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecRF_VdwLJ_GeomW4W4_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecRF_VdwNone_GeomP1P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecRF_VdwNone_GeomW3P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecRF_VdwNone_GeomW3W3_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecRF_VdwNone_GeomW4P1_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_ElecRF_VdwNone_GeomW4W4_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_avx_128_fma_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_avx_128_fma_double.h
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_template_avx_128_fma_double.pre
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/kernelutil_x86_avx_128_fma_single.h
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/make_nb_kernel_avx_128_fma_single.py
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecCSTab_VdwNone_GeomP1P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecCSTab_VdwNone_GeomW3P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecCSTab_VdwNone_GeomW3W3_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecCSTab_VdwNone_GeomW4P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecCSTab_VdwNone_GeomW4W4_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecCoul_VdwLJ_GeomP1P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecCoul_VdwLJ_GeomW3P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecCoul_VdwLJ_GeomW3W3_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecCoul_VdwLJ_GeomW4P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecCoul_VdwLJ_GeomW4W4_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecCoul_VdwNone_GeomP1P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecCoul_VdwNone_GeomW3P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecCoul_VdwNone_GeomW3W3_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecCoul_VdwNone_GeomW4P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecCoul_VdwNone_GeomW4W4_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecEwSh_VdwNone_GeomP1P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecEwSh_VdwNone_GeomW3P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecEwSh_VdwNone_GeomW3W3_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecEwSh_VdwNone_GeomW4P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecEwSh_VdwNone_GeomW4W4_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecEwSw_VdwNone_GeomP1P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecEwSw_VdwNone_GeomW3P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecEwSw_VdwNone_GeomW3W3_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecEwSw_VdwNone_GeomW4P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecEwSw_VdwNone_GeomW4W4_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecEw_VdwCSTab_GeomP1P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecEw_VdwCSTab_GeomW3P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecEw_VdwCSTab_GeomW3W3_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecEw_VdwCSTab_GeomW4P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecEw_VdwCSTab_GeomW4W4_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecEw_VdwLJ_GeomP1P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecEw_VdwLJ_GeomW3P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecEw_VdwLJ_GeomW3W3_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecEw_VdwLJ_GeomW4P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecEw_VdwLJ_GeomW4W4_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecEw_VdwNone_GeomP1P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecEw_VdwNone_GeomW3P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecEw_VdwNone_GeomW3W3_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecEw_VdwNone_GeomW4P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecEw_VdwNone_GeomW4W4_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecGB_VdwLJ_GeomP1P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecGB_VdwNone_GeomP1P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecNone_VdwCSTab_GeomP1P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecNone_VdwLJSh_GeomP1P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecNone_VdwLJSw_GeomP1P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecNone_VdwLJ_GeomP1P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecRFCut_VdwNone_GeomP1P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecRFCut_VdwNone_GeomW3P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecRFCut_VdwNone_GeomW3W3_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecRFCut_VdwNone_GeomW4P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecRFCut_VdwNone_GeomW4W4_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecRF_VdwCSTab_GeomP1P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecRF_VdwCSTab_GeomW3P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecRF_VdwCSTab_GeomW3W3_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecRF_VdwCSTab_GeomW4P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecRF_VdwCSTab_GeomW4W4_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecRF_VdwLJ_GeomP1P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecRF_VdwLJ_GeomW3P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecRF_VdwLJ_GeomW3W3_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecRF_VdwLJ_GeomW4P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecRF_VdwLJ_GeomW4W4_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecRF_VdwNone_GeomP1P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecRF_VdwNone_GeomW3P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecRF_VdwNone_GeomW3W3_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecRF_VdwNone_GeomW4P1_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_ElecRF_VdwNone_GeomW4W4_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_avx_128_fma_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_avx_128_fma_single.h
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_template_avx_128_fma_single.pre
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/kernelutil_x86_avx_256_double.h
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/make_nb_kernel_avx_256_double.py
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCSTab_VdwNone_GeomP1P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCSTab_VdwNone_GeomW3P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCSTab_VdwNone_GeomW3W3_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCSTab_VdwNone_GeomW4P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCSTab_VdwNone_GeomW4W4_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCoul_VdwLJ_GeomP1P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCoul_VdwLJ_GeomW3P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCoul_VdwLJ_GeomW3W3_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCoul_VdwLJ_GeomW4P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCoul_VdwLJ_GeomW4W4_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCoul_VdwNone_GeomP1P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCoul_VdwNone_GeomW3P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCoul_VdwNone_GeomW3W3_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCoul_VdwNone_GeomW4P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCoul_VdwNone_GeomW4W4_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSh_VdwNone_GeomP1P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSh_VdwNone_GeomW3P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSh_VdwNone_GeomW3W3_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSh_VdwNone_GeomW4P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSh_VdwNone_GeomW4W4_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSw_VdwNone_GeomP1P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSw_VdwNone_GeomW3P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSw_VdwNone_GeomW3W3_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSw_VdwNone_GeomW4P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSw_VdwNone_GeomW4W4_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEw_VdwCSTab_GeomP1P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEw_VdwCSTab_GeomW3P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEw_VdwCSTab_GeomW3W3_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEw_VdwCSTab_GeomW4P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEw_VdwCSTab_GeomW4W4_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEw_VdwLJ_GeomP1P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEw_VdwLJ_GeomW3P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEw_VdwLJ_GeomW3W3_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEw_VdwLJ_GeomW4P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEw_VdwLJ_GeomW4W4_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEw_VdwNone_GeomP1P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEw_VdwNone_GeomW3P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEw_VdwNone_GeomW3W3_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEw_VdwNone_GeomW4P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEw_VdwNone_GeomW4W4_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecGB_VdwLJ_GeomP1P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecGB_VdwNone_GeomP1P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecNone_VdwCSTab_GeomP1P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecNone_VdwLJSh_GeomP1P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecNone_VdwLJSw_GeomP1P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecNone_VdwLJ_GeomP1P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwNone_GeomP1P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwNone_GeomW3P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwNone_GeomW3W3_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwNone_GeomW4P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwNone_GeomW4W4_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRF_VdwCSTab_GeomP1P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRF_VdwCSTab_GeomW3P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRF_VdwCSTab_GeomW3W3_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRF_VdwCSTab_GeomW4P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRF_VdwCSTab_GeomW4W4_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRF_VdwLJ_GeomP1P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRF_VdwLJ_GeomW3P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRF_VdwLJ_GeomW3W3_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRF_VdwLJ_GeomW4P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRF_VdwLJ_GeomW4W4_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRF_VdwNone_GeomP1P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRF_VdwNone_GeomW3P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRF_VdwNone_GeomW3W3_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRF_VdwNone_GeomW4P1_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRF_VdwNone_GeomW4W4_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_avx_256_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_avx_256_double.h
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_template_avx_256_double.pre
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/kernelutil_x86_avx_256_single.h
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/make_nb_kernel_avx_256_single.py
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCSTab_VdwNone_GeomP1P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCSTab_VdwNone_GeomW3P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCSTab_VdwNone_GeomW3W3_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCSTab_VdwNone_GeomW4P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCSTab_VdwNone_GeomW4W4_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCoul_VdwLJ_GeomP1P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCoul_VdwLJ_GeomW3P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCoul_VdwLJ_GeomW3W3_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCoul_VdwLJ_GeomW4P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCoul_VdwLJ_GeomW4W4_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCoul_VdwNone_GeomP1P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCoul_VdwNone_GeomW3P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCoul_VdwNone_GeomW3W3_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCoul_VdwNone_GeomW4P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCoul_VdwNone_GeomW4W4_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSh_VdwNone_GeomP1P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSh_VdwNone_GeomW3P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSh_VdwNone_GeomW3W3_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSh_VdwNone_GeomW4P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSh_VdwNone_GeomW4W4_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSw_VdwNone_GeomP1P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSw_VdwNone_GeomW3P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSw_VdwNone_GeomW3W3_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSw_VdwNone_GeomW4P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSw_VdwNone_GeomW4W4_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEw_VdwCSTab_GeomP1P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEw_VdwCSTab_GeomW3P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEw_VdwCSTab_GeomW3W3_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEw_VdwCSTab_GeomW4P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEw_VdwCSTab_GeomW4W4_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEw_VdwLJ_GeomP1P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEw_VdwLJ_GeomW3P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEw_VdwLJ_GeomW3W3_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEw_VdwLJ_GeomW4P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEw_VdwLJ_GeomW4W4_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEw_VdwNone_GeomP1P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEw_VdwNone_GeomW3P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEw_VdwNone_GeomW3W3_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEw_VdwNone_GeomW4P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEw_VdwNone_GeomW4W4_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecGB_VdwLJ_GeomP1P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecGB_VdwNone_GeomP1P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecNone_VdwCSTab_GeomP1P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecNone_VdwLJSh_GeomP1P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecNone_VdwLJSw_GeomP1P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecNone_VdwLJ_GeomP1P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwNone_GeomP1P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwNone_GeomW3P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwNone_GeomW3W3_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwNone_GeomW4P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwNone_GeomW4W4_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRF_VdwCSTab_GeomP1P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRF_VdwCSTab_GeomW3P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRF_VdwCSTab_GeomW3W3_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRF_VdwCSTab_GeomW4P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRF_VdwCSTab_GeomW4W4_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRF_VdwLJ_GeomP1P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRF_VdwLJ_GeomW3P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRF_VdwLJ_GeomW3W3_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRF_VdwLJ_GeomW4P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRF_VdwLJ_GeomW4W4_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRF_VdwNone_GeomP1P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRF_VdwNone_GeomW3P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRF_VdwNone_GeomW3W3_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRF_VdwNone_GeomW4P1_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRF_VdwNone_GeomW4W4_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_avx_256_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_avx_256_single.h
src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_template_avx_256_single.pre
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/kernelutil_x86_sse2_double.h
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/make_nb_kernel_sse2_double.py
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecCSTab_VdwNone_GeomP1P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecCSTab_VdwNone_GeomW3P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecCSTab_VdwNone_GeomW3W3_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecCSTab_VdwNone_GeomW4P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecCSTab_VdwNone_GeomW4W4_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecCoul_VdwLJ_GeomP1P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecCoul_VdwLJ_GeomW3P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecCoul_VdwLJ_GeomW3W3_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecCoul_VdwLJ_GeomW4P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecCoul_VdwLJ_GeomW4W4_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecCoul_VdwNone_GeomP1P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecCoul_VdwNone_GeomW3P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecCoul_VdwNone_GeomW3W3_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecCoul_VdwNone_GeomW4P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecCoul_VdwNone_GeomW4W4_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecEwSh_VdwNone_GeomP1P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecEwSh_VdwNone_GeomW3P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecEwSh_VdwNone_GeomW3W3_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecEwSh_VdwNone_GeomW4P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecEwSh_VdwNone_GeomW4W4_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecEwSw_VdwNone_GeomP1P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecEwSw_VdwNone_GeomW3P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecEwSw_VdwNone_GeomW3W3_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecEwSw_VdwNone_GeomW4P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecEwSw_VdwNone_GeomW4W4_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecEw_VdwCSTab_GeomP1P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecEw_VdwCSTab_GeomW3P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecEw_VdwCSTab_GeomW3W3_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecEw_VdwCSTab_GeomW4P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecEw_VdwCSTab_GeomW4W4_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecEw_VdwLJ_GeomP1P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecEw_VdwLJ_GeomW3P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecEw_VdwLJ_GeomW3W3_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecEw_VdwLJ_GeomW4P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecEw_VdwLJ_GeomW4W4_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecEw_VdwNone_GeomP1P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecEw_VdwNone_GeomW3P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecEw_VdwNone_GeomW3W3_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecEw_VdwNone_GeomW4P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecEw_VdwNone_GeomW4W4_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecGB_VdwLJ_GeomP1P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecGB_VdwNone_GeomP1P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecNone_VdwCSTab_GeomP1P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecNone_VdwLJSh_GeomP1P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecNone_VdwLJSw_GeomP1P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecNone_VdwLJ_GeomP1P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecRFCut_VdwNone_GeomP1P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecRFCut_VdwNone_GeomW3P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecRFCut_VdwNone_GeomW3W3_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecRFCut_VdwNone_GeomW4P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecRFCut_VdwNone_GeomW4W4_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecRF_VdwCSTab_GeomP1P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecRF_VdwCSTab_GeomW3P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecRF_VdwCSTab_GeomW3W3_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecRF_VdwCSTab_GeomW4P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecRF_VdwCSTab_GeomW4W4_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecRF_VdwLJ_GeomP1P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecRF_VdwLJ_GeomW3P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecRF_VdwLJ_GeomW3W3_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecRF_VdwLJ_GeomW4P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecRF_VdwLJ_GeomW4W4_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecRF_VdwNone_GeomP1P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecRF_VdwNone_GeomW3P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecRF_VdwNone_GeomW3W3_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecRF_VdwNone_GeomW4P1_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_ElecRF_VdwNone_GeomW4W4_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_sse2_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_sse2_double.h
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_template_sse2_double.pre
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/kernelutil_x86_sse2_single.h
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCSTab_VdwNone_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCSTab_VdwNone_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCoul_VdwLJ_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCoul_VdwLJ_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCoul_VdwNone_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecCoul_VdwNone_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEwSh_VdwNone_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEwSh_VdwNone_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEwSw_VdwNone_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEwSw_VdwNone_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEw_VdwCSTab_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEw_VdwCSTab_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEw_VdwLJ_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEw_VdwLJ_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEw_VdwNone_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecEw_VdwNone_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRFCut_VdwNone_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRFCut_VdwNone_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRF_VdwCSTab_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRF_VdwCSTab_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRF_VdwLJ_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRF_VdwLJ_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRF_VdwNone_GeomW3P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_ElecRF_VdwNone_GeomW4P1_sse2_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_template_sse2_single.pre
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/kernelutil_x86_sse4_1_double.h
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/make_nb_kernel_sse4_1_double.py
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecCSTab_VdwNone_GeomP1P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecCSTab_VdwNone_GeomW3P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecCSTab_VdwNone_GeomW3W3_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecCSTab_VdwNone_GeomW4P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecCSTab_VdwNone_GeomW4W4_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecCoul_VdwLJ_GeomP1P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecCoul_VdwLJ_GeomW3P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecCoul_VdwLJ_GeomW3W3_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecCoul_VdwLJ_GeomW4P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecCoul_VdwLJ_GeomW4W4_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecCoul_VdwNone_GeomP1P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecCoul_VdwNone_GeomW3P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecCoul_VdwNone_GeomW3W3_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecCoul_VdwNone_GeomW4P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecCoul_VdwNone_GeomW4W4_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEwSh_VdwNone_GeomP1P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEwSh_VdwNone_GeomW3P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEwSh_VdwNone_GeomW3W3_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEwSh_VdwNone_GeomW4P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEwSh_VdwNone_GeomW4W4_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEwSw_VdwNone_GeomP1P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEwSw_VdwNone_GeomW3P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEwSw_VdwNone_GeomW3W3_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEwSw_VdwNone_GeomW4P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEwSw_VdwNone_GeomW4W4_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEw_VdwCSTab_GeomP1P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEw_VdwCSTab_GeomW3P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEw_VdwCSTab_GeomW3W3_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEw_VdwCSTab_GeomW4P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEw_VdwCSTab_GeomW4W4_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEw_VdwLJ_GeomP1P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEw_VdwLJ_GeomW3P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEw_VdwLJ_GeomW3W3_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEw_VdwLJ_GeomW4P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEw_VdwLJ_GeomW4W4_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEw_VdwNone_GeomP1P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEw_VdwNone_GeomW3P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEw_VdwNone_GeomW3W3_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEw_VdwNone_GeomW4P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEw_VdwNone_GeomW4W4_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecGB_VdwLJ_GeomP1P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecGB_VdwNone_GeomP1P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecNone_VdwCSTab_GeomP1P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecNone_VdwLJSh_GeomP1P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecNone_VdwLJSw_GeomP1P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecNone_VdwLJ_GeomP1P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecRFCut_VdwNone_GeomP1P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecRFCut_VdwNone_GeomW3P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecRFCut_VdwNone_GeomW3W3_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecRFCut_VdwNone_GeomW4P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecRFCut_VdwNone_GeomW4W4_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecRF_VdwCSTab_GeomP1P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecRF_VdwCSTab_GeomW3P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecRF_VdwCSTab_GeomW3W3_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecRF_VdwCSTab_GeomW4P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecRF_VdwCSTab_GeomW4W4_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecRF_VdwLJ_GeomP1P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecRF_VdwLJ_GeomW3P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecRF_VdwLJ_GeomW3W3_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecRF_VdwLJ_GeomW4P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecRF_VdwLJ_GeomW4W4_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecRF_VdwNone_GeomP1P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecRF_VdwNone_GeomW3P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecRF_VdwNone_GeomW3W3_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecRF_VdwNone_GeomW4P1_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecRF_VdwNone_GeomW4W4_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_sse4_1_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_sse4_1_double.h
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_template_sse4_1_double.pre
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/kernelutil_x86_sse4_1_single.h
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCSTab_VdwNone_GeomW3P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCSTab_VdwNone_GeomW4P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCoul_VdwLJ_GeomW3P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCoul_VdwLJ_GeomW4P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCoul_VdwNone_GeomW3P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecCoul_VdwNone_GeomW4P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEwSh_VdwNone_GeomW3P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEwSh_VdwNone_GeomW4P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEwSw_VdwNone_GeomW3P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEwSw_VdwNone_GeomW4P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEw_VdwCSTab_GeomW3P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEw_VdwCSTab_GeomW4P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEw_VdwLJ_GeomW3P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEw_VdwLJ_GeomW4P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEw_VdwNone_GeomW3P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecEw_VdwNone_GeomW4P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRFCut_VdwNone_GeomW3P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRFCut_VdwNone_GeomW4P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRF_VdwCSTab_GeomW3P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRF_VdwCSTab_GeomW4P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRF_VdwLJ_GeomW3P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRF_VdwLJ_GeomW4P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRF_VdwNone_GeomW3P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_ElecRF_VdwNone_GeomW4P1_sse4_1_single.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_template_sse4_1_single.pre
src/gromacs/gmxlib/nonbonded/nonbonded.c
src/gromacs/gmxlib/nonbonded/preprocessor/.gitignore
src/gromacs/gmxlib/nrnb.c
src/gromacs/gmxlib/thread_mpi/pthreads.c
src/gromacs/gmxlib/thread_mpi/winthreads.c
src/gromacs/gmxlib/thread_mpi/winthreads.h
src/gromacs/gmxpreprocess/calc_verletbuf.c
src/gromacs/legacyheaders/bondf.h
src/gromacs/legacyheaders/force.h
src/gromacs/legacyheaders/gmx_math_x86_avx_128_fma_double.h
src/gromacs/legacyheaders/gmx_math_x86_avx_128_fma_single.h
src/gromacs/legacyheaders/gmx_math_x86_avx_256_double.h
src/gromacs/legacyheaders/gmx_math_x86_avx_256_single.h
src/gromacs/legacyheaders/gmx_math_x86_sse2_double.h
src/gromacs/legacyheaders/gmx_math_x86_sse2_single.h
src/gromacs/legacyheaders/gmx_math_x86_sse4_1_double.h
src/gromacs/legacyheaders/gmx_math_x86_sse4_1_single.h
src/gromacs/legacyheaders/gmx_x86_avx_128_fma.h
src/gromacs/legacyheaders/gmx_x86_avx_256.h
src/gromacs/legacyheaders/gmx_x86_simd_macros.h
src/gromacs/legacyheaders/gpu_utils.h
src/gromacs/legacyheaders/maths.h
src/gromacs/legacyheaders/thread_mpi/atomic.h
src/gromacs/legacyheaders/thread_mpi/barrier.h
src/gromacs/legacyheaders/thread_mpi/collective.h
src/gromacs/legacyheaders/thread_mpi/event.h
src/gromacs/legacyheaders/thread_mpi/hwinfo.h
src/gromacs/legacyheaders/thread_mpi/list.h
src/gromacs/legacyheaders/thread_mpi/lock.h
src/gromacs/legacyheaders/thread_mpi/mutex.h
src/gromacs/legacyheaders/thread_mpi/numa_malloc.h
src/gromacs/legacyheaders/thread_mpi/system_error.h
src/gromacs/legacyheaders/thread_mpi/threads.h
src/gromacs/legacyheaders/thread_mpi/tmpi.h
src/gromacs/legacyheaders/thread_mpi/visibility.h
src/gromacs/legacyheaders/types/forcerec.h
src/gromacs/legacyheaders/types/nb_verlet.h
src/gromacs/legacyheaders/types/nrnb.h
src/gromacs/legacyheaders/vec.h
src/gromacs/mdlib/force.c
src/gromacs/mdlib/forcerec.c
src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh
src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_legacy.cuh
src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_utils.cuh
src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.c
src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_x86_simd128.c
src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_x86_simd128.h
src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_x86_simd256.c
src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_x86_simd256.h
src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_x86_simd_inner.h
src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_x86_simd_outer.h
src/gromacs/mdlib/nbnxn_search.c
src/gromacs/mdlib/sim_util.c
src/gromacs/mdlib/tables.c
src/programs/mdrun/runner.c
src/tools/gmx_bar.c
src/tools/gmx_cluster.c
src/tools/gmx_hbond.c

diff --cc .gitignore
Simple merge
diff --cc CMakeLists.txt
index 9dfbd26e6fd19e0023dc2a3e61eaad62f2308364,be6701bdd91b1baa8df422958e71a244301d1bc9..bc0f5e3941510093d264fc79fe707b8f886677d2
@@@ -56,14 -56,13 +56,16 @@@ set(API_VERSION ${NUM_VERSION}
  set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
  
  if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT AND UNIX)
 -set(CMAKE_INSTALL_PREFIX "/usr/local/gromacs" CACHE STRING "Installation prefix (installation will need write permissions here)" FORCE)
 +    set(CMAKE_INSTALL_PREFIX "/usr/local/gromacs" CACHE STRING "Installation prefix (installation will need write permissions here)" FORCE)
  endif()
  
 +set(GMX_INSTALL_PREFIX "" CACHE STRING "Prefix gets appended to CMAKE_INSTALL_PREFIX. For cpack it sets the root folder of the archive.")
 +mark_as_advanced(GMX_INSTALL_PREFIX)
 +
+ include(gmxBuildTypeReference)
  if(NOT CMAKE_BUILD_TYPE)
-     set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel." FORCE)
+     set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel Reference." FORCE)
  endif(NOT CMAKE_BUILD_TYPE)
  
  enable_language(C)
@@@ -159,8 -176,7 +161,12 @@@ option(GMX_POWERPC_INVSQRT "Use PowerP
  mark_as_advanced(GMX_POWERPC_INVSQRT)
  option(GMX_FAHCORE "Build a library with mdrun functionality" OFF)
  mark_as_advanced(GMX_FAHCORE)
++# decide on GPU settings based on user-settings and GPU/CUDA detection
++include(gmxManageGPU)
++
 +option(GMX_OPENMM "Accelerated execution on GPUs through the OpenMM library (rerun cmake after changing to see relevant options)" OFF)
 +
  include(gmxDetectAcceleration)
  if(NOT DEFINED GMX_CPU_ACCELERATION)
      if(CMAKE_CROSSCOMPILING)
@@@ -771,16 -736,18 +742,16 @@@ elseif(${GMX_CPU_ACCELERATION} STREQUA
          GMX_TEST_CFLAG(MSVC_AVX_CFLAG "/arch:AVX" GROMACS_C_FLAGS)
      endif (NOT GNU_AVX_CFLAG)
      if (NOT GNU_AVX_CFLAG AND NOT MSVC_AVX_CFLAG)
-         message(WARNING "No C AVX flag found. Consider a newer compiler, or disable AVX for much lower performance.")
+         message(WARNING "No C AVX flag found. Consider a newer compiler, or try SSE4.1 (lower performance).")
      endif (NOT GNU_AVX_CFLAG AND NOT MSVC_AVX_CFLAG)
  
 -    if (CMAKE_CXX_COMPILER_LOADED)
 -        GMX_TEST_CXXFLAG(GNU_AVX_CXXFLAG "-mavx" GROMACS_CXX_FLAGS)
 -        if (NOT GNU_AVX_CXXFLAG)
 -            GMX_TEST_CXXFLAG(MSVC_AVX_CXXFLAG "/arch:AVX" GROMACS_CXX_FLAGS)
 -        endif (NOT GNU_AVX_CXXFLAG)
 -        if (NOT GNU_AVX_CXXFLAG AND NOT MSVC_AVX_CXXFLAG)
 -            message(WARNING "No C++ AVX flag found. Consider a newer compiler, or try SSE4.1 (lower performance).")
 -        endif (NOT GNU_AVX_CXXFLAG AND NOT MSVC_AVX_CXXFLAG)
 -    endif()
 +    GMX_TEST_CXXFLAG(GNU_AVX_CXXFLAG "-mavx" GROMACS_CXX_FLAGS)
 +    if (NOT GNU_AVX_CXXFLAG)
 +       GMX_TEST_CXXFLAG(MSVC_AVX_CXXFLAG "/arch:AVX" GROMACS_CXX_FLAGS)
 +    endif (NOT GNU_AVX_CXXFLAG)
 +    if (NOT GNU_AVX_CXXFLAG AND NOT MSVC_AVX_CXXFLAG)
-        message(WARNING "No C++ AVX flag found. Consider a newer compiler, or disable AVX for much lower performance.")
++       message(WARNING "No C++ AVX flag found. Consider a newer compiler, or try SSE4.1 (lower performance).")
 +    endif (NOT GNU_AVX_CXXFLAG AND NOT MSVC_AVX_CXXFLAG)
  
      # Set the FMA4 flags (MSVC doesn't require any)
      if(${GMX_CPU_ACCELERATION} STREQUAL "AVX_128_FMA" AND NOT MSVC)
@@@ -1091,10 -1074,10 +1077,11 @@@ if(NOT GMX_OPENMP
      #or because it was only partially detected (e.g. only for C but not C++ compiler)
      unset(OpenMP_C_FLAGS CACHE) 
      unset(OpenMP_CXX_FLAGS CACHE)
 -    unset(OpenMP_LINKER_FLAGS CACHE)
 -    unset(OpenMP_SHARED_LINKER_FLAGS)
 +else()
 +    set(GMX_EXE_LINKER_FLAGS ${GMX_EXE_LINKER_FLAGS} ${OpenMP_LINKER_FLAGS})
 +    set(GMX_SHARED_LINKER_FLAGS ${GMX_SHARED_LINKER_FLAGS} ${OpenMP_SHARED_LINKER_FLAGS})
  endif()
  ######################################
  # Output compiler and CFLAGS used
  ######################################
Simple merge
index d4390cad804b1630ba375a5dc3002f5c30240b9b,ebe8f695443b3908411b6b18b3f7e96869a26f9d..b8c10d6d82a8f92d02f0bb54602c8e22da742104
@@@ -1,19 -1,11 +1,20 @@@
- configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.h.cmakein ${CMAKE_CURRENT_BINARY_DIR}/config.h)
+ configure_file(config.h.cmakein config.h)
+ configure_file(buildinfo.h.cmakein buildinfo.h)
  
  include(../cmake/BuildManPages.cmake)
 +if (BUILD_TESTING)
 +    add_custom_target(tests)
 +    if (GMX_BUILD_UNITTESTS)
 +        add_subdirectory(external/gmock-1.6.0)
 +    endif (GMX_BUILD_UNITTESTS)
 +    include(testutils/TestMacros.cmake)
 +    if (GMX_BUILD_UNITTESTS)
 +        add_subdirectory(testutils)
 +    endif (GMX_BUILD_UNITTESTS)
 +endif (BUILD_TESTING)
  
 -add_subdirectory(gmxlib)
 -add_subdirectory(mdlib)
 -add_subdirectory(kernel)
 +add_subdirectory(gromacs)
 +add_subdirectory(programs)
  
  if(NOT GMX_FAHCORE)
    add_subdirectory(tools)
index cbbc835c4f9f165e797bf2c7656698e17d7677c0,40bb5d7a2cb83dc49b0dc7a22554b755c2c0057c..c0ddcde58e7b76df13d550b4ff163578872a7955
  /* Default location of data files */
  #define GMXLIBDIR "@GMXLIBDIR@"
  
- /* Hardware and OS version for build host */
- #define BUILD_HOST "@BUILD_HOST@"
- /* CPU information for build host */
- #define BUILD_CPU_VENDOR "@BUILD_CPU_VENDOR@"
- #define BUILD_CPU_BRAND "@BUILD_CPU_BRAND@"
- #define BUILD_CPU_FAMILY @BUILD_CPU_FAMILY@
- #define BUILD_CPU_MODEL @BUILD_CPU_MODEL@
- #define BUILD_CPU_STEPPING @BUILD_CPU_STEPPING@
- #define BUILD_CPU_FEATURES "@BUILD_CPU_FEATURES@"
- /* Compiler and CFLAGS from build */
- #define BUILD_COMPILER "@BUILD_COMPILER@"
- #define BUILD_CFLAGS   "@BUILD_CFLAGS@"
- /* Date and time for build */
- #define BUILD_TIME "@BUILD_TIME@"
- /* User doing build */
- #define BUILD_USER "@BUILD_USER@"
 +/* Binary suffix for the created binaries */
 +#define GMX_BINARY_SUFFIX "@GMX_BINARY_SUFFIX@"
 +
 +/* Source directory for the build */
 +#cmakedefine CMAKE_SOURCE_DIR "@CMAKE_SOURCE_DIR@"
 +
 +/* Binary directory for the build */
 +#cmakedefine CMAKE_BINARY_DIR "@CMAKE_BINARY_DIR@"
 +
  /* Turn off water-water neighborlist optimization only - not used right now */
  #cmakedefine DISABLE_WATERWATER_NLIST
  
index 8fef9dc243faed2f227ea2a1d361303e90ad2c1b,0000000000000000000000000000000000000000..edf009283fce31d616cc92188a9ddb933cf5157d
mode 100644,000000..100644
--- /dev/null
@@@ -1,94 -1,0 +1,107 @@@
 +set(LIBGROMACS_SOURCES)
 +
 +add_subdirectory(legacyheaders)
 +add_subdirectory(gmxlib)
 +add_subdirectory(mdlib)
 +add_subdirectory(gmxpreprocess)
 +add_subdirectory(analysisdata)
 +add_subdirectory(commandline)
 +add_subdirectory(linearalgebra)
 +add_subdirectory(onlinehelp)
 +add_subdirectory(options)
 +add_subdirectory(selection)
 +add_subdirectory(trajectoryanalysis)
 +add_subdirectory(utility)
 +
 +file(GLOB LIBGROMACS_HEADERS *.h)
 +install(FILES ${LIBGROMACS_HEADERS} DESTINATION ${INCL_INSTALL_DIR}/gromacs
 +        COMPONENT development)
 +
 +list(APPEND LIBGROMACS_SOURCES ${GMXLIB_SOURCES} ${MDLIB_SOURCES})
 +
 +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/version.h.cmakein ${CMAKE_CURRENT_BINARY_DIR}/version.h)
 +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/version.h
 +    DESTINATION ${INCL_INSTALL_DIR}/gromacs
 +    COMPONENT development)
 +
 +# Add target that generates gitversion.c every time make is run
 +# if git version info is requested
 +# This code is here instead of utility/CMakeLists.txt because CMake
 +# ignores set_source_file_properties from subdirectories.
 +if (GMX_GIT_VERSION_INFO)
 +    set(GENERATED_VERSION_FILE ${CMAKE_CURRENT_BINARY_DIR}/utility/gitversion.c)
 +    add_custom_target(gmx_version ALL
 +            COMMAND ${CMAKE_COMMAND}
 +                -D GIT_EXECUTABLE="${GIT_EXECUTABLE}"
 +                -D GIT_VERSION="${GIT_VERSION}"
 +                -D PROJECT_VERSION="${PROJECT_VERSION}"
 +                -D PROJECT_SOURCE_DIR="${PROJECT_SOURCE_DIR}"
 +                -D VERSION_C_CMAKEIN="${CMAKE_CURRENT_SOURCE_DIR}/utility/gitversion.c.cmakein"
 +                -D VERSION_C_OUT=${GENERATED_VERSION_FILE}
 +                -P ${CMAKE_SOURCE_DIR}/cmake/gmxGenerateVersionInfo.cmake
 +            WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
 +            DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/utility/gitversion.c.cmakein
 +            COMMENT "Generating git version information")
 +    set_source_files_properties(${GENERATED_VERSION_FILE}
 +                                PROPERTIES GENERATED true)
 +    list(APPEND LIBGROMACS_SOURCES ${GENERATED_VERSION_FILE})
 +endif()
 +
 +# apply gcc 4.4.x bug workaround
 +if(GMX_USE_GCC44_BUG_WORKAROUND)
 +   include(gmxGCC44O3BugWorkaround)
 +   gmx_apply_gcc44_bug_workaround("gmxlib/bondfree.c")
 +   gmx_apply_gcc44_bug_workaround("mdlib/force.c")
 +   gmx_apply_gcc44_bug_workaround("mdlib/constr.c")
 +endif()
 +
++if(GMX_GPU)
++    include_directories(${CUDA_TOOLKIT_INCLUDE})
++endif()
++
 +add_library(libgromacs ${LIBGROMACS_SOURCES})
 +if (GMX_GIT_VERSION_INFO)
 +    add_dependencies(libgromacs gmx_version)
 +endif ()
 +
++if(GMX_BUILD_OWN_FFTW)
++    # This dependency has to be made here rather than the CMakeLists.txt that
++    # does the FFTW build, because of the order in which
++    # add_subdirectory() calls are made in the top-level CMakeLists.txt; the
++    # md library target does not necessarily exist yet. Also enabling and
++    # disabling GMX_BUILD_OWN_FFTW changes dependencies correctly.
++    add_dependencies(libgromacs gmxfftw)
++endif()
++
 +target_link_libraries(libgromacs ${GMX_GPU_LIBRARIES}
 +                      ${GMX_EXTRA_LIBRARIES} ${FFT_LIBRARIES} ${XML_LIBRARIES}
 +                      ${THREAD_LIB} ${GMX_SHARED_LINKER_FLAGS})
 +set_target_properties(libgromacs PROPERTIES
 +                      OUTPUT_NAME "gromacs${GMX_LIBS_SUFFIX}"
 +                      SOVERSION ${SOVERSION}
 +                      COMPILE_FLAGS "${OpenMP_C_FLAGS}")
 +
 +install(TARGETS libgromacs DESTINATION ${LIB_INSTALL_DIR} COMPONENT libraries)
 +
 +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/libgromacs.pc.cmakein
 +               ${CMAKE_CURRENT_BINARY_DIR}/libgromacs.pc @ONLY)
 +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libgromacs.pc
 +        DESTINATION ${LIB_INSTALL_DIR}/pkgconfig
 +        RENAME "libgromacs${GMX_LIBS_SUFFIX}.pc"
 +        COMPONENT development)
 +
 +if (INSTALL_CUDART_LIB) #can be set manual by user
 +    if (GMX_OPENMM OR GMX_GPU)
 +        foreach(CUDA_LIB ${CUDA_LIBRARIES})
 +            string(REGEX MATCH "cudart" IS_CUDART ${CUDA_LIB})
 +            if(IS_CUDART) #libcuda should not be installed
 +                #install also name-links (linker uses those)
 +                file(GLOB CUDA_LIBS ${CUDA_LIB}*)
 +                install(FILES ${CUDA_LIBS} DESTINATION
 +                    ${LIB_INSTALL_DIR} COMPONENT libraries)
 +            endif()
 +        endforeach()
 +    else()
 +        message(WARNING "INSTALL_CUDART_LIB only makes sense with GMX_OPENMM or GMX_GPU")
 +    endif()
 +endif ()
index 85248e92fdab69303a11564df921f9d2f2764599,0000000000000000000000000000000000000000..c1b114d15b0a7823f59238ebb406c17f79ff309f
mode 100644,000000..100644
--- /dev/null
@@@ -1,38 -1,0 +1,39 @@@
 +include_directories(${CMAKE_CURRENT_SOURCE_DIR})
 +
 +add_subdirectory(nonbonded)
 +
 +# The nonbonded directory contains subdirectories that are only
 +# conditionally built, so we cannot use a GLOB_RECURSE here.
 +file(GLOB GMXLIB_SOURCES *.c *.cpp statistics/*.c)
 +
 +# This would be the standard way to include thread_mpi, but we want libgmx
 +# to link the functions directly
 +#if(GMX_THREAD_MPI)
 +#    add_subdirectory(thread_mpi)
 +#endif(GMX_THREAD_MPI)
 +#target_link_libraries(gmx ${GMX_EXTRA_LIBRARIES} ${THREAD_MPI_LIB})
 +
 +# Files called xxx_test.c are test drivers with a main() function for module xxx.c,
 +# so they should not be included in the library
 +# (the *\#* pattern also drops editor backup files containing '#').
 +file(GLOB_RECURSE NOT_GMXLIB_SOURCES *_test.c *\#*)
 +list(REMOVE_ITEM GMXLIB_SOURCES ${NOT_GMXLIB_SOURCES})  
 +
 +# gpu utils + cuda tools module
 +if(GMX_GPU)
++    # The log file output queries Cuda if GPU support is enabled
 +    add_subdirectory(cuda_tools)
 +    add_subdirectory(gpu_utils)   
 +    set(GMX_GPU_LIBRARIES ${GMX_GPU_LIBRARIES} gpu_utils cuda_tools PARENT_SCOPE)
 +endif()
 +
 +# vmdio.c needs the dlopen wrapper from vmd_molfile, so it is only compiled
 +# when plugin support is enabled; otherwise it is removed from the library.
 +if(GMX_USE_PLUGINS)
 +  set(GMXLIB_SOURCES ${GMXLIB_SOURCES} ${CMAKE_SOURCE_DIR}/src/external/vmd_molfile/vmddlopen.c)
 +else()
 +  list(REMOVE_ITEM GMXLIB_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/vmdio.c)
 +endif()
 +
 +# An ugly hack to get absolute paths...
 +file(GLOB THREAD_MPI_SOURCES ${THREAD_MPI_SRC})
 +
 +# Export the complete source list (gmxlib + thread_mpi + nonbonded kernels)
 +# to the parent scope, where the libgromacs target is defined.
 +set(GMXLIB_SOURCES ${GMXLIB_SOURCES} ${THREAD_MPI_SOURCES} ${NONBONDED_SOURCES}
 +    PARENT_SCOPE)
index 2b7a8f7b5e1156393a42da6408cab59370570297,0000000000000000000000000000000000000000..cd9741b949c3ac83facd026ea4bb03141a9b26ad
mode 100644,000000..100644
--- /dev/null
@@@ -1,4008 -1,0 +1,4006 @@@
-                                   gmx_enerdata_t *enerd, t_nrnb *nrnb,
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <math.h>
 +#include "physics.h"
 +#include "vec.h"
 +#include "maths.h"
 +#include "txtdump.h"
 +#include "bondf.h"
 +#include "smalloc.h"
 +#include "pbc.h"
 +#include "ns.h"
 +#include "macros.h"
 +#include "names.h"
 +#include "gmx_fatal.h"
 +#include "mshift.h"
 +#include "main.h"
 +#include "disre.h"
 +#include "orires.h"
 +#include "force.h"
 +#include "nonbonded.h"
 +
 +#if !defined GMX_DOUBLE && defined GMX_X86_SSE2
 +#include "gmx_x86_simd_single.h"
 +#define SSE_PROPER_DIHEDRALS
 +#endif
 +
 +/* Find a better place for this?
 + * 16x16 integer coefficient matrix used by the CMAP (dihedral correction
 + * map) code; presumably the standard coefficient matrix for bicubic
 + * interpolation from values/derivatives at the four grid corners —
 + * TODO(review): confirm against the CMAP implementation. */
 +const int cmap_coeff_matrix[] = {
 +1, 0, -3,  2, 0, 0,  0,  0, -3,  0,  9, -6,  2,  0, -6,  4 ,
 +0, 0,  0,  0, 0, 0,  0,  0,  3,  0, -9,  6, -2,  0,  6, -4,
 +0, 0,  0,  0, 0, 0,  0,  0,  0,  0,  9, -6,  0,  0, -6,  4 ,
 +0, 0,  3, -2, 0, 0,  0,  0,  0,  0, -9,  6,  0,  0,  6, -4,
 +0, 0,  0,  0, 1, 0, -3,  2, -2,  0,  6, -4,  1,  0, -3,  2 ,
 +0, 0,  0,  0, 0, 0,  0,  0, -1,  0,  3, -2,  1,  0, -3,  2 ,
 +0, 0,  0,  0, 0, 0,  0,  0,  0,  0, -3,  2,  0,  0,  3, -2,
 +0, 0,  0,  0, 0, 0,  3, -2,  0,  0, -6,  4,  0,  0,  3, -2,
 +0, 1, -2,  1, 0, 0,  0,  0,  0, -3,  6, -3,  0,  2, -4,  2 ,
 +0, 0,  0,  0, 0, 0,  0,  0,  0,  3, -6,  3,  0, -2,  4, -2,
 +0, 0,  0,  0, 0, 0,  0,  0,  0,  0, -3,  3,  0,  0,  2, -2,
 +0, 0, -1,  1, 0, 0,  0,  0,  0,  0,  3, -3,  0,  0, -2,  2 ,
 +0, 0,  0,  0, 0, 1, -2,  1,  0, -2,  4, -2,  0,  1, -2,  1,
 +0, 0,  0,  0, 0, 0,  0,  0,  0, -1,  2, -1,  0,  1, -2,  1,
 +0, 0,  0,  0, 0, 0,  0,  0,  0,  0,  1, -1,  0,  0, -1,  1,
 +0, 0,  0,  0, 0, 0, -1,  1,  0,  0,  2, -2,  0,  0, -1,  1
 +};
 +
 +
 +
 +/* Return a 1-based atom number for atom i, suitable for user-facing
 + * messages: the global index when a mapping is supplied, otherwise the
 + * local index itself. */
 +int glatnr(int *global_atom_index,int i)
 +{
 +    int atnr;
 +
 +    if (global_atom_index == NULL) {
 +        atnr = i + 1;
 +    } else {
 +        atnr = global_atom_index[i] + 1;
 +    }
 +
 +    return atnr;
 +}
 +
 +/* dx = xi - xj. When pbc is non-NULL the minimum-image convention is
 + * applied and the periodic shift index is returned; without PBC the plain
 + * difference is taken and CENTRAL (the zero-shift index) is returned. */
 +static int pbc_rvec_sub(const t_pbc *pbc,const rvec xi,const rvec xj,rvec dx)
 +{
 +  if (pbc) {
 +    return pbc_dx_aiuc(pbc,xi,xj,dx);
 +  }
 +  else {
 +    rvec_sub(xi,xj,dx);
 +    return CENTRAL;
 +  }
 +}
 +
 +/*
 + * Morse potential bond by Frank Everdij
 + *
 + * Three parameters needed:
 + *
 + * b0 = equilibrium distance in nm
 + * be = beta in nm^-1 (actually, it's nu_e*Sqrt(2*pi*pi*mu/D_e))
 + * cb = well depth in kJ/mol
 + *
 + * Note: the potential is referenced to be +cb at infinite separation
 + *       and zero at the equilibrium distance!
 + */
 +
 +/* Morse bond potential (functional form documented in the comment block
 + * above). Returns the summed bond energy; accumulates forces in f, shift
 + * forces in fshift, and the free-energy derivative in *dvdlambda. The
 + * A/B-state parameters are interpolated linearly in lambda. Trailing
 + * comments are flop counts. */
 +real morse_bonds(int nbonds,
 +               const t_iatom forceatoms[],const t_iparams forceparams[],
 +               const rvec x[],rvec f[],rvec fshift[],
 +               const t_pbc *pbc,const t_graph *g,
 +               real lambda,real *dvdlambda,
 +               const t_mdatoms *md,t_fcdata *fcd,
 +               int *global_atom_index)
 +{
 +  const real one=1.0;
 +  const real two=2.0;
 +  real  dr,dr2,temp,omtemp,cbomtemp,fbond,vbond,fij,vtot;
 +  real  b0,be,cb,b0A,beA,cbA,b0B,beB,cbB,L1;
 +  rvec  dx;
 +  int   i,m,ki,type,ai,aj;
 +  ivec  dt;
 +
 +  vtot = 0.0;
 +  /* forceatoms is a flat (type, ai, aj) triplet list */
 +  for(i=0; (i<nbonds); ) {
 +    type = forceatoms[i++];
 +    ai   = forceatoms[i++];
 +    aj   = forceatoms[i++];
 +    
 +    b0A   = forceparams[type].morse.b0A;
 +    beA   = forceparams[type].morse.betaA;
 +    cbA   = forceparams[type].morse.cbA;
 +
 +    b0B   = forceparams[type].morse.b0B;
 +    beB   = forceparams[type].morse.betaB;
 +    cbB   = forceparams[type].morse.cbB;
 +
 +    /* Linear interpolation between topology A and B for FEP */
 +    L1 = one-lambda;                      /* 1 */
 +    b0 = L1*b0A + lambda*b0B;             /* 3 */
 +    be = L1*beA + lambda*beB;             /* 3 */
 +    cb = L1*cbA + lambda*cbB;             /* 3 */
 +
 +    ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);            /*   3          */
 +    dr2  = iprod(dx,dx);                            /*   5          */
 +    dr   = dr2*gmx_invsqrt(dr2);                        /*  10          */
 +    temp = exp(-be*(dr-b0));                        /*  12          */
 +    
 +    if (temp == one)
 +    {
 +        /* bonds are constrainted. This may _not_ include bond constraints if they are lambda dependent */
 +        *dvdlambda += cbB-cbA;
 +        continue;
 +    }
 +
 +    omtemp   = one-temp;                               /*   1          */
 +    cbomtemp = cb*omtemp;                              /*   1          */
 +    vbond    = cbomtemp*omtemp;                        /*   1          */
 +    fbond    = -two*be*temp*cbomtemp*gmx_invsqrt(dr2); /*   9          */
 +    vtot     += vbond;                                 /*   1          */
 +
 +    *dvdlambda += (cbB - cbA) * omtemp * omtemp - (2-2*omtemp)*omtemp * cb * ((b0B-b0A)*be - (beB-beA)*(dr-b0)); /* 15 */
 +    
 +    /* With a connectivity graph, recompute the shift index from it */
 +    if (g) {
 +      ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
 +      ki = IVEC2IS(dt);
 +    }
 +
 +    for (m=0; (m<DIM); m++) {                          /*  15          */
 +      fij=fbond*dx[m];
 +      f[ai][m]+=fij;
 +      f[aj][m]-=fij;
 +      fshift[ki][m]+=fij;
 +      fshift[CENTRAL][m]-=fij;
 +    }
 +  }                                           /*  83 TOTAL    */
 +  return vtot;
 +}
 +
 +/* Cubic bond potential: V = kb*(dr)^2 + kb*kcub*(dr)^3 with dr = r - b0.
 + * Returns the summed energy and accumulates forces/shift forces.
 + * NOTE(review): no dV/dlambda is accumulated here, so this interaction
 + * is apparently not free-energy perturbed. Pairs at zero distance are
 + * skipped to avoid division by zero. */
 +real cubic_bonds(int nbonds,
 +               const t_iatom forceatoms[],const t_iparams forceparams[],
 +               const rvec x[],rvec f[],rvec fshift[],
 +               const t_pbc *pbc,const t_graph *g,
 +               real lambda,real *dvdlambda,
 +               const t_mdatoms *md,t_fcdata *fcd,
 +               int *global_atom_index)
 +{
 +  const real three = 3.0;
 +  const real two   = 2.0;
 +  real  kb,b0,kcub;
 +  real  dr,dr2,dist,kdist,kdist2,fbond,vbond,fij,vtot;
 +  rvec  dx;
 +  int   i,m,ki,type,ai,aj;
 +  ivec  dt;
 +
 +  vtot = 0.0;
 +  for(i=0; (i<nbonds); ) {
 +    type = forceatoms[i++];
 +    ai   = forceatoms[i++];
 +    aj   = forceatoms[i++];
 +    
 +    b0   = forceparams[type].cubic.b0;
 +    kb   = forceparams[type].cubic.kb;
 +    kcub = forceparams[type].cubic.kcub;
 +
 +    ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);                /*   3          */
 +    dr2  = iprod(dx,dx);                                /*   5          */
 +    
 +    if (dr2 == 0.0)
 +      continue;
 +      
 +    dr         = dr2*gmx_invsqrt(dr2);                      /*  10          */
 +    dist       = dr-b0;
 +    kdist      = kb*dist;
 +    kdist2     = kdist*dist;
 +    
 +    vbond      = kdist2 + kcub*kdist2*dist;
 +    fbond      = -(two*kdist + three*kdist2*kcub)/dr;
 +
 +    vtot      += vbond;       /* 21 */
 +    
 +    if (g) {
 +      ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
 +      ki=IVEC2IS(dt);
 +    }
 +    for (m=0; (m<DIM); m++) {                          /*  15          */
 +      fij=fbond*dx[m];
 +      f[ai][m]+=fij;
 +      f[aj][m]-=fij;
 +      fshift[ki][m]+=fij;
 +      fshift[CENTRAL][m]-=fij;
 +    }
 +  }                                           /*  54 TOTAL    */
 +  return vtot;
 +}
 +
 +/* FENE (finitely extensible nonlinear elastic) bond:
 + * V = -0.5*kb*bm^2*ln(1 - r^2/bm^2). Diverges as r -> bm, so the run is
 + * aborted with a fatal error when r^2 >= bm^2. Returns the summed energy;
 + * no dV/dlambda contribution. */
 +real FENE_bonds(int nbonds,
 +              const t_iatom forceatoms[],const t_iparams forceparams[],
 +              const rvec x[],rvec f[],rvec fshift[],
 +              const t_pbc *pbc,const t_graph *g,
 +              real lambda,real *dvdlambda,
 +              const t_mdatoms *md,t_fcdata *fcd,
 +              int *global_atom_index)
 +{
 +  const real half=0.5;
 +  const real one=1.0;
 +  real  bm,kb;
 +  real  dr,dr2,bm2,omdr2obm2,fbond,vbond,fij,vtot;
 +  rvec  dx;
 +  int   i,m,ki,type,ai,aj;
 +  ivec  dt;
 +
 +  vtot = 0.0;
 +  for(i=0; (i<nbonds); ) {
 +    type = forceatoms[i++];
 +    ai   = forceatoms[i++];
 +    aj   = forceatoms[i++];
 +    
 +    bm   = forceparams[type].fene.bm;
 +    kb   = forceparams[type].fene.kb;
 +
 +    ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);            /*   3          */
 +    dr2  = iprod(dx,dx);                                /*   5          */
 +    
 +    if (dr2 == 0.0)
 +      continue;
 +
 +    bm2 = bm*bm;
 +
 +    /* Atom numbers in the message are 1-based via glatnr() */
 +    if (dr2 >= bm2)
 +      gmx_fatal(FARGS,
 +              "r^2 (%f) >= bm^2 (%f) in FENE bond between atoms %d and %d",
 +              dr2,bm2,
 +              glatnr(global_atom_index,ai),
 +              glatnr(global_atom_index,aj));
 +      
 +    omdr2obm2  = one - dr2/bm2;
 +    
 +    vbond      = -half*kb*bm2*log(omdr2obm2);
 +    fbond      = -kb/omdr2obm2;
 +
 +    vtot      += vbond;       /* 35 */
 +    
 +    if (g) {
 +      ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
 +      ki=IVEC2IS(dt);
 +    }
 +    for (m=0; (m<DIM); m++) {                          /*  15          */
 +      fij=fbond*dx[m];
 +      f[ai][m]+=fij;
 +      f[aj][m]-=fij;
 +      fshift[ki][m]+=fij;
 +      fshift[CENTRAL][m]-=fij;
 +    }
 +  }                                           /*  58 TOTAL    */
 +  return vtot;
 +}
 +
 +/* Generic harmonic term V = 0.5*k*(x - x0)^2 with the force constant k and
 + * reference value x0 interpolated linearly between the A and B states.
 + * Writes the energy to *V and the (scalar) force -dV/dx to *F, and returns
 + * dV/dlambda. Used for bonds, angles and restraints alike. */
 +real harmonic(real kA,real kB,real xA,real xB,real x,real lambda,
 +            real *V,real *F)
 +{
 +  const real half=0.5;
 +  real  L1,kk,x0,dx,dx2;
 +  real  v,f,dvdlambda;
 +  
 +  L1    = 1.0-lambda;
 +  kk    = L1*kA+lambda*kB;
 +  x0    = L1*xA+lambda*xB;
 +
 +  dx    = x-x0;
 +  dx2   = dx*dx;
 +
 +  f     = -kk*dx;
 +  v     = half*kk*dx2;
 +  dvdlambda  = half*(kB-kA)*dx2 + (xA-xB)*kk*dx;
 +
 +  *F    = f;
 +  *V    = v;
 +
 +  return dvdlambda;
 +
 +  /* That was 19 flops */
 +}
 +
 +
 +/* Harmonic bonds. Delegates the energy/force/dV-dlambda evaluation to
 + * harmonic() on the bond length, then spreads the scalar force along the
 + * bond vector. Returns the summed bond energy.
 + * NOTE(review): dvdlambda is accumulated before the dr2 == 0 skip, so
 + * coincident atoms still contribute to dV/dlambda but not to the forces —
 + * presumably intentional; confirm if touching this code. */
 +real bonds(int nbonds,
 +         const t_iatom forceatoms[],const t_iparams forceparams[],
 +         const rvec x[],rvec f[],rvec fshift[],
 +         const t_pbc *pbc,const t_graph *g,
 +         real lambda,real *dvdlambda,
 +         const t_mdatoms *md,t_fcdata *fcd,
 +         int *global_atom_index)
 +{
 +  int  i,m,ki,ai,aj,type;
 +  real dr,dr2,fbond,vbond,fij,vtot;
 +  rvec dx;
 +  ivec dt;
 +
 +  vtot = 0.0;
 +  for(i=0; (i<nbonds); ) {
 +    type = forceatoms[i++];
 +    ai   = forceatoms[i++];
 +    aj   = forceatoms[i++];
 +  
 +    ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);  /*   3          */
 +    dr2  = iprod(dx,dx);                      /*   5          */
 +    dr   = dr2*gmx_invsqrt(dr2);                      /*  10          */
 +
 +    *dvdlambda += harmonic(forceparams[type].harmonic.krA,
 +                           forceparams[type].harmonic.krB,
 +                           forceparams[type].harmonic.rA,
 +                           forceparams[type].harmonic.rB,
 +                           dr,lambda,&vbond,&fbond);  /*  19  */
 +
 +    if (dr2 == 0.0)
 +      continue;
 +
 +    
 +    vtot  += vbond;/* 1*/
 +    /* harmonic() returns dV/dr; divide by r to get the force per
 +     * component of the (unnormalized) bond vector dx */
 +    fbond *= gmx_invsqrt(dr2);                        /*   6          */
 +#ifdef DEBUG
 +    if (debug)
 +      fprintf(debug,"BONDS: dr = %10g  vbond = %10g  fbond = %10g\n",
 +            dr,vbond,fbond);
 +#endif
 +    if (g) {
 +      ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
 +      ki=IVEC2IS(dt);
 +    }
 +    for (m=0; (m<DIM); m++) {                 /*  15          */
 +      fij=fbond*dx[m];
 +      f[ai][m]+=fij;
 +      f[aj][m]-=fij;
 +      fshift[ki][m]+=fij;
 +      fshift[CENTRAL][m]-=fij;
 +    }
 +  }                                   /* 59 TOTAL     */
 +  return vtot;
 +}
 +
 +/* Flat-bottom distance restraint bonds. The potential is piecewise in the
 + * bond length dr:
 + *   dr <  low        : harmonic in (dr - low)
 + *   low <= dr <= up1 : zero (flat bottom)
 + *   up1 <  dr <= up2 : harmonic in (dr - up1)
 + *   dr >  up2        : linear continuation of the up1..up2 branch
 + * All parameters (low, up1, up2, k) are lambda-interpolated between the
 + * A and B states, and the matching dV/dlambda terms are accumulated.
 + * Returns the summed restraint energy. */
 +real restraint_bonds(int nbonds,
 +                     const t_iatom forceatoms[],const t_iparams forceparams[],
 +                     const rvec x[],rvec f[],rvec fshift[],
 +                     const t_pbc *pbc,const t_graph *g,
 +                     real lambda,real *dvdlambda,
 +                     const t_mdatoms *md,t_fcdata *fcd,
 +                     int *global_atom_index)
 +{
 +    int  i,m,ki,ai,aj,type;
 +    real dr,dr2,fbond,vbond,fij,vtot;
 +    real L1;
 +    real low,dlow,up1,dup1,up2,dup2,k,dk;
 +    real drh,drh2;
 +    rvec dx;
 +    ivec dt;
 +
 +    L1   = 1.0 - lambda;
 +
 +    vtot = 0.0;
 +    for(i=0; (i<nbonds); )
 +    {
 +        type = forceatoms[i++];
 +        ai   = forceatoms[i++];
 +        aj   = forceatoms[i++];
 +        
 +        ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);      /*   3          */
 +        dr2  = iprod(dx,dx);                          /*   5          */
 +        dr   = dr2*gmx_invsqrt(dr2);                  /*  10          */
 +
 +        /* Interpolated parameters and their lambda-derivatives (d*) */
 +        low  = L1*forceparams[type].restraint.lowA + lambda*forceparams[type].restraint.lowB;
 +        dlow =   -forceparams[type].restraint.lowA +        forceparams[type].restraint.lowB;
 +        up1  = L1*forceparams[type].restraint.up1A + lambda*forceparams[type].restraint.up1B;
 +        dup1 =   -forceparams[type].restraint.up1A +        forceparams[type].restraint.up1B;
 +        up2  = L1*forceparams[type].restraint.up2A + lambda*forceparams[type].restraint.up2B;
 +        dup2 =   -forceparams[type].restraint.up2A +        forceparams[type].restraint.up2B;
 +        k    = L1*forceparams[type].restraint.kA   + lambda*forceparams[type].restraint.kB;
 +        dk   =   -forceparams[type].restraint.kA   +        forceparams[type].restraint.kB;
 +        /* 24 */
 +
 +        if (dr < low)
 +        {
 +            drh   = dr - low;
 +            drh2  = drh*drh;
 +            vbond = 0.5*k*drh2;
 +            fbond = -k*drh;
 +            *dvdlambda += 0.5*dk*drh2 - k*dlow*drh;
 +        } /* 11 */
 +        else if (dr <= up1)
 +        {
 +            vbond = 0;
 +            fbond = 0;
 +        }
 +        else if (dr <= up2)
 +        {
 +            drh   = dr - up1;
 +            drh2  = drh*drh;
 +            vbond = 0.5*k*drh2;
 +            fbond = -k*drh;
 +            *dvdlambda += 0.5*dk*drh2 - k*dup1*drh;
 +        } /* 11       */
 +        else
 +        {
 +            /* Linear branch beyond up2, continuous in value and force */
 +            drh   = dr - up2;
 +            vbond = k*(up2 - up1)*(0.5*(up2 - up1) + drh);
 +            fbond = -k*(up2 - up1);
 +            *dvdlambda += dk*(up2 - up1)*(0.5*(up2 - up1) + drh)
 +                + k*(dup2 - dup1)*(up2 - up1 + drh)
 +                - k*(up2 - up1)*dup2;
 +        }
 +   
 +        if (dr2 == 0.0)
 +            continue;
 +        
 +        vtot  += vbond;/* 1*/
 +        fbond *= gmx_invsqrt(dr2);                    /*   6          */
 +#ifdef DEBUG
 +        if (debug)
 +            fprintf(debug,"BONDS: dr = %10g  vbond = %10g  fbond = %10g\n",
 +                    dr,vbond,fbond);
 +#endif
 +        if (g) {
 +            ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
 +            ki=IVEC2IS(dt);
 +        }
 +        for (m=0; (m<DIM); m++) {                     /*  15          */
 +            fij=fbond*dx[m];
 +            f[ai][m]+=fij;
 +            f[aj][m]-=fij;
 +            fshift[ki][m]+=fij;
 +            fshift[CENTRAL][m]-=fij;
 +        }
 +    }                                 /* 59 TOTAL     */
 +
 +    return vtot;
 +}
 +
 +/* Isotropic shell (Drude) polarization: a harmonic spring between atom ai
 + * and its shell aj, with spring constant ksh = q_shell^2/(4*pi*eps0*alpha)
 + * derived from the shell charge and the polarizability alpha.
 + * Returns the summed polarization energy. */
 +real polarize(int nbonds,
 +            const t_iatom forceatoms[],const t_iparams forceparams[],
 +            const rvec x[],rvec f[],rvec fshift[],
 +            const t_pbc *pbc,const t_graph *g,
 +            real lambda,real *dvdlambda,
 +            const t_mdatoms *md,t_fcdata *fcd,
 +            int *global_atom_index)
 +{
 +  int  i,m,ki,ai,aj,type;
 +  real dr,dr2,fbond,vbond,fij,vtot,ksh;
 +  rvec dx;
 +  ivec dt;
 +
 +  vtot = 0.0;
 +  for(i=0; (i<nbonds); ) {
 +    type = forceatoms[i++];
 +    ai   = forceatoms[i++];
 +    aj   = forceatoms[i++];
 +    ksh  = sqr(md->chargeA[aj])*ONE_4PI_EPS0/forceparams[type].polarize.alpha;
 +    if (debug)
 +      fprintf(debug,"POL: local ai = %d aj = %d ksh = %.3f\n",ai,aj,ksh);
 +  
 +    ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);  /*   3          */
 +    dr2  = iprod(dx,dx);                      /*   5          */
 +    dr   = dr2*gmx_invsqrt(dr2);                      /*  10          */
 +
 +    /* Same ksh for A and B, reference distance 0: lambda-independent */
 +    *dvdlambda += harmonic(ksh,ksh,0,0,dr,lambda,&vbond,&fbond);  /*  19  */
 +
 +    if (dr2 == 0.0)
 +      continue;
 +    
 +    vtot  += vbond;/* 1*/
 +    fbond *= gmx_invsqrt(dr2);                        /*   6          */
 +
 +    if (g) {
 +      ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
 +      ki=IVEC2IS(dt);
 +    }
 +    for (m=0; (m<DIM); m++) {                 /*  15          */
 +      fij=fbond*dx[m];
 +      f[ai][m]+=fij;
 +      f[aj][m]-=fij;
 +      fshift[ki][m]+=fij;
 +      fshift[CENTRAL][m]-=fij;
 +    }
 +  }                                   /* 59 TOTAL     */
 +  return vtot;
 +}
 +
 +/* Anharmonic shell polarization: harmonic spring as in polarize(), plus a
 + * quartic wall khyp*(dr - drcut)^4 that switches on beyond the cutoff
 + * distance drcut to keep the shell from straying too far.
 + * Returns the summed energy. */
 +real anharm_polarize(int nbonds,
 +                     const t_iatom forceatoms[],const t_iparams forceparams[],
 +                     const rvec x[],rvec f[],rvec fshift[],
 +                     const t_pbc *pbc,const t_graph *g,
 +                     real lambda,real *dvdlambda,
 +                     const t_mdatoms *md,t_fcdata *fcd,
 +                     int *global_atom_index)
 +{
 +  int  i,m,ki,ai,aj,type;
 +  real dr,dr2,fbond,vbond,fij,vtot,ksh,khyp,drcut,ddr,ddr3;
 +  rvec dx;
 +  ivec dt;
 +
 +  vtot = 0.0;
 +  for(i=0; (i<nbonds); ) {
 +    type  = forceatoms[i++];
 +    ai    = forceatoms[i++];
 +    aj    = forceatoms[i++];
 +    ksh   = sqr(md->chargeA[aj])*ONE_4PI_EPS0/forceparams[type].anharm_polarize.alpha; /* 7*/
 +    khyp  = forceparams[type].anharm_polarize.khyp;
 +    drcut = forceparams[type].anharm_polarize.drcut;
 +    if (debug)
 +      fprintf(debug,"POL: local ai = %d aj = %d ksh = %.3f\n",ai,aj,ksh);
 +  
 +    ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);  /*   3          */
 +    dr2  = iprod(dx,dx);                      /*   5          */
 +    dr   = dr2*gmx_invsqrt(dr2);                      /*  10          */
 +
 +    *dvdlambda += harmonic(ksh,ksh,0,0,dr,lambda,&vbond,&fbond);  /*  19  */
 +
 +    if (dr2 == 0.0)
 +      continue;
 +    
 +    /* Quartic wall beyond drcut; fbond here is still dV/dr */
 +    if (dr > drcut) {
 +        ddr    = dr-drcut;
 +        ddr3   = ddr*ddr*ddr;
 +        vbond += khyp*ddr*ddr3;
 +        fbond -= 4*khyp*ddr3;
 +    }
 +    fbond *= gmx_invsqrt(dr2);                        /*   6          */
 +    vtot  += vbond;/* 1*/
 +
 +    if (g) {
 +      ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
 +      ki=IVEC2IS(dt);
 +    }
 +    for (m=0; (m<DIM); m++) {                 /*  15          */
 +      fij=fbond*dx[m];
 +      f[ai][m]+=fij;
 +      f[aj][m]-=fij;
 +      fshift[ki][m]+=fij;
 +      fshift[CENTRAL][m]-=fij;
 +    }
 +  }                                   /* 72 TOTAL     */
 +  return vtot;
 +}
 +
 +/* Anisotropic water polarization (see the comment inside): each entry is a
 + * sextet (type, O, H1, H2, D, S) describing one water with a dummy D and
 + * shell S. The shell displacement is decomposed in the molecular frame
 + * (normal, HH and OD axes) with a different spring constant per axis.
 + * Returns half the accumulated quadratic energy.
 + * NOTE(review): no shift forces are accumulated here, so the virial
 + * presumably comes from elsewhere for this term — confirm before reuse. */
 +real water_pol(int nbonds,
 +             const t_iatom forceatoms[],const t_iparams forceparams[],
 +             const rvec x[],rvec f[],rvec fshift[],
 +             const t_pbc *pbc,const t_graph *g,
 +             real lambda,real *dvdlambda,
 +             const t_mdatoms *md,t_fcdata *fcd,
 +             int *global_atom_index)
 +{
 +  /* This routine implements anisotropic polarizibility for water, through
 +   * a shell connected to a dummy with spring constant that differ in the
 +   * three spatial dimensions in the molecular frame.
 +   */
 +  int  i,m,aO,aH1,aH2,aD,aS,type,type0;
 +  rvec dOH1,dOH2,dHH,dOD,dDS,nW,kk,dx,kdx,proj;
 +#ifdef DEBUG
 +  rvec df;
 +#endif
 +  real vtot,fij,r_HH,r_OD,r_nW,tx,ty,tz,qS;
 +
 +  vtot = 0.0;
 +  if (nbonds > 0) {
 +    /* All entries must share one interaction type; parameters are read
 +     * once from the first sextet */
 +    type0  = forceatoms[0];
 +    aS     = forceatoms[5];
 +    qS     = md->chargeA[aS];
 +    kk[XX] = sqr(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_x;
 +    kk[YY] = sqr(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_y;
 +    kk[ZZ] = sqr(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_z;
 +    r_HH   = 1.0/forceparams[type0].wpol.rHH;
 +    r_OD   = 1.0/forceparams[type0].wpol.rOD;
 +    if (debug) {
 +      fprintf(debug,"WPOL: qS  = %10.5f aS = %5d\n",qS,aS);
 +      fprintf(debug,"WPOL: kk  = %10.3f        %10.3f        %10.3f\n",
 +            kk[XX],kk[YY],kk[ZZ]);
 +      fprintf(debug,"WPOL: rOH = %10.3f  rHH = %10.3f  rOD = %10.3f\n",
 +            forceparams[type0].wpol.rOH,
 +            forceparams[type0].wpol.rHH,
 +            forceparams[type0].wpol.rOD);
 +    }
 +    for(i=0; (i<nbonds); i+=6) {
 +      type = forceatoms[i];
 +      if (type != type0)
 +      gmx_fatal(FARGS,"Sorry, type = %d, type0 = %d, file = %s, line = %d",
 +                  type,type0,__FILE__,__LINE__);
 +      aO   = forceatoms[i+1];
 +      aH1  = forceatoms[i+2];
 +      aH2  = forceatoms[i+3];
 +      aD   = forceatoms[i+4];
 +      aS   = forceatoms[i+5];
 +      
 +      /* Compute vectors describing the water frame */
 +      rvec_sub(x[aH1],x[aO], dOH1);
 +      rvec_sub(x[aH2],x[aO], dOH2);
 +      rvec_sub(x[aH2],x[aH1],dHH);
 +      rvec_sub(x[aD], x[aO], dOD);
 +      rvec_sub(x[aS], x[aD], dDS);
 +      cprod(dOH1,dOH2,nW);
 +      
 +      /* Compute inverse length of normal vector 
 +       * (this one could be precomputed, but I'm too lazy now)
 +       */
 +      r_nW = gmx_invsqrt(iprod(nW,nW));
 +      /* This is for precision, but does not make a big difference,
 +       * it can go later.
 +       */
 +      r_OD = gmx_invsqrt(iprod(dOD,dOD)); 
 +      
 +      /* Normalize the vectors in the water frame */
 +      svmul(r_nW,nW,nW);
 +      svmul(r_HH,dHH,dHH);
 +      svmul(r_OD,dOD,dOD);
 +      
 +      /* Compute displacement of shell along components of the vector */
 +      dx[ZZ] = iprod(dDS,dOD);
 +      /* Compute projection on the XY plane: dDS - dx[ZZ]*dOD */
 +      for(m=0; (m<DIM); m++)
 +      proj[m] = dDS[m]-dx[ZZ]*dOD[m];
 +      
 +      /*dx[XX] = iprod(dDS,nW);
 +      dx[YY] = iprod(dDS,dHH);*/
 +      dx[XX] = iprod(proj,nW);
 +      for(m=0; (m<DIM); m++)
 +      proj[m] -= dx[XX]*nW[m];
 +      dx[YY] = iprod(proj,dHH);
 +      /*#define DEBUG*/
 +#ifdef DEBUG
 +      if (debug) {
 +      fprintf(debug,"WPOL: dx2=%10g  dy2=%10g  dz2=%10g  sum=%10g  dDS^2=%10g\n",
 +              sqr(dx[XX]),sqr(dx[YY]),sqr(dx[ZZ]),iprod(dx,dx),iprod(dDS,dDS));
 +      fprintf(debug,"WPOL: dHH=(%10g,%10g,%10g)\n",dHH[XX],dHH[YY],dHH[ZZ]);
 +      fprintf(debug,"WPOL: dOD=(%10g,%10g,%10g), 1/r_OD = %10g\n",
 +              dOD[XX],dOD[YY],dOD[ZZ],1/r_OD);
 +      fprintf(debug,"WPOL: nW =(%10g,%10g,%10g), 1/r_nW = %10g\n",
 +              nW[XX],nW[YY],nW[ZZ],1/r_nW);
 +      fprintf(debug,"WPOL: dx  =%10g, dy  =%10g, dz  =%10g\n",
 +              dx[XX],dx[YY],dx[ZZ]);
 +      fprintf(debug,"WPOL: dDSx=%10g, dDSy=%10g, dDSz=%10g\n",
 +              dDS[XX],dDS[YY],dDS[ZZ]);
 +      }
 +#endif
 +      /* Now compute the forces and energy */
 +      kdx[XX] = kk[XX]*dx[XX];
 +      kdx[YY] = kk[YY]*dx[YY];
 +      kdx[ZZ] = kk[ZZ]*dx[ZZ];
 +      vtot   += iprod(dx,kdx);
 +      for(m=0; (m<DIM); m++) {
 +      /* This is a tensor operation but written out for speed */
 +      tx        =  nW[m]*kdx[XX];
 +      ty        = dHH[m]*kdx[YY];
 +      tz        = dOD[m]*kdx[ZZ];
 +      fij       = -tx-ty-tz;
 +#ifdef DEBUG
 +      df[m] = fij;
 +#endif
 +      f[aS][m] += fij;
 +      f[aD][m] -= fij;
 +      }
 +#ifdef DEBUG
 +      if (debug) {
 +      fprintf(debug,"WPOL: vwpol=%g\n",0.5*iprod(dx,kdx));
 +      fprintf(debug,"WPOL: df = (%10g, %10g, %10g)\n",df[XX],df[YY],df[ZZ]);
 +      }
 +#endif
 +    } 
 +  }
 +  return 0.5*vtot;
 +}
 +
 +/* One damped Coulomb interaction between charges qq at xi and xj using
 + * Thole-style exponential screening with damping factor afac. Adds forces
 + * to fi/fj and the shift forces, and returns the screened pair energy
 + * v0*v1. Helper for thole_pol(). */
 +static real do_1_thole(const rvec xi,const rvec xj,rvec fi,rvec fj,
 +                     const t_pbc *pbc,real qq,
 +                     rvec fshift[],real afac)
 +{
 +  rvec r12;
 +  real r12sq,r12_1,r12n,r12bar,v0,v1,fscal,ebar,fff;
 +  int  m,t;
 +    
 +  t      = pbc_rvec_sub(pbc,xi,xj,r12); /*  3 */
 +  
 +  r12sq  = iprod(r12,r12);              /*  5 */
 +  r12_1  = gmx_invsqrt(r12sq);              /*  5 */
 +  r12bar = afac/r12_1;                  /*  5 */
 +  v0     = qq*ONE_4PI_EPS0*r12_1;       /*  2 */
 +  ebar   = exp(-r12bar);                /*  5 */
 +  v1     = (1-(1+0.5*r12bar)*ebar);     /*  4 */
 +  fscal  = ((v0*r12_1)*v1 - v0*0.5*afac*ebar*(r12bar+1))*r12_1; /* 9 */
 +  if (debug)
 +    fprintf(debug,"THOLE: v0 = %.3f v1 = %.3f r12= % .3f r12bar = %.3f fscal = %.3f  ebar = %.3f\n",v0,v1,1/r12_1,r12bar,fscal,ebar);
 +  
 +  for(m=0; (m<DIM); m++) {
 +    fff    = fscal*r12[m];
 +    fi[m] += fff;
 +    fj[m] -= fff;             
 +    fshift[t][m]       += fff;
 +    fshift[CENTRAL][m] -= fff;
 +  } /* 15 */
 +  
 +  return v0*v1; /* 1 */
 +  /* 54 */
 +}
 +
 +/* Thole polarization: screened electrostatics between two atom/shell pairs
 + * (a1,da1) and (a2,da2). Each quintet (type, a1, da1, a2, da2) yields the
 + * four cross interactions with signs +qq, -qq, -qq, +qq; the damping
 + * factor is afac = a * (alpha1*alpha2)^(-1/6). Returns the summed energy. */
 +real thole_pol(int nbonds,
 +             const t_iatom forceatoms[],const t_iparams forceparams[],
 +             const rvec x[],rvec f[],rvec fshift[],
 +             const t_pbc *pbc,const t_graph *g,
 +             real lambda,real *dvdlambda,
 +             const t_mdatoms *md,t_fcdata *fcd,
 +             int *global_atom_index)
 +{
 +  /* Interaction between two pairs of particles with opposite charge */
 +  int i,type,a1,da1,a2,da2;
 +  real q1,q2,qq,a,al1,al2,afac;
 +  real V=0;
 +  
 +  for(i=0; (i<nbonds); ) {
 +    type  = forceatoms[i++];
 +    a1    = forceatoms[i++];
 +    da1   = forceatoms[i++];
 +    a2    = forceatoms[i++];
 +    da2   = forceatoms[i++];
 +    q1    = md->chargeA[da1];
 +    q2    = md->chargeA[da2];
 +    a     = forceparams[type].thole.a;
 +    al1   = forceparams[type].thole.alpha1;
 +    al2   = forceparams[type].thole.alpha2;
 +    qq    = q1*q2;
 +    afac  = a*pow(al1*al2,-1.0/6.0);
 +    V += do_1_thole(x[a1], x[a2], f[a1], f[a2], pbc, qq,fshift,afac);
 +    V += do_1_thole(x[da1],x[a2], f[da1],f[a2], pbc,-qq,fshift,afac);
 +    V += do_1_thole(x[a1], x[da2],f[a1], f[da2],pbc,-qq,fshift,afac);
 +    V += do_1_thole(x[da1],x[da2],f[da1],f[da2],pbc, qq,fshift,afac);
 +  }
 +  /* 290 flops */
 +  return V;
 +}
 +
 +/* Angle between bonds i-j and j-k (radians). Also outputs the two PBC-
 + * corrected bond vectors r_ij and r_kj, the cosine of the angle, and the
 + * shift indices t1/t2 of the two vector subtractions. */
 +real bond_angle(const rvec xi,const rvec xj,const rvec xk,const t_pbc *pbc,
 +              rvec r_ij,rvec r_kj,real *costh,
 +              int *t1,int *t2)
 +/* Return value is the angle between the bonds i-j and j-k */
 +{
 +  /* 41 FLOPS */
 +  real th;
 +  
 +  *t1 = pbc_rvec_sub(pbc,xi,xj,r_ij);                 /*  3           */
 +  *t2 = pbc_rvec_sub(pbc,xk,xj,r_kj);                 /*  3           */
 +
 +  *costh=cos_angle(r_ij,r_kj);                /* 25           */
 +  th=acos(*costh);                    /* 10           */
 +                                      /* 41 TOTAL     */
 +  return th;
 +}
 +
 +/* Harmonic angle potential for (ai, aj, ak) triplets, with the reference
 + * angle given in degrees in the topology (converted via DEG2RAD) and
 + * lambda interpolation through harmonic(). Forces are only spread when
 + * cos^2(theta) < 1, i.e. the angle is not exactly 0 or 180 degrees, where
 + * the force direction is undefined. Returns the summed angle energy. */
 +real angles(int nbonds,
 +            const t_iatom forceatoms[],const t_iparams forceparams[],
 +            const rvec x[],rvec f[],rvec fshift[],
 +            const t_pbc *pbc,const t_graph *g,
 +            real lambda,real *dvdlambda,
 +            const t_mdatoms *md,t_fcdata *fcd,
 +            int *global_atom_index)
 +{
 +    int  i,ai,aj,ak,t1,t2,type;
 +    rvec r_ij,r_kj;
 +    real cos_theta,cos_theta2,theta,dVdt,va,vtot;
 +    ivec jt,dt_ij,dt_kj;
 +
 +    vtot = 0.0;
 +    for(i=0; i<nbonds; )
 +    {
 +        type = forceatoms[i++];
 +        ai   = forceatoms[i++];
 +        aj   = forceatoms[i++];
 +        ak   = forceatoms[i++];
 +
 +        theta  = bond_angle(x[ai],x[aj],x[ak],pbc,
 +                            r_ij,r_kj,&cos_theta,&t1,&t2);    /*  41          */
 +  
 +        *dvdlambda += harmonic(forceparams[type].harmonic.krA,
 +                               forceparams[type].harmonic.krB,
 +                               forceparams[type].harmonic.rA*DEG2RAD,
 +                               forceparams[type].harmonic.rB*DEG2RAD,
 +                               theta,lambda,&va,&dVdt);  /*  21  */
 +        vtot += va;
 +
 +        cos_theta2 = sqr(cos_theta);
 +        if (cos_theta2 < 1)
 +        {
 +            int  m;
 +            real st,sth;
 +            real cik,cii,ckk;
 +            real nrkj2,nrij2;
 +            real nrkj_1,nrij_1;
 +            rvec f_i,f_j,f_k;
 +
 +            /* Chain rule: dV/dtheta -> forces on the three atoms via the
 +             * derivative of cos(theta) w.r.t. the bond vectors */
 +            st  = dVdt*gmx_invsqrt(1 - cos_theta2);   /*  12          */
 +            sth = st*cos_theta;                       /*   1          */
 +#ifdef DEBUG
 +            if (debug)
 +                fprintf(debug,"ANGLES: theta = %10g  vth = %10g  dV/dtheta = %10g\n",
 +                        theta*RAD2DEG,va,dVdt);
 +#endif
 +            nrij2 = iprod(r_ij,r_ij);                 /*   5          */
 +            nrkj2 = iprod(r_kj,r_kj);                 /*   5          */
 +
 +            nrij_1 = gmx_invsqrt(nrij2);              /*  10          */
 +            nrkj_1 = gmx_invsqrt(nrkj2);              /*  10          */
 +
 +            cik = st*nrij_1*nrkj_1;                   /*   2          */
 +            cii = sth*nrij_1*nrij_1;                  /*   2          */
 +            ckk = sth*nrkj_1*nrkj_1;                  /*   2          */
 +      
 +            for (m=0; m<DIM; m++)
 +            {                 /*  39          */
 +                f_i[m]    = -(cik*r_kj[m] - cii*r_ij[m]);
 +                f_k[m]    = -(cik*r_ij[m] - ckk*r_kj[m]);
 +                f_j[m]    = -f_i[m] - f_k[m];
 +                f[ai][m] += f_i[m];
 +                f[aj][m] += f_j[m];
 +                f[ak][m] += f_k[m];
 +            }
 +            if (g != NULL)
 +            {
 +                copy_ivec(SHIFT_IVEC(g,aj),jt);
 +
 +                ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
 +                ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
 +                t1 = IVEC2IS(dt_ij);
 +                t2 = IVEC2IS(dt_kj);
 +            }
 +            rvec_inc(fshift[t1],f_i);
 +            rvec_inc(fshift[CENTRAL],f_j);
 +            rvec_inc(fshift[t2],f_k);
 +        }                                           /* 161 TOTAL      */
 +    }
 +
 +    return vtot;
 +}
 +
/* Computes the linear-angle potential for i-j-k triplets:
 *   V = 0.5*klin*|dr|^2,   dr = x_j - (a*x_i + b*x_k),   b = 1 - a,
 * i.e. a harmonic restraint keeping the central atom j on the i-k axis.
 * Both the force constant klin and the weight a are interpolated between
 * the A and B topology states with lambda (free-energy perturbation).
 * Accumulates forces in f[], shift forces in fshift[] and dV/dlambda in
 * *dvdlambda; returns the total potential energy.
 */
real linear_angles(int nbonds,
                   const t_iatom forceatoms[],const t_iparams forceparams[],
                   const rvec x[],rvec f[],rvec fshift[],
                   const t_pbc *pbc,const t_graph *g,
                   real lambda,real *dvdlambda,
                   const t_mdatoms *md,t_fcdata *fcd,
                   int *global_atom_index)
{
  int  i,m,ai,aj,ak,t1,t2,type;
  rvec f_i,f_j,f_k;
  real L1,kA,kB,aA,aB,dr,dr2,va,vtot,a,b,klin;
  ivec jt,dt_ij,dt_kj;
  rvec r_ij,r_kj,r_ik,dx;

  L1   = 1-lambda;
  vtot = 0.0;
  /* forceatoms is a flat stream of (type, ai, aj, ak) records */
  for(i=0; (i<nbonds); ) {
    type = forceatoms[i++];
    ai   = forceatoms[i++];
    aj   = forceatoms[i++];
    ak   = forceatoms[i++];

    /* lambda-interpolated force constant */
    kA = forceparams[type].linangle.klinA;
    kB = forceparams[type].linangle.klinB;
    klin = L1*kA + lambda*kB;

    /* lambda-interpolated weight of atom i; atom k gets the remainder */
    aA   = forceparams[type].linangle.aA;
    aB   = forceparams[type].linangle.aB;
    a    = L1*aA+lambda*aB;
    b    = 1-a;

    /* PBC-corrected bond vectors; t1/t2 are the periodic shift indices */
    t1 = pbc_rvec_sub(pbc,x[ai],x[aj],r_ij);
    t2 = pbc_rvec_sub(pbc,x[ak],x[aj],r_kj);
    rvec_sub(r_ij,r_kj,r_ik);

    dr2 = 0;
    for(m=0; (m<DIM); m++)
    {
        /* dr is the deviation of j from the weighted point on the i-k axis */
        dr     = - a * r_ij[m] - b * r_kj[m];
        dr2   += dr*dr;
        dx[m]  = dr;
        f_i[m] = a*klin*dr;
        f_k[m] = b*klin*dr;
        f_j[m] = -(f_i[m]+f_k[m]);   /* the three forces sum to zero */
        f[ai][m] += f_i[m];
        f[aj][m] += f_j[m];
        f[ak][m] += f_k[m];
    }
    va    = 0.5*klin*dr2;
    /* dV/dlambda has contributions from both klin(lambda) and a(lambda) */
    *dvdlambda += 0.5*(kB-kA)*dr2 + klin*(aB-aA)*iprod(dx,r_ik);

    vtot += va;

    /* With a connectivity graph the shift indices are recomputed from it */
    if (g) {
        copy_ivec(SHIFT_IVEC(g,aj),jt);

        ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
        ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
        t1=IVEC2IS(dt_ij);
        t2=IVEC2IS(dt_kj);
    }
    rvec_inc(fshift[t1],f_i);
    rvec_inc(fshift[CENTRAL],f_j);
    rvec_inc(fshift[t2],f_k);
  }                                           /* 57 TOTAL     */
  return vtot;
}
 +
/* Computes the CHARMM Urey-Bradley potential for i-j-k triplets: a
 * harmonic term in the angle theta(i,j,k) plus a harmonic term in the
 * 1-3 distance |x_i - x_k|.  Both terms are lambda-interpolated between
 * the A and B states via harmonic().  Accumulates forces in f[], shift
 * forces in fshift[] and dV/dlambda in *dvdlambda; returns the total
 * potential energy.
 */
real urey_bradley(int nbonds,
                const t_iatom forceatoms[],const t_iparams forceparams[],
                const rvec x[],rvec f[],rvec fshift[],
                const t_pbc *pbc,const t_graph *g,
                real lambda,real *dvdlambda,
                const t_mdatoms *md,t_fcdata *fcd,
                int *global_atom_index)
{
  int  i,m,ai,aj,ak,t1,t2,type,ki;
  rvec r_ij,r_kj,r_ik;
  real cos_theta,cos_theta2,theta;
  real dVdt,va,vtot,dr,dr2,vbond,fbond,fik;
  real kthA,th0A,kUBA,r13A,kthB,th0B,kUBB,r13B;
  ivec jt,dt_ij,dt_kj,dt_ik;

  vtot = 0.0;
  /* forceatoms is a flat stream of (type, ai, aj, ak) records */
  for(i=0; (i<nbonds); ) {
    type = forceatoms[i++];
    ai   = forceatoms[i++];
    aj   = forceatoms[i++];
    ak   = forceatoms[i++];
    /* A- and B-state parameters for the angle and the 1-3 bond terms */
    th0A  = forceparams[type].u_b.thetaA*DEG2RAD;
    kthA  = forceparams[type].u_b.kthetaA;
    r13A  = forceparams[type].u_b.r13A;
    kUBA  = forceparams[type].u_b.kUBA;
    th0B  = forceparams[type].u_b.thetaB*DEG2RAD;
    kthB  = forceparams[type].u_b.kthetaB;
    r13B  = forceparams[type].u_b.r13B;
    kUBB  = forceparams[type].u_b.kUBB;

    theta  = bond_angle(x[ai],x[aj],x[ak],pbc,
                      r_ij,r_kj,&cos_theta,&t1,&t2);  /*  41          */

    /* Harmonic angle part; dVdt receives dV/dtheta */
    *dvdlambda += harmonic(kthA,kthB,th0A,th0B,theta,lambda,&va,&dVdt);  /*  21  */
    vtot += va;

    /* Harmonic 1-3 distance part; fbond receives -dV/dr (scaled below) */
    ki   = pbc_rvec_sub(pbc,x[ai],x[ak],r_ik);        /*   3          */
    dr2  = iprod(r_ik,r_ik);                  /*   5          */
    dr   = dr2*gmx_invsqrt(dr2);                      /*  10          */

    *dvdlambda += harmonic(kUBA,kUBB,r13A,r13B,dr,lambda,&vbond,&fbond); /*  19  */

    /* Angle force distribution; skipped for (near-)linear angles where
     * the force direction is ill-defined */
    cos_theta2 = sqr(cos_theta);                /*   1                */
    if (cos_theta2 < 1) {
      real st,sth;
      real cik,cii,ckk;
      real nrkj2,nrij2;
      rvec f_i,f_j,f_k;

      st  = dVdt*gmx_invsqrt(1 - cos_theta2); /*  12          */
      sth = st*cos_theta;                     /*   1          */
#ifdef DEBUG
      if (debug)
        fprintf(debug,"ANGLES: theta = %10g  vth = %10g  dV/dtheta = %10g\n",
                theta*RAD2DEG,va,dVdt);
#endif
      nrkj2=iprod(r_kj,r_kj);                 /*   5          */
      nrij2=iprod(r_ij,r_ij);

      cik=st*gmx_invsqrt(nrkj2*nrij2);                /*  12          */
      cii=sth/nrij2;                          /*  10          */
      ckk=sth/nrkj2;                          /*  10          */

      for (m=0; (m<DIM); m++) {                       /*  39          */
        f_i[m]=-(cik*r_kj[m]-cii*r_ij[m]);
        f_k[m]=-(cik*r_ij[m]-ckk*r_kj[m]);
        f_j[m]=-f_i[m]-f_k[m];
        f[ai][m]+=f_i[m];
        f[aj][m]+=f_j[m];
        f[ak][m]+=f_k[m];
      }
      /* With a connectivity graph the shift indices are recomputed from it */
      if (g) {
        copy_ivec(SHIFT_IVEC(g,aj),jt);

        ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
        ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
        t1=IVEC2IS(dt_ij);
        t2=IVEC2IS(dt_kj);
      }
      rvec_inc(fshift[t1],f_i);
      rvec_inc(fshift[CENTRAL],f_j);
      rvec_inc(fshift[t2],f_k);
    }                                           /* 161 TOTAL  */
    /* Time for the bond calculations */
    if (dr2 == 0.0)
      continue;

    vtot  += vbond;  /* 1*/
    fbond *= gmx_invsqrt(dr2);                        /*   6          */

    if (g) {
      ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,ak),dt_ik);
      ki=IVEC2IS(dt_ik);
    }
    /* 1-3 bond force acts along r_ik on atoms i and k only */
    for (m=0; (m<DIM); m++) {                 /*  15          */
      fik=fbond*r_ik[m];
      f[ai][m]+=fik;
      f[ak][m]-=fik;
      fshift[ki][m]+=fik;
      fshift[CENTRAL][m]-=fik;
    }
  }
  return vtot;
}
 +
/* Computes the quartic-angle potential for i-j-k triplets:
 *   V = sum_{j=0..4} c_j * (theta - theta0)^j
 * with dV/dtheta accumulated as -sum_{j=1..4} j*c_j*(theta-theta0)^(j-1)
 * (the minus sign matches the force-distribution convention below).
 * NOTE: this term has no free-energy dependence; lambda and dvdlambda
 * are accepted for the common bonded-function signature but unused.
 * Accumulates forces and shift forces; returns the total energy.
 */
real quartic_angles(int nbonds,
                  const t_iatom forceatoms[],const t_iparams forceparams[],
                  const rvec x[],rvec f[],rvec fshift[],
                  const t_pbc *pbc,const t_graph *g,
                  real lambda,real *dvdlambda,
                  const t_mdatoms *md,t_fcdata *fcd,
                  int *global_atom_index)
{
  int  i,j,ai,aj,ak,t1,t2,type;
  rvec r_ij,r_kj;
  real cos_theta,cos_theta2,theta,dt,dVdt,va,dtp,c,vtot;
  ivec jt,dt_ij,dt_kj;

  vtot = 0.0;
  /* forceatoms is a flat stream of (type, ai, aj, ak) records */
  for(i=0; (i<nbonds); ) {
    type = forceatoms[i++];
    ai   = forceatoms[i++];
    aj   = forceatoms[i++];
    ak   = forceatoms[i++];

    theta  = bond_angle(x[ai],x[aj],x[ak],pbc,
                      r_ij,r_kj,&cos_theta,&t1,&t2);  /*  41          */

    dt = theta - forceparams[type].qangle.theta*DEG2RAD; /* 2          */

    /* Horner-style accumulation of the polynomial and its derivative */
    dVdt = 0;
    va = forceparams[type].qangle.c[0];
    dtp = 1.0;
    for(j=1; j<=4; j++) {
      c = forceparams[type].qangle.c[j];
      dVdt -= j*c*dtp;
      dtp *= dt;
      va += c*dtp;
    }
    /* 20 */

    vtot += va;

    /* Angle force distribution; skipped for (near-)linear angles where
     * the force direction is ill-defined */
    cos_theta2 = sqr(cos_theta);                /*   1                */
    if (cos_theta2 < 1) {
      int  m;
      real st,sth;
      real cik,cii,ckk;
      real nrkj2,nrij2;
      rvec f_i,f_j,f_k;

      st  = dVdt*gmx_invsqrt(1 - cos_theta2);         /*  12          */
      sth = st*cos_theta;                     /*   1          */
#ifdef DEBUG
      if (debug)
        fprintf(debug,"ANGLES: theta = %10g  vth = %10g  dV/dtheta = %10g\n",
                theta*RAD2DEG,va,dVdt);
#endif
      nrkj2=iprod(r_kj,r_kj);                 /*   5          */
      nrij2=iprod(r_ij,r_ij);

      cik=st*gmx_invsqrt(nrkj2*nrij2);                /*  12          */
      cii=sth/nrij2;                          /*  10          */
      ckk=sth/nrkj2;                          /*  10          */

      for (m=0; (m<DIM); m++) {                       /*  39          */
        f_i[m]=-(cik*r_kj[m]-cii*r_ij[m]);
        f_k[m]=-(cik*r_ij[m]-ckk*r_kj[m]);
        f_j[m]=-f_i[m]-f_k[m];
        f[ai][m]+=f_i[m];
        f[aj][m]+=f_j[m];
        f[ak][m]+=f_k[m];
      }
      /* With a connectivity graph the shift indices are recomputed from it */
      if (g) {
        copy_ivec(SHIFT_IVEC(g,aj),jt);

        ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
        ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
        t1=IVEC2IS(dt_ij);
        t2=IVEC2IS(dt_kj);
      }
      rvec_inc(fshift[t1],f_i);
      rvec_inc(fshift[CENTRAL],f_j);
      rvec_inc(fshift[t2],f_k);
    }                                           /* 153 TOTAL  */
  }
  return vtot;
}
 +
 +real dih_angle(const rvec xi,const rvec xj,const rvec xk,const rvec xl,
 +               const t_pbc *pbc,
 +               rvec r_ij,rvec r_kj,rvec r_kl,rvec m,rvec n,
 +               real *sign,int *t1,int *t2,int *t3)
 +{
 +  real ipr,phi;
 +
 +  *t1 = pbc_rvec_sub(pbc,xi,xj,r_ij);                 /*  3           */
 +  *t2 = pbc_rvec_sub(pbc,xk,xj,r_kj);                 /*  3           */
 +  *t3 = pbc_rvec_sub(pbc,xk,xl,r_kl);                 /*  3           */
 +
 +  cprod(r_ij,r_kj,m);                         /*  9           */
 +  cprod(r_kj,r_kl,n);                 /*  9           */
 +  phi=gmx_angle(m,n);                         /* 49 (assuming 25 for atan2) */
 +  ipr=iprod(r_ij,n);                  /*  5           */
 +  (*sign)=(ipr<0.0)?-1.0:1.0;
 +  phi=(*sign)*phi;                    /*  1           */
 +                                      /* 82 TOTAL     */
 +  return phi;
 +}
 +
 +
#ifdef SSE_PROPER_DIHEDRALS

/* x86 SIMD inner-product of 4 float vectors */
#define GMX_MM_IPROD_PS(ax,ay,az,bx,by,bz)                 \
    _mm_add_ps(_mm_add_ps(_mm_mul_ps(ax,bx),_mm_mul_ps(ay,by)),_mm_mul_ps(az,bz))

/* x86 SIMD norm^2 of 4 float vectors */
#define GMX_MM_NORM2_PS(ax,ay,az) GMX_MM_IPROD_PS(ax,ay,az,ax,ay,az)

/* x86 SIMD cross-product of 4 float vectors: (cx,cy,cz) = a x b */
#define GMX_MM_CPROD_PS(ax,ay,az,bx,by,bz,cx,cy,cz)        \
{                                                          \
    cx = _mm_sub_ps(_mm_mul_ps(ay,bz),_mm_mul_ps(az,by));  \
    cy = _mm_sub_ps(_mm_mul_ps(az,bx),_mm_mul_ps(ax,bz));  \
    cz = _mm_sub_ps(_mm_mul_ps(ax,by),_mm_mul_ps(ay,bx));  \
}

/* load 4 rvec's into 3 x86 SIMD float registers: transposes the
 * array-of-structures layout into x/y/z lane registers.
 * NOTE: uses aligned loads, so r0..r3 must be 16-byte aligned. */
#define load_rvec4(r0,r1,r2,r3,rx_SSE,ry_SSE,rz_SSE)          \
{                                                             \
    __m128 tmp;                                               \
    rx_SSE = _mm_load_ps(r0);                                 \
    ry_SSE = _mm_load_ps(r1);                                 \
    rz_SSE = _mm_load_ps(r2);                                 \
    tmp    = _mm_load_ps(r3);                                 \
    _MM_TRANSPOSE4_PS(rx_SSE,ry_SSE,rz_SSE,tmp);              \
}

/* inverse of load_rvec4: transpose the x/y/z lane registers back and
 * store them to 4 rvec's.  NOTE: the transpose clobbers the inputs and
 * the stores are aligned, so r0..r3 must be 16-byte aligned. */
#define store_rvec4(rx_SSE,ry_SSE,rz_SSE,r0,r1,r2,r3)         \
{                                                             \
    __m128 tmp=_mm_setzero_ps();                              \
    _MM_TRANSPOSE4_PS(rx_SSE,ry_SSE,rz_SSE,tmp);              \
    _mm_store_ps(r0,rx_SSE);                                  \
    _mm_store_ps(r1,ry_SSE);                                  \
    _mm_store_ps(r2,rz_SSE);                                  \
    _mm_store_ps(r3,tmp   );                                  \
}

/* An rvec in a structure which can be allocated 16-byte aligned */
typedef struct {
    rvec  v;
    float f;   /* pads the rvec out to 4 floats for the SIMD load/store */
} rvec_sse_t;
 +
/* As dih_angle above, but calculates 4 dihedral angles at once using SSE,
 * also calculates the pre-factors required for the dihedral force update.
 * Note that bv and buf should be 16-byte aligned.
 * On return:
 *   bv[0..3].v  = the m vectors, bv[4..7].v = the n vectors
 *                 (r_kl in bv[8..11] is left as loaded),
 *   buf[0..3]   = nrkj_m2 = |r_kj|/|m|^2,
 *   buf[4..7]   = nrkj_n2 = |r_kj|/|n|^2,
 *   buf[8..11]  = p = (r_ij . r_kj)/|r_kj|^2,
 *   buf[12..15] = q = (r_kl . r_kj)/|r_kj|^2,
 *   buf[16..19] = the 4 signed dihedral angles phi,
 * so buf must provide room for at least 20 floats.
 */
static void
dih_angle_sse(const rvec *x,
              int ai[4],int aj[4],int ak[4],int al[4],
              const t_pbc *pbc,
              int t1[4],int t2[4],int t3[4],
              rvec_sse_t *bv,
              real *buf)
{
    int s;
    __m128 rijx_SSE,rijy_SSE,rijz_SSE;
    __m128 rkjx_SSE,rkjy_SSE,rkjz_SSE;
    __m128 rklx_SSE,rkly_SSE,rklz_SSE;
    __m128 mx_SSE,my_SSE,mz_SSE;
    __m128 nx_SSE,ny_SSE,nz_SSE;
    __m128 cx_SSE,cy_SSE,cz_SSE;
    __m128 cn_SSE;
    __m128 s_SSE;
    __m128 phi_SSE;
    __m128 ipr_SSE;
    int signs;
    __m128 iprm_SSE,iprn_SSE;
    __m128 nrkj2_SSE,nrkj_1_SSE,nrkj_2_SSE,nrkj_SSE;
    __m128 nrkj_m2_SSE,nrkj_n2_SSE;
    __m128 p_SSE,q_SSE;
    __m128 fmin_SSE=_mm_set1_ps(GMX_FLOAT_MIN);

    /* Bond vectors for all 4 dihedrals, staged in bv for the SIMD loads:
     * r_ij in bv[0..3], r_kj in bv[4..7], r_kl in bv[8..11] */
    for(s=0; s<4; s++)
    {
        t1[s] = pbc_rvec_sub(pbc,x[ai[s]],x[aj[s]],bv[0+s].v);
        t2[s] = pbc_rvec_sub(pbc,x[ak[s]],x[aj[s]],bv[4+s].v);
        t3[s] = pbc_rvec_sub(pbc,x[ak[s]],x[al[s]],bv[8+s].v);
    }

    load_rvec4(bv[0].v,bv[1].v,bv[2].v,bv[3].v,rijx_SSE,rijy_SSE,rijz_SSE);
    load_rvec4(bv[4].v,bv[5].v,bv[6].v,bv[7].v,rkjx_SSE,rkjy_SSE,rkjz_SSE);
    load_rvec4(bv[8].v,bv[9].v,bv[10].v,bv[11].v,rklx_SSE,rkly_SSE,rklz_SSE);

    /* Plane normals m = r_ij x r_kj and n = r_kj x r_kl */
    GMX_MM_CPROD_PS(rijx_SSE,rijy_SSE,rijz_SSE,
                    rkjx_SSE,rkjy_SSE,rkjz_SSE,
                    mx_SSE,my_SSE,mz_SSE);

    GMX_MM_CPROD_PS(rkjx_SSE,rkjy_SSE,rkjz_SSE,
                    rklx_SSE,rkly_SSE,rklz_SSE,
                    nx_SSE,ny_SSE,nz_SSE);

    GMX_MM_CPROD_PS(mx_SSE,my_SSE,mz_SSE,
                    nx_SSE,ny_SSE,nz_SSE,
                    cx_SSE,cy_SSE,cz_SSE);

    /* The angle between m and n via atan2(|m x n|, m.n), as in gmx_angle() */
    cn_SSE = gmx_mm_sqrt_ps(GMX_MM_NORM2_PS(cx_SSE,cy_SSE,cz_SSE));

    s_SSE = GMX_MM_IPROD_PS(mx_SSE,my_SSE,mz_SSE,nx_SSE,ny_SSE,nz_SSE);

    phi_SSE = gmx_mm_atan2_ps(cn_SSE,s_SSE);
    _mm_store_ps(buf+16,phi_SSE);

    /* Sign of the angle from the sign of r_ij . n */
    ipr_SSE = GMX_MM_IPROD_PS(rijx_SSE,rijy_SSE,rijz_SSE,
                              nx_SSE,ny_SSE,nz_SSE);

    /* movemask extracts the 4 sign bits in one instruction */
    signs = _mm_movemask_ps(ipr_SSE);

    for(s=0; s<4; s++)
    {
        if (signs & (1<<s))
        {
            buf[16+s] = -buf[16+s];
        }
    }

    iprm_SSE    = GMX_MM_NORM2_PS(mx_SSE,my_SSE,mz_SSE);
    iprn_SSE    = GMX_MM_NORM2_PS(nx_SSE,ny_SSE,nz_SSE);

    /* store_rvec4 messes with the input, don't use it after this! */
    store_rvec4(mx_SSE,my_SSE,mz_SSE,bv[0].v,bv[1].v,bv[2].v,bv[3].v);
    store_rvec4(nx_SSE,ny_SSE,nz_SSE,bv[4].v,bv[5].v,bv[6].v,bv[7].v);

    nrkj2_SSE   = GMX_MM_NORM2_PS(rkjx_SSE,rkjy_SSE,rkjz_SSE);

    /* Avoid division by zero. When zero, the result is multiplied by 0
     * anyhow, so the 3 max below do not affect the final result.
     */
    nrkj2_SSE   = _mm_max_ps(nrkj2_SSE,fmin_SSE);
    nrkj_1_SSE  = gmx_mm_invsqrt_ps(nrkj2_SSE);
    nrkj_2_SSE  = _mm_mul_ps(nrkj_1_SSE,nrkj_1_SSE);
    nrkj_SSE    = _mm_mul_ps(nrkj2_SSE,nrkj_1_SSE);

    iprm_SSE    = _mm_max_ps(iprm_SSE,fmin_SSE);
    iprn_SSE    = _mm_max_ps(iprn_SSE,fmin_SSE);
    nrkj_m2_SSE = _mm_mul_ps(nrkj_SSE,gmx_mm_inv_ps(iprm_SSE));
    nrkj_n2_SSE = _mm_mul_ps(nrkj_SSE,gmx_mm_inv_ps(iprn_SSE));

    _mm_store_ps(buf+0,nrkj_m2_SSE);
    _mm_store_ps(buf+4,nrkj_n2_SSE);

    /* p and q are the projections of r_ij resp. r_kl onto r_kj */
    p_SSE       = GMX_MM_IPROD_PS(rijx_SSE,rijy_SSE,rijz_SSE,
                                  rkjx_SSE,rkjy_SSE,rkjz_SSE);
    p_SSE       = _mm_mul_ps(p_SSE,nrkj_2_SSE);

    q_SSE       = GMX_MM_IPROD_PS(rklx_SSE,rkly_SSE,rklz_SSE,
                                  rkjx_SSE,rkjy_SSE,rkjz_SSE);
    q_SSE       = _mm_mul_ps(q_SSE,nrkj_2_SSE);

    _mm_store_ps(buf+8 ,p_SSE);
    _mm_store_ps(buf+12,q_SSE);
}

#endif /* SSE_PROPER_DIHEDRALS */
 +
 +
/* Distributes the dihedral force over the four atoms i,j,k,l and updates
 * the shift forces.  ddphi is dV/dphi for this dihedral (both callers in
 * this file pass the derivative of their potential); m = r_ij x r_kj and
 * n = r_kj x r_kl are the plane normals from dih_angle().
 * Near-linear configurations, where |m|^2 or |n|^2 falls below
 * nrkj2*GMX_REAL_EPS, are skipped since the force direction is then
 * ill-defined.
 */
void do_dih_fup(int i,int j,int k,int l,real ddphi,
              rvec r_ij,rvec r_kj,rvec r_kl,
              rvec m,rvec n,rvec f[],rvec fshift[],
              const t_pbc *pbc,const t_graph *g,
              const rvec x[],int t1,int t2,int t3)
{
  /* 143 FLOPS */
  rvec f_i,f_j,f_k,f_l;
  rvec uvec,vvec,svec,dx_jl;
  real iprm,iprn,nrkj,nrkj2,nrkj_1,nrkj_2;
  real a,b,p,q,toler;
  ivec jt,dt_ij,dt_kj,dt_lj;

  iprm  = iprod(m,m);         /*  5   */
  iprn  = iprod(n,n);         /*  5   */
  nrkj2 = iprod(r_kj,r_kj);   /*  5   */
  toler = nrkj2*GMX_REAL_EPS;
  if ((iprm > toler) && (iprn > toler)) {
    nrkj_1 = gmx_invsqrt(nrkj2);      /* 10   */
    nrkj_2 = nrkj_1*nrkj_1;   /*  1   */
    nrkj  = nrkj2*nrkj_1;     /*  1   */
    /* Forces on the outer atoms are along the plane normals */
    a     = -ddphi*nrkj/iprm; /* 11   */
    svmul(a,m,f_i);           /*  3   */
    b     = ddphi*nrkj/iprn;  /* 11   */
    svmul(b,n,f_l);           /*  3   */
    /* p, q: projections of r_ij resp. r_kl onto r_kj, used to split
     * the remaining force between the two central atoms j and k */
    p     = iprod(r_ij,r_kj); /*  5   */
    p    *= nrkj_2;           /*  1   */
    q     = iprod(r_kl,r_kj); /*  5   */
    q    *= nrkj_2;           /*  1   */
    svmul(p,f_i,uvec);                /*  3   */
    svmul(q,f_l,vvec);                /*  3   */
    rvec_sub(uvec,vvec,svec); /*  3   */
    rvec_sub(f_i,svec,f_j);   /*  3   */
    rvec_add(f_l,svec,f_k);   /*  3   */
    rvec_inc(f[i],f_i);       /*  3   */
    rvec_dec(f[j],f_j);       /*  3   */
    rvec_dec(f[k],f_k);       /*  3   */
    rvec_inc(f[l],f_l);       /*  3   */

    /* Determine the shift indices: from the graph if available,
     * else from the PBC-corrected j-l vector, else all central */
    if (g) {
      copy_ivec(SHIFT_IVEC(g,j),jt);
      ivec_sub(SHIFT_IVEC(g,i),jt,dt_ij);
      ivec_sub(SHIFT_IVEC(g,k),jt,dt_kj);
      ivec_sub(SHIFT_IVEC(g,l),jt,dt_lj);
      t1=IVEC2IS(dt_ij);
      t2=IVEC2IS(dt_kj);
      t3=IVEC2IS(dt_lj);
    } else if (pbc) {
      t3 = pbc_rvec_sub(pbc,x[l],x[j],dx_jl);
    } else {
      t3 = CENTRAL;
    }

    rvec_inc(fshift[t1],f_i);
    rvec_dec(fshift[CENTRAL],f_j);
    rvec_dec(fshift[t2],f_k);
    rvec_inc(fshift[t3],f_l);
  }
  /* 112 TOTAL        */
}
 +
 +/* As do_dih_fup above, but without shift forces */
 +static void
 +do_dih_fup_noshiftf(int i,int j,int k,int l,real ddphi,
 +                    rvec r_ij,rvec r_kj,rvec r_kl,
 +                    rvec m,rvec n,rvec f[])
 +{
 +  rvec f_i,f_j,f_k,f_l;
 +  rvec uvec,vvec,svec,dx_jl;
 +  real iprm,iprn,nrkj,nrkj2,nrkj_1,nrkj_2;
 +  real a,b,p,q,toler;
 +  ivec jt,dt_ij,dt_kj,dt_lj;  
 +  
 +  iprm  = iprod(m,m);         /*  5   */
 +  iprn  = iprod(n,n);         /*  5   */
 +  nrkj2 = iprod(r_kj,r_kj);   /*  5   */
 +  toler = nrkj2*GMX_REAL_EPS;
 +  if ((iprm > toler) && (iprn > toler)) {
 +    nrkj_1 = gmx_invsqrt(nrkj2);      /* 10   */
 +    nrkj_2 = nrkj_1*nrkj_1;   /*  1   */
 +    nrkj  = nrkj2*nrkj_1;     /*  1   */
 +    a     = -ddphi*nrkj/iprm; /* 11   */
 +    svmul(a,m,f_i);           /*  3   */
 +    b     = ddphi*nrkj/iprn;  /* 11   */
 +    svmul(b,n,f_l);           /*  3   */
 +    p     = iprod(r_ij,r_kj); /*  5   */
 +    p    *= nrkj_2;           /*  1   */
 +    q     = iprod(r_kl,r_kj); /*  5   */
 +    q    *= nrkj_2;           /*  1   */
 +    svmul(p,f_i,uvec);                /*  3   */
 +    svmul(q,f_l,vvec);                /*  3   */
 +    rvec_sub(uvec,vvec,svec); /*  3   */
 +    rvec_sub(f_i,svec,f_j);   /*  3   */
 +    rvec_add(f_l,svec,f_k);   /*  3   */
 +    rvec_inc(f[i],f_i);       /*  3   */
 +    rvec_dec(f[j],f_j);       /*  3   */
 +    rvec_dec(f[k],f_k);       /*  3   */
 +    rvec_inc(f[l],f_l);       /*  3   */
 +  }
 +}
 +
 +/* As do_dih_fup_noshiftf above, but with pre-calculated pre-factors */
 +static void
 +do_dih_fup_noshiftf_precalc(int i,int j,int k,int l,real ddphi,
 +                            real nrkj_m2,real nrkj_n2,
 +                            real p,real q,
 +                            rvec m,rvec n,rvec f[])
 +{
 +    rvec f_i,f_j,f_k,f_l;
 +    rvec uvec,vvec,svec,dx_jl;
 +    real a,b,toler;
 +    ivec jt,dt_ij,dt_kj,dt_lj;  
 +  
 +    a = -ddphi*nrkj_m2;
 +    svmul(a,m,f_i);
 +    b =  ddphi*nrkj_n2;
 +    svmul(b,n,f_l);
 +    svmul(p,f_i,uvec);
 +    svmul(q,f_l,vvec);
 +    rvec_sub(uvec,vvec,svec);
 +    rvec_sub(f_i,svec,f_j);
 +    rvec_add(f_l,svec,f_k);
 +    rvec_inc(f[i],f_i);
 +    rvec_dec(f[j],f_j);
 +    rvec_dec(f[k],f_k);
 +    rvec_inc(f[l],f_l);
 +}
 +
 +
 +real dopdihs(real cpA,real cpB,real phiA,real phiB,int mult,
 +           real phi,real lambda,real *V,real *F)
 +{
 +  real v,dvdlambda,mdphi,v1,sdphi,ddphi;
 +  real L1   = 1.0 - lambda;
 +  real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
 +  real dph0 = (phiB - phiA)*DEG2RAD;
 +  real cp   = L1*cpA + lambda*cpB;
 +  
 +  mdphi =  mult*phi - ph0;
 +  sdphi = sin(mdphi);
 +  ddphi = -cp*mult*sdphi;
 +  v1    = 1.0 + cos(mdphi);
 +  v     = cp*v1;
 +  
 +  dvdlambda  = (cpB - cpA)*v1 + cp*dph0*sdphi;
 +  
 +  *V = v;
 +  *F = ddphi;
 +  
 +  return dvdlambda;
 +  
 +  /* That was 40 flops */
 +}
 +
 +static void
 +dopdihs_noener(real cpA,real cpB,real phiA,real phiB,int mult,
 +               real phi,real lambda,real *F)
 +{
 +  real mdphi,sdphi,ddphi;
 +  real L1   = 1.0 - lambda;
 +  real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
 +  real cp   = L1*cpA + lambda*cpB;
 +  
 +  mdphi = mult*phi - ph0;
 +  sdphi = sin(mdphi);
 +  ddphi = -cp*mult*sdphi;
 +  
 +  *F = ddphi;
 +  
 +  /* That was 20 flops */
 +}
 +
 +static void
 +dopdihs_mdphi(real cpA,real cpB,real phiA,real phiB,int mult,
 +              real phi,real lambda,real *cp,real *mdphi)
 +{
 +    real L1   = 1.0 - lambda;
 +    real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
 +
 +    *cp    = L1*cpA + lambda*cpB;
 +
 +    *mdphi = mult*phi - ph0;
 +}
 +
 +static real dopdihs_min(real cpA,real cpB,real phiA,real phiB,int mult,
 +                      real phi,real lambda,real *V,real *F)
 +     /* similar to dopdihs, except for a minus sign  *
 +      * and a different treatment of mult/phi0       */
 +{
 +  real v,dvdlambda,mdphi,v1,sdphi,ddphi;
 +  real L1   = 1.0 - lambda;
 +  real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
 +  real dph0 = (phiB - phiA)*DEG2RAD;
 +  real cp   = L1*cpA + lambda*cpB;
 +  
 +  mdphi = mult*(phi-ph0);
 +  sdphi = sin(mdphi);
 +  ddphi = cp*mult*sdphi;
 +  v1    = 1.0-cos(mdphi);
 +  v     = cp*v1;
 +  
 +  dvdlambda  = (cpB-cpA)*v1 + cp*dph0*sdphi;
 +  
 +  *V = v;
 +  *F = ddphi;
 +  
 +  return dvdlambda;
 +  
 +  /* That was 40 flops */
 +}
 +
 +real pdihs(int nbonds,
 +         const t_iatom forceatoms[],const t_iparams forceparams[],
 +         const rvec x[],rvec f[],rvec fshift[],
 +         const t_pbc *pbc,const t_graph *g,
 +         real lambda,real *dvdlambda,
 +         const t_mdatoms *md,t_fcdata *fcd,
 +         int *global_atom_index)
 +{
 +  int  i,type,ai,aj,ak,al;
 +  int  t1,t2,t3;
 +  rvec r_ij,r_kj,r_kl,m,n;
 +  real phi,sign,ddphi,vpd,vtot;
 +
 +  vtot = 0.0;
 +
 +  for(i=0; (i<nbonds); ) {
 +    type = forceatoms[i++];
 +    ai   = forceatoms[i++];
 +    aj   = forceatoms[i++];
 +    ak   = forceatoms[i++];
 +    al   = forceatoms[i++];
 +    
 +    phi=dih_angle(x[ai],x[aj],x[ak],x[al],pbc,r_ij,r_kj,r_kl,m,n,
 +                  &sign,&t1,&t2,&t3);                 /*  84          */
 +    *dvdlambda += dopdihs(forceparams[type].pdihs.cpA,
 +                          forceparams[type].pdihs.cpB,
 +                          forceparams[type].pdihs.phiA,
 +                          forceparams[type].pdihs.phiB,
 +                          forceparams[type].pdihs.mult,
 +                          phi,lambda,&vpd,&ddphi);
 +
 +    vtot += vpd;
 +    do_dih_fup(ai,aj,ak,al,ddphi,r_ij,r_kj,r_kl,m,n,
 +             f,fshift,pbc,g,x,t1,t2,t3);                      /* 112          */
 +
 +#ifdef DEBUG
 +    fprintf(debug,"pdih: (%d,%d,%d,%d) phi=%g\n",
 +          ai,aj,ak,al,phi);
 +#endif
 +  } /* 223 TOTAL      */
 +
 +  return vtot;
 +}
 +
 +void make_dp_periodic(real *dp)  /* 1 flop? */
 +{
 +    /* dp cannot be outside (-pi,pi) */
 +    if (*dp >= M_PI)
 +    {
 +        *dp -= 2*M_PI;
 +    }
 +    else if (*dp < -M_PI)
 +    {
 +        *dp += 2*M_PI;
 +    }
 +    return;
 +}
 +
/* As pdihs above, but without calculating energies and shift forces.
 * Consecutive entries acting on the same four atoms (common for dihedrals
 * with several multiplicities) share one dih_angle() evaluation and one
 * force distribution: their dV/dphi contributions are summed in ddphi_tot.
 */
static void
pdihs_noener(int nbonds,
             const t_iatom forceatoms[],const t_iparams forceparams[],
             const rvec x[],rvec f[],
             const t_pbc *pbc,const t_graph *g,
             real lambda,
             const t_mdatoms *md,t_fcdata *fcd,
             int *global_atom_index)
{
    int  i,type,ai,aj,ak,al;
    int  t1,t2,t3;
    rvec r_ij,r_kj,r_kl,m,n;
    real phi,sign,ddphi_tot,ddphi;

    for(i=0; (i<nbonds); )
    {
        /* Peek at the atoms of the next record; i advances in the
         * inner do-while below */
        ai   = forceatoms[i+1];
        aj   = forceatoms[i+2];
        ak   = forceatoms[i+3];
        al   = forceatoms[i+4];

        phi = dih_angle(x[ai],x[aj],x[ak],x[al],pbc,r_ij,r_kj,r_kl,m,n,
                        &sign,&t1,&t2,&t3);

        ddphi_tot = 0;

        /* Loop over dihedrals working on the same atoms,
         * so we avoid recalculating angles and force distributions.
         */
        do
        {
            type = forceatoms[i];
            dopdihs_noener(forceparams[type].pdihs.cpA,
                           forceparams[type].pdihs.cpB,
                           forceparams[type].pdihs.phiA,
                           forceparams[type].pdihs.phiB,
                           forceparams[type].pdihs.mult,
                           phi,lambda,&ddphi);
            ddphi_tot += ddphi;

            i += 5;
        }
        while(i < nbonds &&
              forceatoms[i+1] == ai &&
              forceatoms[i+2] == aj &&
              forceatoms[i+3] == ak &&
              forceatoms[i+4] == al);

        do_dih_fup_noshiftf(ai,aj,ak,al,ddphi_tot,r_ij,r_kj,r_kl,m,n,f);
    }
}
 +
 +
#ifdef SSE_PROPER_DIHEDRALS

/* As pdihs_noener above, but using SSE to calculate 4 dihedrals at once.
 * The trailing, incomplete group of fewer than 4 dihedrals is padded by
 * repeating the last quadruplet; the padded lanes get mdphi forced to 0
 * and are excluded from the force update by the i4 < nbonds bound. */
static void
pdihs_noener_sse(int nbonds,
                 const t_iatom forceatoms[],const t_iparams forceparams[],
                 const rvec x[],rvec f[],
                 const t_pbc *pbc,const t_graph *g,
                 real lambda,
                 const t_mdatoms *md,t_fcdata *fcd,
                 int *global_atom_index)
{
    int  i,i4,s;
    int  type,ai[4],aj[4],ak[4],al[4];
    int  t1[4],t2[4],t3[4];
    int  mult[4];
    real cp[4],mdphi[4];
    real ddphi;
    rvec_sse_t rs_array[13],*rs;
    real buf_array[24],*buf;
    __m128 mdphi_SSE,sin_SSE,cos_SSE;

    /* Ensure 16-byte alignment: round the over-allocated stack arrays
     * up to the next 16-byte boundary (rs needs 12 usable entries,
     * buf needs 20 usable floats — see dih_angle_sse) */
    rs  = (rvec_sse_t *)(((size_t)(rs_array +1)) & (~((size_t)15)));
    buf =      (float *)(((size_t)(buf_array+3)) & (~((size_t)15)));

    /* 4 dihedrals of 5 t_iatom entries each per iteration */
    for(i=0; (i<nbonds); i+=20)
    {
        /* Collect atoms quadruplets for 4 dihedrals */
        i4 = i;
        for(s=0; s<4; s++)
        {
            ai[s] = forceatoms[i4+1];
            aj[s] = forceatoms[i4+2];
            ak[s] = forceatoms[i4+3];
            al[s] = forceatoms[i4+4];
            /* At the end fill the arrays with identical entries */
            if (i4 + 5 < nbonds)
            {
                i4 += 5;
            }
        }

        /* Calculate 4 dihedral angles at once */
        dih_angle_sse(x,ai,aj,ak,al,pbc,t1,t2,t3,rs,buf);

        i4 = i;
        for(s=0; s<4; s++)
        {
            if (i4 < nbonds)
            {
                /* Calculate the coefficient and angle deviation */
                type = forceatoms[i4];
                dopdihs_mdphi(forceparams[type].pdihs.cpA,
                              forceparams[type].pdihs.cpB,
                              forceparams[type].pdihs.phiA,
                              forceparams[type].pdihs.phiB,
                              forceparams[type].pdihs.mult,
                              buf[16+s],lambda,&cp[s],&buf[16+s]);
                mult[s] = forceparams[type].pdihs.mult;
            }
            else
            {
                /* Padding lane: zero the angle deviation */
                buf[16+s] = 0;
            }
            i4 += 5;
        }

        /* Calculate 4 sines at once */
        mdphi_SSE = _mm_load_ps(buf+16);
        gmx_mm_sincos_ps(mdphi_SSE,&sin_SSE,&cos_SSE);
        _mm_store_ps(buf+16,sin_SSE);

        /* Scalar force update per dihedral, skipping padding lanes */
        i4 = i;
        s = 0;
        do
        {
            ddphi = -cp[s]*mult[s]*buf[16+s];

            do_dih_fup_noshiftf_precalc(ai[s],aj[s],ak[s],al[s],ddphi,
                                        buf[ 0+s],buf[ 4+s],
                                        buf[ 8+s],buf[12+s],
                                        rs[0+s].v,rs[4+s].v,
                                        f);
            s++;
            i4 += 5;
        }
        while (s < 4 && i4 < nbonds);
    }
}

#endif /* SSE_PROPER_DIHEDRALS */
 +
 +
/* Computes the harmonic improper-dihedral potential V = 0.5*kk*dp^2 for
 * each i-j-k-l quadruplet, where dp = phi - phi0 wrapped into (-pi,pi)
 * by make_dp_periodic().  kk and phi0 are lambda-interpolated between
 * the A and B states.  Accumulates forces and shift forces via
 * do_dih_fup() and dV/dlambda in *dvdlambda; returns the total energy.
 */
real idihs(int nbonds,
         const t_iatom forceatoms[],const t_iparams forceparams[],
         const rvec x[],rvec f[],rvec fshift[],
         const t_pbc *pbc,const t_graph *g,
         real lambda,real *dvdlambda,
         const t_mdatoms *md,t_fcdata *fcd,
         int *global_atom_index)
{
  int  i,type,ai,aj,ak,al;
  int  t1,t2,t3;
  real phi,phi0,dphi0,ddphi,sign,vtot;
  rvec r_ij,r_kj,r_kl,m,n;
  real L1,kk,dp,dp2,kA,kB,pA,pB,dvdl_term;

  L1 = 1.0-lambda;
  dvdl_term = 0;
  vtot = 0.0;
  /* forceatoms is a flat stream of (type, ai, aj, ak, al) records */
  for(i=0; (i<nbonds); ) {
    type = forceatoms[i++];
    ai   = forceatoms[i++];
    aj   = forceatoms[i++];
    ak   = forceatoms[i++];
    al   = forceatoms[i++];

    phi=dih_angle(x[ai],x[aj],x[ak],x[al],pbc,r_ij,r_kj,r_kl,m,n,
                  &sign,&t1,&t2,&t3);                 /*  84          */

    /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
     * force changes if we just apply a normal harmonic.
     * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
     * This means we will never have the periodicity problem, unless
     * the dihedral is Pi away from phiO, which is very unlikely due to
     * the potential.
     */
    kA = forceparams[type].harmonic.krA;
    kB = forceparams[type].harmonic.krB;
    pA = forceparams[type].harmonic.rA;
    pB = forceparams[type].harmonic.rB;

    kk    = L1*kA + lambda*kB;
    phi0  = (L1*pA + lambda*pB)*DEG2RAD;
    dphi0 = (pB - pA)*DEG2RAD;

    dp = phi-phi0;

    make_dp_periodic(&dp);

    dp2 = dp*dp;

    vtot += 0.5*kk*dp2;
    ddphi = -kk*dp;

    dvdl_term += 0.5*(kB - kA)*dp2 - kk*dphi0*dp;

    /* do_dih_fup expects dV/dphi, hence the sign flip on ddphi */
    do_dih_fup(ai,aj,ak,al,(real)(-ddphi),r_ij,r_kj,r_kl,m,n,
             f,fshift,pbc,g,x,t1,t2,t3);                      /* 112          */
    /* 218 TOTAL      */
#ifdef DEBUG
    if (debug)
      fprintf(debug,"idih: (%d,%d,%d,%d) phi=%g\n",
            ai,aj,ak,al,phi);
#endif
  }

  *dvdlambda += dvdl_term;
  return vtot;
}
 +
 +
/*! \brief returns dx, rdist, and dpdl for functions posres() and fbposres()
 *
 * Computes the deviation dx = x - pos of an atom from its lambda-
 * interpolated reference position under the chosen reference-coordinate
 * scaling scheme (refcoord_scaling), for the npbcdim periodic dimensions:
 *   erscNO  - reference positions used as-is (carried in rdist),
 *   erscALL - reference positions are stored box-relative and are scaled
 *             by the (triclinic) box,
 *   erscCOM - reference positions are taken relative to the scaled
 *             center of mass (comA_sc/comB_sc).
 * rdist and dpdl (the dposition/dlambda term) are returned per dimension.
 */
static void posres_dx(const rvec x, const rvec pos0A, const rvec pos0B,
                      const rvec comA_sc, const rvec comB_sc,
                      real lambda,
                      t_pbc *pbc, int refcoord_scaling,int npbcdim,
                      rvec dx, rvec rdist, rvec dpdl)
{
    int m,d;
    real posA, posB, L1, ref=0.;
    rvec pos;

    L1=1.0-lambda;

    for(m=0; m<DIM; m++)
    {
        posA = pos0A[m];
        posB = pos0B[m];
        if (m < npbcdim)
        {
            switch (refcoord_scaling)
            {
            case erscNO:
                /* No scaling: the lambda-mixed reference goes into rdist */
                ref      = 0;
                rdist[m] = L1*posA + lambda*posB;
                dpdl[m]  = posB - posA;
                    break;
            case erscALL:
                /* Box relative coordinates are stored for dimensions with pbc */
                posA *= pbc->box[m][m];
                posB *= pbc->box[m][m];
                /* Off-diagonal (triclinic) box contributions */
                for(d=m+1; d<npbcdim; d++)
                {
                    posA += pos0A[d]*pbc->box[d][m];
                    posB += pos0B[d]*pbc->box[d][m];
                }
                ref      = L1*posA + lambda*posB;
                rdist[m] = 0;
                dpdl[m]  = posB - posA;
                break;
            case erscCOM:
                /* Reference relative to the scaled center of mass */
                ref      = L1*comA_sc[m] + lambda*comB_sc[m];
                rdist[m] = L1*posA       + lambda*posB;
                dpdl[m]  = comB_sc[m] - comA_sc[m] + posB - posA;
                break;
            default:
                gmx_fatal(FARGS, "No such scaling method implemented");
            }
        }
        else
        {
            /* Non-periodic dimension: no scaling possible */
            ref      = L1*posA + lambda*posB;
            rdist[m] = 0;
            dpdl[m]  = posB - posA;
        }

        /* We do pbc_dx with ref+rdist,
         * since with only ref we can be up to half a box vector wrong.
         */
        pos[m] = ref + rdist[m];
    }

    if (pbc)
    {
        pbc_dx(pbc,x,pos,dx);
    }
    else
    {
        rvec_sub(x,pos,dx);
    }
}
 +
/*! \brief Adds forces of flat-bottomed position restraints to f[]
 *         and fixes vir_diag. Returns the flat-bottomed potential.
 *
 * An atom only feels a harmonic force once it is outside its
 * flat-bottomed region (a sphere, an x-y cylinder, or a 1D layer around
 * the reference position). With a negative radius rfb the region is
 * inverted: the atom is pushed out of the sphere/cylinder/layer instead.
 * Reference positions use identical A and B states (no lambda dependence).
 */
real fbposres(int nbonds,
              const t_iatom forceatoms[],const t_iparams forceparams[],
              const rvec x[],rvec f[],rvec vir_diag,
              t_pbc *pbc,
              int refcoord_scaling,int ePBC,rvec com)
/* compute flat-bottomed positions restraints */
{
    int  i,ai,m,d,type,npbcdim=0,fbdim;
    const t_iparams *pr;
    real vtot,kk,v;
    real ref=0,dr,dr2,rpot,rfb,rfb2,fact,invdr;
    rvec com_sc,rdist,pos,dx,dpdl,fm;
    gmx_bool bInvert;

    npbcdim = ePBC2npbcdim(ePBC);

    if (refcoord_scaling == erscCOM)
    {
        /* Scale the center of mass with the (possibly triclinic) box */
        clear_rvec(com_sc);
        for(m=0; m<npbcdim; m++)
        {
            for(d=m; d<npbcdim; d++)
            {
                com_sc[m] += com[d]*pbc->box[d][m];
            }
        }
    }

    vtot = 0.0;
    for(i=0; (i<nbonds); )
    {
        type = forceatoms[i++];
        ai   = forceatoms[i++];
        pr   = &forceparams[type];

        /* same calculation as for normal posres, but with identical A and B states, and lambda==0 */
        posres_dx(x[ai],forceparams[type].fbposres.pos0, forceparams[type].fbposres.pos0,
                  com_sc, com_sc, 0.0,
                  pbc, refcoord_scaling, npbcdim,
                  dx, rdist, dpdl);

        clear_rvec(fm);
        v=0.0;

        kk=pr->fbposres.k;
        rfb=pr->fbposres.r;
        rfb2=sqr(rfb);

        /* with rfb<0, push particle out of the sphere/cylinder/layer */
        bInvert=FALSE;
        if (rfb<0.){
            bInvert=TRUE;
            rfb=-rfb;
        }

        switch (pr->fbposres.geom)
        {
        case efbposresSPHERE:
            /* spherical flat-bottom posres */
            dr2=norm2(dx);
            /* restrain only outside the sphere (or inside, when inverted) */
            if ( dr2 > 0.0 &&
                 ( (dr2 > rfb2 && bInvert==FALSE ) || (dr2 < rfb2 && bInvert==TRUE ) )
                )
            {
                dr=sqrt(dr2);
                v = 0.5*kk*sqr(dr - rfb);
                fact = -kk*(dr-rfb)/dr;  /* Force pointing to the center pos0 */
                svmul(fact,dx,fm);
            }
            break;
        case efbposresCYLINDER:
            /* cylindrical flat-bottom posres in x-y plane. fm[ZZ] = 0. */
            dr2=sqr(dx[XX])+sqr(dx[YY]);
            if  ( dr2 > 0.0 &&
                  ( (dr2 > rfb2 && bInvert==FALSE ) || (dr2 < rfb2 && bInvert==TRUE ) )
                )
            {
                dr=sqrt(dr2);
                invdr=1./dr;
                v = 0.5*kk*sqr(dr - rfb);
                fm[XX] = -kk*(dr-rfb)*dx[XX]*invdr;  /* Force pointing to the center */
                fm[YY] = -kk*(dr-rfb)*dx[YY]*invdr;
            }
            break;
        case efbposresX: /* fbdim=XX */
        case efbposresY: /* fbdim=YY */
        case efbposresZ: /* fbdim=ZZ */
            /* 1D flat-bottom potential */
            fbdim = pr->fbposres.geom - efbposresX;
            dr=dx[fbdim];
            /* harmonic beyond +rfb (or inside (0,rfb) when inverted) ... */
            if ( ( dr>rfb && bInvert==FALSE ) || ( 0<dr && dr<rfb && bInvert==TRUE )  )
            {
                v = 0.5*kk*sqr(dr - rfb);
                fm[fbdim] = -kk*(dr - rfb);
            }
            /* ... and symmetrically below -rfb */
            else if ( (dr < (-rfb) && bInvert==FALSE ) || ( (-rfb)<dr && dr<0 && bInvert==TRUE ))
            {
                v = 0.5*kk*sqr(dr + rfb);
                fm[fbdim] = -kk*(dr + rfb);
            }
            break;
        }

        vtot += v;

        for (m=0; (m<DIM); m++)
        {
            f[ai][m]   += fm[m];
            /* Here we correct for the pbc_dx which included rdist */
            vir_diag[m] -= 0.5*(dx[m] + rdist[m])*fm[m];
        }
    }

    return vtot;
}
 +
 +
/*! \brief Harmonic position restraints.
 *
 * Both the reference positions and the per-dimension force constants are
 * interpolated linearly between the A and B topology states with weight
 * lambda; the corresponding dV/dlambda is accumulated in *dvdlambda.
 * With f==NULL and vir_diag==NULL only the energy (and dvdlambda) is
 * computed; forces and virial are skipped.
 *
 * Returns the total restraint potential.
 */
real posres(int nbonds,
            const t_iatom forceatoms[],const t_iparams forceparams[],
            const rvec x[],rvec f[],rvec vir_diag,
            t_pbc *pbc,
            real lambda,real *dvdlambda,
            int refcoord_scaling,int ePBC,rvec comA,rvec comB)
{
    int  i,ai,m,d,type,ki,npbcdim=0;
    const t_iparams *pr;
    real L1;
    real vtot,kk,fm;
    real posA,posB,ref=0;
    rvec comA_sc,comB_sc,rdist,dpdl,pos,dx;
    gmx_bool bForceValid = TRUE;

    if ((f==NULL) || (vir_diag==NULL)) {  /* should both be null together! */
        bForceValid = FALSE;
    }

    npbcdim = ePBC2npbcdim(ePBC);

    if (refcoord_scaling == erscCOM)
    {
        /* Scale the A and B centers of mass with the (possibly triclinic) box */
        clear_rvec(comA_sc);
        clear_rvec(comB_sc);
        for(m=0; m<npbcdim; m++)
        {
            for(d=m; d<npbcdim; d++)
            {
                comA_sc[m] += comA[d]*pbc->box[d][m];
                comB_sc[m] += comB[d]*pbc->box[d][m];
            }
        }
    }

    L1 = 1.0 - lambda;

    vtot = 0.0;
    for(i=0; (i<nbonds); )
    {
        type = forceatoms[i++];
        ai   = forceatoms[i++];
        pr   = &forceparams[type];
        
        /* return dx, rdist, and dpdl */
        posres_dx(x[ai],forceparams[type].posres.pos0A, forceparams[type].posres.pos0B,
                  comA_sc, comB_sc, lambda,
                  pbc, refcoord_scaling, npbcdim,
                  dx, rdist, dpdl);

        for (m=0; (m<DIM); m++)
        {
            /* Interpolated force constant; harmonic energy and force */
            kk          = L1*pr->posres.fcA[m] + lambda*pr->posres.fcB[m];
            fm          = -kk*dx[m];
            vtot       += 0.5*kk*dx[m]*dx[m];
            /* dV/dl has a term from the changing force constant and one
             * from the moving reference position (dpdl).
             */
            *dvdlambda +=
                0.5*(pr->posres.fcB[m] - pr->posres.fcA[m])*dx[m]*dx[m]
                -fm*dpdl[m];

            /* Here we correct for the pbc_dx which included rdist */
            if (bForceValid) {
                f[ai][m]   += fm;
                vir_diag[m] -= 0.5*(dx[m] + rdist[m])*fm;
            }
        }
    }

    return vtot;
}
 +
/*! \brief Shared kernel for angle restraints (angres and angresz).
 *
 * Restrains the angle between the vector ai->aj and either the vector
 * ak->al (bZAxis==FALSE) or the fixed z-axis (bZAxis==TRUE).
 * The potential and its derivative dV/dphi are obtained from
 * dopdihs_min(), reusing the pdihs parameters; dV/dlambda is
 * accumulated into *dvdlambda. Returns the total potential.
 */
static real low_angres(int nbonds,
                     const t_iatom forceatoms[],const t_iparams forceparams[],
                     const rvec x[],rvec f[],rvec fshift[],
                     const t_pbc *pbc,const t_graph *g,
                     real lambda,real *dvdlambda,
                     gmx_bool bZAxis)
{
  int  i,m,type,ai,aj,ak,al;
  int  t1,t2;
  real phi,cos_phi,cos_phi2,vid,vtot,dVdphi;
  rvec r_ij,r_kl,f_i,f_k={0,0,0};
  real st,sth,nrij2,nrkl2,c,cij,ckl;

  ivec dt;  
  t2 = 0; /* avoid warning with gcc-3.3. It is never used uninitialized */

  vtot = 0.0;
  ak=al=0; /* to avoid warnings */
  for(i=0; i<nbonds; ) {
    type = forceatoms[i++];
    ai   = forceatoms[i++];
    aj   = forceatoms[i++];
    t1   = pbc_rvec_sub(pbc,x[aj],x[ai],r_ij);                /*  3           */
    if (!bZAxis) {      
      ak   = forceatoms[i++];
      al   = forceatoms[i++];
      t2   = pbc_rvec_sub(pbc,x[al],x[ak],r_kl);           /*  3              */
    } else {
      /* Second vector is the fixed z-axis */
      r_kl[XX] = 0;
      r_kl[YY] = 0;
      r_kl[ZZ] = 1;
    }

    cos_phi = cos_angle(r_ij,r_kl);           /* 25           */
    phi     = acos(cos_phi);                    /* 10           */

    *dvdlambda += dopdihs_min(forceparams[type].pdihs.cpA,
                              forceparams[type].pdihs.cpB,
                              forceparams[type].pdihs.phiA,
                              forceparams[type].pdihs.phiB,
                              forceparams[type].pdihs.mult,
                              phi,lambda,&vid,&dVdphi); /*  40  */
    
    vtot += vid;

    /* Only apply forces when the angle is well-defined (sin(phi) != 0) */
    cos_phi2 = sqr(cos_phi);                    /*   1                */
    if (cos_phi2 < 1) {
      st  = -dVdphi*gmx_invsqrt(1 - cos_phi2);      /*  12            */
      sth = st*cos_phi;                               /*   1          */
      nrij2 = iprod(r_ij,r_ij);                       /*   5          */
      nrkl2 = iprod(r_kl,r_kl);                 /*   5          */
      
      c   = st*gmx_invsqrt(nrij2*nrkl2);              /*  11          */ 
      cij = sth/nrij2;                                /*  10          */
      ckl = sth/nrkl2;                                /*  10          */
      
      for (m=0; m<DIM; m++) {                 /*  18+18       */
      f_i[m] = (c*r_kl[m]-cij*r_ij[m]);
      f[ai][m] += f_i[m];
      f[aj][m] -= f_i[m];
      if (!bZAxis) {
        f_k[m] = (c*r_ij[m]-ckl*r_kl[m]);
        f[ak][m] += f_k[m];
        f[al][m] -= f_k[m];
      }
      }
      
      /* Shift forces for the virial */
      if (g) {
      ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
      t1=IVEC2IS(dt);
      }
      rvec_inc(fshift[t1],f_i);
      rvec_dec(fshift[CENTRAL],f_i);
      if (!bZAxis) {
      if (g) {
        ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,al),dt);
        t2=IVEC2IS(dt);
      }
      rvec_inc(fshift[t2],f_k);
      rvec_dec(fshift[CENTRAL],f_k);
      }
    }
  }

  return vtot;  /*  184 / 157 (bZAxis)  total  */
}
 +
 +real angres(int nbonds,
 +          const t_iatom forceatoms[],const t_iparams forceparams[],
 +          const rvec x[],rvec f[],rvec fshift[],
 +          const t_pbc *pbc,const t_graph *g,
 +          real lambda,real *dvdlambda,
 +          const t_mdatoms *md,t_fcdata *fcd,
 +          int *global_atom_index)
 +{
 +  return low_angres(nbonds,forceatoms,forceparams,x,f,fshift,pbc,g,
 +                  lambda,dvdlambda,FALSE);
 +}
 +
 +real angresz(int nbonds,
 +           const t_iatom forceatoms[],const t_iparams forceparams[],
 +           const rvec x[],rvec f[],rvec fshift[],
 +           const t_pbc *pbc,const t_graph *g,
 +           real lambda,real *dvdlambda,
 +           const t_mdatoms *md,t_fcdata *fcd,
 +           int *global_atom_index)
 +{
 +  return low_angres(nbonds,forceatoms,forceparams,x,f,fshift,pbc,g,
 +                    lambda,dvdlambda,TRUE);
 +}
 +
/*! \brief Flat-bottomed dihedral restraints.
 *
 * No energy or force is applied while |phi - phi0| <= dphi; outside that
 * window a harmonic potential with force constant kfac acts on the excess
 * angle ddp. phi0, dphi and kfac are interpolated linearly between the
 * A and B states; dV/dlambda is accumulated into *dvdlambda.
 * Returns the total restraint potential.
 */
real dihres(int nbonds,
            const t_iatom forceatoms[],const t_iparams forceparams[],
            const rvec x[],rvec f[],rvec fshift[],
            const t_pbc *pbc,const t_graph *g,
            real lambda,real *dvdlambda,
            const t_mdatoms *md,t_fcdata *fcd,
            int *global_atom_index)
{
    real vtot = 0;
    int  ai,aj,ak,al,i,k,type,t1,t2,t3;
    real phi0A,phi0B,dphiA,dphiB,kfacA,kfacB,phi0,dphi,kfac;
    real phi,ddphi,ddp,ddp2,dp,sign,d2r,fc,L1;
    rvec r_ij,r_kj,r_kl,m,n;

    L1 = 1.0-lambda;

    d2r = DEG2RAD;
    k   = 0;

    for (i=0; (i<nbonds); )
    {
        type = forceatoms[i++];
        ai   = forceatoms[i++];
        aj   = forceatoms[i++];
        ak   = forceatoms[i++];
        al   = forceatoms[i++];

        /* A- and B-state parameters, angles converted to radians */
        phi0A  = forceparams[type].dihres.phiA*d2r;
        dphiA  = forceparams[type].dihres.dphiA*d2r;
        kfacA  = forceparams[type].dihres.kfacA;

        phi0B  = forceparams[type].dihres.phiB*d2r;
        dphiB  = forceparams[type].dihres.dphiB*d2r;
        kfacB  = forceparams[type].dihres.kfacB;

        /* Linear interpolation between the A and B states */
        phi0  = L1*phi0A + lambda*phi0B;
        dphi  = L1*dphiA + lambda*dphiB;
        kfac = L1*kfacA + lambda*kfacB;

        phi = dih_angle(x[ai],x[aj],x[ak],x[al],pbc,r_ij,r_kj,r_kl,m,n,
                        &sign,&t1,&t2,&t3);
        /* 84 flops */

        if (debug)
        {
            fprintf(debug,"dihres[%d]: %d %d %d %d : phi=%f, dphi=%f, kfac=%f\n",
                    k++,ai,aj,ak,al,phi0,dphi,kfac);
        }
        /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
         * force changes if we just apply a normal harmonic.
         * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
         * This means we will never have the periodicity problem, unless
         * the dihedral is Pi away from phiO, which is very unlikely due to
         * the potential.
         */
        dp = phi-phi0;
        make_dp_periodic(&dp);

        /* Excess angle outside the flat-bottomed window [-dphi, dphi] */
        if (dp > dphi)
        {
            ddp = dp-dphi;
        }
        else if (dp < -dphi)
        {
            ddp = dp+dphi;
        }
        else
        {
            ddp = 0;
        }

        if (ddp != 0.0)
        {
            ddp2 = ddp*ddp;
            vtot += 0.5*kfac*ddp2;
            ddphi = kfac*ddp;

            *dvdlambda += 0.5*(kfacB - kfacA)*ddp2;
            /* lambda dependence from changing restraint distances */
            if (ddp > 0)
            {
                *dvdlambda -= kfac*ddp*((dphiB - dphiA)+(phi0B - phi0A));
            }
            else if (ddp < 0)
            {
                *dvdlambda += kfac*ddp*((dphiB - dphiA)-(phi0B - phi0A));
            }
            do_dih_fup(ai,aj,ak,al,ddphi,r_ij,r_kj,r_kl,m,n,
                       f,fshift,pbc,g,x,t1,t2,t3);            /* 112          */
        }
    }
    return vtot;
}
 +
 +
 +real unimplemented(int nbonds,
 +                 const t_iatom forceatoms[],const t_iparams forceparams[],
 +                 const rvec x[],rvec f[],rvec fshift[],
 +                 const t_pbc *pbc,const t_graph *g,
 +                 real lambda,real *dvdlambda,
 +                 const t_mdatoms *md,t_fcdata *fcd,
 +                 int *global_atom_index)
 +{
 +  gmx_impl("*** you are using a not implemented function");
 +
 +  return 0.0; /* To make the compiler happy */
 +}
 +
/*! \brief Ryckaert-Bellemans dihedrals:
 *         V = sum_{n=0..5} C_n cos^n(psi), with psi = phi - pi
 *         (polymer convention).
 *
 * The six coefficients are interpolated linearly between the A and B
 * states; the unrolled loop below accumulates the energy (v), the
 * derivative sum n*C_n*cos^{n-1} (ddphi, multiplied by -sin(phi) at the
 * end) and dV/dlambda. Returns the total potential.
 */
real rbdihs(int nbonds,
          const t_iatom forceatoms[],const t_iparams forceparams[],
          const rvec x[],rvec f[],rvec fshift[],
          const t_pbc *pbc,const t_graph *g,
          real lambda,real *dvdlambda,
          const t_mdatoms *md,t_fcdata *fcd,
          int *global_atom_index)
{
  const real c0=0.0,c1=1.0,c2=2.0,c3=3.0,c4=4.0,c5=5.0;
  int  type,ai,aj,ak,al,i,j;
  int  t1,t2,t3;
  rvec r_ij,r_kj,r_kl,m,n;
  real parmA[NR_RBDIHS];
  real parmB[NR_RBDIHS];
  real parm[NR_RBDIHS];
  real cos_phi,phi,rbp,rbpBA;
  real v,sign,ddphi,sin_phi;
  real cosfac,vtot;
  real L1   = 1.0-lambda;
  real dvdl_term=0;

  vtot = 0.0;
  for(i=0; (i<nbonds); ) {
    type = forceatoms[i++];
    ai   = forceatoms[i++];
    aj   = forceatoms[i++];
    ak   = forceatoms[i++];
    al   = forceatoms[i++];

      phi=dih_angle(x[ai],x[aj],x[ak],x[al],pbc,r_ij,r_kj,r_kl,m,n,
                    &sign,&t1,&t2,&t3);                       /*  84          */

    /* Change to polymer convention */
    if (phi < c0)
      phi += M_PI;
    else
      phi -= M_PI;                    /*   1          */
      
    cos_phi = cos(phi);               
    /* Beware of accuracy loss, cannot use 1-sqrt(cos^2) ! */
    sin_phi = sin(phi);

    /* Interpolate the six RB coefficients between the A and B states */
    for(j=0; (j<NR_RBDIHS); j++) {
      parmA[j] = forceparams[type].rbdihs.rbcA[j];
      parmB[j] = forceparams[type].rbdihs.rbcB[j];
      parm[j]  = L1*parmA[j]+lambda*parmB[j];
    }
    /* Calculate cosine powers */
    /* Calculate the energy */
    /* Calculate the derivative */

    /* Unrolled accumulation over the six RB terms; cosfac carries
     * the running power cos^n(phi).
     */
    v       = parm[0];
    dvdl_term   += (parmB[0]-parmA[0]);
    ddphi   = c0;
    cosfac  = c1;
    
    rbp     = parm[1];
    rbpBA   = parmB[1]-parmA[1];
    ddphi  += rbp*cosfac;
    cosfac *= cos_phi;
    v      += cosfac*rbp;
    dvdl_term   += cosfac*rbpBA;
    rbp     = parm[2];
    rbpBA   = parmB[2]-parmA[2];    
    ddphi  += c2*rbp*cosfac;
    cosfac *= cos_phi;
    v      += cosfac*rbp;
    dvdl_term   += cosfac*rbpBA;
    rbp     = parm[3];
    rbpBA   = parmB[3]-parmA[3];
    ddphi  += c3*rbp*cosfac;
    cosfac *= cos_phi;
    v      += cosfac*rbp;
    dvdl_term   += cosfac*rbpBA;
    rbp     = parm[4];
    rbpBA   = parmB[4]-parmA[4];
    ddphi  += c4*rbp*cosfac;
    cosfac *= cos_phi;
    v      += cosfac*rbp;
    dvdl_term   += cosfac*rbpBA;
    rbp     = parm[5];
    rbpBA   = parmB[5]-parmA[5];
    ddphi  += c5*rbp*cosfac;
    cosfac *= cos_phi;
    v      += cosfac*rbp;
    dvdl_term   += cosfac*rbpBA;
   
    /* Chain rule: dV/dphi = -sin(phi) * sum n*C_n*cos^{n-1}(phi) */
    ddphi = -ddphi*sin_phi;                           /*  11          */
    
    do_dih_fup(ai,aj,ak,al,ddphi,r_ij,r_kj,r_kl,m,n,
             f,fshift,pbc,g,x,t1,t2,t3);              /* 112          */
    vtot += v;
  }  
  *dvdlambda += dvdl_term;

  return vtot;
}
 +
/* Map the grid index ip onto the periodic CMAP grid of size grid_spacing
 * and return, through the pointer arguments, the periodic neighbour
 * indices ip-1, ip+1 and ip+2. Returns the wrapped index itself.
 */
int cmap_setup_grid_index(int ip, int grid_spacing, int *ipm1, int *ipp1, int *ipp2)
{
    int prev, next, next2;

    /* Fold an out-of-range index back onto the grid */
    if (ip < 0)
    {
        ip += grid_spacing - 1;
    }
    else if (ip > grid_spacing)
    {
        ip -= grid_spacing + 1;
    }

    prev  = ip - 1;
    next  = ip + 1;
    next2 = ip + 2;

    /* Wrap the neighbours that fall off either end of the grid */
    if (ip == 0)
    {
        prev = grid_spacing - 1;
    }
    else if (ip == grid_spacing - 2)
    {
        next2 = 0;
    }
    else if (ip == grid_spacing - 1)
    {
        next  = 0;
        next2 = 1;
    }

    *ipm1 = prev;
    *ipp1 = next;
    *ipp2 = next2;

    return ip;
}
 +
 +real cmap_dihs(int nbonds,
 +                         const t_iatom forceatoms[],const t_iparams forceparams[],
 +               const gmx_cmap_t *cmap_grid,
 +                         const rvec x[],rvec f[],rvec fshift[],
 +                         const t_pbc *pbc,const t_graph *g,
 +                         real lambda,real *dvdlambda,
 +                         const t_mdatoms *md,t_fcdata *fcd,
 +                         int *global_atom_index)
 +{
 +      int i,j,k,n,idx;
 +      int ai,aj,ak,al,am;
 +      int a1i,a1j,a1k,a1l,a2i,a2j,a2k,a2l;
 +      int type,cmapA;
 +      int t11,t21,t31,t12,t22,t32;
 +      int iphi1,ip1m1,ip1p1,ip1p2;
 +      int iphi2,ip2m1,ip2p1,ip2p2;
 +      int l1,l2,l3,l4;
 +      int pos1,pos2,pos3,pos4,tmp;
 +      
 +      real ty[4],ty1[4],ty2[4],ty12[4],tc[16],tx[16];
 +      real phi1,psi1,cos_phi1,sin_phi1,sign1,xphi1;
 +      real phi2,psi2,cos_phi2,sin_phi2,sign2,xphi2;
 +      real dx,xx,tt,tu,e,df1,df2,ddf1,ddf2,ddf12,vtot;
 +      real ra21,rb21,rg21,rg1,rgr1,ra2r1,rb2r1,rabr1;
 +      real ra22,rb22,rg22,rg2,rgr2,ra2r2,rb2r2,rabr2;
 +      real fg1,hg1,fga1,hgb1,gaa1,gbb1;
 +      real fg2,hg2,fga2,hgb2,gaa2,gbb2;
 +      real fac;
 +      
 +      rvec r1_ij, r1_kj, r1_kl,m1,n1;
 +      rvec r2_ij, r2_kj, r2_kl,m2,n2;
 +      rvec f1_i,f1_j,f1_k,f1_l;
 +      rvec f2_i,f2_j,f2_k,f2_l;
 +      rvec a1,b1,a2,b2;
 +      rvec f1,g1,h1,f2,g2,h2;
 +      rvec dtf1,dtg1,dth1,dtf2,dtg2,dth2;
 +      ivec jt1,dt1_ij,dt1_kj,dt1_lj;
 +      ivec jt2,dt2_ij,dt2_kj,dt2_lj;
 +
 +    const real *cmapd;
 +
 +      int loop_index[4][4] = {
 +              {0,4,8,12},
 +              {1,5,9,13},
 +              {2,6,10,14},
 +              {3,7,11,15}
 +      };
 +      
 +      /* Total CMAP energy */
 +      vtot = 0;
 +      
 +      for(n=0;n<nbonds; )
 +      {
 +              /* Five atoms are involved in the two torsions */
 +              type   = forceatoms[n++];
 +              ai     = forceatoms[n++];
 +              aj     = forceatoms[n++];
 +              ak     = forceatoms[n++];
 +              al     = forceatoms[n++];
 +              am     = forceatoms[n++];
 +              
 +              /* Which CMAP type is this */
 +              cmapA = forceparams[type].cmap.cmapA;
 +        cmapd = cmap_grid->cmapdata[cmapA].cmap;
 +
 +              /* First torsion */
 +              a1i   = ai;
 +              a1j   = aj;
 +              a1k   = ak;
 +              a1l   = al;
 +              
 +              phi1  = dih_angle(x[a1i], x[a1j], x[a1k], x[a1l], pbc, r1_ij, r1_kj, r1_kl, m1, n1,
 +                                                 &sign1, &t11, &t21, &t31); /* 84 */
 +              
 +        cos_phi1 = cos(phi1);
 +        
 +              a1[0] = r1_ij[1]*r1_kj[2]-r1_ij[2]*r1_kj[1];
 +              a1[1] = r1_ij[2]*r1_kj[0]-r1_ij[0]*r1_kj[2];
 +              a1[2] = r1_ij[0]*r1_kj[1]-r1_ij[1]*r1_kj[0]; /* 9 */
 +              
 +              b1[0] = r1_kl[1]*r1_kj[2]-r1_kl[2]*r1_kj[1];
 +              b1[1] = r1_kl[2]*r1_kj[0]-r1_kl[0]*r1_kj[2];
 +              b1[2] = r1_kl[0]*r1_kj[1]-r1_kl[1]*r1_kj[0]; /* 9 */
 +              
 +              tmp = pbc_rvec_sub(pbc,x[a1l],x[a1k],h1);
 +              
 +              ra21  = iprod(a1,a1);       /* 5 */
 +              rb21  = iprod(b1,b1);       /* 5 */
 +              rg21  = iprod(r1_kj,r1_kj); /* 5 */
 +              rg1   = sqrt(rg21);
 +              
 +              rgr1  = 1.0/rg1;
 +              ra2r1 = 1.0/ra21;
 +              rb2r1 = 1.0/rb21;
 +              rabr1 = sqrt(ra2r1*rb2r1);
 +              
 +              sin_phi1 = rg1 * rabr1 * iprod(a1,h1) * (-1);
 +              
 +              if(cos_phi1 < -0.5 || cos_phi1 > 0.5)
 +              {
 +                      phi1 = asin(sin_phi1);
 +                      
 +                      if(cos_phi1 < 0)
 +                      {
 +                              if(phi1 > 0)
 +                              {
 +                                      phi1 = M_PI - phi1;
 +                              }
 +                              else
 +                              {
 +                                      phi1 = -M_PI - phi1;
 +                              }
 +                      }
 +              }
 +              else
 +              {
 +                      phi1 = acos(cos_phi1);
 +                      
 +                      if(sin_phi1 < 0)
 +                      {
 +                              phi1 = -phi1;
 +                      }
 +              }
 +              
 +              xphi1 = phi1 + M_PI; /* 1 */
 +              
 +              /* Second torsion */
 +              a2i   = aj;
 +              a2j   = ak;
 +              a2k   = al;
 +              a2l   = am;
 +              
 +              phi2  = dih_angle(x[a2i], x[a2j], x[a2k], x[a2l], pbc, r2_ij, r2_kj, r2_kl, m2, n2,
 +                                                &sign2, &t12, &t22, &t32); /* 84 */
 +              
 +        cos_phi2 = cos(phi2);
 +
 +              a2[0] = r2_ij[1]*r2_kj[2]-r2_ij[2]*r2_kj[1];
 +              a2[1] = r2_ij[2]*r2_kj[0]-r2_ij[0]*r2_kj[2];
 +              a2[2] = r2_ij[0]*r2_kj[1]-r2_ij[1]*r2_kj[0]; /* 9 */
 +              
 +              b2[0] = r2_kl[1]*r2_kj[2]-r2_kl[2]*r2_kj[1];
 +              b2[1] = r2_kl[2]*r2_kj[0]-r2_kl[0]*r2_kj[2];
 +              b2[2] = r2_kl[0]*r2_kj[1]-r2_kl[1]*r2_kj[0]; /* 9 */
 +              
 +              tmp = pbc_rvec_sub(pbc,x[a2l],x[a2k],h2);
 +              
 +              ra22  = iprod(a2,a2);         /* 5 */
 +              rb22  = iprod(b2,b2);         /* 5 */
 +              rg22  = iprod(r2_kj,r2_kj);   /* 5 */
 +              rg2   = sqrt(rg22);
 +              
 +              rgr2  = 1.0/rg2;
 +              ra2r2 = 1.0/ra22;
 +              rb2r2 = 1.0/rb22;
 +              rabr2 = sqrt(ra2r2*rb2r2);
 +              
 +              sin_phi2 = rg2 * rabr2 * iprod(a2,h2) * (-1);
 +              
 +              if(cos_phi2 < -0.5 || cos_phi2 > 0.5)
 +              {
 +                      phi2 = asin(sin_phi2);
 +                      
 +                      if(cos_phi2 < 0)
 +                      {
 +                              if(phi2 > 0)
 +                              {
 +                                      phi2 = M_PI - phi2;
 +                              }
 +                              else
 +                              {
 +                                      phi2 = -M_PI - phi2;
 +                              }
 +                      }
 +              }
 +              else
 +              {
 +                      phi2 = acos(cos_phi2);
 +                      
 +                      if(sin_phi2 < 0)
 +                      {
 +                              phi2 = -phi2;
 +                      }
 +              }
 +              
 +              xphi2 = phi2 + M_PI; /* 1 */
 +              
 +              /* Range mangling */
 +              if(xphi1<0)
 +              {
 +                      xphi1 = xphi1 + 2*M_PI;
 +              }
 +              else if(xphi1>=2*M_PI)
 +              {
 +                      xphi1 = xphi1 - 2*M_PI;
 +              }
 +              
 +              if(xphi2<0)
 +              {
 +                      xphi2 = xphi2 + 2*M_PI;
 +              }
 +              else if(xphi2>=2*M_PI)
 +              {
 +                      xphi2 = xphi2 - 2*M_PI;
 +              }
 +              
 +              /* Number of grid points */
 +              dx = 2*M_PI / cmap_grid->grid_spacing;
 +              
 +              /* Where on the grid are we */
 +              iphi1 = (int)(xphi1/dx);
 +              iphi2 = (int)(xphi2/dx);
 +              
 +              iphi1 = cmap_setup_grid_index(iphi1, cmap_grid->grid_spacing, &ip1m1,&ip1p1,&ip1p2);
 +              iphi2 = cmap_setup_grid_index(iphi2, cmap_grid->grid_spacing, &ip2m1,&ip2p1,&ip2p2);
 +              
 +              pos1    = iphi1*cmap_grid->grid_spacing+iphi2;
 +              pos2    = ip1p1*cmap_grid->grid_spacing+iphi2;
 +              pos3    = ip1p1*cmap_grid->grid_spacing+ip2p1;
 +              pos4    = iphi1*cmap_grid->grid_spacing+ip2p1;
 +
 +              ty[0]   = cmapd[pos1*4];
 +              ty[1]   = cmapd[pos2*4];
 +              ty[2]   = cmapd[pos3*4];
 +              ty[3]   = cmapd[pos4*4];
 +              
 +              ty1[0]   = cmapd[pos1*4+1];
 +              ty1[1]   = cmapd[pos2*4+1];
 +              ty1[2]   = cmapd[pos3*4+1];
 +              ty1[3]   = cmapd[pos4*4+1];
 +              
 +              ty2[0]   = cmapd[pos1*4+2];
 +              ty2[1]   = cmapd[pos2*4+2];
 +              ty2[2]   = cmapd[pos3*4+2];
 +              ty2[3]   = cmapd[pos4*4+2];
 +              
 +              ty12[0]   = cmapd[pos1*4+3];
 +              ty12[1]   = cmapd[pos2*4+3];
 +              ty12[2]   = cmapd[pos3*4+3];
 +              ty12[3]   = cmapd[pos4*4+3];
 +              
 +              /* Switch to degrees */
 +              dx = 360.0 / cmap_grid->grid_spacing;
 +              xphi1 = xphi1 * RAD2DEG;
 +              xphi2 = xphi2 * RAD2DEG; 
 +              
 +              for(i=0;i<4;i++) /* 16 */
 +              {
 +                      tx[i] = ty[i];
 +                      tx[i+4] = ty1[i]*dx;
 +                      tx[i+8] = ty2[i]*dx;
 +                      tx[i+12] = ty12[i]*dx*dx;
 +              }
 +              
 +              idx=0;
 +              for(i=0;i<4;i++) /* 1056 */
 +              {
 +                      for(j=0;j<4;j++)
 +                      {
 +                              xx = 0;
 +                              for(k=0;k<16;k++)
 +                              {
 +                                      xx = xx + cmap_coeff_matrix[k*16+idx]*tx[k];
 +                              }
 +                              
 +                              idx++;
 +                              tc[i*4+j]=xx;
 +                      }
 +              }
 +              
 +              tt    = (xphi1-iphi1*dx)/dx;
 +              tu    = (xphi2-iphi2*dx)/dx;
 +              
 +              e     = 0;
 +              df1   = 0;
 +              df2   = 0;
 +              ddf1  = 0;
 +              ddf2  = 0;
 +              ddf12 = 0;
 +              
 +              for(i=3;i>=0;i--)
 +              {
 +                      l1 = loop_index[i][3];
 +                      l2 = loop_index[i][2];
 +                      l3 = loop_index[i][1];
 +                      
 +                      e     = tt * e    + ((tc[i*4+3]*tu+tc[i*4+2])*tu + tc[i*4+1])*tu+tc[i*4];
 +                      df1   = tu * df1  + (3.0*tc[l1]*tt+2.0*tc[l2])*tt+tc[l3];
 +                      df2   = tt * df2  + (3.0*tc[i*4+3]*tu+2.0*tc[i*4+2])*tu+tc[i*4+1];
 +                      ddf1  = tu * ddf1 + 2.0*3.0*tc[l1]*tt+2.0*tc[l2];
 +                      ddf2  = tt * ddf2 + 2.0*3.0*tc[4*i+3]*tu+2.0*tc[4*i+2];
 +              }
 +              
 +              ddf12 = tc[5] + 2.0*tc[9]*tt + 3.0*tc[13]*tt*tt + 2.0*tu*(tc[6]+2.0*tc[10]*tt+3.0*tc[14]*tt*tt) +
 +              3.0*tu*tu*(tc[7]+2.0*tc[11]*tt+3.0*tc[15]*tt*tt);
 +              
 +              fac     = RAD2DEG/dx;
 +              df1     = df1   * fac;
 +              df2     = df2   * fac;
 +              ddf1    = ddf1  * fac * fac;
 +              ddf2    = ddf2  * fac * fac;
 +              ddf12   = ddf12 * fac * fac;
 +              
 +              /* CMAP energy */
 +              vtot += e;
 +              
 +              /* Do forces - first torsion */
 +              fg1       = iprod(r1_ij,r1_kj);
 +              hg1       = iprod(r1_kl,r1_kj);
 +              fga1      = fg1*ra2r1*rgr1;
 +              hgb1      = hg1*rb2r1*rgr1;
 +              gaa1      = -ra2r1*rg1;
 +              gbb1      = rb2r1*rg1;
 +              
 +              for(i=0;i<DIM;i++)
 +              {
 +                      dtf1[i]   = gaa1 * a1[i];
 +                      dtg1[i]   = fga1 * a1[i] - hgb1 * b1[i];
 +                      dth1[i]   = gbb1 * b1[i];
 +                      
 +                      f1[i]     = df1  * dtf1[i];
 +                      g1[i]     = df1  * dtg1[i];
 +                      h1[i]     = df1  * dth1[i];
 +                      
 +                      f1_i[i]   =  f1[i];
 +                      f1_j[i]   = -f1[i] - g1[i];
 +                      f1_k[i]   =  h1[i] + g1[i];
 +                      f1_l[i]   = -h1[i];
 +                      
 +                      f[a1i][i] = f[a1i][i] + f1_i[i];
 +                      f[a1j][i] = f[a1j][i] + f1_j[i]; /* - f1[i] - g1[i] */                                                            
 +                      f[a1k][i] = f[a1k][i] + f1_k[i]; /* h1[i] + g1[i] */                                                            
 +                      f[a1l][i] = f[a1l][i] + f1_l[i]; /* h1[i] */                                                                       
 +              }
 +              
 +              /* Do forces - second torsion */
 +              fg2       = iprod(r2_ij,r2_kj);
 +              hg2       = iprod(r2_kl,r2_kj);
 +              fga2      = fg2*ra2r2*rgr2;
 +              hgb2      = hg2*rb2r2*rgr2;
 +              gaa2      = -ra2r2*rg2;
 +              gbb2      = rb2r2*rg2;
 +              
 +              for(i=0;i<DIM;i++)
 +              {
 +                      dtf2[i]   = gaa2 * a2[i];
 +                      dtg2[i]   = fga2 * a2[i] - hgb2 * b2[i];
 +                      dth2[i]   = gbb2 * b2[i];
 +                      
 +                      f2[i]     = df2  * dtf2[i];
 +                      g2[i]     = df2  * dtg2[i];
 +                      h2[i]     = df2  * dth2[i];
 +                      
 +                      f2_i[i]   =  f2[i];
 +                      f2_j[i]   = -f2[i] - g2[i];
 +                      f2_k[i]   =  h2[i] + g2[i];
 +                      f2_l[i]   = -h2[i];
 +                      
 +                      f[a2i][i] = f[a2i][i] + f2_i[i]; /* f2[i] */                                                                        
 +                      f[a2j][i] = f[a2j][i] + f2_j[i]; /* - f2[i] - g2[i] */                                                              
 +                      f[a2k][i] = f[a2k][i] + f2_k[i]; /* h2[i] + g2[i] */                            
 +                      f[a2l][i] = f[a2l][i] + f2_l[i]; /* - h2[i] */                                                                      
 +              }
 +              
 +              /* Shift forces */
 +              if(g)
 +              {
 +                      copy_ivec(SHIFT_IVEC(g,a1j), jt1);
 +                      ivec_sub(SHIFT_IVEC(g,a1i),  jt1,dt1_ij);
 +                      ivec_sub(SHIFT_IVEC(g,a1k),  jt1,dt1_kj);
 +                      ivec_sub(SHIFT_IVEC(g,a1l),  jt1,dt1_lj);
 +                      t11 = IVEC2IS(dt1_ij);
 +                      t21 = IVEC2IS(dt1_kj);
 +                      t31 = IVEC2IS(dt1_lj);
 +                      
 +                      copy_ivec(SHIFT_IVEC(g,a2j), jt2);
 +                      ivec_sub(SHIFT_IVEC(g,a2i),  jt2,dt2_ij);
 +                      ivec_sub(SHIFT_IVEC(g,a2k),  jt2,dt2_kj);
 +                      ivec_sub(SHIFT_IVEC(g,a2l),  jt2,dt2_lj);
 +                      t12 = IVEC2IS(dt2_ij);
 +                      t22 = IVEC2IS(dt2_kj);
 +                      t32 = IVEC2IS(dt2_lj);
 +              }
 +              else if(pbc)
 +              {
 +                      t31 = pbc_rvec_sub(pbc,x[a1l],x[a1j],h1);
 +                      t32 = pbc_rvec_sub(pbc,x[a2l],x[a2j],h2);
 +              }
 +              else
 +              {
 +                      t31 = CENTRAL;
 +                      t32 = CENTRAL;
 +              }
 +              
 +              rvec_inc(fshift[t11],f1_i);
 +              rvec_inc(fshift[CENTRAL],f1_j);
 +              rvec_inc(fshift[t21],f1_k);
 +              rvec_inc(fshift[t31],f1_l);
 +              
 +              rvec_inc(fshift[t21],f2_i);
 +              rvec_inc(fshift[CENTRAL],f2_j);
 +              rvec_inc(fshift[t22],f2_k);
 +              rvec_inc(fshift[t32],f2_l);
 +      }       
 +      return vtot;
 +}
 +
 +
 +
 +/***********************************************************
 + *
 + *   G R O M O S  9 6   F U N C T I O N S
 + *
 + ***********************************************************/
  +/* GROMOS-96 style harmonic potential in a generic coordinate x:
  + * V = 0.5*k*(x - x0)^2, with k and x0 linearly interpolated between
  + * the A and B topology states by lambda. Writes the potential to *V
  + * and the force factor -dV/dx to *F; returns dV/dlambda.
  + * Note: callers in this file pass the squared bond length (g96bonds)
  + * or cos(theta) (g96angles) as x, per the G96 functional forms.
  + */
  +real g96harmonic(real kA,real kB,real xA,real xB,real x,real lambda,
  +               real *V,real *F)
  +{
  +  const real half=0.5;
  +  real  L1,kk,x0,dx,dx2;
  +  real  v,f,dvdlambda;
  +  
  +  /* State A/B interpolation of force constant and reference value */
  +  L1    = 1.0-lambda;
  +  kk    = L1*kA+lambda*kB;
  +  x0    = L1*xA+lambda*xB;
  +  
  +  dx    = x-x0;
  +  dx2   = dx*dx;
  +  
  +  f     = -kk*dx;
  +  v     = half*kk*dx2;
  +  dvdlambda  = half*(kB-kA)*dx2 + (xA-xB)*kk*dx;
  +  
  +  *F    = f;
  +  *V    = v;
  +  
  +  return dvdlambda;
  +  
  +  /* That was 21 flops */
  +}
 +
  +/* GROMOS-96 quartic bonds: V = 0.25*k*(r^2 - b0^2)^2 is handled by
  + * calling g96harmonic on the SQUARED distance dr2, hence the extra
  + * factor 0.5 on vbond below. Accumulates forces in f, shift forces in
  + * fshift, adds dV/dlambda to *dvdlambda, and returns the total energy.
  + */
  +real g96bonds(int nbonds,
  +            const t_iatom forceatoms[],const t_iparams forceparams[],
  +            const rvec x[],rvec f[],rvec fshift[],
  +            const t_pbc *pbc,const t_graph *g,
  +            real lambda,real *dvdlambda,
  +            const t_mdatoms *md,t_fcdata *fcd,
  +            int *global_atom_index)
  +{
  +  int  i,m,ki,ai,aj,type;
  +  real dr2,fbond,vbond,fij,vtot;
  +  rvec dx;
  +  ivec dt;
  +  
  +  vtot = 0.0;
  +  /* forceatoms is packed as (type, ai, aj) triplets */
  +  for(i=0; (i<nbonds); ) {
  +    type = forceatoms[i++];
  +    ai   = forceatoms[i++];
  +    aj   = forceatoms[i++];
  +  
  +    ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);          /*   3          */
  +    dr2  = iprod(dx,dx);                              /*   5          */
  +      
  +    /* Note: the coordinate passed to the harmonic is dr2, not dr */
  +    *dvdlambda += g96harmonic(forceparams[type].harmonic.krA,
  +                              forceparams[type].harmonic.krB,
  +                              forceparams[type].harmonic.rA,
  +                              forceparams[type].harmonic.rB,
  +                              dr2,lambda,&vbond,&fbond);
  +
  +    vtot  += 0.5*vbond;                             /* 1*/
  +#ifdef DEBUG
  +    if (debug)
  +      fprintf(debug,"G96-BONDS: dr = %10g  vbond = %10g  fbond = %10g\n",
  +            sqrt(dr2),vbond,fbond);
  +#endif
  +   
  +    /* With a graph the shift index comes from the graph shifts */
  +    if (g) {
  +      ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
  +      ki=IVEC2IS(dt);
  +    }
  +    for (m=0; (m<DIM); m++) {                 /*  15          */
  +      fij=fbond*dx[m];
  +      f[ai][m]+=fij;
  +      f[aj][m]-=fij;
  +      fshift[ki][m]+=fij;
  +      fshift[CENTRAL][m]-=fij;
  +    }
  +  }                                   /* 44 TOTAL     */
  +  return vtot;
  +}
 +
  +/* Compute the PBC-corrected bond vectors r_ij and r_kj and return the
  + * COSINE of the angle i-j-k (not the angle itself, unlike bond_angle).
  + * The shift indices for the two vectors are returned in *t1 and *t2.
  + */
  +real g96bond_angle(const rvec xi,const rvec xj,const rvec xk,const t_pbc *pbc,
  +                 rvec r_ij,rvec r_kj,
  +                 int *t1,int *t2)
  +/* Return value is the angle between the bonds i-j and j-k */
  +{
  +  real costh;
  +  
  +  *t1 = pbc_rvec_sub(pbc,xi,xj,r_ij);                 /*  3           */
  +  *t2 = pbc_rvec_sub(pbc,xk,xj,r_kj);                 /*  3           */
  +
  +  costh=cos_angle(r_ij,r_kj);                 /* 25           */
  +                                      /* 41 TOTAL     */
  +  return costh;
  +}
 +
  +/* GROMOS-96 angles: harmonic in cos(theta) rather than theta, so
  + * g96harmonic is called with cos_theta as the coordinate and no
  + * 1/sin(theta) singularity arises at 0 or 180 degrees.
  + */
  +real g96angles(int nbonds,
  +             const t_iatom forceatoms[],const t_iparams forceparams[],
  +             const rvec x[],rvec f[],rvec fshift[],
  +             const t_pbc *pbc,const t_graph *g,
  +             real lambda,real *dvdlambda,
  +             const t_mdatoms *md,t_fcdata *fcd,
  +             int *global_atom_index)
  +{
  +  int  i,ai,aj,ak,type,m,t1,t2;
  +  rvec r_ij,r_kj;
  +  real cos_theta,dVdt,va,vtot;
  +  real rij_1,rij_2,rkj_1,rkj_2,rijrkj_1;
  +  rvec f_i,f_j,f_k;
  +  ivec jt,dt_ij,dt_kj;
  +  
  +  vtot = 0.0;
  +  /* forceatoms is packed as (type, ai, aj, ak) quadruplets */
  +  for(i=0; (i<nbonds); ) {
  +    type = forceatoms[i++];
  +    ai   = forceatoms[i++];
  +    aj   = forceatoms[i++];
  +    ak   = forceatoms[i++];
  +    
  +    cos_theta  = g96bond_angle(x[ai],x[aj],x[ak],pbc,r_ij,r_kj,&t1,&t2);
  +
  +    *dvdlambda += g96harmonic(forceparams[type].harmonic.krA,
  +                              forceparams[type].harmonic.krB,
  +                              forceparams[type].harmonic.rA,
  +                              forceparams[type].harmonic.rB,
  +                              cos_theta,lambda,&va,&dVdt);
  +    vtot    += va;
  +    
  +    /* Inverse bond lengths needed for the cos(theta) derivative */
  +    rij_1    = gmx_invsqrt(iprod(r_ij,r_ij));
  +    rkj_1    = gmx_invsqrt(iprod(r_kj,r_kj));
  +    rij_2    = rij_1*rij_1;
  +    rkj_2    = rkj_1*rkj_1;
  +    rijrkj_1 = rij_1*rkj_1;                     /* 23 */
  +    
  +#ifdef DEBUG
  +    if (debug)
  +      fprintf(debug,"G96ANGLES: costheta = %10g  vth = %10g  dV/dct = %10g\n",
  +            cos_theta,va,dVdt);
  +#endif
  +    /* Chain rule: d cos(theta)/d r_i and d cos(theta)/d r_k */
  +    for (m=0; (m<DIM); m++) {                 /*  42  */
  +      f_i[m]=dVdt*(r_kj[m]*rijrkj_1 - r_ij[m]*rij_2*cos_theta);
  +      f_k[m]=dVdt*(r_ij[m]*rijrkj_1 - r_kj[m]*rkj_2*cos_theta);
  +      f_j[m]=-f_i[m]-f_k[m];
  +      f[ai][m]+=f_i[m];
  +      f[aj][m]+=f_j[m];
  +      f[ak][m]+=f_k[m];
  +    }
  +    
  +    if (g) {
  +      copy_ivec(SHIFT_IVEC(g,aj),jt);
  +      
  +      ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
  +      ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
  +      t1=IVEC2IS(dt_ij);
  +      t2=IVEC2IS(dt_kj);
  +    }      
  +    rvec_inc(fshift[t1],f_i);
  +    rvec_inc(fshift[CENTRAL],f_j);
  +    rvec_inc(fshift[t2],f_k);               /* 9 */
  +    /* 163 TOTAL      */
  +  }
  +  return vtot;
  +}
 +
  +/* Bond-bond cross term: V = krr*(r1 - r1e)*(r2 - r2e) coupling the two
  + * bonds i-j and k-j of an angle. The energy can be negative. Forces,
  + * shift forces, and the total energy are accumulated as in the other
  + * bonded kernels; lambda/dvdlambda are unused (no A/B interpolation).
  + */
  +real cross_bond_bond(int nbonds,
  +                   const t_iatom forceatoms[],const t_iparams forceparams[],
  +                   const rvec x[],rvec f[],rvec fshift[],
  +                   const t_pbc *pbc,const t_graph *g,
  +                   real lambda,real *dvdlambda,
  +                   const t_mdatoms *md,t_fcdata *fcd,
  +                   int *global_atom_index)
  +{
  +  /* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
  +   * pp. 842-847
  +   */
  +  int  i,ai,aj,ak,type,m,t1,t2;
  +  rvec r_ij,r_kj;
  +  real vtot,vrr,s1,s2,r1,r2,r1e,r2e,krr;
  +  rvec f_i,f_j,f_k;
  +  ivec jt,dt_ij,dt_kj;
  +  
  +  vtot = 0.0;
  +  for(i=0; (i<nbonds); ) {
  +    type = forceatoms[i++];
  +    ai   = forceatoms[i++];
  +    aj   = forceatoms[i++];
  +    ak   = forceatoms[i++];
  +    r1e  = forceparams[type].cross_bb.r1e;
  +    r2e  = forceparams[type].cross_bb.r2e;
  +    krr  = forceparams[type].cross_bb.krr;
  +    
  +    /* Compute distance vectors ... */
  +    t1 = pbc_rvec_sub(pbc,x[ai],x[aj],r_ij);
  +    t2 = pbc_rvec_sub(pbc,x[ak],x[aj],r_kj);
  +    
  +    /* ... and their lengths */
  +    r1 = norm(r_ij);
  +    r2 = norm(r_kj);
  +    
  +    /* Deviations from ideality */
  +    s1 = r1-r1e;
  +    s2 = r2-r2e;
  +    
  +    /* Energy (can be negative!) */
  +    vrr   = krr*s1*s2;
  +    vtot += vrr;
  +    
  +    /* Forces */
  +    svmul(-krr*s2/r1,r_ij,f_i);
  +    svmul(-krr*s1/r2,r_kj,f_k);
  +    
  +    for (m=0; (m<DIM); m++) {                 /*  12  */
  +      f_j[m]    = -f_i[m] - f_k[m];
  +      f[ai][m] += f_i[m];
  +      f[aj][m] += f_j[m];
  +      f[ak][m] += f_k[m];
  +    }
  +    
  +    /* Virial stuff */
  +    if (g) {
  +      copy_ivec(SHIFT_IVEC(g,aj),jt);
  +      
  +      ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
  +      ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
  +      t1=IVEC2IS(dt_ij);
  +      t2=IVEC2IS(dt_kj);
  +    }      
  +    rvec_inc(fshift[t1],f_i);
  +    rvec_inc(fshift[CENTRAL],f_j);
  +    rvec_inc(fshift[t2],f_k);               /* 9 */
  +    /* 163 TOTAL      */
  +  }
  +  return vtot;
  +}
 +
  +/* Bond-angle cross term: V = krt*(r3 - r3e)*((r1 - r1e) + (r2 - r2e)),
  + * where r1 = |r_ij|, r2 = |r_kj| are the two bonds of the angle and
  + * r3 = |r_ik| is the i-k distance. lambda/dvdlambda are unused
  + * (no A/B interpolation for this term).
  + */
  +real cross_bond_angle(int nbonds,
  +                    const t_iatom forceatoms[],const t_iparams forceparams[],
  +                    const rvec x[],rvec f[],rvec fshift[],
  +                    const t_pbc *pbc,const t_graph *g,
  +                    real lambda,real *dvdlambda,
  +                    const t_mdatoms *md,t_fcdata *fcd,
  +                    int *global_atom_index)
  +{
  +  /* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
  +   * pp. 842-847
  +   */
  +  int  i,ai,aj,ak,type,m,t1,t2,t3;
  +  rvec r_ij,r_kj,r_ik;
  +  real vtot,vrt,s1,s2,s3,r1,r2,r3,r1e,r2e,r3e,krt,k1,k2,k3;
  +  rvec f_i,f_j,f_k;
  +  ivec jt,dt_ij,dt_kj;
  +  
  +  vtot = 0.0;
  +  for(i=0; (i<nbonds); ) {
  +    type = forceatoms[i++];
  +    ai   = forceatoms[i++];
  +    aj   = forceatoms[i++];
  +    ak   = forceatoms[i++];
  +    r1e  = forceparams[type].cross_ba.r1e;
  +    r2e  = forceparams[type].cross_ba.r2e;
  +    r3e  = forceparams[type].cross_ba.r3e;
  +    krt  = forceparams[type].cross_ba.krt;
  +    
  +    /* Compute distance vectors ... */
  +    t1 = pbc_rvec_sub(pbc,x[ai],x[aj],r_ij);
  +    t2 = pbc_rvec_sub(pbc,x[ak],x[aj],r_kj);
  +    t3 = pbc_rvec_sub(pbc,x[ai],x[ak],r_ik);
  +    
  +    /* ... and their lengths */
  +    r1 = norm(r_ij);
  +    r2 = norm(r_kj);
  +    r3 = norm(r_ik);
  +    
  +    /* Deviations from ideality */
  +    s1 = r1-r1e;
  +    s2 = r2-r2e;
  +    s3 = r3-r3e;
  +    
  +    /* Energy (can be negative!) */
  +    vrt   = krt*s3*(s1+s2);
  +    vtot += vrt;
  +    
  +    /* Forces */
  +    k1 = -krt*(s3/r1);
  +    k2 = -krt*(s3/r2);
  +    k3 = -krt*(s1+s2)/r3;
  +    for(m=0; (m<DIM); m++) {
  +      f_i[m] = k1*r_ij[m] + k3*r_ik[m];
  +      f_k[m] = k2*r_kj[m] - k3*r_ik[m];
  +      f_j[m] = -f_i[m] - f_k[m];
  +    }
  +    
  +    for (m=0; (m<DIM); m++) {                 /*  12  */
  +      f[ai][m] += f_i[m];
  +      f[aj][m] += f_j[m];
  +      f[ak][m] += f_k[m];
  +    }
  +    
  +    /* Virial stuff */
  +    if (g) {
  +      copy_ivec(SHIFT_IVEC(g,aj),jt);
  +      
  +      ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
  +      ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
  +      t1=IVEC2IS(dt_ij);
  +      t2=IVEC2IS(dt_kj);
  +    }      
  +    rvec_inc(fshift[t1],f_i);
  +    rvec_inc(fshift[CENTRAL],f_j);
  +    rvec_inc(fshift[t2],f_k);               /* 9 */
  +    /* 163 TOTAL      */
  +  }
  +  return vtot;
  +}
 +
  +/* Evaluate a tabulated bonded interaction at coordinate r via cubic
  + * spline interpolation of the 4-entry-per-point (Y,F,G,H) table.
  + * The force constant is interpolated between kA and kB by lambda.
  + * Writes energy to *V and force to *F, returns dV/dlambda; calls
  + * gmx_fatal if r falls outside the table range.
  + */
  +static real bonded_tab(const char *type,int table_nr,
  +                     const bondedtable_t *table,real kA,real kB,real r,
  +                     real lambda,real *V,real *F)
  +{
  +  real k,tabscale,*VFtab,rt,eps,eps2,Yt,Ft,Geps,Heps2,Fp,VV,FF;
  +  int  n0,nnn;
  +  real v,f,dvdlambda;
  +
  +  k = (1.0 - lambda)*kA + lambda*kB;
  +
  +  tabscale = table->scale;
  +  VFtab    = table->data;
  +  
  +  rt    = r*tabscale;
  +  n0    = rt;  /* truncation gives the lower table index */
  +  if (n0 >= table->n) {
  +    gmx_fatal(FARGS,"A tabulated %s interaction table number %d is out of the table range: r %f, between table indices %d and %d, table length %d",
  +            type,table_nr,r,n0,n0+1,table->n);
  +  }
  +  eps   = rt - n0;
  +  eps2  = eps*eps;
  +  nnn   = 4*n0;
  +  /* Cubic spline: V = Y + eps*(F + eps*(G + eps*H)) rearranged */
  +  Yt    = VFtab[nnn];
  +  Ft    = VFtab[nnn+1];
  +  Geps  = VFtab[nnn+2]*eps;
  +  Heps2 = VFtab[nnn+3]*eps2;
  +  Fp    = Ft + Geps + Heps2;
  +  VV    = Yt + Fp*eps;
  +  FF    = Fp + Geps + 2.0*Heps2;
  +  
  +  *F    = -k*FF*tabscale;
  +  *V    = k*VV;
  +  dvdlambda  = (kB - kA)*VV;
  +  
  +  return dvdlambda;
  +  
  +  /* That was 22 flops */
  +}
 +
  +/* Tabulated bonds: energy and force from a per-type bond table in
  + * fcd->bondtab, evaluated at the bond length dr.
  + */
  +real tab_bonds(int nbonds,
  +             const t_iatom forceatoms[],const t_iparams forceparams[],
  +             const rvec x[],rvec f[],rvec fshift[],
  +             const t_pbc *pbc,const t_graph *g,
  +             real lambda,real *dvdlambda,
  +             const t_mdatoms *md,t_fcdata *fcd,
  +             int *global_atom_index)
  +{
  +  int  i,m,ki,ai,aj,type,table;
  +  real dr,dr2,fbond,vbond,fij,vtot;
  +  rvec dx;
  +  ivec dt;
  +
  +  vtot = 0.0;
  +  for(i=0; (i<nbonds); ) {
  +    type = forceatoms[i++];
  +    ai   = forceatoms[i++];
  +    aj   = forceatoms[i++];
  +  
  +    ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);  /*   3          */
  +    dr2  = iprod(dx,dx);                      /*   5          */
  +    dr   = dr2*gmx_invsqrt(dr2);                      /*  10          */
  +
  +    table = forceparams[type].tab.table;
  +
  +    /* NOTE(review): for dr2 == 0 the expression above is 0*inf, so dr
  +     * may be NaN when passed to bonded_tab before the continue below
  +     * is reached - verify intended behavior for coincident atoms.
  +     */
  +    *dvdlambda += bonded_tab("bond",table,
  +                             &fcd->bondtab[table],
  +                             forceparams[type].tab.kA,
  +                             forceparams[type].tab.kB,
  +                             dr,lambda,&vbond,&fbond);  /*  22 */
  +
  +    /* Skip the force update for zero-length bonds (no direction) */
  +    if (dr2 == 0.0)
  +      continue;
  +
  +    
  +    vtot  += vbond;/* 1*/
  +    fbond *= gmx_invsqrt(dr2);                        /*   6          */
  +#ifdef DEBUG
  +    if (debug)
  +      fprintf(debug,"TABBONDS: dr = %10g  vbond = %10g  fbond = %10g\n",
  +            dr,vbond,fbond);
  +#endif
  +    if (g) {
  +      ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
  +      ki=IVEC2IS(dt);
  +    }
  +    for (m=0; (m<DIM); m++) {                 /*  15          */
  +      fij=fbond*dx[m];
  +      f[ai][m]+=fij;
  +      f[aj][m]-=fij;
  +      fshift[ki][m]+=fij;
  +      fshift[CENTRAL][m]-=fij;
  +    }
  +  }                                   /* 62 TOTAL     */
  +  return vtot;
  +}
 +
  +/* Tabulated angles: energy from a per-type angle table in
  + * fcd->angletab evaluated at theta; forces are applied only when
  + * sin(theta) != 0 (cos_theta2 < 1), avoiding the gradient singularity
  + * at 0 and 180 degrees.
  + */
  +real tab_angles(int nbonds,
  +              const t_iatom forceatoms[],const t_iparams forceparams[],
  +              const rvec x[],rvec f[],rvec fshift[],
  +              const t_pbc *pbc,const t_graph *g,
  +              real lambda,real *dvdlambda,
  +              const t_mdatoms *md,t_fcdata *fcd,
  +              int *global_atom_index)
  +{
  +  int  i,ai,aj,ak,t1,t2,type,table;
  +  rvec r_ij,r_kj;
  +  real cos_theta,cos_theta2,theta,dVdt,va,vtot;
  +  ivec jt,dt_ij,dt_kj;
  +  
  +  vtot = 0.0;
  +  for(i=0; (i<nbonds); ) {
  +    type = forceatoms[i++];
  +    ai   = forceatoms[i++];
  +    aj   = forceatoms[i++];
  +    ak   = forceatoms[i++];
  +    
  +    theta  = bond_angle(x[ai],x[aj],x[ak],pbc,
  +                      r_ij,r_kj,&cos_theta,&t1,&t2);  /*  41          */
  +
  +    table = forceparams[type].tab.table;
  +  
  +    *dvdlambda += bonded_tab("angle",table,
  +                             &fcd->angletab[table],
  +                             forceparams[type].tab.kA,
  +                             forceparams[type].tab.kB,
  +                             theta,lambda,&va,&dVdt);  /*  22  */
  +    vtot += va;
  +    
  +    cos_theta2 = sqr(cos_theta);                /*   1                */
  +    if (cos_theta2 < 1) {
  +      int  m;
  +      real snt,st,sth;
  +      real cik,cii,ckk;
  +      real nrkj2,nrij2;
  +      rvec f_i,f_j,f_k;
  +      
  +      /* dV/dtheta * -1/sin(theta) factors for the chain rule */
  +      st  = dVdt*gmx_invsqrt(1 - cos_theta2); /*  12          */
  +      sth = st*cos_theta;                     /*   1          */
  +#ifdef DEBUG
  +      if (debug)
  +      fprintf(debug,"ANGLES: theta = %10g  vth = %10g  dV/dtheta = %10g\n",
  +              theta*RAD2DEG,va,dVdt);
  +#endif
  +      nrkj2=iprod(r_kj,r_kj);                 /*   5          */
  +      nrij2=iprod(r_ij,r_ij);
  +      
  +      cik=st*gmx_invsqrt(nrkj2*nrij2);                /*  12          */ 
  +      cii=sth/nrij2;                          /*  10          */
  +      ckk=sth/nrkj2;                          /*  10          */
  +      
  +      for (m=0; (m<DIM); m++) {                       /*  39          */
  +      f_i[m]=-(cik*r_kj[m]-cii*r_ij[m]);
  +      f_k[m]=-(cik*r_ij[m]-ckk*r_kj[m]);
  +      f_j[m]=-f_i[m]-f_k[m];
  +      f[ai][m]+=f_i[m];
  +      f[aj][m]+=f_j[m];
  +      f[ak][m]+=f_k[m];
  +      }
  +      if (g) {
  +      copy_ivec(SHIFT_IVEC(g,aj),jt);
  +      
  +      ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
  +      ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
  +      t1=IVEC2IS(dt_ij);
  +      t2=IVEC2IS(dt_kj);
  +      }
  +      rvec_inc(fshift[t1],f_i);
  +      rvec_inc(fshift[CENTRAL],f_j);
  +      rvec_inc(fshift[t2],f_k);
  +    }                                           /* 169 TOTAL  */
  +  }
  +  return vtot;
  +}
 +
  +/* Tabulated dihedrals: energy/force from a per-type dihedral table in
  + * fcd->dihtab. The table is indexed by phi+M_PI to map [-pi,pi) onto
  + * a non-negative range; force spreading is delegated to do_dih_fup.
  + */
  +real tab_dihs(int nbonds,
  +            const t_iatom forceatoms[],const t_iparams forceparams[],
  +            const rvec x[],rvec f[],rvec fshift[],
  +            const t_pbc *pbc,const t_graph *g,
  +            real lambda,real *dvdlambda,
  +            const t_mdatoms *md,t_fcdata *fcd,
  +            int *global_atom_index)
  +{
  +  int  i,type,ai,aj,ak,al,table;
  +  int  t1,t2,t3;
  +  rvec r_ij,r_kj,r_kl,m,n;
  +  real phi,sign,ddphi,vpd,vtot;
  +
  +  vtot = 0.0;
  +  /* forceatoms is packed as (type, ai, aj, ak, al) quintuplets */
  +  for(i=0; (i<nbonds); ) {
  +    type = forceatoms[i++];
  +    ai   = forceatoms[i++];
  +    aj   = forceatoms[i++];
  +    ak   = forceatoms[i++];
  +    al   = forceatoms[i++];
  +    
  +    phi=dih_angle(x[ai],x[aj],x[ak],x[al],pbc,r_ij,r_kj,r_kl,m,n,
  +                  &sign,&t1,&t2,&t3);                 /*  84  */
  +
  +    table = forceparams[type].tab.table;
  +
  +    /* Hopefully phi+M_PI never results in values < 0 */
  +    *dvdlambda += bonded_tab("dihedral",table,
  +                             &fcd->dihtab[table],
  +                             forceparams[type].tab.kA,
  +                             forceparams[type].tab.kB,
  +                             phi+M_PI,lambda,&vpd,&ddphi);
  +                     
  +    vtot += vpd;
  +    do_dih_fup(ai,aj,ak,al,-ddphi,r_ij,r_kj,r_kl,m,n,
  +             f,fshift,pbc,g,x,t1,t2,t3);                      /* 112  */
  +
  +#ifdef DEBUG
  +    fprintf(debug,"pdih: (%d,%d,%d,%d) phi=%g\n",
  +          ai,aj,ak,al,phi);
  +#endif
  +  } /* 227 TOTAL      */
  +
  +  return vtot;
  +}
 +
  +/* Build a bitmask of which force-buffer blocks (atom index >> shift)
  + * thread t out of nt touches when computing its share of the bonded
  + * interactions. The per-thread interaction ranges computed here must
  + * match the division used in calc_bonds, as noted below.
  + */
  +static unsigned
  +calc_bonded_reduction_mask(const t_idef *idef,
  +                           int shift,
  +                           int t,int nt)
  +{
  +    unsigned mask;
  +    int ftype,nb,nat1,nb0,nb1,i,a;
  +
  +    mask = 0;
  +
  +    for(ftype=0; ftype<F_NRE; ftype++)
  +    {
  +        /* Same interaction selection as the bonded force loop:
  +         * bonded types except connection bonds, position restraints
  +         * and the GB 1-2/1-3/1-4 terms.
  +         */
  +        if (interaction_function[ftype].flags & IF_BOND &&
  +            !(ftype == F_CONNBONDS || ftype == F_POSRES) &&
  +            (ftype<F_GB12 || ftype>F_GB14))
  +        {
  +            nb = idef->il[ftype].nr;
  +            if (nb > 0)
  +            {
  +                nat1 = interaction_function[ftype].nratoms + 1;
  +
  +                /* Divide this interaction equally over the threads.
  +                 * This is not stored: should match division in calc_bonds.
  +                 */
  +                nb0 = (((nb/nat1)* t   )/nt)*nat1;
  +                nb1 = (((nb/nat1)*(t+1))/nt)*nat1;
  +
  +                for(i=nb0; i<nb1; i+=nat1)
  +                {
  +                    for(a=1; a<nat1; a++)
  +                    {
  +                        /* Mark the block containing atom iatoms[i+a] */
  +                        mask |= (1U << (idef->il[ftype].iatoms[i+a]>>shift));
  +                    }
  +                }
  +            }
  +        }
  +    }
  +
  +    return mask;
  +}
 +
  +/* Set up the multi-threaded bonded force reduction: choose a block
  + * size (2^red_ashift atoms, at most MAX_BLOCK_BITS blocks over
  + * natoms_force), compute each thread's block mask, and record the
  + * number of blocks to reduce over in fr->red_nblock.
  + */
  +void init_bonded_thread_force_reduction(t_forcerec *fr,
  +                                        const t_idef *idef)
  +{
  +#define MAX_BLOCK_BITS 32
  +    int t;
  +    int ctot,c,b;
  +
  +    if (fr->nthreads <= 1)
  +    {
  +        /* Serial run: no reduction needed */
  +        fr->red_nblock = 0;
  +
  +        return;
  +    }
  +
  +    /* We divide the force array in a maximum of 32 blocks.
  +     * Minimum force block reduction size is 2^6=64.
  +     */
  +    fr->red_ashift = 6;
  +    while (fr->natoms_force > (int)(MAX_BLOCK_BITS*(1U<<fr->red_ashift)))
  +    {
  +        fr->red_ashift++;
  +    }
  +    if (debug)
  +    {
  +        fprintf(debug,"bonded force buffer block atom shift %d bits\n",
  +                fr->red_ashift);
  +    }
  +
  +    /* Determine to which blocks each thread's bonded force calculation
  +     * contributes. Store this is a mask for each thread.
  +     */
  +    /* NOTE(review): this loop starts at t=1 while the counting loop
  +     * below reads f_t[0].red_mask from t=0 - presumably thread 0
  +     * accumulates into the main force array and its mask is set up
  +     * elsewhere; confirm it is initialized before use.
  +     */
  +#pragma omp parallel for num_threads(fr->nthreads) schedule(static)
  +    for(t=1; t<fr->nthreads; t++)
  +    {
  +        fr->f_t[t].red_mask =
  +            calc_bonded_reduction_mask(idef,fr->red_ashift,t,fr->nthreads);
  +    }
  +
  +    /* Determine the maximum number of blocks we need to reduce over */
  +    fr->red_nblock = 0;
  +    ctot = 0;
  +    for(t=0; t<fr->nthreads; t++)
  +    {
  +        c = 0;
  +        for(b=0; b<MAX_BLOCK_BITS; b++)
  +        {
  +            if (fr->f_t[t].red_mask & (1U<<b))
  +            {
  +                fr->red_nblock = max(fr->red_nblock,b+1);
  +                c++;
  +            }
  +        }
  +        if (debug)
  +        {
  +            fprintf(debug,"thread %d flags %x count %d\n",
  +                    t,fr->f_t[t].red_mask,c);
  +        }
  +        ctot += c;
  +    }
  +    if (debug)
  +    {
  +        fprintf(debug,"Number of blocks to reduce: %d of size %d\n",
  +                fr->red_nblock,1<<fr->red_ashift);
  +        fprintf(debug,"Reduction density %.2f density/#thread %.2f\n",
  +                ctot*(1<<fr->red_ashift)/(double)fr->natoms_force,
  +                ctot*(1<<fr->red_ashift)/(double)(fr->natoms_force*fr->nthreads));
  +    }
  +}
 +
 +static void zero_thread_forces(f_thread_t *f_t,int n,
 +                               int nblock,int blocksize)
 +{
 +    int b,a0,a1,a,i,j;
 +
 +    if (n > f_t->f_nalloc)
 +    {
 +        f_t->f_nalloc = over_alloc_large(n);
 +        srenew(f_t->f,f_t->f_nalloc);
 +    }
 +
 +    if (f_t->red_mask != 0)
 +    {
 +        for(b=0; b<nblock; b++)
 +        {
 +            if (f_t->red_mask && (1U<<b))
 +            {
 +                a0 = b*blocksize;
 +                a1 = min((b+1)*blocksize,n);
 +                for(a=a0; a<a1; a++)
 +                {
 +                    clear_rvec(f_t->f[a]);
 +                }
 +            }
 +        }
 +    }
 +    for(i=0; i<SHIFTS; i++)
 +    {
 +        clear_rvec(f_t->fshift[i]);
 +    }
 +    for(i=0; i<F_NRE; i++)
 +    {
 +        f_t->ener[i] = 0;
 +    }
 +    for(i=0; i<egNR; i++)
 +    {
 +        for(j=0; j<f_t->grpp.nener; j++)
 +        {
 +            f_t->grpp.ener[i][j] = 0;
 +        }
 +    }
 +    for(i=0; i<efptNR; i++)
 +    {
 +        f_t->dvdl[i] = 0;
 +    }
 +}
 +
  +/* Reduce the per-thread bonded force buffers into the main force
  + * array f, block by block, adding only the buffers of threads whose
  + * red_mask marks the block. Thread 0 is skipped: it accumulates
  + * directly into f (the loop below starts at ft=1).
  + */
  +static void reduce_thread_force_buffer(int n,rvec *f,
  +                                       int nthreads,f_thread_t *f_t,
  +                                       int nblock,int block_size)
  +{
  +    /* The max thread number is arbitrary,
  +     * we used a fixed number to avoid memory management.
  +     * (The stack array fp below is sized by this constant; note the
  +     * limit is 256, not the 16 an earlier comment mentioned.)
  +     */
  +#define MAX_BONDED_THREADS 256
  +    int b;
  +
  +    if (nthreads > MAX_BONDED_THREADS)
  +    {
  +        gmx_fatal(FARGS,"Can not reduce bonded forces on more than %d threads",
  +                  MAX_BONDED_THREADS);
  +    }
  +
  +    /* This reduction can run on any number of threads,
  +     * independently of nthreads.
  +     */
  +#pragma omp parallel for num_threads(nthreads) schedule(static)
  +    for(b=0; b<nblock; b++)
  +    {
  +        rvec *fp[MAX_BONDED_THREADS];
  +        int nfb,ft,fb;
  +        int a0,a1,a;
  +
  +        /* Determine which threads contribute to this block */
  +        nfb = 0;
  +        for(ft=1; ft<nthreads; ft++)
  +        {
  +            if (f_t[ft].red_mask & (1U<<b))
  +            {
  +                fp[nfb++] = f_t[ft].f;
  +            }
  +        }
  +        if (nfb > 0)
  +        {
  +            /* Reduce force buffers for threads that contribute */
  +            a0 =  b   *block_size;
  +            a1 = (b+1)*block_size;
  +            a1 = min(a1,n);
  +            for(a=a0; a<a1; a++)
  +            {
  +                for(fb=0; fb<nfb; fb++)
  +                {
  +                    rvec_inc(f[a],fp[fb][a]);
  +                }
  +            }
  +        }
  +    }
  +}
 +
  +/* Combine all per-thread bonded outputs into the main arrays: forces
  + * (via reduce_thread_force_buffer), and - when bCalcEnerVir is set -
  + * shift forces, per-type energies, group-pair energies and (when
  + * bDHDL) dvdl terms. Thread 0's contributions are already in the main
  + * arrays, so all loops start at t=1.
  + */
  +static void reduce_thread_forces(int n,rvec *f,rvec *fshift,
  +                                 real *ener,gmx_grppairener_t *grpp,real *dvdl,
  +                                 int nthreads,f_thread_t *f_t,
  +                                 int nblock,int block_size,
  +                                 gmx_bool bCalcEnerVir,
  +                                 gmx_bool bDHDL)
  +{
  +    if (nblock > 0)
  +    {
  +        /* Reduce the bonded force buffer */
  +        reduce_thread_force_buffer(n,f,nthreads,f_t,nblock,block_size);
  +    }
  +
  +    /* When necessary, reduce energy and virial using one thread only */
  +    if (bCalcEnerVir)
  +    {
  +        int t,i,j;
  +
  +        for(i=0; i<SHIFTS; i++)
  +        {
  +            for(t=1; t<nthreads; t++)
  +            {
  +                rvec_inc(fshift[i],f_t[t].fshift[i]);
  +            }
  +        }
  +        for(i=0; i<F_NRE; i++)
  +        {
  +            for(t=1; t<nthreads; t++)
  +            {
  +                ener[i] += f_t[t].ener[i];
  +            }
  +        }
  +        /* NOTE(review): reads f_t[1].grpp.nener, which presumably
  +         * requires nthreads > 1 on this path - confirm callers only
  +         * reach here in threaded runs.
  +         */
  +        for(i=0; i<egNR; i++)
  +        {
  +            for(j=0; j<f_t[1].grpp.nener; j++)
  +            {
  +                for(t=1; t<nthreads; t++)
  +                {
  +                    
  +                    grpp->ener[i][j] += f_t[t].grpp.ener[i][j];
  +                }
  +            }
  +        }
  +        if (bDHDL)
  +        {
  +            for(i=0; i<efptNR; i++)
  +            {
  +                
  +                for(t=1; t<nthreads; t++)
  +                {
  +                    dvdl[i] += f_t[t].dvdl[i];
  +                }
  +            }
  +        }
  +    }
  +}
 +
  +/* Compute this thread's share of one bonded interaction type ftype:
  + * selects the lambda component (restraint vs bonded), splits the
  + * interaction list evenly over fr->nthreads, dispatches to the CMAP
  + * kernel, the no-energy proper-dihedral fast path, the generic ifunc,
  + * or the listed LJ/Coulomb 1-4 path, and returns the energy. Updates
  + * dvdl, optionally logs per-term potentials (bPrintSepPot), and lets
  + * only thread 0 update the flop counters.
  + */
  +static real calc_one_bond(FILE *fplog,int thread,
  +                          int ftype,const t_idef *idef,
  +                          rvec x[], rvec f[], rvec fshift[],
  +                          t_forcerec *fr,
  +                          const t_pbc *pbc,const t_graph *g,
  +                          gmx_enerdata_t *enerd, gmx_grppairener_t *grpp,
  +                          t_nrnb *nrnb,
  +                          real *lambda, real *dvdl,
  +                          const t_mdatoms *md,t_fcdata *fcd,
  +                          gmx_bool bCalcEnerVir,
  +                          int *global_atom_index, gmx_bool bPrintSepPot)
  +{
  +    int ind,nat1,nbonds,efptFTYPE;
  +    real v=0;
  +    t_iatom *iatoms;
  +    int nb0,nbn;
  +
  +    /* Restraint terms couple to the restraint lambda component,
  +     * everything else to the bonded component.
  +     */
  +    if (IS_RESTRAINT_TYPE(ftype))
  +    {
  +        efptFTYPE = efptRESTRAINT;
  +    }
  +    else
  +    {
  +        efptFTYPE = efptBONDED;
  +    }
  +
  +    if (interaction_function[ftype].flags & IF_BOND &&
  +        !(ftype == F_CONNBONDS || ftype == F_POSRES))
  +    {
  +        ind  = interaction_function[ftype].nrnb_ind;
  +        nat1 = interaction_function[ftype].nratoms + 1;
  +        nbonds    = idef->il[ftype].nr/nat1;
  +        iatoms    = idef->il[ftype].iatoms;
  +
  +        /* Even split of the nbonds entries over the threads; must
  +         * match the division in calc_bonded_reduction_mask.
  +         */
  +        nb0 = ((nbonds* thread   )/(fr->nthreads))*nat1;
  +        nbn = ((nbonds*(thread+1))/(fr->nthreads))*nat1 - nb0;
  +
  +        if (!IS_LISTED_LJ_C(ftype))
  +        {
  +            if(ftype==F_CMAP)
  +            {
  +                v = cmap_dihs(nbn,iatoms+nb0,
  +                              idef->iparams,&idef->cmap_grid,
  +                              (const rvec*)x,f,fshift,
  +                              pbc,g,lambda[efptFTYPE],&(dvdl[efptFTYPE]),
  +                              md,fcd,global_atom_index);
  +            }
  +            else if (ftype == F_PDIHS &&
  +                     !bCalcEnerVir && fr->efep==efepNO)
  +            {
  +                /* No energies, shift forces, dvdl */
  +#ifndef SSE_PROPER_DIHEDRALS
  +                pdihs_noener
  +#else
  +                pdihs_noener_sse
  +#endif
  +                    (nbn,idef->il[ftype].iatoms+nb0,
  +                     idef->iparams,
  +                     (const rvec*)x,f,
  +                     pbc,g,lambda[efptFTYPE],md,fcd,
  +                     global_atom_index);
  +                v = 0;
  +            }
  +            else
  +            {
  +                v = interaction_function[ftype].ifunc(nbn,iatoms+nb0,
  +                                                      idef->iparams,
  +                                                      (const rvec*)x,f,fshift,
  +                                                      pbc,g,lambda[efptFTYPE],&(dvdl[efptFTYPE]),
  +                                                      md,fcd,global_atom_index);
  +            }
  +            if (bPrintSepPot)
  +            {
  +                fprintf(fplog,"  %-23s #%4d  V %12.5e  dVdl %12.5e\n",
  +                        interaction_function[ftype].longname,
  +                        nbonds/nat1,v,lambda[efptFTYPE]);
  +            }
  +        }
  +        else
  +        {
  +            /* Listed 1-4 LJ/Coulomb pairs: separate VDW and Coulomb
  +             * dvdl components are accumulated into enerd here.
  +             */
  +            v = do_nonbonded_listed(ftype,nbn,iatoms+nb0,idef->iparams,(const rvec*)x,f,fshift,
  +                                    pbc,g,lambda,dvdl,md,fr,grpp,global_atom_index);
  +
  +            enerd->dvdl_nonlin[efptCOUL] += dvdl[efptCOUL];
  +            enerd->dvdl_nonlin[efptVDW] += dvdl[efptVDW];
  +            
  +            if (bPrintSepPot)
  +            {
  +                fprintf(fplog,"  %-5s + %-15s #%4d                  dVdl %12.5e\n",
  +                        interaction_function[ftype].longname,
  +                        interaction_function[F_LJ14].longname,nbonds/nat1,dvdl[efptVDW]);
  +                fprintf(fplog,"  %-5s + %-15s #%4d                  dVdl %12.5e\n",
  +                        interaction_function[ftype].longname,
  +                        interaction_function[F_COUL14].longname,nbonds/nat1,dvdl[efptCOUL]);
  +            }
  +        }
  +        /* Only thread 0 counts flops to avoid double counting */
  +        if (ind != -1 && thread == 0)
  +        {
  +            inc_nrnb(nrnb,ind,nbonds);
  +        }
  +    }
  +
  +    return v;
  +}
 +
 +/* WARNING!  THIS FUNCTION MUST EXACTLY TRACK THE calc
 +   function, or horrible things will happen when doing free energy
 +   calculations!  In a good coding world, this would not be a
 +   different function, but for speed reasons, it needs to be made a
 +   separate function.  TODO for 5.0 - figure out a way to reorganize
 +   to reduce duplication.
 +*/
 +
 +static real calc_one_bond_foreign(FILE *fplog,int ftype, const t_idef *idef,
 +                                  rvec x[], rvec f[], t_forcerec *fr,
 +                                  const t_pbc *pbc,const t_graph *g,
-                                             md,fr,&enerd->grpp,global_atom_index);
++                                  gmx_grppairener_t *grpp, t_nrnb *nrnb,
 +                                  real *lambda, real *dvdl,
 +                                  const t_mdatoms *md,t_fcdata *fcd,
 +                                  int *global_atom_index, gmx_bool bPrintSepPot)
 +{
 +    int ind,nat1,nbonds,efptFTYPE,nbonds_np;
 +    real v=0;
 +    t_iatom *iatoms;
 +
 +    if (IS_RESTRAINT_TYPE(ftype))
 +    {
 +        efptFTYPE = efptRESTRAINT;
 +    }
 +    else
 +    {
 +        efptFTYPE = efptBONDED;
 +    }
 +
 +    if (ftype<F_GB12 || ftype>F_GB14)
 +    {
 +        if (interaction_function[ftype].flags & IF_BOND &&
 +            !(ftype == F_CONNBONDS || ftype == F_POSRES || ftype == F_FBPOSRES))
 +        {
 +            ind  = interaction_function[ftype].nrnb_ind;
 +            nat1 = interaction_function[ftype].nratoms+1;
 +            nbonds_np = idef->il[ftype].nr_nonperturbed;
 +            nbonds    = idef->il[ftype].nr - nbonds_np;
 +            iatoms    = idef->il[ftype].iatoms + nbonds_np;
 +            if (nbonds > 0)
 +            {
 +                if (!IS_LISTED_LJ_C(ftype))
 +                {
 +                    if(ftype==F_CMAP)
 +                    {
 +                        v = cmap_dihs(nbonds,iatoms,
 +                                      idef->iparams,&idef->cmap_grid,
 +                                      (const rvec*)x,f,fr->fshift,
 +                                      pbc,g,lambda[efptFTYPE],&(dvdl[efptFTYPE]),md,fcd,
 +                                      global_atom_index);
 +                    }
 +                    else
 +                    {
 +                        v =       interaction_function[ftype].ifunc(nbonds,iatoms,
 +                                                                  idef->iparams,
 +                                                                  (const rvec*)x,f,fr->fshift,
 +                                                                  pbc,g,lambda[efptFTYPE],&dvdl[efptFTYPE],
 +                                                                  md,fcd,global_atom_index);
 +                    }
 +                }
 +                else
 +                {
 +                    v = do_nonbonded_listed(ftype,nbonds,iatoms,
 +                                            idef->iparams,
 +                                            (const rvec*)x,f,fr->fshift,
 +                                            pbc,g,lambda,dvdl,
-                        gmx_enerdata_t *enerd,t_nrnb *nrnb,
++                                            md,fr,grpp,global_atom_index);
 +                }
 +                if (ind != -1)
 +                {
 +                    inc_nrnb(nrnb,ind,nbonds/nat1);
 +                }
 +            }
 +        }
 +    }
 +    return v;
 +}
 +
 +void calc_bonds(FILE *fplog,const gmx_multisim_t *ms,
 +                const t_idef *idef,
 +                rvec x[],history_t *hist,
 +                rvec f[],t_forcerec *fr,
 +                const t_pbc *pbc,const t_graph *g,
 +                gmx_enerdata_t *enerd,t_nrnb *nrnb,
 +                real *lambda,
 +                const t_mdatoms *md,
 +                t_fcdata *fcd,int *global_atom_index,
 +                t_atomtypes *atype, gmx_genborn_t *born,
 +                int force_flags,
 +                gmx_bool bPrintSepPot,gmx_large_int_t step)
 +{
 +    gmx_bool bCalcEnerVir;
 +    int    i;
 +    real   v,dvdl[efptNR],dvdl_dum[efptNR]; /* The dummy array is to have a place to store the dhdl at other values
 +                                               of lambda, which will be thrown away in the end*/
 +    const  t_pbc *pbc_null;
 +    char   buf[22];
 +    int    thread;
 +
 +    bCalcEnerVir = (force_flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY));
 +
 +    for (i=0;i<efptNR;i++)
 +    {
 +        dvdl[i] = 0.0;
 +    }
 +    if (fr->bMolPBC)
 +    {
 +        pbc_null = pbc;
 +    }
 +    else
 +    {
 +        pbc_null = NULL;
 +    }
 +    if (bPrintSepPot)
 +    {
 +        fprintf(fplog,"Step %s: bonded V and dVdl for this node\n",
 +                gmx_step_str(step,buf));
 +    }
 +
 +#ifdef DEBUG
 +    if (g && debug)
 +    {
 +        p_graph(debug,"Bondage is fun",g);
 +    }
 +#endif
 +
 +    /* Do pre force calculation stuff which might require communication */
 +    if (idef->il[F_ORIRES].nr)
 +    {
 +        enerd->term[F_ORIRESDEV] =
 +            calc_orires_dev(ms,idef->il[F_ORIRES].nr,
 +                            idef->il[F_ORIRES].iatoms,
 +                            idef->iparams,md,(const rvec*)x,
 +                            pbc_null,fcd,hist);
 +    }
 +    if (idef->il[F_DISRES].nr)
 +    {
 +        calc_disres_R_6(ms,idef->il[F_DISRES].nr,
 +                        idef->il[F_DISRES].iatoms,
 +                        idef->iparams,(const rvec*)x,pbc_null,
 +                        fcd,hist);
 +    }
 +
 +#pragma omp parallel for num_threads(fr->nthreads) schedule(static)
 +    for(thread=0; thread<fr->nthreads; thread++)
 +    {
 +        int    ftype,nbonds,ind,nat1;
 +        real   *epot,v;
 +        /* thread stuff */
 +        rvec   *ft,*fshift;
 +        real   *dvdlt;
 +        gmx_grppairener_t *grpp;
 +        int    nb0,nbn;
 +
 +        if (thread == 0)
 +        {
 +            ft     = f;
 +            fshift = fr->fshift;
 +            epot   = enerd->term;
 +            grpp   = &enerd->grpp;
 +            dvdlt  = dvdl;
 +        }
 +        else
 +        {
 +            zero_thread_forces(&fr->f_t[thread],fr->natoms_force,
 +                               fr->red_nblock,1<<fr->red_ashift);
 +
 +            ft     = fr->f_t[thread].f;
 +            fshift = fr->f_t[thread].fshift;
 +            epot   = fr->f_t[thread].ener;
 +            grpp   = &fr->f_t[thread].grpp;
 +            dvdlt  = fr->f_t[thread].dvdl;
 +        }
 +        /* Loop over all bonded force types to calculate the bonded forces */
 +        for(ftype=0; (ftype<F_NRE); ftype++)
 +        {
 +            if (idef->il[ftype].nr > 0 &&
 +                (interaction_function[ftype].flags & IF_BOND) &&
 +                (ftype < F_GB12 || ftype > F_GB14) &&
 +                !(ftype == F_CONNBONDS || ftype == F_POSRES))
 +            {
 +                v = calc_one_bond(fplog,thread,ftype,idef,x, 
 +                                  ft,fshift,fr,pbc_null,g,enerd,grpp,
 +                                  nrnb,lambda,dvdlt,
 +                                  md,fcd,bCalcEnerVir,
 +                                  global_atom_index,bPrintSepPot);
 +                epot[ftype]        += v;
 +            }
 +        }
 +    }
 +    if (fr->nthreads > 1)
 +    {
 +        reduce_thread_forces(fr->natoms_force,f,fr->fshift,
 +                             enerd->term,&enerd->grpp,dvdl,
 +                             fr->nthreads,fr->f_t,
 +                             fr->red_nblock,1<<fr->red_ashift,
 +                             bCalcEnerVir,
 +                             force_flags & GMX_FORCE_DHDL);
 +    }
 +    if (force_flags & GMX_FORCE_DHDL)
 +    {
 +        for(i=0; i<efptNR; i++)
 +        {
 +            enerd->dvdl_nonlin[i] += dvdl[i];
 +        }
 +    }
 +
 +    /* Copy the sum of violations for the distance restraints from fcd */
 +    if (fcd)
 +    {
 +        enerd->term[F_DISRESVIOL] = fcd->disres.sumviol;
 +
 +    }
 +}
 +
 +void calc_bonds_lambda(FILE *fplog,
 +                       const t_idef *idef,
 +                       rvec x[],
 +                       t_forcerec *fr,
 +                       const t_pbc *pbc,const t_graph *g,
-     real   v,dr,dr2,*epot;
++                       gmx_grppairener_t *grpp, real *epot, t_nrnb *nrnb,
 +                       real *lambda,
 +                       const t_mdatoms *md,
 +                       t_fcdata *fcd,
 +                       int *global_atom_index)
 +{
 +    int    i,ftype,nbonds_np,nbonds,ind,nat;
-     epot = enerd->term;
++    real   v,dr,dr2;
 +    real   dvdl_dum[efptNR];
 +    rvec   *f,*fshift_orig;
 +    const  t_pbc *pbc_null;
 +    t_iatom *iatom_fe;
 +
 +    if (fr->bMolPBC)
 +    {
 +        pbc_null = pbc;
 +    }
 +    else
 +    {
 +        pbc_null = NULL;
 +    }
 +
-                                   f,fr,pbc_null,g,enerd,nrnb,lambda,dvdl_dum,
 +    snew(f,fr->natoms_force);
 +    /* We want to preserve the fshift array in forcerec */
 +    fshift_orig = fr->fshift;
 +    snew(fr->fshift,SHIFTS);
 +
 +    /* Loop over all bonded force types to calculate the bonded forces */
 +    for(ftype=0; (ftype<F_NRE); ftype++) 
 +    {
 +        v = calc_one_bond_foreign(fplog,ftype,idef,x, 
++                                  f,fr,pbc_null,g,grpp,nrnb,lambda,dvdl_dum,
 +                                  md,fcd,global_atom_index,FALSE);
 +        epot[ftype] += v;
 +    }
 +
 +    sfree(fr->fshift);
 +    fr->fshift = fshift_orig;
 +    sfree(f);
 +}
index fa0696548375a1d20ff2391657d95023c728c1ba,0000000000000000000000000000000000000000..5ae98d0047faf2763f632fe0d5134eccc59aa19a
mode 100644,000000..100644
--- /dev/null
@@@ -1,2402 -1,0 +1,2403 @@@
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + * This file is part of Gromacs        Copyright (c) 1991-2008
 + * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
 + *
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the research papers on the package. Check out http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gnomes, ROck Monsters And Chili Sauce
 + */
 +
 +/* The source code in this file should be thread-safe. 
 + Please keep it that way. */
 +
 +
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <string.h>
 +#include <time.h>
 +
 +#ifdef HAVE_SYS_TIME_H
 +#include <sys/time.h>
 +#endif
 +
 +#ifdef HAVE_UNISTD_H
 +#include <unistd.h>
 +#endif
 +
 +#ifdef GMX_NATIVE_WINDOWS
 +/* _chsize_s */
 +#include <io.h>
 +#include <sys/locking.h>
 +#endif
 +
 +
 +#include "filenm.h"
 +#include "names.h"
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "gmxfio.h"
 +#include "xdrf.h"
 +#include "statutil.h"
 +#include "txtdump.h"
 +#include "vec.h"
 +#include "network.h"
 +#include "gmx_random.h"
 +#include "checkpoint.h"
 +#include "futil.h"
 +#include "string2.h"
 +#include <fcntl.h>
 +
++#include "buildinfo.h"
 +
 +#ifdef GMX_FAHCORE
 +#include "corewrap.h"
 +#endif
 +
 +
 +/* Portable version of ctime_r implemented in src/gmxlib/string2.c, but we do not want it declared in public installed headers */
 +char *
 +gmx_ctime_r(const time_t *clock,char *buf, int n);
 +
 +
 +#define CPT_MAGIC1 171817
 +#define CPT_MAGIC2 171819
 +#define CPTSTRLEN 1024
 +
 +#ifdef GMX_DOUBLE
 +#define GMX_CPT_BUILD_DP 1
 +#else
 +#define GMX_CPT_BUILD_DP 0
 +#endif
 +
 +/* cpt_version should normally only be changed
 + * when the header of footer format changes.
 + * The state data format itself is backward and forward compatible.
 + * But old code can not read a new entry that is present in the file
 + * (but can read a new format when new entries are not present).
 + */
 +static const int cpt_version = 14;
 +
 +
 +const char *est_names[estNR]=
 +{
 +    "FE-lambda",
 +    "box", "box-rel", "box-v", "pres_prev",
 +    "nosehoover-xi", "thermostat-integral",
 +    "x", "v", "SDx", "CGp", "LD-rng", "LD-rng-i",
 +    "disre_initf", "disre_rm3tav",
 +    "orire_initf", "orire_Dtav",
 +    "svir_prev", "nosehoover-vxi", "v_eta", "vol0", "nhpres_xi", "nhpres_vxi", "fvir_prev","fep_state", "MC-rng", "MC-rng-i"
 +};
 +
 +enum { eeksEKIN_N, eeksEKINH, eeksDEKINDL, eeksMVCOS, eeksEKINF, eeksEKINO, eeksEKINSCALEF, eeksEKINSCALEH, eeksVSCALE, eeksEKINTOTAL, eeksNR };
 +
 +const char *eeks_names[eeksNR]=
 +{
 +    "Ekin_n", "Ekinh", "dEkindlambda", "mv_cos",
 +    "Ekinf", "Ekinh_old", "EkinScaleF_NHC", "EkinScaleH_NHC","Vscale_NHC","Ekin_Total"
 +};
 +
 +enum { eenhENERGY_N, eenhENERGY_AVER, eenhENERGY_SUM, eenhENERGY_NSUM,
 +       eenhENERGY_SUM_SIM, eenhENERGY_NSUM_SIM,
 +       eenhENERGY_NSTEPS, eenhENERGY_NSTEPS_SIM, 
 +       eenhENERGY_DELTA_H_NN,
 +       eenhENERGY_DELTA_H_LIST, 
 +       eenhENERGY_DELTA_H_STARTTIME, 
 +       eenhENERGY_DELTA_H_STARTLAMBDA, 
 +       eenhNR };
 +
 +const char *eenh_names[eenhNR]=
 +{
 +    "energy_n", "energy_aver", "energy_sum", "energy_nsum",
 +    "energy_sum_sim", "energy_nsum_sim",
 +    "energy_nsteps", "energy_nsteps_sim", 
 +    "energy_delta_h_nn",
 +    "energy_delta_h_list", 
 +    "energy_delta_h_start_time", 
 +    "energy_delta_h_start_lambda"
 +};
 +
 +/* free energy history variables -- need to be preserved over checkpoint */
 +enum { edfhBEQUIL,edfhNATLAMBDA,edfhWLHISTO,edfhWLDELTA,edfhSUMWEIGHTS,edfhSUMDG,edfhSUMMINVAR,edfhSUMVAR,
 +       edfhACCUMP,edfhACCUMM,edfhACCUMP2,edfhACCUMM2,edfhTIJ,edfhTIJEMP,edfhNR };
 +/* free energy history variable names  */
 +const char *edfh_names[edfhNR]=
 +{
 +    "bEquilibrated","N_at_state", "Wang-Landau_Histogram", "Wang-Landau-delta", "Weights", "Free Energies", "minvar","variance",
 +    "accumulated_plus", "accumulated_minus", "accumulated_plus_2",  "accumulated_minus_2", "Tij", "Tij_empirical"
 +};
 +
 +#ifdef GMX_NATIVE_WINDOWS
 +static int
 +gmx_wintruncate(const char *filename, __int64 size)
 +{
 +#ifdef GMX_FAHCORE
 +    /*we do this elsewhere*/
 +    return 0;
 +#else
 +    FILE *fp;
 +    int   rc;
 +    
 +    fp=fopen(filename,"rb+");
 +    
 +    if(fp==NULL)
 +    {
 +        return -1;
 +    }
 +    
 +    return _chsize_s( fileno(fp), size);
 +#endif
 +}
 +#endif
 +
 +
 +enum { ecprREAL, ecprRVEC, ecprMATRIX };
 +
 +enum { cptpEST, cptpEEKS, cptpEENH, cptpEDFH };
 +/* enums for the different components of checkpoint variables, replacing the hard coded ones.
 +   cptpEST - state variables.
 +   cptpEEKS - Kinetic energy state variables.
 +   cptpEENH - Energy history state variables.
 +   cptpEDFH - free energy history variables.
 +*/
 +
 +
 +static const char *st_names(int cptp,int ecpt)
 +{
 +    switch (cptp)
 +    {
 +    case cptpEST: return est_names [ecpt]; break;
 +    case cptpEEKS: return eeks_names[ecpt]; break;
 +    case cptpEENH: return eenh_names[ecpt]; break;
 +    case cptpEDFH: return edfh_names[ecpt]; break;
 +    }
 +
 +    return NULL;
 +}
 +
 +static void cp_warning(FILE *fp)
 +{
 +    fprintf(fp,"\nWARNING: Checkpoint file is corrupted or truncated\n\n");
 +}
 +
 +static void cp_error()
 +{
 +    gmx_fatal(FARGS,"Checkpoint file corrupted/truncated, or maybe you are out of disk space?");
 +}
 +
 +static void do_cpt_string_err(XDR *xd,gmx_bool bRead,const char *desc,char **s,FILE *list)
 +{
 +    bool_t res=0;
 +    
 +    if (bRead)
 +    {
 +        snew(*s,CPTSTRLEN);
 +    }
 +    res = xdr_string(xd,s,CPTSTRLEN);
 +    if (res == 0)
 +    {
 +        cp_error();
 +    }
 +    if (list)
 +    {
 +        fprintf(list,"%s = %s\n",desc,*s);
 +        sfree(*s);
 +    }
 +}
 +
 +static int do_cpt_int(XDR *xd,const char *desc,int *i,FILE *list)
 +{
 +    bool_t res=0;
 +    
 +    res = xdr_int(xd,i);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (list)
 +    {
 +        fprintf(list,"%s = %d\n",desc,*i);
 +    }
 +    return 0;
 +}
 +
 +static int do_cpt_u_chars(XDR *xd,const char *desc,int n,unsigned char *i,FILE *list)
 +{
 +    bool_t res=1;
 +    int j;
 +    if (list)
 +    {
 +        fprintf(list,"%s = ",desc);
 +    }
 +    for (j=0; j<n && res; j++)
 +    {
 +        res &= xdr_u_char(xd,&i[j]);
 +        if (list)
 +        {
 +            fprintf(list,"%02x",i[j]);
 +        }
 +    }
 +    if (list)
 +    {
 +        fprintf(list,"\n");
 +    }
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +
 +    return 0;
 +}
 +
 +static void do_cpt_int_err(XDR *xd,const char *desc,int *i,FILE *list)
 +{
 +    if (do_cpt_int(xd,desc,i,list) < 0)
 +    {
 +        cp_error();
 +    }
 +}
 +
 +static void do_cpt_step_err(XDR *xd,const char *desc,gmx_large_int_t *i,FILE *list)
 +{
 +    bool_t res=0;
 +    char   buf[STEPSTRSIZE];
 +
 +    res = xdr_gmx_large_int(xd,i,"reading checkpoint file");
 +    if (res == 0)
 +    {
 +        cp_error();
 +    }
 +    if (list)
 +    {
 +        fprintf(list,"%s = %s\n",desc,gmx_step_str(*i,buf));
 +    }
 +}
 +
 +static void do_cpt_double_err(XDR *xd,const char *desc,double *f,FILE *list)
 +{
 +    bool_t res=0;
 +    
 +    res = xdr_double(xd,f);
 +    if (res == 0)
 +    {
 +        cp_error();
 +    }
 +    if (list)
 +    {
 +        fprintf(list,"%s = %f\n",desc,*f);
 +    }
 +}
 +
 +/* If nval >= 0, nval is used; on read this should match the passed value.
 + * If nval n<0, *nptr is used; on read the value is stored in nptr
 + */
 +static int do_cpte_reals_low(XDR *xd,int cptp,int ecpt,int sflags,
 +                             int nval,int *nptr,real **v,
 +                             FILE *list,int erealtype)
 +{
 +    bool_t res=0;
 +#ifndef GMX_DOUBLE
 +    int  dtc=xdr_datatype_float; 
 +#else
 +    int  dtc=xdr_datatype_double;
 +#endif
 +    real *vp,*va=NULL;
 +    float  *vf;
 +    double *vd;
 +    int  nf,dt,i;
 +    
 +    if (list == NULL)
 +    {
 +        if (nval >= 0)
 +        {
 +            nf = nval;
 +        }
 +        else
 +        {
 +        if (nptr == NULL)
 +        {
 +            gmx_incons("*ntpr=NULL in do_cpte_reals_low");
 +        }
 +        nf = *nptr;
 +        }
 +    }
 +    res = xdr_int(xd,&nf);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (list == NULL)
 +    {
 +        if (nval >= 0)
 +        {
 +            if (nf != nval)
 +            {
 +                gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),nval,nf);
 +            }
 +        }
 +        else
 +        {
 +            *nptr = nf;
 +        }
 +    }
 +    dt = dtc;
 +    res = xdr_int(xd,&dt);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (dt != dtc)
 +    {
 +        fprintf(stderr,"Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
 +                st_names(cptp,ecpt),xdr_datatype_names[dtc],
 +                xdr_datatype_names[dt]);
 +    }
 +    if (list || !(sflags & (1<<ecpt)))
 +    {
 +        snew(va,nf);
 +        vp = va;
 +    }
 +    else
 +    {
 +        if (*v == NULL)
 +        {
 +            snew(*v,nf);
 +        }
 +        vp = *v;
 +    }
 +    if (dt == xdr_datatype_float)
 +    {
 +        if (dtc == xdr_datatype_float)
 +        {
 +            vf = (float *)vp;
 +        }
 +        else
 +        {
 +            snew(vf,nf);
 +        }
 +        res = xdr_vector(xd,(char *)vf,nf,
 +                         (unsigned int)sizeof(float),(xdrproc_t)xdr_float);
 +        if (res == 0)
 +        {
 +            return -1;
 +        }
 +        if (dtc != xdr_datatype_float)
 +        {
 +            for(i=0; i<nf; i++)
 +            {
 +                vp[i] = vf[i];
 +            }
 +            sfree(vf);
 +        }
 +    }
 +    else
 +    {
 +        if (dtc == xdr_datatype_double)
 +        {
 +            vd = (double *)vp;
 +        }
 +        else
 +        {
 +            snew(vd,nf);
 +        }
 +        res = xdr_vector(xd,(char *)vd,nf,
 +                         (unsigned int)sizeof(double),(xdrproc_t)xdr_double);
 +        if (res == 0)
 +        {
 +            return -1;
 +        }
 +        if (dtc != xdr_datatype_double)
 +        {
 +            for(i=0; i<nf; i++)
 +            {
 +                vp[i] = vd[i];
 +            }
 +            sfree(vd);
 +        }
 +    }
 +    
 +    if (list)
 +    {
 +        switch (erealtype)
 +        {
 +        case ecprREAL:
 +            pr_reals(list,0,st_names(cptp,ecpt),vp,nf);
 +            break;
 +        case ecprRVEC:
 +            pr_rvecs(list,0,st_names(cptp,ecpt),(rvec *)vp,nf/3);
 +            break;
 +        default:
 +            gmx_incons("Unknown checkpoint real type");
 +        }
 +    }
 +    if (va)
 +    {
 +        sfree(va);
 +    }
 +
 +    return 0;
 +}
 +
 +
 +/* This function stores n along with the reals for reading,
 + * but on reading it assumes that n matches the value in the checkpoint file,
 + * a fatal error is generated when this is not the case.
 + */
 +static int do_cpte_reals(XDR *xd,int cptp,int ecpt,int sflags,
 +                         int n,real **v,FILE *list)
 +{
 +    return do_cpte_reals_low(xd,cptp,ecpt,sflags,n,NULL,v,list,ecprREAL);
 +}
 +
 +/* This function does the same as do_cpte_reals,
 + * except that on reading it ignores the passed value of *n
 + * and stored the value read from the checkpoint file in *n.
 + */
 +static int do_cpte_n_reals(XDR *xd,int cptp,int ecpt,int sflags,
 +                           int *n,real **v,FILE *list)
 +{
 +    return do_cpte_reals_low(xd,cptp,ecpt,sflags,-1,n,v,list,ecprREAL);
 +}
 +
 +static int do_cpte_real(XDR *xd,int cptp,int ecpt,int sflags,
 +                        real *r,FILE *list)
 +{
 +    int n;
 +
 +    return do_cpte_reals_low(xd,cptp,ecpt,sflags,1,NULL,&r,list,ecprREAL);
 +}
 +
 +static int do_cpte_ints(XDR *xd,int cptp,int ecpt,int sflags,
 +                        int n,int **v,FILE *list)
 +{
 +    bool_t res=0;
 +    int  dtc=xdr_datatype_int;
 +    int *vp,*va=NULL;
 +    int  nf,dt,i;
 +    
 +    nf = n;
 +    res = xdr_int(xd,&nf);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (list == NULL && v != NULL && nf != n)
 +    {
 +        gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
 +    }
 +    dt = dtc;
 +    res = xdr_int(xd,&dt);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (dt != dtc)
 +    {
 +        gmx_fatal(FARGS,"Type mismatch for state entry %s, code type is %s, file type is %s\n",
 +                  st_names(cptp,ecpt),xdr_datatype_names[dtc],
 +                  xdr_datatype_names[dt]);
 +    }
 +    if (list || !(sflags & (1<<ecpt)) || v == NULL)
 +    {
 +        snew(va,nf);
 +        vp = va;
 +    }
 +    else
 +    {
 +        if (*v == NULL)
 +        {
 +            snew(*v,nf);
 +        }
 +        vp = *v;
 +    }
 +    res = xdr_vector(xd,(char *)vp,nf,
 +                     (unsigned int)sizeof(int),(xdrproc_t)xdr_int);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (list)
 +    {
 +        pr_ivec(list,0,st_names(cptp,ecpt),vp,nf,TRUE);
 +    }
 +    if (va)
 +    {
 +        sfree(va);
 +    }
 +
 +    return 0;
 +}
 +
 +static int do_cpte_int(XDR *xd,int cptp,int ecpt,int sflags,
 +                       int *i,FILE *list)
 +{
 +    return do_cpte_ints(xd,cptp,ecpt,sflags,1,&i,list);
 +}
 +
 +static int do_cpte_doubles(XDR *xd,int cptp,int ecpt,int sflags,
 +                           int n,double **v,FILE *list)
 +{
 +    bool_t res=0;
 +    int  dtc=xdr_datatype_double;
 +    double *vp,*va=NULL;
 +    int  nf,dt,i;
 +    
 +    nf = n;
 +    res = xdr_int(xd,&nf);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (list == NULL && nf != n)
 +    {
 +        gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
 +    }
 +    dt = dtc;
 +    res = xdr_int(xd,&dt);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (dt != dtc)
 +    {
 +        gmx_fatal(FARGS,"Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
 +                  st_names(cptp,ecpt),xdr_datatype_names[dtc],
 +                  xdr_datatype_names[dt]);
 +    }
 +    if (list || !(sflags & (1<<ecpt)))
 +    {
 +        snew(va,nf);
 +        vp = va;
 +    }
 +    else
 +    {
 +        if (*v == NULL)
 +        {
 +            snew(*v,nf);
 +        }
 +        vp = *v;
 +    }
 +    res = xdr_vector(xd,(char *)vp,nf,
 +                     (unsigned int)sizeof(double),(xdrproc_t)xdr_double);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (list)
 +    {
 +        pr_doubles(list,0,st_names(cptp,ecpt),vp,nf);
 +    }
 +    if (va)
 +    {
 +        sfree(va);
 +    }
 +
 +    return 0;
 +}
 +
 +static int do_cpte_double(XDR *xd,int cptp,int ecpt,int sflags,
 +                          double *r,FILE *list)
 +{
 +    return do_cpte_doubles(xd,cptp,ecpt,sflags,1,&r,list);
 +}
 +
 +
 +static int do_cpte_rvecs(XDR *xd,int cptp,int ecpt,int sflags,
 +                         int n,rvec **v,FILE *list)
 +{
 +    int n3;
 +
 +    return do_cpte_reals_low(xd,cptp,ecpt,sflags,
 +                             n*DIM,NULL,(real **)v,list,ecprRVEC);
 +}
 +
 +static int do_cpte_matrix(XDR *xd,int cptp,int ecpt,int sflags,
 +                          matrix v,FILE *list)
 +{
 +    real *vr;
 +    real ret;
 +
 +    vr = (real *)&(v[0][0]);
 +    ret = do_cpte_reals_low(xd,cptp,ecpt,sflags,
 +                            DIM*DIM,NULL,&vr,NULL,ecprMATRIX);
 +    
 +    if (list && ret == 0)
 +    {
 +        pr_rvecs(list,0,st_names(cptp,ecpt),v,DIM);
 +    }
 +    
 +    return ret;
 +}
 +
 +
 +static int do_cpte_nmatrix(XDR *xd,int cptp,int ecpt,int sflags,
 +                           int n, real **v,FILE *list)
 +{
 +    int i;
 +    real *vr;
 +    real ret,reti;
 +    char name[CPTSTRLEN];
 +
 +    ret = 0;
 +    if (v==NULL)
 +    {
 +        snew(v,n);
 +    }
 +    for (i=0;i<n;i++)
 +    {
 +        reti = 0;
 +        vr = v[i];
 +        reti = do_cpte_reals_low(xd,cptp,ecpt,sflags,n,NULL,&(v[i]),NULL,ecprREAL);
 +        if (list && reti == 0)
 +        {
 +            sprintf(name,"%s[%d]",st_names(cptp,ecpt),i);
 +            pr_reals(list,0,name,v[i],n);
 +        }
 +        if (reti == 0)
 +        {
 +            ret = 0;
 +        }
 +    }
 +    return ret;
 +}
 +
 +static int do_cpte_matrices(XDR *xd,int cptp,int ecpt,int sflags,
 +                            int n,matrix **v,FILE *list)
 +{
 +    bool_t res=0;
 +    matrix *vp,*va=NULL;
 +    real *vr;
 +    int  nf,i,j,k;
 +    int  ret;
 +
 +    nf = n;
 +    res = xdr_int(xd,&nf);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (list == NULL && nf != n)
 +    {
 +        gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
 +    }
 +    if (list || !(sflags & (1<<ecpt)))
 +    {
 +        snew(va,nf);
 +        vp = va;
 +    }
 +    else
 +    {
 +        if (*v == NULL)
 +        {
 +            snew(*v,nf);
 +        }
 +        vp = *v;
 +    }
 +    snew(vr,nf*DIM*DIM);
 +    for(i=0; i<nf; i++)
 +    {
 +        for(j=0; j<DIM; j++)
 +        {
 +            for(k=0; k<DIM; k++)
 +            {
 +                vr[(i*DIM+j)*DIM+k] = vp[i][j][k];
 +            }
 +        }
 +    }
 +    ret = do_cpte_reals_low(xd,cptp,ecpt,sflags,
 +                            nf*DIM*DIM,NULL,&vr,NULL,ecprMATRIX);
 +    for(i=0; i<nf; i++)
 +    {
 +        for(j=0; j<DIM; j++)
 +        {
 +            for(k=0; k<DIM; k++)
 +            {
 +                vp[i][j][k] = vr[(i*DIM+j)*DIM+k];
 +            }
 +        }
 +    }
 +    sfree(vr);
 +    
 +    if (list && ret == 0)
 +    {
 +        for(i=0; i<nf; i++)
 +        {
 +            pr_rvecs(list,0,st_names(cptp,ecpt),vp[i],DIM);
 +        }
 +    }
 +    if (va)
 +    {
 +        sfree(va);
 +    }
 +    
 +    return ret;
 +}
 +
 +static void do_cpt_header(XDR *xd,gmx_bool bRead,int *file_version,
 +                          char **version,char **btime,char **buser,char **bhost,
 +                          int *double_prec,
 +                          char **fprog,char **ftime,
 +                          int *eIntegrator,int *simulation_part,
 +                          gmx_large_int_t *step,double *t,
 +                          int *nnodes,int *dd_nc,int *npme,
 +                          int *natoms,int *ngtc, int *nnhpres, int *nhchainlength,
 +                          int *nlambda, int *flags_state,
 +                          int *flags_eks,int *flags_enh, int *flags_dfh,
 +                          FILE *list)
 +{
 +    bool_t res=0;
 +    int  magic;
 +    int  idum=0;
 +    int  i;
 +    char *fhost;
 +
 +    if (bRead)
 +    {
 +        magic = -1;
 +    }
 +    else
 +    {
 +        magic = CPT_MAGIC1;
 +    }
 +    res = xdr_int(xd,&magic);
 +    if (res == 0)
 +    {
 +        gmx_fatal(FARGS,"The checkpoint file is empty/corrupted, or maybe you are out of disk space?");
 +    }
 +    if (magic != CPT_MAGIC1)
 +    {
 +        gmx_fatal(FARGS,"Start of file magic number mismatch, checkpoint file has %d, should be %d\n"
 +                  "The checkpoint file is corrupted or not a checkpoint file",
 +                  magic,CPT_MAGIC1);
 +    }
 +    if (!bRead)
 +    {
 +        snew(fhost,255);
 +#ifdef HAVE_UNISTD_H
 +        if (gethostname(fhost,255) != 0)
 +        {
 +            sprintf(fhost,"unknown");
 +        }
 +#else
 +        sprintf(fhost,"unknown");
 +#endif  
 +    }
 +    do_cpt_string_err(xd,bRead,"GROMACS version"           ,version,list);
 +    do_cpt_string_err(xd,bRead,"GROMACS build time"        ,btime,list);
 +    do_cpt_string_err(xd,bRead,"GROMACS build user"        ,buser,list);
 +    do_cpt_string_err(xd,bRead,"GROMACS build host"        ,bhost,list);
 +    do_cpt_string_err(xd,bRead,"generating program"        ,fprog,list);
 +    do_cpt_string_err(xd,bRead,"generation time"           ,ftime,list);
 +    *file_version = cpt_version;
 +    do_cpt_int_err(xd,"checkpoint file version",file_version,list);
 +    if (*file_version > cpt_version)
 +    {
 +        gmx_fatal(FARGS,"Attempting to read a checkpoint file of version %d with code of version %d\n",*file_version,cpt_version);
 +    }
 +    if (*file_version >= 13)
 +    {
 +        do_cpt_int_err(xd,"GROMACS double precision",double_prec,list);
 +    }
 +    else
 +    {
 +        *double_prec = -1;
 +    }
 +    if (*file_version >= 12)
 +    {
 +        do_cpt_string_err(xd,bRead,"generating host"           ,&fhost,list);
 +        if (list == NULL)
 +        {
 +            sfree(fhost);
 +        }
 +    }
 +    do_cpt_int_err(xd,"#atoms"            ,natoms     ,list);
 +    do_cpt_int_err(xd,"#T-coupling groups",ngtc       ,list);
 +    if (*file_version >= 10) 
 +    {
 +        do_cpt_int_err(xd,"#Nose-Hoover T-chains",nhchainlength,list);
 +    }
 +    else
 +    {
 +        *nhchainlength = 1;
 +    }
 +    if (*file_version >= 11)
 +    {
 +        do_cpt_int_err(xd,"#Nose-Hoover T-chains for barostat ",nnhpres,list);
 +    }
 +    else
 +    {
 +        *nnhpres = 0;
 +    }
 +    if (*file_version >= 14)
 +    {
 +        do_cpt_int_err(xd,"# of total lambda states ",nlambda,list);
 +    }
 +    else
 +    {
 +        *nlambda = 0;
 +    }
 +    do_cpt_int_err(xd,"integrator"        ,eIntegrator,list);
 +      if (*file_version >= 3)
 +      {
 +              do_cpt_int_err(xd,"simulation part #", simulation_part,list);
 +      }
 +      else
 +      {
 +              *simulation_part = 1;
 +      }
 +    if (*file_version >= 5)
 +    {
 +        do_cpt_step_err(xd,"step"         ,step       ,list);
 +    }
 +    else
 +    {
 +        do_cpt_int_err(xd,"step"          ,&idum      ,list);
 +        *step = idum;
 +    }
 +    do_cpt_double_err(xd,"t"              ,t          ,list);
 +    do_cpt_int_err(xd,"#PP-nodes"         ,nnodes     ,list);
 +    idum = 1;
 +    do_cpt_int_err(xd,"dd_nc[x]",dd_nc ? &(dd_nc[0]) : &idum,list);
 +    do_cpt_int_err(xd,"dd_nc[y]",dd_nc ? &(dd_nc[1]) : &idum,list);
 +    do_cpt_int_err(xd,"dd_nc[z]",dd_nc ? &(dd_nc[2]) : &idum,list);
 +    do_cpt_int_err(xd,"#PME-only nodes",npme,list);
 +    do_cpt_int_err(xd,"state flags",flags_state,list);
 +      if (*file_version >= 4)
 +    {
 +        do_cpt_int_err(xd,"ekin data flags",flags_eks,list);
 +        do_cpt_int_err(xd,"energy history flags",flags_enh,list);
 +    }
 +    else
 +    {
 +        *flags_eks  = 0;
 +        *flags_enh   = (*flags_state >> (estORIRE_DTAV+1));
 +        *flags_state = (*flags_state & ~((1<<(estORIRE_DTAV+1)) |
 +                                         (1<<(estORIRE_DTAV+2)) |
 +                                         (1<<(estORIRE_DTAV+3))));
 +    }
 +      if (*file_version >= 14)
 +    {
 +        do_cpt_int_err(xd,"df history flags",flags_dfh,list);
 +    } else {
 +        *flags_dfh = 0;
 +    }
 +}
 +
 +static int do_cpt_footer(XDR *xd,gmx_bool bRead,int file_version)
 +{
 +    bool_t res=0;
 +    int  magic;
 +    
 +    if (file_version >= 2)
 +    {
 +        magic = CPT_MAGIC2;
 +        res = xdr_int(xd,&magic);
 +        if (res == 0)
 +        {
 +            cp_error();
 +        }
 +        if (magic != CPT_MAGIC2)
 +        {
 +            return -1;
 +        }
 +    }
 +
 +    return 0;
 +}
 +
/* Read or write the t_state fields selected by fflags to/from a checkpoint.
 *
 * fflags is a bit mask over the est* state-entry indices; for each set bit
 * the matching field of state is serialized with the corresponding
 * do_cpte_* helper. When list is non-NULL the entries are instead printed
 * for inspection. bReadRNG selects whether the (per-node) RNG state is
 * actually read back or skipped. Returns 0 on success, or the first
 * non-zero helper return value.
 */
static int do_cpt_state(XDR *xd,gmx_bool bRead,
                        int fflags,t_state *state,
                        gmx_bool bReadRNG,FILE *list)
{
    int  sflags;
    int  **rng_p,**rngi_p;
    int  i;
    int  ret;
    int  nnht,nnhtp;

    ret = 0;
    
    /* Nose-Hoover degrees of freedom: chain length times the number of
       T-coupling groups, resp. pressure-coupling variables */
    nnht = state->nhchainlength*state->ngtc;
    nnhtp = state->nhchainlength*state->nnhpres;

    if (bReadRNG)
    {
        rng_p  = (int **)&state->ld_rng;
        rngi_p = &state->ld_rngi;
    }
    else
    {
        /* Do not read the RNG data */
        rng_p  = NULL;
        rngi_p = NULL;
    }
    /* We want the MC_RNG the same across all the notes for now -- lambda MC is global */

    sflags = state->flags;
    for(i=0; (i<estNR && ret == 0); i++)
    {
        if (fflags & (1<<i))
        {
            switch (i)
            {
            case estLAMBDA:  ret = do_cpte_reals(xd,cptpEST,i,sflags,efptNR,&(state->lambda),list); break;
            case estFEPSTATE: ret = do_cpte_int (xd,cptpEST,i,sflags,&state->fep_state,list); break;
            case estBOX:     ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->box,list); break;
            case estBOX_REL: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->box_rel,list); break;
            case estBOXV:    ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->boxv,list); break;
            case estPRES_PREV: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->pres_prev,list); break;
            case estSVIR_PREV:  ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->svir_prev,list); break;
            case estFVIR_PREV:  ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->fvir_prev,list); break;
            case estNH_XI:   ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnht,&state->nosehoover_xi,list); break;
            case estNH_VXI:  ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnht,&state->nosehoover_vxi,list); break;
            case estNHPRES_XI:   ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnhtp,&state->nhpres_xi,list); break;
            case estNHPRES_VXI:  ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnhtp,&state->nhpres_vxi,list); break;
            case estTC_INT:  ret = do_cpte_doubles(xd,cptpEST,i,sflags,state->ngtc,&state->therm_integral,list); break;
            case estVETA:    ret = do_cpte_real(xd,cptpEST,i,sflags,&state->veta,list); break;
            case estVOL0:    ret = do_cpte_real(xd,cptpEST,i,sflags,&state->vol0,list); break;
            case estX:       ret = do_cpte_rvecs(xd,cptpEST,i,sflags,state->natoms,&state->x,list); break;
            case estV:       ret = do_cpte_rvecs(xd,cptpEST,i,sflags,state->natoms,&state->v,list); break;
            case estSDX:     ret = do_cpte_rvecs(xd,cptpEST,i,sflags,state->natoms,&state->sd_X,list); break;
            case estLD_RNG:  ret = do_cpte_ints(xd,cptpEST,i,sflags,state->nrng,rng_p,list); break;
            case estLD_RNGI: ret = do_cpte_ints(xd,cptpEST,i,sflags,state->nrngi,rngi_p,list); break;
            case estMC_RNG:  ret = do_cpte_ints(xd,cptpEST,i,sflags,state->nmcrng,(int **)&state->mc_rng,list); break;
            case estMC_RNGI: ret = do_cpte_ints(xd,cptpEST,i,sflags,1,&state->mc_rngi,list); break;
            case estDISRE_INITF:  ret = do_cpte_real (xd,cptpEST,i,sflags,&state->hist.disre_initf,list); break;
            case estDISRE_RM3TAV: ret = do_cpte_reals(xd,cptpEST,i,sflags,state->hist.ndisrepairs,&state->hist.disre_rm3tav,list); break;
            case estORIRE_INITF:  ret = do_cpte_real (xd,cptpEST,i,sflags,&state->hist.orire_initf,list); break;
            case estORIRE_DTAV:   ret = do_cpte_reals(xd,cptpEST,i,sflags,state->hist.norire_Dtav,&state->hist.orire_Dtav,list); break;
            default:
                gmx_fatal(FARGS,"Unknown state entry %d\n"
                          "You are probably reading a new checkpoint file with old code",i);
            }
        }
    }
    
    return ret;
}
 +
/* Read or write the kinetic-energy state (ekinstate_t) entries selected by
 * fflags. Each set bit in fflags (over the eeks* indices) is serialized
 * with the matching do_cpte_* helper; when list is non-NULL entries are
 * only printed. Returns 0 on success, or the first failing helper's value.
 */
static int do_cpt_ekinstate(XDR *xd,gmx_bool bRead,
                            int fflags,ekinstate_t *ekins,
                            FILE *list)
{
    int  i;
    int  ret;

    ret = 0;

    for(i=0; (i<eeksNR && ret == 0); i++)
    {
        if (fflags & (1<<i))
        {
            switch (i)
            {
                
            case eeksEKIN_N:     ret = do_cpte_int(xd,cptpEEKS,i,fflags,&ekins->ekin_n,list); break;
            case eeksEKINH :     ret = do_cpte_matrices(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinh,list); break;
            case eeksEKINF:      ret = do_cpte_matrices(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinf,list); break;
            case eeksEKINO:      ret = do_cpte_matrices(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinh_old,list); break;
            case eeksEKINTOTAL:  ret = do_cpte_matrix(xd,cptpEEKS,i,fflags,ekins->ekin_total,list); break;
            case eeksEKINSCALEF: ret = do_cpte_doubles(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinscalef_nhc,list); break;
            /* NOTE(review): the five entries below pass (xd,1,cptpEEKS,...)
               while the entries above pass (xd,cptpEEKS,i,...) -- the
               cptp/ecpt arguments look swapped. Since ecpt is also used as
               the flag-bit index inside the do_cpte_* helpers, "fixing" the
               order would change which bit is tested and could break reading
               of checkpoints written by existing binaries -- confirm against
               the helper implementations before touching. */
            case eeksVSCALE:     ret = do_cpte_doubles(xd,1,cptpEEKS,fflags,ekins->ekin_n,&ekins->vscale_nhc,list); break;
            case eeksEKINSCALEH: ret = do_cpte_doubles(xd,1,cptpEEKS,fflags,ekins->ekin_n,&ekins->ekinscaleh_nhc,list); break;
            case eeksDEKINDL :   ret = do_cpte_real(xd,1,cptpEEKS,fflags,&ekins->dekindl,list); break;
            case eeksMVCOS:      ret = do_cpte_real(xd,1,cptpEEKS,fflags,&ekins->mvcos,list); break;
            default:
                gmx_fatal(FARGS,"Unknown ekin data state entry %d\n"
                          "You are probably reading a new checkpoint file with old code",i);
            }
        }
    }
    
    return ret;
}
 +
 +
/* Read or write the energy history (averages, sums, delta-H histograms)
 * selected by fflags over the eenh* indices.
 *
 * On reading, the counters are zeroed first and, when the checkpoint
 * carries delta-H data, enerhist->dht is allocated here. After the entry
 * loop, three backward-compatibility fix-ups synthesize fields that older
 * file formats did not store (sum_sim from sum, nsteps from nsum,
 * nsteps_sim from nsum_sim). Returns 0 on success, or the first failing
 * helper's value.
 */
static int do_cpt_enerhist(XDR *xd,gmx_bool bRead,
                           int fflags,energyhistory_t *enerhist,
                           FILE *list)
{
    int  i;
    int  j;
    int  ret;

    ret = 0;

    if (bRead)
    {
        enerhist->nsteps     = 0;
        enerhist->nsum       = 0;
        enerhist->nsteps_sim = 0;
        enerhist->nsum_sim   = 0;
        enerhist->dht        = NULL;

        if (fflags & (1<< eenhENERGY_DELTA_H_NN) )
        {
            snew(enerhist->dht,1);
            enerhist->dht->ndh = NULL;
            enerhist->dht->dh = NULL;
            enerhist->dht->start_lambda_set=FALSE;
        }
    }

    for(i=0; (i<eenhNR && ret == 0); i++)
    {
        if (fflags & (1<<i))
        {
            switch (i)
            {
            case eenhENERGY_N:     ret = do_cpte_int(xd,cptpEENH,i,fflags,&enerhist->nener,list); break;
            case eenhENERGY_AVER:  ret = do_cpte_doubles(xd,cptpEENH,i,fflags,enerhist->nener,&enerhist->ener_ave,list); break;
            case eenhENERGY_SUM:   ret = do_cpte_doubles(xd,cptpEENH,i,fflags,enerhist->nener,&enerhist->ener_sum,list); break;
            case eenhENERGY_NSUM:  do_cpt_step_err(xd,eenh_names[i],&enerhist->nsum,list); break;
            case eenhENERGY_SUM_SIM: ret = do_cpte_doubles(xd,cptpEENH,i,fflags,enerhist->nener,&enerhist->ener_sum_sim,list); break;
            case eenhENERGY_NSUM_SIM:   do_cpt_step_err(xd,eenh_names[i],&enerhist->nsum_sim,list); break;
            case eenhENERGY_NSTEPS:     do_cpt_step_err(xd,eenh_names[i],&enerhist->nsteps,list); break;
            case eenhENERGY_NSTEPS_SIM: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsteps_sim,list); break;
            /* NOTE(review): the three DELTA_H cases below dereference
               enerhist->dht, which on read is only allocated when
               eenhENERGY_DELTA_H_NN is set in fflags -- presumably writers
               always set the NN flag together with these; verify. */
            case eenhENERGY_DELTA_H_NN: do_cpt_int_err(xd,eenh_names[i], &(enerhist->dht->nndh), list);
                if (bRead) /* now allocate memory for it */
                {
                    snew(enerhist->dht->dh, enerhist->dht->nndh);
                    snew(enerhist->dht->ndh, enerhist->dht->nndh);
                    for(j=0;j<enerhist->dht->nndh;j++)
                    {
                        enerhist->dht->ndh[j] = 0;
                        enerhist->dht->dh[j] = NULL;
                    }
                }
                break;
            case eenhENERGY_DELTA_H_LIST:
                for(j=0;j<enerhist->dht->nndh;j++)
                {
                    ret=do_cpte_n_reals(xd, cptpEENH, i, fflags, &enerhist->dht->ndh[j], &(enerhist->dht->dh[j]), list);
                }
                break;
            case eenhENERGY_DELTA_H_STARTTIME:
                ret=do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_time), list); break;
            case eenhENERGY_DELTA_H_STARTLAMBDA:
                ret=do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_lambda), list); break;
            default:
                gmx_fatal(FARGS,"Unknown energy history entry %d\n"
                          "You are probably reading a new checkpoint file with old code",i);
            }
        }
    }

    if ((fflags & (1<<eenhENERGY_SUM)) && !(fflags & (1<<eenhENERGY_SUM_SIM)))
    {
        /* Assume we have an old file format and copy sum to sum_sim */
        srenew(enerhist->ener_sum_sim,enerhist->nener);
        for(i=0; i<enerhist->nener; i++)
        {
            enerhist->ener_sum_sim[i] = enerhist->ener_sum[i];
        }
        fflags |= (1<<eenhENERGY_SUM_SIM);
    }
    
    if ( (fflags & (1<<eenhENERGY_NSUM)) &&
        !(fflags & (1<<eenhENERGY_NSTEPS)))
    {
        /* Assume we have an old file format and copy nsum to nsteps */
        enerhist->nsteps = enerhist->nsum;
        fflags |= (1<<eenhENERGY_NSTEPS);
    }
    if ( (fflags & (1<<eenhENERGY_NSUM_SIM)) &&
        !(fflags & (1<<eenhENERGY_NSTEPS_SIM)))
    {
        /* Assume we have an old file format and copy nsum to nsteps */
        enerhist->nsteps_sim = enerhist->nsum_sim;
        fflags |= (1<<eenhENERGY_NSTEPS_SIM);
    }

    return ret;
}
 +
/* Read or write the expanded-ensemble free-energy history (df_history_t)
 * entries selected by fflags over the edfh* indices. All per-lambda arrays
 * are sized by dfhist->nlambda, which must already be set by the caller.
 * When list is non-NULL the entries are only printed. Returns 0 on
 * success, or the first failing helper's value.
 */
static int do_cpt_df_hist(XDR *xd,gmx_bool bRead,int fflags,df_history_t *dfhist,FILE *list)
{
    int  i,nlambda;
    int  ret;

    nlambda = dfhist->nlambda;
    ret = 0;

    for(i=0; (i<edfhNR && ret == 0); i++)
    {
        if (fflags & (1<<i))
        {
            switch (i)
            {
            case edfhBEQUIL:       ret = do_cpte_int(xd,cptpEDFH,i,fflags,&dfhist->bEquil,list); break;
            case edfhNATLAMBDA:    ret = do_cpte_ints(xd,cptpEDFH,i,fflags,nlambda,&dfhist->n_at_lam,list); break;
            case edfhWLHISTO:      ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->wl_histo,list); break;
            case edfhWLDELTA:      ret = do_cpte_real(xd,cptpEDFH,i,fflags,&dfhist->wl_delta,list); break;
            case edfhSUMWEIGHTS:   ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_weights,list); break;
            case edfhSUMDG:        ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_dg,list); break;
            case edfhSUMMINVAR:    ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_minvar,list); break;
            case edfhSUMVAR:       ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_variance,list); break;
            case edfhACCUMP:       ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_p,list); break;
            case edfhACCUMM:       ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_m,list); break;
            case edfhACCUMP2:      ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_p2,list); break;
            case edfhACCUMM2:      ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_m2,list); break;
            case edfhTIJ:          ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->Tij,list); break;
            case edfhTIJEMP:       ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->Tij_empirical,list); break;

            default:
                gmx_fatal(FARGS,"Unknown df history entry %d\n"
                          "You are probably reading a new checkpoint file with old code",i);
            }
        }
    }

    return ret;
}
 +
 +static int do_cpt_files(XDR *xd, gmx_bool bRead, 
 +                        gmx_file_position_t **p_outputfiles, int *nfiles, 
 +                        FILE *list, int file_version)
 +{
 +    int    i,j;
 +    gmx_off_t  offset;
 +    gmx_off_t  mask = 0xFFFFFFFFL;
 +    int    offset_high,offset_low;
 +    char   *buf;
 +    gmx_file_position_t *outputfiles;
 +
 +    if (do_cpt_int(xd,"number of output files",nfiles,list) != 0)
 +    {
 +        return -1;
 +    }
 +
 +    if(bRead)
 +    {
 +        snew(*p_outputfiles,*nfiles);
 +    }
 +
 +    outputfiles = *p_outputfiles;
 +
 +    for(i=0;i<*nfiles;i++)
 +    {
 +        /* 64-bit XDR numbers are not portable, so it is stored as separate high/low fractions */
 +        if(bRead)
 +        {
 +            do_cpt_string_err(xd,bRead,"output filename",&buf,list);
 +            strncpy(outputfiles[i].filename,buf,CPTSTRLEN-1);
 +            if(list==NULL)
 +            {
 +                sfree(buf);                   
 +            }
 +
 +            if (do_cpt_int(xd,"file_offset_high",&offset_high,list) != 0)
 +            {
 +                return -1;
 +            }
 +            if (do_cpt_int(xd,"file_offset_low",&offset_low,list) != 0)
 +            {
 +                return -1;
 +            }
 +#if (SIZEOF_GMX_OFF_T > 4)
 +            outputfiles[i].offset = ( ((gmx_off_t) offset_high) << 32 ) | ( (gmx_off_t) offset_low & mask );
 +#else
 +            outputfiles[i].offset = offset_low;
 +#endif
 +        }
 +        else
 +        {
 +            buf = outputfiles[i].filename;
 +            do_cpt_string_err(xd,bRead,"output filename",&buf,list);
 +            /* writing */
 +            offset      = outputfiles[i].offset;
 +            if (offset == -1)
 +            {
 +                offset_low  = -1;
 +                offset_high = -1;
 +            }
 +            else
 +            {
 +#if (SIZEOF_GMX_OFF_T > 4)
 +                offset_low  = (int) (offset & mask);
 +                offset_high = (int) ((offset >> 32) & mask);
 +#else
 +                offset_low  = offset;
 +                offset_high = 0;
 +#endif
 +            }
 +            if (do_cpt_int(xd,"file_offset_high",&offset_high,list) != 0)
 +            {
 +                return -1;
 +            }
 +            if (do_cpt_int(xd,"file_offset_low",&offset_low,list) != 0)
 +            {
 +                return -1;
 +            }
 +        }
 +        if (file_version >= 8)
 +        {
 +            if (do_cpt_int(xd,"file_checksum_size",&(outputfiles[i].chksum_size),
 +                           list) != 0)
 +            {
 +                return -1;
 +            }
 +            if (do_cpt_u_chars(xd,"file_checksum",16,outputfiles[i].chksum,list) != 0)
 +            {
 +                return -1;
 +            }
 +        } 
 +        else 
 +        {
 +            outputfiles[i].chksum_size = -1;
 +        }
 +    }
 +    return 0;
 +}
 +
 +
/* Write a complete checkpoint file for the current simulation state.
 *
 * The data is first written to a temporary file named "<fn>_step<N>.<ext>",
 * fsync'ed together with all other open output files, and only then renamed
 * over fn, so an existing checkpoint is never corrupted by a partial write.
 * An existing fn is preserved as "<fn>_prev.<ext>" beforehand.
 *
 * fn              final checkpoint file name
 * bNumberAndKeep  when TRUE the step-numbered temporary file is kept and
 *                 never renamed over fn
 * fplog           log file, may be NULL
 * cr              communication record (node counts, DD grid)
 * eIntegrator     integrator type recorded in the header
 * simulation_part simulation part number recorded in the header
 * bExpanded       whether expanded-ensemble (df_history) data is saved
 * elamstats       lambda-statistics method; selects df_history flags
 * step, t         current step number and time
 * state           the simulation state to serialize
 */
void write_checkpoint(const char *fn,gmx_bool bNumberAndKeep,
                      FILE *fplog,t_commrec *cr,
                      int eIntegrator,int simulation_part,
                      gmx_bool bExpanded, int elamstats,
                      gmx_large_int_t step,double t,t_state *state)
{
    t_fileio *fp;
    int  file_version;
    char *version;
    char *btime;
    char *buser;
    char *bhost;
    int  double_prec;
    char *fprog;
    char *fntemp; /* the temporary checkpoint file name */
    time_t now;
    char timebuf[STRLEN];
    int  nppnodes,npmenodes,flag_64bit;
    char buf[1024],suffix[5+STEPSTRSIZE],sbuf[STEPSTRSIZE];
    gmx_file_position_t *outputfiles;
    int  noutputfiles;
    char *ftime;
    int  flags_eks,flags_enh,flags_dfh,i;
    /* NOTE(review): despite the name, ret holds the file handle of a failed
       fsync (NULL on success), not an integer status code. */
    t_fileio *ret;
              
    /* Determine PP/PME node counts for the header */
    if (PAR(cr))
    {
        if (DOMAINDECOMP(cr))
        {
            nppnodes  = cr->dd->nnodes;
            npmenodes = cr->npmenodes;
        }
        else
        {
            nppnodes  = cr->nnodes;
            npmenodes = 0;
        }
    }
    else
    {
        nppnodes  = 1;
        npmenodes = 0;
    }

    /* make the new temporary filename */
    snew(fntemp, strlen(fn)+5+STEPSTRSIZE);
    strcpy(fntemp,fn);
    /* Strip the extension, insert "_step<N>", re-append the extension */
    fntemp[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
    sprintf(suffix,"_%s%s","step",gmx_step_str(step,sbuf));
    strcat(fntemp,suffix);
    strcat(fntemp,fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
   
    time(&now);
    gmx_ctime_r(&now,timebuf,STRLEN);

    if (fplog)
    { 
        fprintf(fplog,"Writing checkpoint, step %s at %s\n\n",
                gmx_step_str(step,buf),timebuf);
    }
    
    /* Get offsets for open files */
    gmx_fio_get_output_file_positions(&outputfiles, &noutputfiles);

    fp = gmx_fio_open(fntemp,"w");
      
    /* Select which kinetic-energy entries are written; only meaningful
       when the ekin state is current */
    if (state->ekinstate.bUpToDate)
    {
        flags_eks =
            ((1<<eeksEKIN_N) | (1<<eeksEKINH) | (1<<eeksEKINF) | 
             (1<<eeksEKINO) | (1<<eeksEKINSCALEF) | (1<<eeksEKINSCALEH) | 
             (1<<eeksVSCALE) | (1<<eeksDEKINDL) | (1<<eeksMVCOS));
    }
    else
    {
        flags_eks = 0;
    }

    /* Select which energy-history entries are written, based on which
       accumulators actually contain data */
    flags_enh = 0;
    if (state->enerhist.nsum > 0 || state->enerhist.nsum_sim > 0)
    {
        flags_enh |= (1<<eenhENERGY_N);
        if (state->enerhist.nsum > 0)
        {
            flags_enh |= ((1<<eenhENERGY_AVER) | (1<<eenhENERGY_SUM) |
                          (1<<eenhENERGY_NSTEPS) | (1<<eenhENERGY_NSUM));
        }
        if (state->enerhist.nsum_sim > 0)
        {
            flags_enh |= ((1<<eenhENERGY_SUM_SIM) | (1<<eenhENERGY_NSTEPS_SIM) |
                          (1<<eenhENERGY_NSUM_SIM));
        }
        if (state->enerhist.dht)
        {
            flags_enh |= ( (1<< eenhENERGY_DELTA_H_NN) |
                           (1<< eenhENERGY_DELTA_H_LIST) | 
                           (1<< eenhENERGY_DELTA_H_STARTTIME) |
                           (1<< eenhENERGY_DELTA_H_STARTLAMBDA) );
        }
    }

    /* Select which free-energy-history entries are written, depending on
       the lambda-statistics method in use */
    if (bExpanded)
    {
        flags_dfh = ((1<<edfhBEQUIL) | (1<<edfhNATLAMBDA) | (1<<edfhSUMWEIGHTS) |  (1<<edfhSUMDG)  |
                     (1<<edfhTIJ) | (1<<edfhTIJEMP));
        if (EWL(elamstats))
        {
            flags_dfh |= ((1<<edfhWLDELTA) | (1<<edfhWLHISTO));
        }
        if ((elamstats == elamstatsMINVAR) || (elamstats == elamstatsBARKER) || (elamstats == elamstatsMETROPOLIS))
        {
            flags_dfh |= ((1<<edfhACCUMP) | (1<<edfhACCUMM) | (1<<edfhACCUMP2) | (1<<edfhACCUMM2)
                          | (1<<edfhSUMMINVAR) | (1<<edfhSUMVAR));
        }
    } else {
        flags_dfh = 0;
    }
    
    /* We can check many more things now (CPU, acceleration, etc), but
     * it is highly unlikely to have two separate builds with exactly
     * the same version, user, time, and build host!
     */

    version = gmx_strdup(VERSION);
    btime   = gmx_strdup(BUILD_TIME);
    buser   = gmx_strdup(BUILD_USER);
    bhost   = gmx_strdup(BUILD_HOST);

    double_prec = GMX_CPT_BUILD_DP;
    fprog   = gmx_strdup(Program());

    ftime   = &(timebuf[0]);
    
    do_cpt_header(gmx_fio_getxdr(fp),FALSE,&file_version,
                  &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
                  &eIntegrator,&simulation_part,&step,&t,&nppnodes,
                  DOMAINDECOMP(cr) ? cr->dd->nc : NULL,&npmenodes,
                  &state->natoms,&state->ngtc,&state->nnhpres,
                  &state->nhchainlength,&(state->dfhist.nlambda),&state->flags,&flags_eks,&flags_enh,&flags_dfh,
                  NULL);
    
    sfree(version);
    sfree(btime);
    sfree(buser);
    sfree(bhost);
    sfree(fprog);

    /* Write all data sections; any failure is treated as fatal */
    if((do_cpt_state(gmx_fio_getxdr(fp),FALSE,state->flags,state,TRUE,NULL) < 0)        ||
       (do_cpt_ekinstate(gmx_fio_getxdr(fp),FALSE,flags_eks,&state->ekinstate,NULL) < 0)||
       (do_cpt_enerhist(gmx_fio_getxdr(fp),FALSE,flags_enh,&state->enerhist,NULL) < 0)  ||
       (do_cpt_df_hist(gmx_fio_getxdr(fp),FALSE,flags_dfh,&state->dfhist,NULL) < 0)  ||
       (do_cpt_files(gmx_fio_getxdr(fp),FALSE,&outputfiles,&noutputfiles,NULL,
                     file_version) < 0))
    {
        gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
    }

    do_cpt_footer(gmx_fio_getxdr(fp),FALSE,file_version);

    /* we really, REALLY, want to make sure to physically write the checkpoint, 
       and all the files it depends on, out to disk. Because we've
       opened the checkpoint with gmx_fio_open(), it's in our list
       of open files.  */
    ret=gmx_fio_all_output_fsync();

    if (ret)
    {
        char buf[STRLEN];
        sprintf(buf,
                "Cannot fsync '%s'; maybe you are out of disk space?",
                gmx_fio_getname(ret));

        if (getenv(GMX_IGNORE_FSYNC_FAILURE_ENV)==NULL)
        {
            gmx_file(buf);
        }
        else
        {
            gmx_warning(buf);
        }
    }

    if( gmx_fio_close(fp) != 0)
    {
        gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
    }

    /* we don't move the checkpoint if the user specified they didn't want it,
       or if the fsyncs failed */
    if (!bNumberAndKeep && !ret)
    {
        if (gmx_fexist(fn))
        {
            /* Rename the previous checkpoint file */
            strcpy(buf,fn);
            buf[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
            strcat(buf,"_prev");
            strcat(buf,fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
#ifndef GMX_FAHCORE
            /* we copy here so that if something goes wrong between now and
             * the rename below, there's always a state.cpt.
             * If renames are atomic (such as in POSIX systems),
             * this copying should be unneccesary.
             */
            gmx_file_copy(fn, buf, FALSE);
            /* We don't really care if this fails: 
             * there's already a new checkpoint.
             */
#else
            gmx_file_rename(fn, buf);
#endif
        }
        if (gmx_file_rename(fntemp, fn) != 0)
        {
            gmx_file("Cannot rename checkpoint file; maybe you are out of disk space?");
        }
    }

    sfree(outputfiles);
    sfree(fntemp);

#ifdef GMX_FAHCORE
    /*code for alternate checkpointing scheme.  moved from top of loop over 
      steps */
    fcRequestCheckPoint();
    /* NOTE(review): step is gmx_large_int_t but is printed with %d, and the
       first gmx_fatal argument is the literal 3 where other call sites use
       FARGS (0,__FILE__,__LINE__) -- looks like a format/argument mismatch;
       confirm against gmx_fatal's signature before changing. */
    if ( fcCheckPointParallel( cr->nodeid, NULL,0) == 0 ) {
        gmx_fatal( 3,__FILE__,__LINE__, "Checkpoint error on step %d\n", step );
    }
#endif /* end GMX_FAHCORE block */
}
 +
 +static void print_flag_mismatch(FILE *fplog,int sflags,int fflags)
 +{
 +    int i;
 +    
 +    fprintf(fplog,"\nState entry mismatch between the simulation and the checkpoint file\n");
 +    fprintf(fplog,"Entries which are not present in the checkpoint file will not be updated\n");
 +    fprintf(fplog,"  %24s    %11s    %11s\n","","simulation","checkpoint");
 +    for(i=0; i<estNR; i++)
 +    {
 +        if ((sflags & (1<<i)) || (fflags & (1<<i)))
 +        {
 +            fprintf(fplog,"  %24s    %11s    %11s\n",
 +                    est_names[i],
 +                    (sflags & (1<<i)) ? "  present  " : "not present",
 +                    (fflags & (1<<i)) ? "  present  " : "not present");
 +        }
 +    }
 +}
 +
 +static void check_int(FILE *fplog,const char *type,int p,int f,gmx_bool *mm)
 +{
 +      FILE *fp = fplog ? fplog : stderr;
 +
 +    if (p != f)
 +    {
 +              fprintf(fp,"  %s mismatch,\n",type);
 +              fprintf(fp,"    current program: %d\n",p);
 +              fprintf(fp,"    checkpoint file: %d\n",f);
 +              fprintf(fp,"\n");
 +        *mm = TRUE;
 +    }
 +}
 +
 +static void check_string(FILE *fplog,const char *type,const char *p,
 +                         const char *f,gmx_bool *mm)
 +{
 +      FILE *fp = fplog ? fplog : stderr;
 +      
 +    if (strcmp(p,f) != 0)
 +    {
 +              fprintf(fp,"  %s mismatch,\n",type);
 +              fprintf(fp,"    current program: %s\n",p);
 +              fprintf(fp,"    checkpoint file: %s\n",f);
 +              fprintf(fp,"\n");
 +        *mm = TRUE;
 +    }
 +}
 +
 +static void check_match(FILE *fplog,
 +                        char *version,
 +                        char *btime,char *buser,char *bhost,int double_prec,
 +                        char *fprog,
 +                        t_commrec *cr,gmx_bool bPartDecomp,int npp_f,int npme_f,
 +                        ivec dd_nc,ivec dd_nc_f)
 +{
 +    int  npp;
 +    gmx_bool mm;
 +    
 +    mm = FALSE;
 +    
 +    check_string(fplog,"Version"      ,VERSION      ,version,&mm);
 +    check_string(fplog,"Build time"   ,BUILD_TIME   ,btime  ,&mm);
 +    check_string(fplog,"Build user"   ,BUILD_USER   ,buser  ,&mm);
 +    check_string(fplog,"Build host"   ,BUILD_HOST   ,bhost  ,&mm);
 +    check_int   (fplog,"Double prec." ,GMX_CPT_BUILD_DP,double_prec,&mm);
 +    check_string(fplog,"Program name" ,Program()    ,fprog  ,&mm);
 +    
 +    check_int   (fplog,"#nodes"       ,cr->nnodes   ,npp_f+npme_f ,&mm);
 +    if (bPartDecomp)
 +    {
 +        dd_nc[XX] = 1;
 +        dd_nc[YY] = 1;
 +        dd_nc[ZZ] = 1;
 +    }
 +    if (cr->nnodes > 1)
 +    {
 +        check_int (fplog,"#PME-nodes"  ,cr->npmenodes,npme_f     ,&mm);
 +
 +        npp = cr->nnodes;
 +        if (cr->npmenodes >= 0)
 +        {
 +            npp -= cr->npmenodes;
 +        }
 +        if (npp == npp_f)
 +        {
 +            check_int (fplog,"#DD-cells[x]",dd_nc[XX]    ,dd_nc_f[XX],&mm);
 +            check_int (fplog,"#DD-cells[y]",dd_nc[YY]    ,dd_nc_f[YY],&mm);
 +            check_int (fplog,"#DD-cells[z]",dd_nc[ZZ]    ,dd_nc_f[ZZ],&mm);
 +        }
 +    }
 +    
 +    if (mm)
 +    {
 +              fprintf(stderr,
 +                              "Gromacs binary or parallel settings not identical to previous run.\n"
 +                              "Continuation is exact, but is not guaranteed to be binary identical%s.\n\n",
 +                              fplog ? ",\n see the log file for details" : "");
 +              
 +        if (fplog)
 +        {
 +                      fprintf(fplog,
 +                                      "Gromacs binary or parallel settings not identical to previous run.\n"
 +                                      "Continuation is exact, but is not guaranteed to be binary identical.\n\n");
 +              }
 +    }
 +}
 +
 +static void read_checkpoint(const char *fn,FILE **pfplog,
 +                            t_commrec *cr,gmx_bool bPartDecomp,ivec dd_nc,
 +                            int eIntegrator, int *init_fep_state, gmx_large_int_t *step,double *t,
 +                            t_state *state,gmx_bool *bReadRNG,gmx_bool *bReadEkin,
 +                            int *simulation_part,
 +                            gmx_bool bAppendOutputFiles,gmx_bool bForceAppend)
 +{
 +    t_fileio *fp;
 +    int  i,j,rc;
 +    int  file_version;
 +    char *version,*btime,*buser,*bhost,*fprog,*ftime;
 +    int  double_prec;
 +      char filename[STRLEN],buf[STEPSTRSIZE];
 +    int  nppnodes,eIntegrator_f,nppnodes_f,npmenodes_f;
 +    ivec dd_nc_f;
 +    int  natoms,ngtc,nnhpres,nhchainlength,nlambda,fflags,flags_eks,flags_enh,flags_dfh;
 +    int  d;
 +    int  ret;
 +    gmx_file_position_t *outputfiles;
 +    int  nfiles;
 +    t_fileio *chksum_file;
 +    FILE* fplog = *pfplog;
 +    unsigned char digest[16];
 +#ifndef GMX_NATIVE_WINDOWS
 +    struct flock fl;  /* don't initialize here: the struct order is OS 
 +                         dependent! */
 +#endif
 +
 +    const char *int_warn=
 +              "WARNING: The checkpoint file was generated with integrator %s,\n"
 +              "         while the simulation uses integrator %s\n\n";
 +    const char *sd_note=
 +        "NOTE: The checkpoint file was for %d nodes doing SD or BD,\n"
 +        "      while the simulation uses %d SD or BD nodes,\n"
 +        "      continuation will be exact, except for the random state\n\n";
 +    
 +#ifndef GMX_NATIVE_WINDOWS
 +    fl.l_type=F_WRLCK;
 +    fl.l_whence=SEEK_SET;
 +    fl.l_start=0;
 +    fl.l_len=0;
 +    fl.l_pid=0;
 +#endif
 +
 +    if (PARTDECOMP(cr))
 +    {
 +        gmx_fatal(FARGS,
 +                  "read_checkpoint not (yet) supported with particle decomposition");
 +    }
 +    
 +    fp = gmx_fio_open(fn,"r");
 +    do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
 +                  &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
 +                  &eIntegrator_f,simulation_part,step,t,
 +                  &nppnodes_f,dd_nc_f,&npmenodes_f,
 +                  &natoms,&ngtc,&nnhpres,&nhchainlength,&nlambda,
 +                  &fflags,&flags_eks,&flags_enh,&flags_dfh,NULL);
 +
 +    if (bAppendOutputFiles &&
 +        file_version >= 13 && double_prec != GMX_CPT_BUILD_DP)
 +    {
 +        gmx_fatal(FARGS,"Output file appending requested, but the code and checkpoint file precision (single/double) don't match");
 +    }
 +    
 +    if (cr == NULL || MASTER(cr))
 +    {
 +        fprintf(stderr,"\nReading checkpoint file %s generated: %s\n\n",
 +                fn,ftime);
 +    }
 +      
 +      /* This will not be written if we do appending, since fplog is still NULL then */
 +    if (fplog)
 +    {
 +        fprintf(fplog,"\n");
 +        fprintf(fplog,"Reading checkpoint file %s\n",fn);
 +        fprintf(fplog,"  file generated by:     %s\n",fprog);  
 +        fprintf(fplog,"  file generated at:     %s\n",ftime);  
 +        fprintf(fplog,"  GROMACS build time:    %s\n",btime);  
 +        fprintf(fplog,"  GROMACS build user:    %s\n",buser);  
 +        fprintf(fplog,"  GROMACS build host:    %s\n",bhost);
 +        fprintf(fplog,"  GROMACS double prec.:  %d\n",double_prec);
 +        fprintf(fplog,"  simulation part #:     %d\n",*simulation_part);
 +        fprintf(fplog,"  step:                  %s\n",gmx_step_str(*step,buf));
 +        fprintf(fplog,"  time:                  %f\n",*t);  
 +        fprintf(fplog,"\n");
 +    }
 +    
 +    if (natoms != state->natoms)
 +    {
 +        gmx_fatal(FARGS,"Checkpoint file is for a system of %d atoms, while the current system consists of %d atoms",natoms,state->natoms);
 +    }
 +    if (ngtc != state->ngtc)
 +    {
 +        gmx_fatal(FARGS,"Checkpoint file is for a system of %d T-coupling groups, while the current system consists of %d T-coupling groups",ngtc,state->ngtc);
 +    }
 +    if (nnhpres != state->nnhpres)
 +    {
 +        gmx_fatal(FARGS,"Checkpoint file is for a system of %d NH-pressure-coupling variables, while the current system consists of %d NH-pressure-coupling variables",nnhpres,state->nnhpres);
 +    }
 +
 +    if (nlambda != state->dfhist.nlambda)
 +    {
 +        gmx_fatal(FARGS,"Checkpoint file is for a system with %d lambda states, while the current system consists of %d lambda states",nlambda,state->dfhist.nlambda);
 +    }
 +
 +    init_gtc_state(state,state->ngtc,state->nnhpres,nhchainlength); /* need to keep this here to keep the tpr format working */
 +    /* write over whatever was read; we use the number of Nose-Hoover chains from the checkpoint */
 +    
 +    if (eIntegrator_f != eIntegrator)
 +    {
 +        if (MASTER(cr))
 +        {
 +            fprintf(stderr,int_warn,EI(eIntegrator_f),EI(eIntegrator));
 +        }
 +              if(bAppendOutputFiles)
 +              {
 +                      gmx_fatal(FARGS,
 +                                        "Output file appending requested, but input/checkpoint integrators do not match.\n"
 +                                        "Stopping the run to prevent you from ruining all your data...\n"
 +                                        "If you _really_ know what you are doing, try with the -noappend option.\n");
 +              }
 +        if (fplog)
 +        {
 +            fprintf(fplog,int_warn,EI(eIntegrator_f),EI(eIntegrator));
 +        }
 +    }
 +
 +    if (!PAR(cr))
 +    {
 +        nppnodes = 1;
 +        cr->npmenodes = 0;
 +    }
 +    else if (bPartDecomp)
 +    {
 +        nppnodes = cr->nnodes;
 +        cr->npmenodes = 0;
 +    }
 +    else if (cr->nnodes == nppnodes_f + npmenodes_f)
 +    {
 +        if (cr->npmenodes < 0)
 +        {
 +            cr->npmenodes = npmenodes_f;
 +        }
 +        nppnodes = cr->nnodes - cr->npmenodes;
 +        if (nppnodes == nppnodes_f)
 +        {
 +            for(d=0; d<DIM; d++)
 +            {
 +                if (dd_nc[d] == 0)
 +                {
 +                    dd_nc[d] = dd_nc_f[d];
 +                }
 +            }
 +        }
 +    }
 +    else
 +    {
 +        /* The number of PP nodes has not been set yet */
 +        nppnodes = -1;
 +    }
 +
 +    if ((EI_SD(eIntegrator) || eIntegrator == eiBD) && nppnodes > 0)
 +    {
 +        /* Correct the RNG state size for the number of PP nodes.
 +         * Such assignments should all be moved to one central function.
 +         */
 +        state->nrng  = nppnodes*gmx_rng_n();
 +        state->nrngi = nppnodes;
 +    }
 +    
 +    *bReadRNG = TRUE;
 +    if (fflags != state->flags)
 +    {
 +              
 +        if (MASTER(cr))
 +        {
 +                      if(bAppendOutputFiles)
 +                      {
 +                              gmx_fatal(FARGS,
 +                                                "Output file appending requested, but input and checkpoint states are not identical.\n"
 +                                                "Stopping the run to prevent you from ruining all your data...\n"
 +                                                "You can try with the -noappend option, and get more info in the log file.\n");
 +                      }
 +                      
 +            if (getenv("GMX_ALLOW_CPT_MISMATCH") == NULL)
 +            {
 +                gmx_fatal(FARGS,"You seem to have switched ensemble, integrator, T and/or P-coupling algorithm between the cpt and tpr file. The recommended way of doing this is passing the cpt file to grompp (with option -t) instead of to mdrun. If you know what you are doing, you can override this error by setting the env.var. GMX_ALLOW_CPT_MISMATCH");
 +            }
 +            else
 +            {
 +                fprintf(stderr,
 +                        "WARNING: The checkpoint state entries do not match the simulation,\n"
 +                        "         see the log file for details\n\n");
 +            }
 +        }
 +              
 +              if(fplog)
 +              {
 +                      print_flag_mismatch(fplog,state->flags,fflags);
 +              }
 +    }
 +    else
 +    {
 +        if ((EI_SD(eIntegrator) || eIntegrator == eiBD) &&
 +            nppnodes != nppnodes_f)
 +        {
 +            *bReadRNG = FALSE;
 +            if (MASTER(cr))
 +            {
 +                fprintf(stderr,sd_note,nppnodes_f,nppnodes);
 +            }
 +            if (fplog)
 +            {
 +                fprintf(fplog ,sd_note,nppnodes_f,nppnodes);
 +            }
 +        }
 +        if (MASTER(cr))
 +        {
 +            check_match(fplog,version,btime,buser,bhost,double_prec,fprog,
 +                        cr,bPartDecomp,nppnodes_f,npmenodes_f,dd_nc,dd_nc_f);
 +        }
 +    }
 +    ret = do_cpt_state(gmx_fio_getxdr(fp),TRUE,fflags,state,*bReadRNG,NULL);
 +    *init_fep_state = state->fep_state;  /* there should be a better way to do this than setting it here.
 +                                            Investigate for 5.0. */
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +    ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
 +                           flags_eks,&state->ekinstate,NULL);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +    *bReadEkin = ((flags_eks & (1<<eeksEKINH)) || (flags_eks & (1<<eeksEKINF)) || (flags_eks & (1<<eeksEKINO)) ||
 +                  ((flags_eks & (1<<eeksEKINSCALEF)) | (flags_eks & (1<<eeksEKINSCALEH)) | (flags_eks & (1<<eeksVSCALE))));
 +    
 +    ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
 +                          flags_enh,&state->enerhist,NULL);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +
 +    if (file_version < 6)
 +    {
 +        const char *warn="Reading checkpoint file in old format, assuming that the run that generated this file started at step 0, if this is not the case the averages stored in the energy file will be incorrect.";
 +
 +        fprintf(stderr,"\nWARNING: %s\n\n",warn);
 +        if (fplog)
 +        {
 +            fprintf(fplog,"\nWARNING: %s\n\n",warn);
 +        }
 +        state->enerhist.nsum     = *step;
 +        state->enerhist.nsum_sim = *step;
 +    }
 +
 +    ret = do_cpt_df_hist(gmx_fio_getxdr(fp),TRUE,
 +                         flags_dfh,&state->dfhist,NULL);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +
 +      ret = do_cpt_files(gmx_fio_getxdr(fp),TRUE,&outputfiles,&nfiles,NULL,file_version);
 +      if (ret)
 +      {
 +              cp_error();
 +      }
 +                                         
 +    ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +    if( gmx_fio_close(fp) != 0)
 +      {
 +        gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
 +      }
 +    
 +    sfree(fprog);
 +    sfree(ftime);
 +    sfree(btime);
 +    sfree(buser);
 +    sfree(bhost);
 +      
 +      /* If the user wants to append to output files,
 +     * we use the file pointer positions of the output files stored
 +     * in the checkpoint file and truncate the files such that any frames
 +     * written after the checkpoint time are removed.
 +     * All files are md5sum checked such that we can be sure that
 +     * we do not truncate other (maybe imprortant) files.
 +       */
 +    if (bAppendOutputFiles)
 +    {
 +        if (fn2ftp(outputfiles[0].filename)!=efLOG)
 +        {
 +            /* make sure first file is log file so that it is OK to use it for 
 +             * locking
 +             */
 +            gmx_fatal(FARGS,"The first output file should always be the log "
 +                      "file but instead is: %s. Cannot do appending because of this condition.", outputfiles[0].filename);
 +        }
 +        for(i=0;i<nfiles;i++)
 +        {
 +            if (outputfiles[i].offset < 0)
 +            {
 +                gmx_fatal(FARGS,"The original run wrote a file called '%s' which "
 +                    "is larger than 2 GB, but mdrun did not support large file"
 +                    " offsets. Can not append. Run mdrun with -noappend",
 +                    outputfiles[i].filename);
 +            }
 +#ifdef GMX_FAHCORE
 +            chksum_file=gmx_fio_open(outputfiles[i].filename,"a");
 +
 +#else
 +            chksum_file=gmx_fio_open(outputfiles[i].filename,"r+");
 +
 +            /* lock log file */                
 +            if (i==0)
 +            {
 +                /* Note that there are systems where the lock operation
 +                 * will succeed, but a second process can also lock the file.
 +                 * We should probably try to detect this.
 +                 */
 +#ifndef GMX_NATIVE_WINDOWS
 +                if (fcntl(fileno(gmx_fio_getfp(chksum_file)), F_SETLK, &fl)
 +                    ==-1)
 +#else
 +                if (_locking(fileno(gmx_fio_getfp(chksum_file)), _LK_NBLCK, LONG_MAX)==-1)
 +#endif
 +                {
 +                    if (errno == ENOSYS)
 +                    {
 +                        if (!bForceAppend)
 +                        {
 +                            gmx_fatal(FARGS,"File locking is not supported on this system. Use -noappend or specify -append explicitly to append anyhow.");
 +                        }
 +                        else
 +                        {
 +                            fprintf(stderr,"\nNOTE: File locking is not supported on this system, will not lock %s\n\n",outputfiles[i].filename);
 +                            if (fplog)
 +                            {
 +                                fprintf(fplog,"\nNOTE: File locking not supported on this system, will not lock %s\n\n",outputfiles[i].filename);
 +                            }
 +                        }
 +                    }
 +                    else if (errno == EACCES || errno == EAGAIN)
 +                    {
 +                        gmx_fatal(FARGS,"Failed to lock: %s. Already running "
 +                                  "simulation?", outputfiles[i].filename);
 +                    }
 +                    else
 +                    {
 +                        gmx_fatal(FARGS,"Failed to lock: %s. %s.",
 +                                  outputfiles[i].filename, strerror(errno));
 +                    }
 +                }
 +            }
 +            
 +            /* compute md5 chksum */ 
 +            if (outputfiles[i].chksum_size != -1)
 +            {
 +                if (gmx_fio_get_file_md5(chksum_file,outputfiles[i].offset,
 +                                     digest) != outputfiles[i].chksum_size)  /*at the end of the call the file position is at the end of the file*/
 +                {
 +                    gmx_fatal(FARGS,"Can't read %d bytes of '%s' to compute checksum. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
 +                              outputfiles[i].chksum_size, 
 +                              outputfiles[i].filename);
 +                }
 +            } 
 +            if (i==0)  /*log file needs to be seeked in case we need to truncate (other files are truncated below)*/
 +            {
 +                if (gmx_fio_seek(chksum_file,outputfiles[i].offset))
 +                {
 +                      gmx_fatal(FARGS,"Seek error! Failed to truncate log-file: %s.", strerror(errno));
 +                }
 +            }
 +#endif
 +            
 +            if (i==0) /*open log file here - so that lock is never lifted 
 +                        after chksum is calculated */
 +            {
 +                *pfplog = gmx_fio_getfp(chksum_file);
 +            }
 +            else
 +            {
 +                gmx_fio_close(chksum_file);
 +            }
 +#ifndef GMX_FAHCORE            
 +            /* compare md5 chksum */
 +            if (outputfiles[i].chksum_size != -1 &&
 +                memcmp(digest,outputfiles[i].chksum,16)!=0) 
 +            {
 +                if (debug)
 +                {
 +                    fprintf(debug,"chksum for %s: ",outputfiles[i].filename);
 +                    for (j=0; j<16; j++)
 +                    {
 +                        fprintf(debug,"%02x",digest[j]);
 +                    }
 +                    fprintf(debug,"\n");
 +                }
 +                gmx_fatal(FARGS,"Checksum wrong for '%s'. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
 +                          outputfiles[i].filename);
 +            }
 +#endif        
 +
 +              
 +            if (i!=0) /*log file is already seeked to correct position */
 +            {
 +#ifdef GMX_NATIVE_WINDOWS
 +                rc = gmx_wintruncate(outputfiles[i].filename,outputfiles[i].offset);
 +#else            
 +                rc = truncate(outputfiles[i].filename,outputfiles[i].offset);
 +#endif
 +                if(rc!=0)
 +                {
 +                    gmx_fatal(FARGS,"Truncation of file %s failed. Cannot do appending because of this failure.",outputfiles[i].filename);
 +                }
 +            }
 +        }
 +    }
 +
 +    sfree(outputfiles);
 +}
 +
 +
 +void load_checkpoint(const char *fn,FILE **fplog,
 +                     t_commrec *cr,gmx_bool bPartDecomp,ivec dd_nc,
 +                     t_inputrec *ir,t_state *state,
 +                     gmx_bool *bReadRNG,gmx_bool *bReadEkin,
 +                     gmx_bool bAppend,gmx_bool bForceAppend)
 +{
 +    gmx_large_int_t step;
 +    double t;
 +
 +    if (SIMMASTER(cr)) {
 +      /* Read the state from the checkpoint file */
 +      read_checkpoint(fn,fplog,
 +                      cr,bPartDecomp,dd_nc,
 +                      ir->eI,&(ir->fepvals->init_fep_state),&step,&t,state,bReadRNG,bReadEkin,
 +                      &ir->simulation_part,bAppend,bForceAppend);
 +    }
 +    if (PAR(cr)) {
 +      gmx_bcast(sizeof(cr->npmenodes),&cr->npmenodes,cr);
 +      gmx_bcast(DIM*sizeof(dd_nc[0]),dd_nc,cr);
 +      gmx_bcast(sizeof(step),&step,cr);
 +      gmx_bcast(sizeof(*bReadRNG),bReadRNG,cr);
 +      gmx_bcast(sizeof(*bReadEkin),bReadEkin,cr);
 +    }
 +    ir->bContinuation    = TRUE;
 +    if (ir->nsteps >= 0)
 +    {
 +        ir->nsteps          += ir->init_step - step;
 +    }
 +    ir->init_step        = step;
 +      ir->simulation_part += 1;
 +}
 +
 +static void read_checkpoint_data(t_fileio *fp,int *simulation_part,
 +                                 gmx_large_int_t *step,double *t,t_state *state,
 +                                 gmx_bool bReadRNG,
 +                                 int *nfiles,gmx_file_position_t **outputfiles)
 +{
 +    int  file_version;
 +    char *version,*btime,*buser,*bhost,*fprog,*ftime;
 +    int  double_prec;
 +    int  eIntegrator;
 +    int  nppnodes,npme;
 +    ivec dd_nc;
 +    int  flags_eks,flags_enh,flags_dfh;
 +    int  nfiles_loc;
 +    gmx_file_position_t *files_loc=NULL;
 +    int  ret;
 +      
 +    do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
 +                  &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
 +                  &eIntegrator,simulation_part,step,t,&nppnodes,dd_nc,&npme,
 +                  &state->natoms,&state->ngtc,&state->nnhpres,&state->nhchainlength,
 +                  &(state->dfhist.nlambda),&state->flags,&flags_eks,&flags_enh,&flags_dfh,NULL);
 +    ret =
 +        do_cpt_state(gmx_fio_getxdr(fp),TRUE,state->flags,state,bReadRNG,NULL);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +    ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
 +                           flags_eks,&state->ekinstate,NULL);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +    ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
 +                          flags_enh,&state->enerhist,NULL);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +    ret = do_cpt_df_hist(gmx_fio_getxdr(fp),TRUE,
 +                          flags_dfh,&state->dfhist,NULL);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +
 +    ret = do_cpt_files(gmx_fio_getxdr(fp),TRUE,
 +                       outputfiles != NULL ? outputfiles : &files_loc,
 +                       outputfiles != NULL ? nfiles : &nfiles_loc,
 +                       NULL,file_version);
 +    if (files_loc != NULL)
 +    {
 +        sfree(files_loc);
 +    }
 +      
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +      
 +    ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +
 +    sfree(fprog);
 +    sfree(ftime);
 +    sfree(btime);
 +    sfree(buser);
 +    sfree(bhost);
 +}
 +
 +void 
 +read_checkpoint_state(const char *fn,int *simulation_part,
 +                      gmx_large_int_t *step,double *t,t_state *state)
 +{
 +    t_fileio *fp;
 +    
 +    fp = gmx_fio_open(fn,"r");
 +    read_checkpoint_data(fp,simulation_part,step,t,state,FALSE,NULL,NULL);
 +    if( gmx_fio_close(fp) != 0)
 +      {
 +        gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
 +      }
 +}
 +
 +void read_checkpoint_trxframe(t_fileio *fp,t_trxframe *fr)
 +{
 +    t_state state;
 +    int simulation_part;
 +    gmx_large_int_t step;
 +    double t;
 +    
 +    init_state(&state,0,0,0,0,0);
 +    
 +    read_checkpoint_data(fp,&simulation_part,&step,&t,&state,FALSE,NULL,NULL);
 +    
 +    fr->natoms  = state.natoms;
 +    fr->bTitle  = FALSE;
 +    fr->bStep   = TRUE;
 +    fr->step    = gmx_large_int_to_int(step,
 +                                    "conversion of checkpoint to trajectory");
 +    fr->bTime   = TRUE;
 +    fr->time    = t;
 +    fr->bLambda = TRUE;
 +    fr->lambda  = state.lambda[efptFEP];
 +    fr->fep_state  = state.fep_state;
 +    fr->bAtoms  = FALSE;
 +    fr->bX      = (state.flags & (1<<estX));
 +    if (fr->bX)
 +    {
 +        fr->x     = state.x;
 +        state.x   = NULL;
 +    }
 +    fr->bV      = (state.flags & (1<<estV));
 +    if (fr->bV)
 +    {
 +        fr->v     = state.v;
 +        state.v   = NULL;
 +    }
 +    fr->bF      = FALSE;
 +    fr->bBox    = (state.flags & (1<<estBOX));
 +    if (fr->bBox)
 +    {
 +        copy_mat(state.box,fr->box);
 +    }
 +    done_state(&state);
 +}
 +
 +void list_checkpoint(const char *fn,FILE *out)
 +{
 +    t_fileio *fp;
 +    int  file_version;
 +    char *version,*btime,*buser,*bhost,*fprog,*ftime;
 +    int  double_prec;
 +    int  eIntegrator,simulation_part,nppnodes,npme;
 +    gmx_large_int_t step;
 +    double t;
 +    ivec dd_nc;
 +    t_state state;
 +    int  flags_eks,flags_enh,flags_dfh;
 +    int  indent;
 +    int  i,j;
 +    int  ret;
 +    gmx_file_position_t *outputfiles;
 +      int  nfiles;
 +      
 +    init_state(&state,-1,-1,-1,-1,0);
 +
 +    fp = gmx_fio_open(fn,"r");
 +    do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
 +                  &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
 +                  &eIntegrator,&simulation_part,&step,&t,&nppnodes,dd_nc,&npme,
 +                  &state.natoms,&state.ngtc,&state.nnhpres,&state.nhchainlength,
 +                  &(state.dfhist.nlambda),&state.flags,
 +                  &flags_eks,&flags_enh,&flags_dfh,out);
 +    ret = do_cpt_state(gmx_fio_getxdr(fp),TRUE,state.flags,&state,TRUE,out);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +    ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
 +                           flags_eks,&state.ekinstate,out);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +    ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
 +                          flags_enh,&state.enerhist,out);
 +
 +    if (ret == 0)
 +    {
 +        init_df_history(&state.dfhist,state.dfhist.nlambda,0); /* reinitialize state with correct sizes */
 +        ret = do_cpt_df_hist(gmx_fio_getxdr(fp),TRUE,
 +                             flags_dfh,&state.dfhist,out);
 +    }
 +    if (ret == 0)
 +    {
 +              do_cpt_files(gmx_fio_getxdr(fp),TRUE,&outputfiles,&nfiles,out,file_version);
 +      }
 +      
 +    if (ret == 0)
 +    {
 +        ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
 +    }
 +      
 +    if (ret)
 +    {
 +        cp_warning(out);
 +    }
 +    if( gmx_fio_close(fp) != 0)
 +      {
 +        gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
 +      }
 +    
 +    done_state(&state);
 +}
 +
 +
 +static gmx_bool exist_output_file(const char *fnm_cp,int nfile,const t_filenm fnm[])
 +{
 +    int i;
 +
 +    /* Check if the output file name stored in the checkpoint file
 +     * is one of the output file names of mdrun.
 +     */
 +    i = 0;
 +    while (i < nfile &&
 +           !(is_output(&fnm[i]) && strcmp(fnm_cp,fnm[i].fns[0]) == 0))
 +    {
 +        i++;
 +    }
 +    
 +    return (i < nfile && gmx_fexist(fnm_cp));
 +}
 +
 +/* This routine cannot print tons of data, since it is called before the log file is opened. */
 +gmx_bool read_checkpoint_simulation_part(const char *filename, int *simulation_part,
 +                                     gmx_large_int_t *cpt_step,t_commrec *cr,
 +                                     gmx_bool bAppendReq,
 +                                     int nfile,const t_filenm fnm[],
 +                                     const char *part_suffix,gmx_bool *bAddPart)
 +{
 +    t_fileio *fp;
 +    gmx_large_int_t step=0;
 +      double t;
 +    t_state state;
 +    int  nfiles;
 +    gmx_file_position_t *outputfiles;
 +    int  nexist,f;
 +    gmx_bool bAppend;
 +    char *fn,suf_up[STRLEN];
 +
 +    bAppend = FALSE;
 +
 +    if (SIMMASTER(cr)) {
 +        if(!gmx_fexist(filename) || (!(fp = gmx_fio_open(filename,"r")) ))
 +        {
 +            *simulation_part = 0;
 +        }
 +        else 
 +        {
 +            init_state(&state,0,0,0,0,0);
 +
 +            read_checkpoint_data(fp,simulation_part,&step,&t,&state,FALSE,
 +                                 &nfiles,&outputfiles);
 +            if( gmx_fio_close(fp) != 0)
 +            {
 +                gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
 +            }
 +            done_state(&state);
 +
 +            if (bAppendReq)
 +            {
 +                nexist = 0;
 +                for(f=0; f<nfiles; f++)
 +                {
 +                    if (exist_output_file(outputfiles[f].filename,nfile,fnm))
 +                    {
 +                        nexist++;
 +                    }
 +                }
 +                if (nexist == nfiles)
 +                {
 +                    bAppend = bAppendReq;
 +                }
 +                else if (nexist > 0)
 +                {
 +                    fprintf(stderr,
 +                            "Output file appending has been requested,\n"
 +                            "but some output files listed in the checkpoint file %s\n"
 +                            "are not present or are named differently by the current program:\n",
 +                            filename);
 +                    fprintf(stderr,"output files present:");
 +                    for(f=0; f<nfiles; f++)
 +                    {
 +                        if (exist_output_file(outputfiles[f].filename,
 +                                              nfile,fnm))
 +                        {
 +                            fprintf(stderr," %s",outputfiles[f].filename);
 +                        }
 +                    }
 +                    fprintf(stderr,"\n");
 +                    fprintf(stderr,"output files not present or named differently:");
 +                    for(f=0; f<nfiles; f++)
 +                    {
 +                        if (!exist_output_file(outputfiles[f].filename,
 +                                               nfile,fnm))
 +                        {
 +                            fprintf(stderr," %s",outputfiles[f].filename);
 +                        }
 +                    }
 +                    fprintf(stderr,"\n");
 +                    
 +                    gmx_fatal(FARGS,"File appending requested, but only %d of the %d output files are present",nexist,nfiles);
 +                }
 +            }
 +            
 +            if (bAppend)
 +            {
 +                if (nfiles == 0)
 +                {
 +                    gmx_fatal(FARGS,"File appending requested, but no output file information is stored in the checkpoint file");
 +                }
 +                fn = outputfiles[0].filename;
 +                if (strlen(fn) < 4 ||
 +                    gmx_strcasecmp(fn+strlen(fn)-4,ftp2ext(efLOG)) == 0)
 +                {
 +                    gmx_fatal(FARGS,"File appending requested, but the log file is not the first file listed in the checkpoint file");
 +                }
 +                /* Set bAddPart to whether the suffix string '.part' is present
 +                 * in the log file name.
 +                 */
 +                strcpy(suf_up,part_suffix);
 +                upstring(suf_up);
 +                *bAddPart = (strstr(fn,part_suffix) != NULL ||
 +                             strstr(fn,suf_up) != NULL);
 +            }
 +
 +            sfree(outputfiles);
 +        }
 +    }
 +    if (PAR(cr))
 +    {
 +        gmx_bcast(sizeof(*simulation_part),simulation_part,cr);
 +
 +        if (*simulation_part > 0 && bAppendReq)
 +        {
 +            gmx_bcast(sizeof(bAppend),&bAppend,cr);
 +            gmx_bcast(sizeof(*bAddPart),bAddPart,cr);
 +        }
 +    }
 +    if (NULL != cpt_step)
 +    {
 +        *cpt_step = step;
 +    }
 +
 +    return bAppend;
 +}
index 7915fbe57f66604fa67b158e19e0e93560996aee,0000000000000000000000000000000000000000..fa371bb3e320d387682c29980804052fc31746e9
mode 100644,000000..100644
--- /dev/null
@@@ -1,682 -1,0 +1,758 @@@
- /* This routine only returns a static (constant) string, so we use a 
-  * mutex to initialize it. Since the string is only written to the
-  * first time, there is no risk with multiple calls overwriting the
-  * output for each other.
-  */
 +/*
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#ifdef GMX_THREAD_MPI
 +#include <thread_mpi.h>
 +#endif
 +
++#ifdef HAVE_LIBMKL
++#include <mkl.h>
++#endif
++#ifdef GMX_GPU
++#include <cuda.h>
++#include <cuda_runtime_api.h>
++#endif
++#ifdef GMX_FFT_FFTW3
++#include <fftw3.h>
++#endif
++
 +/* This file is completely threadsafe - keep it that way! */
 +
 +#include <string.h>
 +#include <ctype.h>
 +#include "sysstuff.h"
 +#include "smalloc.h"
 +#include "string2.h"
 +#include "macros.h"
 +#include <time.h>
 +#include "random.h"
 +#include "statutil.h"
 +#include "copyrite.h"
 +#include "strdb.h"
 +#include "futil.h"
++#include "vec.h"
++#include "buildinfo.h"
++#include "gmx_cpuid.h"
 +
 +static void pr_two(FILE *out,int c,int i)
 +{
 +  if (i < 10)
 +    fprintf(out,"%c0%1d",c,i);
 +  else
 +    fprintf(out,"%c%2d",c,i);
 +}
 +
 +void pr_difftime(FILE *out,double dt)
 +{
 +  int    ndays,nhours,nmins,nsecs;
 +  gmx_bool   bPrint,bPrinted;
 +
 +  ndays = dt/(24*3600);
 +  dt    = dt-24*3600*ndays;
 +  nhours= dt/3600;
 +  dt    = dt-3600*nhours;
 +  nmins = dt/60;
 +  dt    = dt-nmins*60;
 +  nsecs = dt;
 +  bPrint= (ndays > 0);
 +  bPrinted=bPrint;
 +  if (bPrint) 
 +    fprintf(out,"%d",ndays);
 +  bPrint=bPrint || (nhours > 0);
 +  if (bPrint) {
 +    if (bPrinted)
 +      pr_two(out,'d',nhours);
 +    else 
 +      fprintf(out,"%d",nhours);
 +  }
 +  bPrinted=bPrinted || bPrint;
 +  bPrint=bPrint || (nmins > 0);
 +  if (bPrint) {
 +    if (bPrinted)
 +      pr_two(out,'h',nmins);
 +    else 
 +      fprintf(out,"%d",nmins);
 +  }
 +  bPrinted=bPrinted || bPrint;
 +  if (bPrinted)
 +    pr_two(out,':',nsecs);
 +  else
 +    fprintf(out,"%ds",nsecs);
 +  fprintf(out,"\n");
 +}
 +
 +
 +gmx_bool be_cool(void)
 +{
 +  /* Yes, it is bad to check the environment variable every call,
 +   * but we dont call this routine often, and it avoids using 
 +   * a mutex for locking the variable...
 +   */
 +#ifdef GMX_FAHCORE
 +  /*be uncool*/
 +  return FALSE;
 +#else
 +  return (getenv("GMX_NO_QUOTES") == NULL);
 +#endif
 +}
 +
 +void space(FILE *out, int n)
 +{
 +  fprintf(out,"%*s",n,"");
 +}
 +
 +void f(char *a)
 +{
 +    int i;
 +    int len=strlen(a);
 +    
 +    for(i=0;i<len;i++)
 +        a[i]=~a[i]; 
 +}
 +
 +static void sp_print(FILE *out,const char *s)
 +{
 +  int slen;
 +  
 +  slen=strlen(s);
 +  space(out,(80-slen)/2);
 +  fprintf(out,"%s\n",s);
 +}
 +
 +static void ster_print(FILE *out,const char *s)
 +{
 +  int  slen;
 +  char buf[128];
 +  
 +  snprintf(buf,128,":-)  %s  (-:",s);
 +  slen=strlen(buf);
 +  space(out,(80-slen)/2);
 +  fprintf(out,"%s\n",buf);
 +}
 +
 +
 +static void pukeit(const char *db,const char *defstring, char *retstring, 
 +                 int retsize, int *cqnum)
 +{
 +  FILE *fp;
 +  char **help;
 +  int  i,nhlp;
 +  int  seed;
 + 
 +  if (be_cool() && ((fp = low_libopen(db,FALSE)) != NULL)) {
 +    nhlp=fget_lines(fp,&help);
 +    /* for libraries we can use the low-level close routines */
 +    ffclose(fp);
 +    seed=time(NULL);
 +    *cqnum=nhlp*rando(&seed);
 +    if (strlen(help[*cqnum]) >= STRLEN)
 +      help[*cqnum][STRLEN-1] = '\0';
 +    strncpy(retstring,help[*cqnum],retsize);
 +    f(retstring);
 +    for(i=0; (i<nhlp); i++)
 +      sfree(help[i]);
 +    sfree(help);
 +  }
 +  else 
 +    strncpy(retstring,defstring,retsize);
 +}
 +
 +void bromacs(char *retstring, int retsize)
 +{
 +  int dum;
 +
 +  pukeit("bromacs.dat",
 +       "Groningen Machine for Chemical Simulation",
 +       retstring,retsize,&dum);
 +}
 +
 +void cool_quote(char *retstring, int retsize, int *cqnum)
 +{
 +  char *tmpstr;
 +  char *s,*ptr;
 +  int tmpcq,*p;
 +  
 +  if (cqnum!=NULL)
 +    p = cqnum;
 +  else
 +    p = &tmpcq;
 +  
 +  /* protect audience from explicit lyrics */
 +  snew(tmpstr,retsize+1);
 +  pukeit("gurgle.dat","Thanx for Using GROMACS - Have a Nice Day",
 +       tmpstr,retsize-2,p);
 +
 +  if ((ptr = strchr(tmpstr,'_')) != NULL) {
 +    *ptr='\0';
 +    ptr++;
 +    sprintf(retstring,"\"%s\" %s",tmpstr,ptr);
 +  }
 +  else {
 +    strcpy(retstring,tmpstr);
 +  }
 +  sfree(tmpstr);
 +}
 +
/* Print the GROMACS startup banner to out: the stereo "G R O M A C S"
 * logo, a random acronym expansion, the version, the contributor list and
 * (unless built as a FAH core) the GPL notice, followed by the program
 * name. Also registers the program name as a side effect via
 * set_program_name(). */
void CopyRight(FILE *out,const char *szProgram)
{
  static const char * CopyrightText[] = {
             "Written by Emile Apol, Rossen Apostolov, Herman J.C. Berendsen,",
             "Aldert van Buuren, Pär Bjelkmar, Rudi van Drunen, Anton Feenstra, ",
             "Gerrit Groenhof, Peter Kasson, Per Larsson, Pieter Meulenhoff, ",
             "Teemu Murtola, Szilard Pall, Sander Pronk, Roland Schulz, ",
             "Michael Shirts, Alfons Sijbers, Peter Tieleman,\n",
             "Berk Hess, David van der Spoel, and Erik Lindahl.\n",
             "Copyright (c) 1991-2000, University of Groningen, The Netherlands.",
             "Copyright (c) 2001-2010, The GROMACS development team at",
             "Uppsala University & The Royal Institute of Technology, Sweden.",
             "check out http://www.gromacs.org for more information.\n"
  };

  static const char * GPLText[] = {
              "This program is free software; you can redistribute it and/or",
              "modify it under the terms of the GNU General Public License",
              "as published by the Free Software Foundation; either version 2",
              "of the License, or (at your option) any later version."
  };

  /* Dont change szProgram arbitrarily - it must be argv[0], i.e. the 
   * name of a file. Otherwise, we won't be able to find the library dir.
   */
#define NCR (int)asize(CopyrightText)
#ifdef GMX_FAHCORE
#define NGPL 0 /*FAH has an exception permission from GPL to allow digital signatures in Gromacs*/
#else
#define NGPL (int)asize(GPLText)
#endif

  char buf[256],tmpstr[1024];
  int i;

#ifdef GMX_FAHCORE
  set_program_name("Gromacs");
#else
  set_program_name(szProgram);
#endif

  /* big stereo logo */
  ster_print(out,"G  R  O  M  A  C  S");
  fprintf(out,"\n");
  
  /* random acronym expansion, centered by sp_print() */
  bromacs(tmpstr,1023);
  sp_print(out,tmpstr); 
  fprintf(out,"\n");

  ster_print(out,GromacsVersion());
  fprintf(out,"\n");

  /* fprintf(out,"\n");*/

  /* sp_print(out,"PLEASE NOTE: THIS IS A BETA VERSION\n");
  
  fprintf(out,"\n"); */

  for(i=0; (i<NCR); i++) 
    sp_print(out,CopyrightText[i]);
  for(i=0; (i<NGPL); i++)
    sp_print(out,GPLText[i]);

  fprintf(out,"\n");

  /* program name, annotated with precision in double builds */
  snprintf(buf,256,"%s",Program());
#ifdef GMX_DOUBLE
  strcat(buf," (double precision)");
#endif
  ster_print(out,buf);
  fprintf(out,"\n");
}
 +
 +
/* Print a parting quote to fp. With GMX_COOL_QUOTES enabled (be_cool())
 * the quote number is shown as well. */
void thanx(FILE *fp)
{
    char quote[1024];
    int  quote_num;

    /* protect the audience from suggestive discussions */
    cool_quote(quote, 1023, &quote_num);

    if (be_cool())
    {
        fprintf(fp,"\ngcq#%d: %s\n\n",quote_num,quote);
    }
    else
    {
        fprintf(fp,"\n%s\n\n",quote);
    }
}
 +
/* One entry of the built-in citation database used by please_cite(). */
typedef struct {
  const char *key;      /* lookup key passed to please_cite() */
  const char *author;   /* author list, wrapped for display */
  const char *title;
  const char *journal;
  int volume,year;
  const char *pages;
} t_citerec;
 +
/* Look up key in the static citation database and print a formatted
 * literature reference to fp, wrapping author and title lines at
 * LINE_WIDTH columns. Unknown keys print a "not found" notice instead of
 * failing. A NULL fp is a silent no-op. */
void please_cite(FILE *fp,const char *key)
{
  static const t_citerec citedb[] = {
    { "Allen1987a",
      "M. P. Allen and D. J. Tildesley",
      "Computer simulation of liquids",
      "Oxford Science Publications",
      1, 1987, "1" },
    { "Berendsen95a",
      "H. J. C. Berendsen, D. van der Spoel and R. van Drunen",
      "GROMACS: A message-passing parallel molecular dynamics implementation",
      "Comp. Phys. Comm.",
      91, 1995, "43-56" },
    { "Berendsen84a",
      "H. J. C. Berendsen, J. P. M. Postma, A. DiNola and J. R. Haak",
      "Molecular dynamics with coupling to an external bath",
      "J. Chem. Phys.",
      81, 1984, "3684-3690" },
    { "Ryckaert77a",
      "J. P. Ryckaert and G. Ciccotti and H. J. C. Berendsen",
      "Numerical Integration of the Cartesian Equations of Motion of a System with Constraints; Molecular Dynamics of n-Alkanes",
      "J. Comp. Phys.",
      23, 1977, "327-341" },
    { "Miyamoto92a",
      "S. Miyamoto and P. A. Kollman",
      "SETTLE: An Analytical Version of the SHAKE and RATTLE Algorithms for Rigid Water Models",
      "J. Comp. Chem.",
      13, 1992, "952-962" },
    { "Cromer1968a",
      "D. T. Cromer & J. B. Mann",
      "X-ray scattering factors computed from numerical Hartree-Fock wave functions",
      "Acta Cryst. A",
      24, 1968, "321" },
    { "Barth95a",
      "E. Barth and K. Kuczera and B. Leimkuhler and R. D. Skeel",
      "Algorithms for Constrained Molecular Dynamics",
      "J. Comp. Chem.",
      16, 1995, "1192-1209" },
    { "Essmann95a",
      "U. Essmann, L. Perera, M. L. Berkowitz, T. Darden, H. Lee and L. G. Pedersen ",
      "A smooth particle mesh Ewald method",
      "J. Chem. Phys.",
      103, 1995, "8577-8592" },
    { "Torda89a",
      "A. E. Torda and R. M. Scheek and W. F. van Gunsteren",
      "Time-dependent distance restraints in molecular dynamics simulations",
      "Chem. Phys. Lett.",
      157, 1989, "289-294" },
    { "Tironi95a",
      "I. G. Tironi and R. Sperb and P. E. Smith and W. F. van Gunsteren",
      "Generalized reaction field method for molecular dynamics simulations",
      "J. Chem. Phys",
      102, 1995, "5451-5459" },
    { "Hess97a",
      "B. Hess and H. Bekker and H. J. C. Berendsen and J. G. E. M. Fraaije",
      "LINCS: A Linear Constraint Solver for molecular simulations",
      "J. Comp. Chem.",
      18, 1997, "1463-1472" },
    { "Hess2008a",
      "B. Hess",
      "P-LINCS: A Parallel Linear Constraint Solver for molecular simulation",
      "J. Chem. Theory Comput.",
      4, 2008, "116-122" },
    { "Hess2008b",
      "B. Hess and C. Kutzner and D. van der Spoel and E. Lindahl",
      "GROMACS 4: Algorithms for highly efficient, load-balanced, and scalable molecular simulation",
      "J. Chem. Theory Comput.",
      4, 2008, "435-447" },
    { "Hub2010",
      "J. S. Hub, B. L. de Groot and D. van der Spoel",
      "g_wham - A free weighted histogram analysis implementation including robust error and autocorrelation estimates",
      "J. Chem. Theory Comput.",
      6, 2010, "3713-3720"}, 
    { "In-Chul99a",
      "Y. In-Chul and M. L. Berkowitz",
      "Ewald summation for systems with slab geometry",
      "J. Chem. Phys.",
      111, 1999, "3155-3162" },
    { "DeGroot97a",
      "B. L. de Groot and D. M. F. van Aalten and R. M. Scheek and A. Amadei and G. Vriend and H. J. C. Berendsen",
      "Prediction of Protein Conformational Freedom From Distance Constrains",
      "Proteins",
      29, 1997, "240-251" },
    { "Spoel98a",
      "D. van der Spoel and P. J. van Maaren and H. J. C. Berendsen",
      "A systematic study of water models for molecular simulation. Derivation of models optimized for use with a reaction-field.",
      "J. Chem. Phys.",
      108, 1998, "10220-10230" },
    { "Wishart98a",
      "D. S. Wishart and A. M. Nip",
      "Protein Chemical Shift Analysis: A Practical Guide",
      "Biochem. Cell Biol.",
      76, 1998, "153-163" },
    { "Maiorov95",
      "V. N. Maiorov and G. M. Crippen",
      "Size-Independent Comparison of Protein Three-Dimensional Structures",
      "PROTEINS: Struct. Funct. Gen.",
      22, 1995, "273-283" },
    { "Feenstra99",
      "K. A. Feenstra and B. Hess and H. J. C. Berendsen",
      "Improving Efficiency of Large Time-scale Molecular Dynamics Simulations of Hydrogen-rich Systems",
      "J. Comput. Chem.",
      20, 1999, "786-798" },
    { "Timneanu2004a",
      "N. Timneanu and C. Caleman and J. Hajdu and D. van der Spoel",
      "Auger Electron Cascades in Water and Ice",
      "Chem. Phys.",
      299, 2004, "277-283" },
    { "Pascal2011a",
      "T. A. Pascal and S. T. Lin and W. A. Goddard III",
      "Thermodynamics of liquids: standard molar entropies and heat capacities of common solvents from 2PT molecular dynamics",
      "Phys. Chem. Chem. Phys.",
      13, 2011, "169-181" },
    { "Caleman2011b",
      "C. Caleman and P. J. van Maaren and M. Hong and J. S. Hub and L. T. da Costa and D. van der Spoel",
      "Force Field Benchmark of Organic Liquids: Density, Enthalpy of Vaporization, Heat Capacities, Surface Tension, Isothermal Compressibility, Volumetric Expansion Coefficient, and Dielectric Constant",
      "J. Chem. Theo. Comp.",
      8, 2012, "61" },
    { "Lindahl2001a",
      "E. Lindahl and B. Hess and D. van der Spoel",
      "GROMACS 3.0: A package for molecular simulation and trajectory analysis",
      "J. Mol. Mod.",
      7, 2001, "306-317" },
    { "Wang2001a",
      "J. Wang and W. Wang and S. Huo and M. Lee and P. A. Kollman",
      "Solvation model based on weighted solvent accessible surface area",
      "J. Phys. Chem. B",
      105, 2001, "5055-5067" },
    { "Eisenberg86a",
      "D. Eisenberg and A. D. McLachlan",
      "Solvation energy in protein folding and binding",
      "Nature",
      319, 1986, "199-203" },
    { "Eisenhaber95",
      "Frank Eisenhaber and Philip Lijnzaad and Patrick Argos and Chris Sander and Michael Scharf",
      "The Double Cube Lattice Method: Efficient Approaches to Numerical Integration of Surface Area and Volume and to Dot Surface Contouring of Molecular Assemblies",
      "J. Comp. Chem.",
      16, 1995, "273-284" },
    { "Hess2002",
      "B. Hess, H. Saint-Martin and H.J.C. Berendsen",
      "Flexible constraints: an adiabatic treatment of quantum degrees of freedom, with application to the flexible and polarizable MCDHO model for water",
      "J. Chem. Phys.",
      116, 2002, "9602-9610" },
    { "Hetenyi2002b",
      "Csaba Hetenyi and David van der Spoel",
      "Efficient docking of peptides to proteins without prior knowledge of the binding site.",
      "Prot. Sci.",
      11, 2002, "1729-1737" },
    { "Hess2003",
      "B. Hess and R.M. Scheek",
      "Orientation restraints in molecular dynamics simulations using time and ensemble averaging",
      "J. Magn. Res.",
      164, 2003, "19-27" },
    { "Rappe1991a",
      "A. K. Rappe and W. A. Goddard III",
      "Charge Equillibration for Molecular Dynamics Simulations",
      "J. Phys. Chem.",
      95, 1991, "3358-3363" },
    { "Mu2005a",
      "Y. Mu, P. H. Nguyen and G. Stock",
      "Energy landscape of a small peptide revelaed by dihedral angle principal component analysis",
      "Prot. Struct. Funct. Bioinf.",
      58, 2005, "45-52" },
    { "Okabe2001a",
      "T. Okabe and M. Kawata and Y. Okamoto and M. Mikami",
      "Replica-exchange {M}onte {C}arlo method for the isobaric-isothermal ensemble",
      "Chem. Phys. Lett.",
      335, 2001, "435-439" },
    { "Hukushima96a",
      "K. Hukushima and K. Nemoto",
      "Exchange Monte Carlo Method and Application to Spin Glass Simulations",
      "J. Phys. Soc. Jpn.",
      65, 1996, "1604-1608" },
    { "Tropp80a",
      "J. Tropp",
      "Dipolar Relaxation and Nuclear Overhauser effects in nonrigid molecules: The effect of fluctuating internuclear distances",
      "J. Chem. Phys.",
      72, 1980, "6035-6043" },
    { "Bultinck2002a",
       "P. Bultinck and W. Langenaeker and P. Lahorte and F. De Proft and P. Geerlings and M. Waroquier and J. P. Tollenaere",
      "The electronegativity equalization method I: Parametrization and validation for atomic charge calculations",
      "J. Phys. Chem. A",
      106, 2002, "7887-7894" },
    { "Yang2006b",
      "Q. Y. Yang and K. A. Sharp",
      "Atomic charge parameters for the finite difference Poisson-Boltzmann method using electronegativity neutralization",
      "J. Chem. Theory Comput.",
      2, 2006, "1152-1167" },
    { "Spoel2005a",
      "D. van der Spoel, E. Lindahl, B. Hess, G. Groenhof, A. E. Mark and H. J. C. Berendsen",
      "GROMACS: Fast, Flexible and Free",
      "J. Comp. Chem.",
      26, 2005, "1701-1719" },
    { "Spoel2006b",
      "D. van der Spoel, P. J. van Maaren, P. Larsson and N. Timneanu",
      "Thermodynamics of hydrogen bonding in hydrophilic and hydrophobic media",
      "J. Phys. Chem. B",
      110, 2006, "4393-4398" },
    { "Spoel2006d",
      "D. van der Spoel and M. M. Seibert",
      "Protein folding kinetics and thermodynamics from atomistic simulations",
      "Phys. Rev. Letters",
      96, 2006, "238102" },
    { "Palmer94a",
      "B. J. Palmer",
      "Transverse-current autocorrelation-function calculations of the shear viscosity for molecular liquids",
      "Phys. Rev. E",
      49, 1994, "359-366" },
    { "Bussi2007a",
      "G. Bussi, D. Donadio and M. Parrinello",
      "Canonical sampling through velocity rescaling",
      "J. Chem. Phys.",
      126, 2007, "014101" },
    { "Hub2006",
      "J. S. Hub and B. L. de Groot",
      "Does CO2 permeate through Aquaporin-1?",
      "Biophys. J.",
      91, 2006, "842-848" },
    { "Hub2008",
      "J. S. Hub and B. L. de Groot",
      "Mechanism of selectivity in aquaporins and aquaglyceroporins",
      "PNAS",
      105, 2008, "1198-1203" },
    { "Friedrich2009",
      "M. S. Friedrichs, P. Eastman, V. Vaidyanathan, M. Houston, S. LeGrand, A. L. Beberg, D. L. Ensign, C. M. Bruns, and V. S. Pande",
      "Accelerating Molecular Dynamic Simulation on Graphics Processing Units",
      "J. Comp. Chem.",
      30, 2009, "864-872" },
    { "Engin2010",
      "O. Engin, A. Villa, M. Sayar and B. Hess",
      "Driving Forces for Adsorption of Amphiphilic Peptides to Air-Water Interface",
      "J. Phys. Chem. B",
      114, 2010, "11093" },
    { "Fritsch12",
      "S. Fritsch, C. Junghans and K. Kremer",
      "Adaptive molecular simulation study on structure formation of toluene around C60 using Gromacs",
      "J. Chem. Theo. Comp.",
      8, 2012, "398" },
    { "Junghans10",
      "C. Junghans and S. Poblete",
      "A reference implementation of the adaptive resolution scheme in ESPResSo",
      "Comp. Phys. Comm.",
      181, 2010, "1449" },
    { "Wang2010",
      "H. Wang, F. Dommert, C.Holm",
      "Optimizing working parameters of the smooth particle mesh Ewald algorithm in terms of accuracy and efficiency",
      "J. Chem. Phys. B",
      133, 2010, "034117" },
    { "Sugita1999a",
      "Y. Sugita, Y. Okamoto",
      "Replica-exchange molecular dynamics method for protein folding",
      "Chem. Phys. Lett.",
      314, 1999, "141-151" },
    { "Kutzner2011",
      "C. Kutzner and J. Czub and H. Grubmuller",
      "Keep it Flexible: Driving Macromolecular Rotary Motions in Atomistic Simulations with GROMACS",
      "J. Chem. Theory Comput.",
      7, 2011, "1381-1393" },
    { "Hoefling2011",
      "M. Hoefling, N. Lima, D. Haenni, C.A.M. Seidel, B. Schuler, H. Grubmuller",
      "Structural Heterogeneity and Quantitative FRET Efficiency Distributions of Polyprolines through a Hybrid Atomistic Simulation and Monte Carlo Approach",
      "PLoS ONE",
      6, 2011, "e19791" },
    { "Hockney1988",
      "R. W. Hockney and J. W. Eastwood",
      "Computer simulation using particles",
      "IOP, Bristol",
      1, 1988, "1" },
    { "Ballenegger2012",
      "V. Ballenegger, J.J. Cerda, and C. Holm",
      "How to Convert SPME to P3M: Influence Functions and Error Estimates",
      "J. Chem. Theory Comput.",
      8, 2012, "936-947" },
    { "Garmay2012",
      "Garmay Yu, Shvetsov A, Karelov D, Lebedev D, Radulescu A, Petukhov M, Isaev-Ivanov V",
      "Correlated motion of protein subdomains and large-scale conformational flexibility of RecA protein filament",
      "Journal of Physics: Conference Series",
      340, 2012, "012094" }
  };
  /* number of entries in citedb */
#define NSTR (int)asize(citedb)
  
  int  j,index;
  char *author;
  char *title;
#define LINE_WIDTH 79
  
  if (fp == NULL)
    return;

  /* linear scan for the key; index == NSTR means not found */
  for(index=0; (index<NSTR) && (strcmp(citedb[index].key,key) != 0); index++)
    ;
  
  fprintf(fp,"\n++++ PLEASE READ AND CITE THE FOLLOWING REFERENCE ++++\n");
  if (index < NSTR) {
    /* Insert newlines */
    author = wrap_lines(citedb[index].author,LINE_WIDTH,0,FALSE);
    title  = wrap_lines(citedb[index].title,LINE_WIDTH,0,FALSE);
    fprintf(fp,"%s\n%s\n%s %d (%d) pp. %s\n",
          author,title,citedb[index].journal,
          citedb[index].volume,citedb[index].year,
          citedb[index].pages);
    /* wrap_lines allocates; the copies are ours to free */
    sfree(author);
    sfree(title);
  }
  else {
    fprintf(fp,"Entry %s not found in citation database\n",key);
  }
  fprintf(fp,"-------- -------- --- Thank You --- -------- --------\n\n");
  fflush(fp);
}
 +
#ifdef GMX_GIT_VERSION_INFO
/* Version information generated at compile time. */
#include "gromacs/utility/gitversion.h"
#else
/* Fall back to statically defined version. */
static const char _gmx_ver_string[]="VERSION " VERSION;
#endif

/* Return the GROMACS version banner, e.g. "VERSION x.y.z". */
const char *GromacsVersion()
{
  return _gmx_ver_string;
}
 +
/* Print an extended, aligned build-configuration report to fp: version
 * and git hashes, precision, MPI/OpenMP/GPU support, FFT library, and the
 * compile-time BUILD_* host/compiler information. All values come from
 * preprocessor definitions generated at configure time. */
void gmx_print_version_info(FILE *fp)
{
#ifdef GMX_GPU
    int cuda_driver,cuda_runtime;
#endif

    fprintf(fp, "Gromacs version:    %s\n", _gmx_ver_string);
#ifdef GMX_GIT_VERSION_INFO
    fprintf(fp, "GIT SHA1 hash:      %s\n", _gmx_full_git_hash);
    /* Only print out the branch information if present.
     * The generating script checks whether the branch point actually
     * coincides with the hash reported above, and produces an empty string
     * in such cases. */
    if (_gmx_central_base_hash[0] != 0)
    {
        fprintf(fp, "Branched from:      %s\n", _gmx_central_base_hash);
    }
#endif

#ifdef GMX_DOUBLE
    fprintf(fp, "Precision:          double\n");
#else
    fprintf(fp, "Precision:          single\n");
#endif

#ifdef GMX_THREAD_MPI
    fprintf(fp, "MPI library:        thread_mpi\n");
#elif defined(GMX_MPI)
    fprintf(fp, "MPI library:        MPI\n");
#else
    fprintf(fp, "MPI library:        none\n");
#endif
#ifdef GMX_OPENMP
    fprintf(fp, "OpenMP support:     enabled\n");
#else
    fprintf(fp, "OpenMP support:     disabled\n");
#endif
#ifdef GMX_GPU
    fprintf(fp, "GPU support:        enabled\n");
#else
    fprintf(fp, "GPU support:        disabled\n");
#endif
    /* A preprocessor trick to avoid duplicating logic from vec.h */
#define gmx_stringify2(x) #x
#define gmx_stringify(x) gmx_stringify2(x)
    fprintf(fp, "invsqrt routine:    %s\n", gmx_stringify(gmx_invsqrt(x)));
    fprintf(fp, "CPU acceleration:   %s\n", GMX_CPU_ACCELERATION_STRING);

    /* TODO: Would be nicer to wrap this in a gmx_fft_version() call, but
     * since that is currently in mdlib, can wait for master. */
#ifdef GMX_FFT_FFTPACK
    fprintf(fp, "FFT library:        fftpack (built-in)\n");
#elif defined(GMX_FFT_FFTW3) && defined(GMX_NATIVE_WINDOWS)
    /* fftw(f)_version symbols are not exported on native Windows builds */
    fprintf(fp, "FFT library:        %s\n", "fftw3");
#elif defined(GMX_FFT_FFTW3) && defined(GMX_DOUBLE)
    fprintf(fp, "FFT library:        %s\n", fftw_version);
#elif defined(GMX_FFT_FFTW3)
    fprintf(fp, "FFT library:        %s\n", fftwf_version);
#elif defined(GMX_FFT_MKL)
    fprintf(fp, "FFT library:        MKL\n");
#else
    fprintf(fp, "FFT library:        unknown\n");
#endif
#ifdef GMX_LARGEFILES
    fprintf(fp, "Large file support: enabled\n");
#else
    fprintf(fp, "Large file support: disabled\n");
#endif
#ifdef HAVE_RDTSCP
    fprintf(fp, "RDTSCP usage:       enabled\n");
#else
    fprintf(fp, "RDTSCP usage:       disabled\n");
#endif

    fprintf(fp, "Built on:           %s\n", BUILD_TIME);
    fprintf(fp, "Built by:           %s\n", BUILD_USER);
    fprintf(fp, "Build OS/arch:      %s\n", BUILD_HOST);
    fprintf(fp, "Build CPU vendor:   %s\n", BUILD_CPU_VENDOR);
    fprintf(fp, "Build CPU brand:    %s\n", BUILD_CPU_BRAND);
    fprintf(fp, "Build CPU family:   %d   Model: %d   Stepping: %d\n",
            BUILD_CPU_FAMILY, BUILD_CPU_MODEL, BUILD_CPU_STEPPING);
    /* TODO: The below strings can be quite long, so it would be nice to wrap
     * them. Can wait for later, as the master branch has ready code to do all
     * that. */
    fprintf(fp, "Build CPU features: %s\n", BUILD_CPU_FEATURES);
    fprintf(fp, "C compiler:         %s\n", BUILD_C_COMPILER);
    fprintf(fp, "C compiler flags:   %s\n", BUILD_CFLAGS);
    if (BUILD_CXX_COMPILER[0] != '\0')
    {
        fprintf(fp, "C++ compiler:       %s\n", BUILD_CXX_COMPILER);
        fprintf(fp, "C++ compiler flags: %s\n", BUILD_CXXFLAGS);
    }
#ifdef HAVE_LIBMKL
    /* MKL might be used for LAPACK/BLAS even if FFTs use FFTW, so keep it separate */
    fprintf(fp, "Linked with Intel MKL version %s.%s.%s.\n",
            __INTEL_MKL__,__INTEL_MKL_MINOR__,__INTEL_MKL_UPDATE__);
#endif
#ifdef GMX_GPU
    fprintf(fp, "CUDA compiler:      %s\n",CUDA_NVCC_COMPILER_INFO);
    cuda_driver = 0;
    cudaDriverGetVersion(&cuda_driver);
    cuda_runtime = 0;
    cudaRuntimeGetVersion(&cuda_runtime);
    fprintf(fp, "CUDA driver:        %d.%d\n",cuda_driver/1000, cuda_driver%100);
    fprintf(fp, "CUDA runtime:       %d.%d\n",cuda_runtime/1000, cuda_runtime%100);
#endif

}
Simple merge
index b4c67c0f8a7042c149642cb47fc2dc5aac39e41d,0000000000000000000000000000000000000000..b00e2887c8a93d36d3d16bd04371d33bf18751e5
mode 100644,000000..100644
--- /dev/null
@@@ -1,1202 -1,0 +1,1198 @@@
-             #ifdef HAVE_LSTAT
-                 status = lstat (fname, &st_buf);
-             #else
-                 status = stat (fname, &st_buf);
-             #endif
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <stdio.h>
 +#include <stdlib.h>
 +#include <string.h>
 +#include <sys/types.h>
 +#include <sys/stat.h>
 +#include <fcntl.h>
 +
 +#ifdef HAVE_DIRENT_H
 +/* POSIX */
 +#include <dirent.h>
 +#endif
 +
 +#ifdef HAVE_UNISTD_H
 +#include <unistd.h>
 +#endif
 +
 +#ifdef GMX_NATIVE_WINDOWS
 +#include <direct.h>
 +#include <io.h>
 +#endif
 +
 +#include "sysstuff.h"
 +#include "string2.h"
 +#include "futil.h"
 +#include "network.h"
 +#include "gmx_fatal.h"
 +#include "smalloc.h"
 +#include "statutil.h"
 +
 +
 +#ifdef GMX_THREAD_MPI
 +#include "thread_mpi.h"
 +#endif
 +
 +/* Windows file stuff, only necessary for visual studio */
 +#ifdef _MSC_VER
 +#include "windows.h"
 +#endif
 +
/* we keep a linked list of all files opened through pipes (i.e. 
   compressed or .gzipped files. This way we can distinguish between them
   without having to change the semantics of reading from/writing to files) 
   */
typedef struct t_pstack {
    FILE   *fp;                /* the pipe stream itself */
    struct t_pstack *prev;     /* next-older entry; NULL terminates the list */
} t_pstack;

/* head of the pipe-stream stack (most recently opened first) */
static t_pstack *pstack=NULL;
/* when TRUE, ffopen() disables stdio buffering (debugging aid; see no_buffers) */
static gmx_bool     bUnbuffered=FALSE;

#ifdef GMX_THREAD_MPI
/* this linked list is an intrinsically globally shared object, so we have
   to protect it with mutexes */
static tMPI_Thread_mutex_t pstack_mutex=TMPI_THREAD_MUTEX_INITIALIZER;
#endif
 +
/* Request that files subsequently opened via ffopen() be unbuffered. */
void no_buffers(void)
{
    bUnbuffered=TRUE;
}
 +
/* Register fp as a pipe stream by pushing it onto the global pipe stack,
 * so ffclose()/frewind()/is_pipe() can later recognize it as a pipe.
 * Thread-safe under GMX_THREAD_MPI via pstack_mutex. */
void push_ps(FILE *fp)
{
    t_pstack *ps;

#ifdef GMX_THREAD_MPI
    tMPI_Thread_mutex_lock(&pstack_mutex);
#endif

    snew(ps,1);
    ps->fp   = fp;
    ps->prev = pstack;
    pstack   = ps;
#ifdef GMX_THREAD_MPI
    tMPI_Thread_mutex_unlock(&pstack_mutex);
#endif
}
 +
#ifdef GMX_FAHCORE
/* don't use pipes!*/
#define popen fah_fopen
#define pclose fah_fclose
#define SKIP_FFOPS 1
#else
#ifdef ffclose
#undef ffclose
#endif
#endif

#ifndef GMX_FAHCORE
#ifndef HAVE_PIPES
/* Stub replacements for platforms without popen()/pclose(): calling them
 * aborts with a "not implemented" error via gmx_impl(). */
static FILE *popen(const char *nm,const char *mode)
{
    gmx_impl("Sorry no pipes...");

    return NULL;
}

static int pclose(FILE *fp)
{
    gmx_impl("Sorry no pipes...");

    return 0;
}
#endif
#endif
 +
/* Close fp, using pclose() if it was registered as a pipe stream (see
 * push_ps) and fclose() otherwise; a closed pipe is also unlinked from the
 * global pipe stack. Returns the underlying fclose/pclose result.
 * Thread-safe under GMX_THREAD_MPI via pstack_mutex. */
int ffclose(FILE *fp)
{
#ifdef SKIP_FFOPS
    /* FAH core build: pipes are never used, plain fclose suffices */
    return fclose(fp);
#else
    t_pstack *ps,*tmp;
    int ret=0;
#ifdef GMX_THREAD_MPI
    tMPI_Thread_mutex_lock(&pstack_mutex);
#endif

    ps=pstack;
    if (ps == NULL) {
        /* no pipes registered at all: ordinary file */
        if (fp != NULL) 
            ret = fclose(fp);
    }
    else if (ps->fp == fp) {
        /* fp is the top of the pipe stack: pop it */
        if (fp != NULL)
            ret = pclose(fp);
        pstack=pstack->prev;
        sfree(ps);
    }
    else {
        /* search for fp deeper in the stack; ps ends up just before it */
        while ((ps->prev != NULL) && (ps->prev->fp != fp))
            ps=ps->prev;
        if ((ps->prev != NULL) && ps->prev->fp == fp) {
            /* found: close the pipe and unlink its stack node */
            if (ps->prev->fp != NULL)
                ret = pclose(ps->prev->fp);
            tmp=ps->prev;
            ps->prev=ps->prev->prev;
            sfree(tmp);
        }
        else {
            /* not a pipe: ordinary file */
            if (fp != NULL)
                ret = fclose(fp);
        }
    }
#ifdef GMX_THREAD_MPI
    tMPI_Thread_mutex_unlock(&pstack_mutex);
#endif
    return ret;
#endif
}
 +
 +
#ifdef rewind
#undef rewind
#endif

/* rewind() wrapper that refuses to rewind pipe streams (they are not
 * seekable); prints a warning to stderr in that case instead. */
void frewind(FILE *fp)
{
    t_pstack *ps;
#ifdef GMX_THREAD_MPI
    tMPI_Thread_mutex_lock(&pstack_mutex);
#endif

    /* scan the pipe stack: if fp is a pipe we cannot rewind it */
    ps=pstack;
    while (ps != NULL) {
        if (ps->fp == fp) {
            fprintf(stderr,"Cannot rewind compressed file!\n");
#ifdef GMX_THREAD_MPI
            tMPI_Thread_mutex_unlock(&pstack_mutex);
#endif
            return;
        }
        ps=ps->prev;
    }
    rewind(fp);
#ifdef GMX_THREAD_MPI
    tMPI_Thread_mutex_unlock(&pstack_mutex);
#endif
}
 +
/* Portable large-file fseek: prefers 64-bit-offset fseeko (POSIX) or
 * _fseeki64 (Windows), falling back to plain fseek. Same return
 * convention as fseek (0 on success). */
int gmx_fseek(FILE *stream, gmx_off_t offset, int whence)
{
#ifdef HAVE_FSEEKO
    return fseeko(stream, offset, whence);
#else
#ifdef HAVE__FSEEKI64
    return _fseeki64(stream, offset, whence);
#else
    return fseek(stream, offset, whence);
#endif
#endif
}
 +
/* Portable large-file ftell, mirroring gmx_fseek: ftello / _ftelli64 /
 * ftell depending on platform support. Returns the current offset or -1
 * on error, as the underlying call does. */
gmx_off_t gmx_ftell(FILE *stream)
{
#ifdef HAVE_FSEEKO
    return ftello(stream);
#else
#ifdef HAVE__FSEEKI64 
    return _ftelli64(stream);
#else
    return ftell(stream);
#endif
#endif
}
 +
 +
/* Return TRUE if fp was opened as a pipe (registered via push_ps),
 * FALSE otherwise. Thread-safe under GMX_THREAD_MPI via pstack_mutex. */
gmx_bool is_pipe(FILE *fp)
{
    t_pstack *ps;
#ifdef GMX_THREAD_MPI
    tMPI_Thread_mutex_lock(&pstack_mutex);
#endif

    ps=pstack;
    while (ps != NULL) {
        if (ps->fp == fp) {
#ifdef GMX_THREAD_MPI
            tMPI_Thread_mutex_unlock(&pstack_mutex);
#endif
            return TRUE;
        }
        ps=ps->prev;
    }
#ifdef GMX_THREAD_MPI
    tMPI_Thread_mutex_unlock(&pstack_mutex);
#endif
    return FALSE;
}
 +
 +
/* Open the compress(1)-packed file fn for reading through an
 * "uncompress -c" pipe, register the stream on the pipe stack and return
 * it. Fatal error (gmx_open) if the pipe cannot be created.
 *
 * Fix: use snprintf instead of sprintf — fn is a caller-supplied path and
 * could overflow the fixed 256-byte command buffer. */
static FILE *uncompress(const char *fn,const char *mode)
{
    FILE *fp;
    char buf[256];

    snprintf(buf,sizeof(buf),"uncompress -c < %s",fn);
    fprintf(stderr,"Going to execute '%s'\n",buf);
    if ((fp=popen(buf,mode)) == NULL)
        gmx_open(fn);
    push_ps(fp);

    return fp;
}
 +
/* Open the gzip-packed file fn for reading through a "gunzip -c" pipe,
 * register the stream on the pipe stack and return it. Fatal error
 * (gmx_open) if the pipe cannot be created.
 *
 * Fix: use snprintf instead of sprintf — fn is a caller-supplied path and
 * could overflow the fixed 256-byte command buffer. */
static FILE *gunzip(const char *fn,const char *mode)
{
    FILE *fp;
    char buf[256];

    snprintf(buf,sizeof(buf),"gunzip -c < %s",fn);
    fprintf(stderr,"Going to execute '%s'\n",buf);
    if ((fp=popen(buf,mode)) == NULL)
        gmx_open(fn);
    push_ps(fp);

    return fp;
}
 +
 +gmx_bool gmx_fexist(const char *fname)
 +{
 +    FILE *test;
 +
 +    if (fname == NULL)
 +        return FALSE;
 +    test=fopen(fname,"r");
 +    if (test == NULL) {
 +        /*Windows doesn't allow fopen of directory - so we need to check this seperately */
 +        #ifdef GMX_NATIVE_WINDOWS
 +            DWORD attr = GetFileAttributes(fname);
 +            return (attr != INVALID_FILE_ATTRIBUTES) && (attr & FILE_ATTRIBUTE_DIRECTORY);
 +        #else 
 +            return FALSE;
 +        #endif
 +    } else {
 +        fclose(test);
 +        return TRUE;
 +    }
 +}
 +
/* Return TRUE only if fname is a readable regular file (not a directory,
 * pipe, etc.). On POSIX this is verified with stat()/S_ISREG; on native
 * Windows the fopen() success alone suffices, since directories cannot be
 * fopen()ed there. */
static gmx_bool gmx_is_file(const char *fname)
{
    FILE *test;

    if (fname == NULL)
        return FALSE;
    test=fopen(fname,"r");
    if (test == NULL)
    {
        return FALSE;
    }
    else
    {
        fclose(test);
        /*Windows doesn't allow fopen of directory - so we don't need to check this seperately */
        #if (!((defined WIN32 || defined _WIN32 || defined WIN64 || defined _WIN64) && !defined __CYGWIN__ && !defined __CYGWIN32__))
        {
            int status;
            struct stat st_buf;
            /* note: follows symlinks (stat, not lstat), so a link to a
             * regular file also counts as a file */
            status = stat (fname, &st_buf);
            if (status != 0 || !S_ISREG(st_buf.st_mode))
            {
                return FALSE;
            }
        }
        #endif
        return TRUE;
    }
}
 +
 +
 +gmx_bool gmx_fexist_master(const char *fname, t_commrec *cr)
 +{
 +  gmx_bool bExist;
 +  
 +  if (SIMMASTER(cr)) 
 +  {
 +      bExist = gmx_fexist(fname);
 +  }
 +  if (PAR(cr)) 
 +  {
 +      gmx_bcast(sizeof(bExist),&bExist,cr);
 +  }
 +  return bExist;
 +}
 +
/* Return TRUE if fp is at end of file. Pipes cannot seek, so for them the
 * stdio feof() flag is used; for regular files EOF is probed by reading
 * one byte and seeking back, which detects EOF *before* a failed read. */
gmx_bool gmx_eof(FILE *fp)
{
    char data[4];
    gmx_bool beof;

    if (is_pipe(fp))
        return feof(fp);
    else {
        /* fread returns the number of items read: 1 means not at EOF */
        if ((beof=fread(data,1,1,fp))==1)
            gmx_fseek(fp,-1,SEEK_CUR);
        return !beof;
    }
}
 +
 +/* Build the first free backup name of the form "dir/#file.N#" for N in
 + * [1,count_max]. Returns a heap-allocated string; the caller must sfree it.
 + * count_max == -1 selects the default COUNTMAX. Calls gmx_fatal when all
 + * count_max names are taken. */
 +static char *backup_fn(const char *file,int count_max)
 +{
 +    /* Use a reasonably low value for countmax; we might
 +     * generate 4-5 files in each round, and we dont
 +     * want to hit directory limits of 1024 or 2048 files.
 +     */
 +#define COUNTMAX 99
 +    int         i,count=1;
 +    char        *directory,*fn;
 +    char        *buf;
 +
 +    if (count_max == -1)
 +    {
 +        count_max = COUNTMAX;
 +    }
 +
 +    smalloc(buf, GMX_PATH_MAX);
 +
 +    /* Find the last directory separator, if any */
 +    for(i=strlen(file)-1; ((i > 0) && (file[i] != DIR_SEPARATOR)); i--)
 +        ;
 +    /* Must check whether i > 0, i.e. whether there is a directory
 +     * in the file name. In that case we overwrite the / sign with
 +     * a '\0' to end the directory string .
 +     */
 +    if (i > 0) {
 +        directory    = gmx_strdup(file);
 +        directory[i] = '\0';
 +        fn           = gmx_strdup(file+i+1);
 +    }
 +    else {
 +        directory    = gmx_strdup(".");
 +        fn           = gmx_strdup(file);
 +    }
 +    /* NOTE(review): sprintf into the GMX_PATH_MAX buffer is unbounded;
 +     * a very long input path could overflow -- consider snprintf. */
 +    do {
 +        sprintf(buf,"%s/#%s.%d#",directory,fn,count);
 +        count++;
 +    } while ((count <= count_max) && gmx_fexist(buf));
 +
 +    /* Arbitrarily bail out */
 +    if (count > count_max) 
 +        gmx_fatal(FARGS,"Won't make more than %d backups of %s for you.\n"
 +                  "The env.var. GMX_MAXBACKUP controls this maximum, -1 disables backups.",
 +                  count_max,fn);
 +
 +    sfree(directory);
 +    sfree(fn);
 +
 +    return buf;
 +}
 +
 +/* If 'name' exists, rename it to a "#name.N#" backup. The maximum backup
 + * count comes from the GMX_MAXBACKUP environment variable (-1 disables
 + * backups and allows overwriting). Returns TRUE on success or when no
 + * backup was needed, FALSE when the rename failed. Backups are skipped
 + * entirely for Folding@Home (GMX_FAHCORE) builds. */
 +gmx_bool make_backup(const char * name)
 +{
 +    char * env;
 +    int  count_max;
 +    char * backup;
 +
 +#ifdef GMX_FAHCORE
 +    return FALSE; /* skip making backups */
 +#else
 +
 +    if (gmx_fexist(name))
 +    {
 +        env = getenv("GMX_MAXBACKUP");
 +        if (env != NULL)
 +        {
 +            /* If sscanf fails count_max stays 0, which backup_fn treats
 +             * as an immediate "too many backups" error */
 +            count_max = 0;
 +            sscanf(env,"%d",&count_max);
 +            if (count_max == -1)
 +            {
 +                /* Do not make backups and possibly overwrite old files */
 +                return TRUE;
 +            }
 +        }
 +        else
 +        {
 +            /* Use the default maximum */
 +            count_max = -1;
 +        }
 +        backup = backup_fn(name,count_max);
 +        if(rename(name, backup) == 0) {
 +            fprintf(stderr, "\nBack Off! I just backed up %s to %s\n",
 +                    name, backup);
 +        } else {
 +            fprintf(stderr, "Sorry couldn't backup %s to %s\n", name, backup);
 +            return FALSE;
 +        }
 +        sfree(backup);
 +    }
 +    return TRUE;
 +#endif
 +}
 +
 +/* fopen() wrapper: backs up existing files before writing, falls back to
 + * transparently uncompressing "file.Z" / "file.gz" when reading, and
 + * honours the LOG_BUFS environment variable (or bUnbuffered) to control
 + * stdio buffering. Fatal error when the file cannot be opened. */
 +FILE *ffopen(const char *file,const char *mode)
 +{
 +#ifdef SKIP_FFOPS
 +    return fopen(file,mode);
 +#else
 +    FILE *ff=NULL;
 +    char buf[256],*bf,*bufsize=0,*ptr;
 +    gmx_bool bRead;
 +    int  bs;
 +
 +    if (file == NULL) 
 +    {
 +        return NULL;
 +    }
 +
 +    if (mode[0]=='w') {
 +        make_backup(file);
 +    }
 +    where();
 +
 +    /* "r" but not "r+" counts as read-only */
 +    bRead= (mode[0]=='r'&&mode[1]!='+');
 +    strcpy(buf,file);
 +    if (!bRead || gmx_fexist(buf)) {
 +        if ((ff=fopen(buf,mode))==NULL)
 +            gmx_file(buf);
 +        where();
 +        /* Check whether we should be using buffering (default) or not
 +         * (for debugging)
 +         */
 +        if (bUnbuffered || ((bufsize=getenv("LOG_BUFS")) != NULL)) {
 +            /* Check whether to use completely unbuffered */
 +            if (bUnbuffered)
 +                bs = 0;
 +            else
 +                bs=strtol(bufsize, NULL, 10); 
 +            if (bs <= 0)
 +                setbuf(ff,NULL); 
 +            else {
 +                /* NOTE(review): ptr is never freed; the buffer must stay
 +                 * alive as long as the stream, but it leaks on fclose */
 +                snew(ptr,bs+8);
 +                if (setvbuf(ff,ptr,_IOFBF,bs) != 0)
 +                    gmx_file("Buffering File");
 +            }
 +        }
 +        where();
 +    }
 +    else {
 +        /* Read mode and the plain file does not exist: try compressed
 +         * variants before giving up */
 +        sprintf(buf,"%s.Z",file);
 +        if (gmx_fexist(buf)) {
 +            ff=uncompress(buf,mode);
 +        }
 +        else {
 +            sprintf(buf,"%s.gz",file);
 +            if (gmx_fexist(buf)) {
 +                ff=gunzip(buf,mode);
 +            }
 +            else 
 +                gmx_file(file);
 +        }
 +    }
 +    return ff;
 +#endif
 +}
 +
 +/* Our own implementation of dirent-like functionality to scan directories. */
 +struct gmx_directory
 +{
 +#ifdef HAVE_DIRENT_H
 +    /* POSIX: handle returned by opendir() */
 +    DIR  *               dirent_handle;
 +#elif (defined GMX_NATIVE_WINDOWS)
 +    /* Windows: _findfirst/_findnext state; 'first' flags that the entry
 +     * filled in by _findfirst has not yet been handed out */
 +    intptr_t             windows_handle;
 +    struct _finddata_t   finddata;
 +    int                  first;
 +#else
 +    /* No directory support on this platform */
 +    int      dummy;
 +#endif
 +};
 +
 +
 +/* Open a directory for scanning. On success *p_gmxdir is set and 0 is
 + * returned; on failure *p_gmxdir is NULL (POSIX path) and EINVAL is
 + * returned. Fatal error on platforms without dirent or Windows support. */
 +int
 +gmx_directory_open(gmx_directory_t *p_gmxdir,const char *dirname)
 +{
 +    struct gmx_directory *  gmxdir;
 +    int                     rc;
 +    
 +    snew(gmxdir,1);
 +    
 +    *p_gmxdir = gmxdir;
 +    
 +#ifdef HAVE_DIRENT_H
 +    if( (gmxdir->dirent_handle = opendir(dirname)) != NULL)
 +    {
 +        rc = 0;
 +    }
 +    else 
 +    {
 +        sfree(gmxdir);
 +        *p_gmxdir = NULL;
 +        rc        = EINVAL;
 +    }
 +#elif (defined GMX_NATIVE_WINDOWS)
 +    
 +    if(dirname!=NULL && strlen(dirname)>0)
 +    {
 +        char *     tmpname;
 +        size_t     namelength;
 +        int        len;
 +        
 +        len = strlen(dirname);
 +        snew(tmpname,len+3);
 +        
 +        strncpy(tmpname,dirname,len+1);
 +        
 +        /* Remove possible trailing directory separator */
 +        if(tmpname[len]=='/' || tmpname[len]=='\\')
 +        {
 +            tmpname[len]='\0';
 +        }
 +        
 +        /* Add wildcard */
 +        strcat(tmpname,"/*");
 +        
 +        /* NOTE(review): tmpname is never freed on this path -- leak */
 +        gmxdir->first = 1;
 +        if( (gmxdir->windows_handle=_findfirst(tmpname,&gmxdir->finddata))>0L)
 +        {
 +            rc = 0;
 +        }
 +        else
 +        {
 +            if(errno==EINVAL)
 +            {
 +                sfree(gmxdir);
 +                *p_gmxdir = NULL;
 +                rc        = EINVAL;                
 +            }
 +            else
 +            {
 +                /* Empty directory is not an error */
 +                rc        = 0;
 +            }
 +        }
 +    }
 +    else
 +    {
 +        rc = EINVAL;
 +    }
 +#else
 +    gmx_fatal(FARGS,
 +              "Source compiled without POSIX dirent or windows support - cannot scan directories.\n"
 +              "In the very unlikely event this is not a compile-time mistake you could consider\n"
 +              "implementing support for your platform in futil.c, but contact the developers\n"
 +              "to make sure it's really necessary!\n");
 +    rc = -1;
 +#endif
 +    return rc;
 +}
 +
 +
 +/* Copy the next entry name (at most maxlength_name chars) into 'name'.
 + * Returns 0 on success, ENOENT when the directory is exhausted, EINVAL
 + * for a bad handle. NOTE(review): strncpy does not guarantee NUL
 + * termination when the entry name is >= maxlength_name chars. */
 +int
 +gmx_directory_nextfile(gmx_directory_t gmxdir,char *name,int maxlength_name)
 +{
 +    int                     rc;
 +    
 +#ifdef HAVE_DIRENT_H
 +    
 +    struct dirent *         direntp_large;
 +    struct dirent *         p;
 +    
 +    
 +    if(gmxdir!=NULL && gmxdir->dirent_handle!=NULL)
 +    {
 +        /* On some platforms no space is present for d_name in dirent.
 +         * Since d_name is guaranteed to be the last entry, allocating
 +         * extra space for dirent will allow more size for d_name.
 +         * GMX_MAX_PATH should always be >= the max possible d_name.
 +         */
 +        /* NOTE(review): readdir_r is deprecated in modern glibc;
 +         * plain readdir is the recommended replacement */
 +        smalloc(direntp_large, sizeof(*direntp_large) + GMX_PATH_MAX);
 +        rc = readdir_r(gmxdir->dirent_handle,direntp_large,&p);
 +
 +        if(p!=NULL && rc==0)
 +        {
 +            strncpy(name,direntp_large->d_name,maxlength_name);
 +        }
 +        else
 +        {
 +            name[0] = '\0';
 +            rc      = ENOENT;
 +        }
 +        sfree(direntp_large);
 +    }
 +    else 
 +    {
 +        name[0] = '\0';
 +        rc      = EINVAL;
 +    }
 +    
 +#elif (defined GMX_NATIVE_WINDOWS)
 +    
 +    if(gmxdir!=NULL)
 +    {
 +        if(gmxdir->windows_handle<=0)
 +        {
 +            
 +            name[0] = '\0';
 +            rc      = ENOENT;
 +        }
 +        else if(gmxdir->first==1)
 +        {
 +            /* Hand out the entry already filled in by _findfirst */
 +            strncpy(name,gmxdir->finddata.name,maxlength_name);
 +            rc            = 0;
 +            gmxdir->first = 0;
 +        }
 +        else
 +        {
 +            if(_findnext(gmxdir->windows_handle,&gmxdir->finddata)==0)
 +            {
 +                strncpy(name,gmxdir->finddata.name,maxlength_name);
 +                rc      = 0;
 +            }
 +            else
 +            {
 +                name[0] = '\0';
 +                rc      = ENOENT;
 +            }
 +        }
 +    }
 +    
 +#else
 +    gmx_fatal(FARGS,
 +              "Source compiled without POSIX dirent or windows support - cannot scan directories.\n");
 +    rc = -1;
 +#endif
 +    return rc;
 +}
 +
 +
 +/* Close the directory handle and free the gmx_directory structure.
 + * Returns the platform close result, or EINVAL for a NULL handle.
 + * sfree(NULL) is a no-op, so the final free is always safe. */
 +int 
 +gmx_directory_close(gmx_directory_t gmxdir)
 +{
 +    int                     rc;
 +#ifdef HAVE_DIRENT_H
 +    rc = (gmxdir != NULL) ? closedir(gmxdir->dirent_handle) : EINVAL;
 +#elif (defined GMX_NATIVE_WINDOWS)
 +    rc = (gmxdir != NULL) ? _findclose(gmxdir->windows_handle) : EINVAL;
 +#else
 +    gmx_fatal(FARGS,
 +              "Source compiled without POSIX dirent or windows support - cannot scan directories.\n");
 +    rc = -1;
 +#endif
 +    
 +    sfree(gmxdir);
 +    return rc;
 +}
 +
 +
 +/* Probe a few conventional "share" subdirectories of 'parent' for the
 + * marker file gurgle.dat. On success 'libdir' is filled with the
 + * directory that contains it (marker name stripped) and TRUE is
 + * returned. 'libdir' must be large enough (callers pass GMX_PATH_MAX). */
 +static gmx_bool search_subdirs(const char *parent, char *libdir)
 +{
 +    char *ptr;
 +    gmx_bool found;
 +
 +    /* Search a few common subdirectory names for the gromacs library dir */
 +    sprintf(libdir,"%s%cshare%ctop%cgurgle.dat",parent,
 +            DIR_SEPARATOR,DIR_SEPARATOR,DIR_SEPARATOR);
 +    found=gmx_fexist(libdir);
 +    if(!found) {
 +        sprintf(libdir,"%s%cshare%cgromacs%ctop%cgurgle.dat",parent,
 +                DIR_SEPARATOR,DIR_SEPARATOR,
 +                DIR_SEPARATOR,DIR_SEPARATOR);
 +        found=gmx_fexist(libdir);
 +    }    
 +    if(!found) {
 +        sprintf(libdir,"%s%cshare%cgromacs-%s%ctop%cgurgle.dat",parent,
 +                DIR_SEPARATOR,DIR_SEPARATOR,VERSION,
 +                DIR_SEPARATOR,DIR_SEPARATOR);
 +        found=gmx_fexist(libdir);
 +    }    
 +    if(!found) {
 +        sprintf(libdir,"%s%cshare%cgromacs%cgromacs-%s%ctop%cgurgle.dat",parent,
 +                DIR_SEPARATOR,DIR_SEPARATOR,DIR_SEPARATOR,
 +                VERSION,DIR_SEPARATOR,DIR_SEPARATOR);
 +        found=gmx_fexist(libdir);
 +    }    
 +
 +    /* Remove the gurgle.dat part from libdir if we found something */
 +    if(found) {
 +        ptr=strrchr(libdir,DIR_SEPARATOR); /* slash or backslash always present, no check necessary */
 +        *ptr='\0';
 +    }
 +    return found;
 +}
 +
 +
 +/* Check if the program name begins with "/" on unix/cygwin, or
 + * with "\" or "X:\" on windows. If not, the program name
 + * is relative to the current directory.
 + */
 +static gmx_bool filename_is_absolute(char *name)
 +{
 +#ifdef GMX_NATIVE_WINDOWS
 +    /* BUG(review): misplaced parenthesis -- this evaluates
 +     * ((strlen(name)>3) && strncmp(name+1,":\\",2)) == 0, i.e. it compares
 +     * the result of the && with 0, so a real "X:\" path (strncmp!=0 is
 +     * impossible to satisfy together with ==0) is misclassified. Intended:
 +     * (strlen(name)>3 && strncmp(name+1,":\\",2) == 0). */
 +    return ((name[0] == DIR_SEPARATOR) || ((strlen(name)>3) && strncmp(name+1,":\\",2)) == 0);
 +#else
 +    return (name[0] == DIR_SEPARATOR);
 +#endif
 +}
 +
 +/* Locate the GROMACS data directory by resolving the running binary's
 + * full path (searching PATH, following symlinks, and preferring the
 + * CMake source tree when running from the build dir), then walking up
 + * the directory tree calling search_subdirs(). Falls back to a few
 + * standard prefixes on unix. Fills 'libdir' (>= GMX_PATH_MAX bytes) and
 + * returns TRUE on success. */
 +gmx_bool get_libdir(char *libdir)
 +{
 +#define GMX_BINNAME_MAX 512
 +    char bin_name[GMX_BINNAME_MAX];
 +    char buf[GMX_BINNAME_MAX];
 +    char full_path[GMX_PATH_MAX+GMX_BINNAME_MAX];
 +    char system_path[GMX_PATH_MAX];
 +    char *dir,*ptr,*s,*pdum;
 +    gmx_bool found=FALSE;
 +    int i;
 +
 +    if (Program() != NULL)
 +    {
 +
 +    /* First - detect binary name */
 +    if (strlen(Program()) >= GMX_BINNAME_MAX)
 +    {
 +        gmx_fatal(FARGS,"The name of the binary is longer than the allowed buffer size (%d):\n'%s'",GMX_BINNAME_MAX,Program());
 +    }
 +    strncpy(bin_name,Program(),GMX_BINNAME_MAX-1);
 +
 +    /* On windows & cygwin we need to add the .exe extension
 +     * too, or we wont be able to detect that the file exists
 +     */
 +#if (defined GMX_NATIVE_WINDOWS || defined GMX_CYGWIN)
 +    if(strlen(bin_name)<3 || gmx_strncasecmp(bin_name+strlen(bin_name)-4,".exe",4))
 +        strcat(bin_name,".exe");
 +#endif
 +
 +    /* Only do the smart search part if we got a real name */
 +    if (NULL!=bin_name && strncmp(bin_name,"GROMACS",GMX_BINNAME_MAX)) {
 +
 +        if (!strchr(bin_name,DIR_SEPARATOR)) {
 +            /* No slash or backslash in name means it must be in the path - search it! */
 +            /* Add the local dir since it is not in the path on windows */
 +            gmx_getcwd(system_path, sizeof(system_path));
 +            sprintf(full_path,"%s%c%s",system_path,DIR_SEPARATOR,bin_name);
 +            found = gmx_is_file(full_path);
 +            if (!found && (s=getenv("PATH")) != NULL)
 +            {
 +                char *dupped;
 +                
 +                /* Work on a copy: gmx_strsep modifies its argument */
 +                dupped=gmx_strdup(s);
 +                s=dupped;
 +                while(!found && (dir=gmx_strsep(&s, PATH_SEPARATOR)) != NULL)
 +                {
 +                    sprintf(full_path,"%s%c%s",dir,DIR_SEPARATOR,bin_name);
 +                    found = gmx_is_file(full_path);
 +                }
 +                sfree(dupped);
 +            }
 +            if (!found)
 +            {
 +                return FALSE;
 +            }
 +        } else if (!filename_is_absolute(bin_name)) {
 +            /* name contains directory separators, but 
 +             * it does not start at the root, i.e.
 +             * name is relative to the current dir 
 +             */
 +            gmx_getcwd(buf, sizeof(buf));
 +            sprintf(full_path,"%s%c%s",buf,DIR_SEPARATOR,bin_name);
 +        } else {
 +            strncpy(full_path,bin_name,GMX_PATH_MAX);
 +        }
 +
 +        /* Now we should have a full path and name in full_path,
 +         * but on unix it might be a link, or a link to a link to a link..
 +         */
 +#ifndef GMX_NATIVE_WINDOWS
 +        while( (i=readlink(full_path,buf,sizeof(buf)-1)) > 0 ) {
 +            buf[i]='\0';
 +            /* If it doesn't start with "/" it is relative */
 +            if (buf[0]!=DIR_SEPARATOR) {
 +                strncpy(strrchr(full_path,DIR_SEPARATOR)+1,buf,GMX_PATH_MAX);
 +            } else
 +                strncpy(full_path,buf,GMX_PATH_MAX);
 +        }
 +#endif
 +
 +        /* If running directly from the build tree, try to use the source
 +         * directory.
 +         */
 +#if (defined CMAKE_SOURCE_DIR && defined CMAKE_BINARY_DIR)
 +        if (strncmp(full_path, CMAKE_BINARY_DIR, strlen(CMAKE_BINARY_DIR)) == 0)
 +        {
 +            if (search_subdirs(CMAKE_SOURCE_DIR, libdir))
 +            {
 +                return TRUE;
 +            }
 +        }
 +#endif
 +
 +        /* Remove the executable name - it always contains at least one slash */
 +        *(strrchr(full_path,DIR_SEPARATOR)+1)='\0';
 +        /* Now we have the full path to the gromacs executable.
 +         * Use it to find the library dir. 
 +         */
 +        found=FALSE;
 +        while(!found && ( (ptr=strrchr(full_path,DIR_SEPARATOR)) != NULL ) ) {
 +            /* Strip one path component per iteration and retry */
 +            *ptr='\0';
 +            found=search_subdirs(full_path,libdir);
 +        }
 +    }
 +    }
 +    /* End of smart searching. If we didn't find it in our parent tree,
 +     * or if the program name wasn't set, at least try some standard 
 +     * locations before giving up, in case we are running from e.g. 
 +     * a users home directory. This only works on unix or cygwin...
 +     */
 +#ifndef GMX_NATIVE_WINDOWS
 +    if(!found) 
 +        found=search_subdirs("/usr/local",libdir);
 +    if(!found) 
 +        found=search_subdirs("/usr",libdir);
 +    if(!found) 
 +        found=search_subdirs("/opt",libdir);
 +#endif
 +    return found;
 +}
 +
 +
 +/* Resolve a library data file name: optionally the current directory
 + * first (bAddCWD), then each directory of the GMXLIB path (or the
 + * detected/compiled-in library dir). Returns a heap-allocated path the
 + * caller must sfree, or NULL when not found and bFatal is FALSE;
 + * otherwise a fatal error is raised. */
 +char *low_gmxlibfn(const char *file, gmx_bool bAddCWD, gmx_bool bFatal)
 +{
 +    char *ret;
 +    char *lib,*dir;
 +    char buf[1024];
 +    char libpath[GMX_PATH_MAX];
 +    gmx_bool env_is_set=FALSE;
 +    char   *s,tmppath[GMX_PATH_MAX];
 +
 +    /* GMXLIB can be a path now */
 +    lib=getenv("GMXLIB");
 +    if (lib != NULL)
 +    {
 +        env_is_set=TRUE;
 +        strncpy(libpath,lib,GMX_PATH_MAX);
 +    } 
 +    else if (!get_libdir(libpath))
 +    {
 +        strncpy(libpath,GMXLIBDIR,GMX_PATH_MAX);
 +    }
 +
 +    ret = NULL;
 +    if (bAddCWD && gmx_fexist(file))
 +    {
 +        ret = gmx_strdup(file);
 +    }
 +    else 
 +    {
 +        /* Walk the colon/semicolon separated search path; gmx_strsep
 +         * modifies tmppath in place */
 +        strncpy(tmppath,libpath,GMX_PATH_MAX);
 +        s=tmppath;
 +        while(ret == NULL && (dir=gmx_strsep(&s, PATH_SEPARATOR)) != NULL )
 +        {
 +            sprintf(buf,"%s%c%s",dir,DIR_SEPARATOR,file);
 +            if (gmx_fexist(buf))
 +            {
 +                ret = gmx_strdup(buf);
 +            }
 +        }
 +        if (ret == NULL && bFatal) 
 +        {
 +            if (env_is_set) 
 +            {
 +                gmx_fatal(FARGS,
 +                          "Library file %s not found %sin your GMXLIB path.",
 +                          file, bAddCWD ? "in current dir nor " : "");
 +            }
 +            else
 +            {
 +                gmx_fatal(FARGS,
 +                          "Library file %s not found %sin default directories.\n"
 +                        "(You can set the directories to search with the GMXLIB path variable)",
 +                          file, bAddCWD ? "in current dir nor " : "");
 +            }
 +        }
 +    }
 +
 +    return ret;
 +}
 +
 +
 +
 +
 +
 +/* Open a library file for reading after resolving it with low_gmxlibfn
 + * (current dir included). Returns NULL when not found and bFatal is
 + * FALSE; low_gmxlibfn raises the fatal error otherwise. */
 +FILE *low_libopen(const char *file,gmx_bool bFatal)
 +{
 +    FILE *ff;
 +    char *fn;
 +
 +    fn=low_gmxlibfn(file,TRUE,bFatal);
 +
 +    if (fn==NULL) {
 +        ff=NULL;
 +    } else {
 +      /* Note: only the fprintf is guarded by the if (debug) */
 +      if (debug)
 +      fprintf(debug,"Opening library file %s\n",fn);
 +      ff=fopen(fn,"r");
 +    }
 +    /* sfree(NULL) is safe when fn was not found */
 +    sfree(fn);
 +
 +    return ff;
 +}
 +
 +/* Resolve a library file name fatally: convenience wrapper around
 + * low_gmxlibfn with bAddCWD=TRUE and bFatal=TRUE. */
 +char *gmxlibfn(const char *file)
 +{
 +    return low_gmxlibfn(file,TRUE,TRUE);
 +}
 +
 +/* Open a library file fatally: convenience wrapper around low_libopen
 + * with bFatal=TRUE. */
 +FILE *libopen(const char *file)
 +{
 +    return low_libopen(file,TRUE);
 +}
 +
 +/* Turn the last six characters of 'buf' into a unique temporary file
 + * name (mkstemp template style). buf must be at least 7 bytes long. */
 +void gmx_tmpnam(char *buf)
 +{
 +    int i,len,fd;
 +
 +    if ((len = strlen(buf)) < 7)
 +        gmx_fatal(FARGS,"Buf passed to gmx_tmpnam must be at least 7 bytes long");
 +    /* Overwrite the tail with the XXXXXX template mkstemp expects */
 +    for(i=len-6; (i<len); i++) {
 +        buf[i] = 'X';
 +    }
 +    /* mktemp is dangerous and we should use mkstemp instead, but 
 +     * since windows doesnt support it we have to separate the cases.
 +     * 20090307: mktemp deprecated, use iso c++ _mktemp instead.
 +     */
 +#ifdef GMX_NATIVE_WINDOWS
 +    _mktemp(buf);
 +#else
 +    fd = mkstemp(buf);
 +
 +    /* BUG(review): mkstemp returns a file descriptor on success and -1
 +     * on error with the reason in errno; it never returns EINVAL/EEXIST/
 +     * EACCES directly, so this switch can fire spuriously for valid fds
 +     * and misses real failures. Should test fd == -1 and switch on errno.
 +     * Also close(fd) below is executed even when fd == -1. */
 +    switch (fd) {
 +        case EINVAL:
 +            gmx_fatal(FARGS,"Invalid template %s for mkstemp",buf);
 +            break;
 +        case EEXIST:
 +            gmx_fatal(FARGS,"mkstemp created existing file",buf);
 +            break;
 +        case EACCES: 
 +            gmx_fatal(FARGS,"Permission denied for opening %s",buf);
 +            break;
 +        default:
 +            break;
 +    }   
 +    close(fd);
 +#endif
 +    /* name in Buf should now be OK */
 +}
 +
 +/* Truncate (or extend) the file at 'path' to 'length' bytes. Uses the
 + * Win32 file API under MSVC, POSIX truncate() elsewhere. Returns 0 on
 + * success (always 0 on the MSVC path). */
 +int gmx_truncatefile(char *path, gmx_off_t length)
 +{
 +#ifdef _MSC_VER
 +    /* Microsoft visual studio does not have "truncate" */
 +    HANDLE fh;
 +    LARGE_INTEGER win_length;
 +
 +    win_length.QuadPart = length;
 +
 +    /* NOTE(review): CreateFile can return INVALID_HANDLE_VALUE and the
 +     * subsequent calls can fail; none of these results are checked */
 +    fh = CreateFile(path,GENERIC_READ | GENERIC_WRITE,0,NULL,
 +            OPEN_EXISTING,0,NULL);
 +    SetFilePointerEx(fh,win_length,NULL,FILE_BEGIN);
 +    SetEndOfFile(fh);
 +    CloseHandle(fh);
 +
 +    return 0;
 +#else
 +    return truncate(path,length);
 +#endif
 +}
 +
 +
 +/* Rename oldname to newname, replacing an existing target. Returns 0 on
 + * success, non-zero on failure. */
 +int gmx_file_rename(const char *oldname, const char *newname)
 +{
 +#ifndef GMX_NATIVE_WINDOWS
 +    /* under unix, rename() is atomic (at least, it should be). */
 +    return rename(oldname, newname);
 +#else
 +    /* MOVEFILE_WRITE_THROUGH flushes before the call returns */
 +    if (MoveFileEx(oldname, newname, 
 +                   MOVEFILE_REPLACE_EXISTING|MOVEFILE_WRITE_THROUGH))
 +        return 0;
 +    else
 +        return 1;
 +#endif
 +}
 +
 +/* Copy oldname to newname in FILECOPY_BUFSIZE chunks. When
 + * copy_if_empty is FALSE the destination is only created once the first
 + * byte has actually been read. Returns 0 on success, 1 on any error. */
 +int gmx_file_copy(const char *oldname, const char *newname, gmx_bool copy_if_empty)
 +{
 +/* the full copy buffer size: */
 +#define FILECOPY_BUFSIZE (1<<16)
 +    FILE *in=NULL; 
 +    FILE *out=NULL;
 +    char *buf;
 +
 +    snew(buf, FILECOPY_BUFSIZE); 
 +
 +    in=fopen(oldname, "rb");
 +    if (!in)
 +        goto error;
 +
 +    /* If we don't copy when empty, we postpone opening the file
 +       until we're actually ready to write. */
 +    if (copy_if_empty)
 +    {
 +        out=fopen(newname, "wb");
 +        if (!out)
 +            goto error;
 +    }
 +
 +    while(!feof(in))
 +    {
 +        size_t nread;
 +        
 +        nread=fread(buf, sizeof(char), FILECOPY_BUFSIZE, in);
 +        if (nread>0)
 +        {
 +            size_t ret;
 +            if (!out)
 +            {
 +                /* so this is where we open when copy_if_empty is false:
 +                   here we know we read something. */
 +                out=fopen(newname, "wb");
 +                if (!out)
 +                    goto error;
 +            }
 +            ret=fwrite(buf, sizeof(char), nread, out);
 +            if (ret!=nread)
 +            {
 +                goto error;
 +            }
 +        }
 +        if (ferror(in))
 +            goto error;
 +    }
 +    sfree(buf);
 +    fclose(in);
 +    /* BUG(review): when copy_if_empty is FALSE and the source file is
 +     * empty, 'out' is still NULL here and fclose(NULL) is undefined
 +     * behavior -- should be guarded like the error path below. */
 +    fclose(out);
 +    return 0;
 +error:
 +    sfree(buf);
 +    if (in)
 +        fclose(in);
 +    if (out)
 +        fclose(out);
 +    return 1;
 +#undef FILECOPY_BUFSIZE
 +}
 +
 +
 +/* Flush a stdio stream's data to disk with whatever sync primitive the
 + * platform offers (fah_fsync / fsync / _commit). Returns 0 on success;
 + * EINTR and EINVAL (e.g. syncing a pipe or socket) are deliberately
 + * treated as success. A no-op (rc=0) when no fileno/fsync is available. */
 +int gmx_fsync(FILE *fp)
 +{
 +    int rc=0;
 +
 +#ifdef GMX_FAHCORE
 +    /* the fahcore defines its own os-independent fsync */
 +    rc=fah_fsync(fp);
 +#else /* GMX_FAHCORE */
 +    {
 +        int fn=-1;
 +
 +        /* get the file number */
 +#if defined(HAVE_FILENO)
 +        fn= fileno(fp);
 +#elif defined(HAVE__FILENO)
 +        fn= _fileno(fp);
 +#endif
 +
 +        /* do the actual fsync */
 +        if (fn >= 0)
 +        {
 +#if (defined(HAVE_FSYNC))
 +            rc=fsync(fn);
 +#elif (defined(HAVE__COMMIT)) 
 +            rc=_commit(fn);
 +#endif
 +        }
 +    }
 +#endif /* GMX_FAHCORE */
 +
 +    /* We check for these error codes this way because POSIX requires them
 +       to be defined, and using anything other than macros is unlikely: */
 +#ifdef EINTR
 +    /* we don't want to report an error just because fsync() caught a signal.
 +       For our purposes, we can just ignore this. */
 +    if (rc && errno==EINTR)
 +        rc=0;
 +#endif
 +#ifdef EINVAL
 +    /* we don't want to report an error just because we tried to fsync() 
 +       stdout, a socket or a pipe. */
 +    if (rc && errno==EINVAL)
 +        rc=0;
 +#endif
 +    return rc;
 +}
 +
 +/* Change the working directory, raising a fatal error (with strerror
 + * detail) on failure. */
 +void gmx_chdir(const char *directory)
 +{
 +#ifdef GMX_NATIVE_WINDOWS
 +    int rc = _chdir(directory);
 +#else
 +    int rc = chdir(directory);
 +#endif
 +    if (rc != 0)
 +    {
 +        gmx_fatal(FARGS, "Cannot change directory to '%s'. Reason: %s",
 +                  directory, strerror(errno));
 +    }
 +}
 +
 +/* Copy the current working directory into 'buffer' (capacity 'size'),
 + * raising a fatal error on failure (including a too-small buffer). */
 +void gmx_getcwd(char *buffer, size_t size)
 +{
 +#ifdef GMX_NATIVE_WINDOWS
 +    char *pdum = _getcwd(buffer, size);
 +#else
 +    char *pdum = getcwd(buffer, size);
 +#endif
 +    if (pdum == NULL)
 +    {
 +        gmx_fatal(FARGS, "Cannot get working directory. Reason: %s",
 +                  strerror(errno));
 +    }
 +}
Simple merge
index 48fc4e4a270cfae3e6965129f9431a33377fe77d,0000000000000000000000000000000000000000..678b402faf018a6ffc228269e64515064a545fb9
mode 100644,000000..100644
--- /dev/null
@@@ -1,571 -1,0 +1,562 @@@
-     fprintf(fp,
-             "Built %s by %s\n"
-             "Build os/architecture: %s\n"
-             "Build CPU Vendor: %s  Brand: %s\n"
-             "Build CPU Family: %d  Model: %d  Stepping: %d\n"
-             "Build CPU Features: %s\n"
-             "Compiler: %s\n"
-             "CFLAGS: %s\n\n",
-             BUILD_TIME,BUILD_USER,BUILD_HOST,
-             BUILD_CPU_VENDOR,BUILD_CPU_BRAND,
-             BUILD_CPU_FAMILY,BUILD_CPU_MODEL,BUILD_CPU_STEPPING,
-             BUILD_CPU_FEATURES,BUILD_COMPILER,BUILD_CFLAGS);
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +#include "gromacs/utility/gmx_header_config.h"
 +
 +#include <stdio.h>
 +#include <stdlib.h>
 +#include <string.h>
 +#include <limits.h>
 +#include <time.h>
 +
 +#ifdef HAVE_SYS_TIME_H
 +#include <sys/time.h>
 +#endif
 +
 +#include "smalloc.h"
 +#include "gmx_fatal.h"
 +#include "network.h"
 +#include "main.h"
 +#include "macros.h"
 +#include "futil.h"
 +#include "filenm.h"
 +#include "gmxfio.h"
 +#include "string2.h"
++#include "copyrite.h"
 +
 +#ifdef GMX_THREAD_MPI
 +#include "thread_mpi.h"
 +#endif
 +
 +/* The source code in this file should be thread-safe. 
 +         Please keep it that way. */
 +
 +
 +#ifdef HAVE_UNISTD_H
 +#include <unistd.h>
 +#endif
 +
 +#ifdef GMX_NATIVE_WINDOWS
 +#include <process.h>
 +#endif
 +
 +
 +/* Portable version of ctime_r implemented in src/gmxlib/string2.c, but we do not want it declared in public installed headers */
 +char *
 +gmx_ctime_r(const time_t *clock,char *buf, int n);
 +
 +
 +#define BUFSIZE       1024
 +
 +
 +/* Build a per-simulation / per-node variant of file name 'base' in
 + * 'buf': strip the extension, optionally append the multi-sim index
 + * and/or "_node<id>", then re-append the extension for file type 'ftp'
 + * (tpx/edr are mapped to their on-disk tpr/edr names). */
 +static void par_fn(char *base,int ftp,const t_commrec *cr,
 +                 gmx_bool bAppendSimId,gmx_bool bAppendNodeId,
 +                 char buf[],int bufsize)
 +{
 +  int n;
 +  
 +  /* +10 leaves room for the numeric suffixes added below */
 +  if((size_t)bufsize<(strlen(base)+10))
 +     gmx_mem("Character buffer too small!");
 +
 +  /* Copy to buf, and strip extension */
 +  strcpy(buf,base);
 +  buf[strlen(base) - strlen(ftp2ext(fn2ftp(base))) - 1] = '\0';
 +
 +  if (bAppendSimId) {
 +    sprintf(buf+strlen(buf),"%d",cr->ms->sim);
 +  }
 +  if (bAppendNodeId) {
 +    strcat(buf,"_node");
 +    sprintf(buf+strlen(buf),"%d",cr->nodeid);
 +  }
 +  strcat(buf,".");
 +  
 +  /* Add extension again */
 +  strcat(buf,(ftp == efTPX) ? "tpr" : (ftp == efEDR) ? "edr" : ftp2ext(ftp));
 +  if (debug)
 +  {
 +      fprintf(debug, "node %d par_fn '%s'\n",cr->nodeid,buf);
 +      if (fn2ftp(buf) == efLOG)
 +      {
 +          fprintf(debug,"log\n");
 +      }
 +  }
 +}
 +
 +/* Verify that the integer 'val' (labelled 'name' for messages) is
 + * identical across all simulations of a multi-sim run: each sim posts
 + * its value via gmx_sumi_sim and all pairs are compared. Fatal error on
 + * mismatch; progress is logged when 'log' is non-NULL. */
 +void check_multi_int(FILE *log,const gmx_multisim_t *ms,int val,
 +                     const char *name)
 +{
 +  int  *ibuf,p;
 +  gmx_bool bCompatible;
 +
 +  if (NULL != log)
 +      fprintf(log,"Multi-checking %s ... ",name);
 +  
 +  if (ms == NULL)
 +    gmx_fatal(FARGS,
 +            "check_multi_int called with a NULL communication pointer");
 +
 +  /* Each rank contributes its value at index ms->sim; after the sum
 +   * every slot holds the value from the corresponding simulation */
 +  snew(ibuf,ms->nsim);
 +  ibuf[ms->sim] = val;
 +  gmx_sumi_sim(ms->nsim,ibuf,ms);
 +  
 +  bCompatible = TRUE;
 +  for(p=1; p<ms->nsim; p++)
 +    bCompatible = bCompatible && (ibuf[p-1] == ibuf[p]);
 +  
 +  if (bCompatible) 
 +  {
 +      if (NULL != log)
 +          fprintf(log,"OK\n");
 +  }
 +  else 
 +  {
 +      if (NULL != log)
 +      {
 +          fprintf(log,"\n%s is not equal for all subsystems\n",name);
 +          for(p=0; p<ms->nsim; p++)
 +              fprintf(log,"  subsystem %d: %d\n",p,ibuf[p]);
 +      }
 +      gmx_fatal(FARGS,"The %d subsystems are not compatible\n",ms->nsim);
 +  }
 +  
 +  sfree(ibuf);
 +}
 +
 +/* Same as check_multi_int but for gmx_large_int_t values (e.g. step
 + * counters), using gmx_sumli_sim and the gmx_large_int_pfmt printf
 + * format for diagnostics. Fatal error on mismatch. */
 +void check_multi_large_int(FILE *log,const gmx_multisim_t *ms,
 +                           gmx_large_int_t val, const char *name)
 +{
 +  gmx_large_int_t  *ibuf;
 +  int p;
 +  gmx_bool bCompatible;
 +
 +  if (NULL != log)
 +      fprintf(log,"Multi-checking %s ... ",name);
 +  
 +  if (ms == NULL)
 +    gmx_fatal(FARGS,
 +            "check_multi_int called with a NULL communication pointer");
 +
 +  snew(ibuf,ms->nsim);
 +  ibuf[ms->sim] = val;
 +  gmx_sumli_sim(ms->nsim,ibuf,ms);
 +  
 +  bCompatible = TRUE;
 +  for(p=1; p<ms->nsim; p++)
 +    bCompatible = bCompatible && (ibuf[p-1] == ibuf[p]);
 +  
 +  if (bCompatible) 
 +  {
 +      if (NULL != log)
 +          fprintf(log,"OK\n");
 +  }
 +  else 
 +  {
 +      if (NULL != log)
 +      {
 +          fprintf(log,"\n%s is not equal for all subsystems\n",name);
 +          for(p=0; p<ms->nsim; p++)
 +          {
 +              char strbuf[255];
 +              /* first make the format string */
 +              snprintf(strbuf, 255, "  subsystem %%d: %s\n", 
 +                       gmx_large_int_pfmt);
 +              fprintf(log,strbuf,p,ibuf[p]);
 +          }
 +      }
 +      gmx_fatal(FARGS,"The %d subsystems are not compatible\n",ms->nsim);
 +  }
 +  
 +  sfree(ibuf);
 +}
 +
 +
 +/* Copy the host name into 'name' (capacity 'len', minimum 8), falling
 + * back to "unknown" when gethostname is unavailable or fails. Returns
 + * 'name' for convenience. */
 +char *gmx_gethostname(char *name, size_t len)
 +{
 +    if (len < 8)
 +    {
 +        gmx_incons("gmx_gethostname called with len<8");
 +    }
 +#ifdef HAVE_UNISTD_H
 +    if (gethostname(name, len-1) != 0)
 +    {
 +        strncpy(name, "unknown",8);
 +    }
 +#else
 +    strncpy(name, "unknown",8);
 +#endif
 +
 +    return name;
 +}
 +
 +
 +/* Open (or re-open for appending) the simulation log file and write the
 + * opening banner (time, host, pid, node info, version). With real MPI
 + * and !bMasterOnly the master broadcasts the base name and non-master
 + * ranks open per-node "..._node<id>.log" files; with thread-MPI those
 + * are opened later. The handle is returned through *fplog and also
 + * registered with gmx_fatal_set_log_file. */
 +void gmx_log_open(const char *lognm,const t_commrec *cr,gmx_bool bMasterOnly, 
 +                  gmx_bool bAppendFiles, FILE** fplog)
 +{
 +    int  len,testlen,pid;
 +    char buf[256],host[256];
 +    time_t t;
 +    char timebuf[STRLEN];
 +    FILE *fp=*fplog;
 +    char *tmpnm;
 +  
 +    debug_gmx();
 +  
 +    /* Communicate the filename for logfile */
 +    if (cr->nnodes > 1 && !bMasterOnly
 +#ifdef GMX_THREAD_MPI
 +        /* With thread MPI the non-master log files are opened later
 +         * when the files names are already known on all nodes.
 +         */
 +        && FALSE
 +#endif
 +        )
 +    {
 +        /* len is set on the master and broadcast before use below */
 +        if (MASTER(cr))
 +        {
 +            len = strlen(lognm) + 1;
 +        }
 +        gmx_bcast(sizeof(len),&len,cr);
 +        if (!MASTER(cr))
 +        {
 +            snew(tmpnm,len+8);
 +        }
 +        else
 +        {
 +            tmpnm=gmx_strdup(lognm);
 +        }
 +        gmx_bcast(len*sizeof(*tmpnm),tmpnm,cr);
 +    }
 +    else
 +    {
 +        tmpnm=gmx_strdup(lognm);
 +    }
 +  
 +    debug_gmx();
 +
 +    if (!bMasterOnly && !MASTER(cr))
 +    {
 +        /* Since log always ends with '.log' let's use this info */
 +        par_fn(tmpnm,efLOG,cr,FALSE,!bMasterOnly,buf,255);
 +        fp = gmx_fio_fopen(buf, bAppendFiles ? "a+" : "w+" );
 +    }
 +    else if (!bAppendFiles)
 +    {
 +        fp = gmx_fio_fopen(tmpnm, bAppendFiles ? "a+" : "w+" );
 +    }
 +    /* else: appending on the master -- keep the handle passed in *fplog */
 +
 +    sfree(tmpnm);
 +
 +    gmx_fatal_set_log_file(fp);
 +  
 +    /* Get some machine parameters */
 +    gmx_gethostname(host,256);
 +
 +    time(&t);
 +
 +#ifndef NO_GETPID
 +#   ifdef GMX_NATIVE_WINDOWS
 +    pid = _getpid();
 +#   else
 +    pid = getpid();
 +#   endif
 +#else
 +      pid = 0;
 +#endif
 +
 +    if (bAppendFiles)
 +    {
 +        fprintf(fp,
 +                "\n"
 +                "\n"
 +                "-----------------------------------------------------------\n"
 +                "Restarting from checkpoint, appending to previous log file.\n"
 +                "\n"
 +            );
 +    }
 +      
 +    gmx_ctime_r(&t,timebuf,STRLEN);
 +
 +    fprintf(fp,
 +            "Log file opened on %s"
 +            "Host: %s  pid: %d  nodeid: %d  nnodes:  %d\n",
 +            timebuf,host,pid,cr->nodeid,cr->nnodes);
 ++    gmx_print_version_info(fp);
 ++    fprintf(fp, "\n\n");
 +
 +    fflush(fp);
 +    debug_gmx();
 +
 +    *fplog = fp;
 +}
 +
 +/* Close the log file and unregister it from the fatal-error handler.
 + * Safe to call with NULL. */
 +void gmx_log_close(FILE *fp)
 +{
 +  if (fp) {
 +    gmx_fatal_set_log_file(NULL);
 +    gmx_fio_fclose(fp);
 +  }
 +}
 +
 +/* Broadcast the command-line arguments from the master to all ranks:
 + * first argc, then each argument string (length followed by contents).
 + * Non-master ranks allocate their argv entries to the received sizes. */
 +static void comm_args(const t_commrec *cr,int *argc,char ***argv)
 +{
 +  int i,len;
 +  
 +  if (PAR(cr))
 +    gmx_bcast(sizeof(*argc),argc,cr);
 +  
 +  if (!MASTER(cr))
 +    snew(*argv,*argc+1);
 +  fprintf(stderr,"NODEID=%d argc=%d\n",cr->nodeid,*argc);
 +  for(i=0; (i<*argc); i++) {
 +    /* len is set on the master and broadcast before it is used below */
 +    if (MASTER(cr))
 +      len = strlen((*argv)[i])+1;
 +    gmx_bcast(sizeof(len),&len,cr);
 +    if (!MASTER(cr))
 +      snew((*argv)[i],len);
 +    /*gmx_bcast(len*sizeof((*argv)[i][0]),(*argv)[i],cr);*/
 +    gmx_bcast(len*sizeof(char),(*argv)[i],cr);
 +  }
 +  debug_gmx();
 +}
 +
 +void init_multisystem(t_commrec *cr,int nsim, char **multidirs,
 +                      int nfile, const t_filenm fnm[],gmx_bool bParFn)
 +{
 +    gmx_multisim_t *ms;
 +    int  nnodes,nnodpersim,sim,i,ftp;
 +    char buf[256];
 +#ifdef GMX_MPI
 +    MPI_Group mpi_group_world;
 +#endif  
 +    int *rank;
 +
 +#ifndef GMX_MPI
 +    if (nsim > 1)
 +    {
 +        gmx_fatal(FARGS,"This binary is compiled without MPI support, can not do multiple simulations.");
 +    }
 +#endif
 +
 +    nnodes  = cr->nnodes;
 +    if (nnodes % nsim != 0)
 +    {
 +        gmx_fatal(FARGS,"The number of nodes (%d) is not a multiple of the number of simulations (%d)",nnodes,nsim);
 +    }
 +
 +    nnodpersim = nnodes/nsim;
 +    sim = cr->nodeid/nnodpersim;
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"We have %d simulations, %d nodes per simulation, local simulation is %d\n",nsim,nnodpersim,sim);
 +    }
 +
 +    snew(ms,1);
 +    cr->ms = ms;
 +    ms->nsim = nsim;
 +    ms->sim  = sim;
 +#ifdef GMX_MPI
 +    /* Create a communicator for the master nodes */
 +    snew(rank,ms->nsim);
 +    for(i=0; i<ms->nsim; i++)
 +    {
 +        rank[i] = i*nnodpersim;
 +    }
 +    MPI_Comm_group(MPI_COMM_WORLD,&mpi_group_world);
 +    MPI_Group_incl(mpi_group_world,nsim,rank,&ms->mpi_group_masters);
 +    sfree(rank);
 +    MPI_Comm_create(MPI_COMM_WORLD,ms->mpi_group_masters,
 +                    &ms->mpi_comm_masters);
 +
 +#if !defined(GMX_THREAD_MPI) && !defined(MPI_IN_PLACE_EXISTS)
 +    /* initialize the MPI_IN_PLACE replacement buffers */
 +    snew(ms->mpb, 1);
 +    ms->mpb->ibuf=NULL;
 +    ms->mpb->libuf=NULL;
 +    ms->mpb->fbuf=NULL;
 +    ms->mpb->dbuf=NULL;
 +    ms->mpb->ibuf_alloc=0;
 +    ms->mpb->libuf_alloc=0;
 +    ms->mpb->fbuf_alloc=0;
 +    ms->mpb->dbuf_alloc=0;
 +#endif
 +
 +#endif
 +
 +    /* Reduce the intra-simulation communication */
 +    cr->sim_nodeid = cr->nodeid % nnodpersim;
 +    cr->nnodes = nnodpersim;
 +#ifdef GMX_MPI
 +    MPI_Comm_split(MPI_COMM_WORLD,sim,cr->sim_nodeid,&cr->mpi_comm_mysim);
 +    cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
 +    cr->nodeid = cr->sim_nodeid;
 +#endif
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"This is simulation %d",cr->ms->sim);
 +        if (PAR(cr))
 +        {
 +            fprintf(debug,", local number of nodes %d, local nodeid %d",
 +                    cr->nnodes,cr->sim_nodeid);
 +        }
 +        fprintf(debug,"\n\n");
 +    }
 +
 +    if (multidirs)
 +    {
 +        int ret;
 +        if (debug)
 +        {
 +            fprintf(debug,"Changing to directory %s\n",multidirs[cr->ms->sim]);
 +        }
 +        gmx_chdir(multidirs[cr->ms->sim]);
 +    }
 +    else if (bParFn)
 +    {
 +        /* Patch output and tpx, cpt and rerun input file names */
 +        for(i=0; (i<nfile); i++)
 +        {
 +            /* Because of possible multiple extensions per type we must look 
 +             * at the actual file name 
 +             */
 +            if (is_output(&fnm[i]) ||
 +                fnm[i].ftp == efTPX || fnm[i].ftp == efCPT ||
 +                strcmp(fnm[i].opt,"-rerun") == 0)
 +            {
 +                ftp = fn2ftp(fnm[i].fns[0]);
 +                par_fn(fnm[i].fns[0],ftp,cr,TRUE,FALSE,buf,255);
 +                sfree(fnm[i].fns[0]);
 +                fnm[i].fns[0] = gmx_strdup(buf);
 +            }
 +        }
 +    }
 +}
 +
 +t_commrec *init_par(int *argc,char ***argv_ptr)
 +{
 +    t_commrec *cr;
 +    char      **argv;
 +    int       i;
 +    gmx_bool      pe=FALSE;
 +
 +    snew(cr,1);
 +
 +    argv = argv_ptr ? *argv_ptr : NULL;
 +
 +#if defined GMX_MPI && !defined GMX_THREAD_MPI
 +    cr->sim_nodeid = gmx_setup(argc,argv,&cr->nnodes);
 +
 +    if (!PAR(cr) && (cr->sim_nodeid != 0))
 +    {
 +        gmx_comm("(!PAR(cr) && (cr->sim_nodeid != 0))");
 +    }
 +
 +    cr->mpi_comm_mysim   = MPI_COMM_WORLD;
 +    cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
 +#else
 +    /* These should never be accessed */
 +    cr->mpi_comm_mysim   = NULL;
 +    cr->mpi_comm_mygroup = NULL;
 +    cr->nnodes           = 1;
 +    cr->sim_nodeid       = 0;
 +#endif
 +
 +    cr->nodeid = cr->sim_nodeid;
 +
 +    cr->duty = (DUTY_PP | DUTY_PME);
 +
 +    /* Communicate arguments if parallel */
 +#ifndef GMX_THREAD_MPI
 +    if (PAR(cr))
 +    {
 +        comm_args(cr,argc,argv_ptr);
 +    }
 +#endif /* GMX_THREAD_MPI */
 +
 +#ifdef GMX_MPI
 +#if !defined(GMX_THREAD_MPI) && !defined(MPI_IN_PLACE_EXISTS)
 +  /* initialize the MPI_IN_PLACE replacement buffers */
 +  snew(cr->mpb, 1);
 +  cr->mpb->ibuf=NULL;
 +  cr->mpb->libuf=NULL;
 +  cr->mpb->fbuf=NULL;
 +  cr->mpb->dbuf=NULL;
 +  cr->mpb->ibuf_alloc=0;
 +  cr->mpb->libuf_alloc=0;
 +  cr->mpb->fbuf_alloc=0;
 +  cr->mpb->dbuf_alloc=0;
 +#endif
 +#endif
 +
 +    return cr;
 +}
 +
 +t_commrec *init_par_threads(const t_commrec *cro)
 +{
 +#ifdef GMX_THREAD_MPI
 +    int initialized;
 +    t_commrec *cr;
 +
 +    /* make a thread-specific commrec */
 +    snew(cr,1);
 +    /* now copy the whole thing, so settings like the number of PME nodes
 +       get propagated. */
 +    *cr=*cro;
 +
 +    /* and we start setting our own thread-specific values for things */
 +    MPI_Initialized(&initialized);
 +    if (!initialized)
 +    {
 +        gmx_comm("Initializing threads without comm");
 +    }
 +    /* once threads will be used together with MPI, we'll
 +       fill the cr structure with distinct data here. This might even work: */
 +    cr->sim_nodeid = gmx_setup(0,NULL, &cr->nnodes);
 +
 +    cr->mpi_comm_mysim = MPI_COMM_WORLD;
 +    cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
 +    cr->nodeid = cr->sim_nodeid;
 +    cr->duty = (DUTY_PP | DUTY_PME);
 +
 +    return cr;
 +#else
 +    return NULL;
 +#endif
 +}
index 0000000000000000000000000000000000000000,b86c3eee5715d688527f130c666757938c52d5e2..b86c3eee5715d688527f130c666757938c52d5e2
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,31e8b758fbe75e8cb8a4f3e2c90da150f696be5f..31e8b758fbe75e8cb8a4f3e2c90da150f696be5f
mode 000000,100755..100755
--- /dev/null
index 0000000000000000000000000000000000000000,ceb52b591ef4831c910b6d8faa0773d9c9cf2af0..ceb52b591ef4831c910b6d8faa0773d9c9cf2af0
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,2f7e8d9ed57577f36e7c23e7e45014ee8e20aaba..2f7e8d9ed57577f36e7c23e7e45014ee8e20aaba
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,43f7f24fd8fb531c7126c9f935e1ac888bdec843..43f7f24fd8fb531c7126c9f935e1ac888bdec843
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,8fe321d85cb9078763daf769b05c822a24afc382..8fe321d85cb9078763daf769b05c822a24afc382
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,8c7deae7cc4342cb0b710a48604972139c88b9d4..8c7deae7cc4342cb0b710a48604972139c88b9d4
mode 000000,100755..100755
--- /dev/null
index 0000000000000000000000000000000000000000,20add7f0a6ac5240f3314a33351baba79600e682..20add7f0a6ac5240f3314a33351baba79600e682
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,c40e6d4604120f82d4dd68ff36988a2465e4f6bf..c40e6d4604120f82d4dd68ff36988a2465e4f6bf
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,24d7dcc3d5ec046917bd2bc2dbc049f6ce3fbd38..24d7dcc3d5ec046917bd2bc2dbc049f6ce3fbd38
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,e915536c5f95844860c9899bd318a187455d4ce2..e915536c5f95844860c9899bd318a187455d4ce2
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,500f1e55dcd64106820641849590363b41f4c05a..500f1e55dcd64106820641849590363b41f4c05a
mode 000000,100755..100755
--- /dev/null
index 0000000000000000000000000000000000000000,a887b3b9e4cee08d3a965722309e2c52377a7fc9..a887b3b9e4cee08d3a965722309e2c52377a7fc9
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,4a7ac0245e5d4b13234df269f72ca9712e93f786..4a7ac0245e5d4b13234df269f72ca9712e93f786
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,4e8d50428620a9bae44359ffaeddeaedbedc3dfb..4e8d50428620a9bae44359ffaeddeaedbedc3dfb
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,1f2ab31be04f996cd47402a9cfed44e24fe12270..1f2ab31be04f996cd47402a9cfed44e24fe12270
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,06f47a5002cc9f36c5e0993501e1f4f212a86456..06f47a5002cc9f36c5e0993501e1f4f212a86456
mode 000000,100755..100755
--- /dev/null
index 0000000000000000000000000000000000000000,f316f44605247bdd8188431ab415f50d2faaaecf..f316f44605247bdd8188431ab415f50d2faaaecf
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,b8c3987b8f8ff65f822ded3d5e12a744b652d1d3..b8c3987b8f8ff65f822ded3d5e12a744b652d1d3
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,89bfdd3514646bab46276d15bdaa493ffd873f85..89bfdd3514646bab46276d15bdaa493ffd873f85
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,006439173d4e8011d395e3195bbdd33580354c0a..006439173d4e8011d395e3195bbdd33580354c0a
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,98541ff40482c1ac3770aaa048e740be28fbc4e6..98541ff40482c1ac3770aaa048e740be28fbc4e6
mode 000000,100755..100755
--- /dev/null
index 0000000000000000000000000000000000000000,944d5487cb82d7238b46874d12f0466f0c6fd0b6..944d5487cb82d7238b46874d12f0466f0c6fd0b6
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,949600111a3c44e2d80230cee42668dceaf1ccb4..949600111a3c44e2d80230cee42668dceaf1ccb4
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,4bf51a6a8f9b468ac00639045ae253e939bc0662..4bf51a6a8f9b468ac00639045ae253e939bc0662
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,4c9d79ad11e948f3b2c2752f3827cb37579bd99b..4c9d79ad11e948f3b2c2752f3827cb37579bd99b
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,e36875e3d54dc2f1c8345523abe731a1d2e0e99c..e36875e3d54dc2f1c8345523abe731a1d2e0e99c
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,363cdb79633dc518b15469a95ddd76fc6a340d6f..363cdb79633dc518b15469a95ddd76fc6a340d6f
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,ac7c5b8465068f0d948984cfcd715579d3ab81c8..ac7c5b8465068f0d948984cfcd715579d3ab81c8
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,f969c321a39bf26cba1cd6130ab5b36bbe7be235..f969c321a39bf26cba1cd6130ab5b36bbe7be235
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,778be9fa133a6feb5db52401a9e08498624f9bf4..778be9fa133a6feb5db52401a9e08498624f9bf4
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,314ceccb779d2fdbc0d6ce3a89d66d6f7c0777d9..314ceccb779d2fdbc0d6ce3a89d66d6f7c0777d9
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,e54dd6385077a13a8fc322a6a7dc2b4ed0866df8..e54dd6385077a13a8fc322a6a7dc2b4ed0866df8
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,605cd895e2e857eb34c9e0bc6835a59799c30b37..605cd895e2e857eb34c9e0bc6835a59799c30b37
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,1f0337985922a84cfe578ecec90bddec5554c598..1f0337985922a84cfe578ecec90bddec5554c598
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,5fac7d47e6eb31f9b1478a89415bf650185f897f..5fac7d47e6eb31f9b1478a89415bf650185f897f
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,cf4623057775d9f36ece8e9c8f9648daf360c13c..cf4623057775d9f36ece8e9c8f9648daf360c13c
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,8bed655b594d9a5413b9ffebc2412bca604d4961..8bed655b594d9a5413b9ffebc2412bca604d4961
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,24499eeca68b44f1d86741935fad1cfe428d4848..24499eeca68b44f1d86741935fad1cfe428d4848
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,b498e5d613566ea5c01dc6541e29133b53cac9a9..b498e5d613566ea5c01dc6541e29133b53cac9a9
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,54abcdfebd80d292e1c8ac4e7a2a1a8119faf9f2..54abcdfebd80d292e1c8ac4e7a2a1a8119faf9f2
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,855229d1d723f98f1e0ada72cf1c00a933df65fb..855229d1d723f98f1e0ada72cf1c00a933df65fb
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,d41573bfc52cec443fd7b7518256d678cdc58a85..d41573bfc52cec443fd7b7518256d678cdc58a85
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,8bd73156d0228eb156a39e3897bf5d8936fe1dc3..8bd73156d0228eb156a39e3897bf5d8936fe1dc3
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,a36f307132710ac7b1645ed87350c6c723cd1481..a36f307132710ac7b1645ed87350c6c723cd1481
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,8729a32d07434b4fda53aea5b93a497cfd5a4af7..8729a32d07434b4fda53aea5b93a497cfd5a4af7
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,4e099092e5991ed8f3960fb24387314b88df1589..4e099092e5991ed8f3960fb24387314b88df1589
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,fe40ad0999ed7354fddef1116c281e6a405897de..fe40ad0999ed7354fddef1116c281e6a405897de
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,e8e7d72a0213805a8da0a87cd95f7edb52f76356..e8e7d72a0213805a8da0a87cd95f7edb52f76356
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,8e8314e777e699264623a959927dbe96138278ed..8e8314e777e699264623a959927dbe96138278ed
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,b9af3c8d6a2c8f1a344e781d2d5eeb974b0f7431..b9af3c8d6a2c8f1a344e781d2d5eeb974b0f7431
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,fded3a02cfff3111b040ed8e68a914eed596a3e7..fded3a02cfff3111b040ed8e68a914eed596a3e7
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,0c7cb9a38c9b88e6c5f6ec567fcb23e1f83a4104..0c7cb9a38c9b88e6c5f6ec567fcb23e1f83a4104
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,18574d8d0024953c9373267f70b9300c286ab2c0..18574d8d0024953c9373267f70b9300c286ab2c0
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,a7fbc638426f36e3371962f19654612484fd37b7..a7fbc638426f36e3371962f19654612484fd37b7
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,b4567e55a268b84ff011e573be36324041eba05f..b4567e55a268b84ff011e573be36324041eba05f
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,bedf4c77a65dee6862b1b273ecfb33c2d112c898..bedf4c77a65dee6862b1b273ecfb33c2d112c898
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,9f687d623e109467f2a4bae2150b1e6c671fa038..9f687d623e109467f2a4bae2150b1e6c671fa038
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,bca9f013407b343be3cbffc3516242258a1fd905..bca9f013407b343be3cbffc3516242258a1fd905
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,9d38f68fc364f8af3be990291a9342da17d7aab6..9d38f68fc364f8af3be990291a9342da17d7aab6
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,31406cb1465b57d06448a7abd1d2800e643fb005..31406cb1465b57d06448a7abd1d2800e643fb005
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,f5d50a6d059da95215854bb314c9c67550b4f8d9..f5d50a6d059da95215854bb314c9c67550b4f8d9
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,1f90a68460779c424cc8fe38383ccbc43d06946e..1f90a68460779c424cc8fe38383ccbc43d06946e
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,e3a0fc817cddf8891f840c633e83fe87f58e941f..e3a0fc817cddf8891f840c633e83fe87f58e941f
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,af4c2b51a59cd85d9fcefada788f66c9e1bd99ee..af4c2b51a59cd85d9fcefada788f66c9e1bd99ee
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,f62b4651e5c770ec39d5fc926bac61bab28bc5c6..f62b4651e5c770ec39d5fc926bac61bab28bc5c6
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,d2d6b1466d1725224fb73048d63f57a497a4feee..d2d6b1466d1725224fb73048d63f57a497a4feee
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,7d170ef52292f6d9f6f90e687cd5b957446090e0..7d170ef52292f6d9f6f90e687cd5b957446090e0
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,c79090db3f4d51442dae3a21ed13faa90a9e4e98..c79090db3f4d51442dae3a21ed13faa90a9e4e98
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,bf9e6c43a36727157ed8c3ab4d1d0575a3a33d11..bf9e6c43a36727157ed8c3ab4d1d0575a3a33d11
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,9bdef9064b677e22c002f3161d6e2cd0879b45f4..9bdef9064b677e22c002f3161d6e2cd0879b45f4
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,f36548b0449ccbdffacb7103d5de7fbde341092f..f36548b0449ccbdffacb7103d5de7fbde341092f
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,a18d1c18495a1a7b68f3c5fc70515194a566fa7c..a18d1c18495a1a7b68f3c5fc70515194a566fa7c
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,2c6fa3646815f80c71012154651ce91538231577..2c6fa3646815f80c71012154651ce91538231577
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,830d40a2f0a6ad2383af15ab0f30a8cb116b0741..830d40a2f0a6ad2383af15ab0f30a8cb116b0741
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,5d64d9e4ba130ae680fe18d2c36914355ec13bdb..5d64d9e4ba130ae680fe18d2c36914355ec13bdb
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,0850a39b4c33b8a2c40746c69297e5b4014ba30a..0850a39b4c33b8a2c40746c69297e5b4014ba30a
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,4677e6b98de820d601a430defe4d568cf54655ee..4677e6b98de820d601a430defe4d568cf54655ee
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,27c7016096211fc088d559aa2a6ed51196659ac9..27c7016096211fc088d559aa2a6ed51196659ac9
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,3109d42f3e470c7d56a632b6c0d7147fdbc40913..3109d42f3e470c7d56a632b6c0d7147fdbc40913
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,9951dfcbd73be5b1bdd5214f82e943f9c7d56a10..9951dfcbd73be5b1bdd5214f82e943f9c7d56a10
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,ff83b179925d899ea608efb7f74138801c39081d..ff83b179925d899ea608efb7f74138801c39081d
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,9ff8ae9d1d6427dc2d78236e9dd2faf2387f3023..9ff8ae9d1d6427dc2d78236e9dd2faf2387f3023
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,4f3444c7cc0a528d859093cb2b1b4fb1247a98b1..4f3444c7cc0a528d859093cb2b1b4fb1247a98b1
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,e818b9f3aeb8cbc4daafeebf6596cda81aa6bd43..e818b9f3aeb8cbc4daafeebf6596cda81aa6bd43
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,abdb68e7bef58a8568d16dbf061ab41f9d009f5c..abdb68e7bef58a8568d16dbf061ab41f9d009f5c
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,4894056abe3185740ed06ede6c9a69fba8f26036..4894056abe3185740ed06ede6c9a69fba8f26036
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,0a1088a729d06df4ad44c408b30cb24ca580b204..0a1088a729d06df4ad44c408b30cb24ca580b204
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,bd2b1b710b1519a213d82772102cb885678fba19..bd2b1b710b1519a213d82772102cb885678fba19
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,4e7b0f6572987764a05d6c6ce99902fd4df81a42..4e7b0f6572987764a05d6c6ce99902fd4df81a42
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,cb74baef1b22fbd730e72a72bb79069259f8bdfc..cb74baef1b22fbd730e72a72bb79069259f8bdfc
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,1967301958d070a5410a631c4dcd98354dd33c93..1967301958d070a5410a631c4dcd98354dd33c93
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,bd7b5fd5b3efcdc2493dd43fc5e64c88710569cb..bd7b5fd5b3efcdc2493dd43fc5e64c88710569cb
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,4bf5952cbaa89e27750c5046f8b3261c94753ed8..4bf5952cbaa89e27750c5046f8b3261c94753ed8
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,e7bb484515c65505b2963a05b71e98c29613664f..e7bb484515c65505b2963a05b71e98c29613664f
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,cda34cd3b94d0ca80576cc88d87af28f63cae044..cda34cd3b94d0ca80576cc88d87af28f63cae044
mode 000000,100755..100755
--- /dev/null
index 0000000000000000000000000000000000000000,b3b51e0ed6d2273e6f3a77c24926a8fe6dd4797f..b3b51e0ed6d2273e6f3a77c24926a8fe6dd4797f
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,9305aa8779aeff573286e8fbedf9c61fa09cad25..9305aa8779aeff573286e8fbedf9c61fa09cad25
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,4d8a1ca3bb2ac866ed198e4345a46e5f42b5d2d3..4d8a1ca3bb2ac866ed198e4345a46e5f42b5d2d3
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,86075b0baa3c9c0cb20c147a62e105c7c78dce1f..86075b0baa3c9c0cb20c147a62e105c7c78dce1f
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,667c068b52ad6558bd88fe16604c8a76406e8b37..667c068b52ad6558bd88fe16604c8a76406e8b37
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,2e9e29e174b43456611613acd556c5db72f13164..2e9e29e174b43456611613acd556c5db72f13164
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,43a5eb9ca829ea3d7f202a6ae1c4af9a67117da1..43a5eb9ca829ea3d7f202a6ae1c4af9a67117da1
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,619aadcd9cf5677157f60c17bfd7e7fa1c06c028..619aadcd9cf5677157f60c17bfd7e7fa1c06c028
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,5403d257a8908a5031a6fe7ec3d6c824cd0f448c..5403d257a8908a5031a6fe7ec3d6c824cd0f448c
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,249bc60e140dead8524ad8b7fe4f74c92f0d43ce..249bc60e140dead8524ad8b7fe4f74c92f0d43ce
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,64174ad2bf8f173259080a4f78bc8f5e7e5fef44..64174ad2bf8f173259080a4f78bc8f5e7e5fef44
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,cf5b9086093c6c9df0d5aab1e526ee37d98caee1..cf5b9086093c6c9df0d5aab1e526ee37d98caee1
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,fa28b477aad275e29dbf7aec49c1103f0a14147e..fa28b477aad275e29dbf7aec49c1103f0a14147e
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,744783556837baef451265ddcdc7c6edf259a6b7..744783556837baef451265ddcdc7c6edf259a6b7
mode 000000,100644..100644
--- /dev/null
index 0000000000000000000000000000000000000000,973a1f32a5868edfb14667723ba3ec24d6a47e40..973a1f32a5868edfb14667723ba3ec24d6a47e40
mode 000000,100644..100644
--- /dev/null
index b1ab471b836f2ea6a7b20ec8f7a2aaf143239987,0000000000000000000000000000000000000000..29a212ae37ed8489f9c3c8a5d2a23500644d5253
mode 100644,000000..100644
--- /dev/null
@@@ -1,473 -1,0 +1,477 @@@
-     { "NxN CSTab Elec. + VdW [F]",      41 }, /* nbnxn kernel LJ+tab, no en */
-     { "NxN CSTab Elec. + VdW [V&F]",    59 },
 +/*
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <string.h>
 +#include "types/commrec.h"
 +#include "sysstuff.h"
 +#include "gmx_fatal.h"
 +#include "names.h"
 +#include "macros.h"
 +#include "nrnb.h"
 +#include "main.h"
 +#include "smalloc.h"
 +#include "copyrite.h"
 +
 +
 +
 +
 +
 +typedef struct {
 +  const char *name;
 +  int  flop;
 +} t_nrnb_data;
 +
 +
 +static const t_nrnb_data nbdata[eNRNB] = {
 +    /* These are re-used for different NB kernels, since there are so many.
 +     * The actual number of flops is set dynamically.
 +     */
 +    { "NB VdW [V&F]",                    1 },
 +    { "NB VdW [F]",                      1 },
 +    { "NB Elec. [V&F]",                  1 },
 +    { "NB Elec. [F]",                    1 },
 +    { "NB Elec. [W3,V&F]",               1 },
 +    { "NB Elec. [W3,F]",                 1 },
 +    { "NB Elec. [W3-W3,V&F]",            1 },
 +    { "NB Elec. [W3-W3,F]",              1 },
 +    { "NB Elec. [W4,V&F]",               1 },
 +    { "NB Elec. [W4,F]",                 1 },
 +    { "NB Elec. [W4-W4,V&F]",            1 },
 +    { "NB Elec. [W4-W4,F]",              1 },
 +    { "NB VdW & Elec. [V&F]",            1 },
 +    { "NB VdW & Elec. [F]",              1 },
 +    { "NB VdW & Elec. [W3,V&F]",         1 },
 +    { "NB VdW & Elec. [W3,F]",           1 },
 +    { "NB VdW & Elec. [W3-W3,V&F]",      1 },
 +    { "NB VdW & Elec. [W3-W3,F]",        1 },
 +    { "NB VdW & Elec. [W4,V&F]",         1 },
 +    { "NB VdW & Elec. [W4,F]",           1 },
 +    { "NB VdW & Elec. [W4-W4,V&F]",      1 },
 +    { "NB VdW & Elec. [W4-W4,F]",        1 },
 +    
 +    { "NB Generic kernel",               1 },
 +    { "NB Free energy kernel",           1 },
 +    { "NB All-vs-all",                   1 },
 +    { "NB All-vs-all, GB",               1 },
 +
 +    { "Pair Search distance check",      9 }, /* nbnxn pair dist. check */
 +    /* nbnxn kernel flops are based on inner-loops without exclusion checks.
 +     * Plain Coulomb runs through the RF kernels, except with CUDA.
 +     * invsqrt is counted as 6 flops: 1 for _mm_rsqt_ps + 5 for iteration.
 +     * The flops are equal for plain-C, x86 SIMD and CUDA, except for:
 +     * - plain-C kernel uses one flop more for Coulomb-only (F) than listed
 +     * - x86 SIMD LJ geom-comb.rule kernels (fastest) use 2 more flops
 +     * - x86 SIMD LJ LB-comb.rule kernels (fast) use 3 (8 for F+E) more flops
 +     * - GPU always does exclusions, which requires 2-4 flops, but as invsqrt
 +     *   is always counted as 6 flops, this roughly compensates.
 +     */
 +    { "NxN RF Elec. + VdW [F]",         38 }, /* nbnxn kernel LJ+RF, no ener */
 +    { "NxN RF Elec. + VdW [V&F]",       54 },
-     { "NxN CSTab Elec. [F]",            34 }, /* nbnxn kernel tab, no ener */
-     { "NxN CSTab Elec. [V&F]",          41 },
++    { "NxN QSTab Elec. + VdW [F]",      41 }, /* nbnxn kernel LJ+tab, no en */
++    { "NxN QSTab Elec. + VdW [V&F]",    59 },
++    { "NxN Ewald Elec. + VdW [F]",      66 }, /* nbnxn kernel LJ+Ewald, no en */
++    { "NxN Ewald Elec. + VdW [V&F]",   107 },
 +    { "NxN VdW [F]",                    33 }, /* nbnxn kernel LJ, no ener */
 +    { "NxN VdW [V&F]",                  43 },
 +    { "NxN RF Electrostatics [F]",      31 }, /* nbnxn kernel RF, no ener */
 +    { "NxN RF Electrostatics [V&F]",    36 },
-       fprintf(out," NB=Group-cutoff nonbonded kernels    NxN=N-by-N tile Verlet kernels\n");
-       fprintf(out," RF=Reaction-Field  VdW=Van der Waals  CSTab=Cubic-spline table\n");
++    { "NxN QSTab Elec. [F]",            34 }, /* nbnxn kernel tab, no ener */
++    { "NxN QSTab Elec. [V&F]",          41 },
++    { "NxN Ewald Elec. [F]",            61 }, /* nbnxn kernel Ewald, no ener */
++    { "NxN Ewald Elec. [V&F]",          84 },
 +    { "1,4 nonbonded interactions",     90 },
 +    { "Born radii (Still)",             47 },
 +    { "Born radii (HCT/OBC)",          183 },
 +    { "Born force chain rule",          15 },
 +    { "All-vs-All Still radii",          1 },
 +    { "All-vs-All HCT/OBC radii",        1 },
 +    { "All-vs-All Born chain rule",      1 },
 +    { "Calc Weights",                   36 },
 +    { "Spread Q",                        6 },
 +    { "Spread Q Bspline",                2 }, 
 +    { "Gather F",                      23  },
 +    { "Gather F Bspline",              6   }, 
 +    { "3D-FFT",                        8   },
 +    { "Convolution",                   4   },
 +    { "Solve PME",                     64  },
 +    { "NS-Pairs",                      21  },
 +    { "Reset In Box",                  3   },
 +    { "Shift-X",                       6   },
 +    { "CG-CoM",                        3   },
 +    { "Sum Forces",                    1   },
 +    { "Bonds",                         59  },
 +    { "G96Bonds",                      44  },
 +    { "FENE Bonds",                    58  },
 +    { "Tab. Bonds",                    62  },
 +    { "Restraint Potential",           86  },
 +    { "Linear Angles",                 57  },
 +    { "Angles",                        168 },
 +    { "G96Angles",                     150 },
 +    { "Quartic Angles",                160 },
 +    { "Tab. Angles",                   169 },
 +    { "Propers",                       229 },
 +    { "Impropers",                     208 },
 +    { "RB-Dihedrals",                  247 },
 +    { "Four. Dihedrals",               247 },
 +    { "Tab. Dihedrals",                227 },
 +    { "Dist. Restr.",                  200 },
 +    { "Orient. Restr.",                200 },
 +    { "Dihedral Restr.",               200 },
 +    { "Pos. Restr.",                   50  },
 +    { "Flat-bottom posres",            50  },
 +    { "Angle Restr.",                  191 },
 +    { "Angle Restr. Z",                164 },
 +    { "Morse Potent.",                 83  },
 +    { "Cubic Bonds",                   54  },
 +    { "Walls",                         31  },
 +    { "Polarization",                  59  },
 +    { "Anharmonic Polarization",       72  },
 +    { "Water Pol.",                    62  },
 +    { "Thole Pol.",                    296 },
 +    { "Virial",                        18  },
 +    { "Update",                        31  },
 +    { "Ext.ens. Update",               54  },
 +    { "Stop-CM",                       10  },
 +    { "P-Coupling",                    6   },
 +    { "Calc-Ekin",                     27  },
 +    { "Lincs",                         60  },
 +    { "Lincs-Mat",                     4   },
 +    { "Shake",                         30  },
 +    { "Constraint-V",                   8  },
 +    { "Shake-Init",                    10  },
 +    { "Constraint-Vir",                24  },
 +    { "Settle",                        323 },
 +    { "Virtual Site 2",                23  },
 +    { "Virtual Site 3",                37  },
 +    { "Virtual Site 3fd",              95  },
 +    { "Virtual Site 3fad",             176 },
 +    { "Virtual Site 3out",             87  },
 +    { "Virtual Site 4fd",              110 }, 
 +    { "Virtual Site 4fdn",             254 }, 
 +    { "Virtual Site N",                 15 },
 +    { "Mixed Generalized Born stuff",   10 } 
 +};
 +
 +
 +void init_nrnb(t_nrnb *nrnb)
 +{
 +  int i;
 +
 +  for(i=0; (i<eNRNB); i++)
 +    nrnb->n[i]=0.0;
 +}
 +
 +void cp_nrnb(t_nrnb *dest, t_nrnb *src)
 +{
 +  int i;
 +
 +  for(i=0; (i<eNRNB); i++)
 +    dest->n[i]=src->n[i];
 +}
 +
 +void add_nrnb(t_nrnb *dest, t_nrnb *s1, t_nrnb *s2)
 +{
 +  int i;
 +
 +  for(i=0; (i<eNRNB); i++)
 +    dest->n[i]=s1->n[i]+s2->n[i];
 +}
 +
 +void print_nrnb(FILE *out, t_nrnb *nrnb)
 +{
 +  int i;
 +
 +  for(i=0; (i<eNRNB); i++)
 +    if (nrnb->n[i] > 0)
 +      fprintf(out," %-26s %10.0f.\n",nbdata[i].name,nrnb->n[i]);
 +}
 +
 +void _inc_nrnb(t_nrnb *nrnb,int enr,int inc,char *file,int line)
 +{
 +  nrnb->n[enr]+=inc;
 +#ifdef DEBUG_NRNB
 +  printf("nrnb %15s(%2d) incremented with %8d from file %s line %d\n",
 +        nbdata[enr].name,enr,inc,file,line);
 +#endif
 +}
 +
 +void print_flop(FILE *out,t_nrnb *nrnb,double *nbfs,double *mflop)
 +{
 +  int    i;
 +  double mni,frac,tfrac,tflop;
 +  const char   *myline = "-----------------------------------------------------------------------------";
 +  
 +  *nbfs = 0.0;
 +  for(i=0; (i<eNR_NBKERNEL_ALLVSALLGB); i++) {
 +    if (strstr(nbdata[i].name,"W3-W3") != NULL)
 +      *nbfs += 9e-6*nrnb->n[i];
 +    else if (strstr(nbdata[i].name,"W3") != NULL)
 +      *nbfs += 3e-6*nrnb->n[i];
 +    else if (strstr(nbdata[i].name,"W4-W4") != NULL)
 +      *nbfs += 10e-6*nrnb->n[i];
 +    else if (strstr(nbdata[i].name,"W4") != NULL)
 +      *nbfs += 4e-6*nrnb->n[i];
 +    else
 +      *nbfs += 1e-6*nrnb->n[i];
 +  }
 +  tflop=0;
 +  for(i=0; (i<eNRNB); i++) 
 +    tflop+=1e-6*nrnb->n[i]*nbdata[i].flop;
 +  
 +  if (tflop == 0) {
 +    fprintf(out,"No MEGA Flopsen this time\n");
 +    return;
 +  }
 +  if (out) {
 +    fprintf(out,"\n\tM E G A - F L O P S   A C C O U N T I N G\n\n");
 +  }
 +
 +  if (out)
 +  {
++      fprintf(out," NB=Group-cutoff nonbonded kernels    NxN=N-by-N cluster Verlet kernels\n");
++      fprintf(out," RF=Reaction-Field  VdW=Van der Waals  QSTab=quadratic-spline table\n");
 +      fprintf(out," W3=SPC/TIP3p  W4=TIP4p (single or pairs)\n");
 +      fprintf(out," V&F=Potential and force  V=Potential only  F=Force only\n\n");
 +
 +      fprintf(out," %-32s %16s %15s  %7s\n",
 +              "Computing:","M-Number","M-Flops","% Flops");
 +      fprintf(out,"%s\n",myline);
 +  }
 +  *mflop=0.0;
 +  tfrac=0.0;
 +  for(i=0; (i<eNRNB); i++) {
 +    mni     = 1e-6*nrnb->n[i];
 +    *mflop += mni*nbdata[i].flop;
 +    frac    = 100.0*mni*nbdata[i].flop/tflop;
 +    tfrac  += frac;
 +    if (out && mni != 0)
 +      fprintf(out," %-32s %16.6f %15.3f  %6.1f\n",
 +            nbdata[i].name,mni,mni*nbdata[i].flop,frac);
 +  }
 +  if (out) {
 +    fprintf(out,"%s\n",myline);
 +    fprintf(out," %-32s %16s %15.3f  %6.1f\n",
 +          "Total","",*mflop,tfrac);
 +    fprintf(out,"%s\n\n",myline);
 +  }
 +}
 +
 +void print_perf(FILE *out,double nodetime,double realtime,int nprocs,
 +              gmx_large_int_t nsteps,real delta_t,
 +              double nbfs,double mflop,
 +                int omp_nth_pp)
 +{
 +  real runtime;
 +
 +  fprintf(out,"\n");
 +
 +  if (realtime > 0) 
 +  {
 +    fprintf(out,"%12s %12s %12s %10s\n","","Core t (s)","Wall t (s)","(%)");
 +    fprintf(out,"%12s %12.3f %12.3f %10.1f\n","Time:",
 +          nodetime, realtime, 100.0*nodetime/realtime);
 +    /* only print day-hour-sec format if realtime is more than 30 min */
 +    if (realtime > 30*60)
 +    {
 +      fprintf(out,"%12s %12s","","");
 +      pr_difftime(out,realtime);
 +    }
 +    if (delta_t > 0) 
 +    {
 +      mflop = mflop/realtime;
 +      runtime = nsteps*delta_t;
 +
 +      if (getenv("GMX_DETAILED_PERF_STATS") == NULL)
 +      {
 +          fprintf(out,"%12s %12s %12s\n",
 +                  "","(ns/day)","(hour/ns)");
 +          fprintf(out,"%12s %12.3f %12.3f\n","Performance:",
 +                  runtime*24*3.6/realtime,1000*realtime/(3600*runtime));
 +      }
 +      else
 +      {
 +        fprintf(out,"%12s %12s %12s %12s %12s\n",
 +              "","(Mnbf/s)",(mflop > 1000) ? "(GFlops)" : "(MFlops)",
 +              "(ns/day)","(hour/ns)");
 +        fprintf(out,"%12s %12.3f %12.3f %12.3f %12.3f\n","Performance:",
 +              nbfs/realtime,(mflop > 1000) ? (mflop/1000) : mflop,
 +              runtime*24*3.6/realtime,1000*realtime/(3600*runtime));
 +      }
 +    } 
 +    else 
 +    {
 +      if (getenv("GMX_DETAILED_PERF_STATS") == NULL)
 +      {
 +          fprintf(out,"%12s %14s\n",
 +                  "","(steps/hour)");
 +          fprintf(out,"%12s %14.1f\n","Performance:",
 +                  nsteps*3600.0/realtime);
 +      }
 +      else
 +      {
 +          fprintf(out,"%12s %12s %12s %14s\n",
 +                "","(Mnbf/s)",(mflop > 1000) ? "(GFlops)" : "(MFlops)",
 +                "(steps/hour)");
 +          fprintf(out,"%12s %12.3f %12.3f %14.1f\n","Performance:",
 +            nbfs/realtime,(mflop > 1000) ? (mflop/1000) : mflop,
 +            nsteps*3600.0/realtime);
 +      }
 +    }
 +  }
 +}
 +
 +int cost_nrnb(int enr)
 +{
 +  return nbdata[enr].flop;
 +}
 +
 +const char *nrnb_str(int enr)
 +{
 +  return nbdata[enr].name;
 +}
 +
 +static const int    force_index[]={ 
 +  eNR_BONDS,  eNR_ANGLES,  eNR_PROPER, eNR_IMPROPER, 
 +  eNR_RB,     eNR_DISRES,  eNR_ORIRES, eNR_POSRES,
 +  eNR_FBPOSRES, eNR_NS,
 +};
 +#define NFORCE_INDEX asize(force_index)
 +
 +static const int    constr_index[]={ 
 +  eNR_SHAKE,     eNR_SHAKE_RIJ, eNR_SETTLE,       eNR_UPDATE,       eNR_PCOUPL,
 +  eNR_CONSTR_VIR,eNR_CONSTR_V
 +};
 +#define NCONSTR_INDEX asize(constr_index)
 +
 +static double pr_av(FILE *log,t_commrec *cr,
 +                  double fav,double ftot[],const char *title)
 +{
 +  int    i,perc;
 +  double dperc,unb;
 +  
 +  unb=0;
 +  if (fav > 0) {
 +    fav /= cr->nnodes - cr->npmenodes;
 +    fprintf(log,"\n %-26s",title);
 +    for(i=0; (i<cr->nnodes); i++) {
 +      dperc=(100.0*ftot[i])/fav;
 +      unb=max(unb,dperc);
 +      perc=dperc;
 +      fprintf(log,"%3d ",perc);
 +    }
 +    if (unb > 0) {
 +      perc=10000.0/unb;
 +      fprintf(log,"%6d%%\n\n",perc);
 +    }
 +    else
 +      fprintf(log,"\n\n");
 +  }
 +  return unb;
 +}
 +
 +void pr_load(FILE *log,t_commrec *cr,t_nrnb nrnb[])
 +{
 +  int    i,j,perc;
 +  double dperc,unb,uf,us;
 +  double *ftot,fav;
 +  double *stot,sav;
 +  t_nrnb *av;
 +
 +  snew(av,1);
 +  snew(ftot,cr->nnodes);
 +  snew(stot,cr->nnodes);
 +  init_nrnb(av);
 +  for(i=0; (i<cr->nnodes); i++) {
 +      add_nrnb(av,av,&(nrnb[i]));
 +      /* Cost due to forces */
 +      for(j=0; (j<eNR_NBKERNEL_ALLVSALLGB); j++)
 +      ftot[i]+=nrnb[i].n[j]*cost_nrnb(j);
 +      for(j=0; (j<NFORCE_INDEX); j++) 
 +      ftot[i]+=nrnb[i].n[force_index[j]]*cost_nrnb(force_index[j]);
 +      /* Due to shake */
 +      for(j=0; (j<NCONSTR_INDEX); j++) {
 +      stot[i]+=nrnb[i].n[constr_index[j]]*cost_nrnb(constr_index[j]);
 +      }
 +  }   
 +  for(j=0; (j<eNRNB); j++)
 +    av->n[j]=av->n[j]/(double)(cr->nnodes - cr->npmenodes);
 +    
 +    fprintf(log,"\nDetailed load balancing info in percentage of average\n");
 +  
 +  fprintf(log," Type                 NODE:");
 +  for(i=0; (i<cr->nnodes); i++)
 +      fprintf(log,"%3d ",i);
 +  fprintf(log,"Scaling\n");
 +  fprintf(log,"---------------------------");
 +  for(i=0; (i<cr->nnodes); i++)
 +      fprintf(log,"----");
 +  fprintf(log,"-------\n");
 +  
 +  for(j=0; (j<eNRNB); j++) {
 +    unb=100.0;
 +    if (av->n[j] > 0) {
 +      fprintf(log," %-26s",nrnb_str(j));
 +      for(i=0; (i<cr->nnodes); i++) {
 +        dperc=(100.0*nrnb[i].n[j])/av->n[j];
 +        unb=max(unb,dperc);
 +        perc=dperc;
 +        fprintf(log,"%3d ",perc);
 +      }
 +      if (unb > 0) {
 +      perc=10000.0/unb;
 +      fprintf(log,"%6d%%\n",perc);
 +      }
 +      else
 +      fprintf(log,"\n");
 +    }   
 +  }
 +  fav=sav=0;
 +  for(i=0; (i<cr->nnodes); i++) {
 +    fav+=ftot[i];
 +    sav+=stot[i];
 +  }
 +  uf=pr_av(log,cr,fav,ftot,"Total Force");
 +  us=pr_av(log,cr,sav,stot,"Total Constr.");
 +  
 +  unb=(uf*fav+us*sav)/(fav+sav);
 +  if (unb > 0) {
 +    unb=10000.0/unb;
 +    fprintf(log,"\nTotal Scaling: %.0f%% of max performance\n\n",unb);
 +  }
 +}
 +
index 7855d372549138ed592d0a63e186f3d1fb2e12d7,0000000000000000000000000000000000000000..fb84ca0e81185f2fc37c61f3cfdf2696205e45a9
mode 100644,000000..100644
--- /dev/null
@@@ -1,716 -1,0 +1,716 @@@
-         md_el = elfac*(2*b*exp(-br*br)/(sqrt(M_PI)*rc) + gmx_erfc(br)/(rc*rc));
-         dd_el = elfac/(rc*rc)*(4*b*(1 + br*br)*exp(-br*br)/sqrt(M_PI) + 2*gmx_erfc(br)/rc);
 +/*  -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 3.2.03
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <assert.h>
 +
 +#include <sys/types.h>
 +#include <math.h>
 +#include "typedefs.h"
 +#include "physics.h"
 +#include "smalloc.h"
 +#include "gmx_fatal.h"
 +#include "macros.h"
 +#include "vec.h"
 +#include "coulomb.h"
 +#include "calc_verletbuf.h"
 +#include "../mdlib/nbnxn_consts.h"
 +
 +/* Struct for unique atom type for calculating the energy drift.
 + * The atom displacement depends on mass and constraints.
 + * The energy jump for given distance depend on LJ type and q.
 + */
 +typedef struct
 +{
 +    real     mass; /* mass */
 +    int      type; /* type (used for LJ parameters) */
 +    real     q;    /* charge */
 +    int      con;  /* constrained: 0, else 1, if 1, use #DOF=2 iso 3 */
 +    int      n;    /* total #atoms of this type in the system */
 +} verletbuf_atomtype_t;
 +
 +
 +void verletbuf_get_list_setup(gmx_bool bGPU,
 +                              verletbuf_list_setup_t *list_setup)
 +{
 +    list_setup->cluster_size_i     = NBNXN_CPU_CLUSTER_I_SIZE;
 +
 +    if (bGPU)
 +    {
 +        list_setup->cluster_size_j = NBNXN_GPU_CLUSTER_SIZE;
 +    }
 +    else
 +    {
 +#ifndef GMX_X86_SSE2
 +        list_setup->cluster_size_j = NBNXN_CPU_CLUSTER_I_SIZE;
 +#else
 +        int simd_width;
 +
 +#ifdef GMX_X86_AVX_256
 +        simd_width = 256;
 +#else
 +        simd_width = 128;
 +#endif
 +        list_setup->cluster_size_j = simd_width/(sizeof(real)*8);
 +#endif
 +    }
 +}
 +
 +static void add_at(verletbuf_atomtype_t **att_p,int *natt_p,
 +                   real mass,int type,real q,int con,int nmol)
 +{
 +    verletbuf_atomtype_t *att;
 +    int natt,i;
 +
 +    if (mass == 0)
 +    {
 +        /* Ignore massless particles */
 +        return;
 +    }
 +
 +    att  = *att_p;
 +    natt = *natt_p;
 +
 +    i = 0;
 +    while (i < natt &&
 +           !(mass == att[i].mass &&
 +             type == att[i].type &&
 +             q    == att[i].q &&
 +             con  == att[i].con))
 +    {
 +        i++;
 +    }
 +
 +    if (i < natt)
 +    {
 +        att[i].n += nmol;
 +    }
 +    else
 +    {
 +        (*natt_p)++;
 +        srenew(*att_p,*natt_p);
 +        (*att_p)[i].mass = mass;
 +        (*att_p)[i].type = type;
 +        (*att_p)[i].q    = q;
 +        (*att_p)[i].con  = con;
 +        (*att_p)[i].n    = nmol;
 +    }
 +}
 +
 +static void get_verlet_buffer_atomtypes(const gmx_mtop_t *mtop,
 +                                        verletbuf_atomtype_t **att_p,
 +                                        int *natt_p,
 +                                        int *n_nonlin_vsite)
 +{
 +    verletbuf_atomtype_t *att;
 +    int natt;
 +    int mb,nmol,ft,i,j,a1,a2,a3,a;
 +    const t_atoms *atoms;
 +    const t_ilist *il;
 +    const t_atom *at;
 +    const t_iparams *ip;
 +    real *con_m,*vsite_m,cam[5];
 +
 +    att  = NULL;
 +    natt = 0;
 +
 +    if (n_nonlin_vsite != NULL)
 +    {
 +        *n_nonlin_vsite = 0;
 +    }
 +
 +    for(mb=0; mb<mtop->nmolblock; mb++)
 +    {
 +        nmol = mtop->molblock[mb].nmol;
 +
 +        atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 +
 +        /* Check for constraints, as they affect the kinetic energy */
 +        snew(con_m,atoms->nr);
 +        snew(vsite_m,atoms->nr);
 +
 +        for(ft=F_CONSTR; ft<=F_CONSTRNC; ft++)
 +        {
 +            il = &mtop->moltype[mtop->molblock[mb].type].ilist[ft];
 +
 +            for(i=0; i<il->nr; i+=1+NRAL(ft))
 +            {
 +                a1 = il->iatoms[i+1];
 +                a2 = il->iatoms[i+2];
 +                con_m[a1] += atoms->atom[a2].m;
 +                con_m[a2] += atoms->atom[a1].m;
 +            }
 +        }
 +
 +        il = &mtop->moltype[mtop->molblock[mb].type].ilist[F_SETTLE];
 +
 +        for(i=0; i<il->nr; i+=1+NRAL(F_SETTLE))
 +        {
 +            a1 = il->iatoms[i+1];
 +            a2 = il->iatoms[i+2];
 +            a3 = il->iatoms[i+3];
 +            con_m[a1] += atoms->atom[a2].m + atoms->atom[a3].m;
 +            con_m[a2] += atoms->atom[a1].m + atoms->atom[a3].m;
 +            con_m[a3] += atoms->atom[a1].m + atoms->atom[a2].m;
 +        }
 +
 +        /* Check for virtual sites, determine mass from constructing atoms */
 +        for(ft=0; ft<F_NRE; ft++)
 +        {
 +            if (IS_VSITE(ft))
 +            {
 +                il = &mtop->moltype[mtop->molblock[mb].type].ilist[ft];
 +
 +                for(i=0; i<il->nr; i+=1+NRAL(ft))
 +                {
 +                    ip = &mtop->ffparams.iparams[il->iatoms[i]];
 +
 +                    a1 = il->iatoms[i+1];
 +
 +                    for(j=1; j<NRAL(ft); j++)
 +                    {
 +                        cam[j] = atoms->atom[il->iatoms[i+1+j]].m;
 +                        if (cam[j] == 0)
 +                        {
 +                            cam[j] = vsite_m[il->iatoms[i+1+j]];
 +                        }
 +                        if (cam[j] == 0)
 +                        {
 +                            gmx_fatal(FARGS,"In molecule type '%s' %s construction involves atom %d, which is a virtual site of equal or high complexity. This is not supported.",
 +                                      *mtop->moltype[mtop->molblock[mb].type].name,
 +                                      interaction_function[ft].longname,
 +                                      il->iatoms[i+1+j]+1);
 +                        }
 +                    }
 +
 +                    switch(ft)
 +                    {
 +                    case F_VSITE2:
 +                        /* Exact except for ignoring constraints */
 +                        vsite_m[a1] = (cam[2]*sqr(1-ip->vsite.a) + cam[1]*sqr(ip->vsite.a))/(cam[1]*cam[2]);
 +                        break;
 +                    case F_VSITE3:
 +                        /* Exact except for ignoring constraints */
 +                        vsite_m[a1] = (cam[2]*cam[3]*sqr(1-ip->vsite.a-ip->vsite.b) + cam[1]*cam[3]*sqr(ip->vsite.a) + cam[1]*cam[2]*sqr(ip->vsite.b))/(cam[1]*cam[2]*cam[3]);
 +                        break;
 +                    default:
 +                        /* Use the mass of the lightest constructing atom.
 +                         * This is an approximation.
 +                         * If the distance of the virtual site to the
 +                         * constructing atom is less than all distances
 +                         * between constructing atoms, this is a safe
 +                         * over-estimate of the displacement of the vsite.
 +                         * This condition holds for all H mass replacement
 +                         * replacement vsite constructions, except for SP2/3
 +                         * groups. In SP3 groups one H will have a F_VSITE3
 +                         * construction, so even there the total drift
 +                         * estimation shouldn't be far off.
 +                         */
 +                        assert(j>=1);
 +                        vsite_m[a1] = cam[1];
 +                        for(j=2; j<NRAL(ft); j++)
 +                        {
 +                            vsite_m[a1] = min(vsite_m[a1],cam[j]);
 +                        }
 +                        if (n_nonlin_vsite != NULL)
 +                        {
 +                            *n_nonlin_vsite += nmol;
 +                        }
 +                        break;
 +                    }
 +                }
 +            }
 +        }
 +
 +        for(a=0; a<atoms->nr; a++)
 +        {
 +            at = &atoms->atom[a];
 +            /* We consider an atom constrained, #DOF=2, when it is
 +             * connected with constraints to one or more atoms with
 +             * total mass larger than 1.5 that of the atom itself.
 +             */
 +            add_at(&att,&natt,
 +                   at->m,at->type,at->q,con_m[a] > 1.5*at->m,nmol);
 +        }
 +
 +        sfree(vsite_m);
 +        sfree(con_m);
 +    }
 +
 +    if (gmx_debug_at)
 +    {
 +        for(a=0; a<natt; a++)
 +        {
 +            fprintf(debug,"type %d: m %5.2f t %d q %6.3f con %d n %d\n",
 +                    a,att[a].mass,att[a].type,att[a].q,att[a].con,att[a].n);
 +        }
 +    }
 +
 +    *att_p  = att;
 +    *natt_p = natt;
 +}
 +
 +static void approx_2dof(real s2,real x,
 +                        real *shift,real *scale)
 +{
 +    /* A particle with 1 DOF constrained has 2 DOFs instead of 3.
 +     * This code is also used for particles with multiple constraints,
 +     * in which case we overestimate the displacement.
 +     * The 2DOF distribution is sqrt(pi/2)*erfc(r/(sqrt(2)*s))/(2*s).
 +     * We approximate this with scale*Gaussian(s,r+shift),
 +     * by matching the distribution value and derivative at x.
 +     * This is a tight overestimate for all r>=0 at any s and x.
 +     */
 +    real ex,er;
 +
 +    ex = exp(-x*x/(2*s2));
 +    er = gmx_erfc(x/sqrt(2*s2));
 +
 +    *shift = -x + sqrt(2*s2/M_PI)*ex/er;
 +    *scale = 0.5*M_PI*exp(ex*ex/(M_PI*er*er))*er;
 +}
 +
 +static real ener_drift(const verletbuf_atomtype_t *att,int natt,
 +                       const gmx_ffparams_t *ffp,
 +                       real kT_fac,
 +                       real md_ljd,real md_ljr,real md_el,real dd_el,
 +                       real r_buffer,
 +                       real rlist,real boxvol)
 +{
 +    double drift_tot,pot1,pot2,pot;
 +    int    i,j;
 +    real   s2i,s2j,s2,s;
 +    int    ti,tj;
 +    real   md,dd;
 +    real   sc_fac,rsh;
 +    double c_exp,c_erfc;
 +
 +    drift_tot = 0;
 +
 +    /* Loop over the different atom type pairs */
 +    for(i=0; i<natt; i++)
 +    {
 +        s2i = kT_fac/att[i].mass;
 +        ti  = att[i].type;
 +
 +        for(j=i; j<natt; j++)
 +        {
 +            s2j = kT_fac/att[j].mass;
 +            tj = att[j].type;
 +
 +            /* Note that attractive and repulsive potentials for individual
 +             * pairs will partially cancel.
 +             */
 +            /* -dV/dr at the cut-off for LJ + Coulomb */
 +            md =
 +                md_ljd*ffp->iparams[ti*ffp->atnr+tj].lj.c6 +
 +                md_ljr*ffp->iparams[ti*ffp->atnr+tj].lj.c12 +
 +                md_el*att[i].q*att[j].q;
 +
 +            /* d2V/dr2 at the cut-off for Coulomb, we neglect LJ */
 +            dd = dd_el*att[i].q*att[j].q;
 +
 +            s2  = s2i + s2j;
 +
 +            rsh    = r_buffer;
 +            sc_fac = 1.0;
 +            /* For constraints: adapt r and scaling for the Gaussian */
 +            if (att[i].con)
 +            {
 +                real sh,sc;
 +                approx_2dof(s2i,r_buffer*s2i/s2,&sh,&sc);
 +                rsh    += sh;
 +                sc_fac *= sc;
 +            }
 +            if (att[j].con)
 +            {
 +                real sh,sc;
 +                approx_2dof(s2j,r_buffer*s2j/s2,&sh,&sc);
 +                rsh    += sh;
 +                sc_fac *= sc;
 +            }
 +
 +            /* Exact contribution of an atom pair with Gaussian displacement
 +             * with sigma s to the energy drift for a potential with
 +             * derivative -md and second derivative dd at the cut-off.
 +             * The only catch is that for potentials that change sign
 +             * near the cut-off there could be an unlucky compensation
 +             * of positive and negative energy drift.
 +             * Such potentials are extremely rare though.
 +             *
 +             * Note that pot has unit energy*length, as the linear
 +             * atom density still needs to be put in.
 +             */
 +            c_exp  = exp(-rsh*rsh/(2*s2))/sqrt(2*M_PI);
 +            c_erfc = 0.5*gmx_erfc(rsh/(sqrt(2*s2)));
 +            s      = sqrt(s2);
 +
 +            pot1 = sc_fac*
 +                md/2*((rsh*rsh + s2)*c_erfc - rsh*s*c_exp);
 +            pot2 = sc_fac*
 +                dd/6*(s*(rsh*rsh + 2*s2)*c_exp - rsh*(rsh*rsh + 3*s2)*c_erfc);
 +            pot = pot1 + pot2;
 +
 +            if (gmx_debug_at)
 +            {
 +                fprintf(debug,"n %d %d d s %.3f %.3f con %d md %8.1e dd %8.1e pot1 %8.1e pot2 %8.1e pot %8.1e\n",
 +                        att[i].n,att[j].n,sqrt(s2i),sqrt(s2j),
 +                        att[i].con+att[j].con,
 +                        md,dd,pot1,pot2,pot);
 +            }
 +
 +            /* Multiply by the number of atom pairs */
 +            if (j == i)
 +            {
 +                pot *= (double)att[i].n*(att[i].n - 1)/2;
 +            }
 +            else
 +            {
 +                pot *= (double)att[i].n*att[j].n;
 +            }
 +            /* We need the line density to get the energy drift of the system.
 +             * The effective average r^2 is close to (rlist+sigma)^2.
 +             */
 +            pot *= 4*M_PI*sqr(rlist + s)/boxvol;
 +
 +            /* Add the unsigned drift to avoid cancellation of errors */
 +            drift_tot += fabs(pot);
 +        }
 +    }
 +
 +    return drift_tot;
 +}
 +
 +static real surface_frac(int cluster_size,real particle_distance,real rlist)
 +{
 +    real d,area_rel;
 +
 +    if (rlist < 0.5*particle_distance)
 +    {
 +        /* We have non overlapping spheres */
 +        return 1.0;
 +    }
 +
 +    /* Half the inter-particle distance relative to rlist */
 +    d = 0.5*particle_distance/rlist;
 +
 +    /* Determine the area of the surface at distance rlist to the closest
 +     * particle, relative to surface of a sphere of radius rlist.
 +     * The formulas below assume close to cubic cells for the pair search grid,
 +     * which the pair search code tries to achieve.
 +     * Note that in practice particle distances will not be delta distributed,
 +     * but have some spread, often involving shorter distances,
 +     * as e.g. O-H bonds in a water molecule. Thus the estimates below will
 +     * usually be slightly too high and thus conservative.
 +     */
 +    switch (cluster_size)
 +    {
 +    case 1:
 +        /* One particle: trivial */
 +        area_rel = 1.0;
 +        break;
 +    case 2:
 +        /* Two particles: two spheres at fractional distance 2*a */
 +        area_rel = 1.0 + d;
 +        break;
 +    case 4:
 +        /* We assume a perfect, symmetric tetrahedron geometry.
 +         * The surface around a tetrahedron is too complex for a full
 +         * analytical solution, so we use a Taylor expansion.
 +         */
 +        area_rel = (1.0 + 1/M_PI*(6*acos(1/sqrt(3))*d +
 +                                  sqrt(3)*d*d*(1.0 +
 +                                               5.0/18.0*d*d +
 +                                               7.0/45.0*d*d*d*d +
 +                                               83.0/756.0*d*d*d*d*d*d)));
 +        break;
 +    default:
 +        gmx_incons("surface_frac called with unsupported cluster_size");
 +        area_rel = 1.0;
 +    }
 +        
 +    return area_rel/cluster_size;
 +}
 +
 +void calc_verlet_buffer_size(const gmx_mtop_t *mtop,real boxvol,
 +                             const t_inputrec *ir,real drift_target,
 +                             const verletbuf_list_setup_t *list_setup,
 +                             int *n_nonlin_vsite,
 +                             real *rlist)
 +{
 +    double resolution;
 +    char *env;
 +
 +    real particle_distance;
 +    real nb_clust_frac_pairs_not_in_list_at_cutoff;
 +
 +    verletbuf_atomtype_t *att=NULL;
 +    int  natt=-1,i;
 +    double reppow;
 +    real md_ljd,md_ljr,md_el,dd_el;
 +    real elfac;
 +    real kT_fac,mass_min;
 +    int  ib0,ib1,ib;
 +    real rb,rl;
 +    real drift;
 +
 +    /* Resolution of the buffer size */
 +    resolution = 0.001;
 +
 +    env = getenv("GMX_VERLET_BUFFER_RES");
 +    if (env != NULL)
 +    {
 +        sscanf(env,"%lf",&resolution);
 +    }
 +
 +    /* In an atom wise pair-list there would be no pairs in the list
 +     * beyond the pair-list cut-off.
 +     * However, we use a pair-list of groups vs groups of atoms.
 +     * For groups of 4 atoms, the parallelism of SSE instructions, only
 +     * 10% of the atoms pairs are not in the list just beyond the cut-off.
 +     * As this percentage increases slowly compared to the decrease of the
 +     * Gaussian displacement distribution over this range, we can simply
 +     * reduce the drift by this fraction.
 +     * For larger groups, e.g. of 8 atoms, this fraction will be lower,
 +     * so then buffer size will be on the conservative (large) side.
 +     *
 +     * Note that the formulas used here do not take into account
 +     * cancellation of errors which could occur by missing both
 +     * attractive and repulsive interactions.
 +     *
 +     * The only major assumption is homogeneous particle distribution.
 +     * For an inhomogeneous system, such as a liquid-vapor system,
 +     * the buffer will be underestimated. The actual energy drift
 +     * will be higher by the factor: local/homogeneous particle density.
 +     *
 +     * The results of this estimate have been checked againt simulations.
 +     * In most cases the real drift differs by less than a factor 2.
 +     */
 +
 +    /* Worst case assumption: HCP packing of particles gives largest distance */
 +    particle_distance = pow(boxvol*sqrt(2)/mtop->natoms,1.0/3.0);
 +
 +    get_verlet_buffer_atomtypes(mtop,&att,&natt,n_nonlin_vsite);
 +    assert(att != NULL && natt >= 0);
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"particle distance assuming HCP packing: %f nm\n",
 +                particle_distance);
 +        fprintf(debug,"energy drift atom types: %d\n",natt);
 +    }
 +
 +    reppow = mtop->ffparams.reppow;
 +    md_ljd = 0;
 +    md_ljr = 0;
 +    if (ir->vdwtype == evdwCUT)
 +    {
 +        /* -dV/dr of -r^-6 and r^-repporw */
 +        md_ljd = -6*pow(ir->rvdw,-7.0);
 +        md_ljr = reppow*pow(ir->rvdw,-(reppow+1));
 +        /* The contribution of the second derivative is negligible */
 +    }
 +    else
 +    {
 +        gmx_fatal(FARGS,"Energy drift calculation is only implemented for plain cut-off Lennard-Jones interactions");
 +    }
 +
 +    elfac = ONE_4PI_EPS0/ir->epsilon_r;
 +
 +    /* Determine md=-dV/dr and dd=d^2V/dr^2 */
 +    md_el = 0;
 +    dd_el = 0;
 +    if (ir->coulombtype == eelCUT || EEL_RF(ir->coulombtype))
 +    {
 +        real eps_rf,k_rf;
 +
 +        if (ir->coulombtype == eelCUT)
 +        {
 +            eps_rf = 1;
 +            k_rf = 0;
 +        }
 +        else
 +        {
 +            eps_rf = ir->epsilon_rf/ir->epsilon_r;
 +            if (eps_rf != 0)
 +            {
 +                k_rf = pow(ir->rcoulomb,-3.0)*(eps_rf - ir->epsilon_r)/(2*eps_rf + ir->epsilon_r);
 +            }
 +            else
 +            {
 +                /* epsilon_rf = infinity */
 +                k_rf = 0.5*pow(ir->rcoulomb,-3.0);
 +            }
 +        }
 +
 +        if (eps_rf > 0)
 +        {
 +            md_el = elfac*(pow(ir->rcoulomb,-2.0) - 2*k_rf*ir->rcoulomb);
 +        }
 +        dd_el = elfac*(2*pow(ir->rcoulomb,-3.0) + 2*k_rf);
 +    }
 +    else if (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD)
 +    {
 +        real b,rc,br;
 +
 +        b  = calc_ewaldcoeff(ir->rcoulomb,ir->ewald_rtol);
 +        rc = ir->rcoulomb;
 +        br = b*rc;
++        md_el = elfac*(b*exp(-br*br)*M_2_SQRTPI/rc + gmx_erfc(br)/(rc*rc));
++        dd_el = elfac/(rc*rc)*(2*b*(1 + br*br)*exp(-br*br)*M_2_SQRTPI + 2*gmx_erfc(br)/rc);
 +    }
 +    else
 +    {
 +        gmx_fatal(FARGS,"Energy drift calculation is only implemented for Reaction-Field and Ewald electrostatics");
 +    }
 +
 +    /* Determine the variance of the atomic displacement
 +     * over nstlist-1 steps: kT_fac
 +     * For inertial dynamics (not Brownian dynamics) the mass factor
 +     * is not included in kT_fac, it is added later.
 +     */
 +    if (ir->eI == eiBD)
 +    {
 +        /* Get the displacement distribution from the random component only.
 +         * With accurate integration the systematic (force) displacement
 +         * should be negligible (unless nstlist is extremely large, which
 +         * you wouldn't do anyhow).
 +         */
 +        kT_fac = 2*BOLTZ*ir->opts.ref_t[0]*(ir->nstlist-1)*ir->delta_t;
 +        if (ir->bd_fric > 0)
 +        {
 +            /* This is directly sigma^2 of the displacement */
 +            kT_fac /= ir->bd_fric;
 +
 +            /* Set the masses to 1 as kT_fac is the full sigma^2,
 +             * but we divide by m in ener_drift().
 +             */
 +            for(i=0; i<natt; i++)
 +            {
 +                att[i].mass = 1;
 +            }
 +        }
 +        else
 +        {
 +            real tau_t;
 +
 +            /* Per group tau_t is not implemented yet, use the maximum */
 +            tau_t = ir->opts.tau_t[0];
 +            for(i=1; i<ir->opts.ngtc; i++)
 +            {
 +                tau_t = max(tau_t,ir->opts.tau_t[i]);
 +            }
 +
 +            kT_fac *= tau_t;
 +            /* This kT_fac needs to be divided by the mass to get sigma^2 */
 +        }
 +    }
 +    else
 +    {
 +        kT_fac = BOLTZ*ir->opts.ref_t[0]*sqr((ir->nstlist-1)*ir->delta_t);
 +    }
 +
 +    mass_min = att[0].mass;
 +    for(i=1; i<natt; i++)
 +    {
 +        mass_min = min(mass_min,att[i].mass);
 +    }
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"md_ljd %e md_ljr %e\n",md_ljd,md_ljr);
 +        fprintf(debug,"md_el %e dd_el %e\n",md_el,dd_el);
 +        fprintf(debug,"sqrt(kT_fac) %f\n",sqrt(kT_fac));
 +        fprintf(debug,"mass_min %f\n",mass_min);
 +    }
 +
 +    /* Search using bisection */
 +    ib0 = -1;
 +    /* The drift will be neglible at 5 times the max sigma */
 +    ib1 = (int)(5*2*sqrt(kT_fac/mass_min)/resolution) + 1;
 +    while (ib1 - ib0 > 1)
 +    {
 +        ib = (ib0 + ib1)/2;
 +        rb = ib*resolution;
 +        rl = max(ir->rvdw,ir->rcoulomb) + rb;
 +
 +        /* Calculate the average energy drift at the last step
 +         * of the nstlist steps at which the pair-list is used.
 +         */
 +        drift = ener_drift(att,natt,&mtop->ffparams,
 +                           kT_fac,
 +                           md_ljd,md_ljr,md_el,dd_el,rb,
 +                           rl,boxvol);
 +
 +        /* Correct for the fact that we are using a Ni x Nj particle pair list
 +         * and not a 1 x 1 particle pair list. This reduces the drift.
 +         */
 +        /* We don't have a formula for 8 (yet), use 4 which is conservative */
 +        nb_clust_frac_pairs_not_in_list_at_cutoff =
 +            surface_frac(min(list_setup->cluster_size_i,4),
 +                         particle_distance,rl)*
 +            surface_frac(min(list_setup->cluster_size_j,4),
 +                         particle_distance,rl);
 +        drift *= nb_clust_frac_pairs_not_in_list_at_cutoff;
 +
 +        /* Convert the drift to drift per unit time per atom */
 +        drift /= ir->nstlist*ir->delta_t*mtop->natoms;
 +
 +        if (debug)
 +        {
 +            fprintf(debug,"ib %3d %3d %3d rb %.3f %dx%d fac %.3f drift %f\n",
 +                    ib0,ib,ib1,rb,
 +                    list_setup->cluster_size_i,list_setup->cluster_size_j,
 +                    nb_clust_frac_pairs_not_in_list_at_cutoff,
 +                    drift);
 +        }
 +
 +        if (fabs(drift) > drift_target)
 +        {
 +            ib0 = ib;
 +        }
 +        else
 +        {
 +            ib1 = ib;
 +        }
 +    }
 +
 +    sfree(att);
 +
 +    *rlist = max(ir->rvdw,ir->rcoulomb) + ib1*resolution;
 +}
index d97873bb288dc76b3d4bb4c8069dd00c344edbf7,0000000000000000000000000000000000000000..51bf1893fce987e9a733ae608d24431dd5b47d6a
mode 100644,000000..100644
--- /dev/null
@@@ -1,168 -1,0 +1,168 @@@
-                             gmx_enerdata_t *enerd,t_nrnb *nrnb,
 +/*
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gromacs Runs On Most of All Computer Systems
 + */
 +
 +#ifndef _bondf_h
 +#define _bondf_h
 +
 +
 +#include <stdio.h>
 +#include "typedefs.h"
 +#include "nrnb.h"
 +#include "pbc.h"
 +#include "genborn.h"
 +
 +#ifdef __cplusplus
 +extern "C" {
 +#endif
 +
 +int glatnr(int *global_atom_index,int i);
 +/* Returns the global topology atom number belonging to local atom index i.
 + * This function is intended for writing ascii output
 + * and returns atom numbers starting at 1.
 + * When global_atom_index=NULL returns i+1.
 + */
 +
 +void calc_bonds(FILE *fplog,const gmx_multisim_t *ms,
 +                const t_idef *idef,
 +                rvec x[],history_t *hist,
 +                rvec f[],t_forcerec *fr,
 +                const t_pbc *pbc,const t_graph *g,
 +                gmx_enerdata_t *enerd,t_nrnb *nrnb,real *lambda,
 +                const t_mdatoms *md,
 +                t_fcdata *fcd,int *ddgatindex,
 +                t_atomtypes *atype, gmx_genborn_t *born,
 +              int force_flags,
 +                gmx_bool bPrintSepPot,gmx_large_int_t step);
 +/* 
 + * The function calc_bonds() calculates all bonded force interactions.
 + * The "bonds" are specified as follows:
 + *   int nbonds
 + *        the total number of bonded interactions.
 + *   t_iatom *forceatoms
 + *     specifies which atoms are involved in a bond of a certain 
 + *     type, see also struct t_idef.
 + *   t_functype *functype
 + *        defines for every bonded force type what type of function to 
 + *     use, see also struct t_idef.
 + *   t_iparams *forceparams
 + *        defines the parameters for every bond type, see also struct 
 + *     t_idef.
 + *   real epot[NR_F]
 + *     total potential energy split up over the function types.
 + *   int *ddgatindex
 + *     global atom number indices, should be NULL when not using DD.
 + *   gmx_bool bPrintSepPot
 + *     if TRUE print local potential and dVdlambda for each bonded type.
 + *   int step
 + *     used with bPrintSepPot
 + *   return value:
 + *        the total potential energy (sum over epot).
 + */
 +
 +void calc_bonds_lambda(FILE *fplog,
 +                            const t_idef *idef,
 +                            rvec x[],
 +                            t_forcerec *fr,
 +                            const t_pbc *pbc,const t_graph *g,
++                  gmx_grppairener_t *grpp, real *epot,t_nrnb *nrnb,
 +                            real *lambda,
 +                            const t_mdatoms *md,
 +                            t_fcdata *fcd,int *global_atom_index);
 +/* As calc_bonds, but only determines the potential energy
 + * for the perturbed interactions.
 + * The shift forces in fr are not affected.
 + */
 +
 +real posres(int nbonds,
 +                 const t_iatom forceatoms[],const t_iparams forceparams[],
 +                 const rvec x[],rvec f[],rvec vir_diag,
 +                 t_pbc *pbc,
 +                 real lambda,real *dvdlambda,
 +                 int refcoord_scaling,int ePBC,rvec comA,rvec comB);
 +/* Position restraints require a different pbc treatment from other bondeds */
 +
 +real fbposres(int nbonds,
 +               const t_iatom forceatoms[],const t_iparams forceparams[],
 +               const rvec x[],rvec f[],rvec vir_diag,
 +               t_pbc *pbc, int refcoord_scaling,int ePBC,rvec com);
 +/* Flat-bottom posres. Same PBC treatment as in normal position restraints */
 +
 +real bond_angle(const rvec xi,const rvec xj,const rvec xk,
 +                     const t_pbc *pbc,
 +                     rvec r_ij,rvec r_kj,real *costh,
 +                     int *t1,int *t2);        /* out */
 +/* Calculate bond-angle. No PBC is taken into account (use mol-shift) */
 +
 +real dih_angle(const rvec xi,const rvec xj,const rvec xk,const rvec xl,
 +                    const t_pbc *pbc,
 +                    rvec r_ij,rvec r_kj,rvec r_kl,rvec m,rvec n, /* out */
 +                    real *sign,
 +                    int *t1,int *t2,int *t3);
 +/* Calculate dihedral-angle. No PBC is taken into account (use mol-shift) */
 +
 +void do_dih_fup(int i,int j,int k,int l,real ddphi,
 +                     rvec r_ij,rvec r_kj,rvec r_kl,
 +                     rvec m,rvec n,rvec f[],rvec fshift[],
 +                     const t_pbc *pbc,const t_graph *g,
 +                     const rvec *x,int t1,int t2,int t3);
 +/* Do an update of the forces for dihedral potentials */
 +
 +void make_dp_periodic(real *dp);
 +/* make a dihedral fall in the range (-pi,pi) */
 +
 +/*************************************************************************
 + *
 + *  Bonded force functions
 + *
 + *************************************************************************/
 +  t_ifunc bonds,g96bonds,morse_bonds,cubic_bonds,FENE_bonds,restraint_bonds;
 +  t_ifunc angles,g96angles,cross_bond_bond,cross_bond_angle,urey_bradley,quartic_angles,linear_angles;
 +  t_ifunc pdihs,idihs,rbdihs;
 +  t_ifunc tab_bonds,tab_angles,tab_dihs;
 +  t_ifunc polarize,anharm_polarize,water_pol,thole_pol,angres,angresz,dihres,unimplemented;
 +
 +
 +/* Initialize the setup for the bonded force buffer reduction
 + * over threads. This should be called each time the bonded setup
 + * changes; i.e. at start-up without domain decomposition and at DD.
 + */ 
 +void init_bonded_thread_force_reduction(t_forcerec *fr,
 +                                        const t_idef *idef);
 +
 +#ifdef __cplusplus
 +}
 +#endif
 +
 +#endif        /* _bondf_h */
index 9b20347b9aa76579f00069dfe7411209dec7e10a,0000000000000000000000000000000000000000..59573346e5b65ac079ecea4cc2e9e44eb20afa29
mode 100644,000000..100644
--- /dev/null
@@@ -1,294 -1,0 +1,297 @@@
- void sum_epot(t_grpopts *opts,gmx_enerdata_t *enerd);
 +/*
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gromacs Runs On Most of All Computer Systems
 + */
 +
 +#ifndef _force_h
 +#define _force_h
 +
 +
 +#include "typedefs.h"
 +#include "types/force_flags.h"
 +#include "pbc.h"
 +#include "network.h"
 +#include "tgroup.h"
 +#include "vsite.h"
 +#include "genborn.h"
 +
 +
 +#ifdef __cplusplus
 +extern "C" {
 +#endif
 +
 +static const char *sepdvdlformat="  %-30s V %12.5e  dVdl %12.5e\n";
 +
 +void calc_vir(FILE *fplog,int nxf,rvec x[],rvec f[],tensor vir,
 +                   gmx_bool bScrewPBC,matrix box);
 +/* Calculate virial for nxf atoms, and add it to vir */
 +
 +void f_calc_vir(FILE *fplog,int i0,int i1,rvec x[],rvec f[],tensor vir,
 +                     t_graph *g,rvec shift_vec[]);
 +/* Calculate virial taking periodicity into account */
 +
 +real RF_excl_correction(FILE *fplog,
 +                             const t_forcerec *fr,t_graph *g,
 +                             const t_mdatoms *mdatoms,const t_blocka *excl,
 +                             rvec x[],rvec f[],rvec *fshift,const t_pbc *pbc,
 +                             real lambda,real *dvdlambda);
 +/* Calculate the reaction-field energy correction for this node:
 + * epsfac q_i q_j (k_rf r_ij^2 - c_rf)
 + * and force correction for all excluded pairs, including self pairs.
 + */
 +
 +void calc_rffac(FILE *fplog,int eel,real eps_r,real eps_rf,
 +                     real Rc,real Temp,
 +                     real zsq,matrix box,
 +                     real *kappa,real *krf,real *crf);
 +/* Determine the reaction-field constants */
 +
 +void init_generalized_rf(FILE *fplog,
 +                              const gmx_mtop_t *mtop,const t_inputrec *ir,
 +                              t_forcerec *fr);
 +/* Initialize the generalized reaction field parameters */
 +
 +
 +/* In wall.c */
 +void make_wall_tables(FILE *fplog,const output_env_t oenv,
 +                           const t_inputrec *ir,const char *tabfn,
 +                           const gmx_groups_t *groups,
 +                           t_forcerec *fr);
 +
 +real do_walls(t_inputrec *ir,t_forcerec *fr,matrix box,t_mdatoms *md,
 +            rvec x[],rvec f[],real lambda,real Vlj[],t_nrnb *nrnb);
 +
 +t_forcerec *mk_forcerec(void);
 +
 +#define GMX_MAKETABLES_FORCEUSER  (1<<0)
 +#define GMX_MAKETABLES_14ONLY     (1<<1)
 +
 +t_forcetable make_tables(FILE *fp,const output_env_t oenv,
 +                                const t_forcerec *fr, gmx_bool bVerbose,
 +                                const char *fn, real rtab,int flags);
 +/* Return tables for inner loops. When bVerbose the tables are printed
 + * to .xvg files
 + */
 + 
 +bondedtable_t make_bonded_table(FILE *fplog,char *fn,int angle);
 +/* Return a table for bonded interactions,
 + * angle should be: bonds 0, angles 1, dihedrals 2
 + */
 +
 +/* Return a table for GB calculations */
 +t_forcetable make_gb_table(FILE *out,const output_env_t oenv,
 +                                  const t_forcerec *fr,
 +                                  const char *fn,
 +                                  real rtab);
 +
 +/* Read a table for AdResS Thermo Force calculations */
 +extern t_forcetable make_atf_table(FILE *out,const output_env_t oenv,
 +                                 const t_forcerec *fr,
 +                                 const char *fn,
 +                                 matrix box);
 +
 +void pr_forcerec(FILE *fplog,t_forcerec *fr,t_commrec *cr);
 +
 +void
 +forcerec_set_ranges(t_forcerec *fr,
 +                  int ncg_home,int ncg_force,
 +                  int natoms_force,
 +                  int natoms_force_constr,int natoms_f_novirsum);
 +/* Set the number of cg's and atoms for the force calculation */
 +
 +gmx_bool can_use_allvsall(const t_inputrec *ir, const gmx_mtop_t *mtop,
 +                             gmx_bool bPrintNote,t_commrec *cr,FILE *fp);
 +/* Returns if we can use all-vs-all loops.
 + * If bPrintNote==TRUE, prints a note, if necessary, to stderr
 + * and fp (if !=NULL) on the master node.
 + */
 +
 +gmx_bool uses_simple_tables(int cutoff_scheme,
 +                            nonbonded_verlet_t *nbv,
 +                            int group);
 +/* Returns whether simple tables (i.e. not for use with GPUs) are used
 + * with the type of kernel indicated.
 + */
 +
 +void init_interaction_const_tables(FILE *fp, 
 +                                   interaction_const_t *ic,
 +                                   gmx_bool bSimpleTable,
 +                                   real rtab);
 +/* Initializes the tables in the interaction constant data structure.
 + * Setting verlet_kernel_type to -1 always initializes tables for
 + * use with group kernels.
 + */
 +
 +void init_interaction_const(FILE *fp, 
 +                            interaction_const_t **interaction_const,
 +                            const t_forcerec *fr,
 +                            real  rtab);
 +/* Initializes the interaction constant data structure. Currently it 
 + * uses forcerec as input. 
 + */
 +
 +void init_forcerec(FILE       *fplog,     
 +                          const output_env_t oenv,
 +                        t_forcerec *fr,   
 +                        t_fcdata   *fcd,
 +                        const t_inputrec *ir,   
 +                        const gmx_mtop_t *mtop,
 +                        const t_commrec  *cr,
 +                        matrix     box,
 +                        gmx_bool       bMolEpot,
 +                        const char *tabfn,
 +                        const char *tabafn,
 +                        const char *tabpfn,
 +                        const char *tabbfn,
 +                        const char *nbpu_opt,
 +                        gmx_bool   bNoSolvOpt,
 +                        real       print_force);
 +/* The Force rec struct must be created with mk_forcerec 
 + * The gmx_booleans have the following meaning:
 + * bSetQ:    Copy the charges [ only necessary when they change ]
 + * bMolEpot: Use the free energy stuff per molecule
 + * print_force >= 0: print forces for atoms with force >= print_force
 + */
 +
 +void forcerec_set_excl_load(t_forcerec *fr,
 +                          const gmx_localtop_t *top,const t_commrec *cr);
 +  /* Set the exclusion load for the local exclusions and possibly threads */
 +
 +void init_enerdata(int ngener,int n_lambda,gmx_enerdata_t *enerd);
 +/* Intializes the energy storage struct */
 +
 +void destroy_enerdata(gmx_enerdata_t *enerd);
 +/* Free all memory associated with enerd */
 +
++void reset_foreign_enerdata(gmx_enerdata_t *enerd);
++/* Resets only the foreign energy data */
++
 +void reset_enerdata(t_grpopts *opts,
 +                         t_forcerec *fr,gmx_bool bNS,
 +                         gmx_enerdata_t *enerd,
 +                         gmx_bool bMaster);
 +/* Resets the energy data, if bNS=TRUE also zeros the long-range part */
 +
++void sum_epot(t_grpopts *opts, gmx_grppairener_t *grpp, real *epot);
 +/* Locally sum the non-bonded potential energy terms */
 +
 +void sum_dhdl(gmx_enerdata_t *enerd,real *lambda,t_lambda *fepvals);
 +/* Sum the free energy contributions */
 +
 +void update_forcerec(FILE *fplog,t_forcerec *fr,matrix box);
 +/* Updates parameters in the forcerec that are time dependent */
 +
 +/* Compute the average C6 and C12 params for LJ corrections */
 +void set_avcsixtwelve(FILE *fplog,t_forcerec *fr,
 +                           const gmx_mtop_t *mtop);
 +
 +extern void do_force(FILE *log,t_commrec *cr,
 +                   t_inputrec *inputrec,
 +                   gmx_large_int_t step,t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +                   gmx_localtop_t *top,
 +                   gmx_mtop_t *mtop,
 +                   gmx_groups_t *groups,
 +                   matrix box,rvec x[],history_t *hist,
 +                   rvec f[],
 +                   tensor vir_force,
 +                   t_mdatoms *mdatoms,
 +                   gmx_enerdata_t *enerd,t_fcdata *fcd,
 +                   real *lambda,t_graph *graph,
 +                   t_forcerec *fr,
 +                     gmx_vsite_t *vsite,rvec mu_tot,
 +                   double t,FILE *field,gmx_edsam_t ed,
 +                   gmx_bool bBornRadii,
 +                   int flags);
 +
 +/* Communicate coordinates (if parallel).
 + * Do neighbor searching (if necessary).
 + * Calculate forces.
 + * Communicate forces (if parallel).
 + * Spread forces for vsites (if present).
 + *
 + * f is always required.
 + */
 +
 +void ns(FILE       *fplog,
 +             t_forcerec *fr,
 +             rvec       x[],
 +             matrix     box,
 +             gmx_groups_t *groups,
 +             t_grpopts  *opts,
 +             gmx_localtop_t *top,
 +             t_mdatoms  *md,
 +             t_commrec  *cr,
 +             t_nrnb     *nrnb,
 +             real       *lambda,
 +             real       *dvdlambda,
 +             gmx_grppairener_t *grppener,
 +             gmx_bool       bFillGrid,
 +           gmx_bool       bDoLongRangeNS);
 +/* Call the neighborsearcher */
 +
 +extern void do_force_lowlevel(FILE         *fplog,  
 +                            gmx_large_int_t   step,
 +                            t_forcerec   *fr,
 +                            t_inputrec   *ir,
 +                            t_idef       *idef,
 +                            t_commrec    *cr,
 +                            t_nrnb       *nrnb,
 +                            gmx_wallcycle_t wcycle,
 +                            t_mdatoms    *md,
 +                            t_grpopts    *opts,
 +                            rvec         x[],
 +                            history_t    *hist,
 +                            rvec         f_shortrange[],
 +                  rvec         f_longrange[],
 +                            gmx_enerdata_t *enerd,
 +                            t_fcdata     *fcd,
 +                            gmx_mtop_t     *mtop,
 +                            gmx_localtop_t *top,
 +                            gmx_genborn_t *born,
 +                            t_atomtypes  *atype,
 +                            gmx_bool         bBornRadii,
 +                            matrix       box,
 +                            t_lambda     *fepvals,
 +                            real         *lambda,
 +                            t_graph      *graph,
 +                            t_blocka     *excl,
 +                            rvec         mu_tot[2],
 +                            int          flags,
 +                            float        *cycles_pme);
 +/* Call all the force routines */
 +
 +#ifdef __cplusplus
 +}
 +#endif
 +
 +#endif        /* _force_h */
Simple merge
index 3ade75f1e52dc2466cf397bc66f6a9f9f334b13f,0000000000000000000000000000000000000000..ff36c9d13daf2555e5648fb40e983582fdbf1066
mode 100644,000000..100644
--- /dev/null
@@@ -1,161 -1,0 +1,176 @@@
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gromacs Runs On Most of All Computer Systems
 + */
 +
 +#ifndef _maths_h
 +#define _maths_h
 +
 +#include <math.h>
 +#include "types/simple.h"
 +#include "typedefs.h"
 +
 +#ifdef __cplusplus
 +extern "C" {
 +#endif
 +
 +#ifndef M_PI
 +#define       M_PI            3.14159265358979323846
 +#endif
 +
 +#ifndef M_PI_2
 +#define       M_PI_2          1.57079632679489661923
 +#endif
 +
 +#ifndef M_2PI
 +#define       M_2PI           6.28318530717958647692
 +#endif
 +    
 +#ifndef M_SQRT2
 +#define M_SQRT2 sqrt(2.0)
 +#endif
 +
 +#ifndef M_1_PI
 +#define M_1_PI      0.31830988618379067154
 +#endif
 +
++#ifndef M_FLOAT_1_SQRTPI /* used in CUDA kernels */
++/* 1.0 / sqrt(M_PI) */
++#define M_FLOAT_1_SQRTPI 0.564189583547756f
++#endif
++
++#ifndef M_1_SQRTPI
++/* 1.0 / sqrt(M_PI) */
++#define M_1_SQRTPI 0.564189583547756
++#endif
++
++#ifndef M_2_SQRTPI
++/* 2.0 / sqrt(M_PI) */
++#define M_2_SQRTPI  1.128379167095513
++#endif
++
 +int           gmx_nint(real a);
 +real    sign(real x,real y);
 +
 +real    cuberoot (real a);
 +double  gmx_erfd(double x);
 +double  gmx_erfcd(double x);
 +float   gmx_erff(float x);
 +float   gmx_erfcf(float x);
 +#ifdef GMX_DOUBLE
 +#define gmx_erf(x)   gmx_erfd(x)
 +#define gmx_erfc(x)  gmx_erfcd(x)
 +#else
 +#define gmx_erf(x)   gmx_erff(x)
 +#define gmx_erfc(x)  gmx_erfcf(x)
 +#endif
 +
 +gmx_bool gmx_isfinite(real x);
 +
 +/*! \brief Check if two numbers are within a tolerance
 + *
 + *  This routine checks if the relative difference between two numbers is
 + *  approximately within the given tolerance, defined as
 + *  fabs(f1-f2)<=tolerance*fabs(f1+f2).
 + *
 + *  To check if two floating-point numbers are almost identical, use this routine 
 + *  with the tolerance GMX_REAL_EPS, or GMX_DOUBLE_EPS if the check should be
 + *  done in double regardless of Gromacs precision.
 + *  
 + *  To check if two algorithms produce similar results you will normally need
 + *  to relax the tolerance significantly since many operations (e.g. summation)
 + *  accumulate floating point errors.
 + *
 + *  \param f1  First number to compare
 + *  \param f2  Second number to compare
 + *  \param tol Tolerance to use
 + *
 + *  \return 1 if the relative difference is within tolerance, 0 if not.
 + */
 +static int
 +gmx_within_tol(double   f1,
 +               double   f2,
 +               double   tol)
 +{
 +    /* The or-equal is important - otherwise we return false if f1==f2==0 */
 +    if( fabs(f1-f2) <= tol*0.5*(fabs(f1)+fabs(f2)) )
 +    {
 +        return 1;
 +    }
 +    else
 +    {
 +        return 0;
 +    }
 +}
 +
 +
 +
 +/** 
 + * Check if a number is smaller than some preset safe minimum
 + * value, currently defined as GMX_REAL_MIN/GMX_REAL_EPS.
 + *
 + * If a number is smaller than this value we risk numerical overflow
 + * if any number larger than 1.0/GMX_REAL_EPS is divided by it.
 + *
 + * \return 1  if 'almost' numerically zero, 0 otherwise.
 + */
 +static int
 +gmx_numzero(double a)
 +{
 +  return gmx_within_tol(a,0.0,GMX_REAL_MIN/GMX_REAL_EPS);
 +}
 +
 +
 +static real
 +gmx_log2(real x)
 +{
 +  const real iclog2 = 1.0/log( 2.0 );
 +
 +    return log( x ) * iclog2;
 +}
 +
 +/*! /brief Multiply two large ints
 + *
 + *  Returns true when overflow did not occur.
 + */
 +gmx_bool
 +check_int_multiply_for_overflow(gmx_large_int_t a,
 +                                gmx_large_int_t b,
 +                                gmx_large_int_t *result);
 +
 +#ifdef __cplusplus
 +}
 +#endif
 +
 +#endif        /* _maths_h */
index 0000000000000000000000000000000000000000,1c19fe3248d6207ea473406fcbd5be08000aa354..1c19fe3248d6207ea473406fcbd5be08000aa354
mode 000000,100644..100644
--- /dev/null
index d36896975478b3311529ee531edce924ad5a51aa,0000000000000000000000000000000000000000..4bf927b9088659637e83d94cb9f44c6478db641d
mode 100644,000000..100644
--- /dev/null
@@@ -1,95 -1,0 +1,98 @@@
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2012, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 + *
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +
 +#ifndef NB_VERLET_H
 +#define NB_VERLET_H
 +
 +#include "nbnxn_pairlist.h"
 +#include "nbnxn_cuda_types_ext.h"
 +
 +#ifdef __cplusplus
 +extern "C" {
 +#endif
 +
 +/*! Nonbonded NxN kernel types: plain C, SSE/AVX, GPU CUDA, GPU emulation, etc */
 +enum { nbkNotSet = 0, 
 +       nbk4x4_PlainC, 
 +       nbk4xN_X86_SIMD128,
 +       nbk4xN_X86_SIMD256,
 +       nbk8x8x8_CUDA,
 +       nbk8x8x8_PlainC };
 +
++enum { ewaldexclTable, ewaldexclAnalytical };
++
 +/* Atom locality indicator: local, non-local, all, used for calls to:
 +   gridding, pair-search, force calculation, x/f buffer operations */
 +enum { eatLocal = 0, eatNonlocal = 1, eatAll  };
 +
 +#define LOCAL_A(x)               ((x) == eatLocal)
 +#define NONLOCAL_A(x)            ((x) == eatNonlocal)
 +#define LOCAL_OR_NONLOCAL_A(x)   (LOCAL_A(x) || NONLOCAL_A(x))
 +
 +/* Interaction locality indicator (used in pair-list search/calculations):
 +    - local interactions require local atom data and affect local output only;
 +    - non-local interactions require both local and non-local atom data and
 +      affect both local- and non-local output. */
 +enum { eintLocal = 0, eintNonlocal = 1 };
 +
 +#define LOCAL_I(x)               ((x) == eintLocal)
 +#define NONLOCAL_I(x)            ((x) == eintNonlocal)
 +
 +enum { enbvClearFNo, enbvClearFYes };
 +
 +typedef struct {
 +    nbnxn_pairlist_set_t nbl_lists;   /* pair list(s)                       */
 +    nbnxn_atomdata_t     *nbat;       /* atom data                          */
 +    int                  kernel_type; /* non-bonded kernel - see enum above */
++    int                  ewald_excl;  /* Ewald exclusion - see enum above   */
 +} nonbonded_verlet_group_t;
 +
 +/* non-bonded data structure with Verlet-type cut-off */
 +typedef struct {
 +    nbnxn_search_t           nbs;   /* n vs n atom pair searching data          */
 +    int                      ngrp;  /* number of interaction groups             */
 +    nonbonded_verlet_group_t grp[2];/* local and non-local interaction group    */
 +
 +    gmx_bool         bUseGPU;          /* TRUE when GPU acceleration is used */
 +    nbnxn_cuda_ptr_t cu_nbv;           /* pointer to CUDA nb verlet data     */
 +    int              min_ci_balanced;  /* pair list balancing parameter
 +                                          used for the 8x8x8 CUDA kernels    */
 +} nonbonded_verlet_t;
 +
 +#ifdef __cplusplus
 +}
 +#endif
 +
 +#endif /* NB_VERLET_H */
index ae28aefd047e054c69e9a0ada23ae847baf92291,0000000000000000000000000000000000000000..be404308f4514458fab5492237fd98da86c9e6cd
mode 100644,000000..100644
--- /dev/null
@@@ -1,133 -1,0 +1,135 @@@
-     eNR_NBNXN_LJ_RF,  eNR_NBNXN_LJ_RF_E,
-     eNR_NBNXN_LJ_TAB, eNR_NBNXN_LJ_TAB_E,
-     eNR_NBNXN_LJ,     eNR_NBNXN_LJ_E,
-     eNR_NBNXN_RF,     eNR_NBNXN_RF_E,
-     eNR_NBNXN_TAB,    eNR_NBNXN_TAB_E,
 +/*
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GRoups of Organic Molecules in ACtion for Science
 + */
 +#ifndef _types_nrnb_h
 +#define _types_nrnb_h
 +
 +
 +#ifdef __cplusplus
 +extern "C" {
 +#endif
 +#if 0
 +} /* fixes auto-indentation problems */
 +#endif
 +
 +
 +#define eNR_NBKERNEL_NONE -1
 +
 +enum 
 +{
 +    eNR_NBKERNEL_VDW_VF,
 +    eNR_NBKERNEL_VDW_F,
 +    eNR_NBKERNEL_ELEC_VF,
 +    eNR_NBKERNEL_ELEC_F,
 +    eNR_NBKERNEL_ELEC_W3_VF,
 +    eNR_NBKERNEL_ELEC_W3_F,
 +    eNR_NBKERNEL_ELEC_W3W3_VF,
 +    eNR_NBKERNEL_ELEC_W3W3_F,
 +    eNR_NBKERNEL_ELEC_W4_VF,
 +    eNR_NBKERNEL_ELEC_W4_F,
 +    eNR_NBKERNEL_ELEC_W4W4_VF,
 +    eNR_NBKERNEL_ELEC_W4W4_F,
 +    eNR_NBKERNEL_ELEC_VDW_VF,
 +    eNR_NBKERNEL_ELEC_VDW_F,
 +    eNR_NBKERNEL_ELEC_VDW_W3_VF,
 +    eNR_NBKERNEL_ELEC_VDW_W3_F,
 +    eNR_NBKERNEL_ELEC_VDW_W3W3_VF,
 +    eNR_NBKERNEL_ELEC_VDW_W3W3_F,
 +    eNR_NBKERNEL_ELEC_VDW_W4_VF,
 +    eNR_NBKERNEL_ELEC_VDW_W4_F,
 +    eNR_NBKERNEL_ELEC_VDW_W4W4_VF,
 +    eNR_NBKERNEL_ELEC_VDW_W4W4_F,
 +
 +    eNR_NBKERNEL_NR,  /* Total number of interaction-specific kernel entries */
 +
 +    eNR_NBKERNEL_GENERIC = eNR_NBKERNEL_NR, /* Reuse number; KERNEL_NR is not an entry itself */
 +    eNR_NBKERNEL_FREE_ENERGY,               /* Add other generic kernels _before_ the free energy one */
 +
 +    eNR_NBKERNEL_ALLVSALL,
 +    eNR_NBKERNEL_ALLVSALLGB,
 +
 +    eNR_NBNXN_DIST2,
++    eNR_NBNXN_LJ_RF,    eNR_NBNXN_LJ_RF_E,
++    eNR_NBNXN_LJ_TAB,   eNR_NBNXN_LJ_TAB_E,
++    eNR_NBNXN_LJ_EWALD, eNR_NBNXN_LJ_EWALD_E,
++    eNR_NBNXN_LJ,       eNR_NBNXN_LJ_E,
++    eNR_NBNXN_RF,       eNR_NBNXN_RF_E,
++    eNR_NBNXN_TAB,      eNR_NBNXN_TAB_E,
++    eNR_NBNXN_EWALD,    eNR_NBNXN_EWALD_E,
 +    eNR_NB14,
 +    eNR_BORN_RADII_STILL,     eNR_BORN_RADII_HCT_OBC,
 +    eNR_BORN_CHAINRULE,
 +    eNR_BORN_AVA_RADII_STILL, eNR_BORN_AVA_RADII_HCT_OBC,
 +    eNR_BORN_AVA_CHAINRULE,
 +    eNR_WEIGHTS,              eNR_SPREADQ,              eNR_SPREADQBSP,
 +    eNR_GATHERF,              eNR_GATHERFBSP,           eNR_FFT,
 +    eNR_CONV,                 eNR_SOLVEPME,eNR_NS,      eNR_RESETX,
 +    eNR_SHIFTX,               eNR_CGCM,                 eNR_FSUM,
 +    eNR_BONDS,                eNR_G96BONDS,             eNR_FENEBONDS,
 +    eNR_TABBONDS,             eNR_RESTRBONDS,           eNR_LINEAR_ANGLES,
 +    eNR_ANGLES,               eNR_G96ANGLES,            eNR_QANGLES,
 +    eNR_TABANGLES,            eNR_PROPER,               eNR_IMPROPER,
 +    eNR_RB,                   eNR_FOURDIH,              eNR_TABDIHS,
 +    eNR_DISRES,               eNR_ORIRES,               eNR_DIHRES,
 +    eNR_POSRES,               eNR_FBPOSRES,
 +    eNR_ANGRES,               eNR_ANGRESZ,
 +    eNR_MORSE,                eNR_CUBICBONDS,           eNR_WALLS,
 +    eNR_POLARIZE,             eNR_ANHARM_POL,
 +    eNR_WPOL,                 eNR_THOLE,                eNR_VIRIAL,
 +    eNR_UPDATE,               eNR_EXTUPDATE,            eNR_STOPCM,
 +    eNR_PCOUPL,               eNR_EKIN,                 eNR_LINCS,
 +    eNR_LINCSMAT,             eNR_SHAKE,                eNR_CONSTR_V,
 +    eNR_SHAKE_RIJ,            eNR_CONSTR_VIR,           eNR_SETTLE,
 +    eNR_VSITE2,               eNR_VSITE3,               eNR_VSITE3FD,
 +    eNR_VSITE3FAD,            eNR_VSITE3OUT,            eNR_VSITE4FD,
 +    eNR_VSITE4FDN,            eNR_VSITEN,               eNR_GB,
 +    eNR_CMAP,
 +    eNRNB
 +};
 +
 +
 +typedef struct
 +{
 +    double n[eNRNB];
 +}
 +t_nrnb;
 +
 +
 +typedef struct gmx_wallcycle *gmx_wallcycle_t;
 +
 +#ifdef __cplusplus
 +}
 +#endif
 +
 +#endif
index ec0785566b924c81bdda465c603c1b27aac4932f,0000000000000000000000000000000000000000..0660994ddc46eba9b80f301a130672d67b7925f0
mode 100644,000000..100644
--- /dev/null
@@@ -1,907 -1,0 +1,909 @@@
- static real gmx_invsqrt(real x)
 +/*
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gromacs Runs On Most of All Computer Systems
 + */
 +#ifndef _vec_h
 +#define _vec_h
 +
 +/*
 +  collection of in-line ready operations:
 +  
 +  lookup-table optimized scalar operations:
 +  real gmx_invsqrt(real x)
 +  void vecinvsqrt(real in[],real out[],int n)
 +  void vecrecip(real in[],real out[],int n)
 +  real sqr(real x)
 +  double dsqr(double x)
 +  
 +  vector operations:
 +  void rvec_add(const rvec a,const rvec b,rvec c)  c = a + b
 +  void dvec_add(const dvec a,const dvec b,dvec c)  c = a + b
 +  void ivec_add(const ivec a,const ivec b,ivec c)  c = a + b
 +  void rvec_inc(rvec a,const rvec b)               a += b
 +  void dvec_inc(dvec a,const dvec b)               a += b
 +  void ivec_inc(ivec a,const ivec b)               a += b
 +  void rvec_sub(const rvec a,const rvec b,rvec c)  c = a - b
 +  void dvec_sub(const dvec a,const dvec b,dvec c)  c = a - b
 +  void rvec_dec(rvec a,rvec b)                     a -= b
 +  void copy_rvec(const rvec a,rvec b)              b = a (reals)
 +  void copy_dvec(const dvec a,dvec b)              b = a (reals)
 +  void copy_ivec(const ivec a,ivec b)              b = a (integers)
 +  void ivec_sub(const ivec a,const ivec b,ivec c)  c = a - b
 +  void svmul(real a,rvec v1,rvec v2)               v2 = a * v1
 +  void dsvmul(double a,dvec v1,dvec v2)            v2 = a * v1
 +  void clear_rvec(rvec a)                          a = 0
 +  void clear_dvec(dvec a)                          a = 0
 +  void clear_ivec(rvec a)                          a = 0
 +  void clear_rvecs(int n,rvec v[])
 +  real iprod(rvec a,rvec b)                        = a . b (inner product)
 +  double diprod(dvec a,dvec b)                     = a . b (inner product)
 +  real iiprod(ivec a,ivec b)                       = a . b (integers)
 +  real norm2(rvec a)                               = | a |^2 ( = x*y*z )
 +  double dnorm2(dvec a)                            = | a |^2 ( = x*y*z )
 +  real norm(rvec a)                                = | a |
 +  double dnorm(dvec a)                             = | a |
 +  void cprod(rvec a,rvec b,rvec c)                 c = a x b (cross product)
 +  void dprod(rvec a,rvec b,rvec c)                 c = a x b (cross product)
 +  void dprod(rvec a,rvec b,rvec c)                 c = a * b (direct product)
 +  real cos_angle(rvec a,rvec b)
 +  real cos_angle_no_table(rvec a,rvec b)
 +  real distance2(rvec v1, rvec v2)                 = | v2 - v1 |^2
 +  void unitv(rvec src,rvec dest)                   dest = src / |src|
 +  void unitv_no_table(rvec src,rvec dest)          dest = src / |src|
 +  
 +  matrix (3x3) operations:
 +    ! indicates that dest should not be the same as a, b or src
 +    the _ur0 varieties work on matrices that have only zeros
 +    in the upper right part, such as box matrices, these varieties
 +    could produce less rounding errors, not due to the operations themselves,
 +    but because the compiler can easier recombine the operations
 +  void copy_mat(matrix a,matrix b)                 b = a
 +  void clear_mat(matrix a)                       a = 0
 +  void mmul(matrix a,matrix b,matrix dest)    !  dest = a . b
 +  void mmul_ur0(matrix a,matrix b,matrix dest)     dest = a . b
 +  void transpose(matrix src,matrix dest)      !  dest = src*
 +  void tmmul(matrix a,matrix b,matrix dest)   !  dest = a* . b
 +  void mtmul(matrix a,matrix b,matrix dest)   !  dest = a . b*
 +  real det(matrix a)                             = det(a)
 +  void m_add(matrix a,matrix b,matrix dest)      dest = a + b
 +  void m_sub(matrix a,matrix b,matrix dest)      dest = a - b
 +  void msmul(matrix m1,real r1,matrix dest)      dest = r1 * m1
 +  void m_inv_ur0(matrix src,matrix dest)           dest = src^-1
 +  void m_inv(matrix src,matrix dest)          !  dest = src^-1
 +  void mvmul(matrix a,rvec src,rvec dest)     !  dest = a . src
 +  void mvmul_ur0(matrix a,rvec src,rvec dest)      dest = a . src
 +  void tmvmul_ur0(matrix a,rvec src,rvec dest)     dest = a* . src
 +  real trace(matrix m)                             = trace(m)
 +*/
 +
 +#include "types/simple.h"
 +#include "maths.h"
 +#include "typedefs.h"
 +#include "sysstuff.h"
 +#include "gmx_fatal.h"
 +#include "physics.h"
 +
 +#ifdef __cplusplus
 +extern "C" {
 +#elif 0
 +} /* avoid screwing up indentation */
 +#endif
 +
 +
 +#define EXP_LSB         0x00800000
 +#define EXP_MASK        0x7f800000
 +#define EXP_SHIFT       23
 +#define FRACT_MASK      0x007fffff
 +#define FRACT_SIZE      11              /* significant part of fraction */
 +#define FRACT_SHIFT     (EXP_SHIFT-FRACT_SIZE)
 +#define EXP_ADDR(val)   (((val)&EXP_MASK)>>EXP_SHIFT)
 +#define FRACT_ADDR(val) (((val)&(FRACT_MASK|EXP_LSB))>>FRACT_SHIFT)
 +
 +#define PR_VEC(a)       a[XX],a[YY],a[ZZ]
 +
 +#ifdef GMX_SOFTWARE_INVSQRT
 +extern const unsigned int *  gmx_invsqrt_exptab;
 +extern const unsigned int *  gmx_invsqrt_fracttab;
 +#endif
 +
 +
 +typedef union 
 +{
 +  unsigned int bval;
 +  float fval;
 +} t_convert;
 +
 +
 +#ifdef GMX_SOFTWARE_INVSQRT
- static real gmx_invsqrt(real x)
++static real gmx_software_invsqrt(real x)
 +{
 +  const real  half=0.5;
 +  const real  three=3.0;
 +  t_convert   result,bit_pattern;
 +  unsigned int exp,fract;
 +  real        lu;
 +  real        y;
 +#ifdef GMX_DOUBLE
 +  real        y2;
 +#endif
 + 
 +  bit_pattern.fval=x;
 +  exp   = EXP_ADDR(bit_pattern.bval);
 +  fract = FRACT_ADDR(bit_pattern.bval);
 +  result.bval=gmx_invsqrt_exptab[exp] | gmx_invsqrt_fracttab[fract];
 +  lu    = result.fval;
 +  
 +  y=(half*lu*(three-((x*lu)*lu)));
 +#ifdef GMX_DOUBLE
 +  y2=(half*y*(three-((x*y)*y)));
 +  
 +  return y2;                    /* 10 Flops */
 +#else
 +  return y;                     /* 5  Flops */
 +#endif
 +}
++#define gmx_invsqrt(x) gmx_software_invsqrt(x)
 +#define INVSQRT_DONE 
 +#endif /* gmx_invsqrt */
 +
 +#ifdef GMX_POWERPC_SQRT
++static real gmx_powerpc_invsqrt(real x)
 +{
 +  const real  half=0.5;
 +  const real  three=3.0;
 +  t_convert   result,bit_pattern;
 +  unsigned int exp,fract;
 +  real        lu;
 +  real        y;
 +#ifdef GMX_DOUBLE
 +  real        y2;
 +#endif
 +
 +  lu = __frsqrte((double)x);
 +
 +  y=(half*lu*(three-((x*lu)*lu)));
 +
 +#if (GMX_POWERPC_SQRT==2)
 +  /* Extra iteration required */
 +  y=(half*y*(three-((x*y)*y)));
 +#endif
 +
 +#ifdef GMX_DOUBLE
 +  y2=(half*y*(three-((x*y)*y)));
 +
 +  return y2;                    /* 10 Flops */
 +#else
 +  return y;                     /* 5  Flops */
 +#endif
 +}
++#define gmx_invsqrt(x) gmx_powerpc_invsqrt(x)
 +#define INVSQRT_DONE
 +#endif /* powerpc_invsqrt */
 +
 +#ifndef INVSQRT_DONE
 +#    ifdef GMX_DOUBLE
 +#        ifdef HAVE_RSQRT
 +#            define gmx_invsqrt(x)     rsqrt(x)
 +#        else
 +#            define gmx_invsqrt(x)     (1.0/sqrt(x))
 +#        endif
 +#    else /* single */
 +#        ifdef HAVE_RSQRTF
 +#            define gmx_invsqrt(x)     rsqrtf(x)
 +#        elif defined HAVE_RSQRT
 +#            define gmx_invsqrt(x)     rsqrt(x)
 +#        elif defined HAVE_SQRTF
 +#            define gmx_invsqrt(x)     (1.0/sqrtf(x))
 +#        else
 +#            define gmx_invsqrt(x)     (1.0/sqrt(x))
 +#        endif
 +#    endif
 +#endif
 +
 +
 +static real sqr(real x)
 +{
 +  return (x*x);
 +}
 +
 +static gmx_inline double dsqr(double x)
 +{
 +  return (x*x);
 +}
 +
 +/* Maclaurin series for sinh(x)/x, useful for NH chains and MTTK pressure control 
 +   Here, we compute it to 10th order, which might be overkill, 8th is probably enough, 
 +   but it's not very much more expensive. */
 +
 +static gmx_inline real series_sinhx(real x) 
 +{
 +  real x2 = x*x;
 +  return (1 + (x2/6.0)*(1 + (x2/20.0)*(1 + (x2/42.0)*(1 + (x2/72.0)*(1 + (x2/110.0))))));
 +}
 +
 +void vecinvsqrt(real in[],real out[],int n);
 +/* Perform out[i]=1.0/sqrt(in[i]) for n elements */
 +
 +
 +void vecrecip(real in[],real out[],int n);
 +/* Perform out[i]=1.0/(in[i]) for n elements */
 +
 +/* Note: If you need a fast version of vecinvsqrt 
 + * and/or vecrecip, call detectcpu() and run the SSE/3DNow/SSE2/Altivec
 + * versions if your hardware supports it.
 + *
 + * To use those routines, your memory HAS TO BE CACHE-ALIGNED.
 + * Use snew_aligned(ptr,size,32) to allocate and sfree_aligned to free.
 + */
 +
 +
 +static gmx_inline void rvec_add(const rvec a,const rvec b,rvec c)
 +{
 +  real x,y,z;
 +  
 +  x=a[XX]+b[XX];
 +  y=a[YY]+b[YY];
 +  z=a[ZZ]+b[ZZ];
 +  
 +  c[XX]=x;
 +  c[YY]=y;
 +  c[ZZ]=z;
 +}
 +
 +static gmx_inline void dvec_add(const dvec a,const dvec b,dvec c)
 +{
 +  double x,y,z;
 +  
 +  x=a[XX]+b[XX];
 +  y=a[YY]+b[YY];
 +  z=a[ZZ]+b[ZZ];
 +  
 +  c[XX]=x;
 +  c[YY]=y;
 +  c[ZZ]=z;
 +}
 +
 +static gmx_inline void ivec_add(const ivec a,const ivec b,ivec c)
 +{
 +  int x,y,z;
 +  
 +  x=a[XX]+b[XX];
 +  y=a[YY]+b[YY];
 +  z=a[ZZ]+b[ZZ];
 +  
 +  c[XX]=x;
 +  c[YY]=y;
 +  c[ZZ]=z;
 +}
 +
 +static gmx_inline void rvec_inc(rvec a,const rvec b)
 +{
 +  real x,y,z;
 +  
 +  x=a[XX]+b[XX];
 +  y=a[YY]+b[YY];
 +  z=a[ZZ]+b[ZZ];
 +  
 +  a[XX]=x;
 +  a[YY]=y;
 +  a[ZZ]=z;
 +}
 +
 +static gmx_inline void dvec_inc(dvec a,const dvec b)
 +{
 +  double x,y,z;
 +
 +  x=a[XX]+b[XX];
 +  y=a[YY]+b[YY];
 +  z=a[ZZ]+b[ZZ];
 +
 +  a[XX]=x;
 +  a[YY]=y;
 +  a[ZZ]=z;
 +}
 +
 +static gmx_inline void rvec_sub(const rvec a,const rvec b,rvec c)
 +{
 +  real x,y,z;
 +  
 +  x=a[XX]-b[XX];
 +  y=a[YY]-b[YY];
 +  z=a[ZZ]-b[ZZ];
 +  
 +  c[XX]=x;
 +  c[YY]=y;
 +  c[ZZ]=z;
 +}
 +
 +static gmx_inline void dvec_sub(const dvec a,const dvec b,dvec c)
 +{
 +  double x,y,z;
 +  
 +  x=a[XX]-b[XX];
 +  y=a[YY]-b[YY];
 +  z=a[ZZ]-b[ZZ];
 +  
 +  c[XX]=x;
 +  c[YY]=y;
 +  c[ZZ]=z;
 +}
 +
 +static gmx_inline void rvec_dec(rvec a,const rvec b)
 +{
 +  real x,y,z;
 +  
 +  x=a[XX]-b[XX];
 +  y=a[YY]-b[YY];
 +  z=a[ZZ]-b[ZZ];
 +  
 +  a[XX]=x;
 +  a[YY]=y;
 +  a[ZZ]=z;
 +}
 +
 +static gmx_inline void copy_rvec(const rvec a,rvec b)
 +{
 +  b[XX]=a[XX];
 +  b[YY]=a[YY];
 +  b[ZZ]=a[ZZ];
 +}
 +
 +static gmx_inline void copy_rvecn(rvec *a,rvec *b,int startn, int endn)
 +{
 +  int i;
 +  for (i=startn;i<endn;i++) {
 +    b[i][XX]=a[i][XX];
 +    b[i][YY]=a[i][YY];
 +    b[i][ZZ]=a[i][ZZ];
 +  }
 +}
 +
 +static gmx_inline void copy_dvec(const dvec a,dvec b)
 +{
 +  b[XX]=a[XX];
 +  b[YY]=a[YY];
 +  b[ZZ]=a[ZZ];
 +}
 +
 +static gmx_inline void copy_ivec(const ivec a,ivec b)
 +{
 +  b[XX]=a[XX];
 +  b[YY]=a[YY];
 +  b[ZZ]=a[ZZ];
 +}
 +
 +static gmx_inline void ivec_sub(const ivec a,const ivec b,ivec c)
 +{
 +  int x,y,z;
 +  
 +  x=a[XX]-b[XX];
 +  y=a[YY]-b[YY];
 +  z=a[ZZ]-b[ZZ];
 +  
 +  c[XX]=x;
 +  c[YY]=y;
 +  c[ZZ]=z;
 +}
 +
 +static gmx_inline void copy_mat(matrix a,matrix b)
 +{
 +  copy_rvec(a[XX],b[XX]);
 +  copy_rvec(a[YY],b[YY]);
 +  copy_rvec(a[ZZ],b[ZZ]);
 +}
 +
 +static gmx_inline void svmul(real a,const rvec v1,rvec v2)
 +{
 +  v2[XX]=a*v1[XX];
 +  v2[YY]=a*v1[YY];
 +  v2[ZZ]=a*v1[ZZ];
 +}
 +
 +static gmx_inline void dsvmul(double a,const dvec v1,dvec v2)
 +{
 +  v2[XX]=a*v1[XX];
 +  v2[YY]=a*v1[YY];
 +  v2[ZZ]=a*v1[ZZ];
 +}
 +
 +static gmx_inline real distance2(const rvec v1,const rvec v2)
 +{
 +  return sqr(v2[XX]-v1[XX]) + sqr(v2[YY]-v1[YY]) + sqr(v2[ZZ]-v1[ZZ]);
 +}
 +
 +static gmx_inline void clear_rvec(rvec a)
 +{
 +  /* The ibm compiler has problems with inlining this 
 +   * when we use a const real variable
 +   */
 +  a[XX]=0.0;
 +  a[YY]=0.0;
 +  a[ZZ]=0.0;
 +}
 +
 +static gmx_inline void clear_dvec(dvec a)
 +{
 +  /* The ibm compiler has problems with inlining this 
 +   * when we use a const real variable
 +   */
 +  a[XX]=0.0;
 +  a[YY]=0.0;
 +  a[ZZ]=0.0;
 +}
 +
 +static gmx_inline void clear_ivec(ivec a)
 +{
 +  a[XX]=0;
 +  a[YY]=0;
 +  a[ZZ]=0;
 +}
 +
 +static gmx_inline void clear_rvecs(int n,rvec v[])
 +{
 +/*  memset(v[0],0,DIM*n*sizeof(v[0][0])); */
 +  int i;
 +    
 +  for(i=0; (i<n); i++) 
 +    clear_rvec(v[i]);
 +}
 +
 +static gmx_inline void clear_mat(matrix a)
 +{
 +/*  memset(a[0],0,DIM*DIM*sizeof(a[0][0])); */
 +  
 +  const real nul=0.0;
 +  
 +  a[XX][XX]=a[XX][YY]=a[XX][ZZ]=nul;
 +  a[YY][XX]=a[YY][YY]=a[YY][ZZ]=nul;
 +  a[ZZ][XX]=a[ZZ][YY]=a[ZZ][ZZ]=nul;
 +}
 +
 +static gmx_inline real iprod(const rvec a,const rvec b)
 +{
 +  return (a[XX]*b[XX]+a[YY]*b[YY]+a[ZZ]*b[ZZ]);
 +}
 +
 +static gmx_inline double diprod(const dvec a,const dvec b)
 +{
 +  return (a[XX]*b[XX]+a[YY]*b[YY]+a[ZZ]*b[ZZ]);
 +}
 +
 +static gmx_inline int iiprod(const ivec a,const ivec b)
 +{
 +  return (a[XX]*b[XX]+a[YY]*b[YY]+a[ZZ]*b[ZZ]);
 +}
 +
 +static gmx_inline real norm2(const rvec a)
 +{
 +  return a[XX]*a[XX]+a[YY]*a[YY]+a[ZZ]*a[ZZ];
 +}
 +
 +static gmx_inline double dnorm2(const dvec a)
 +{
 +  return a[XX]*a[XX]+a[YY]*a[YY]+a[ZZ]*a[ZZ];
 +}
 +
 +/* WARNING:
 + * As dnorm() uses sqrt() (which is slow) _only_ use it if you are sure you
 + * don't need 1/dnorm(), otherwise use dnorm2()*dinvnorm(). */
 +static gmx_inline double dnorm(const dvec a)
 +{
 +  return sqrt(diprod(a, a));
 +}
 +
 +/* WARNING:
 + * As norm() uses sqrtf() (which is slow) _only_ use it if you are sure you
 + * don't need 1/norm(), otherwise use norm2()*invnorm(). */
 +static gmx_inline real norm(const rvec a)
 +{
 +  /* This is ugly, but we deliberately do not define gmx_sqrt() and handle the
 +   * float/double case here instead to avoid gmx_sqrt() being accidentally used. */
 +#ifdef GMX_DOUBLE
 +  return dnorm(a);
 +#elif defined HAVE_SQRTF
 +  return sqrtf(iprod(a, a));
 +#else
 +  return sqrt(iprod(a, a));
 +#endif
 +}
 +
 +static gmx_inline real invnorm(const rvec a)
 +{
 +    return gmx_invsqrt(norm2(a));
 +}
 +
 +static gmx_inline real dinvnorm(const dvec a)
 +{
 +    return gmx_invsqrt(dnorm2(a));
 +}
 +
 +/* WARNING:
 + * Do _not_ use these routines to calculate the angle between two vectors
 + * as acos(cos_angle(u,v)). While it might seem obvious, the acos function
 + * is very flat close to -1 and 1, which will lead to accuracy-loss.
 + * Instead, use the new gmx_angle() function directly.
 + */
 +static gmx_inline real 
 +cos_angle(const rvec a,const rvec b)
 +{
 +  /* 
 +   *                  ax*bx + ay*by + az*bz
 +   * cos-vec (a,b) =  ---------------------
 +   *                      ||a|| * ||b||
 +   */
 +  real   cosval;
 +  int    m;
 +  double aa,bb,ip,ipa,ipb,ipab; /* For accuracy these must be double! */
 +  
 +  ip=ipa=ipb=0.0;
 +  for(m=0; (m<DIM); m++) {            /* 18           */
 +    aa   = a[m];
 +    bb   = b[m];
 +    ip  += aa*bb;
 +    ipa += aa*aa;
 +    ipb += bb*bb;
 +  }
 +  ipab = ipa*ipb;
 +  if (ipab > 0)
 +    cosval = ip*gmx_invsqrt(ipab);            /*  7           */
 +  else 
 +    cosval = 1;
 +                                      /* 25 TOTAL     */
 +  if (cosval > 1.0) 
 +    return  1.0; 
 +  if (cosval <-1.0) 
 +    return -1.0;
 +  
 +  return cosval;
 +}
 +
 +/* WARNING:
 + * Do _not_ use these routines to calculate the angle between two vectors
 + * as acos(cos_angle(u,v)). While it might seem obvious, the acos function
 + * is very flat close to -1 and 1, which will lead to accuracy-loss.
 + * Instead, use the new gmx_angle() function directly.
 + */
 +static gmx_inline real 
 +cos_angle_no_table(const rvec a,const rvec b)
 +{
 +  /* This version does not need the invsqrt lookup table */
 +  real   cosval;
 +  int    m;
 +  double aa,bb,ip,ipa,ipb; /* For accuracy these must be double! */
 +  
 +  ip=ipa=ipb=0.0;
 +  for(m=0; (m<DIM); m++) {            /* 18           */
 +    aa   = a[m];
 +    bb   = b[m];
 +    ip  += aa*bb;
 +    ipa += aa*aa;
 +    ipb += bb*bb;
 +  }
 +  cosval=ip/sqrt(ipa*ipb);            /* 12           */
 +                                      /* 30 TOTAL     */
 +  if (cosval > 1.0) 
 +    return  1.0; 
 +  if (cosval <-1.0) 
 +    return -1.0;
 +  
 +  return cosval;
 +}
 +
 +
/* c = a x b (vector cross product).
 * NOTE(review): c must not alias a or b — components of a and b are
 * still read after c starts being written. */
static gmx_inline void cprod(const rvec a,const rvec b,rvec c)
{
  c[XX]=a[YY]*b[ZZ]-a[ZZ]*b[YY];
  c[YY]=a[ZZ]*b[XX]-a[XX]*b[ZZ];
  c[ZZ]=a[XX]*b[YY]-a[YY]*b[XX];
}
 +
/* c = a x b (cross product) in double precision.
 * NOTE(review): c must not alias a or b — components of a and b are
 * still read after c starts being written. */
static gmx_inline void dcprod(const dvec a,const dvec b,dvec c)
{
  c[XX]=a[YY]*b[ZZ]-a[ZZ]*b[YY];
  c[YY]=a[ZZ]*b[XX]-a[XX]*b[ZZ];
  c[ZZ]=a[XX]*b[YY]-a[YY]*b[XX];
}
 +
 +/* This routine calculates the angle between a & b without any loss of accuracy close to 0/PI.
 + * If you only need cos(theta), use the cos_angle() routines to save a few cycles.
 + * This routine is faster than it might appear, since atan2 is accelerated on many CPUs (e.g. x86).
 + */
 +static gmx_inline real 
 +gmx_angle(const rvec a, const rvec b)
 +{
 +    rvec w;
 +    real wlen,s;
 +    
 +    cprod(a,b,w);
 +    
 +    wlen  = norm(w);
 +    s     = iprod(a,b);
 +    
 +    return atan2(wlen,s);
 +}
 +
/* dest = a * b for matrices with a zero upper-right triangle (the
 * GROMACS box-matrix layout): only the lower-triangular products are
 * formed and the strictly upper-triangular elements are set to zero.
 * NOTE(review): dest must not alias a or b. */
static gmx_inline void mmul_ur0(matrix a,matrix b,matrix dest)
{
  dest[XX][XX]=a[XX][XX]*b[XX][XX];
  dest[XX][YY]=0.0;
  dest[XX][ZZ]=0.0;
  dest[YY][XX]=a[YY][XX]*b[XX][XX]+a[YY][YY]*b[YY][XX];
  dest[YY][YY]=                    a[YY][YY]*b[YY][YY];
  dest[YY][ZZ]=0.0;
  dest[ZZ][XX]=a[ZZ][XX]*b[XX][XX]+a[ZZ][YY]*b[YY][XX]+a[ZZ][ZZ]*b[ZZ][XX];
  dest[ZZ][YY]=                    a[ZZ][YY]*b[YY][YY]+a[ZZ][ZZ]*b[ZZ][YY];
  dest[ZZ][ZZ]=                                        a[ZZ][ZZ]*b[ZZ][ZZ];
}
 +
 +static gmx_inline void mmul(matrix a,matrix b,matrix dest)
 +{
 +  dest[XX][XX]=a[XX][XX]*b[XX][XX]+a[XX][YY]*b[YY][XX]+a[XX][ZZ]*b[ZZ][XX];
 +  dest[YY][XX]=a[YY][XX]*b[XX][XX]+a[YY][YY]*b[YY][XX]+a[YY][ZZ]*b[ZZ][XX];
 +  dest[ZZ][XX]=a[ZZ][XX]*b[XX][XX]+a[ZZ][YY]*b[YY][XX]+a[ZZ][ZZ]*b[ZZ][XX];
 +  dest[XX][YY]=a[XX][XX]*b[XX][YY]+a[XX][YY]*b[YY][YY]+a[XX][ZZ]*b[ZZ][YY];
 +  dest[YY][YY]=a[YY][XX]*b[XX][YY]+a[YY][YY]*b[YY][YY]+a[YY][ZZ]*b[ZZ][YY];
 +  dest[ZZ][YY]=a[ZZ][XX]*b[XX][YY]+a[ZZ][YY]*b[YY][YY]+a[ZZ][ZZ]*b[ZZ][YY];
 +  dest[XX][ZZ]=a[XX][XX]*b[XX][ZZ]+a[XX][YY]*b[YY][ZZ]+a[XX][ZZ]*b[ZZ][ZZ];
 +  dest[YY][ZZ]=a[YY][XX]*b[XX][ZZ]+a[YY][YY]*b[YY][ZZ]+a[YY][ZZ]*b[ZZ][ZZ];
 +  dest[ZZ][ZZ]=a[ZZ][XX]*b[XX][ZZ]+a[ZZ][YY]*b[YY][ZZ]+a[ZZ][ZZ]*b[ZZ][ZZ];
 +}
 +
 +static gmx_inline void transpose(matrix src,matrix dest)
 +{
 +  dest[XX][XX]=src[XX][XX];
 +  dest[YY][XX]=src[XX][YY];
 +  dest[ZZ][XX]=src[XX][ZZ];
 +  dest[XX][YY]=src[YY][XX];
 +  dest[YY][YY]=src[YY][YY];
 +  dest[ZZ][YY]=src[YY][ZZ];
 +  dest[XX][ZZ]=src[ZZ][XX];
 +  dest[YY][ZZ]=src[ZZ][YY];
 +  dest[ZZ][ZZ]=src[ZZ][ZZ];
 +}
 +
 +static gmx_inline void tmmul(matrix a,matrix b,matrix dest)
 +{
 +  /* Computes dest=mmul(transpose(a),b,dest) - used in do_pr_pcoupl */
 +  dest[XX][XX]=a[XX][XX]*b[XX][XX]+a[YY][XX]*b[YY][XX]+a[ZZ][XX]*b[ZZ][XX];
 +  dest[XX][YY]=a[XX][XX]*b[XX][YY]+a[YY][XX]*b[YY][YY]+a[ZZ][XX]*b[ZZ][YY];
 +  dest[XX][ZZ]=a[XX][XX]*b[XX][ZZ]+a[YY][XX]*b[YY][ZZ]+a[ZZ][XX]*b[ZZ][ZZ];
 +  dest[YY][XX]=a[XX][YY]*b[XX][XX]+a[YY][YY]*b[YY][XX]+a[ZZ][YY]*b[ZZ][XX];
 +  dest[YY][YY]=a[XX][YY]*b[XX][YY]+a[YY][YY]*b[YY][YY]+a[ZZ][YY]*b[ZZ][YY];
 +  dest[YY][ZZ]=a[XX][YY]*b[XX][ZZ]+a[YY][YY]*b[YY][ZZ]+a[ZZ][YY]*b[ZZ][ZZ];
 +  dest[ZZ][XX]=a[XX][ZZ]*b[XX][XX]+a[YY][ZZ]*b[YY][XX]+a[ZZ][ZZ]*b[ZZ][XX];
 +  dest[ZZ][YY]=a[XX][ZZ]*b[XX][YY]+a[YY][ZZ]*b[YY][YY]+a[ZZ][ZZ]*b[ZZ][YY];
 +  dest[ZZ][ZZ]=a[XX][ZZ]*b[XX][ZZ]+a[YY][ZZ]*b[YY][ZZ]+a[ZZ][ZZ]*b[ZZ][ZZ];
 +}
 +
 +static gmx_inline void mtmul(matrix a,matrix b,matrix dest)
 +{
 +  /* Computes dest=mmul(a,transpose(b),dest) - used in do_pr_pcoupl */
 +  dest[XX][XX]=a[XX][XX]*b[XX][XX]+a[XX][YY]*b[XX][YY]+a[XX][ZZ]*b[XX][ZZ];
 +  dest[XX][YY]=a[XX][XX]*b[YY][XX]+a[XX][YY]*b[YY][YY]+a[XX][ZZ]*b[YY][ZZ];
 +  dest[XX][ZZ]=a[XX][XX]*b[ZZ][XX]+a[XX][YY]*b[ZZ][YY]+a[XX][ZZ]*b[ZZ][ZZ];
 +  dest[YY][XX]=a[YY][XX]*b[XX][XX]+a[YY][YY]*b[XX][YY]+a[YY][ZZ]*b[XX][ZZ];
 +  dest[YY][YY]=a[YY][XX]*b[YY][XX]+a[YY][YY]*b[YY][YY]+a[YY][ZZ]*b[YY][ZZ];
 +  dest[YY][ZZ]=a[YY][XX]*b[ZZ][XX]+a[YY][YY]*b[ZZ][YY]+a[YY][ZZ]*b[ZZ][ZZ];
 +  dest[ZZ][XX]=a[ZZ][XX]*b[XX][XX]+a[ZZ][YY]*b[XX][YY]+a[ZZ][ZZ]*b[XX][ZZ];
 +  dest[ZZ][YY]=a[ZZ][XX]*b[YY][XX]+a[ZZ][YY]*b[YY][YY]+a[ZZ][ZZ]*b[YY][ZZ];
 +  dest[ZZ][ZZ]=a[ZZ][XX]*b[ZZ][XX]+a[ZZ][YY]*b[ZZ][YY]+a[ZZ][ZZ]*b[ZZ][ZZ];
 +}
 +
/* Determinant of a 3x3 matrix, via cofactor expansion along the first
 * column. */
static gmx_inline real det(matrix a)
{
  return ( a[XX][XX]*(a[YY][YY]*a[ZZ][ZZ]-a[ZZ][YY]*a[YY][ZZ])
        -a[YY][XX]*(a[XX][YY]*a[ZZ][ZZ]-a[ZZ][YY]*a[XX][ZZ])
        +a[ZZ][XX]*(a[XX][YY]*a[YY][ZZ]-a[YY][YY]*a[XX][ZZ]));
}
 +
 +static gmx_inline void m_add(matrix a,matrix b,matrix dest)
 +{
 +  dest[XX][XX]=a[XX][XX]+b[XX][XX];
 +  dest[XX][YY]=a[XX][YY]+b[XX][YY];
 +  dest[XX][ZZ]=a[XX][ZZ]+b[XX][ZZ];
 +  dest[YY][XX]=a[YY][XX]+b[YY][XX];
 +  dest[YY][YY]=a[YY][YY]+b[YY][YY];
 +  dest[YY][ZZ]=a[YY][ZZ]+b[YY][ZZ];
 +  dest[ZZ][XX]=a[ZZ][XX]+b[ZZ][XX];
 +  dest[ZZ][YY]=a[ZZ][YY]+b[ZZ][YY];
 +  dest[ZZ][ZZ]=a[ZZ][ZZ]+b[ZZ][ZZ];
 +}
 +
 +static gmx_inline void m_sub(matrix a,matrix b,matrix dest)
 +{
 +  dest[XX][XX]=a[XX][XX]-b[XX][XX];
 +  dest[XX][YY]=a[XX][YY]-b[XX][YY];
 +  dest[XX][ZZ]=a[XX][ZZ]-b[XX][ZZ];
 +  dest[YY][XX]=a[YY][XX]-b[YY][XX];
 +  dest[YY][YY]=a[YY][YY]-b[YY][YY];
 +  dest[YY][ZZ]=a[YY][ZZ]-b[YY][ZZ];
 +  dest[ZZ][XX]=a[ZZ][XX]-b[ZZ][XX];
 +  dest[ZZ][YY]=a[ZZ][YY]-b[ZZ][YY];
 +  dest[ZZ][ZZ]=a[ZZ][ZZ]-b[ZZ][ZZ];
 +}
 +
 +static gmx_inline void msmul(matrix m1,real r1,matrix dest)
 +{
 +  dest[XX][XX]=r1*m1[XX][XX];
 +  dest[XX][YY]=r1*m1[XX][YY];
 +  dest[XX][ZZ]=r1*m1[XX][ZZ];
 +  dest[YY][XX]=r1*m1[YY][XX];
 +  dest[YY][YY]=r1*m1[YY][YY];
 +  dest[YY][ZZ]=r1*m1[YY][ZZ];
 +  dest[ZZ][XX]=r1*m1[ZZ][XX];
 +  dest[ZZ][YY]=r1*m1[ZZ][YY];
 +  dest[ZZ][ZZ]=r1*m1[ZZ][ZZ];
 +}
 +
/* Invert a matrix with a zero upper-right triangle (box-matrix layout).
 * For such a triangular matrix the determinant is the product of the
 * diagonal, which is checked against (near-)zero before inverting.
 * NOTE(review): src and dest must be distinct — dest's diagonal is
 * written before the off-diagonal elements of src are consumed. */
static gmx_inline void m_inv_ur0(matrix src,matrix dest)
{
  double tmp = src[XX][XX]*src[YY][YY]*src[ZZ][ZZ];
  if (fabs(tmp) <= 100*GMX_REAL_MIN)
    gmx_fatal(FARGS,"Can not invert matrix, determinant is zero");

  dest[XX][XX] = 1/src[XX][XX];
  dest[YY][YY] = 1/src[YY][YY];
  dest[ZZ][ZZ] = 1/src[ZZ][ZZ];
  /* Off-diagonal terms of the triangular inverse, built from the
   * already-computed reciprocal diagonal */
  dest[ZZ][XX] = (src[YY][XX]*src[ZZ][YY]*dest[YY][YY]
                - src[ZZ][XX])*dest[XX][XX]*dest[ZZ][ZZ];
  dest[YY][XX] = -src[YY][XX]*dest[XX][XX]*dest[YY][YY];
  dest[ZZ][YY] = -src[ZZ][YY]*dest[YY][YY]*dest[ZZ][ZZ];
  dest[XX][YY] = 0.0;
  dest[XX][ZZ] = 0.0;
  dest[YY][ZZ] = 0.0;
}
 +
 +static gmx_inline void m_inv(matrix src,matrix dest)
 +{
 +  const real smallreal = (real)1.0e-24;
 +  const real largereal = (real)1.0e24;
 +  real  deter,c,fc;
 +
 +  deter = det(src);
 +  c     = (real)1.0/deter;
 +  fc    = (real)fabs(c);
 +  
 +  if ((fc <= smallreal) || (fc >= largereal)) 
 +    gmx_fatal(FARGS,"Can not invert matrix, determinant = %e",deter);
 +
 +  dest[XX][XX]= c*(src[YY][YY]*src[ZZ][ZZ]-src[ZZ][YY]*src[YY][ZZ]);
 +  dest[XX][YY]=-c*(src[XX][YY]*src[ZZ][ZZ]-src[ZZ][YY]*src[XX][ZZ]);
 +  dest[XX][ZZ]= c*(src[XX][YY]*src[YY][ZZ]-src[YY][YY]*src[XX][ZZ]);
 +  dest[YY][XX]=-c*(src[YY][XX]*src[ZZ][ZZ]-src[ZZ][XX]*src[YY][ZZ]);
 +  dest[YY][YY]= c*(src[XX][XX]*src[ZZ][ZZ]-src[ZZ][XX]*src[XX][ZZ]);
 +  dest[YY][ZZ]=-c*(src[XX][XX]*src[YY][ZZ]-src[YY][XX]*src[XX][ZZ]);
 +  dest[ZZ][XX]= c*(src[YY][XX]*src[ZZ][YY]-src[ZZ][XX]*src[YY][YY]);
 +  dest[ZZ][YY]=-c*(src[XX][XX]*src[ZZ][YY]-src[ZZ][XX]*src[XX][YY]);
 +  dest[ZZ][ZZ]= c*(src[XX][XX]*src[YY][YY]-src[YY][XX]*src[XX][YY]);
 +}
 +
 +static gmx_inline void mvmul(matrix a,const rvec src,rvec dest)
 +{
 +  dest[XX]=a[XX][XX]*src[XX]+a[XX][YY]*src[YY]+a[XX][ZZ]*src[ZZ];
 +  dest[YY]=a[YY][XX]*src[XX]+a[YY][YY]*src[YY]+a[YY][ZZ]*src[ZZ];
 +  dest[ZZ]=a[ZZ][XX]*src[XX]+a[ZZ][YY]*src[YY]+a[ZZ][ZZ]*src[ZZ];
 +}
 +
/* Matrix-vector product for a matrix with a zero upper-right triangle
 * (box-matrix layout): dest = a * src.
 * dest[ZZ] is written first on purpose: each later assignment only
 * reads src components that have not yet been overwritten, so src and
 * dest may alias (in-place use is safe). */
static gmx_inline void mvmul_ur0(matrix a,const rvec src,rvec dest)
{
  dest[ZZ]=a[ZZ][XX]*src[XX]+a[ZZ][YY]*src[YY]+a[ZZ][ZZ]*src[ZZ];
  dest[YY]=a[YY][XX]*src[XX]+a[YY][YY]*src[YY];
  dest[XX]=a[XX][XX]*src[XX];
}
 +
/* dest = transpose(a) * src for a matrix with a zero upper-right
 * triangle (box-matrix layout); only the non-zero products are formed.
 * The write order makes in-place use (src == dest) safe: each dest
 * component only depends on src components not yet overwritten. */
static gmx_inline void tmvmul_ur0(matrix a,const rvec src,rvec dest)
{
  dest[XX]=a[XX][XX]*src[XX]+a[YY][XX]*src[YY]+a[ZZ][XX]*src[ZZ];
  dest[YY]=                  a[YY][YY]*src[YY]+a[ZZ][YY]*src[ZZ];
  dest[ZZ]=                                    a[ZZ][ZZ]*src[ZZ];
}
 +
 +static gmx_inline void unitv(const rvec src,rvec dest)
 +{
 +  real linv;
 +  
 +  linv=gmx_invsqrt(norm2(src));
 +  dest[XX]=linv*src[XX];
 +  dest[YY]=linv*src[YY];
 +  dest[ZZ]=linv*src[ZZ];
 +}
 +
 +static gmx_inline void unitv_no_table(const rvec src,rvec dest)
 +{
 +  real linv;
 +  
 +  linv=1.0/sqrt(norm2(src));
 +  dest[XX]=linv*src[XX];
 +  dest[YY]=linv*src[YY];
 +  dest[ZZ]=linv*src[ZZ];
 +}
 +
 +static void calc_lll(rvec box,rvec lll)
 +{
 +  lll[XX] = 2.0*M_PI/box[XX];
 +  lll[YY] = 2.0*M_PI/box[YY];
 +  lll[ZZ] = 2.0*M_PI/box[ZZ];
 +}
 +
/* Trace of a 3x3 matrix (sum of the diagonal elements). */
static gmx_inline real trace(matrix m)
{
  return (m[XX][XX]+m[YY][YY]+m[ZZ][ZZ]);
}
 +
/* Checked division a/b: issues a fatal error naming the call site when
 * the denominator is (numerically) zero. Called through the
 * divide_err() macro, which supplies __FILE__/__LINE__. */
static gmx_inline real _divide_err(real a,real b,const char *file,int line)
{
    if (fabs(b) <= GMX_REAL_MIN) 
        gmx_fatal(FARGS,"Dividing by zero, file %s, line %d",file,line);
    return a/b;
}
 +
/* Checked integer modulo a%b: fatal error on b == 0, naming the call
 * site. Called through the mod() macro, which supplies
 * __FILE__/__LINE__.
 * NOTE(review): INT_MIN % -1 still overflows — verify callers never
 * pass that combination. */
static gmx_inline int _mod(int a,int b,char *file,int line)
{
  if(b==0)
    gmx_fatal(FARGS,"Modulo zero, file %s, line %d",file,line);
  return a % b;
}
 +
 +/* Operations on multidimensional rvecs, used e.g. in edsam.c */
 +static void m_rveccopy(int dim, rvec *a, rvec *b)
 +{
 +    /* b = a */
 +    int i;
 +
 +    for (i=0; i<dim; i++)
 +        copy_rvec(a[i],b[i]);
 +} 
 +
 +/*computer matrix vectors from base vectors and angles */
 +static void matrix_convert(matrix box, rvec vec, rvec angle)
 +{
 +    svmul(DEG2RAD,angle,angle);
 +    box[XX][XX] = vec[XX];
 +    box[YY][XX] = vec[YY]*cos(angle[ZZ]);
 +    box[YY][YY] = vec[YY]*sin(angle[ZZ]);
 +    box[ZZ][XX] = vec[ZZ]*cos(angle[YY]);
 +    box[ZZ][YY] = vec[ZZ]
 +                         *(cos(angle[XX])-cos(angle[YY])*cos(angle[ZZ]))/sin(angle[ZZ]);
 +    box[ZZ][ZZ] = sqrt(sqr(vec[ZZ])
 +                       -box[ZZ][XX]*box[ZZ][XX]-box[ZZ][YY]*box[ZZ][YY]);
 +}
 +
 +#define divide_err(a,b) _divide_err((a),(b),__FILE__,__LINE__)
 +#define mod(a,b)    _mod((a),(b),__FILE__,__LINE__)
 +
 +#ifdef __cplusplus
 +}
 +#endif
 +
 +
 +#endif        /* _vec_h */
index 73e278c36fbce445d8463e8e81fa6430b0209770,0000000000000000000000000000000000000000..5167747a6d9c8d7eb8abee0dc674f0184d7ef6f2
mode 100644,000000..100644
--- /dev/null
@@@ -1,926 -1,0 +1,944 @@@
-     gmx_enerdata_t ed_lam;
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <math.h>
 +#include <string.h>
 +#include <assert.h>
 +#include "sysstuff.h"
 +#include "typedefs.h"
 +#include "macros.h"
 +#include "smalloc.h"
 +#include "macros.h"
 +#include "physics.h"
 +#include "force.h"
 +#include "nonbonded.h"
 +#include "names.h"
 +#include "network.h"
 +#include "pbc.h"
 +#include "ns.h"
 +#include "nrnb.h"
 +#include "bondf.h"
 +#include "mshift.h"
 +#include "txtdump.h"
 +#include "coulomb.h"
 +#include "pme.h"
 +#include "mdrun.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "qmmm.h"
 +#include "gmx_omp_nthreads.h"
 +
 +
 +void ns(FILE *fp,
 +        t_forcerec *fr,
 +        rvec       x[],
 +        matrix     box,
 +        gmx_groups_t *groups,
 +        t_grpopts  *opts,
 +        gmx_localtop_t *top,
 +        t_mdatoms  *md,
 +        t_commrec  *cr,
 +        t_nrnb     *nrnb,
 +        real       *lambda,
 +        real       *dvdlambda,
 +        gmx_grppairener_t *grppener,
 +        gmx_bool       bFillGrid,
 +        gmx_bool       bDoLongRangeNS)
 +{
 +  char   *ptr;
 +  int    nsearch;
 +
 +
 +  if (!fr->ns.nblist_initialized)
 +  {
 +      init_neighbor_list(fp, fr, md->homenr);
 +  }
 +
 +  if (fr->bTwinRange)
 +    fr->nlr=0;
 +
 +    nsearch = search_neighbours(fp,fr,x,box,top,groups,cr,nrnb,md,
 +                                lambda,dvdlambda,grppener,
 +                                bFillGrid,bDoLongRangeNS,TRUE);
 +  if (debug)
 +    fprintf(debug,"nsearch = %d\n",nsearch);
 +
 +  /* Check whether we have to do dynamic load balancing */
 +  /*if ((nsb->nstDlb > 0) && (mod(step,nsb->nstDlb) == 0))
 +    count_nb(cr,nsb,&(top->blocks[ebCGS]),nns,fr->nlr,
 +    &(top->idef),opts->ngener);
 +  */
 +  if (fr->ns.dump_nl > 0)
 +    dump_nblist(fp,cr,fr,fr->ns.dump_nl);
 +}
 +
/* Reduce the per-thread force buffers f_t[1..nthreads-1] into the
 * master arrays: forces f (n entries), virial vir, the correction
 * energy *Vcorr, and component efpt_ind of the dvdl accumulator.
 * The force reduction is itself parallelized over the bonded thread
 * pool; the scalar/tensor accumulation is cheap and done serially. */
static void reduce_thread_forces(int n,rvec *f,
                                 tensor vir,
                                 real *Vcorr,
                                 int efpt_ind,real *dvdl,
                                 int nthreads,f_thread_t *f_t)
{
    int t,i;

    /* This reduction can run over any number of threads */
#pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntBonded)) private(t) schedule(static)
    for(i=0; i<n; i++)
    {
        for(t=1; t<nthreads; t++)
        {
            rvec_inc(f[i],f_t[t].f[i]);
        }
    }
    for(t=1; t<nthreads; t++)
    {
        *Vcorr += f_t[t].Vcorr;
        *dvdl  += f_t[t].dvdl[efpt_ind];
        m_add(vir,f_t[t].vir,vir);
    }
}
 +
 +void do_force_lowlevel(FILE       *fplog,   gmx_large_int_t step,
 +                       t_forcerec *fr,      t_inputrec *ir,
 +                       t_idef     *idef,    t_commrec  *cr,
 +                       t_nrnb     *nrnb,    gmx_wallcycle_t wcycle,
 +                       t_mdatoms  *md,
 +                       t_grpopts  *opts,
 +                       rvec       x[],      history_t  *hist,
 +                       rvec       f[],
 +                       rvec       f_longrange[],
 +                       gmx_enerdata_t *enerd,
 +                       t_fcdata   *fcd,
 +                       gmx_mtop_t     *mtop,
 +                       gmx_localtop_t *top,
 +                       gmx_genborn_t *born,
 +                       t_atomtypes *atype,
 +                       gmx_bool       bBornRadii,
 +                       matrix     box,
 +                       t_lambda   *fepvals,
 +                       real       *lambda,
 +                       t_graph    *graph,
 +                       t_blocka   *excl,
 +                       rvec       mu_tot[],
 +                       int        flags,
 +                       float      *cycles_pme)
 +{
 +    int     i,j,status;
 +    int     donb_flags;
 +    gmx_bool    bDoEpot,bSepDVDL,bSB;
 +    int     pme_flags;
 +    matrix  boxs;
 +    rvec    box_size;
 +    real    Vsr,Vlr,Vcorr=0;
 +    t_pbc   pbc;
 +    real    dvdgb;
 +    char    buf[22];
-         wallcycle_sub_stop(wcycle, ewcsNONBONDED);
-     }
 +    double  clam_i,vlam_i;
 +    real    dvdl_dum[efptNR], dvdl, dvdl_nb[efptNR], lam_i[efptNR];
 +    real    dvdlsum;
 +
 +#ifdef GMX_MPI
 +    double  t0=0.0,t1,t2,t3; /* time measurement for coarse load balancing */
 +#endif
 +
 +#define PRINT_SEPDVDL(s,v,dvdlambda) if (bSepDVDL) fprintf(fplog,sepdvdlformat,s,v,dvdlambda);
 +
 +
 +    set_pbc(&pbc,fr->ePBC,box);
 +
 +    /* reset free energy components */
 +    for (i=0;i<efptNR;i++)
 +    {
 +        dvdl_nb[i]  = 0;
 +        dvdl_dum[i] = 0;
 +    }
 +
 +    /* Reset box */
 +    for(i=0; (i<DIM); i++)
 +    {
 +        box_size[i]=box[i][i];
 +    }
 +
 +    bSepDVDL=(fr->bSepDVDL && do_per_step(step,ir->nstlog));
 +    debug_gmx();
 +
 +    /* do QMMM first if requested */
 +    if(fr->bQMMM)
 +    {
 +        enerd->term[F_EQM] = calculate_QMMM(cr,x,f,fr,md);
 +    }
 +
 +    if (bSepDVDL)
 +    {
 +        fprintf(fplog,"Step %s: non-bonded V and dVdl for node %d:\n",
 +                gmx_step_str(step,buf),cr->nodeid);
 +    }
 +
 +    /* Call the short range functions all in one go. */
 +
 +#ifdef GMX_MPI
 +    /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/
 +#define TAKETIME FALSE
 +    if (TAKETIME)
 +    {
 +        MPI_Barrier(cr->mpi_comm_mygroup);
 +        t0=MPI_Wtime();
 +    }
 +#endif
 +
 +    if (ir->nwall)
 +    {
 +        /* foreign lambda component for walls */
 +        dvdl = do_walls(ir,fr,box,md,x,f,lambda[efptVDW],
 +                        enerd->grpp.ener[egLJSR],nrnb);
 +        PRINT_SEPDVDL("Walls",0.0,dvdl);
 +        enerd->dvdl_lin[efptVDW] += dvdl;
 +    }
 +
 +      /* If doing GB, reset dvda and calculate the Born radii */
 +      if (ir->implicit_solvent)
 +      {
 +        wallcycle_sub_start(wcycle, ewcsNONBONDED);
 +
 +              for(i=0;i<born->nr;i++)
 +              {
 +                      fr->dvda[i]=0;
 +              }
 +
 +              if(bBornRadii)
 +              {
 +                      calc_gb_rad(cr,fr,ir,top,atype,x,&(fr->gblist),born,md,nrnb);
 +              }
 +
 +        wallcycle_sub_stop(wcycle, ewcsNONBONDED);
 +      }
 +
 +    where();
 +    /* We only do non-bonded calculation with group scheme here, the verlet
 +     * calls are done from do_force_cutsVERLET(). */
 +    if (fr->cutoff_scheme == ecutsGROUP && (flags & GMX_FORCE_NONBONDED))
 +    {
 +        donb_flags = 0;
 +        /* Add short-range interactions */
 +        donb_flags |= GMX_NONBONDED_DO_SR;
 +
 +        if (flags & GMX_FORCE_FORCES)
 +        {
 +            donb_flags |= GMX_NONBONDED_DO_FORCE;
 +        }
 +        if (flags & GMX_FORCE_ENERGY)
 +        {
 +            donb_flags |= GMX_NONBONDED_DO_POTENTIAL;
 +        }
 +        if (flags & GMX_FORCE_DO_LR)
 +        {
 +            donb_flags |= GMX_NONBONDED_DO_LR;
 +        }
 +
 +        wallcycle_sub_start(wcycle, ewcsNONBONDED);
 +        do_nonbonded(cr,fr,x,f,f_longrange,md,excl,
 +                    &enerd->grpp,box_size,nrnb,
 +                    lambda,dvdl_nb,-1,-1,donb_flags);
-     /* If we do foreign lambda and we have soft-core interactions
-      * we have to recalculate the (non-linear) energies contributions.
-      */
-     if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0)
-     {
-         wallcycle_sub_start(wcycle, ewcsNONBONDED);
-         init_enerdata(mtop->groups.grps[egcENER].nr,fepvals->n_lambda,&ed_lam);
-         for(i=0; i<enerd->n_lambda; i++)
 +
-             for (j=0;j<efptNR;j++)
++        /* If we do foreign lambda and we have soft-core interactions
++         * we have to recalculate the (non-linear) energies contributions.
++         */
++        if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0)
 +        {
-                 lam_i[j] = (i==0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
++            for(i=0; i<enerd->n_lambda; i++)
 +            {
-             reset_enerdata(&ir->opts,fr,TRUE,&ed_lam,FALSE);
-             do_nonbonded(cr,fr,x,f,f_longrange,md,excl,
-                          &(ed_lam.grpp), box_size,nrnb,
-                          lam_i,dvdl_dum,-1,-1,
-                          GMX_NONBONDED_DO_FOREIGNLAMBDA | GMX_NONBONDED_DO_SR);
-             sum_epot(&ir->opts,&ed_lam);
-             enerd->enerpart_lambda[i] += ed_lam.term[F_EPOT];
++                for (j=0;j<efptNR;j++)
++                {
++                    lam_i[j] = (i==0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
++                }
++                reset_foreign_enerdata(enerd);
++                do_nonbonded(cr,fr,x,f,f_longrange,md,excl,
++                             &(enerd->foreign_grpp),box_size,nrnb,
++                             lam_i,dvdl_dum,-1,-1,
++                             (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA);
++                sum_epot(&ir->opts,&(enerd->foreign_grpp),enerd->foreign_term);
++                enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
 +            }
-         destroy_enerdata(&ed_lam);
 +        }
-     where();
 +        wallcycle_sub_stop(wcycle, ewcsNONBONDED);
++        where();
 +    }
-             init_enerdata(mtop->groups.grps[egcENER].nr,fepvals->n_lambda,&ed_lam);
 +
 +      /* If we are doing GB, calculate bonded forces and apply corrections
 +       * to the solvation forces */
 +    /* MRS: Eventually, many need to include free energy contribution here! */
 +      if (ir->implicit_solvent)
 +    {
 +        wallcycle_sub_start(wcycle, ewcsBONDED);
 +              calc_gb_forces(cr,md,born,top,atype,x,f,fr,idef,
 +                       ir->gb_algorithm,ir->sa_algorithm,nrnb,bBornRadii,&pbc,graph,enerd);
 +        wallcycle_sub_stop(wcycle, ewcsBONDED);
 +    }
 +
 +#ifdef GMX_MPI
 +    if (TAKETIME)
 +    {
 +        t1=MPI_Wtime();
 +        fr->t_fnbf += t1-t0;
 +    }
 +#endif
 +
 +    if (fepvals->sc_alpha!=0)
 +    {
 +        enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW];
 +    }
 +    else
 +    {
 +        enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW];
 +    }
 +
 +    if (fepvals->sc_alpha!=0)
 +
 +        /* even though coulomb part is linear, we already added it, beacuse we
 +           need to go through the vdw calculation anyway */
 +    {
 +        enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL];
 +    }
 +    else
 +    {
 +        enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL];
 +    }
 +
 +    Vsr = 0;
 +    if (bSepDVDL)
 +    {
 +        for(i=0; i<enerd->grpp.nener; i++)
 +        {
 +            Vsr +=
 +                (fr->bBHAM ?
 +                 enerd->grpp.ener[egBHAMSR][i] :
 +                 enerd->grpp.ener[egLJSR][i])
 +                + enerd->grpp.ener[egCOULSR][i] + enerd->grpp.ener[egGB][i];
 +        }
 +        dvdlsum = dvdl_nb[efptVDW] + dvdl_nb[efptCOUL];
 +        PRINT_SEPDVDL("VdW and Coulomb SR particle-p.",Vsr,dvdlsum);
 +    }
 +    debug_gmx();
 +
 +
 +    if (debug)
 +    {
 +        pr_rvecs(debug,0,"fshift after SR",fr->fshift,SHIFTS);
 +    }
 +
 +    /* Shift the coordinates. Must be done before bonded forces and PPPM,
 +     * but is also necessary for SHAKE and update, therefore it can NOT
 +     * go when no bonded forces have to be evaluated.
 +     */
 +
 +    /* Here sometimes we would not need to shift with NBFonly,
 +     * but we do so anyhow for consistency of the returned coordinates.
 +     */
 +    if (graph)
 +    {
 +        shift_self(graph,box,x);
 +        if (TRICLINIC(box))
 +        {
 +            inc_nrnb(nrnb,eNR_SHIFTX,2*graph->nnodes);
 +        }
 +        else
 +        {
 +            inc_nrnb(nrnb,eNR_SHIFTX,graph->nnodes);
 +        }
 +    }
 +    /* Check whether we need to do bondeds or correct for exclusions */
 +    if (fr->bMolPBC &&
 +        ((flags & GMX_FORCE_BONDED)
 +         || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype)))
 +    {
 +        /* Since all atoms are in the rectangular or triclinic unit-cell,
 +         * only single box vector shifts (2 in x) are required.
 +         */
 +        set_pbc_dd(&pbc,fr->ePBC,cr->dd,TRUE,box);
 +    }
 +    debug_gmx();
 +
 +    if (flags & GMX_FORCE_BONDED)
 +    {
 +        wallcycle_sub_start(wcycle, ewcsBONDED);
 +        calc_bonds(fplog,cr->ms,
 +                   idef,x,hist,f,fr,&pbc,graph,enerd,nrnb,lambda,md,fcd,
 +                   DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL, atype, born,
 +                   flags,
 +                   fr->bSepDVDL && do_per_step(step,ir->nstlog),step);
 +
 +        /* Check if we have to determine energy differences
 +         * at foreign lambda's.
 +         */
 +        if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) &&
 +            idef->ilsort != ilsortNO_FE)
 +        {
 +            if (idef->ilsort != ilsortFE_SORTED)
 +            {
 +                gmx_incons("The bonded interactions are not sorted for free energy");
 +            }
-                 reset_enerdata(&ir->opts,fr,TRUE,&ed_lam,FALSE);
 +            for(i=0; i<enerd->n_lambda; i++)
 +            {
-                 calc_bonds_lambda(fplog,idef,x,fr,&pbc,graph,&ed_lam,nrnb,lam_i,md,
++                reset_foreign_enerdata(enerd);
 +                for (j=0;j<efptNR;j++)
 +                {
 +                    lam_i[j] = (i==0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
 +                }
-                 sum_epot(&ir->opts,&ed_lam);
-                 enerd->enerpart_lambda[i] += ed_lam.term[F_EPOT];
++                calc_bonds_lambda(fplog,idef,x,fr,&pbc,graph,&(enerd->foreign_grpp),enerd->foreign_term,nrnb,lam_i,md,
 +                                  fcd,DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
-             destroy_enerdata(&ed_lam);
++                sum_epot(&ir->opts,&(enerd->foreign_grpp),enerd->foreign_term);
++                enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
 +            }
- void sum_epot(t_grpopts *opts,gmx_enerdata_t *enerd)
 +        }
 +        debug_gmx();
 +
 +        wallcycle_sub_stop(wcycle, ewcsBONDED);
 +    }
 +
 +    where();
 +
 +    *cycles_pme = 0;
 +    if (EEL_FULL(fr->eeltype))
 +    {
 +        bSB = (ir->nwall == 2);
 +        if (bSB)
 +        {
 +            copy_mat(box,boxs);
 +            svmul(ir->wall_ewald_zfac,boxs[ZZ],boxs[ZZ]);
 +            box_size[ZZ] *= ir->wall_ewald_zfac;
 +        }
 +
 +        clear_mat(fr->vir_el_recip);
 +
 +        if (fr->bEwald)
 +        {
 +            Vcorr = 0;
 +            dvdl  = 0;
 +
 +            /* With the Verlet scheme exclusion forces are calculated
 +             * in the non-bonded kernel.
 +             */
 +            /* The TPI molecule does not have exclusions with the rest
 +             * of the system and no intra-molecular PME grid contributions
 +             * will be calculated in gmx_pme_calc_energy.
 +             */
 +            if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) ||
 +                ir->ewald_geometry != eewg3D ||
 +                ir->epsilon_surface != 0)
 +            {
 +                int nthreads,t;
 +
 +                wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION);
 +
 +                if (fr->n_tpi > 0)
 +                {
 +                    gmx_fatal(FARGS,"TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions");
 +                }
 +
 +                nthreads = gmx_omp_nthreads_get(emntBonded);
 +#pragma omp parallel for num_threads(nthreads) schedule(static)
 +                for(t=0; t<nthreads; t++)
 +                {
 +                    int s,e,i;
 +                    rvec *fnv;
 +                    tensor *vir;
 +                    real *Vcorrt,*dvdlt;
 +                    if (t == 0)
 +                    {
 +                        fnv    = fr->f_novirsum;
 +                        vir    = &fr->vir_el_recip;
 +                        Vcorrt = &Vcorr;
 +                        dvdlt  = &dvdl;
 +                    }
 +                    else
 +                    {
 +                        fnv    = fr->f_t[t].f;
 +                        vir    = &fr->f_t[t].vir;
 +                        Vcorrt = &fr->f_t[t].Vcorr;
 +                        dvdlt  = &fr->f_t[t].dvdl[efptCOUL];
 +                        for(i=0; i<fr->natoms_force; i++)
 +                        {
 +                            clear_rvec(fnv[i]);
 +                        }
 +                        clear_mat(*vir);
 +                    }
 +                    *dvdlt = 0;
 +                    *Vcorrt =
 +                        ewald_LRcorrection(fplog,
 +                                           fr->excl_load[t],fr->excl_load[t+1],
 +                                           cr,t,fr,
 +                                           md->chargeA,
 +                                           md->nChargePerturbed ? md->chargeB : NULL,
 +                                           ir->cutoff_scheme != ecutsVERLET,
 +                                           excl,x,bSB ? boxs : box,mu_tot,
 +                                           ir->ewald_geometry,
 +                                           ir->epsilon_surface,
 +                                           fnv,*vir,
 +                                           lambda[efptCOUL],dvdlt);
 +                }
 +                if (nthreads > 1)
 +                {
 +                    reduce_thread_forces(fr->natoms_force,fr->f_novirsum,
 +                                         fr->vir_el_recip,
 +                                         &Vcorr,efptCOUL,&dvdl,
 +                                         nthreads,fr->f_t);
 +                }
 +
 +                wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
 +            }
 +
 +            if (fr->n_tpi == 0)
 +            {
 +                Vcorr += ewald_charge_correction(cr,fr,lambda[efptCOUL],box,
 +                                                 &dvdl,fr->vir_el_recip);
 +            }
 +
 +            PRINT_SEPDVDL("Ewald excl./charge/dip. corr.",Vcorr,dvdl);
 +            enerd->dvdl_lin[efptCOUL] += dvdl;
 +        }
 +
 +        status = 0;
 +        Vlr  = 0;
 +        dvdl = 0;
 +        switch (fr->eeltype)
 +        {
 +        case eelPME:
 +        case eelPMESWITCH:
 +        case eelPMEUSER:
 +        case eelPMEUSERSWITCH:
 +        case eelP3M_AD:
 +            if (cr->duty & DUTY_PME)
 +            {
 +                assert(fr->n_tpi >= 0);
 +                if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED))
 +                {
 +                    pme_flags = GMX_PME_SPREAD_Q | GMX_PME_SOLVE;
 +                    if (flags & GMX_FORCE_FORCES)
 +                    {
 +                        pme_flags |= GMX_PME_CALC_F;
 +                    }
 +                    if (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY))
 +                    {
 +                        pme_flags |= GMX_PME_CALC_ENER_VIR;
 +                    }
 +                    if (fr->n_tpi > 0)
 +                    {
 +                        /* We don't calculate f, but we do want the potential */
 +                        pme_flags |= GMX_PME_CALC_POT;
 +                    }
 +                    wallcycle_start(wcycle,ewcPMEMESH);
 +                    status = gmx_pme_do(fr->pmedata,
 +                                        md->start,md->homenr - fr->n_tpi,
 +                                        x,fr->f_novirsum,
 +                                        md->chargeA,md->chargeB,
 +                                        bSB ? boxs : box,cr,
 +                                        DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0,
 +                                        DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0,
 +                                        nrnb,wcycle,
 +                                        fr->vir_el_recip,fr->ewaldcoeff,
 +                                        &Vlr,lambda[efptCOUL],&dvdl,
 +                                        pme_flags);
 +                    *cycles_pme = wallcycle_stop(wcycle,ewcPMEMESH);
 +
 +                    /* We should try to do as little computation after
 +                     * this as possible, because parallel PME synchronizes
 +                     * the nodes, so we want all load imbalance of the rest
 +                     * of the force calculation to be before the PME call.
 +                     * DD load balancing is done on the whole time of
 +                     * the force call (without PME).
 +                     */
 +                }
 +                if (fr->n_tpi > 0)
 +                {
 +                    /* Determine the PME grid energy of the test molecule
 +                     * with the PME grid potential of the other charges.
 +                     */
 +                    gmx_pme_calc_energy(fr->pmedata,fr->n_tpi,
 +                                        x + md->homenr - fr->n_tpi,
 +                                        md->chargeA + md->homenr - fr->n_tpi,
 +                                        &Vlr);
 +                }
 +                PRINT_SEPDVDL("PME mesh",Vlr,dvdl);
 +            }
 +            break;
 +        case eelEWALD:
 +            Vlr = do_ewald(fplog,FALSE,ir,x,fr->f_novirsum,
 +                           md->chargeA,md->chargeB,
 +                           box_size,cr,md->homenr,
 +                           fr->vir_el_recip,fr->ewaldcoeff,
 +                           lambda[efptCOUL],&dvdl,fr->ewald_table);
 +            PRINT_SEPDVDL("Ewald long-range",Vlr,dvdl);
 +            break;
 +        default:
 +            gmx_fatal(FARGS,"No such electrostatics method implemented %s",
 +                      eel_names[fr->eeltype]);
 +        }
 +        if (status != 0)
 +        {
 +            gmx_fatal(FARGS,"Error %d in long range electrostatics routine %s",
 +                      status,EELTYPE(fr->eeltype));
 +              }
 +        /* Note that with separate PME nodes we get the real energies later */
 +        enerd->dvdl_lin[efptCOUL] += dvdl;
 +        enerd->term[F_COUL_RECIP] = Vlr + Vcorr;
 +        if (debug)
 +        {
 +            fprintf(debug,"Vlr = %g, Vcorr = %g, Vlr_corr = %g\n",
 +                    Vlr,Vcorr,enerd->term[F_COUL_RECIP]);
 +            pr_rvecs(debug,0,"vir_el_recip after corr",fr->vir_el_recip,DIM);
 +            pr_rvecs(debug,0,"fshift after LR Corrections",fr->fshift,SHIFTS);
 +        }
 +    }
 +    else
 +    {
 +        if (EEL_RF(fr->eeltype))
 +        {
 +            /* With the Verlet scheme exclusion forces are calculated
 +             * in the non-bonded kernel.
 +             */
 +            if (ir->cutoff_scheme != ecutsVERLET && fr->eeltype != eelRF_NEC)
 +            {
 +                dvdl = 0;
 +                enerd->term[F_RF_EXCL] =
 +                    RF_excl_correction(fplog,fr,graph,md,excl,x,f,
 +                                       fr->fshift,&pbc,lambda[efptCOUL],&dvdl);
 +            }
 +
 +            enerd->dvdl_lin[efptCOUL] += dvdl;
 +            PRINT_SEPDVDL("RF exclusion correction",
 +                          enerd->term[F_RF_EXCL],dvdl);
 +        }
 +    }
 +    where();
 +    debug_gmx();
 +
 +    if (debug)
 +    {
 +        print_nrnb(debug,nrnb);
 +    }
 +    debug_gmx();
 +
 +#ifdef GMX_MPI
 +    if (TAKETIME)
 +    {
 +        t2=MPI_Wtime();
 +        MPI_Barrier(cr->mpi_comm_mygroup);
 +        t3=MPI_Wtime();
 +        fr->t_wait += t3-t2;
 +        if (fr->timesteps == 11)
 +        {
 +            fprintf(stderr,"* PP load balancing info: node %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n",
 +                    cr->nodeid, gmx_step_str(fr->timesteps,buf),
 +                    100*fr->t_wait/(fr->t_wait+fr->t_fnbf),
 +                    (fr->t_fnbf+fr->t_wait)/fr->t_fnbf);
 +        }
 +        fr->timesteps++;
 +    }
 +#endif
 +
 +    if (debug)
 +    {
 +        pr_rvecs(debug,0,"fshift after bondeds",fr->fshift,SHIFTS);
 +    }
 +
 +}
 +
 +void init_enerdata(int ngener,int n_lambda,gmx_enerdata_t *enerd)
 +{
 +    int i,n2;
 +
 +    for(i=0; i<F_NRE; i++)
 +    {
 +        enerd->term[i] = 0;
++        enerd->foreign_term[i] = 0;
 +    }
 +
 +
 +    for(i=0; i<efptNR; i++) {
 +        enerd->dvdl_lin[i]  = 0;
 +        enerd->dvdl_nonlin[i]  = 0;
 +    }
 +
 +    n2=ngener*ngener;
 +    if (debug)
 +    {
 +        fprintf(debug,"Creating %d sized group matrix for energies\n",n2);
 +    }
 +    enerd->grpp.nener = n2;
++    enerd->foreign_grpp.nener = n2;
 +    for(i=0; (i<egNR); i++)
 +    {
 +        snew(enerd->grpp.ener[i],n2);
++        snew(enerd->foreign_grpp.ener[i],n2);
 +    }
 +
 +    if (n_lambda)
 +    {
 +        enerd->n_lambda = 1 + n_lambda;
 +        snew(enerd->enerpart_lambda,enerd->n_lambda);
 +    }
 +    else
 +    {
 +        enerd->n_lambda = 0;
 +    }
 +}
 +
 +void destroy_enerdata(gmx_enerdata_t *enerd)
 +{
 +    int i;
 +
 +    for(i=0; (i<egNR); i++)
 +    {
 +        sfree(enerd->grpp.ener[i]);
 +    }
 +
++    for(i=0; (i<egNR); i++)
++    {
++        sfree(enerd->foreign_grpp.ener[i]);
++    }
++
 +    if (enerd->n_lambda)
 +    {
 +        sfree(enerd->enerpart_lambda);
 +    }
 +}
 +
 +static real sum_v(int n,real v[])
 +{
 +  real t;
 +  int  i;
 +
 +  t = 0.0;
 +  for(i=0; (i<n); i++)
 +    t = t + v[i];
 +
 +  return t;
 +}
 +
-   gmx_grppairener_t *grpp;
-   real *epot;
++void sum_epot(t_grpopts *opts, gmx_grppairener_t *grpp, real *epot)
 +{
-   grpp = &enerd->grpp;
-   epot = enerd->term;
 +  int i;
 +
 +  /* Accumulate energies */
 +  epot[F_COUL_SR]  = sum_v(grpp->nener,grpp->ener[egCOULSR]);
 +  epot[F_LJ]       = sum_v(grpp->nener,grpp->ener[egLJSR]);
 +  epot[F_LJ14]     = sum_v(grpp->nener,grpp->ener[egLJ14]);
 +  epot[F_COUL14]   = sum_v(grpp->nener,grpp->ener[egCOUL14]);
 +  epot[F_COUL_LR]  = sum_v(grpp->nener,grpp->ener[egCOULLR]);
 +  epot[F_LJ_LR]    = sum_v(grpp->nener,grpp->ener[egLJLR]);
 +  /* We have already added 1-2,1-3, and 1-4 terms to F_GBPOL */
 +  epot[F_GBPOL]   += sum_v(grpp->nener,grpp->ener[egGB]);
 +
 +/* lattice part of LR doesnt belong to any group
 + * and has been added earlier
 + */
 +  epot[F_BHAM]     = sum_v(grpp->nener,grpp->ener[egBHAMSR]);
 +  epot[F_BHAM_LR]  = sum_v(grpp->nener,grpp->ener[egBHAMLR]);
 +
 +  epot[F_EPOT] = 0;
 +  for(i=0; (i<F_EPOT); i++)
 +  {
 +      if (i != F_DISRESVIOL && i != F_ORIRESDEV)
 +      {
 +          epot[F_EPOT] += epot[i];
 +      }
 +  }
 +}
 +
 +void sum_dhdl(gmx_enerdata_t *enerd, real *lambda, t_lambda *fepvals)
 +{
 +    int i,j,index;
 +    double dlam;
 +
 +    enerd->dvdl_lin[efptVDW] += enerd->term[F_DVDL_VDW];  /* include dispersion correction */
 +    enerd->term[F_DVDL] = 0.0;
 +    for (i=0;i<efptNR;i++)
 +    {
 +        if (fepvals->separate_dvdl[i])
 +        {
 +            /* could this be done more readably/compactly? */
 +            switch (i) {
 +            case (efptCOUL):
 +                index = F_DVDL_COUL;
 +                break;
 +            case (efptVDW):
 +                index = F_DVDL_VDW;
 +                break;
 +            case (efptBONDED):
 +                index = F_DVDL_BONDED;
 +                break;
 +            case (efptRESTRAINT):
 +                index = F_DVDL_RESTRAINT;
 +                break;
 +            case (efptMASS):
 +                index = F_DKDL;
 +                break;
 +            default:
 +                index = F_DVDL;
 +                break;
 +            }
 +            enerd->term[index] = enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i];
 +            if (debug)
 +            {
 +                fprintf(debug,"dvdl-%s[%2d]: %f: non-linear %f + linear %f\n",
 +                        efpt_names[i],i,enerd->term[index],enerd->dvdl_nonlin[i],enerd->dvdl_lin[i]);
 +            }
 +        }
 +        else
 +        {
 +            enerd->term[F_DVDL] += enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i];
 +            if (debug)
 +            {
 +                fprintf(debug,"dvd-%sl[%2d]: %f: non-linear %f + linear %f\n",
 +                        efpt_names[0],i,enerd->term[F_DVDL],enerd->dvdl_nonlin[i],enerd->dvdl_lin[i]);
 +            }
 +        }
 +    }
 +
 +    /* Notes on the foreign lambda free energy difference evaluation:
 +     * Adding the potential and ekin terms that depend linearly on lambda
 +     * as delta lam * dvdl to the energy differences is exact.
 +     * For the constraints this is not exact, but we have no other option
 +     * without literally changing the lengths and reevaluating the energies at each step.
 +     * (try to remedy this post 4.6 - MRS)
 +     * For the non-bonded LR term we assume that the soft-core (if present)
 +     * no longer affects the energy beyond the short-range cut-off,
 +     * which is a very good approximation (except for exotic settings).
 +     * (investigate how to overcome this post 4.6 - MRS)
 +     */
 +
 +    for(i=0; i<fepvals->n_lambda; i++)
 +    {                                         /* note we are iterating over fepvals here!
 +                                                 For the current lam, dlam = 0 automatically,
 +                                                 so we don't need to add anything to the
 +                                                 enerd->enerpart_lambda[0] */
 +
 +        /* we don't need to worry about dvdl contributions to the current lambda, because
 +           it's automatically zero */
 +
 +        /* first kinetic energy term */
 +        dlam = (fepvals->all_lambda[efptMASS][i] - lambda[efptMASS]);
 +
 +        enerd->enerpart_lambda[i+1] += enerd->term[F_DKDL]*dlam;
 +
 +        for (j=0;j<efptNR;j++)
 +        {
 +            if (j==efptMASS) {continue;} /* no other mass term to worry about */
 +
 +            dlam = (fepvals->all_lambda[j][i]-lambda[j]);
 +            enerd->enerpart_lambda[i+1] += dlam*enerd->dvdl_lin[j];
 +            if (debug)
 +            {
 +                fprintf(debug,"enerdiff lam %g: (%15s), non-linear %f linear %f*%f\n",
 +                        fepvals->all_lambda[j][i],efpt_names[j],
 +                        (enerd->enerpart_lambda[i+1] - enerd->enerpart_lambda[0]),
 +                        dlam,enerd->dvdl_lin[j]);
 +            }
 +        }
 +    }
 +}
 +
++
++void reset_foreign_enerdata(gmx_enerdata_t *enerd)
++{
++    int  i,j;
++
++    /* First reset all foreign energy components.  Foreign energies always called on
++       neighbor search steps */
++    for(i=0; (i<egNR); i++)
++    {
++        for(j=0; (j<enerd->grpp.nener); j++)
++        {
++            enerd->foreign_grpp.ener[i][j] = 0.0;
++        }
++    }
++
++    /* potential energy components */
++    for(i=0; (i<=F_EPOT); i++)
++    {
++        enerd->foreign_term[i] = 0.0;
++    }
++}
++
 +void reset_enerdata(t_grpopts *opts,
 +                    t_forcerec *fr,gmx_bool bNS,
 +                    gmx_enerdata_t *enerd,
 +                    gmx_bool bMaster)
 +{
 +    gmx_bool bKeepLR;
 +    int  i,j;
 +
 +    /* First reset all energy components, except for the long range terms
 +     * on the master at non neighbor search steps, since the long range
 +     * terms have already been summed at the last neighbor search step.
 +     */
 +    bKeepLR = (fr->bTwinRange && !bNS);
 +    for(i=0; (i<egNR); i++) {
 +        if (!(bKeepLR && bMaster && (i == egCOULLR || i == egLJLR))) {
 +            for(j=0; (j<enerd->grpp.nener); j++)
 +                enerd->grpp.ener[i][j] = 0.0;
 +        }
 +    }
 +    for (i=0;i<efptNR;i++)
 +    {
 +        enerd->dvdl_lin[i]    = 0.0;
 +        enerd->dvdl_nonlin[i] = 0.0;
 +    }
 +
 +    /* Normal potential energy components */
 +    for(i=0; (i<=F_EPOT); i++) {
 +        enerd->term[i] = 0.0;
 +    }
 +    /* Initialize the dVdlambda term with the long range contribution */
 +    /* Initialize the dvdl term with the long range contribution */
 +    enerd->term[F_DVDL]            = 0.0;
 +    enerd->term[F_DVDL_COUL]       = 0.0;
 +    enerd->term[F_DVDL_VDW]        = 0.0;
 +    enerd->term[F_DVDL_BONDED]     = 0.0;
 +    enerd->term[F_DVDL_RESTRAINT]  = 0.0;
 +    enerd->term[F_DKDL]            = 0.0;
 +    if (enerd->n_lambda > 0)
 +    {
 +        for(i=0; i<enerd->n_lambda; i++)
 +        {
 +            enerd->enerpart_lambda[i] = 0.0;
 +        }
 +    }
++    /* reset foreign energy data - separate function since we also call it elsewhere */
++    reset_foreign_enerdata(enerd);
 +}
index ebfcc53cd84331b3fcf3338b5f90b0996e9472c2,0000000000000000000000000000000000000000..4d684843e3da506049764380b649d11b332ffe28
mode 100644,000000..100644
--- /dev/null
@@@ -1,2693 -1,0 +1,2734 @@@
-                                   int *kernel_type)
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <math.h>
 +#include <string.h>
 +#include <assert.h>
 +#include "sysstuff.h"
 +#include "typedefs.h"
 +#include "vec.h"
 +#include "maths.h"
 +#include "macros.h"
 +#include "smalloc.h"
 +#include "macros.h"
 +#include "gmx_fatal.h"
 +#include "gmx_fatal_collective.h"
 +#include "physics.h"
 +#include "force.h"
 +#include "tables.h"
 +#include "nonbonded.h"
 +#include "invblock.h"
 +#include "names.h"
 +#include "network.h"
 +#include "pbc.h"
 +#include "ns.h"
 +#include "mshift.h"
 +#include "txtdump.h"
 +#include "coulomb.h"
 +#include "md_support.h"
 +#include "md_logging.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "qmmm.h"
 +#include "copyrite.h"
 +#include "mtop_util.h"
 +#include "nbnxn_search.h"
 +#include "nbnxn_atomdata.h"
 +#include "nbnxn_consts.h"
 +#include "statutil.h"
 +#include "gmx_omp_nthreads.h"
 +
 +#ifdef _MSC_VER
 +/* MSVC definition for __cpuid() */
 +#include <intrin.h>
 +#endif
 +
 +#include "types/nbnxn_cuda_types_ext.h"
 +#include "gpu_utils.h"
 +#include "nbnxn_cuda_data_mgmt.h"
 +#include "pmalloc_cuda.h"
 +
 +t_forcerec *mk_forcerec(void)
 +{
 +  t_forcerec *fr;
 +  
 +  snew(fr,1);
 +  
 +  return fr;
 +}
 +
 +#ifdef DEBUG
 +static void pr_nbfp(FILE *fp,real *nbfp,gmx_bool bBHAM,int atnr)
 +{
 +  int i,j;
 +  
 +  for(i=0; (i<atnr); i++) {
 +    for(j=0; (j<atnr); j++) {
 +      fprintf(fp,"%2d - %2d",i,j);
 +      if (bBHAM)
 +      fprintf(fp,"  a=%10g, b=%10g, c=%10g\n",BHAMA(nbfp,atnr,i,j),
 +              BHAMB(nbfp,atnr,i,j),BHAMC(nbfp,atnr,i,j)/6.0);
 +      else
 +      fprintf(fp,"  c6=%10g, c12=%10g\n",C6(nbfp,atnr,i,j)/6.0,
 +            C12(nbfp,atnr,i,j)/12.0);
 +    }
 +  }
 +}
 +#endif
 +
 +static real *mk_nbfp(const gmx_ffparams_t *idef,gmx_bool bBHAM)
 +{
 +  real *nbfp;
 +  int  i,j,k,atnr;
 +  
 +  atnr=idef->atnr;
 +  if (bBHAM) {
 +    snew(nbfp,3*atnr*atnr);
 +    for(i=k=0; (i<atnr); i++) {
 +      for(j=0; (j<atnr); j++,k++) {
 +          BHAMA(nbfp,atnr,i,j) = idef->iparams[k].bham.a;
 +          BHAMB(nbfp,atnr,i,j) = idef->iparams[k].bham.b;
 +          /* nbfp now includes the 6.0 derivative prefactor */
 +          BHAMC(nbfp,atnr,i,j) = idef->iparams[k].bham.c*6.0;
 +      }
 +    }
 +  }
 +  else {
 +    snew(nbfp,2*atnr*atnr);
 +    for(i=k=0; (i<atnr); i++) {
 +      for(j=0; (j<atnr); j++,k++) {
 +          /* nbfp now includes the 6.0/12.0 derivative prefactors */
 +          C6(nbfp,atnr,i,j)   = idef->iparams[k].lj.c6*6.0;
 +          C12(nbfp,atnr,i,j)  = idef->iparams[k].lj.c12*12.0;
 +      }
 +    }
 +  }
 +
 +  return nbfp;
 +}
 +
 +/* This routine sets fr->solvent_opt to the most common solvent in the 
 + * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in 
 + * the fr->solvent_type array with the correct type (or esolNO).
 + *
 + * Charge groups that fulfill the conditions but are not identical to the
 + * most common one will be marked as esolNO in the solvent_type array. 
 + *
 + * TIP3p is identical to SPC for these purposes, so we call it
 + * SPC in the arrays (Apologies to Bill Jorgensen ;-)
 + * 
 + * NOTE: QM particle should not
 + * become an optimized solvent. Not even if there is only one charge
 + * group in the Qm 
 + */
 +
 +typedef struct 
 +{
 +    int    model;          
 +    int    count;
 +    int    vdwtype[4];
 +    real   charge[4];
 +} solvent_parameters_t;
 +
 +static void
 +check_solvent_cg(const gmx_moltype_t   *molt,
 +                 int                   cg0,
 +                 int                   nmol,
 +                 const unsigned char   *qm_grpnr,
 +                 const t_grps          *qm_grps,
 +                 t_forcerec *          fr,
 +                 int                   *n_solvent_parameters,
 +                 solvent_parameters_t  **solvent_parameters_p,
 +                 int                   cginfo,
 +                 int                   *cg_sp)
 +{
 +    const t_blocka *  excl;
 +    t_atom            *atom;
 +    int               j,k;
 +    int               j0,j1,nj;
 +    gmx_bool              perturbed;
 +    gmx_bool              has_vdw[4];
 +    gmx_bool              match;
 +    real              tmp_charge[4];
 +    int               tmp_vdwtype[4];
 +    int               tjA;
 +    gmx_bool              qm;
 +    solvent_parameters_t *solvent_parameters;
 +
 +    /* We use a list with parameters for each solvent type. 
 +     * Every time we discover a new molecule that fulfills the basic 
 +     * conditions for a solvent we compare with the previous entries
 +     * in these lists. If the parameters are the same we just increment
 +     * the counter for that type, and otherwise we create a new type
 +     * based on the current molecule.
 +     *
 +     * Once we've finished going through all molecules we check which
 +     * solvent is most common, and mark all those molecules while we
 +     * clear the flag on all others.
 +     */   
 +
 +    solvent_parameters = *solvent_parameters_p;
 +
 +    /* Mark the cg first as non optimized */
 +    *cg_sp = -1;
 +    
 +    /* Check if this cg has no exclusions with atoms in other charge groups
 +     * and all atoms inside the charge group excluded.
 +     * We only have 3 or 4 atom solvent loops.
 +     */
 +    if (GET_CGINFO_EXCL_INTER(cginfo) ||
 +        !GET_CGINFO_EXCL_INTRA(cginfo))
 +    {
 +        return;
 +    }
 +
 +    /* Get the indices of the first atom in this charge group */
 +    j0     = molt->cgs.index[cg0];
 +    j1     = molt->cgs.index[cg0+1];
 +    
 +    /* Number of atoms in our molecule */
 +    nj     = j1 - j0;
 +
 +    if (debug) {
 +        fprintf(debug,
 +                "Moltype '%s': there are %d atoms in this charge group\n",
 +                *molt->name,nj);
 +    }
 +    
 +    /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
 +     * otherwise skip it.
 +     */
 +    if (nj<3 || nj>4)
 +    {
 +        return;
 +    }
 +    
 +    /* Check if we are doing QM on this group */
 +    qm = FALSE; 
 +    if (qm_grpnr != NULL)
 +    {
 +        for(j=j0 ; j<j1 && !qm; j++)
 +        {
 +            qm = (qm_grpnr[j] < qm_grps->nr - 1);
 +        }
 +    }
 +    /* Cannot use solvent optimization with QM */
 +    if (qm)
 +    {
 +        return;
 +    }
 +    
 +    atom = molt->atoms.atom;
 +
 +    /* Still looks like a solvent, time to check parameters */
 +    
 +    /* If it is perturbed (free energy) we can't use the solvent loops,
 +     * so then we just skip to the next molecule.
 +     */   
 +    perturbed = FALSE; 
 +    
 +    for(j=j0; j<j1 && !perturbed; j++)
 +    {
 +        perturbed = PERTURBED(atom[j]);
 +    }
 +    
 +    if (perturbed)
 +    {
 +        return;
 +    }
 +    
 +    /* Now it's only a question if the VdW and charge parameters 
 +     * are OK. Before doing the check we compare and see if they are 
 +     * identical to a possible previous solvent type.
 +     * First we assign the current types and charges.    
 +     */
 +    for(j=0; j<nj; j++)
 +    {
 +        tmp_vdwtype[j] = atom[j0+j].type;
 +        tmp_charge[j]  = atom[j0+j].q;
 +    } 
 +    
 +    /* Does it match any previous solvent type? */
 +    for(k=0 ; k<*n_solvent_parameters; k++)
 +    {
 +        match = TRUE;
 +        
 +        
 +        /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
 +        if( (solvent_parameters[k].model==esolSPC   && nj!=3)  ||
 +            (solvent_parameters[k].model==esolTIP4P && nj!=4) )
 +            match = FALSE;
 +        
 +        /* Check that types & charges match for all atoms in molecule */
 +        for(j=0 ; j<nj && match==TRUE; j++)
 +        {                     
 +            if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
 +            {
 +                match = FALSE;
 +            }
 +            if(tmp_charge[j] != solvent_parameters[k].charge[j])
 +            {
 +                match = FALSE;
 +            }
 +        }
 +        if (match == TRUE)
 +        {
 +            /* Congratulations! We have a matched solvent.
 +             * Flag it with this type for later processing.
 +             */
 +            *cg_sp = k;
 +            solvent_parameters[k].count += nmol;
 +
 +            /* We are done with this charge group */
 +            return;
 +        }
 +    }
 +    
 +    /* If we get here, we have a tentative new solvent type.
 +     * Before we add it we must check that it fulfills the requirements
 +     * of the solvent optimized loops. First determine which atoms have
 +     * VdW interactions.   
 +     */
 +    for(j=0; j<nj; j++) 
 +    {
 +        has_vdw[j] = FALSE;
 +        tjA        = tmp_vdwtype[j];
 +        
 +        /* Go through all other tpes and see if any have non-zero
 +         * VdW parameters when combined with this one.
 +         */   
 +        for(k=0; k<fr->ntype && (has_vdw[j]==FALSE); k++)
 +        {
 +            /* We already checked that the atoms weren't perturbed,
 +             * so we only need to check state A now.
 +             */ 
 +            if (fr->bBHAM) 
 +            {
 +                has_vdw[j] = (has_vdw[j] || 
 +                              (BHAMA(fr->nbfp,fr->ntype,tjA,k) != 0.0) ||
 +                              (BHAMB(fr->nbfp,fr->ntype,tjA,k) != 0.0) ||
 +                              (BHAMC(fr->nbfp,fr->ntype,tjA,k) != 0.0));
 +            }
 +            else
 +            {
 +                /* Standard LJ */
 +                has_vdw[j] = (has_vdw[j] || 
 +                              (C6(fr->nbfp,fr->ntype,tjA,k)  != 0.0) ||
 +                              (C12(fr->nbfp,fr->ntype,tjA,k) != 0.0));
 +            }
 +        }
 +    }
 +    
 +    /* Now we know all we need to make the final check and assignment. */
 +    if (nj == 3)
 +    {
 +        /* So, is it an SPC?
 +         * For this we require thatn all atoms have charge, 
 +         * the charges on atom 2 & 3 should be the same, and only
 +         * atom 1 might have VdW.
 +         */
 +        if (has_vdw[1] == FALSE &&
 +            has_vdw[2] == FALSE &&
 +            tmp_charge[0]  != 0 &&
 +            tmp_charge[1]  != 0 &&
 +            tmp_charge[2]  == tmp_charge[1])
 +        {
 +            srenew(solvent_parameters,*n_solvent_parameters+1);
 +            solvent_parameters[*n_solvent_parameters].model = esolSPC;
 +            solvent_parameters[*n_solvent_parameters].count = nmol;
 +            for(k=0;k<3;k++)
 +            {
 +                solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
 +                solvent_parameters[*n_solvent_parameters].charge[k]  = tmp_charge[k];
 +            }
 +
 +            *cg_sp = *n_solvent_parameters;
 +            (*n_solvent_parameters)++;
 +        }
 +    }
 +    else if (nj==4)
 +    {
 +        /* Or could it be a TIP4P?
 +         * For this we require thatn atoms 2,3,4 have charge, but not atom 1. 
 +         * Only atom 1 mght have VdW.
 +         */
 +        if(has_vdw[1] == FALSE &&
 +           has_vdw[2] == FALSE &&
 +           has_vdw[3] == FALSE &&
 +           tmp_charge[0]  == 0 &&
 +           tmp_charge[1]  != 0 &&
 +           tmp_charge[2]  == tmp_charge[1] &&
 +           tmp_charge[3]  != 0)
 +        {
 +            srenew(solvent_parameters,*n_solvent_parameters+1);
 +            solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
 +            solvent_parameters[*n_solvent_parameters].count = nmol;
 +            for(k=0;k<4;k++)
 +            {
 +                solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
 +                solvent_parameters[*n_solvent_parameters].charge[k]  = tmp_charge[k];
 +            }
 +            
 +            *cg_sp = *n_solvent_parameters;
 +            (*n_solvent_parameters)++;
 +        }
 +    }
 +
 +    *solvent_parameters_p = solvent_parameters;
 +}
 +
 +static void
 +check_solvent(FILE *                fp,
 +              const gmx_mtop_t *    mtop,
 +              t_forcerec *          fr,
 +              cginfo_mb_t           *cginfo_mb)
 +{
 +    const t_block *   cgs;
 +    const t_block *   mols;
 +    const gmx_moltype_t *molt;
 +    int               mb,mol,cg_mol,at_offset,cg_offset,am,cgm,i,nmol_ch,nmol;
 +    int               n_solvent_parameters;
 +    solvent_parameters_t *solvent_parameters;
 +    int               **cg_sp;
 +    int               bestsp,bestsol;
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"Going to determine what solvent types we have.\n");
 +    }
 +
 +    mols = &mtop->mols;
 +
 +    n_solvent_parameters = 0;
 +    solvent_parameters = NULL;
 +    /* Allocate temporary array for solvent type */
 +    snew(cg_sp,mtop->nmolblock);
 +
 +    cg_offset = 0;
 +    at_offset = 0;
 +    for(mb=0; mb<mtop->nmolblock; mb++)
 +    {
 +        molt = &mtop->moltype[mtop->molblock[mb].type];
 +        cgs  = &molt->cgs;
 +        /* Here we have to loop over all individual molecules
 +         * because we need to check for QMMM particles.
 +         */
 +        snew(cg_sp[mb],cginfo_mb[mb].cg_mod);
 +        nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
 +        nmol    = mtop->molblock[mb].nmol/nmol_ch;
 +        for(mol=0; mol<nmol_ch; mol++)
 +        {
 +            cgm = mol*cgs->nr;
 +            am  = mol*cgs->index[cgs->nr];
 +            for(cg_mol=0; cg_mol<cgs->nr; cg_mol++)
 +            {
 +                check_solvent_cg(molt,cg_mol,nmol,
 +                                 mtop->groups.grpnr[egcQMMM] ?
 +                                 mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
 +                                 &mtop->groups.grps[egcQMMM],
 +                                 fr,
 +                                 &n_solvent_parameters,&solvent_parameters,
 +                                 cginfo_mb[mb].cginfo[cgm+cg_mol],
 +                                 &cg_sp[mb][cgm+cg_mol]);
 +            }
 +        }
 +        cg_offset += cgs->nr;
 +        at_offset += cgs->index[cgs->nr];
 +    }
 +
 +    /* Puh! We finished going through all charge groups.
 +     * Now find the most common solvent model.
 +     */   
 +    
 +    /* Most common solvent this far */
 +    bestsp = -2;
 +    for(i=0;i<n_solvent_parameters;i++)
 +    {
 +        if (bestsp == -2 ||
 +            solvent_parameters[i].count > solvent_parameters[bestsp].count)
 +        {
 +            bestsp = i;
 +        }
 +    }
 +    
 +    if (bestsp >= 0)
 +    {
 +        bestsol = solvent_parameters[bestsp].model;
 +    }
 +    else
 +    {
 +        bestsol = esolNO;
 +    }
 +    
 +#ifdef DISABLE_WATER_NLIST
 +      bestsol = esolNO;
 +#endif
 +
 +    fr->nWatMol = 0;
 +    for(mb=0; mb<mtop->nmolblock; mb++)
 +    {
 +        cgs = &mtop->moltype[mtop->molblock[mb].type].cgs;
 +        nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
 +        for(i=0; i<cginfo_mb[mb].cg_mod; i++)
 +        {
 +            if (cg_sp[mb][i] == bestsp)
 +            {
 +                SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i],bestsol);
 +                fr->nWatMol += nmol;
 +            }
 +            else
 +            {
 +                SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i],esolNO);
 +            }
 +        }
 +        sfree(cg_sp[mb]);
 +    }
 +    sfree(cg_sp);
 +    
 +    if (bestsol != esolNO && fp!=NULL)
 +    {
 +        fprintf(fp,"\nEnabling %s-like water optimization for %d molecules.\n\n",
 +                esol_names[bestsol],
 +                solvent_parameters[bestsp].count);
 +    }
 +
 +    sfree(solvent_parameters);
 +    fr->solvent_opt = bestsol;
 +}
 +
 +enum { acNONE=0, acCONSTRAINT, acSETTLE };
 +
 +static cginfo_mb_t *init_cginfo_mb(FILE *fplog,const gmx_mtop_t *mtop,
 +                                   t_forcerec *fr,gmx_bool bNoSolvOpt,
 +                                   gmx_bool *bExcl_IntraCGAll_InterCGNone)
 +{
 +    const t_block *cgs;
 +    const t_blocka *excl;
 +    const gmx_moltype_t *molt;
 +    const gmx_molblock_t *molb;
 +    cginfo_mb_t *cginfo_mb;
 +    gmx_bool *type_VDW;
 +    int  *cginfo;
 +    int  cg_offset,a_offset,cgm,am;
 +    int  mb,m,ncg_tot,cg,a0,a1,gid,ai,j,aj,excl_nalloc;
 +    int  *a_con;
 +    int  ftype;
 +    int  ia;
 +    gmx_bool bId,*bExcl,bExclIntraAll,bExclInter,bHaveVDW,bHaveQ;
 +
 +    ncg_tot = ncg_mtop(mtop);
 +    snew(cginfo_mb,mtop->nmolblock);
 +
 +    snew(type_VDW,fr->ntype);
 +    for(ai=0; ai<fr->ntype; ai++)
 +    {
 +        type_VDW[ai] = FALSE;
 +        for(j=0; j<fr->ntype; j++)
 +        {
 +            type_VDW[ai] = type_VDW[ai] ||
 +                fr->bBHAM ||
 +                C6(fr->nbfp,fr->ntype,ai,j) != 0 ||
 +                C12(fr->nbfp,fr->ntype,ai,j) != 0;
 +        }
 +    }
 +
 +    *bExcl_IntraCGAll_InterCGNone = TRUE;
 +
 +    excl_nalloc = 10;
 +    snew(bExcl,excl_nalloc);
 +    cg_offset = 0;
 +    a_offset  = 0;
 +    for(mb=0; mb<mtop->nmolblock; mb++)
 +    {
 +        molb = &mtop->molblock[mb];
 +        molt = &mtop->moltype[molb->type];
 +        cgs  = &molt->cgs;
 +        excl = &molt->excls;
 +
 +        /* Check if the cginfo is identical for all molecules in this block.
 +         * If so, we only need an array of the size of one molecule.
 +         * Otherwise we make an array of #mol times #cgs per molecule.
 +         */
 +        bId = TRUE;
 +        am = 0;
 +        for(m=0; m<molb->nmol; m++)
 +        {
 +            am = m*cgs->index[cgs->nr];
 +            for(cg=0; cg<cgs->nr; cg++)
 +            {
 +                a0 = cgs->index[cg];
 +                a1 = cgs->index[cg+1];
 +                if (ggrpnr(&mtop->groups,egcENER,a_offset+am+a0) !=
 +                    ggrpnr(&mtop->groups,egcENER,a_offset   +a0))
 +                {
 +                    bId = FALSE;
 +                }
 +                if (mtop->groups.grpnr[egcQMMM] != NULL)
 +                {
 +                    for(ai=a0; ai<a1; ai++)
 +                    {
 +                        if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
 +                            mtop->groups.grpnr[egcQMMM][a_offset   +ai])
 +                        {
 +                            bId = FALSE;
 +                        }
 +                    }
 +                }
 +            }
 +        }
 +
 +        cginfo_mb[mb].cg_start = cg_offset;
 +        cginfo_mb[mb].cg_end   = cg_offset + molb->nmol*cgs->nr;
 +        cginfo_mb[mb].cg_mod   = (bId ? 1 : molb->nmol)*cgs->nr;
 +        snew(cginfo_mb[mb].cginfo,cginfo_mb[mb].cg_mod);
 +        cginfo = cginfo_mb[mb].cginfo;
 +
 +        /* Set constraints flags for constrained atoms */
 +        snew(a_con,molt->atoms.nr);
 +        for(ftype=0; ftype<F_NRE; ftype++)
 +        {
 +            if (interaction_function[ftype].flags & IF_CONSTRAINT)
 +            {
 +                int nral;
 +
 +                nral = NRAL(ftype);
 +                for(ia=0; ia<molt->ilist[ftype].nr; ia+=1+nral)
 +                {
 +                    int a;
 +
 +                    for(a=0; a<nral; a++)
 +                    {
 +                        a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
 +                            (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
 +                    }
 +                }
 +            }
 +        }
 +
 +        for(m=0; m<(bId ? 1 : molb->nmol); m++)
 +        {
 +            cgm = m*cgs->nr;
 +            am  = m*cgs->index[cgs->nr];
 +            for(cg=0; cg<cgs->nr; cg++)
 +            {
 +                a0 = cgs->index[cg];
 +                a1 = cgs->index[cg+1];
 +
 +                /* Store the energy group in cginfo */
 +                gid = ggrpnr(&mtop->groups,egcENER,a_offset+am+a0);
 +                SET_CGINFO_GID(cginfo[cgm+cg],gid);
 +                
 +                /* Check the intra/inter charge group exclusions */
 +                if (a1-a0 > excl_nalloc) {
 +                    excl_nalloc = a1 - a0;
 +                    srenew(bExcl,excl_nalloc);
 +                }
 +                /* bExclIntraAll: all intra cg interactions excluded
 +                 * bExclInter:    any inter cg interactions excluded
 +                 */
 +                bExclIntraAll = TRUE;
 +                bExclInter    = FALSE;
 +                bHaveVDW      = FALSE;
 +                bHaveQ        = FALSE;
 +                for(ai=a0; ai<a1; ai++)
 +                {
 +                    /* Check VDW and electrostatic interactions */
 +                    bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
 +                                            type_VDW[molt->atoms.atom[ai].typeB]);
 +                    bHaveQ  = bHaveQ    || (molt->atoms.atom[ai].q != 0 ||
 +                                            molt->atoms.atom[ai].qB != 0);
 +
 +                    /* Clear the exclusion list for atom ai */
 +                    for(aj=a0; aj<a1; aj++)
 +                    {
 +                        bExcl[aj-a0] = FALSE;
 +                    }
 +                    /* Loop over all the exclusions of atom ai */
 +                    for(j=excl->index[ai]; j<excl->index[ai+1]; j++)
 +                    {
 +                        aj = excl->a[j];
 +                        if (aj < a0 || aj >= a1)
 +                        {
 +                            bExclInter = TRUE;
 +                        }
 +                        else
 +                        {
 +                            bExcl[aj-a0] = TRUE;
 +                        }
 +                    }
 +                    /* Check if ai excludes a0 to a1 */
 +                    for(aj=a0; aj<a1; aj++)
 +                    {
 +                        if (!bExcl[aj-a0])
 +                        {
 +                            bExclIntraAll = FALSE;
 +                        }
 +                    }
 +
 +                    switch (a_con[ai])
 +                    {
 +                    case acCONSTRAINT:
 +                        SET_CGINFO_CONSTR(cginfo[cgm+cg]);
 +                        break;
 +                    case acSETTLE:
 +                        SET_CGINFO_SETTLE(cginfo[cgm+cg]);
 +                        break;
 +                    default:
 +                        break;
 +                    }
 +                }
 +                if (bExclIntraAll)
 +                {
 +                    SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
 +                }
 +                if (bExclInter)
 +                {
 +                    SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
 +                }
 +                if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
 +                {
 +                    /* The size in cginfo is currently only read with DD */
 +                    gmx_fatal(FARGS,"A charge group has size %d which is larger than the limit of %d atoms",a1-a0,MAX_CHARGEGROUP_SIZE);
 +                }
 +                if (bHaveVDW)
 +                {
 +                    SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
 +                }
 +                if (bHaveQ)
 +                {
 +                    SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
 +                }
 +                /* Store the charge group size */
 +                SET_CGINFO_NATOMS(cginfo[cgm+cg],a1-a0);
 +
 +                if (!bExclIntraAll || bExclInter)
 +                {
 +                    *bExcl_IntraCGAll_InterCGNone = FALSE;
 +                }
 +            }
 +        }
 +
 +        sfree(a_con);
 +
 +        cg_offset += molb->nmol*cgs->nr;
 +        a_offset  += molb->nmol*cgs->index[cgs->nr];
 +    }
 +    sfree(bExcl);
 +    
 +    /* the solvent optimizer is called after the QM is initialized,
 +     * because we don't want to have the QM subsystemto become an
 +     * optimized solvent
 +     */
 +
 +    check_solvent(fplog,mtop,fr,cginfo_mb);
 +    
 +    if (getenv("GMX_NO_SOLV_OPT"))
 +    {
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Found environment variable GMX_NO_SOLV_OPT.\n"
 +                    "Disabling all solvent optimization\n");
 +        }
 +        fr->solvent_opt = esolNO;
 +    }
 +    if (bNoSolvOpt)
 +    {
 +        fr->solvent_opt = esolNO;
 +    }
 +    if (!fr->solvent_opt)
 +    {
 +        for(mb=0; mb<mtop->nmolblock; mb++)
 +        {
 +            for(cg=0; cg<cginfo_mb[mb].cg_mod; cg++)
 +            {
 +                SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg],esolNO);
 +            }
 +        }
 +    }
 +    
 +    return cginfo_mb;
 +}
 +
 +static int *cginfo_expand(int nmb,cginfo_mb_t *cgi_mb)
 +{
 +    int ncg,mb,cg;
 +    int *cginfo;
 +
 +    ncg = cgi_mb[nmb-1].cg_end;
 +    snew(cginfo,ncg);
 +    mb = 0;
 +    for(cg=0; cg<ncg; cg++)
 +    {
 +        while (cg >= cgi_mb[mb].cg_end)
 +        {
 +            mb++;
 +        }
 +        cginfo[cg] =
 +            cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
 +    }
 +
 +    return cginfo;
 +}
 +
 +static void set_chargesum(FILE *log,t_forcerec *fr,const gmx_mtop_t *mtop)
 +{
 +    double qsum,q2sum,q;
 +    int    mb,nmol,i;
 +    const t_atoms *atoms;
 +    
 +    qsum  = 0;
 +    q2sum = 0;
 +    for(mb=0; mb<mtop->nmolblock; mb++)
 +    {
 +        nmol  = mtop->molblock[mb].nmol;
 +        atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 +        for(i=0; i<atoms->nr; i++)
 +        {
 +            q = atoms->atom[i].q;
 +            qsum  += nmol*q;
 +            q2sum += nmol*q*q;
 +        }
 +    }
 +    fr->qsum[0]  = qsum;
 +    fr->q2sum[0] = q2sum;
 +    if (fr->efep != efepNO)
 +    {
 +        qsum  = 0;
 +        q2sum = 0;
 +        for(mb=0; mb<mtop->nmolblock; mb++)
 +        {
 +            nmol  = mtop->molblock[mb].nmol;
 +            atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 +            for(i=0; i<atoms->nr; i++)
 +            {
 +                q = atoms->atom[i].qB;
 +                qsum  += nmol*q;
 +                q2sum += nmol*q*q;
 +            }
 +            fr->qsum[1]  = qsum;
 +            fr->q2sum[1] = q2sum;
 +        }
 +    }
 +    else
 +    {
 +        fr->qsum[1]  = fr->qsum[0];
 +        fr->q2sum[1] = fr->q2sum[0];
 +    }
 +    if (log) {
 +        if (fr->efep == efepNO)
 +            fprintf(log,"System total charge: %.3f\n",fr->qsum[0]);
 +        else
 +            fprintf(log,"System total charge, top. A: %.3f top. B: %.3f\n",
 +                    fr->qsum[0],fr->qsum[1]);
 +    }
 +}
 +
 +void update_forcerec(FILE *log,t_forcerec *fr,matrix box)
 +{
 +    if (fr->eeltype == eelGRF)
 +    {
 +        calc_rffac(NULL,fr->eeltype,fr->epsilon_r,fr->epsilon_rf,
 +                   fr->rcoulomb,fr->temp,fr->zsquare,box,
 +                   &fr->kappa,&fr->k_rf,&fr->c_rf);
 +    }
 +}
 +
 +void set_avcsixtwelve(FILE *fplog,t_forcerec *fr,const gmx_mtop_t *mtop)
 +{
 +    const t_atoms *atoms,*atoms_tpi;
 +    const t_blocka *excl;
 +    int    mb,nmol,nmolc,i,j,tpi,tpj,j1,j2,k,n,nexcl,q;
 +#if (defined SIZEOF_LONG_LONG_INT) && (SIZEOF_LONG_LONG_INT >= 8)    
 +    long long int  npair,npair_ij,tmpi,tmpj;
 +#else
 +    double npair, npair_ij,tmpi,tmpj;
 +#endif
 +    double csix,ctwelve;
 +    int    ntp,*typecount;
 +    gmx_bool   bBHAM;
 +    real   *nbfp;
 +
 +    ntp = fr->ntype;
 +    bBHAM = fr->bBHAM;
 +    nbfp = fr->nbfp;
 +    
 +    for(q=0; q<(fr->efep==efepNO ? 1 : 2); q++) {
 +        csix = 0;
 +        ctwelve = 0;
 +        npair = 0;
 +        nexcl = 0;
 +        if (!fr->n_tpi) {
 +            /* Count the types so we avoid natoms^2 operations */
 +            snew(typecount,ntp);
 +            for(mb=0; mb<mtop->nmolblock; mb++) {
 +                nmol  = mtop->molblock[mb].nmol;
 +                atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 +                for(i=0; i<atoms->nr; i++) {
 +                    if (q == 0)
 +                    {
 +                        tpi = atoms->atom[i].type;
 +                    }
 +                    else
 +                    {
 +                        tpi = atoms->atom[i].typeB;
 +                    }
 +                    typecount[tpi] += nmol;
 +                }
 +            }
 +            for(tpi=0; tpi<ntp; tpi++) {
 +                for(tpj=tpi; tpj<ntp; tpj++) {
 +                    tmpi = typecount[tpi];
 +                    tmpj = typecount[tpj];
 +                    if (tpi != tpj)
 +                    {
 +                        npair_ij = tmpi*tmpj;
 +                    }
 +                    else
 +                    {
 +                        npair_ij = tmpi*(tmpi - 1)/2;
 +                    }
 +                    if (bBHAM) {
 +                        /* nbfp now includes the 6.0 derivative prefactor */
 +                        csix    += npair_ij*BHAMC(nbfp,ntp,tpi,tpj)/6.0;
 +                    } else {
 +                        /* nbfp now includes the 6.0/12.0 derivative prefactors */
 +                        csix    += npair_ij*   C6(nbfp,ntp,tpi,tpj)/6.0;
 +                        ctwelve += npair_ij*  C12(nbfp,ntp,tpi,tpj)/12.0;
 +                    }
 +                    npair += npair_ij;
 +                }
 +            }
 +            sfree(typecount);
 +            /* Subtract the excluded pairs.
 +             * The main reason for substracting exclusions is that in some cases
 +             * some combinations might never occur and the parameters could have
 +             * any value. These unused values should not influence the dispersion
 +             * correction.
 +             */
 +            for(mb=0; mb<mtop->nmolblock; mb++) {
 +                nmol  = mtop->molblock[mb].nmol;
 +                atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 +                excl  = &mtop->moltype[mtop->molblock[mb].type].excls;
 +                for(i=0; (i<atoms->nr); i++) {
 +                    if (q == 0)
 +                    {
 +                        tpi = atoms->atom[i].type;
 +                    }
 +                    else
 +                    {
 +                        tpi = atoms->atom[i].typeB;
 +                    }
 +                    j1  = excl->index[i];
 +                    j2  = excl->index[i+1];
 +                    for(j=j1; j<j2; j++) {
 +                        k = excl->a[j];
 +                        if (k > i)
 +                        {
 +                            if (q == 0)
 +                            {
 +                                tpj = atoms->atom[k].type;
 +                            }
 +                            else
 +                            {
 +                                tpj = atoms->atom[k].typeB;
 +                            }
 +                            if (bBHAM) {
 +                                /* nbfp now includes the 6.0 derivative prefactor */
 +                               csix -= nmol*BHAMC(nbfp,ntp,tpi,tpj)/6.0;
 +                            } else {
 +                                /* nbfp now includes the 6.0/12.0 derivative prefactors */
 +                                csix    -= nmol*C6 (nbfp,ntp,tpi,tpj)/6.0;
 +                                ctwelve -= nmol*C12(nbfp,ntp,tpi,tpj)/12.0;
 +                            }
 +                            nexcl += nmol;
 +                        }
 +                    }
 +                }
 +            }
 +        } else {
 +            /* Only correct for the interaction of the test particle
 +             * with the rest of the system.
 +             */
 +            atoms_tpi =
 +                &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
 +
 +            npair = 0;
 +            for(mb=0; mb<mtop->nmolblock; mb++) {
 +                nmol  = mtop->molblock[mb].nmol;
 +                atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 +                for(j=0; j<atoms->nr; j++) {
 +                    nmolc = nmol;
 +                    /* Remove the interaction of the test charge group
 +                     * with itself.
 +                     */
 +                    if (mb == mtop->nmolblock-1)
 +                    {
 +                        nmolc--;
 +                        
 +                        if (mb == 0 && nmol == 1)
 +                        {
 +                            gmx_fatal(FARGS,"Old format tpr with TPI, please generate a new tpr file");
 +                        }
 +                    }
 +                    if (q == 0)
 +                    {
 +                        tpj = atoms->atom[j].type;
 +                    }
 +                    else
 +                    {
 +                        tpj = atoms->atom[j].typeB;
 +                    }
 +                    for(i=0; i<fr->n_tpi; i++)
 +                    {
 +                        if (q == 0)
 +                        {
 +                            tpi = atoms_tpi->atom[i].type;
 +                        }
 +                        else
 +                        {
 +                            tpi = atoms_tpi->atom[i].typeB;
 +                        }
 +                        if (bBHAM)
 +                        {
 +                            /* nbfp now includes the 6.0 derivative prefactor */
 +                            csix    += nmolc*BHAMC(nbfp,ntp,tpi,tpj)/6.0;
 +                        }
 +                        else
 +                        {
 +                            /* nbfp now includes the 6.0/12.0 derivative prefactors */
 +                            csix    += nmolc*C6 (nbfp,ntp,tpi,tpj)/6.0;
 +                            ctwelve += nmolc*C12(nbfp,ntp,tpi,tpj)/12.0;
 +                        }
 +                        npair += nmolc;
 +                    }
 +                }
 +            }
 +        }
 +        if (npair - nexcl <= 0 && fplog) {
 +            fprintf(fplog,"\nWARNING: There are no atom pairs for dispersion correction\n\n");
 +            csix     = 0;
 +            ctwelve  = 0;
 +        } else {
 +            csix    /= npair - nexcl;
 +            ctwelve /= npair - nexcl;
 +        }
 +        if (debug) {
 +            fprintf(debug,"Counted %d exclusions\n",nexcl);
 +            fprintf(debug,"Average C6 parameter is: %10g\n",(double)csix);
 +            fprintf(debug,"Average C12 parameter is: %10g\n",(double)ctwelve);
 +        }
 +        fr->avcsix[q]    = csix;
 +        fr->avctwelve[q] = ctwelve;
 +    }
 +    if (fplog != NULL)
 +    {
 +        if (fr->eDispCorr == edispcAllEner ||
 +            fr->eDispCorr == edispcAllEnerPres)
 +        {
 +            fprintf(fplog,"Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
 +                    fr->avcsix[0],fr->avctwelve[0]);
 +        }
 +        else
 +        {
 +            fprintf(fplog,"Long Range LJ corr.: <C6> %10.4e\n",fr->avcsix[0]);
 +        }
 +    }
 +}
 +
 +
 +static void set_bham_b_max(FILE *fplog,t_forcerec *fr,
 +                           const gmx_mtop_t *mtop)
 +{
 +    const t_atoms *at1,*at2;
 +    int  mt1,mt2,i,j,tpi,tpj,ntypes;
 +    real b,bmin;
 +    real *nbfp;
 +
 +    if (fplog)
 +    {
 +        fprintf(fplog,"Determining largest Buckingham b parameter for table\n");
 +    }
 +    nbfp   = fr->nbfp;
 +    ntypes = fr->ntype;
 +    
 +    bmin           = -1;
 +    fr->bham_b_max = 0;
 +    for(mt1=0; mt1<mtop->nmoltype; mt1++)
 +    {
 +        at1 = &mtop->moltype[mt1].atoms;
 +        for(i=0; (i<at1->nr); i++)
 +        {
 +            tpi = at1->atom[i].type;
 +            if (tpi >= ntypes)
 +                gmx_fatal(FARGS,"Atomtype[%d] = %d, maximum = %d",i,tpi,ntypes);
 +            
 +            for(mt2=mt1; mt2<mtop->nmoltype; mt2++)
 +            {
 +                at2 = &mtop->moltype[mt2].atoms;
 +                for(j=0; (j<at2->nr); j++) {
 +                    tpj = at2->atom[j].type;
 +                    if (tpj >= ntypes)
 +                    {
 +                        gmx_fatal(FARGS,"Atomtype[%d] = %d, maximum = %d",j,tpj,ntypes);
 +                    }
 +                    b = BHAMB(nbfp,ntypes,tpi,tpj);
 +                    if (b > fr->bham_b_max)
 +                    {
 +                        fr->bham_b_max = b;
 +                    }
 +                    if ((b < bmin) || (bmin==-1))
 +                    {
 +                        bmin = b;
 +                    }
 +                }
 +            }
 +        }
 +    }
 +    if (fplog)
 +    {
 +        fprintf(fplog,"Buckingham b parameters, min: %g, max: %g\n",
 +                bmin,fr->bham_b_max);
 +    }
 +}
 +
 +static void make_nbf_tables(FILE *fp,const output_env_t oenv,
 +                            t_forcerec *fr,real rtab,
 +                            const t_commrec *cr,
 +                            const char *tabfn,char *eg1,char *eg2,
 +                            t_nblists *nbl)
 +{
 +    char buf[STRLEN];
 +    int i,j;
 +
 +    if (tabfn == NULL) {
 +        if (debug)
 +            fprintf(debug,"No table file name passed, can not read table, can not do non-bonded interactions\n");
 +        return;
 +    }
 +
 +    sprintf(buf,"%s",tabfn);
 +    if (eg1 && eg2)
 +    /* Append the two energy group names */
 +        sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1,"_%s_%s.%s",
 +                eg1,eg2,ftp2ext(efXVG));
 +    nbl->table_elec_vdw = make_tables(fp,oenv,fr,MASTER(cr),buf,rtab,0);
 +    /* Copy the contents of the table to separate coulomb and LJ tables too,
 +     * to improve cache performance.
 +     */
 +    /* For performance reasons we want
 +     * the table data to be aligned to 16-byte. The pointers could be freed
 +     * but currently aren't.
 +     */
 +    nbl->table_elec.interaction = GMX_TABLE_INTERACTION_ELEC;
 +    nbl->table_elec.format = nbl->table_elec_vdw.format;
 +    nbl->table_elec.r = nbl->table_elec_vdw.r;
 +    nbl->table_elec.n = nbl->table_elec_vdw.n;
 +    nbl->table_elec.scale = nbl->table_elec_vdw.scale;
 +    nbl->table_elec.scale_exp = nbl->table_elec_vdw.scale_exp;
 +    nbl->table_elec.formatsize = nbl->table_elec_vdw.formatsize;
 +    nbl->table_elec.ninteractions = 1;
 +    nbl->table_elec.stride = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
 +    snew_aligned(nbl->table_elec.data,nbl->table_elec.stride*(nbl->table_elec.n+1),16);
 +
 +    nbl->table_vdw.interaction = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
 +    nbl->table_vdw.format = nbl->table_elec_vdw.format;
 +    nbl->table_vdw.r = nbl->table_elec_vdw.r;
 +    nbl->table_vdw.n = nbl->table_elec_vdw.n;
 +    nbl->table_vdw.scale = nbl->table_elec_vdw.scale;
 +    nbl->table_vdw.scale_exp = nbl->table_elec_vdw.scale_exp;
 +    nbl->table_vdw.formatsize = nbl->table_elec_vdw.formatsize;
 +    nbl->table_vdw.ninteractions = 2;
 +    nbl->table_vdw.stride = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
 +    snew_aligned(nbl->table_vdw.data,nbl->table_vdw.stride*(nbl->table_vdw.n+1),16);
 +
 +    for(i=0; i<=nbl->table_elec_vdw.n; i++)
 +    {
 +        for(j=0; j<4; j++)
 +            nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
 +        for(j=0; j<8; j++)
 +            nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
 +    }
 +}
 +
 +static void count_tables(int ftype1,int ftype2,const gmx_mtop_t *mtop,
 +                         int *ncount,int **count)
 +{
 +    const gmx_moltype_t *molt;
 +    const t_ilist *il;
 +    int mt,ftype,stride,i,j,tabnr;
 +    
 +    for(mt=0; mt<mtop->nmoltype; mt++)
 +    {
 +        molt = &mtop->moltype[mt];
 +        for(ftype=0; ftype<F_NRE; ftype++)
 +        {
 +            if (ftype == ftype1 || ftype == ftype2) {
 +                il = &molt->ilist[ftype];
 +                stride = 1 + NRAL(ftype);
 +                for(i=0; i<il->nr; i+=stride) {
 +                    tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
 +                    if (tabnr < 0)
 +                        gmx_fatal(FARGS,"A bonded table number is smaller than 0: %d\n",tabnr);
 +                    if (tabnr >= *ncount) {
 +                        srenew(*count,tabnr+1);
 +                        for(j=*ncount; j<tabnr+1; j++)
 +                            (*count)[j] = 0;
 +                        *ncount = tabnr+1;
 +                    }
 +                    (*count)[tabnr]++;
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +static bondedtable_t *make_bonded_tables(FILE *fplog,
 +                                         int ftype1,int ftype2,
 +                                         const gmx_mtop_t *mtop,
 +                                         const char *basefn,const char *tabext)
 +{
 +    int  i,ncount,*count;
 +    char tabfn[STRLEN];
 +    bondedtable_t *tab;
 +    
 +    tab = NULL;
 +    
 +    ncount = 0;
 +    count = NULL;
 +    count_tables(ftype1,ftype2,mtop,&ncount,&count);
 +    
 +    if (ncount > 0) {
 +        snew(tab,ncount);
 +        for(i=0; i<ncount; i++) {
 +            if (count[i] > 0) {
 +                sprintf(tabfn,"%s",basefn);
 +                sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1,"_%s%d.%s",
 +                        tabext,i,ftp2ext(efXVG));
 +                tab[i] = make_bonded_table(fplog,tabfn,NRAL(ftype1)-2);
 +            }
 +        }
 +        sfree(count);
 +    }
 +  
 +    return tab;
 +}
 +
 +void forcerec_set_ranges(t_forcerec *fr,
 +                         int ncg_home,int ncg_force,
 +                         int natoms_force,
 +                         int natoms_force_constr,int natoms_f_novirsum)
 +{
 +    fr->cg0 = 0;
 +    fr->hcg = ncg_home;
 +
 +    /* fr->ncg_force is unused in the standard code,
 +     * but it can be useful for modified code dealing with charge groups.
 +     */
 +    fr->ncg_force           = ncg_force;
 +    fr->natoms_force        = natoms_force;
 +    fr->natoms_force_constr = natoms_force_constr;
 +
 +    if (fr->natoms_force_constr > fr->nalloc_force)
 +    {
 +        fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
 +
 +        if (fr->bTwinRange)
 +        {
 +            srenew(fr->f_twin,fr->nalloc_force);
 +        }
 +    }
 +
 +    if (fr->bF_NoVirSum)
 +    {
 +        fr->f_novirsum_n = natoms_f_novirsum;
 +        if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
 +        {
 +            fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
 +            srenew(fr->f_novirsum_alloc,fr->f_novirsum_nalloc);
 +        }
 +    }
 +    else
 +    {
 +        fr->f_novirsum_n = 0;
 +    }
 +}
 +
 +static real cutoff_inf(real cutoff)
 +{
 +    if (cutoff == 0)
 +    {
 +        cutoff = GMX_CUTOFF_INF;
 +    }
 +
 +    return cutoff;
 +}
 +
 +static void make_adress_tf_tables(FILE *fp,const output_env_t oenv,
 +                            t_forcerec *fr,const t_inputrec *ir,
 +                          const char *tabfn, const gmx_mtop_t *mtop,
 +                            matrix     box)
 +{
 +  char buf[STRLEN];
 +  int i,j;
 +
 +  if (tabfn == NULL) {
 +        gmx_fatal(FARGS,"No thermoforce table file given. Use -tabletf to specify a file\n");
 +    return;
 +  }
 +
 +  snew(fr->atf_tabs, ir->adress->n_tf_grps);
 +
 +  for (i=0; i<ir->adress->n_tf_grps; i++){
 +    j = ir->adress->tf_table_index[i]; /* get energy group index */
 +    sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1,"tf_%s.%s",
 +        *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]) ,ftp2ext(efXVG));
 +    printf("loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[j], buf);
 +    fr->atf_tabs[i] = make_atf_table(fp,oenv,fr,buf, box);
 +  }
 +
 +}
 +
 +gmx_bool can_use_allvsall(const t_inputrec *ir, const gmx_mtop_t *mtop,
 +                      gmx_bool bPrintNote,t_commrec *cr,FILE *fp)
 +{
 +    gmx_bool bAllvsAll;
 +
 +    bAllvsAll =
 +        (
 +         ir->rlist==0            &&
 +         ir->rcoulomb==0         &&
 +         ir->rvdw==0             &&
 +         ir->ePBC==epbcNONE      &&
 +         ir->vdwtype==evdwCUT    &&
 +         ir->coulombtype==eelCUT &&
 +         ir->efep==efepNO        &&
 +         (ir->implicit_solvent == eisNO || 
 +          (ir->implicit_solvent==eisGBSA && (ir->gb_algorithm==egbSTILL || 
 +                                             ir->gb_algorithm==egbHCT   || 
 +                                             ir->gb_algorithm==egbOBC))) &&
 +         getenv("GMX_NO_ALLVSALL") == NULL
 +            );
 +    
 +    if (bAllvsAll && ir->opts.ngener > 1)
 +    {
 +        const char *note="NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
 +
 +        if (bPrintNote)
 +        {
 +            if (MASTER(cr))
 +            {
 +                fprintf(stderr,"\n%s\n",note);
 +            }
 +            if (fp != NULL)
 +            {
 +                fprintf(fp,"\n%s\n",note);
 +            }
 +        }
 +        bAllvsAll = FALSE;
 +    }
 +
 +    if(bAllvsAll && fp && MASTER(cr))
 +    {
 +        fprintf(fp,"\nUsing accelerated all-vs-all kernels.\n\n");
 +    }
 +    
 +    return bAllvsAll;
 +}
 +
 +
 +static void init_forcerec_f_threads(t_forcerec *fr,int nenergrp)
 +{
 +    int t,i;
 +
 +    /* These thread local data structures are used for bondeds only */
 +    fr->nthreads = gmx_omp_nthreads_get(emntBonded);
 +
 +    if (fr->nthreads > 1)
 +    {
 +        snew(fr->f_t,fr->nthreads);
 +        /* Thread 0 uses the global force and energy arrays */
 +        for(t=1; t<fr->nthreads; t++)
 +        {
 +            fr->f_t[t].f = NULL;
 +            fr->f_t[t].f_nalloc = 0;
 +            snew(fr->f_t[t].fshift,SHIFTS);
 +            fr->f_t[t].grpp.nener = nenergrp*nenergrp;
 +            for(i=0; i<egNR; i++)
 +            {
 +                snew(fr->f_t[t].grpp.ener[i],fr->f_t[t].grpp.nener);
 +            }
 +        }
 +    }
 +}
 +
 +
 +static void pick_nbnxn_kernel_cpu(FILE *fp,
 +                                  const t_commrec *cr,
 +                                  const gmx_cpuid_t cpuid_info,
-                               int *kernel_type)
++                                  int *kernel_type,
++                                  int *ewald_excl)
 +{
 +    *kernel_type = nbk4x4_PlainC;
++    *ewald_excl  = ewaldexclTable;
 +
 +#ifdef GMX_X86_SSE2
 +    {
 +        /* On Intel Sandy-Bridge AVX-256 kernels are always faster.
 +         * On AMD Bulldozer AVX-256 is much slower than AVX-128.
 +         */
 +        if(gmx_cpuid_feature(cpuid_info, GMX_CPUID_FEATURE_X86_AVX) == 1 &&
 +           gmx_cpuid_vendor(cpuid_info) != GMX_CPUID_VENDOR_AMD)
 +        {
 +#ifdef GMX_X86_AVX_256
 +            *kernel_type = nbk4xN_X86_SIMD256;
 +#else
 +            *kernel_type = nbk4xN_X86_SIMD128;
 +#endif
 +        }
 +        else
 +        {
 +            *kernel_type = nbk4xN_X86_SIMD128;
 +        }
 +
 +        if (getenv("GMX_NBNXN_AVX128") != NULL)
 +        {
 +            *kernel_type = nbk4xN_X86_SIMD128;
 +        }
 +        if (getenv("GMX_NBNXN_AVX256") != NULL)
 +        {
 +#ifdef GMX_X86_AVX_256
 +            *kernel_type = nbk4xN_X86_SIMD256;
 +#else
 +            gmx_fatal(FARGS,"You requested AVX-256 nbnxn kernels, but GROMACS was built without AVX support");
 +#endif
 +        }
++
++        /* Analytical Ewald exclusion correction is only an option in the
++         * x86 SIMD kernel. This is faster in single precision
++         * on Bulldozer and slightly faster on Sandy Bridge.
++         */
++#if (defined GMX_X86_AVX_128_FMA || defined GMX_X86_AVX_256) && !defined GMX_DOUBLE
++        *ewald_excl = ewaldexclAnalytical;
++#endif
++        if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
++        {
++            *ewald_excl = ewaldexclTable;
++        }
++        if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
++        {
++            *ewald_excl = ewaldexclAnalytical;
++        }
++
 +    }
 +#endif /* GMX_X86_SSE2 */
 +}
 +
 +
 +/* Note that _mm_... intrinsics can be converted to either SSE or AVX
 + * depending on compiler flags.
 + * For gcc we check for __AVX__
 + * At least a check for icc should be added (if there is a macro)
 + */
 +static const char *nbk_name[] =
 +  { "not set", "plain C 4x4",
 +#if !(defined GMX_X86_AVX_256 || defined GMX_X86_AVX128_FMA || defined __AVX__)
 +#ifndef GMX_X86_SSE4_1
 +#ifndef GMX_DOUBLE
 +    "SSE2 4x4",
 +#else
 +    "SSE2 4x2",
 +#endif
 +#else
 +#ifndef GMX_DOUBLE
 +    "SSE4.1 4x4",
 +#else
 +    "SSE4.1 4x2",
 +#endif
 +#endif
 +#else
 +#ifndef GMX_DOUBLE
 +    "AVX-128 4x4",
 +#else
 +    "AVX-128 4x2",
 +#endif
 +#endif
 +#ifndef GMX_DOUBLE
 +    "AVX-256 4x8",
 +#else
 +    "AVX-256 4x4",
 +#endif
 +    "CUDA 8x8x8", "plain C 8x8x8" };
 +
 +static void pick_nbnxn_kernel(FILE *fp,
 +                              const t_commrec *cr,
 +                              const gmx_hw_info_t *hwinfo,
 +                              gmx_bool use_cpu_acceleration,
 +                              gmx_bool *bUseGPU,
-     gmx_bool bEmulateGPU, bGPU;
++                              int *kernel_type,
++                              int *ewald_excl,
++                              gmx_bool bDoNonbonded)
 +{
-     /* if bUseGPU == NULL we don't want a GPU (e.g. hybrid mode kernel selection) */
-     bGPU = (bUseGPU != NULL) && hwinfo->bCanUseGPU;
++    gmx_bool bEmulateGPU, bGPU, bEmulateGPUEnvVarSet;
 +    char gpu_err_str[STRLEN];
 +
 +    assert(kernel_type);
 +
 +    *kernel_type = nbkNotSet;
-     /* Run GPU emulation mode if GMX_EMULATE_GPU is defined or in case if nobonded
-        calculations are turned off via GMX_NO_NONBONDED -- this is the simple way
-        to turn off GPU/CUDA initializations as well.. */
-     bEmulateGPU = ((getenv("GMX_EMULATE_GPU") != NULL) ||
-                    (getenv("GMX_NO_NONBONDED") != NULL));
++    *ewald_excl  = ewaldexclTable;
 +
-     if (bGPU)
++    bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL);
 +
-              * list of detected/selected GPUs. */ 
++    /* if bUseGPU == NULL we don't want a GPU (e.g. hybrid mode kernel selection) */
++    bGPU = ((bUseGPU != NULL) && hwinfo->bCanUseGPU);
++
++    /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. We will
++     * automatically switch to emulation if non-bonded calculations are
++     * turned off via GMX_NO_NONBONDED - this is the simple and elegant
++     * way to turn off GPU initialization, data movement, and cleanup. */
++    bEmulateGPU = (bEmulateGPUEnvVarSet || (!bDoNonbonded && bGPU));
++
++    /* Enable GPU mode when GPUs are available or GPU emulation is requested.
++     * The latter is useful to assess the performance one can expect by adding
++     * GPU(s) to the machine. The conditional below allows this even if mdrun
++     * is compiled without GPU acceleration support.
++     * Note that such a GPU acceleration performance assessment should be
++     * carried out by setting the GMX_EMULATE_GPU and GMX_NO_NONBONDED env. vars
++     * (and freezing the system as otherwise it would explode). */
++    if (bGPU || bEmulateGPUEnvVarSet)
 +    {
 +        if (bEmulateGPU)
 +        {
 +            bGPU = FALSE;
 +        }
 +        else
 +        {
 +            /* Each PP node will use the intra-node id-th device from the
-                 /* At this point the init should never fail as we made sure that 
++             * list of detected/selected GPUs. */
 +            if (!init_gpu(cr->nodeid_group_intra, gpu_err_str, &hwinfo->gpu_info))
 +            {
-         md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
++                /* At this point the init should never fail as we made sure that
 +                 * we have all the GPUs we need. If it still does, we'll bail. */
 +                gmx_fatal(FARGS, "On node %d failed to initialize GPU #%d: %s",
 +                          cr->nodeid,
 +                          get_gpu_device_id(&hwinfo->gpu_info, cr->nodeid_group_intra),
 +                          gpu_err_str);
 +            }
 +        }
 +        *bUseGPU = bGPU;
 +    }
 +
 +    if (bEmulateGPU)
 +    {
 +        *kernel_type = nbk8x8x8_PlainC;
 +
-             pick_nbnxn_kernel_cpu(fp,cr,hwinfo->cpuid_info,kernel_type);
++        if (bDoNonbonded)
++        {
++            md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
++        }
 +    }
 +    else if (bGPU)
 +    {
 +        *kernel_type = nbk8x8x8_CUDA;
 +    }
 +
 +    if (*kernel_type == nbkNotSet)
 +    {
 +        if (use_cpu_acceleration)
 +        {
-     if (fp != NULL)
++            pick_nbnxn_kernel_cpu(fp,cr,hwinfo->cpuid_info,
++                                  kernel_type,ewald_excl);
 +        }
 +        else
 +        {
 +            *kernel_type = nbk4x4_PlainC;
 +        }
 +    }
 +
-                               &nbv->grp[i].kernel_type);
++    if (bDoNonbonded && fp != NULL)
 +    {
 +        if (MASTER(cr))
 +        {
 +            fprintf(stderr,"Using %s non-bonded kernels\n",
 +                    nbk_name[*kernel_type]);
 +        }
 +        fprintf(fp,"\nUsing %s non-bonded kernels\n\n",
 +                nbk_name[*kernel_type]);
 +    }
 +}
 +
 +gmx_bool uses_simple_tables(int cutoff_scheme,
 +                            nonbonded_verlet_t *nbv,
 +                            int group)
 +{
 +    gmx_bool bUsesSimpleTables = TRUE;
 +    int grp_index;
 +
 +    switch(cutoff_scheme)
 +    {
 +    case ecutsGROUP:
 +        bUsesSimpleTables = TRUE;
 +        break;
 +    case ecutsVERLET:
 +        assert(NULL != nbv && NULL != nbv->grp);
 +        grp_index = (group < 0) ? 0 : (nbv->ngrp - 1);
 +        bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
 +        break;
 +    default:
 +        gmx_incons("unimplemented");
 +    }
 +    return bUsesSimpleTables;
 +}
 +
 +static void init_ewald_f_table(interaction_const_t *ic,
 +                               gmx_bool bUsesSimpleTables,
 +                               real rtab)
 +{
 +    real maxr;
 +
 +    if (bUsesSimpleTables)
 +    {
 +        /* With a spacing of 0.0005 we are at the force summation accuracy
 +         * for the SSE kernels for "normal" atomistic simulations.
 +         */
 +        ic->tabq_scale = ewald_spline3_table_scale(ic->ewaldcoeff,
 +                                                   ic->rcoulomb);
 +        
 +        maxr = (rtab>ic->rcoulomb) ? rtab : ic->rcoulomb;
 +        ic->tabq_size  = (int)(maxr*ic->tabq_scale) + 2;
 +    }
 +    else
 +    {
 +        ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
 +        /* Subtract 2 iso 1 to avoid access out of range due to rounding */
 +        ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
 +    }
 +
 +    sfree_aligned(ic->tabq_coul_FDV0);
 +    sfree_aligned(ic->tabq_coul_F);
 +    sfree_aligned(ic->tabq_coul_V);
 +
 +    /* Create the original table data in FDV0 */
 +    snew_aligned(ic->tabq_coul_FDV0,ic->tabq_size*4,16);
 +    snew_aligned(ic->tabq_coul_F,ic->tabq_size,16);
 +    snew_aligned(ic->tabq_coul_V,ic->tabq_size,16);
 +    table_spline3_fill_ewald_lr(ic->tabq_coul_F,ic->tabq_coul_V,ic->tabq_coul_FDV0,
 +                                ic->tabq_size,1/ic->tabq_scale,ic->ewaldcoeff);
 +}
 +
 +void init_interaction_const_tables(FILE *fp, 
 +                                   interaction_const_t *ic,
 +                                   gmx_bool bUsesSimpleTables,
 +                                   real rtab)
 +{
 +    real spacing;
 +
 +    if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
 +    {
 +        init_ewald_f_table(ic,bUsesSimpleTables,rtab);
 +
 +        if (fp != NULL)
 +        {
 +            fprintf(fp,"Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
 +                    1/ic->tabq_scale,ic->tabq_size);
 +        }
 +    }
 +}
 +
 +void init_interaction_const(FILE *fp, 
 +                            interaction_const_t **interaction_const,
 +                            const t_forcerec *fr,
 +                            real  rtab)
 +{
 +    interaction_const_t *ic;
 +    gmx_bool bUsesSimpleTables = TRUE;
 +
 +    snew(ic, 1);
 +
 +    /* Just allocate something so we can free it */
 +    snew_aligned(ic->tabq_coul_FDV0,16,16);
 +    snew_aligned(ic->tabq_coul_F,16,16);
 +    snew_aligned(ic->tabq_coul_V,16,16);
 +
 +    ic->rlist       = fr->rlist;
 +    ic->rlistlong   = fr->rlistlong;
 +    
 +    /* Lennard-Jones */
 +    ic->rvdw        = fr->rvdw;
 +    if (fr->vdw_modifier==eintmodPOTSHIFT)
 +    {
 +        ic->sh_invrc6 = pow(ic->rvdw,-6.0);
 +    }
 +    else
 +    {
 +        ic->sh_invrc6 = 0;
 +    }
 +
 +    /* Electrostatics */
 +    ic->eeltype     = fr->eeltype;
 +    ic->rcoulomb    = fr->rcoulomb;
 +    ic->epsilon_r   = fr->epsilon_r;
 +    ic->epsfac      = fr->epsfac;
 +
 +    /* Ewald */
 +    ic->ewaldcoeff  = fr->ewaldcoeff;
 +    if (fr->coulomb_modifier==eintmodPOTSHIFT)
 +    {
 +        ic->sh_ewald = gmx_erfc(ic->ewaldcoeff*ic->rcoulomb);
 +    }
 +    else
 +    {
 +        ic->sh_ewald = 0;
 +    }
 +
 +    /* Reaction-field */
 +    if (EEL_RF(ic->eeltype))
 +    {
 +        ic->epsilon_rf = fr->epsilon_rf;
 +        ic->k_rf       = fr->k_rf;
 +        ic->c_rf       = fr->c_rf;
 +    }
 +    else
 +    {
 +        /* For plain cut-off we might use the reaction-field kernels */
 +        ic->epsilon_rf = ic->epsilon_r;
 +        ic->k_rf       = 0;
 +        if (fr->coulomb_modifier==eintmodPOTSHIFT)
 +        {
 +            ic->c_rf   = 1/ic->rcoulomb;
 +        }
 +        else
 +        {
 +            ic->c_rf   = 0;
 +        }
 +    }
 +
 +    if (fp != NULL)
 +    {
 +        fprintf(fp,"Potential shift: LJ r^-12: %.3f r^-6 %.3f",
 +                sqr(ic->sh_invrc6),ic->sh_invrc6);
 +        if (ic->eeltype == eelCUT)
 +        {
 +            fprintf(fp,", Coulomb %.3f",ic->c_rf);
 +        }
 +        else if (EEL_PME(ic->eeltype))
 +        {
 +            fprintf(fp,", Ewald %.3e",ic->sh_ewald);
 +        }
 +        fprintf(fp,"\n");
 +    }
 +
 +    *interaction_const = ic;
 +
 +    if (fr->nbv != NULL && fr->nbv->bUseGPU)
 +    {
 +        nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv);
 +    }
 +
 +    bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
 +    init_interaction_const_tables(fp,ic,bUsesSimpleTables,rtab);
 +}
 +
 +static void init_nb_verlet(FILE *fp,
 +                           nonbonded_verlet_t **nb_verlet,
 +                           const t_inputrec *ir,
 +                           const t_forcerec *fr,
 +                           const t_commrec *cr,
 +                           const char *nbpu_opt)
 +{
 +    nonbonded_verlet_t *nbv;
 +    int  i;
 +    char *env;
 +    gmx_bool bHybridGPURun = FALSE;
 +
 +    nbnxn_alloc_t *nb_alloc;
 +    nbnxn_free_t  *nb_free;
 +
 +    snew(nbv, 1);
 +
 +    nbv->nbs = NULL;
 +
 +    nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
 +    for(i=0; i<nbv->ngrp; i++)
 +    {
 +        nbv->grp[i].nbl_lists.nnbl = 0;
 +        nbv->grp[i].nbat           = NULL;
 +        nbv->grp[i].kernel_type    = nbkNotSet;
 +
 +        if (i == 0) /* local */
 +        {
 +            pick_nbnxn_kernel(fp, cr, fr->hwinfo, fr->use_cpu_acceleration,
 +                              &nbv->bUseGPU,
-                                   &nbv->grp[i].kernel_type);
++                              &nbv->grp[i].kernel_type,
++                              &nbv->grp[i].ewald_excl,
++                              fr->bNonbonded);
 +        }
 +        else /* non-local */
 +        {
 +            if (nbpu_opt != NULL && strcmp(nbpu_opt,"gpu_cpu") == 0)
 +            {
 +                /* Use GPU for local, select a CPU kernel for non-local */
 +                pick_nbnxn_kernel(fp, cr, fr->hwinfo, fr->use_cpu_acceleration,
 +                                  NULL,
++                                  &nbv->grp[i].kernel_type,
++                                  &nbv->grp[i].ewald_excl,
++                                  fr->bNonbonded);
 +
 +                bHybridGPURun = TRUE;
 +            }
 +            else
 +            {
 +                /* Use the same kernel for local and non-local interactions */
 +                nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
++                nbv->grp[i].ewald_excl  = nbv->grp[0].ewald_excl;
 +            }
 +        }
 +    }
 +
 +    if (nbv->bUseGPU)
 +    {
 +        /* init the NxN GPU data; the last argument tells whether we'll have
 +         * both local and non-local NB calculation on GPU */
 +        nbnxn_cuda_init(fp, &nbv->cu_nbv,
 +                        &fr->hwinfo->gpu_info, cr->nodeid_group_intra,
 +                        (nbv->ngrp > 1) && !bHybridGPURun);
 +
 +        if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
 +        {
 +            char *end;
 +
 +            nbv->min_ci_balanced = strtol(env, &end, 10);
 +            if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
 +            {
 +                gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
 +            }
 +
 +            if (debug)
 +            {
 +                fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n", 
 +                        nbv->min_ci_balanced);
 +            }
 +        }
 +        else
 +        {
 +            nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
 +            if (debug)
 +            {
 +                fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
 +                        nbv->min_ci_balanced);
 +            }
 +        }
 +    }
 +    else
 +    {
 +        nbv->min_ci_balanced = 0;
 +    }
 +
 +    *nb_verlet = nbv;
 +
 +    nbnxn_init_search(&nbv->nbs,
 +                      DOMAINDECOMP(cr) ? & cr->dd->nc : NULL,
 +                      DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
 +                      gmx_omp_nthreads_get(emntNonbonded));
 +
 +    for(i=0; i<nbv->ngrp; i++)
 +    {
 +        if (nbv->grp[0].kernel_type == nbk8x8x8_CUDA)
 +        {
 +            nb_alloc = &pmalloc;
 +            nb_free  = &pfree;
 +        }
 +        else
 +        {
 +            nb_alloc = NULL;
 +            nb_free  = NULL;
 +        }
 +
 +        nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
 +                                nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
 +                                /* 8x8x8 "non-simple" lists are ATM always combined */
 +                                !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
 +                                nb_alloc, nb_free);
 +
 +        if (i == 0 ||
 +            nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
 +        {
 +            snew(nbv->grp[i].nbat,1);
 +            nbnxn_atomdata_init(fp,
 +                                nbv->grp[i].nbat,
 +                                nbv->grp[i].kernel_type,
 +                                fr->ntype,fr->nbfp,
 +                                ir->opts.ngener,
 +                                nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type) ? gmx_omp_nthreads_get(emntNonbonded) : 1,
 +                                nb_alloc, nb_free);
 +        }
 +        else
 +        {
 +            nbv->grp[i].nbat = nbv->grp[0].nbat;
 +        }
 +    }
 +}
 +
 +void init_forcerec(FILE *fp,
 +                   const output_env_t oenv,
 +                   t_forcerec *fr,
 +                   t_fcdata   *fcd,
 +                   const t_inputrec *ir,
 +                   const gmx_mtop_t *mtop,
 +                   const t_commrec  *cr,
 +                   matrix     box,
 +                   gmx_bool       bMolEpot,
 +                   const char *tabfn,
 +                   const char *tabafn,
 +                   const char *tabpfn,
 +                   const char *tabbfn,
 +                   const char *nbpu_opt,
 +                   gmx_bool   bNoSolvOpt,
 +                   real       print_force)
 +{
 +    int     i,j,m,natoms,ngrp,negp_pp,negptable,egi,egj;
 +    real    rtab;
 +    char    *env;
 +    double  dbl;
 +    rvec    box_size;
 +    const t_block *cgs;
 +    gmx_bool    bGenericKernelOnly;
 +    gmx_bool    bTab,bSep14tab,bNormalnblists;
 +    t_nblists *nbl;
 +    int     *nm_ind,egp_flags;
 +    
 +    /* By default we turn acceleration on, but it might be turned off further down... */
 +    fr->use_cpu_acceleration = TRUE;
 +
 +    fr->bDomDec = DOMAINDECOMP(cr);
 +
 +    natoms = mtop->natoms;
 +
 +    if (check_box(ir->ePBC,box))
 +    {
 +        gmx_fatal(FARGS,check_box(ir->ePBC,box));
 +    }
 +    
 +    /* Test particle insertion ? */
 +    if (EI_TPI(ir->eI)) {
 +        /* Set to the size of the molecule to be inserted (the last one) */
 +        /* Because of old style topologies, we have to use the last cg
 +         * instead of the last molecule type.
 +         */
 +        cgs = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
 +        fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
 +        if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1]) {
 +            gmx_fatal(FARGS,"The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
 +        }
 +    } else {
 +        fr->n_tpi = 0;
 +    }
 +    
 +    /* Copy AdResS parameters */
 +    if (ir->bAdress) {
 +      fr->adress_type     = ir->adress->type;
 +      fr->adress_const_wf = ir->adress->const_wf;
 +      fr->adress_ex_width = ir->adress->ex_width;
 +      fr->adress_hy_width = ir->adress->hy_width;
 +      fr->adress_icor     = ir->adress->icor;
 +      fr->adress_site     = ir->adress->site;
 +      fr->adress_ex_forcecap = ir->adress->ex_forcecap;
 +      fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
 +
 +
 +      snew(fr->adress_group_explicit , ir->adress->n_energy_grps);
 +      for (i=0; i< ir->adress->n_energy_grps; i++){
 +          fr->adress_group_explicit[i]= ir->adress->group_explicit[i];
 +      }
 +
 +      fr->n_adress_tf_grps = ir->adress->n_tf_grps;
 +      snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
 +      for (i=0; i< fr->n_adress_tf_grps; i++){
 +          fr->adress_tf_table_index[i]= ir->adress->tf_table_index[i];
 +      }
 +      copy_rvec(ir->adress->refs,fr->adress_refs);
 +    } else {
 +      fr->adress_type = eAdressOff;
 +      fr->adress_do_hybridpairs = FALSE;
 +    }
 +    
 +    /* Copy the user determined parameters */
 +    fr->userint1 = ir->userint1;
 +    fr->userint2 = ir->userint2;
 +    fr->userint3 = ir->userint3;
 +    fr->userint4 = ir->userint4;
 +    fr->userreal1 = ir->userreal1;
 +    fr->userreal2 = ir->userreal2;
 +    fr->userreal3 = ir->userreal3;
 +    fr->userreal4 = ir->userreal4;
 +    
 +    /* Shell stuff */
 +    fr->fc_stepsize = ir->fc_stepsize;
 +    
 +    /* Free energy */
 +    fr->efep       = ir->efep;
 +    fr->sc_alphavdw = ir->fepvals->sc_alpha;
 +    if (ir->fepvals->bScCoul)
 +    {
 +        fr->sc_alphacoul = ir->fepvals->sc_alpha;
 +        fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min,6);
 +    }
 +    else
 +    {
 +        fr->sc_alphacoul = 0;
 +        fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
 +    }
 +    fr->sc_power   = ir->fepvals->sc_power;
 +    fr->sc_r_power   = ir->fepvals->sc_r_power;
 +    fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma,6);
 +
 +    env = getenv("GMX_SCSIGMA_MIN");
 +    if (env != NULL)
 +    {
 +        dbl = 0;
 +        sscanf(env,"%lf",&dbl);
 +        fr->sc_sigma6_min = pow(dbl,6);
 +        if (fp)
 +        {
 +            fprintf(fp,"Setting the minimum soft core sigma to %g nm\n",dbl);
 +        }
 +    }
 +
 +    fr->bNonbonded = TRUE;
 +    if (getenv("GMX_NO_NONBONDED") != NULL)
 +    {
 +        /* turn off non-bonded calculations */
 +        fr->bNonbonded = FALSE;
 +        md_print_warn(cr,fp,
 +                      "Found environment variable GMX_NO_NONBONDED.\n"
 +                      "Disabling nonbonded calculations.\n");
 +    }
 +
 +    bGenericKernelOnly = FALSE;
 +
 +    /* We now check in the NS code whether a particular combination of interactions
 +     * can be used with water optimization, and disable it if that is not the case.
 +     */
 +
 +    if (getenv("GMX_NB_GENERIC") != NULL)
 +    {
 +        if (fp != NULL)
 +        {
 +            fprintf(fp,
 +                    "Found environment variable GMX_NB_GENERIC.\n"
 +                    "Disabling all interaction-specific nonbonded kernels, will only\n"
 +                    "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
 +        }
 +        bGenericKernelOnly = TRUE;
 +    }
 +
 +    if (bGenericKernelOnly==TRUE)
 +    {
 +        bNoSolvOpt         = TRUE;
 +    }
 +
 +    if( (getenv("GMX_DISABLE_CPU_ACCELERATION") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
 +    {
 +        fr->use_cpu_acceleration = FALSE;
 +        if (fp != NULL)
 +        {
 +            fprintf(fp,
 +                    "\nFound environment variable GMX_DISABLE_CPU_ACCELERATION.\n"
 +                    "Disabling all CPU architecture-specific (e.g. SSE2/SSE4/AVX) routines.\n\n");
 +        }
 +    }
 +
 +    fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
 +
 +    /* Check if we can/should do all-vs-all kernels */
 +    fr->bAllvsAll       = can_use_allvsall(ir,mtop,FALSE,NULL,NULL);
 +    fr->AllvsAll_work   = NULL;
 +    fr->AllvsAll_workgb = NULL;
 +
 +
 +    /* Neighbour searching stuff */
 +    fr->cutoff_scheme = ir->cutoff_scheme;
 +    fr->bGrid         = (ir->ns_type == ensGRID);
 +    fr->ePBC          = ir->ePBC;
 +
 +    /* Determine if we will do PBC for distances in bonded interactions */
 +    if (fr->ePBC == epbcNONE)
 +    {
 +        fr->bMolPBC = FALSE;
 +    }
 +    else
 +    {
 +        if (!DOMAINDECOMP(cr))
 +        {
 +            /* The group cut-off scheme and SHAKE assume charge groups
 +             * are whole, but not using molpbc is faster in most cases.
 +             */
 +            if (fr->cutoff_scheme == ecutsGROUP ||
 +                (ir->eConstrAlg == econtSHAKE &&
 +                 (gmx_mtop_ftype_count(mtop,F_CONSTR) > 0 ||
 +                  gmx_mtop_ftype_count(mtop,F_CONSTRNC) > 0)))
 +            {
 +                fr->bMolPBC = ir->bPeriodicMols;
 +            }
 +            else
 +            {
 +                fr->bMolPBC = TRUE;
 +                if (getenv("GMX_USE_GRAPH") != NULL)
 +                {
 +                    fr->bMolPBC = FALSE;
 +                    if (fp)
 +                    {
 +                        fprintf(fp,"\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
 +                    }
 +                }
 +            }
 +        }
 +        else
 +        {
 +            fr->bMolPBC = dd_bonded_molpbc(cr->dd,fr->ePBC);
 +        }
 +    }
 +
 +    fr->rc_scaling = ir->refcoord_scaling;
 +    copy_rvec(ir->posres_com,fr->posres_com);
 +    copy_rvec(ir->posres_comB,fr->posres_comB);
 +    fr->rlist      = cutoff_inf(ir->rlist);
 +    fr->rlistlong  = cutoff_inf(ir->rlistlong);
 +    fr->eeltype    = ir->coulombtype;
 +    fr->vdwtype    = ir->vdwtype;
 +
 +    fr->coulomb_modifier = ir->coulomb_modifier;
 +    fr->vdw_modifier     = ir->vdw_modifier;
 +
 +    /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
 +    switch(fr->eeltype)
 +    {
 +        case eelCUT:
 +            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_COULOMB;
 +            break;
 +
 +        case eelRF:
 +        case eelGRF:
 +        case eelRF_NEC:
 +            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
 +            break;
 +
 +        case eelRF_ZERO:
 +            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
 +            fr->coulomb_modifier          = eintmodEXACTCUTOFF;
 +            break;
 +
 +        case eelSWITCH:
 +        case eelSHIFT:
 +        case eelUSER:
 +        case eelENCADSHIFT:
 +        case eelPMESWITCH:
 +        case eelPMEUSER:
 +        case eelPMEUSERSWITCH:
 +            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
 +            break;
 +
 +        case eelPME:
 +        case eelEWALD:
 +            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
 +            break;
 +
 +        default:
 +            gmx_fatal(FARGS,"Unsupported electrostatic interaction: %s",eel_names[fr->eeltype]);
 +            break;
 +    }
 +
 +    /* Vdw: Translate from mdp settings to kernel format */
 +    switch(fr->vdwtype)
 +    {
 +        case evdwCUT:
 +            if(fr->bBHAM)
 +            {
 +                fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
 +            }
 +            else
 +            {
 +                fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
 +            }
 +            break;
 +
 +        case evdwSWITCH:
 +        case evdwSHIFT:
 +        case evdwUSER:
 +        case evdwENCADSHIFT:
 +            fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
 +            break;
 +
 +        default:
 +            gmx_fatal(FARGS,"Unsupported vdw interaction: %s",evdw_names[fr->vdwtype]);
 +            break;
 +    }
 +
 +    /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
 +    fr->nbkernel_elec_modifier    = fr->coulomb_modifier;
 +    fr->nbkernel_vdw_modifier     = fr->vdw_modifier;
 +
 +    fr->bTwinRange = fr->rlistlong > fr->rlist;
 +    fr->bEwald     = (EEL_PME(fr->eeltype) || fr->eeltype==eelEWALD);
 +    
 +    fr->reppow     = mtop->ffparams.reppow;
 +
 +    if (ir->cutoff_scheme == ecutsGROUP)
 +    {
 +        fr->bvdwtab    = (fr->vdwtype != evdwCUT ||
 +                          !gmx_within_tol(fr->reppow,12.0,10*GMX_DOUBLE_EPS));
 +        /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
 +        fr->bcoultab   = !(fr->eeltype == eelCUT ||
 +                           fr->eeltype == eelEWALD ||
 +                           fr->eeltype == eelPME ||
 +                           fr->eeltype == eelRF ||
 +                           fr->eeltype == eelRF_ZERO);
 +
 +        /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
 +         * going to be faster to tabulate the interaction than calling the generic kernel.
 +         */
 +        if(fr->nbkernel_elec_modifier==eintmodPOTSWITCH && fr->nbkernel_vdw_modifier==eintmodPOTSWITCH)
 +        {
 +            if((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
 +            {
 +                fr->bcoultab = TRUE;
 +            }
 +        }
 +        else if((fr->nbkernel_elec_modifier==eintmodPOTSHIFT && fr->nbkernel_vdw_modifier==eintmodPOTSHIFT) ||
 +                ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
 +                  fr->nbkernel_elec_modifier==eintmodEXACTCUTOFF &&
 +                  (fr->nbkernel_vdw_modifier==eintmodPOTSWITCH || fr->nbkernel_vdw_modifier==eintmodPOTSHIFT))))
 +        {
 +            if(fr->rcoulomb != fr->rvdw)
 +            {
 +                fr->bcoultab = TRUE;
 +            }
 +        }
 +
 +        if (getenv("GMX_REQUIRE_TABLES"))
 +        {
 +            fr->bvdwtab  = TRUE;
 +            fr->bcoultab = TRUE;
 +        }
 +
 +        if (fp)
 +        {
 +            fprintf(fp,"Table routines are used for coulomb: %s\n",bool_names[fr->bcoultab]);
 +            fprintf(fp,"Table routines are used for vdw:     %s\n",bool_names[fr->bvdwtab ]);
 +        }
 +
 +        if(fr->bvdwtab==TRUE)
 +        {
 +            fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
 +            fr->nbkernel_vdw_modifier    = eintmodNONE;
 +        }
 +        if(fr->bcoultab==TRUE)
 +        {
 +            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
 +            fr->nbkernel_elec_modifier    = eintmodNONE;
 +        }
 +    }
 +
 +    if (ir->cutoff_scheme == ecutsVERLET)
 +    {
 +        if (!gmx_within_tol(fr->reppow,12.0,10*GMX_DOUBLE_EPS))
 +        {
 +            gmx_fatal(FARGS,"Cut-off scheme %S only supports LJ repulsion power 12",ecutscheme_names[ir->cutoff_scheme]);
 +        }
 +        fr->bvdwtab  = FALSE;
 +        fr->bcoultab = FALSE;
 +    }
 +    
 +    /* Tables are used for direct ewald sum */
 +    if(fr->bEwald)
 +    {
 +        if (EEL_PME(ir->coulombtype))
 +        {
 +            if (fp)
 +                fprintf(fp,"Will do PME sum in reciprocal space.\n");
 +            if (ir->coulombtype == eelP3M_AD)
 +            {
 +                please_cite(fp,"Hockney1988");
 +                please_cite(fp,"Ballenegger2012");
 +            }
 +            else
 +            {
 +                please_cite(fp,"Essmann95a");
 +            }
 +            
 +            if (ir->ewald_geometry == eewg3DC)
 +            {
 +                if (fp)
 +                {
 +                    fprintf(fp,"Using the Ewald3DC correction for systems with a slab geometry.\n");
 +                }
 +                please_cite(fp,"In-Chul99a");
 +            }
 +        }
 +        fr->ewaldcoeff=calc_ewaldcoeff(ir->rcoulomb, ir->ewald_rtol);
 +        init_ewald_tab(&(fr->ewald_table), cr, ir, fp);
 +        if (fp)
 +        {
 +            fprintf(fp,"Using a Gaussian width (1/beta) of %g nm for Ewald\n",
 +                    1/fr->ewaldcoeff);
 +        }
 +    }
 +    
 +    /* Electrostatics */
 +    fr->epsilon_r  = ir->epsilon_r;
 +    fr->epsilon_rf = ir->epsilon_rf;
 +    fr->fudgeQQ    = mtop->ffparams.fudgeQQ;
 +    fr->rcoulomb_switch = ir->rcoulomb_switch;
 +    fr->rcoulomb        = cutoff_inf(ir->rcoulomb);
 +    
 +    /* Parameters for generalized RF */
 +    fr->zsquare = 0.0;
 +    fr->temp    = 0.0;
 +    
 +    if (fr->eeltype == eelGRF)
 +    {
 +        init_generalized_rf(fp,mtop,ir,fr);
 +    }
 +    else if (fr->eeltype == eelSHIFT)
 +    {
 +        for(m=0; (m<DIM); m++)
 +            box_size[m]=box[m][m];
 +        
 +        if ((fr->eeltype == eelSHIFT && fr->rcoulomb > fr->rcoulomb_switch))
 +            set_shift_consts(fp,fr->rcoulomb_switch,fr->rcoulomb,box_size,fr);
 +    }
 +    
 +    fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) ||
 +                       gmx_mtop_ftype_count(mtop,F_POSRES) > 0 ||
 +                       gmx_mtop_ftype_count(mtop,F_FBPOSRES) > 0 ||
 +                       IR_ELEC_FIELD(*ir) ||
 +                       (fr->adress_icor != eAdressICOff)
 +                      );
 +    
 +    if (fr->cutoff_scheme == ecutsGROUP &&
 +        ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr)) {
 +        /* Count the total number of charge groups */
 +        fr->cg_nalloc = ncg_mtop(mtop);
 +        srenew(fr->cg_cm,fr->cg_nalloc);
 +    }
 +    if (fr->shift_vec == NULL)
 +        snew(fr->shift_vec,SHIFTS);
 +    
 +    if (fr->fshift == NULL)
 +        snew(fr->fshift,SHIFTS);
 +    
 +    if (fr->nbfp == NULL) {
 +        fr->ntype = mtop->ffparams.atnr;
 +        fr->nbfp  = mk_nbfp(&mtop->ffparams,fr->bBHAM);
 +    }
 +    
 +    /* Copy the energy group exclusions */
 +    fr->egp_flags = ir->opts.egp_flags;
 +    
 +    /* Van der Waals stuff */
 +    fr->rvdw        = cutoff_inf(ir->rvdw);
 +    fr->rvdw_switch = ir->rvdw_switch;
 +    if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM) {
 +        if (fr->rvdw_switch >= fr->rvdw)
 +            gmx_fatal(FARGS,"rvdw_switch (%f) must be < rvdw (%f)",
 +                      fr->rvdw_switch,fr->rvdw);
 +        if (fp)
 +            fprintf(fp,"Using %s Lennard-Jones, switch between %g and %g nm\n",
 +                    (fr->eeltype==eelSWITCH) ? "switched":"shifted",
 +                    fr->rvdw_switch,fr->rvdw);
 +    } 
 +    
 +    if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
 +        gmx_fatal(FARGS,"Switch/shift interaction not supported with Buckingham");
 +    
 +    if (fp)
 +        fprintf(fp,"Cut-off's:   NS: %g   Coulomb: %g   %s: %g\n",
 +                fr->rlist,fr->rcoulomb,fr->bBHAM ? "BHAM":"LJ",fr->rvdw);
 +    
 +    fr->eDispCorr = ir->eDispCorr;
 +    if (ir->eDispCorr != edispcNO)
 +    {
 +        set_avcsixtwelve(fp,fr,mtop);
 +    }
 +    
 +    if (fr->bBHAM)
 +    {
 +        set_bham_b_max(fp,fr,mtop);
 +    }
 +
 +    fr->bGB = (ir->implicit_solvent == eisGBSA);
 +      fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
 +
 +    /* Copy the GBSA data (radius, volume and surftens for each
 +     * atomtype) from the topology atomtype section to forcerec.
 +     */
 +    snew(fr->atype_radius,fr->ntype);
 +    snew(fr->atype_vol,fr->ntype);
 +    snew(fr->atype_surftens,fr->ntype);
 +    snew(fr->atype_gb_radius,fr->ntype);
 +    snew(fr->atype_S_hct,fr->ntype);
 +
 +    if (mtop->atomtypes.nr > 0)
 +    {
 +        for(i=0;i<fr->ntype;i++)
 +            fr->atype_radius[i] =mtop->atomtypes.radius[i];
 +        for(i=0;i<fr->ntype;i++)
 +            fr->atype_vol[i] = mtop->atomtypes.vol[i];
 +        for(i=0;i<fr->ntype;i++)
 +            fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
 +        for(i=0;i<fr->ntype;i++)
 +            fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
 +        for(i=0;i<fr->ntype;i++)
 +            fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
 +    }  
 +      
 +      /* Generate the GB table if needed */
 +      if(fr->bGB)
 +      {
 +#ifdef GMX_DOUBLE
 +              fr->gbtabscale=2000;
 +#else
 +              fr->gbtabscale=500;
 +#endif
 +              
 +              fr->gbtabr=100;
 +              fr->gbtab=make_gb_table(fp,oenv,fr,tabpfn,fr->gbtabscale);
 +
 +        init_gb(&fr->born,cr,fr,ir,mtop,ir->rgbradii,ir->gb_algorithm);
 +
 +        /* Copy local gb data (for dd, this is done in dd_partition_system) */
 +        if (!DOMAINDECOMP(cr))
 +        {
 +            make_local_gb(cr,fr->born,ir->gb_algorithm);
 +        }
 +    }
 +
 +    /* Set the charge scaling */
 +    if (fr->epsilon_r != 0)
 +        fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
 +    else
 +        /* eps = 0 is infinite dieletric: no coulomb interactions */
 +        fr->epsfac = 0;
 +    
 +    /* Reaction field constants */
 +    if (EEL_RF(fr->eeltype))
 +        calc_rffac(fp,fr->eeltype,fr->epsilon_r,fr->epsilon_rf,
 +                   fr->rcoulomb,fr->temp,fr->zsquare,box,
 +                   &fr->kappa,&fr->k_rf,&fr->c_rf);
 +    
 +    set_chargesum(fp,fr,mtop);
 +    
 +    /* if we are using LR electrostatics, and they are tabulated,
 +     * the tables will contain modified coulomb interactions.
 +     * Since we want to use the non-shifted ones for 1-4
 +     * coulombic interactions, we must have an extra set of tables.
 +     */
 +    
 +    /* Construct tables.
 +     * A little unnecessary to make both vdw and coul tables sometimes,
 +     * but what the heck... */
 +    
 +    bTab = fr->bcoultab || fr->bvdwtab || fr->bEwald;
 +
 +    bSep14tab = ((!bTab || fr->eeltype!=eelCUT || fr->vdwtype!=evdwCUT ||
 +                  fr->bBHAM || fr->bEwald) &&
 +                 (gmx_mtop_ftype_count(mtop,F_LJ14) > 0 ||
 +                  gmx_mtop_ftype_count(mtop,F_LJC14_Q) > 0 ||
 +                  gmx_mtop_ftype_count(mtop,F_LJC_PAIRS_NB) > 0));
 +
 +    negp_pp = ir->opts.ngener - ir->nwall;
 +    negptable = 0;
 +    if (!bTab) {
 +        bNormalnblists = TRUE;
 +        fr->nnblists = 1;
 +    } else {
 +        bNormalnblists = (ir->eDispCorr != edispcNO);
 +        for(egi=0; egi<negp_pp; egi++) {
 +            for(egj=egi;  egj<negp_pp; egj++) {
 +                egp_flags = ir->opts.egp_flags[GID(egi,egj,ir->opts.ngener)];
 +                if (!(egp_flags & EGP_EXCL)) {
 +                    if (egp_flags & EGP_TABLE) {
 +                        negptable++;
 +                    } else {
 +                        bNormalnblists = TRUE;
 +                    }
 +                }
 +            }
 +        }
 +        if (bNormalnblists) {
 +            fr->nnblists = negptable + 1;
 +        } else {
 +            fr->nnblists = negptable;
 +        }
 +        if (fr->nnblists > 1)
 +            snew(fr->gid2nblists,ir->opts.ngener*ir->opts.ngener);
 +    }
 +    snew(fr->nblists,fr->nnblists);
 +    
 +    /* This code automatically gives table length tabext without cut-off's,
 +     * in that case grompp should already have checked that we do not need
 +     * normal tables and we only generate tables for 1-4 interactions.
 +     */
 +    rtab = ir->rlistlong + ir->tabext;
 +
 +    if (bTab) {
 +        /* make tables for ordinary interactions */
 +        if (bNormalnblists) {
 +            make_nbf_tables(fp,oenv,fr,rtab,cr,tabfn,NULL,NULL,&fr->nblists[0]);
 +            if (!bSep14tab)
 +                fr->tab14 = fr->nblists[0].table_elec_vdw;
 +            m = 1;
 +        } else {
 +            m = 0;
 +        }
 +        if (negptable > 0) {
 +            /* Read the special tables for certain energy group pairs */
 +            nm_ind = mtop->groups.grps[egcENER].nm_ind;
 +            for(egi=0; egi<negp_pp; egi++) {
 +                for(egj=egi;  egj<negp_pp; egj++) {
 +                    egp_flags = ir->opts.egp_flags[GID(egi,egj,ir->opts.ngener)];
 +                    if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL)) {
 +                        nbl = &(fr->nblists[m]);
 +                        if (fr->nnblists > 1) {
 +                            fr->gid2nblists[GID(egi,egj,ir->opts.ngener)] = m;
 +                        }
 +                        /* Read the table file with the two energy groups names appended */
 +                        make_nbf_tables(fp,oenv,fr,rtab,cr,tabfn,
 +                                        *mtop->groups.grpname[nm_ind[egi]],
 +                                        *mtop->groups.grpname[nm_ind[egj]],
 +                                        &fr->nblists[m]);
 +                        m++;
 +                    } else if (fr->nnblists > 1) {
 +                        fr->gid2nblists[GID(egi,egj,ir->opts.ngener)] = 0;
 +                    }
 +                }
 +            }
 +        }
 +    }
 +    if (bSep14tab)
 +    {
 +        /* generate extra tables with plain Coulomb for 1-4 interactions only */
 +        fr->tab14 = make_tables(fp,oenv,fr,MASTER(cr),tabpfn,rtab,
 +                                GMX_MAKETABLES_14ONLY);
 +    }
 +
 +    /* Read AdResS Thermo Force table if needed */
 +    if(fr->adress_icor == eAdressICThermoForce)
 +    {
 +        /* old todo replace */ 
 +        
 +        if (ir->adress->n_tf_grps > 0){
 +            make_adress_tf_tables(fp,oenv,fr,ir,tabfn, mtop, box);
 +
 +        }else{
 +            /* load the default table */
 +            snew(fr->atf_tabs, 1);
 +            fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp,oenv,fr,tabafn, box);
 +        }
 +    }
 +    
 +    /* Wall stuff */
 +    fr->nwall = ir->nwall;
 +    if (ir->nwall && ir->wall_type==ewtTABLE)
 +    {
 +        make_wall_tables(fp,oenv,ir,tabfn,&mtop->groups,fr);
 +    }
 +    
 +    if (fcd && tabbfn) {
 +        fcd->bondtab  = make_bonded_tables(fp,
 +                                           F_TABBONDS,F_TABBONDSNC,
 +                                           mtop,tabbfn,"b");
 +        fcd->angletab = make_bonded_tables(fp,
 +                                           F_TABANGLES,-1,
 +                                           mtop,tabbfn,"a");
 +        fcd->dihtab   = make_bonded_tables(fp,
 +                                           F_TABDIHS,-1,
 +                                           mtop,tabbfn,"d");
 +    } else {
 +        if (debug)
 +            fprintf(debug,"No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
 +    }
 +    
 +    /* QM/MM initialization if requested
 +     */
 +    if (ir->bQMMM)
 +    {
 +        fprintf(stderr,"QM/MM calculation requested.\n");
 +    }
 +    
 +    fr->bQMMM      = ir->bQMMM;   
 +    fr->qr         = mk_QMMMrec();
 +    
 +    /* Set all the static charge group info */
 +    fr->cginfo_mb = init_cginfo_mb(fp,mtop,fr,bNoSolvOpt,
 +                                   &fr->bExcl_IntraCGAll_InterCGNone);
 +    if (DOMAINDECOMP(cr)) {
 +        fr->cginfo = NULL;
 +    } else {
 +        fr->cginfo = cginfo_expand(mtop->nmolblock,fr->cginfo_mb);
 +    }
 +    
 +    if (!DOMAINDECOMP(cr))
 +    {
 +        /* When using particle decomposition, the effect of the second argument,
 +         * which sets fr->hcg, is corrected later in do_md and init_em.
 +         */
 +        forcerec_set_ranges(fr,ncg_mtop(mtop),ncg_mtop(mtop),
 +                            mtop->natoms,mtop->natoms,mtop->natoms);
 +    }
 +    
 +    fr->print_force = print_force;
 +
 +
 +    /* coarse load balancing vars */
 +    fr->t_fnbf=0.;
 +    fr->t_wait=0.;
 +    fr->timesteps=0;
 +    
 +    /* Initialize neighbor search */
 +    init_ns(fp,cr,&fr->ns,fr,mtop,box);
 +
 +    if (cr->duty & DUTY_PP)
 +    {
 +        gmx_nonbonded_setup(fp,fr,bGenericKernelOnly);
 +    /*
 +     if (ir->bAdress)
 +        {
 +            gmx_setup_adress_kernels(fp,bGenericKernelOnly);
 +        }
 +     */
 +    }
 +
 +    /* Initialize the thread working data for bonded interactions */
 +    init_forcerec_f_threads(fr,mtop->groups.grps[egcENER].nr);
 +    
 +    snew(fr->excl_load,fr->nthreads+1);
 +
 +    if (fr->cutoff_scheme == ecutsVERLET)
 +    {
 +        if (ir->rcoulomb != ir->rvdw)
 +        {
 +            gmx_fatal(FARGS,"With Verlet lists rcoulomb and rvdw should be identical");
 +        }
 +
 +        init_nb_verlet(fp, &fr->nbv, ir, fr, cr, nbpu_opt);
 +    }
 +
 +    /* fr->ic is used both by verlet and group kernels (to some extent) now */
 +    init_interaction_const(fp, &fr->ic, fr, rtab);
 +    if (ir->eDispCorr != edispcNO)
 +    {
 +        calc_enervirdiff(fp,ir->eDispCorr,fr);
 +    }
 +}
 +
 +#define pr_real(fp,r) fprintf(fp,"%s: %e\n",#r,r)
 +#define pr_int(fp,i)  fprintf((fp),"%s: %d\n",#i,i)
 +#define pr_bool(fp,b) fprintf((fp),"%s: %s\n",#b,bool_names[b])
 +
 +void pr_forcerec(FILE *fp,t_forcerec *fr,t_commrec *cr)
 +{
 +  int i;
 +
 +  pr_real(fp,fr->rlist);
 +  pr_real(fp,fr->rcoulomb);
 +  pr_real(fp,fr->fudgeQQ);
 +  pr_bool(fp,fr->bGrid);
 +  pr_bool(fp,fr->bTwinRange);
 +  /*pr_int(fp,fr->cg0);
 +    pr_int(fp,fr->hcg);*/
 +  for(i=0; i<fr->nnblists; i++)
 +    pr_int(fp,fr->nblists[i].table_elec_vdw.n);
 +  pr_real(fp,fr->rcoulomb_switch);
 +  pr_real(fp,fr->rcoulomb);
 +  
 +  fflush(fp);
 +}
 +
 +void forcerec_set_excl_load(t_forcerec *fr,
 +                            const gmx_localtop_t *top,const t_commrec *cr)
 +{
 +    const int *ind,*a;
 +    int t,i,j,ntot,n,ntarget;
 +
 +    if (cr != NULL && PARTDECOMP(cr))
 +    {
 +        /* No OpenMP with particle decomposition */
 +        pd_at_range(cr,
 +                    &fr->excl_load[0],
 +                    &fr->excl_load[1]);
 +
 +        return;
 +    }
 +
 +    ind = top->excls.index;
 +    a   = top->excls.a;
 +
 +    ntot = 0;
 +    for(i=0; i<top->excls.nr; i++)
 +    {
 +        for(j=ind[i]; j<ind[i+1]; j++)
 +        {
 +            if (a[j] > i)
 +            {
 +                ntot++;
 +            }
 +        }
 +    }
 +
 +    fr->excl_load[0] = 0;
 +    n = 0;
 +    i = 0;
 +    for(t=1; t<=fr->nthreads; t++)
 +    {
 +        ntarget = (ntot*t)/fr->nthreads;
 +        while(i < top->excls.nr && n < ntarget)
 +        {
 +            for(j=ind[i]; j<ind[i+1]; j++)
 +            {
 +                if (a[j] > i)
 +                {
 +                    n++;
 +                }
 +            }
 +            i++;
 +        }
 +        fr->excl_load[t] = i;
 +    }
 +}
 +
Simple merge
index 539e718f57e12f87338a6bb8a55a182751e81fe8,0000000000000000000000000000000000000000..86dbc9e58d2288dc70e8a776cf1bb753f7eae23e
mode 100644,000000..100644
--- /dev/null
@@@ -1,2631 -1,0 +1,2648 @@@
-     int     nnbl, kernel_type, sh_e;
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#ifdef GMX_CRAY_XT3
 +#include<catamount/dclock.h>
 +#endif
 +
 +
 +#include <stdio.h>
 +#include <time.h>
 +#ifdef HAVE_SYS_TIME_H
 +#include <sys/time.h>
 +#endif
 +#include <math.h>
 +#include "typedefs.h"
 +#include "string2.h"
 +#include "gmxfio.h"
 +#include "smalloc.h"
 +#include "names.h"
 +#include "confio.h"
 +#include "mvdata.h"
 +#include "txtdump.h"
 +#include "pbc.h"
 +#include "chargegroup.h"
 +#include "vec.h"
 +#include <time.h>
 +#include "nrnb.h"
 +#include "mshift.h"
 +#include "mdrun.h"
 +#include "sim_util.h"
 +#include "update.h"
 +#include "physics.h"
 +#include "main.h"
 +#include "mdatoms.h"
 +#include "force.h"
 +#include "bondf.h"
 +#include "pme.h"
 +#include "disre.h"
 +#include "orires.h"
 +#include "network.h"
 +#include "calcmu.h"
 +#include "constr.h"
 +#include "xvgr.h"
 +#include "trnio.h"
 +#include "xtcio.h"
 +#include "copyrite.h"
 +#include "pull_rotation.h"
 +#include "gmx_random.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "gmx_wallcycle.h"
 +#include "genborn.h"
 +#include "nbnxn_atomdata.h"
 +#include "nbnxn_search.h"
 +#include "nbnxn_kernels/nbnxn_kernel_ref.h"
 +#include "nbnxn_kernels/nbnxn_kernel_x86_simd128.h"
 +#include "nbnxn_kernels/nbnxn_kernel_x86_simd256.h"
 +#include "nbnxn_kernels/nbnxn_kernel_gpu_ref.h"
 +
 +#ifdef GMX_LIB_MPI
 +#include <mpi.h>
 +#endif
 +#ifdef GMX_THREAD_MPI
 +#include "tmpi.h"
 +#endif
 +
 +#include "adress.h"
 +#include "qmmm.h"
 +
 +#include "nbnxn_cuda_data_mgmt.h"
 +#include "nbnxn_cuda/nbnxn_cuda.h"
 +
 +#if 0
 +typedef struct gmx_timeprint {
 +
 +} t_gmx_timeprint;
 +#endif
 +
 +/* Portable version of ctime_r implemented in src/gmxlib/string2.c, but we do not want it declared in public installed headers */
 +char *
 +gmx_ctime_r(const time_t *clock,char *buf, int n);
 +
 +
 +double
 +gmx_gettime()
 +{
 +#ifdef HAVE_GETTIMEOFDAY
 +      struct timeval t;
 +      double seconds;
 +
 +      gettimeofday(&t,NULL);
 +
 +      seconds = (double) t.tv_sec + 1e-6*(double)t.tv_usec;
 +
 +      return seconds;
 +#else
 +      double  seconds;
 +
 +      seconds = time(NULL);
 +
 +      return seconds;
 +#endif
 +}
 +
 +
 +#define difftime(end,start) ((double)(end)-(double)(start))
 +
 +void print_time(FILE *out,gmx_runtime_t *runtime,gmx_large_int_t step,
 +                t_inputrec *ir, t_commrec *cr)
 +{
 +    time_t finish;
 +    char   timebuf[STRLEN];
 +    double dt;
 +    char buf[48];
 +
 +#ifndef GMX_THREAD_MPI
 +    if (!PAR(cr))
 +#endif
 +    {
 +        fprintf(out,"\r");
 +    }
 +    fprintf(out,"step %s",gmx_step_str(step,buf));
 +    if ((step >= ir->nstlist))
 +    {
 +        runtime->last = gmx_gettime();
 +        dt = difftime(runtime->last,runtime->real);
 +        runtime->time_per_step = dt/(step - ir->init_step + 1);
 +
 +        dt = (ir->nsteps + ir->init_step - step)*runtime->time_per_step;
 +
 +        if (ir->nsteps >= 0)
 +        {
 +            if (dt >= 300)
 +            {
 +                finish = (time_t) (runtime->last + dt);
 +                gmx_ctime_r(&finish,timebuf,STRLEN);
 +                sprintf(buf,"%s",timebuf);
 +                buf[strlen(buf)-1]='\0';
 +                fprintf(out,", will finish %s",buf);
 +            }
 +            else
 +                fprintf(out,", remaining runtime: %5d s          ",(int)dt);
 +        }
 +        else
 +        {
 +            fprintf(out," performance: %.1f ns/day    ",
 +                    ir->delta_t/1000*24*60*60/runtime->time_per_step);
 +        }
 +    }
 +#ifndef GMX_THREAD_MPI
 +    if (PAR(cr))
 +    {
 +        fprintf(out,"\n");
 +    }
 +#endif
 +
 +    fflush(out);
 +}
 +
 +#ifdef NO_CLOCK
 +#define clock() -1
 +#endif
 +
 +static double set_proctime(gmx_runtime_t *runtime)
 +{
 +    double diff;
 +#ifdef GMX_CRAY_XT3
 +    double prev;
 +
 +    prev = runtime->proc;
 +    runtime->proc = dclock();
 +
 +    diff = runtime->proc - prev;
 +#else
 +    clock_t prev;
 +
 +    prev = runtime->proc;
 +    runtime->proc = clock();
 +
 +    diff = (double)(runtime->proc - prev)/(double)CLOCKS_PER_SEC;
 +#endif
 +    if (diff < 0)
 +    {
 +        /* The counter has probably looped, ignore this data */
 +        diff = 0;
 +    }
 +
 +    return diff;
 +}
 +
 +void runtime_start(gmx_runtime_t *runtime)
 +{
 +    runtime->real = gmx_gettime();
 +    runtime->proc          = 0;
 +    set_proctime(runtime);
 +    runtime->realtime      = 0;
 +    runtime->proctime      = 0;
 +    runtime->last          = 0;
 +    runtime->time_per_step = 0;
 +}
 +
 +void runtime_end(gmx_runtime_t *runtime)
 +{
 +    double now;
 +
 +    now = gmx_gettime();
 +
 +    runtime->proctime += set_proctime(runtime);
 +    runtime->realtime  = now - runtime->real;
 +    runtime->real      = now;
 +}
 +
 +void runtime_upd_proc(gmx_runtime_t *runtime)
 +{
 +    runtime->proctime += set_proctime(runtime);
 +}
 +
 +void print_date_and_time(FILE *fplog,int nodeid,const char *title,
 +                         const gmx_runtime_t *runtime)
 +{
 +    int i;
 +    char timebuf[STRLEN];
 +    char time_string[STRLEN];
 +    time_t tmptime;
 +
 +    if (fplog)
 +    {
 +        if (runtime != NULL)
 +        {
 +            tmptime = (time_t) runtime->real;
 +            gmx_ctime_r(&tmptime,timebuf,STRLEN);
 +        }
 +        else
 +        {
 +            tmptime = (time_t) gmx_gettime();
 +            gmx_ctime_r(&tmptime,timebuf,STRLEN);
 +        }
 +        for(i=0; timebuf[i]>=' '; i++)
 +        {
 +            time_string[i]=timebuf[i];
 +        }
 +        time_string[i]='\0';
 +
 +        fprintf(fplog,"%s on node %d %s\n",title,nodeid,time_string);
 +    }
 +}
 +
 +static void sum_forces(int start,int end,rvec f[],rvec flr[])
 +{
 +  int i;
 +
 +  if (gmx_debug_at) {
 +    pr_rvecs(debug,0,"fsr",f+start,end-start);
 +    pr_rvecs(debug,0,"flr",flr+start,end-start);
 +  }
 +  for(i=start; (i<end); i++)
 +    rvec_inc(f[i],flr[i]);
 +}
 +
 +/*
 + * calc_f_el calculates forces due to an electric field.
 + *
 + * force is kJ mol^-1 nm^-1 = e * kJ mol^-1 nm^-1 / e
 + *
 + * Et[] contains the parameters for the time dependent
 + * part of the field (not yet used).
 + * Ex[] contains the parameters for
 + * the spatial dependent part of the field. You can have cool periodic
 + * fields in principle, but only a constant field is supported
 + * now.
 + * The function should return the energy due to the electric field
 + * (if any) but for now returns 0.
 + *
 + * WARNING:
 + * There can be problems with the virial.
 + * Since the field is not self-consistent this is unavoidable.
 + * For neutral molecules the virial is correct within this approximation.
 + * For neutral systems with many charged molecules the error is small.
 + * But for systems with a net charge or a few charged molecules
 + * the error can be significant when the field is high.
 + * Solution: implement a self-consitent electric field into PME.
 + */
 +static void calc_f_el(FILE *fp,int  start,int homenr,
 +                      real charge[],rvec x[],rvec f[],
 +                      t_cosines Ex[],t_cosines Et[],double t)
 +{
 +    rvec Ext;
 +    real t0;
 +    int  i,m;
 +
 +    for(m=0; (m<DIM); m++)
 +    {
 +        if (Et[m].n > 0)
 +        {
 +            if (Et[m].n == 3)
 +            {
 +                t0 = Et[m].a[1];
 +                Ext[m] = cos(Et[m].a[0]*(t-t0))*exp(-sqr(t-t0)/(2.0*sqr(Et[m].a[2])));
 +            }
 +            else
 +            {
 +                Ext[m] = cos(Et[m].a[0]*t);
 +            }
 +        }
 +        else
 +        {
 +            Ext[m] = 1.0;
 +        }
 +        if (Ex[m].n > 0)
 +        {
 +            /* Convert the field strength from V/nm to MD-units */
 +            Ext[m] *= Ex[m].a[0]*FIELDFAC;
 +            for(i=start; (i<start+homenr); i++)
 +                f[i][m] += charge[i]*Ext[m];
 +        }
 +        else
 +        {
 +            Ext[m] = 0;
 +        }
 +    }
 +    if (fp != NULL)
 +    {
 +        fprintf(fp,"%10g  %10g  %10g  %10g #FIELD\n",t,
 +                Ext[XX]/FIELDFAC,Ext[YY]/FIELDFAC,Ext[ZZ]/FIELDFAC);
 +    }
 +}
 +
 +static void calc_virial(FILE *fplog,int start,int homenr,rvec x[],rvec f[],
 +                      tensor vir_part,t_graph *graph,matrix box,
 +                      t_nrnb *nrnb,const t_forcerec *fr,int ePBC)
 +{
 +  int i,j;
 +  tensor virtest;
 +
 +  /* The short-range virial from surrounding boxes */
 +  clear_mat(vir_part);
 +  calc_vir(fplog,SHIFTS,fr->shift_vec,fr->fshift,vir_part,ePBC==epbcSCREW,box);
 +  inc_nrnb(nrnb,eNR_VIRIAL,SHIFTS);
 +
 +  /* Calculate partial virial, for local atoms only, based on short range.
 +   * Total virial is computed in global_stat, called from do_md
 +   */
 +  f_calc_vir(fplog,start,start+homenr,x,f,vir_part,graph,box);
 +  inc_nrnb(nrnb,eNR_VIRIAL,homenr);
 +
 +  /* Add position restraint contribution */
 +  for(i=0; i<DIM; i++) {
 +    vir_part[i][i] += fr->vir_diag_posres[i];
 +  }
 +
 +  /* Add wall contribution */
 +  for(i=0; i<DIM; i++) {
 +    vir_part[i][ZZ] += fr->vir_wall_z[i];
 +  }
 +
 +  if (debug)
 +    pr_rvecs(debug,0,"vir_part",vir_part,DIM);
 +}
 +
 +static void posres_wrapper(FILE *fplog,
 +                           int flags,
 +                           gmx_bool bSepDVDL,
 +                           t_inputrec *ir,
 +                           t_nrnb *nrnb,
 +                           gmx_localtop_t *top,
 +                           matrix box,rvec x[],
 +                           rvec f[],
 +                           gmx_enerdata_t *enerd,
 +                           real *lambda,
 +                           t_forcerec *fr)
 +{
 +    t_pbc pbc;
 +    real  v,dvdl;
 +    int   i;
 +
 +    /* Position restraints always require full pbc */
 +    set_pbc(&pbc,ir->ePBC,box);
 +    dvdl = 0;
 +    v = posres(top->idef.il[F_POSRES].nr,top->idef.il[F_POSRES].iatoms,
 +               top->idef.iparams_posres,
 +               (const rvec*)x,fr->f_novirsum,fr->vir_diag_posres,
 +               ir->ePBC==epbcNONE ? NULL : &pbc,
 +               lambda[efptRESTRAINT],&dvdl,
 +               fr->rc_scaling,fr->ePBC,fr->posres_com,fr->posres_comB);
 +    if (bSepDVDL)
 +    {
 +        fprintf(fplog,sepdvdlformat,
 +                interaction_function[F_POSRES].longname,v,dvdl);
 +    }
 +    enerd->term[F_POSRES] += v;
 +    /* If just the force constant changes, the FEP term is linear,
 +     * but if k changes, it is not.
 +     */
 +    enerd->dvdl_nonlin[efptRESTRAINT] += dvdl;
 +    inc_nrnb(nrnb,eNR_POSRES,top->idef.il[F_POSRES].nr/2);
 +
 +    if ((ir->fepvals->n_lambda > 0) && (flags & GMX_FORCE_DHDL))
 +    {
 +        for(i=0; i<enerd->n_lambda; i++)
 +        {
 +            real dvdl_dum,lambda_dum;
 +
 +            lambda_dum = (i==0 ? lambda[efptRESTRAINT] : ir->fepvals->all_lambda[efptRESTRAINT][i-1]);
 +            v = posres(top->idef.il[F_POSRES].nr,top->idef.il[F_POSRES].iatoms,
 +                       top->idef.iparams_posres,
 +                       (const rvec*)x,NULL,NULL,
 +                       ir->ePBC==epbcNONE ? NULL : &pbc,lambda_dum,&dvdl,
 +                       fr->rc_scaling,fr->ePBC,fr->posres_com,fr->posres_comB);
 +            enerd->enerpart_lambda[i] += v;
 +        }
 +    }
 +}
 +
 +static void pull_potential_wrapper(FILE *fplog,
 +                                   gmx_bool bSepDVDL,
 +                                   t_commrec *cr,
 +                                   t_inputrec *ir,
 +                                   matrix box,rvec x[],
 +                                   rvec f[],
 +                                   tensor vir_force,
 +                                   t_mdatoms *mdatoms,
 +                                   gmx_enerdata_t *enerd,
 +                                   real *lambda,
 +                                   double t)
 +{
 +    t_pbc  pbc;
 +    real   dvdl;
 +
 +    /* Calculate the center of mass forces, this requires communication,
 +     * which is why pull_potential is called close to other communication.
 +     * The virial contribution is calculated directly,
 +     * which is why we call pull_potential after calc_virial.
 +     */
 +    set_pbc(&pbc,ir->ePBC,box);
 +    dvdl = 0; 
 +    enerd->term[F_COM_PULL] +=
 +        pull_potential(ir->ePull,ir->pull,mdatoms,&pbc,
 +                       cr,t,lambda[efptRESTRAINT],x,f,vir_force,&dvdl);
 +    if (bSepDVDL)
 +    {
 +        fprintf(fplog,sepdvdlformat,"Com pull",enerd->term[F_COM_PULL],dvdl);
 +    }
 +    enerd->dvdl_lin[efptRESTRAINT] += dvdl;
 +}
 +
 +static void pme_receive_force_ener(FILE *fplog,
 +                                   gmx_bool bSepDVDL,
 +                                   t_commrec *cr,
 +                                   gmx_wallcycle_t wcycle,
 +                                   gmx_enerdata_t *enerd,
 +                                   t_forcerec *fr)
 +{
 +    real   e,v,dvdl;    
 +    float  cycles_ppdpme,cycles_seppme;
 +
 +    cycles_ppdpme = wallcycle_stop(wcycle,ewcPPDURINGPME);
 +    dd_cycles_add(cr->dd,cycles_ppdpme,ddCyclPPduringPME);
 +
 +    /* In case of node-splitting, the PP nodes receive the long-range 
 +     * forces, virial and energy from the PME nodes here.
 +     */    
 +    wallcycle_start(wcycle,ewcPP_PMEWAITRECVF);
 +    dvdl = 0;
 +    gmx_pme_receive_f(cr,fr->f_novirsum,fr->vir_el_recip,&e,&dvdl,
 +                      &cycles_seppme);
 +    if (bSepDVDL)
 +    {
 +        fprintf(fplog,sepdvdlformat,"PME mesh",e,dvdl);
 +    }
 +    enerd->term[F_COUL_RECIP] += e;
 +    enerd->dvdl_lin[efptCOUL] += dvdl;
 +    if (wcycle)
 +    {
 +        dd_cycles_add(cr->dd,cycles_seppme,ddCyclPME);
 +    }
 +    wallcycle_stop(wcycle,ewcPP_PMEWAITRECVF);
 +}
 +
 +static void print_large_forces(FILE *fp,t_mdatoms *md,t_commrec *cr,
 +                             gmx_large_int_t step,real pforce,rvec *x,rvec *f)
 +{
 +  int  i;
 +  real pf2,fn2;
 +  char buf[STEPSTRSIZE];
 +
 +  pf2 = sqr(pforce);
 +  for(i=md->start; i<md->start+md->homenr; i++) {
 +    fn2 = norm2(f[i]);
 +    /* We also catch NAN, if the compiler does not optimize this away. */
 +    if (fn2 >= pf2 || fn2 != fn2) {
 +      fprintf(fp,"step %s  atom %6d  x %8.3f %8.3f %8.3f  force %12.5e\n",
 +            gmx_step_str(step,buf),
 +            ddglatnr(cr->dd,i),x[i][XX],x[i][YY],x[i][ZZ],sqrt(fn2));
 +    }
 +  }
 +}
 +
 +static void post_process_forces(FILE *fplog,
 +                                t_commrec *cr,
 +                                gmx_large_int_t step,
 +                                t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +                                gmx_localtop_t *top,
 +                                matrix box,rvec x[],
 +                                rvec f[],
 +                                tensor vir_force,
 +                                t_mdatoms *mdatoms,
 +                                t_graph *graph,
 +                                t_forcerec *fr,gmx_vsite_t *vsite,
 +                                int flags)
 +{
 +    if (fr->bF_NoVirSum)
 +    {
 +        if (vsite)
 +        {
 +            /* Spread the mesh force on virtual sites to the other particles... 
 +             * This is parallellized. MPI communication is performed
 +             * if the constructing atoms aren't local.
 +             */
 +            wallcycle_start(wcycle,ewcVSITESPREAD);
 +            spread_vsite_f(fplog,vsite,x,fr->f_novirsum,NULL,
 +                           (flags & GMX_FORCE_VIRIAL),fr->vir_el_recip,
 +                           nrnb,
 +                           &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr);
 +            wallcycle_stop(wcycle,ewcVSITESPREAD);
 +        }
 +        if (flags & GMX_FORCE_VIRIAL)
 +        {
 +            /* Now add the forces, this is local */
 +            if (fr->bDomDec)
 +            {
 +                sum_forces(0,fr->f_novirsum_n,f,fr->f_novirsum);
 +            }
 +            else
 +            {
 +                sum_forces(mdatoms->start,mdatoms->start+mdatoms->homenr,
 +                           f,fr->f_novirsum);
 +            }
 +            if (EEL_FULL(fr->eeltype))
 +            {
 +                /* Add the mesh contribution to the virial */
 +                m_add(vir_force,fr->vir_el_recip,vir_force);
 +            }
 +            if (debug)
 +            {
 +                pr_rvecs(debug,0,"vir_force",vir_force,DIM);
 +            }
 +        }
 +    }
 +    
 +    if (fr->print_force >= 0)
 +    {
 +        print_large_forces(stderr,mdatoms,cr,step,fr->print_force,x,f);
 +    }
 +}
 +
 +static void do_nb_verlet(t_forcerec *fr,
 +                         interaction_const_t *ic,
 +                         gmx_enerdata_t *enerd,
 +                         int flags, int ilocality,
 +                         int clearF,
 +                         t_nrnb *nrnb,
 +                         gmx_wallcycle_t wcycle)
 +{
-     /* In eNR_??? the nbnxn F+E kernels are always the F kernel + 1 */
-     sh_e = ((flags & GMX_FORCE_ENERGY) ? 1 : 0);
-     inc_nrnb(nrnb,
-              ((EEL_RF(ic->eeltype) || ic->eeltype == eelCUT) ?
-               eNR_NBNXN_LJ_RF : eNR_NBNXN_LJ_TAB) + sh_e,
++    int     nnbl, kernel_type, enr_nbnxn_kernel_ljc, enr_nbnxn_kernel_lj;
 +    char    *env;
 +    nonbonded_verlet_group_t  *nbvg;
 +
 +    if (!(flags & GMX_FORCE_NONBONDED))
 +    {
 +        /* skip non-bonded calculation */
 +        return;
 +    }
 +
 +    nbvg = &fr->nbv->grp[ilocality];
 +
 +    /* CUDA kernel launch overhead is already timed separately */
 +    if (fr->cutoff_scheme != ecutsVERLET)
 +    {
 +        gmx_incons("Invalid cut-off scheme passed!");
 +    }
 +
 +    if (nbvg->kernel_type != nbk8x8x8_CUDA)
 +    {
 +        wallcycle_sub_start(wcycle, ewcsNONBONDED);
 +    }
 +    switch (nbvg->kernel_type)
 +    {
 +        case nbk4x4_PlainC:
 +            nbnxn_kernel_ref(&nbvg->nbl_lists,
 +                             nbvg->nbat, ic,
 +                             fr->shift_vec,
 +                             flags,
 +                             clearF,
 +                             fr->fshift[0],
 +                             enerd->grpp.ener[egCOULSR],
 +                             fr->bBHAM ?
 +                             enerd->grpp.ener[egBHAMSR] :
 +                             enerd->grpp.ener[egLJSR]);
 +            break;
 +        
 +        case nbk4xN_X86_SIMD128:
 +            nbnxn_kernel_x86_simd128(&nbvg->nbl_lists,
 +                                     nbvg->nbat, ic,
++                                     nbvg->ewald_excl,
 +                                     fr->shift_vec,
 +                                     flags,
 +                                     clearF,
 +                                     fr->fshift[0],
 +                                     enerd->grpp.ener[egCOULSR],
 +                                     fr->bBHAM ?
 +                                     enerd->grpp.ener[egBHAMSR] :
 +                                     enerd->grpp.ener[egLJSR]);
 +            break;
 +        case nbk4xN_X86_SIMD256:
 +            nbnxn_kernel_x86_simd256(&nbvg->nbl_lists,
 +                                     nbvg->nbat, ic,
++                                     nbvg->ewald_excl,
 +                                     fr->shift_vec,
 +                                     flags,
 +                                     clearF,
 +                                     fr->fshift[0],
 +                                     enerd->grpp.ener[egCOULSR],
 +                                     fr->bBHAM ?
 +                                     enerd->grpp.ener[egBHAMSR] :
 +                                     enerd->grpp.ener[egLJSR]);
 +            break;
 +
 +        case nbk8x8x8_CUDA:
 +            nbnxn_cuda_launch_kernel(fr->nbv->cu_nbv, nbvg->nbat, flags, ilocality);
 +            break;
 +
 +        case nbk8x8x8_PlainC:
 +            nbnxn_kernel_gpu_ref(nbvg->nbl_lists.nbl[0],
 +                                 nbvg->nbat, ic,
 +                                 fr->shift_vec,
 +                                 flags,
 +                                 clearF,
 +                                 nbvg->nbat->out[0].f,
 +                                 fr->fshift[0],
 +                                 enerd->grpp.ener[egCOULSR],
 +                                 fr->bBHAM ?
 +                                 enerd->grpp.ener[egBHAMSR] :
 +                                 enerd->grpp.ener[egLJSR]);
 +            break;
 +
 +        default:
 +            gmx_incons("Invalid nonbonded kernel type passed!");
 +
 +    }
 +    if (nbvg->kernel_type != nbk8x8x8_CUDA)
 +    {
 +        wallcycle_sub_stop(wcycle, ewcsNONBONDED);
 +    }
 +
-     inc_nrnb(nrnb,eNR_NBNXN_LJ+sh_e,nbvg->nbl_lists.natpair_lj);
-     inc_nrnb(nrnb,
-              ((EEL_RF(ic->eeltype) || ic->eeltype == eelCUT) ?
-               eNR_NBNXN_RF : eNR_NBNXN_TAB)+sh_e,
++    if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
++    {
++        enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_RF;
++    }
++    else if (nbvg->ewald_excl == ewaldexclTable)
++    {
++        enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_TAB;
++    }
++    else
++    {
++        enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_EWALD;
++    }
++    enr_nbnxn_kernel_lj = eNR_NBNXN_LJ;
++    if (flags & GMX_FORCE_ENERGY)
++    {
++        /* In eNR_??? the nbnxn F+E kernels are always the F kernel + 1 */
++        enr_nbnxn_kernel_ljc += 1;
++        enr_nbnxn_kernel_lj  += 1;
++    }
++
++    inc_nrnb(nrnb,enr_nbnxn_kernel_ljc,
 +             nbvg->nbl_lists.natpair_ljq);
-     sum_epot(&(inputrec->opts),enerd);
++    inc_nrnb(nrnb,enr_nbnxn_kernel_lj,
++             nbvg->nbl_lists.natpair_lj);
++    inc_nrnb(nrnb,enr_nbnxn_kernel_ljc-eNR_NBNXN_LJ_RF+eNR_NBNXN_RF,
 +             nbvg->nbl_lists.natpair_q);
 +}
 +
 +/* Compute forces (and optionally energies and the virial) for one MD step
 + * with the Verlet cut-off scheme: pair-search gridding, local and
 + * non-local pair-list construction, CPU or CUDA GPU non-bonded kernels,
 + * bonded interactions via do_force_lowlevel(), and the final force and
 + * energy reductions.  The wallcycle counters time each phase; the CUDA
 + * kernel/copy-back launches are asynchronous and their results are
 + * waited for further down in this function.
 + */
 +void do_force_cutsVERLET(FILE *fplog,t_commrec *cr,
 +              t_inputrec *inputrec,
 +              gmx_large_int_t step,t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +              gmx_localtop_t *top,
 +              gmx_mtop_t *mtop,
 +              gmx_groups_t *groups,
 +              matrix box,rvec x[],history_t *hist,
 +              rvec f[],
 +              tensor vir_force,
 +              t_mdatoms *mdatoms,
 +              gmx_enerdata_t *enerd,t_fcdata *fcd,
 +              real *lambda,t_graph *graph,
 +              t_forcerec *fr, interaction_const_t *ic,
 +              gmx_vsite_t *vsite,rvec mu_tot,
 +              double t,FILE *field,gmx_edsam_t ed,
 +              gmx_bool bBornRadii,
 +              int flags)
 +{
 +    int     cg0,cg1,i,j;
 +    int     start,homenr;
 +    int     nb_kernel_type;
 +    double  mu[2*DIM];
 +    gmx_bool   bSepDVDL,bStateChanged,bNS,bFillGrid,bCalcCGCM,bBS;
 +    gmx_bool   bDoLongRange,bDoForces,bSepLRF,bUseGPU,bUseOrEmulGPU;
 +    gmx_bool   bDiffKernels=FALSE;
 +    matrix  boxs;
 +    rvec    vzero,box_diag;
 +    real    e,v,dvdl;
 +    float  cycles_pme,cycles_force;
 +    nonbonded_verlet_t *nbv;
 +
 +    /* NOTE(review): e, v and dvdl appear to be unused in this function,
 +     * and nb_kernel_type is assigned below but never read -- candidates
 +     * for removal (verify against compiler warnings). */
 +    cycles_force = 0;
 +    nbv = fr->nbv;
 +    nb_kernel_type = fr->nbv->grp[0].kernel_type;
 +
 +    start  = mdatoms->start;
 +    homenr = mdatoms->homenr;
 +
 +    bSepDVDL = (fr->bSepDVDL && do_per_step(step,inputrec->nstlog));
 +
 +    clear_mat(vir_force);
 +
 +    /* Determine the charge-group range; with TPI the test particle's
 +     * group is excluded. */
 +    cg0 = 0;
 +    if (DOMAINDECOMP(cr))
 +    {
 +        cg1 = cr->dd->ncg_tot;
 +    }
 +    else
 +    {
 +        cg1 = top->cgs.nr;
 +    }
 +    if (fr->n_tpi > 0)
 +    {
 +        cg1--;
 +    }
 +
 +    /* Decode the force-flag bit mask into local booleans */
 +    bStateChanged = (flags & GMX_FORCE_STATECHANGED);
 +    bNS           = (flags & GMX_FORCE_NS) && (fr->bAllvsAll==FALSE); 
 +    bFillGrid     = (bNS && bStateChanged);
 +    bCalcCGCM     = (bFillGrid && !DOMAINDECOMP(cr));
 +    bDoLongRange  = (fr->bTwinRange && bNS && (flags & GMX_FORCE_DO_LR));
 +    bDoForces     = (flags & GMX_FORCE_FORCES);
 +    bSepLRF       = (bDoLongRange && bDoForces && (flags & GMX_FORCE_SEPLRF));
 +    bUseGPU       = fr->nbv->bUseGPU;
 +    bUseOrEmulGPU = bUseGPU || (nbv->grp[0].kernel_type == nbk8x8x8_PlainC);
 +
 +    if (bStateChanged)
 +    {
 +        update_forcerec(fplog,fr,box);
 +
 +        if (NEED_MUTOT(*inputrec))
 +        {
 +            /* Calculate total (local) dipole moment in a temporary common array.
 +             * This makes it possible to sum them over nodes faster.
 +             */
 +            calc_mu(start,homenr,
 +                    x,mdatoms->chargeA,mdatoms->chargeB,mdatoms->nChargePerturbed,
 +                    mu,mu+DIM);
 +        }
 +    }
 +
 +    if (fr->ePBC != epbcNONE) { 
 +        /* Compute shift vectors every step,
 +         * because of pressure coupling or box deformation!
 +         */
 +        if ((flags & GMX_FORCE_DYNAMICBOX) && bStateChanged)
 +            calc_shifts(box,fr->shift_vec);
 +
 +        if (bCalcCGCM) { 
 +            put_atoms_in_box_omp(fr->ePBC,box,homenr,x);
 +            inc_nrnb(nrnb,eNR_SHIFTX,homenr);
 +        } 
 +        else if (EI_ENERGY_MINIMIZATION(inputrec->eI) && graph) {
 +            unshift_self(graph,box,x);
 +        }
 +    } 
 +
 +    /* Copy the (possibly updated) shift vectors into the nbnxn atom data */
 +    nbnxn_atomdata_copy_shiftvec(flags & GMX_FORCE_DYNAMICBOX,
 +                                  fr->shift_vec,nbv->grp[0].nbat);
 +
 +#ifdef GMX_MPI
 +    if (!(cr->duty & DUTY_PME)) {
 +        /* Send particle coordinates to the pme nodes.
 +         * Since this is only implemented for domain decomposition
 +         * and domain decomposition does not use the graph,
 +         * we do not need to worry about shifting.
 +         */    
 +
 +        wallcycle_start(wcycle,ewcPP_PMESENDX);
 +
 +        bBS = (inputrec->nwall == 2);
 +        if (bBS) {
 +            copy_mat(box,boxs);
 +            svmul(inputrec->wall_ewald_zfac,boxs[ZZ],boxs[ZZ]);
 +        }
 +
 +        gmx_pme_send_x(cr,bBS ? boxs : box,x,
 +                       mdatoms->nChargePerturbed,lambda[efptCOUL],
 +                       (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY)),step);
 +
 +        wallcycle_stop(wcycle,ewcPP_PMESENDX);
 +    }
 +#endif /* GMX_MPI */
 +
 +    /* do gridding for pair search */
 +    if (bNS)
 +    {
 +        if (graph && bStateChanged)
 +        {
 +            /* Calculate intramolecular shift vectors to make molecules whole */
 +            mk_mshift(fplog,graph,fr->ePBC,box,x);
 +        }
 +
 +        clear_rvec(vzero);
 +        box_diag[XX] = box[XX][XX];
 +        box_diag[YY] = box[YY][YY];
 +        box_diag[ZZ] = box[ZZ][ZZ];
 +
 +        wallcycle_start(wcycle,ewcNS);
 +        if (!fr->bDomDec)
 +        {
 +            wallcycle_sub_start(wcycle,ewcsNBS_GRID_LOCAL);
 +            nbnxn_put_on_grid(nbv->nbs,fr->ePBC,box,
 +                              0,vzero,box_diag,
 +                              0,mdatoms->homenr,-1,fr->cginfo,x,
 +                              0,NULL,
 +                              nbv->grp[eintLocal].kernel_type,
 +                              nbv->grp[eintLocal].nbat);
 +            wallcycle_sub_stop(wcycle,ewcsNBS_GRID_LOCAL);
 +        }
 +        else
 +        {
 +            wallcycle_sub_start(wcycle,ewcsNBS_GRID_NONLOCAL);
 +            nbnxn_put_on_grid_nonlocal(nbv->nbs,domdec_zones(cr->dd),
 +                                       fr->cginfo,x,
 +                                       nbv->grp[eintNonlocal].kernel_type,
 +                                       nbv->grp[eintNonlocal].nbat);
 +            wallcycle_sub_stop(wcycle,ewcsNBS_GRID_NONLOCAL);
 +        }
 +
 +        /* When local and non-local groups share one nbat, set all atoms at
 +         * once; otherwise set the two atom-data structs separately. */
 +        if (nbv->ngrp == 1 ||
 +            nbv->grp[eintNonlocal].nbat == nbv->grp[eintLocal].nbat)
 +        {
 +            nbnxn_atomdata_set(nbv->grp[eintLocal].nbat,eatAll,
 +                                nbv->nbs,mdatoms,fr->cginfo);
 +        }
 +        else
 +        {
 +            nbnxn_atomdata_set(nbv->grp[eintLocal].nbat,eatLocal,
 +                                nbv->nbs,mdatoms,fr->cginfo);
 +            nbnxn_atomdata_set(nbv->grp[eintNonlocal].nbat,eatAll,
 +                                nbv->nbs,mdatoms,fr->cginfo);
 +        }
 +        wallcycle_stop(wcycle, ewcNS);
 +    }
 +
 +    /* initialize the GPU atom data and copy shift vector */
 +    if (bUseGPU)
 +    {
 +        if (bNS)
 +        {
 +            wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB);
 +            nbnxn_cuda_init_atomdata(nbv->cu_nbv, nbv->grp[eintLocal].nbat);
 +            wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB);
 +        }
 +
 +        wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB);
 +        nbnxn_cuda_upload_shiftvec(nbv->cu_nbv, nbv->grp[eintLocal].nbat);
 +        wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB);
 +    }
 +
 +    /* do local pair search */
 +    if (bNS)
 +    {
 +        wallcycle_start_nocount(wcycle,ewcNS);
 +        wallcycle_sub_start(wcycle,ewcsNBS_SEARCH_LOCAL);
 +        nbnxn_make_pairlist(nbv->nbs,nbv->grp[eintLocal].nbat,
 +                            &top->excls,
 +                            ic->rlist,
 +                            nbv->min_ci_balanced,
 +                            &nbv->grp[eintLocal].nbl_lists,
 +                            eintLocal,
 +                            nbv->grp[eintLocal].kernel_type,
 +                            nrnb);
 +        wallcycle_sub_stop(wcycle,ewcsNBS_SEARCH_LOCAL);
 +
 +        if (bUseGPU)
 +        {
 +            /* initialize local pair-list on the GPU */
 +            nbnxn_cuda_init_pairlist(nbv->cu_nbv,
 +                                     nbv->grp[eintLocal].nbl_lists.nbl[0],
 +                                     eintLocal);
 +        }
 +        wallcycle_stop(wcycle, ewcNS);
 +    }
 +    else
 +    {
 +        /* No search this step: only refresh the nbat x coordinates */
 +        wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
 +        wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS);
 +        nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs,eatLocal,FALSE,x,
 +                                        nbv->grp[eintLocal].nbat);
 +        wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS);
 +        wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
 +    }
 +
 +    if (bUseGPU)
 +    {
 +        wallcycle_start(wcycle,ewcLAUNCH_GPU_NB);
 +        /* launch local nonbonded F on GPU */
 +        do_nb_verlet(fr, ic, enerd, flags, eintLocal, enbvClearFNo,
 +                     nrnb, wcycle);
 +        wallcycle_stop(wcycle,ewcLAUNCH_GPU_NB);
 +    }
 +
 +    /* Communicate coordinates and sum dipole if necessary + 
 +       do non-local pair search */
 +    if (DOMAINDECOMP(cr))
 +    {
 +        bDiffKernels = (nbv->grp[eintNonlocal].kernel_type !=
 +                        nbv->grp[eintLocal].kernel_type);
 +
 +        if (bDiffKernels)
 +        {
 +            /* With GPU+CPU non-bonded calculations we need to copy
 +             * the local coordinates to the non-local nbat struct
 +             * (in CPU format) as the non-local kernel call also
 +             * calculates the local - non-local interactions.
 +             */
 +            wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
 +            wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS);
 +            nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs,eatLocal,TRUE,x,
 +                                             nbv->grp[eintNonlocal].nbat);
 +            wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS);
 +            wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
 +        }
 +
 +        if (bNS)
 +        {
 +            wallcycle_start_nocount(wcycle,ewcNS);
 +            wallcycle_sub_start(wcycle,ewcsNBS_SEARCH_NONLOCAL);
 +
 +            if (bDiffKernels)
 +            {
 +                nbnxn_grid_add_simple(nbv->nbs,nbv->grp[eintNonlocal].nbat);
 +            }
 +
 +            nbnxn_make_pairlist(nbv->nbs,nbv->grp[eintNonlocal].nbat,
 +                                &top->excls,
 +                                ic->rlist,
 +                                nbv->min_ci_balanced,
 +                                &nbv->grp[eintNonlocal].nbl_lists,
 +                                eintNonlocal,
 +                                nbv->grp[eintNonlocal].kernel_type,
 +                                nrnb);
 +
 +            wallcycle_sub_stop(wcycle,ewcsNBS_SEARCH_NONLOCAL);
 +
 +            if (nbv->grp[eintNonlocal].kernel_type == nbk8x8x8_CUDA)
 +            {
 +                /* initialize non-local pair-list on the GPU */
 +                nbnxn_cuda_init_pairlist(nbv->cu_nbv,
 +                                         nbv->grp[eintNonlocal].nbl_lists.nbl[0],
 +                                         eintNonlocal);
 +            }
 +            wallcycle_stop(wcycle,ewcNS);
 +        } 
 +        else
 +        {
 +            wallcycle_start(wcycle,ewcMOVEX);
 +            dd_move_x(cr->dd,box,x);
 +
 +            /* When we don't need the total dipole we sum it in global_stat */
 +            if (bStateChanged && NEED_MUTOT(*inputrec))
 +            {
 +                gmx_sumd(2*DIM,mu,cr);
 +            }
 +            wallcycle_stop(wcycle,ewcMOVEX);
 +
 +            wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
 +            wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS);
 +            nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs,eatNonlocal,FALSE,x,
 +                                            nbv->grp[eintNonlocal].nbat);
 +            wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS);
 +            cycles_force += wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
 +        }
 +
 +        if (bUseGPU && !bDiffKernels)
 +        { 
 +            wallcycle_start(wcycle,ewcLAUNCH_GPU_NB);
 +            /* launch non-local nonbonded F on GPU */
 +            do_nb_verlet(fr, ic, enerd, flags, eintNonlocal, enbvClearFNo,
 +                         nrnb, wcycle);
 +            cycles_force += wallcycle_stop(wcycle,ewcLAUNCH_GPU_NB);
 +        }
 +    }
 +
 +    if (bUseGPU)
 +    {
 +        /* launch D2H copy-back F */
 +        wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB);
 +        if (DOMAINDECOMP(cr) && !bDiffKernels)
 +        {
 +            nbnxn_cuda_launch_cpyback(nbv->cu_nbv, nbv->grp[eintNonlocal].nbat,
 +                                      flags, eatNonlocal);
 +        }
 +        nbnxn_cuda_launch_cpyback(nbv->cu_nbv, nbv->grp[eintLocal].nbat,
 +                                  flags, eatLocal);
 +        cycles_force += wallcycle_stop(wcycle,ewcLAUNCH_GPU_NB);
 +    }
 +
 +    /* Reduce the partial dipole sums and store the result in fr->mu_tot */
 +    if (bStateChanged && NEED_MUTOT(*inputrec))
 +    {
 +        if (PAR(cr))
 +        {
 +            gmx_sumd(2*DIM,mu,cr);
 +        } 
 +
 +        for(i=0; i<2; i++)
 +        {
 +            for(j=0;j<DIM;j++)
 +            {
 +                fr->mu_tot[i][j] = mu[i*DIM + j];
 +            }
 +        }
 +    }
 +    if (fr->efep == efepNO)
 +    {
 +        copy_rvec(fr->mu_tot[0],mu_tot);
 +    }
 +    else
 +    {
 +        /* Free-energy run: interpolate the dipole between states A and B */
 +        for(j=0; j<DIM; j++)
 +        {
 +            mu_tot[j] =
 +                (1.0 - lambda[efptCOUL])*fr->mu_tot[0][j] +
 +                lambda[efptCOUL]*fr->mu_tot[1][j];
 +        }
 +    }
 +
 +    /* Reset energies */
 +    reset_enerdata(&(inputrec->opts),fr,bNS,enerd,MASTER(cr));
 +    clear_rvecs(SHIFTS,fr->fshift);
 +
 +    if (DOMAINDECOMP(cr))
 +    {
 +        if (!(cr->duty & DUTY_PME))
 +        {
 +            wallcycle_start(wcycle,ewcPPDURINGPME);
 +            dd_force_flop_start(cr->dd,nrnb);
 +        }
 +    }
 +    
 +    /* Start the force cycle counter.
 +     * This counter is stopped in do_forcelow_level.
 +     * No parallel communication should occur while this counter is running,
 +     * since that will interfere with the dynamic load balancing.
 +     */
 +    wallcycle_start(wcycle,ewcFORCE);
 +    if (bDoForces)
 +    {
 +        /* Reset forces for which the virial is calculated separately:
 +         * PME/Ewald forces if necessary */
 +        if (fr->bF_NoVirSum) 
 +        {
 +            if (flags & GMX_FORCE_VIRIAL)
 +            {
 +                fr->f_novirsum = fr->f_novirsum_alloc;
 +                if (fr->bDomDec)
 +                {
 +                    clear_rvecs(fr->f_novirsum_n,fr->f_novirsum);
 +                }
 +                else
 +                {
 +                    clear_rvecs(homenr,fr->f_novirsum+start);
 +                }
 +            }
 +            else
 +            {
 +                /* We are not calculating the pressure so we do not need
 +                 * a separate array for forces that do not contribute
 +                 * to the pressure.
 +                 */
 +                fr->f_novirsum = f;
 +            }
 +        }
 +
 +        /* Clear the short- and long-range forces */
 +        clear_rvecs(fr->natoms_force_constr,f);
 +        if(bSepLRF && do_per_step(step,inputrec->nstcalclr))
 +        {
 +            clear_rvecs(fr->natoms_force_constr,fr->f_twin);
 +        }
 +        
 +        clear_rvec(fr->vir_diag_posres);
 +    }
 +    if (inputrec->ePull == epullCONSTRAINT)
 +    {
 +        clear_pull_forces(inputrec->pull);
 +    }
 +
 +    /* update QMMMrec, if necessary */
 +    if(fr->bQMMM)
 +    {
 +        update_QMMMrec(cr,fr,x,mdatoms,box,top);
 +    }
 +
 +    if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_POSRES].nr > 0)
 +    {
 +        posres_wrapper(fplog,flags,bSepDVDL,inputrec,nrnb,top,box,x,
 +                       f,enerd,lambda,fr);
 +    }
 +
 +    /* Compute the bonded and non-bonded energies and optionally forces */    
 +    do_force_lowlevel(fplog,step,fr,inputrec,&(top->idef),
 +                      cr,nrnb,wcycle,mdatoms,&(inputrec->opts),
 +                      x,hist,f, bSepLRF ? fr->f_twin : f,enerd,fcd,mtop,top,fr->born,
 +                      &(top->atomtypes),bBornRadii,box,
 +                      inputrec->fepvals,lambda,graph,&(top->excls),fr->mu_tot,
 +                      flags, &cycles_pme);
 +
 +    if(bSepLRF)
 +    {
 +        if (do_per_step(step,inputrec->nstcalclr))
 +        {
 +            /* Add the long range forces to the short range forces */
 +            for(i=0; i<fr->natoms_force_constr; i++)
 +            {
 +                rvec_add(fr->f_twin[i],f[i],f[i]);
 +            }
 +        }
 +    }
 +    
 +    if (!bUseOrEmulGPU)
 +    {
 +        /* Maybe we should move this into do_force_lowlevel */
 +        do_nb_verlet(fr, ic, enerd, flags, eintLocal, enbvClearFYes,
 +                     nrnb, wcycle);
 +    }
 +        
 +
 +    if (!bUseOrEmulGPU || bDiffKernels)
 +    {
 +        int aloc;
 +
 +        if (DOMAINDECOMP(cr))
 +        {
 +            do_nb_verlet(fr, ic, enerd, flags, eintNonlocal,
 +                         bDiffKernels ? enbvClearFYes : enbvClearFNo,
 +                         nrnb, wcycle);
 +        }
 +
 +        if (!bUseOrEmulGPU)
 +        {
 +            aloc = eintLocal;
 +        }
 +        else
 +        {
 +            aloc = eintNonlocal;
 +        }
 +
 +        /* Add all the non-bonded force to the normal force array.
 +         * This can be split into a local a non-local part when overlapping
 +         * communication with calculation with domain decomposition.
 +         */
 +        cycles_force += wallcycle_stop(wcycle,ewcFORCE);
 +        wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
 +        wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
 +        nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs,eatAll,nbv->grp[aloc].nbat,f);
 +        wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
 +        cycles_force += wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
 +        wallcycle_start_nocount(wcycle,ewcFORCE);
 +
 +        /* if there are multiple fshift output buffers reduce them */
 +        if ((flags & GMX_FORCE_VIRIAL) &&
 +            nbv->grp[aloc].nbl_lists.nnbl > 1)
 +        {
 +            nbnxn_atomdata_add_nbat_fshift_to_fshift(nbv->grp[aloc].nbat,
 +                                                      fr->fshift);
 +        }
 +    }
 +    
 +    cycles_force += wallcycle_stop(wcycle,ewcFORCE);
 +    
 +    if (ed)
 +    {
 +        do_flood(fplog,cr,x,f,ed,box,step,bNS);
 +    }
 +
 +    if (bUseOrEmulGPU && !bDiffKernels)
 +    {
 +        /* wait for non-local forces (or calculate in emulation mode) */
 +        if (DOMAINDECOMP(cr))
 +        {
 +            if (bUseGPU)
 +            {
 +                wallcycle_start(wcycle,ewcWAIT_GPU_NB_NL);
 +                nbnxn_cuda_wait_gpu(nbv->cu_nbv,
 +                                    nbv->grp[eintNonlocal].nbat,
 +                                    flags, eatNonlocal,
 +                                    enerd->grpp.ener[egLJSR], enerd->grpp.ener[egCOULSR],
 +                                    fr->fshift);
 +                cycles_force += wallcycle_stop(wcycle,ewcWAIT_GPU_NB_NL);
 +            }
 +            else
 +            {
 +                wallcycle_start_nocount(wcycle,ewcFORCE);
 +                do_nb_verlet(fr, ic, enerd, flags, eintNonlocal, enbvClearFYes,
 +                             nrnb, wcycle);
 +                cycles_force += wallcycle_stop(wcycle,ewcFORCE);
 +            }            
 +            wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
 +            wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
 +            /* skip the reduction if there was no non-local work to do */
 +            /* NOTE(review): this guard tests the *local* pair list's nsci
 +             * although the comment and the reduction below concern the
 +             * non-local part -- verify whether eintNonlocal was intended. */
 +            if (nbv->grp[eintLocal].nbl_lists.nbl[0]->nsci > 0)
 +            {
 +                nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs,eatNonlocal,
 +                                               nbv->grp[eintNonlocal].nbat,f);
 +            }
 +            wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
 +            cycles_force += wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
 +        }
 +    }
 +
 +    if (bDoForces)
 +    {
 +        /* Communicate the forces */
 +        if (PAR(cr))
 +        {
 +            wallcycle_start(wcycle,ewcMOVEF);
 +            if (DOMAINDECOMP(cr))
 +            {
 +                dd_move_f(cr->dd,f,fr->fshift);
 +                /* Do we need to communicate the separate force array
 +                 * for terms that do not contribute to the single sum virial?
 +                 * Position restraints and electric fields do not introduce
 +                 * inter-cg forces, only full electrostatics methods do.
 +                 * When we do not calculate the virial, fr->f_novirsum = f,
 +                 * so we have already communicated these forces.
 +                 */
 +                if (EEL_FULL(fr->eeltype) && cr->dd->n_intercg_excl &&
 +                    (flags & GMX_FORCE_VIRIAL))
 +                {
 +                    dd_move_f(cr->dd,fr->f_novirsum,NULL);
 +                }
 +                if (bSepLRF)
 +                {
 +                    /* We should not update the shift forces here,
 +                     * since f_twin is already included in f.
 +                     */
 +                    dd_move_f(cr->dd,fr->f_twin,NULL);
 +                }
 +            }
 +            wallcycle_stop(wcycle,ewcMOVEF);
 +        }
 +    }
 + 
 +    if (bUseOrEmulGPU)
 +    {
 +        /* wait for local forces (or calculate in emulation mode) */
 +        if (bUseGPU)
 +        {
 +            wallcycle_start(wcycle,ewcWAIT_GPU_NB_L);
 +            nbnxn_cuda_wait_gpu(nbv->cu_nbv,
 +                                nbv->grp[eintLocal].nbat,
 +                                flags, eatLocal,
 +                                enerd->grpp.ener[egLJSR], enerd->grpp.ener[egCOULSR],
 +                                fr->fshift);
 +            wallcycle_stop(wcycle,ewcWAIT_GPU_NB_L);
 +
 +            /* now clear the GPU outputs while we finish the step on the CPU */
 +            nbnxn_cuda_clear_outputs(nbv->cu_nbv, flags);
 +        }
 +        else
 +        {            
 +            wallcycle_start_nocount(wcycle,ewcFORCE);
 +            do_nb_verlet(fr, ic, enerd, flags, eintLocal,
 +                         DOMAINDECOMP(cr) ? enbvClearFNo : enbvClearFYes,
 +                         nrnb, wcycle);
 +            wallcycle_stop(wcycle,ewcFORCE);
 +        }
 +        wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
 +        wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
 +        /* NOTE(review): the comment inside this if says "non-local" but
 +         * this guards the *local* force reduction -- presumably it means
 +         * "no local work"; confirm. */
 +        if (nbv->grp[eintLocal].nbl_lists.nbl[0]->nsci > 0)
 +        {
 +            /* skip the reduction if there was no non-local work to do */
 +            nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs,eatLocal,
 +                                           nbv->grp[eintLocal].nbat,f);
 +        }
 +        wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
 +        wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
 +    }
 +    
 +    if (DOMAINDECOMP(cr))
 +    {
 +        dd_force_flop_stop(cr->dd,nrnb);
 +        if (wcycle)
 +        {
 +            dd_cycles_add(cr->dd,cycles_force-cycles_pme,ddCyclF);
 +        }
 +    }
 +
 +    if (bDoForces)
 +    {
 +        if (IR_ELEC_FIELD(*inputrec))
 +        {
 +            /* Compute forces due to electric field */
 +            calc_f_el(MASTER(cr) ? field : NULL,
 +                      start,homenr,mdatoms->chargeA,x,fr->f_novirsum,
 +                      inputrec->ex,inputrec->et,t);
 +        }
 +
 +        /* If we have NoVirSum forces, but we do not calculate the virial,
 +         * we sum fr->f_novirum=f later.
 +         */
 +        if (vsite && !(fr->bF_NoVirSum && !(flags & GMX_FORCE_VIRIAL)))
 +        {
 +            wallcycle_start(wcycle,ewcVSITESPREAD);
 +            spread_vsite_f(fplog,vsite,x,f,fr->fshift,FALSE,NULL,nrnb,
 +                           &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr);
 +            wallcycle_stop(wcycle,ewcVSITESPREAD);
 +
 +            if (bSepLRF)
 +            {
 +                wallcycle_start(wcycle,ewcVSITESPREAD);
 +                spread_vsite_f(fplog,vsite,x,fr->f_twin,NULL,FALSE,NULL,
 +                               nrnb,
 +                               &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr);
 +                wallcycle_stop(wcycle,ewcVSITESPREAD);
 +            }
 +        }
 +
 +        if (flags & GMX_FORCE_VIRIAL)
 +        {
 +            /* Calculation of the virial must be done after vsites! */
 +            calc_virial(fplog,mdatoms->start,mdatoms->homenr,x,f,
 +                        vir_force,graph,box,nrnb,fr,inputrec->ePBC);
 +        }
 +    }
 +
 +    if (inputrec->ePull == epullUMBRELLA || inputrec->ePull == epullCONST_F)
 +    {
 +        pull_potential_wrapper(fplog,bSepDVDL,cr,inputrec,box,x,
 +                               f,vir_force,mdatoms,enerd,lambda,t);
 +    }
 +
 +    if (PAR(cr) && !(cr->duty & DUTY_PME))
 +    {
 +        /* In case of node-splitting, the PP nodes receive the long-range 
 +         * forces, virial and energy from the PME nodes here.
 +         */    
 +        pme_receive_force_ener(fplog,bSepDVDL,cr,wcycle,enerd,fr);
 +    }
 +
 +    if (bDoForces)
 +    {
 +        post_process_forces(fplog,cr,step,nrnb,wcycle,
 +                            top,box,x,f,vir_force,mdatoms,graph,fr,vsite,
 +                            flags);
 +    }
 +    
 +    /* Sum the potential energy terms from group contributions */
 -     sum_epot(&(inputrec->opts),enerd);
++    sum_epot(&(inputrec->opts),&(enerd->grpp),enerd->term);
 +}
 +
 +void do_force_cutsGROUP(FILE *fplog,t_commrec *cr,
 +              t_inputrec *inputrec,
 +              gmx_large_int_t step,t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +              gmx_localtop_t *top,
 +              gmx_mtop_t *mtop,
 +              gmx_groups_t *groups,
 +              matrix box,rvec x[],history_t *hist,
 +              rvec f[],
 +              tensor vir_force,
 +              t_mdatoms *mdatoms,
 +              gmx_enerdata_t *enerd,t_fcdata *fcd,
 +              real *lambda,t_graph *graph,
 +              t_forcerec *fr,gmx_vsite_t *vsite,rvec mu_tot,
 +              double t,FILE *field,gmx_edsam_t ed,
 +              gmx_bool bBornRadii,
 +              int flags)
 +{
 +    int    cg0,cg1,i,j;
 +    int    start,homenr;
 +    double mu[2*DIM];
 +    gmx_bool   bSepDVDL,bStateChanged,bNS,bFillGrid,bCalcCGCM,bBS;
 +    gmx_bool   bDoLongRangeNS,bDoForces,bDoPotential,bSepLRF;
 +    gmx_bool   bDoAdressWF;
 +    matrix boxs;
 +    rvec   vzero,box_diag;
 +    real   e,v,dvdlambda[efptNR];
 +    t_pbc  pbc;
 +    float  cycles_pme,cycles_force;
 +
 +    start  = mdatoms->start;
 +    homenr = mdatoms->homenr;
 +
 +    bSepDVDL = (fr->bSepDVDL && do_per_step(step,inputrec->nstlog));
 +
 +    clear_mat(vir_force);
 +
 +    if (PARTDECOMP(cr))
 +    {
 +        pd_cg_range(cr,&cg0,&cg1);
 +    }
 +    else
 +    {
 +        cg0 = 0;
 +        if (DOMAINDECOMP(cr))
 +        {
 +            cg1 = cr->dd->ncg_tot;
 +        }
 +        else
 +        {
 +            cg1 = top->cgs.nr;
 +        }
 +        if (fr->n_tpi > 0)
 +        {
 +            cg1--;
 +        }
 +    }
 +
 +    bStateChanged  = (flags & GMX_FORCE_STATECHANGED);
 +    bNS            = (flags & GMX_FORCE_NS) && (fr->bAllvsAll==FALSE);
 +    /* Should we update the long-range neighborlists at this step? */
 +    bDoLongRangeNS = fr->bTwinRange && bNS;
 +    /* Should we perform the long-range nonbonded evaluation inside the neighborsearching? */
 +    bFillGrid      = (bNS && bStateChanged);
 +    bCalcCGCM      = (bFillGrid && !DOMAINDECOMP(cr));
 +    bDoForces      = (flags & GMX_FORCE_FORCES);
 +    bDoPotential   = (flags & GMX_FORCE_ENERGY);
 +    bSepLRF        = ((inputrec->nstcalclr>1) && bDoForces &&
 +                      (flags & GMX_FORCE_SEPLRF) && (flags & GMX_FORCE_DO_LR));
 +
 +    /* should probably move this to the forcerec since it doesn't change */
 +    bDoAdressWF   = ((fr->adress_type!=eAdressOff));
 +
 +    if (bStateChanged)
 +    {
 +        update_forcerec(fplog,fr,box);
 +
 +        if (NEED_MUTOT(*inputrec))
 +        {
 +            /* Calculate total (local) dipole moment in a temporary common array.
 +             * This makes it possible to sum them over nodes faster.
 +             */
 +            calc_mu(start,homenr,
 +                    x,mdatoms->chargeA,mdatoms->chargeB,mdatoms->nChargePerturbed,
 +                    mu,mu+DIM);
 +        }
 +    }
 +
 +    if (fr->ePBC != epbcNONE) { 
 +        /* Compute shift vectors every step,
 +         * because of pressure coupling or box deformation!
 +         */
 +        if ((flags & GMX_FORCE_DYNAMICBOX) && bStateChanged)
 +            calc_shifts(box,fr->shift_vec);
 +
 +        if (bCalcCGCM) { 
 +            put_charge_groups_in_box(fplog,cg0,cg1,fr->ePBC,box,
 +                    &(top->cgs),x,fr->cg_cm);
 +            inc_nrnb(nrnb,eNR_CGCM,homenr);
 +            inc_nrnb(nrnb,eNR_RESETX,cg1-cg0);
 +        } 
 +        else if (EI_ENERGY_MINIMIZATION(inputrec->eI) && graph) {
 +            unshift_self(graph,box,x);
 +        }
 +    } 
 +    else if (bCalcCGCM) {
 +        calc_cgcm(fplog,cg0,cg1,&(top->cgs),x,fr->cg_cm);
 +        inc_nrnb(nrnb,eNR_CGCM,homenr);
 +    }
 +
 +    if (bCalcCGCM) {
 +        if (PAR(cr)) {
 +            move_cgcm(fplog,cr,fr->cg_cm);
 +        }
 +        if (gmx_debug_at)
 +            pr_rvecs(debug,0,"cgcm",fr->cg_cm,top->cgs.nr);
 +    }
 +
 +#ifdef GMX_MPI
 +    if (!(cr->duty & DUTY_PME)) {
 +        /* Send particle coordinates to the pme nodes.
 +         * Since this is only implemented for domain decomposition
 +         * and domain decomposition does not use the graph,
 +         * we do not need to worry about shifting.
 +         */    
 +
 +        wallcycle_start(wcycle,ewcPP_PMESENDX);
 +
 +        bBS = (inputrec->nwall == 2);
 +        if (bBS) {
 +            copy_mat(box,boxs);
 +            svmul(inputrec->wall_ewald_zfac,boxs[ZZ],boxs[ZZ]);
 +        }
 +
 +        gmx_pme_send_x(cr,bBS ? boxs : box,x,
 +                       mdatoms->nChargePerturbed,lambda[efptCOUL],
 +                       (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY)),step);
 +
 +        wallcycle_stop(wcycle,ewcPP_PMESENDX);
 +    }
 +#endif /* GMX_MPI */
 +
 +    /* Communicate coordinates and sum dipole if necessary */
 +    if (PAR(cr))
 +    {
 +        wallcycle_start(wcycle,ewcMOVEX);
 +        if (DOMAINDECOMP(cr))
 +        {
 +            dd_move_x(cr->dd,box,x);
 +        }
 +        else
 +        {
 +            move_x(fplog,cr,GMX_LEFT,GMX_RIGHT,x,nrnb);
 +        }
 +        wallcycle_stop(wcycle,ewcMOVEX);
 +    }
 +
 +    /* update adress weight beforehand */
 +    if(bStateChanged && bDoAdressWF)
 +    {
 +        /* need pbc for adress weight calculation with pbc_dx */
 +        set_pbc(&pbc,inputrec->ePBC,box);
 +        if(fr->adress_site == eAdressSITEcog)
 +        {
 +            update_adress_weights_cog(top->idef.iparams,top->idef.il,x,fr,mdatoms,
 +                                      inputrec->ePBC==epbcNONE ? NULL : &pbc);
 +        }
 +        else if (fr->adress_site == eAdressSITEcom)
 +        {
 +            update_adress_weights_com(fplog,cg0,cg1,&(top->cgs),x,fr,mdatoms,
 +                                      inputrec->ePBC==epbcNONE ? NULL : &pbc);
 +        }
 +        else if (fr->adress_site == eAdressSITEatomatom){
 +            update_adress_weights_atom_per_atom(cg0,cg1,&(top->cgs),x,fr,mdatoms,
 +                                                inputrec->ePBC==epbcNONE ? NULL : &pbc);
 +        }
 +        else
 +        {
 +            update_adress_weights_atom(cg0,cg1,&(top->cgs),x,fr,mdatoms,
 +                                       inputrec->ePBC==epbcNONE ? NULL : &pbc);
 +        }
 +    }
 +
 +    if (NEED_MUTOT(*inputrec))
 +    {
 +
 +        if (bStateChanged)
 +        {
 +            if (PAR(cr))
 +            {
 +                gmx_sumd(2*DIM,mu,cr);
 +            }
 +            for(i=0; i<2; i++)
 +            {
 +                for(j=0;j<DIM;j++)
 +                {
 +                    fr->mu_tot[i][j] = mu[i*DIM + j];
 +                }
 +            }
 +        }
 +        if (fr->efep == efepNO)
 +        {
 +            copy_rvec(fr->mu_tot[0],mu_tot);
 +        }
 +        else
 +        {
 +            for(j=0; j<DIM; j++)
 +            {
 +                mu_tot[j] =
 +                    (1.0 - lambda[efptCOUL])*fr->mu_tot[0][j] + lambda[efptCOUL]*fr->mu_tot[1][j];
 +            }
 +        }
 +    }
 +
 +    /* Reset energies */
 +    reset_enerdata(&(inputrec->opts),fr,bNS,enerd,MASTER(cr));
 +    clear_rvecs(SHIFTS,fr->fshift);
 +
 +    if (bNS)
 +    {
 +        wallcycle_start(wcycle,ewcNS);
 +
 +        if (graph && bStateChanged)
 +        {
 +            /* Calculate intramolecular shift vectors to make molecules whole */
 +            mk_mshift(fplog,graph,fr->ePBC,box,x);
 +        }
 +
 +        /* Do the actual neighbour searching and if twin range electrostatics
 +         * also do the calculation of long range forces and energies.
 +         */
 +        for (i=0;i<efptNR;i++) {dvdlambda[i] = 0;}
 +        ns(fplog,fr,x,box,
 +           groups,&(inputrec->opts),top,mdatoms,
 +           cr,nrnb,lambda,dvdlambda,&enerd->grpp,bFillGrid,
 +           bDoLongRangeNS);
 +        if (bSepDVDL)
 +        {
 +            fprintf(fplog,sepdvdlformat,"LR non-bonded",0.0,dvdlambda);
 +        }
 +        enerd->dvdl_lin[efptVDW] += dvdlambda[efptVDW];
 +        enerd->dvdl_lin[efptCOUL] += dvdlambda[efptCOUL];
 +
 +        wallcycle_stop(wcycle,ewcNS);
 +    }
 +
 +    if (inputrec->implicit_solvent && bNS)
 +    {
 +        make_gb_nblist(cr,inputrec->gb_algorithm,inputrec->rlist,
 +                       x,box,fr,&top->idef,graph,fr->born);
 +    }
 +
 +    if (DOMAINDECOMP(cr))
 +    {
 +        if (!(cr->duty & DUTY_PME))
 +        {
 +            wallcycle_start(wcycle,ewcPPDURINGPME);
 +            dd_force_flop_start(cr->dd,nrnb);
 +        }
 +    }
 +
 +    if (inputrec->bRot)
 +    {
 +        /* Enforced rotation has its own cycle counter that starts after the collective
 +         * coordinates have been communicated. It is added to ddCyclF to allow
 +         * for proper load-balancing */
 +        wallcycle_start(wcycle,ewcROT);
 +        do_rotation(cr,inputrec,box,x,t,step,wcycle,bNS);
 +        wallcycle_stop(wcycle,ewcROT);
 +    }
 +
 +    /* Start the force cycle counter.
 +     * This counter is stopped in do_forcelow_level.
 +     * No parallel communication should occur while this counter is running,
 +     * since that will interfere with the dynamic load balancing.
 +     */
 +    wallcycle_start(wcycle,ewcFORCE);
 +    
 +    if (bDoForces)
 +    {
 +        /* Reset forces for which the virial is calculated separately:
 +         * PME/Ewald forces if necessary */
 +        if (fr->bF_NoVirSum)
 +        {
 +            if (flags & GMX_FORCE_VIRIAL)
 +            {
 +                fr->f_novirsum = fr->f_novirsum_alloc;
 +                if (fr->bDomDec)
 +                {
 +                    clear_rvecs(fr->f_novirsum_n,fr->f_novirsum);
 +                }
 +                else
 +                {
 +                    clear_rvecs(homenr,fr->f_novirsum+start);
 +                }
 +            }
 +            else
 +            {
 +                /* We are not calculating the pressure so we do not need
 +                 * a separate array for forces that do not contribute
 +                 * to the pressure.
 +                 */
 +                fr->f_novirsum = f;
 +            }
 +        }
 +
 +        /* Clear the short- and long-range forces */
 +        clear_rvecs(fr->natoms_force_constr,f);
 +        if(bSepLRF && do_per_step(step,inputrec->nstcalclr))
 +        {
 +            clear_rvecs(fr->natoms_force_constr,fr->f_twin);
 +        }
 +        
 +        clear_rvec(fr->vir_diag_posres);
 +    }
 +    if (inputrec->ePull == epullCONSTRAINT)
 +    {
 +        clear_pull_forces(inputrec->pull);
 +    }
 +
 +    /* update QMMMrec, if necessary */
 +    if(fr->bQMMM)
 +    {
 +        update_QMMMrec(cr,fr,x,mdatoms,box,top);
 +    }
 +
 +    if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_POSRES].nr > 0)
 +    {
 +        posres_wrapper(fplog,flags,bSepDVDL,inputrec,nrnb,top,box,x,
 +                       f,enerd,lambda,fr);
 +    }
 +
 +    if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_FBPOSRES].nr > 0)
 +    {
 +        /* Flat-bottomed position restraints always require full pbc */
 +        if(!(bStateChanged && bDoAdressWF))
 +        {
 +            set_pbc(&pbc,inputrec->ePBC,box);
 +        }
 +        v = fbposres(top->idef.il[F_FBPOSRES].nr,top->idef.il[F_FBPOSRES].iatoms,
 +                     top->idef.iparams_fbposres,
 +                     (const rvec*)x,fr->f_novirsum,fr->vir_diag_posres,
 +                     inputrec->ePBC==epbcNONE ? NULL : &pbc,
 +                     fr->rc_scaling,fr->ePBC,fr->posres_com);
 +        enerd->term[F_FBPOSRES] += v;
 +        inc_nrnb(nrnb,eNR_FBPOSRES,top->idef.il[F_FBPOSRES].nr/2);
 +    }
 +
 +    /* Compute the bonded and non-bonded energies and optionally forces */
 +    do_force_lowlevel(fplog,step,fr,inputrec,&(top->idef),
 +                      cr,nrnb,wcycle,mdatoms,&(inputrec->opts),
 +                      x,hist,f, bSepLRF ? fr->f_twin : f,enerd,fcd,mtop,top,fr->born,
 +                      &(top->atomtypes),bBornRadii,box,
 +                      inputrec->fepvals,lambda,
 +                      graph,&(top->excls),fr->mu_tot,
 +                      flags,
 +                      &cycles_pme);
 +
 +    if(bSepLRF)
 +    {
 +        if (do_per_step(step,inputrec->nstcalclr))
 +        {
 +            /* Add the long range forces to the short range forces */
 +            for(i=0; i<fr->natoms_force_constr; i++)
 +            {
 +                rvec_add(fr->f_twin[i],f[i],f[i]);
 +            }
 +        }
 +    }
 +    
 +    cycles_force = wallcycle_stop(wcycle,ewcFORCE);
 +
 +    if (ed)
 +    {
 +        do_flood(fplog,cr,x,f,ed,box,step,bNS);
 +    }
 +
 +    if (DOMAINDECOMP(cr))
 +    {
 +        dd_force_flop_stop(cr->dd,nrnb);
 +        if (wcycle)
 +        {
 +            dd_cycles_add(cr->dd,cycles_force-cycles_pme,ddCyclF);
 +        }
 +    }
 +
 +    if (bDoForces)
 +    {
 +        if (IR_ELEC_FIELD(*inputrec))
 +        {
 +            /* Compute forces due to electric field */
 +            calc_f_el(MASTER(cr) ? field : NULL,
 +                      start,homenr,mdatoms->chargeA,x,fr->f_novirsum,
 +                      inputrec->ex,inputrec->et,t);
 +        }
 +
 +        if (bDoAdressWF && fr->adress_icor == eAdressICThermoForce)
 +        {
 +            /* Compute thermodynamic force in hybrid AdResS region */
 +            adress_thermo_force(start,homenr,&(top->cgs),x,fr->f_novirsum,fr,mdatoms,
 +                                inputrec->ePBC==epbcNONE ? NULL : &pbc);
 +        }
 +
 +        /* Communicate the forces */
 +        if (PAR(cr))
 +        {
 +            wallcycle_start(wcycle,ewcMOVEF);
 +            if (DOMAINDECOMP(cr))
 +            {
 +                dd_move_f(cr->dd,f,fr->fshift);
 +                /* Do we need to communicate the separate force array
 +                 * for terms that do not contribute to the single sum virial?
 +                 * Position restraints and electric fields do not introduce
 +                 * inter-cg forces, only full electrostatics methods do.
 +                 * When we do not calculate the virial, fr->f_novirsum = f,
 +                 * so we have already communicated these forces.
 +                 */
 +                if (EEL_FULL(fr->eeltype) && cr->dd->n_intercg_excl &&
 +                    (flags & GMX_FORCE_VIRIAL))
 +                {
 +                    dd_move_f(cr->dd,fr->f_novirsum,NULL);
 +                }
 +                if (bSepLRF)
 +                {
 +                    /* We should not update the shift forces here,
 +                     * since f_twin is already included in f.
 +                     */
 +                    dd_move_f(cr->dd,fr->f_twin,NULL);
 +                }
 +            }
 +            else
 +            {
 +                pd_move_f(cr,f,nrnb);
 +                if (bSepLRF)
 +                {
 +                    pd_move_f(cr,fr->f_twin,nrnb);
 +                }
 +            }
 +            wallcycle_stop(wcycle,ewcMOVEF);
 +        }
 +
 +        /* If we have NoVirSum forces, but we do not calculate the virial,
 +         * we sum fr->f_novirum=f later.
 +         */
 +        if (vsite && !(fr->bF_NoVirSum && !(flags & GMX_FORCE_VIRIAL)))
 +        {
 +            wallcycle_start(wcycle,ewcVSITESPREAD);
 +            spread_vsite_f(fplog,vsite,x,f,fr->fshift,FALSE,NULL,nrnb,
 +                           &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr);
 +            wallcycle_stop(wcycle,ewcVSITESPREAD);
 +
 +            if (bSepLRF)
 +            {
 +                wallcycle_start(wcycle,ewcVSITESPREAD);
 +                spread_vsite_f(fplog,vsite,x,fr->f_twin,NULL,FALSE,NULL,
 +                               nrnb,
 +                               &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr);
 +                wallcycle_stop(wcycle,ewcVSITESPREAD);
 +            }
 +        }
 +
 +        if (flags & GMX_FORCE_VIRIAL)
 +        {
 +            /* Calculation of the virial must be done after vsites! */
 +            calc_virial(fplog,mdatoms->start,mdatoms->homenr,x,f,
 +                        vir_force,graph,box,nrnb,fr,inputrec->ePBC);
 +        }
 +    }
 +
 +    if (inputrec->ePull == epullUMBRELLA || inputrec->ePull == epullCONST_F)
 +    {
 +        pull_potential_wrapper(fplog,bSepDVDL,cr,inputrec,box,x,
 +                               f,vir_force,mdatoms,enerd,lambda,t);
 +    }
 +
 +    /* Add the forces from enforced rotation potentials (if any) */
 +    if (inputrec->bRot)
 +    {
 +        wallcycle_start(wcycle,ewcROTadd);
 +        enerd->term[F_COM_PULL] += add_rot_forces(inputrec->rot, f, cr,step,t);
 +        wallcycle_stop(wcycle,ewcROTadd);
 +    }
 +
 +    if (PAR(cr) && !(cr->duty & DUTY_PME))
 +    {
 +        /* In case of node-splitting, the PP nodes receive the long-range 
 +         * forces, virial and energy from the PME nodes here.
 +         */
 +        pme_receive_force_ener(fplog,bSepDVDL,cr,wcycle,enerd,fr);
 +    }
 +
 +    if (bDoForces)
 +    {
 +        post_process_forces(fplog,cr,step,nrnb,wcycle,
 +                            top,box,x,f,vir_force,mdatoms,graph,fr,vsite,
 +                            flags);
 +    }
 +
 +    /* Sum the potential energy terms from group contributions */
++    sum_epot(&(inputrec->opts),&(enerd->grpp),enerd->term);
 +}
 +
 +/* Top-level force computation entry point for one MD step.
 + *
 + * Dispatches to the Verlet-scheme or group-scheme implementation based on
 + * inputrec->cutoff_scheme. Before dispatching, the GMX_FORCE_NONBONDED bit
 + * is cleared from flags when the force record indicates that non-bonded
 + * interactions are disabled, so neither implementation computes them.
 + * On return, f[] holds the forces, vir_force the virial (if requested via
 + * flags), and enerd the energy terms.
 + */
 +void do_force(FILE *fplog,t_commrec *cr,
 +              t_inputrec *inputrec,
 +              gmx_large_int_t step,t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +              gmx_localtop_t *top,
 +              gmx_mtop_t *mtop,
 +              gmx_groups_t *groups,
 +              matrix box,rvec x[],history_t *hist,
 +              rvec f[],
 +              tensor vir_force,
 +              t_mdatoms *mdatoms,
 +              gmx_enerdata_t *enerd,t_fcdata *fcd,
 +              real *lambda,t_graph *graph,
 +              t_forcerec *fr,
 +              gmx_vsite_t *vsite,rvec mu_tot,
 +              double t,FILE *field,gmx_edsam_t ed,
 +              gmx_bool bBornRadii,
 +              int flags)
 +{
 +    /* Modify the force flag if we are not doing non-bonded interactions */
 +    if (!fr->bNonbonded)
 +    {
 +        flags &= ~GMX_FORCE_NONBONDED;
 +    }
 +
 +    /* Forward all arguments to the scheme-specific implementation */
 +    switch (inputrec->cutoff_scheme)
 +    {
 +        case ecutsVERLET:
 +            do_force_cutsVERLET(fplog, cr, inputrec,
 +                                step, nrnb, wcycle,
 +                                top, mtop,
 +                                groups,
 +                                box, x, hist,
 +                                f, vir_force,
 +                                mdatoms,
 +                                enerd, fcd,
 +                                lambda, graph,
 +                                fr, fr->ic, 
 +                                vsite, mu_tot,
 +                                t, field, ed,
 +                                bBornRadii,
 +                                flags);
 +            break;
 +        case ecutsGROUP:
 +             do_force_cutsGROUP(fplog, cr, inputrec,
 +                                step, nrnb, wcycle,
 +                                top, mtop,
 +                                groups,
 +                                box, x, hist,
 +                                f, vir_force,
 +                                mdatoms,
 +                                enerd, fcd,
 +                                lambda, graph,
 +                                fr, vsite, mu_tot,
 +                                t, field, ed,
 +                                bBornRadii,
 +                                flags);
 +            break;
 +        default:
 +            gmx_incons("Invalid cut-off scheme passed!");
 +    }
 +}
 +
 +
 +/* Apply constraints to the starting configuration.
 + *
 + * Three constraint passes may be performed:
 + *  1. constrain the initial coordinates (econqCoord);
 + *  2. for velocity-Verlet integrators, also constrain the initial
 + *     velocities (econqVeloc);
 + *  3. for state-velocity integrators other than plain VV, constrain
 + *     back-propagated positions at t=-dt (velocities are reversed, the
 + *     t-dt positions are built in a scratch buffer, constrained against
 + *     the t=0 positions, and the velocities re-reversed afterwards).
 + * A scratch array of state->natoms rvecs is allocated and freed here.
 + */
 +void do_constrain_first(FILE *fplog,gmx_constr_t constr,
 +                        t_inputrec *ir,t_mdatoms *md,
 +                        t_state *state,rvec *f,
 +                        t_graph *graph,t_commrec *cr,t_nrnb *nrnb,
 +                        t_forcerec *fr, gmx_localtop_t *top, tensor shake_vir)
 +{
 +    int    i,m,start,end;
 +    gmx_large_int_t step;
 +    real   dt=ir->delta_t;
 +    real   dvdl_dum;
 +    rvec   *savex;      /* scratch buffer for the t=-dt positions */
 +
 +    snew(savex,state->natoms);
 +
 +    /* Home-atom range of this rank */
 +    start = md->start;
 +    end   = md->homenr + start;
 +
 +    if (debug)
 +        fprintf(debug,"vcm: start=%d, homenr=%d, end=%d\n",
 +                start,md->homenr,end);
 +    /* Do a first constrain to reset particles... */
 +    step = ir->init_step;
 +    if (fplog)
 +    {
 +        char buf[STEPSTRSIZE];
 +        fprintf(fplog,"\nConstraining the starting coordinates (step %s)\n",
 +                gmx_step_str(step,buf));
 +    }
 +    dvdl_dum = 0;
 +
 +    /* constrain the current position */
 +    constrain(NULL,TRUE,FALSE,constr,&(top->idef),
 +              ir,NULL,cr,step,0,md,
 +              state->x,state->x,NULL,
 +              fr->bMolPBC,state->box,
 +              state->lambda[efptBONDED],&dvdl_dum,
 +              NULL,NULL,nrnb,econqCoord,
 +              ir->epc==epcMTTK,state->veta,state->veta);
 +    if (EI_VV(ir->eI))
 +    {
 +        /* constrain the initial velocity, and save it */
 +        /* also may be useful if we need the ekin from the halfstep for velocity verlet */
 +        /* might not yet treat veta correctly */
 +        constrain(NULL,TRUE,FALSE,constr,&(top->idef),
 +                  ir,NULL,cr,step,0,md,
 +                  state->x,state->v,state->v,
 +                  fr->bMolPBC,state->box,
 +                  state->lambda[efptBONDED],&dvdl_dum,
 +                  NULL,NULL,nrnb,econqVeloc,
 +                  ir->epc==epcMTTK,state->veta,state->veta);
 +    }
 +    /* constrain the initial velocities at t-dt/2 */
 +    if (EI_STATE_VELOCITY(ir->eI) && ir->eI!=eiVV)
 +    {
 +        for(i=start; (i<end); i++)
 +        {
 +            for(m=0; (m<DIM); m++)
 +            {
 +                /* Reverse the velocity */
 +                state->v[i][m] = -state->v[i][m];
 +                /* Store the position at t-dt in buf */
 +                savex[i][m] = state->x[i][m] + dt*state->v[i][m];
 +            }
 +        }
 +    /* Shake the positions at t=-dt with the positions at t=0
 +     * as reference coordinates.
 +     */
 +        if (fplog)
 +        {
 +            char buf[STEPSTRSIZE];
 +            fprintf(fplog,"\nConstraining the coordinates at t0-dt (step %s)\n",
 +                    gmx_step_str(step,buf));
 +        }
 +        dvdl_dum = 0;
 +        constrain(NULL,TRUE,FALSE,constr,&(top->idef),
 +                  ir,NULL,cr,step,-1,md,
 +                  state->x,savex,NULL,
 +                  fr->bMolPBC,state->box,
 +                  state->lambda[efptBONDED],&dvdl_dum,
 +                  state->v,NULL,nrnb,econqCoord,
 +                  ir->epc==epcMTTK,state->veta,state->veta);
 +        
 +        for(i=start; i<end; i++) {
 +            for(m=0; m<DIM; m++) {
 +                /* Re-reverse the velocities */
 +                state->v[i][m] = -state->v[i][m];
 +            }
 +        }
 +    }
 +    sfree(savex);
 +}
 +
 +/* Precompute long-range dispersion-correction terms and store them in fr.
 + *
 + * Fills fr->enershiftsix/enershifttwelve (constant energy shifts for
 + * potential-shifted kernels), fr->enerdiffsix/enerdifftwelve (energy
 + * corrections for the r^-6 and r^-12 parts) and fr->virdiffsix/
 + * fr->virdifftwelve (the corresponding virial corrections).
 + * For switch/shift vdw types the missing tail is integrated numerically
 + * from the tabulated cubic-spline potential between rvdw_switch and rvdw;
 + * for plain/user cut-offs analytic tail integrals are used. All fields
 + * are zeroed first, so eDispCorr == edispcNO leaves them at zero.
 + */
 +void calc_enervirdiff(FILE *fplog,int eDispCorr,t_forcerec *fr)
 +{
 +  double eners[2],virs[2],enersum,virsum,y0,f,g,h;
 +  double r0,r1,r,rc3,rc9,ea,eb,ec,pa,pb,pc,pd;
 +  double invscale,invscale2,invscale3;
 +  int    ri0,ri1,ri,i,offstart,offset;
 +  real   scale,*vdwtab,tabfactor,tmp;
 +
 +  fr->enershiftsix = 0;
 +  fr->enershifttwelve = 0;
 +  fr->enerdiffsix = 0;
 +  fr->enerdifftwelve = 0;
 +  fr->virdiffsix = 0;
 +  fr->virdifftwelve = 0;
 +
 +  if (eDispCorr != edispcNO) {
 +    /* eners[0]/virs[0] accumulate the r^-6 (dispersion) terms,
 +     * eners[1]/virs[1] the r^-12 (repulsion) terms. */
 +    for(i=0; i<2; i++) {
 +      eners[i] = 0;
 +      virs[i]  = 0;
 +    }
 +    if ((fr->vdwtype == evdwSWITCH) || (fr->vdwtype == evdwSHIFT)) {
 +      if (fr->rvdw_switch == 0)
 +      gmx_fatal(FARGS,
 +                "With dispersion correction rvdw-switch can not be zero "
 +                "for vdw-type = %s",evdw_names[fr->vdwtype]);
 +
 +      scale  = fr->nblists[0].table_elec_vdw.scale;
 +      vdwtab = fr->nblists[0].table_vdw.data;
 +
 +      /* Round the cut-offs to exact table values for precision */
 +      ri0 = floor(fr->rvdw_switch*scale);
 +      ri1 = ceil(fr->rvdw*scale);
 +      r0  = ri0/scale;
 +      r1  = ri1/scale;
 +      rc3 = r0*r0*r0;
 +      rc9  = rc3*rc3*rc3;
 +
 +      if (fr->vdwtype == evdwSHIFT)
 +      {
 +          /* Determine the constant energy shift below rvdw_switch.
 +           * Table has a scale factor since we have scaled it down to compensate
 +           * for scaling-up c6/c12 with the derivative factors to save flops in analytical kernels.
 +           */
 +          fr->enershiftsix    = (real)(-1.0/(rc3*rc3)) - 6.0*vdwtab[8*ri0];
 +          fr->enershifttwelve = (real)( 1.0/(rc9*rc3)) - 12.0*vdwtab[8*ri0 + 4];
 +      }
 +      /* Add the constant part from 0 to rvdw_switch.
 +       * This integration from 0 to rvdw_switch overcounts the number
 +       * of interactions by 1, as it also counts the self interaction.
 +       * We will correct for this later.
 +       */
 +      eners[0] += 4.0*M_PI*fr->enershiftsix*rc3/3.0;
 +      eners[1] += 4.0*M_PI*fr->enershifttwelve*rc3/3.0;
 +
 +      invscale = 1.0/(scale);
 +      invscale2 = invscale*invscale;
 +      invscale3 = invscale*invscale2;
 +
 +      /* following summation derived from cubic spline definition,
 +      Numerical Recipes in C, second edition, p. 113-116.  Exact
 +      for the cubic spline.  We first calculate the negative of
 +      the energy from rvdw to rvdw_switch, assuming that g(r)=1,
 +      and then add the more standard, abrupt cutoff correction to
 +      that result, yielding the long-range correction for a
 +      switched function.  We perform both the pressure and energy
 +      loops at the same time for simplicity, as the computational
 +      cost is low. */
 +
 +      for (i=0;i<2;i++) {
 +        enersum = 0.0; virsum = 0.0;
 +        if (i==0)
 +        {
 +            offstart = 0;
 +            /* Since the dispersion table has been scaled down a factor 6.0 and the repulsion
 +             * a factor 12.0 to compensate for the c6/c12 parameters inside nbfp[] being scaled
 +             * up (to save flops in kernels), we need to correct for this.
 +             */
 +            tabfactor = 6.0;
 +        }
 +        else
 +        {
 +            offstart = 4;
 +            tabfactor = 12.0;
 +        }
 +      /* Integrate the spline segment [ri*invscale, (ri+1)*invscale] */
 +      for (ri=ri0; ri<ri1; ri++) {
 +          r = ri*invscale;
 +          ea = invscale3;
 +          eb = 2.0*invscale2*r;
 +          ec = invscale*r*r;
 +
 +          pa = invscale3;
 +          pb = 3.0*invscale2*r;
 +          pc = 3.0*invscale*r*r;
 +          pd = r*r*r;
 +
 +          /* this "8" is from the packing in the vdwtab array - perhaps should be #define'ed? */
 +          offset = 8*ri + offstart;
 +          y0 = vdwtab[offset];
 +          f  = vdwtab[offset+1];
 +          g  = vdwtab[offset+2];
 +          h  = vdwtab[offset+3];
 +
 +          enersum += y0*(ea/3 + eb/2 + ec) + f*(ea/4 + eb/3 + ec/2) + g*(ea/5 + eb/4 + ec/3) + h*(ea/6 + eb/5 + ec/4);
 +          virsum  += f*(pa/4 + pb/3 + pc/2 + pd) + 2*g*(pa/5 + pb/4 + pc/3 + pd/2) + 3*h*(pa/6 + pb/5 + pc/4 + pd/3);
 +        }
 +          
 +        enersum *= 4.0*M_PI*tabfactor;
 +        virsum  *= 4.0*M_PI*tabfactor;
 +        eners[i] -= enersum;
 +        virs[i]  -= virsum;
 +      }
 +
 +      /* now add the correction for rvdw_switch to infinity */
 +      eners[0] += -4.0*M_PI/(3.0*rc3);
 +      eners[1] +=  4.0*M_PI/(9.0*rc9);
 +      virs[0]  +=  8.0*M_PI/rc3;
 +      virs[1]  += -16.0*M_PI/(3.0*rc9);
 +    }
 +    else if ((fr->vdwtype == evdwCUT) || (fr->vdwtype == evdwUSER)) {
 +      if (fr->vdwtype == evdwUSER && fplog)
 +      fprintf(fplog,
 +              "WARNING: using dispersion correction with user tables\n");
 +      rc3  = fr->rvdw*fr->rvdw*fr->rvdw;
 +      rc9  = rc3*rc3*rc3;
 +      /* Contribution beyond the cut-off */
 +      eners[0] += -4.0*M_PI/(3.0*rc3);
 +      eners[1] +=  4.0*M_PI/(9.0*rc9);
 +      if (fr->vdw_modifier==eintmodPOTSHIFT) {
 +          /* Contribution within the cut-off */
 +          eners[0] += -4.0*M_PI/(3.0*rc3);
 +          eners[1] +=  4.0*M_PI/(3.0*rc9);
 +      }
 +      /* Contribution beyond the cut-off */
 +      virs[0]  +=  8.0*M_PI/rc3;
 +      virs[1]  += -16.0*M_PI/(3.0*rc9);
 +    } else {
 +      gmx_fatal(FARGS,
 +              "Dispersion correction is not implemented for vdw-type = %s",
 +              evdw_names[fr->vdwtype]);
 +    }
 +    fr->enerdiffsix    = eners[0];
 +    fr->enerdifftwelve = eners[1];
 +    /* The 0.5 is due to the Gromacs definition of the virial */
 +    fr->virdiffsix     = 0.5*virs[0];
 +    fr->virdifftwelve  = 0.5*virs[1];
 +  }
 +}
 +
 +/* Apply the precomputed long-range dispersion correction for one step.
 + *
 + * Uses the fr->enerdiff*/fr->virdiff* terms (set up by calc_enervirdiff)
 + * together with the current box volume and particle density to compute:
 + *  - *enercorr: the energy correction,
 + *  - *prescorr: the pressure correction (also added to the diagonals of
 + *    pres and virial, which are cleared on entry),
 + +  *  - *dvdlcorr: the free-energy derivative correction when ir->efep is on.
 + * For test-particle insertion (fr->n_tpi) only interactions with the
 + * inserted molecule are corrected. C6/C12 averages are lambda-interpolated
 + * when free-energy perturbation is active.
 + */
 +void calc_dispcorr(FILE *fplog,t_inputrec *ir,t_forcerec *fr,
 +                   gmx_large_int_t step,int natoms,
 +                   matrix box,real lambda,tensor pres,tensor virial,
 +                   real *prescorr, real *enercorr, real *dvdlcorr)
 +{
 +    gmx_bool bCorrAll,bCorrPres;
 +    real dvdlambda,invvol,dens,ninter,avcsix,avctwelve,enerdiff,svir=0,spres=0;
 +    int  m;
 +
 +    *prescorr = 0;
 +    *enercorr = 0;
 +    *dvdlcorr = 0;
 +
 +    clear_mat(virial);
 +    clear_mat(pres);
 +
 +    if (ir->eDispCorr != edispcNO) {
 +        bCorrAll  = (ir->eDispCorr == edispcAllEner ||
 +                     ir->eDispCorr == edispcAllEnerPres);
 +        bCorrPres = (ir->eDispCorr == edispcEnerPres ||
 +                     ir->eDispCorr == edispcAllEnerPres);
 +
 +        invvol = 1/det(box);
 +        if (fr->n_tpi)
 +        {
 +            /* Only correct for the interactions with the inserted molecule */
 +            dens = (natoms - fr->n_tpi)*invvol;
 +            ninter = fr->n_tpi;
 +        }
 +        else
 +        {
 +            dens = natoms*invvol;
 +            ninter = 0.5*natoms;
 +        }
 +
 +        if (ir->efep == efepNO)
 +        {
 +            avcsix    = fr->avcsix[0];
 +            avctwelve = fr->avctwelve[0];
 +        }
 +        else
 +        {
 +            /* Interpolate the average C6/C12 between the two FEP states */
 +            avcsix    = (1 - lambda)*fr->avcsix[0]    + lambda*fr->avcsix[1];
 +            avctwelve = (1 - lambda)*fr->avctwelve[0] + lambda*fr->avctwelve[1];
 +        }
 +
 +        enerdiff = ninter*(dens*fr->enerdiffsix - fr->enershiftsix);
 +        *enercorr += avcsix*enerdiff;
 +        dvdlambda = 0.0;
 +        if (ir->efep != efepNO)
 +        {
 +            dvdlambda += (fr->avcsix[1] - fr->avcsix[0])*enerdiff;
 +        }
 +        if (bCorrAll)
 +        {
 +            /* Also correct the repulsion (r^-12) part of the energy */
 +            enerdiff = ninter*(dens*fr->enerdifftwelve - fr->enershifttwelve);
 +            *enercorr += avctwelve*enerdiff;
 +            if (fr->efep != efepNO)
 +            {
 +                dvdlambda += (fr->avctwelve[1] - fr->avctwelve[0])*enerdiff;
 +            }
 +        }
 +
 +        if (bCorrPres)
 +        {
 +            svir = ninter*dens*avcsix*fr->virdiffsix/3.0;
 +            if (ir->eDispCorr == edispcAllEnerPres)
 +            {
 +                svir += ninter*dens*avctwelve*fr->virdifftwelve/3.0;
 +            }
 +            /* The factor 2 is because of the Gromacs virial definition */
 +            spres = -2.0*invvol*svir*PRESFAC;
 +
 +            for(m=0; m<DIM; m++) {
 +                virial[m][m] += svir;
 +                pres[m][m] += spres;
 +            }
 +            *prescorr += spres;
 +        }
 +
 +        /* Can't currently control when it prints, for now, just print when debugging */
 +        if (debug)
 +        {
 +            if (bCorrAll) {
 +                fprintf(debug,"Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
 +                        avcsix,avctwelve);
 +            }
 +            if (bCorrPres)
 +            {
 +                fprintf(debug,
 +                        "Long Range LJ corr.: Epot %10g, Pres: %10g, Vir: %10g\n",
 +                        *enercorr,spres,svir);
 +            }
 +            else
 +            {
 +                fprintf(debug,"Long Range LJ corr.: Epot %10g\n",*enercorr);
 +            }
 +        }
 +
 +        if (fr->bSepDVDL && do_per_step(step,ir->nstlog))
 +        {
 +            fprintf(fplog,sepdvdlformat,"Dispersion correction",
 +                    *enercorr,dvdlambda);
 +        }
 +        if (fr->efep != efepNO)
 +        {
 +            *dvdlcorr += dvdlambda;
 +        }
 +    }
 +}
 +
 +/* Remove periodic-boundary jumps from the starting coordinates x.
 + *
 + * Recomputes the shift vectors for the box and, if a connectivity graph
 + * is supplied, builds molecular shift indices and shifts atoms so that
 + * molecules are whole. The second mk_mshift call heals molecules that
 + * were broken across the boundary in the input (e.g. coordinates imported
 + * from other software).
 + */
 +void do_pbc_first(FILE *fplog,matrix box,t_forcerec *fr,
 +                t_graph *graph,rvec x[])
 +{
 +  if (fplog)
 +    fprintf(fplog,"Removing pbc first time\n");
 +  calc_shifts(box,fr->shift_vec);
 +  if (graph) {
 +    mk_mshift(fplog,graph,fr->ePBC,box,x);
 +    if (gmx_debug_at)
 +      p_graph(debug,"do_pbc_first 1",graph);
 +    shift_self(graph,box,x);
 +    /* By doing an extra mk_mshift the molecules that are broken
 +     * because they were e.g. imported from another software
 +     * will be made whole again. Such are the healing powers
 +     * of GROMACS.
 +     */
 +    mk_mshift(fplog,graph,fr->ePBC,box,x);
 +    if (gmx_debug_at)
 +      p_graph(debug,"do_pbc_first 2",graph);
 +  }
 +  if (fplog)
 +    fprintf(fplog,"Done rmpbc\n");
 +}
 +
 +/* Make every molecule in the full topology whole under PBC.
 + *
 + * Walks all molecule blocks; molecules with a single atom (or, when not
 + * the first call, a single charge group) need no PBC treatment and are
 + * skipped. For the rest, a connectivity graph is built once per molecule
 + * type and each molecule instance is shifted whole in place. bFirst only
 + * controls the log message and the single-charge-group shortcut.
 + */
 +static void low_do_pbc_mtop(FILE *fplog,int ePBC,matrix box,
 +                          gmx_mtop_t *mtop,rvec x[],
 +                          gmx_bool bFirst)
 +{
 +  t_graph *graph;
 +  int mb,as,mol;        /* as = running first-atom index of the current molecule */
 +  gmx_molblock_t *molb;
 +
 +  if (bFirst && fplog)
 +    fprintf(fplog,"Removing pbc first time\n");
 +
 +  snew(graph,1);
 +  as = 0;
 +  for(mb=0; mb<mtop->nmolblock; mb++) {
 +    molb = &mtop->molblock[mb];
 +    if (molb->natoms_mol == 1 ||
 +      (!bFirst && mtop->moltype[molb->type].cgs.nr == 1)) {
 +      /* Just one atom or charge group in the molecule, no PBC required */
 +      as += molb->nmol*molb->natoms_mol;
 +    } else {
 +      /* Pass NULL iso fplog to avoid graph prints for each molecule type */
 +      mk_graph_ilist(NULL,mtop->moltype[molb->type].ilist,
 +                   0,molb->natoms_mol,FALSE,FALSE,graph);
 +
 +      for(mol=0; mol<molb->nmol; mol++) {
 +      mk_mshift(fplog,graph,ePBC,box,x+as);
 +
 +      shift_self(graph,box,x+as);
 +      /* The molecule is whole now.
 +       * We don't need the second mk_mshift call as in do_pbc_first,
 +       * since we no longer need this graph.
 +       */
 +
 +      as += molb->natoms_mol;
 +      }
 +      done_graph(graph);
 +    }
 +  }
 +  sfree(graph);
 +}
 +
 +/* First-time variant of do_pbc_mtop: treats every multi-atom molecule,
 + * regardless of its number of charge groups. */
 +void do_pbc_first_mtop(FILE *fplog,int ePBC,matrix box,
 +                     gmx_mtop_t *mtop,rvec x[])
 +{
 +  low_do_pbc_mtop(fplog,ePBC,box,mtop,x,TRUE);
 +}
 +
 +/* Make molecules whole under PBC; single-charge-group molecules are
 + * skipped (see low_do_pbc_mtop with bFirst=FALSE). */
 +void do_pbc_mtop(FILE *fplog,int ePBC,matrix box,
 +               gmx_mtop_t *mtop,rvec x[])
 +{
 +  low_do_pbc_mtop(fplog,ePBC,box,mtop,x,FALSE);
 +}
 +
 +/* Finalize a run: reduce counters over ranks and print statistics.
 + *
 + * Sums cycle counters and flop counts (t_nrnb) across all ranks of the
 + * simulation, reduces the total process time for real-MPI builds, and on
 + * the master rank prints flop accounting, cycle/timing breakdown (incl.
 + * optional GPU timings) and performance numbers to the log (and to stderr
 + * when bWriteStat is set). With particle decomposition the per-rank load
 + * is gathered on the master and printed as well.
 + */
 +void finish_run(FILE *fplog,t_commrec *cr,const char *confout,
 +                t_inputrec *inputrec,
 +                t_nrnb nrnb[],gmx_wallcycle_t wcycle,
 +                gmx_runtime_t *runtime,
 +                wallclock_gpu_t *gputimes,
 +                int omp_nth_pp,
 +                gmx_bool bWriteStat)
 +{
 +    int    i,j;
 +    t_nrnb *nrnb_tot=NULL;    /* summed flop counters; aliases nrnb on a single rank */
 +    real   delta_t;
 +    double nbfs,mflop;
 +
 +    wallcycle_sum(cr,wcycle);
 +
 +    if (cr->nnodes > 1)
 +    {
 +        snew(nrnb_tot,1);
 +#ifdef GMX_MPI
 +        MPI_Allreduce(nrnb->n,nrnb_tot->n,eNRNB,MPI_DOUBLE,MPI_SUM,
 +                      cr->mpi_comm_mysim);
 +#endif
 +    }
 +    else
 +    {
 +        nrnb_tot = nrnb;
 +    }
 +
 +#if defined(GMX_MPI) && !defined(GMX_THREAD_MPI)
 +    if (cr->nnodes > 1)
 +    {
 +        /* reduce nodetime over all MPI processes in the current simulation */
 +        double sum;
 +        MPI_Allreduce(&runtime->proctime,&sum,1,MPI_DOUBLE,MPI_SUM,
 +                      cr->mpi_comm_mysim);
 +        runtime->proctime = sum;
 +    }
 +#endif
 +
 +    if (SIMMASTER(cr))
 +    {
 +        print_flop(fplog,nrnb_tot,&nbfs,&mflop);
 +    }
 +    if (cr->nnodes > 1)
 +    {
 +        /* Only allocated in the multi-rank branch above */
 +        sfree(nrnb_tot);
 +    }
 +
 +    if ((cr->duty & DUTY_PP) && DOMAINDECOMP(cr))
 +    {
 +        print_dd_statistics(cr,inputrec,fplog);
 +    }
 +
 +#ifdef GMX_MPI
 +    if (PARTDECOMP(cr))
 +    {
 +        /* With particle decomposition, gather per-rank counters on the
 +         * master to print the load balance. */
 +        if (MASTER(cr))
 +        {
 +            t_nrnb     *nrnb_all;
 +            int        s;
 +            MPI_Status stat;
 +
 +            snew(nrnb_all,cr->nnodes);
 +            nrnb_all[0] = *nrnb;
 +            for(s=1; s<cr->nnodes; s++)
 +            {
 +                MPI_Recv(nrnb_all[s].n,eNRNB,MPI_DOUBLE,s,0,
 +                         cr->mpi_comm_mysim,&stat);
 +            }
 +            pr_load(fplog,cr,nrnb_all);
 +            sfree(nrnb_all);
 +        }
 +        else
 +        {
 +            MPI_Send(nrnb->n,eNRNB,MPI_DOUBLE,MASTERRANK(cr),0,
 +                     cr->mpi_comm_mysim);
 +        }
 +    }
 +#endif
 +
 +    if (SIMMASTER(cr))
 +    {
 +        wallcycle_print(fplog,cr->nnodes,cr->npmenodes,runtime->realtime,
 +                        wcycle,gputimes);
 +
 +        if (EI_DYNAMICS(inputrec->eI))
 +        {
 +            delta_t = inputrec->delta_t;
 +        }
 +        else
 +        {
 +            delta_t = 0;
 +        }
 +
 +        if (fplog)
 +        {
 +            print_perf(fplog,runtime->proctime,runtime->realtime,
 +                       cr->nnodes-cr->npmenodes,
 +                       runtime->nsteps_done,delta_t,nbfs,mflop,
 +                       omp_nth_pp);
 +        }
 +        if (bWriteStat)
 +        {
 +            print_perf(stderr,runtime->proctime,runtime->realtime,
 +                       cr->nnodes-cr->npmenodes,
 +                       runtime->nsteps_done,delta_t,nbfs,mflop,
 +                       omp_nth_pp);
 +        }
 +    }
 +}
 +
 +/* Initialize the free-energy lambda vector (and optionally its copy lam0)
 + * from the inputrec. When neither free energy nor simulated tempering is
 + * active, all components are zeroed. Otherwise the state index and each
 + * lambda component are taken from ir->fepvals (init_lambda overrides the
 + * per-state all_lambda table for backwards compatibility), and for
 + * simulated tempering the reference temperatures are rescaled to the
 + * current state. The resulting vector is echoed to the log file.
 + * fplog may be NULL; lam0 may be NULL if the caller does not need it.
 + */
 +extern void initialize_lambdas(FILE *fplog,t_inputrec *ir,int *fep_state,real *lambda,double *lam0)
 +{
 +    /* this function works, but could probably use a logic rewrite to keep all the different
 +       types of efep straight. */
 +
 +    int i;
 +    t_lambda *fep = ir->fepvals;
 +
 +    if ((ir->efep==efepNO) && (ir->bSimTemp == FALSE)) {
 +        for (i=0;i<efptNR;i++)  {
 +            lambda[i] = 0.0;
 +            if (lam0)
 +            {
 +                lam0[i] = 0.0;
 +            }
 +        }
 +        return;
 +    } else {
 +        *fep_state = fep->init_fep_state; /* this might overwrite the checkpoint
 +                                             if checkpoint is set -- a kludge is in for now
 +                                             to prevent this.*/
 +        for (i=0;i<efptNR;i++)
 +        {
 +            /* overwrite lambda state with init_lambda for now for backwards compatibility */
 +            if (fep->init_lambda>=0) /* if it's -1, it was never initializd */
 +            {
 +                lambda[i] = fep->init_lambda;
 +                if (lam0) {
 +                    lam0[i] = lambda[i];
 +                }
 +            }
 +            else
 +            {
 +                lambda[i] = fep->all_lambda[i][*fep_state];
 +                if (lam0) {
 +                    lam0[i] = lambda[i];
 +                }
 +            }
 +        }
 +        if (ir->bSimTemp) {
 +            /* need to rescale control temperatures to match current state */
 +            for (i=0;i<ir->opts.ngtc;i++) {
 +                if (ir->opts.ref_t[i] > 0) {
 +                    ir->opts.ref_t[i] = ir->simtempvals->temperatures[*fep_state];
 +                }
 +            }
 +        }
 +    }
 +
 +    /* Send to the log the information on the current lambdas */
 +    if (fplog != NULL)
 +    {
 +        fprintf(fplog,"Initial vector of lambda components:[ ");
 +        for (i=0;i<efptNR;i++)
 +        {
 +            fprintf(fplog,"%10.4f ",lambda[i]);
 +        }
 +        fprintf(fplog,"]\n");
 +    }
 +    return;
 +}
 +
 +
 +/* One-stop initialization for an MD-like run: sets the start time,
 + * detects simulated annealing, initializes lambdas, the update and
 + * center-of-mass-removal machinery, flop counters, output files and
 + * energy bookkeeping, emits thermostat/AdResS citations, and clears the
 + * virial tensors and total dipole. Pass nfile == -1 to skip output-file
 + * setup; upd and vcm may be NULL when not needed.
 + * NOTE(review): locals j, n, tmpt, mod appear unused in this block;
 + * state is also unreferenced here -- possibly kept for ABI/merge reasons.
 + */
 +void init_md(FILE *fplog,
 +             t_commrec *cr,t_inputrec *ir,const output_env_t oenv,
 +             double *t,double *t0,
 +             real *lambda, int *fep_state, double *lam0,
 +             t_nrnb *nrnb,gmx_mtop_t *mtop,
 +             gmx_update_t *upd,
 +             int nfile,const t_filenm fnm[],
 +             gmx_mdoutf_t **outf,t_mdebin **mdebin,
 +             tensor force_vir,tensor shake_vir,rvec mu_tot,
 +             gmx_bool *bSimAnn,t_vcm **vcm, t_state *state, unsigned long Flags)
 +{
 +    int  i,j,n;
 +    real tmpt,mod;
 +
 +    /* Initial values */
 +    *t = *t0       = ir->init_t;
 +
 +    *bSimAnn=FALSE;
 +    for(i=0;i<ir->opts.ngtc;i++)
 +    {
 +        /* set bSimAnn if any group is being annealed */
 +        if(ir->opts.annealing[i]!=eannNO)
 +        {
 +            *bSimAnn = TRUE;
 +        }
 +    }
 +    if (*bSimAnn)
 +    {
 +        update_annealing_target_temp(&(ir->opts),ir->init_t);
 +    }
 +
 +    /* Initialize lambda variables */
 +    initialize_lambdas(fplog,ir,fep_state,lambda,lam0);
 +
 +    if (upd)
 +    {
 +        *upd = init_update(fplog,ir);
 +    }
 +
 +
 +    if (vcm != NULL)
 +    {
 +        *vcm = init_vcm(fplog,&mtop->groups,ir);
 +    }
 +
 +    /* Cite the thermostat papers once, unless we are appending to old logs */
 +    if (EI_DYNAMICS(ir->eI) && !(Flags & MD_APPENDFILES))
 +    {
 +        if (ir->etc == etcBERENDSEN)
 +        {
 +            please_cite(fplog,"Berendsen84a");
 +        }
 +        if (ir->etc == etcVRESCALE)
 +        {
 +            please_cite(fplog,"Bussi2007a");
 +        }
 +    }
 +
 +    init_nrnb(nrnb);
 +
 +    if (nfile != -1)
 +    {
 +        *outf = init_mdoutf(nfile,fnm,Flags,cr,ir,oenv);
 +
 +        *mdebin = init_mdebin((Flags & MD_APPENDFILES) ? NULL : (*outf)->fp_ene,
 +                              mtop,ir, (*outf)->fp_dhdl);
 +    }
 +
 +    if (ir->bAdress)
 +    {
 +      please_cite(fplog,"Fritsch12");
 +      please_cite(fplog,"Junghans10");
 +    }
 +    /* Initiate variables */
 +    clear_mat(force_vir);
 +    clear_mat(shake_vir);
 +    clear_rvec(mu_tot);
 +
 +    debug_gmx();
 +}
 +
index 7be0435b4c6a668858c6182ea9c37b532c935a01,0000000000000000000000000000000000000000..24044fb31f3b2dfbba97b66343207bf808f39422
mode 100644,000000..100644
--- /dev/null
@@@ -1,1429 -1,0 +1,1428 @@@
-   double isp= 0.564189583547756;
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <math.h>
 +#include "maths.h"
 +#include "typedefs.h"
 +#include "names.h"
 +#include "smalloc.h"
 +#include "gmx_fatal.h"
 +#include "futil.h"
 +#include "xvgr.h"
 +#include "vec.h"
 +#include "main.h"
 +#include "network.h"
 +#include "physics.h"
 +#include "force.h"
 +#include "gmxfio.h"
 +#include "macros.h"
 +#include "tables.h"
 +
 +/* All the possible (implemented) table functions.
 + * NOTE: the order here must match the tprops[] initializer below,
 + * which is indexed by these enum values.
 + */
 +enum { 
 +  etabLJ6,   
 +  etabLJ12, 
 +  etabLJ6Shift, 
 +  etabLJ12Shift, 
 +  etabShift,
 +  etabRF,
 +  etabRF_ZERO,
 +  etabCOUL, 
 +  etabEwald, 
 +  etabEwaldSwitch, 
 +  etabEwaldUser,
 +  etabEwaldUserSwitch,
 +  etabLJ6Switch, 
 +  etabLJ12Switch, 
 +  etabCOULSwitch, 
 +  etabLJ6Encad, 
 +  etabLJ12Encad, 
 +  etabCOULEncad,  
 +  etabEXPMIN, 
 +  etabUSER, 
 +  etabNR 
 +};
 +
 +/** Evaluates to true if the table type contains user data. */
 +#define ETAB_USER(e)  ((e) == etabUSER || \
 +                       (e) == etabEwaldUser || (e) == etabEwaldUserSwitch)
 +
 +typedef struct {
 +  const char *name;
 +  gmx_bool bCoulomb;
 +} t_tab_props;
 +
 +/* This structure holds name and a flag that tells whether 
 +   this is a Coulomb type function (indexed by the etab* enum above) */
 +static const t_tab_props tprops[etabNR] = {
 +  { "LJ6",  FALSE },
 +  { "LJ12", FALSE },
 +  { "LJ6Shift", FALSE },
 +  { "LJ12Shift", FALSE },
 +  { "Shift", TRUE },
 +  { "RF", TRUE },
 +  { "RF-zero", TRUE },
 +  { "COUL", TRUE },
 +  { "Ewald", TRUE },
 +  { "Ewald-Switch", TRUE },
 +  { "Ewald-User", TRUE },
 +  { "Ewald-User-Switch", TRUE },
 +  { "LJ6Switch", FALSE },
 +  { "LJ12Switch", FALSE },
 +  { "COULSwitch", TRUE },
 +  { "LJ6-Encad shift", FALSE },
 +  { "LJ12-Encad shift", FALSE },
 +  { "COUL-Encad shift",  TRUE },
 +  { "EXPMIN", FALSE },
 +  { "USER", FALSE }
 +};
 +
 +/* Index in the table that says which function to use */
 +enum { etiCOUL, etiLJ6, etiLJ12, etiNR };
 +
 +/* One generated table: nx points, data valid from index nx0 upwards,
 + * tabscale = points per unit x; x/v/f hold abscissa, potential, force. */
 +typedef struct {
 +  int  nx,nx0;
 +  double tabscale;
 +  double *x,*v,*f;
 +} t_tabledata;
 +
 +/* Small-integer powers; arguments are evaluated multiple times,
 + * so only pass side-effect-free expressions. */
 +#define pow2(x) ((x)*(x))
 +#define pow3(x) ((x)*(x)*(x))
 +#define pow4(x) ((x)*(x)*(x)*(x))
 +#define pow5(x) ((x)*(x)*(x)*(x)*(x))
 +
 +
 +/* Long-range Ewald potential erf(beta*r)/r; at r == 0 the analytic
 + * limit 2*beta/sqrt(pi) is returned to avoid a 0/0 division. */
 +static double v_ewald_lr(double beta,double r)
 +{
 +    if (r == 0)
 +    {
 +        return beta*2/sqrt(M_PI);
 +    }
 +    else
 +    {
 +        return gmx_erfd(beta*r)/r;
 +    }
 +}
 +
 +/* Fill a third-order spline force table (and optionally potential and
 + * packed FDV0 tables) for the long-range Ewald correction erf(beta*r)/r.
 + * The table has ntab points spaced dx apart; entries are filled from the
 + * outermost point inwards, and once values go out of numeric range the
 + * potential is continued linearly. table_v and table_fdv0 may be NULL.
 + * Fatal error if ntab < 2.
 + */
 +void table_spline3_fill_ewald_lr(real *table_f,
 +                                 real *table_v,
 +                                 real *table_fdv0,
 +                                 int   ntab,
 +                                 real  dx,
 +                                 real  beta)
 +{
 +    real tab_max;
 +    int i,i_inrange;
 +    double dc,dc_new;
 +    gmx_bool bOutOfRange;
 +    double v_r0,v_r1,v_inrange,vi,a0,a1,a2dx;
 +    double x_r0;
 +
 +    if (ntab < 2)
 +    {
 +        gmx_fatal(FARGS,"Can not make a spline table with less than 2 points");
 +    }
 +
 +    /* We need some margin to be able to divide table values by r
 +     * in the kernel and also to do the integration arithmetics
 +     * without going out of range. Furthemore, we divide by dx below.
 +     */
 +    tab_max = GMX_REAL_MAX*0.0001;
 +
 +    /* This function produces a table with:
 +     * maximum energy error: V'''/(6*12*sqrt(3))*dx^3
 +     * maximum force error:  V'''/(6*4)*dx^2
 +     * The rms force error is the max error times 1/sqrt(5)=0.45.
 +     */
 +
 +    bOutOfRange = FALSE;
 +    i_inrange = ntab;
 +    v_inrange = 0;
 +    dc = 0;
 +    for(i=ntab-1; i>=0; i--)
 +    {
 +        x_r0 = i*dx;
 +
 +        v_r0 = v_ewald_lr(beta,x_r0);
 +
 +        if (!bOutOfRange)
 +        {
 +            i_inrange = i;
 +            v_inrange = v_r0;
 +    
 +            vi = v_r0;
 +        }
 +        else
 +        {
 +            /* Linear continuation for the last point in range */
 +            vi = v_inrange - dc*(i - i_inrange)*dx;
 +        }
 +
 +        if(table_v!=NULL)
 +        {
 +            table_v[i] = vi;
 +        }
 +
 +        if (i == 0)
 +        {
 +            continue;
 +        }
 +
 +        /* Get the potential at table point i-1 */
 +        v_r1 = v_ewald_lr(beta,(i-1)*dx);
 +
 +        /* v_r1 != v_r1 catches NaN from the evaluation above */
 +        if (v_r1 != v_r1 || v_r1 < -tab_max || v_r1 > tab_max)
 +        {
 +            bOutOfRange = TRUE;
 +        }
 +
 +        if (!bOutOfRange)
 +        {
 +            /* Calculate the average second derivative times dx over interval i-1 to i.
 +             * Using the function values at the end points and in the middle.
 +             */
 +            a2dx = (v_r0 + v_r1 - 2*v_ewald_lr(beta,x_r0-0.5*dx))/(0.25*dx);
 +            /* Set the derivative of the spline to match the difference in potential
 +             * over the interval plus the average effect of the quadratic term.
 +             * This is the essential step for minimizing the error in the force.
 +             */
 +            dc = (v_r0 - v_r1)/dx + 0.5*a2dx;
 +        }
 +
 +        if (i == ntab - 1)
 +        {
 +            /* Fill the table with the force, minus the derivative of the spline */
 +            table_f[i] = -dc;
 +        }
 +        else
 +        {
 +            /* tab[i] will contain the average of the splines over the two intervals */
 +            table_f[i] += -0.5*dc;
 +        }
 +
 +        if (!bOutOfRange)
 +        {
 +            /* Make spline s(x) = a0 + a1*(x - xr) + 0.5*a2*(x - xr)^2
 +             * matching the potential at the two end points
 +             * and the derivative dc at the end point xr.
 +             */
 +            a0   = v_r0;
 +            a1   = dc;
 +            a2dx = (a1*dx + v_r1 - a0)*2/dx;
 +
 +            /* Set dc to the derivative at the next point */
 +            dc_new = a1 - a2dx;
 +                
 +            if (dc_new != dc_new || dc_new < -tab_max || dc_new > tab_max)
 +            {
 +                bOutOfRange = TRUE;
 +            }
 +            else
 +            {
 +                dc = dc_new;
 +            }
 +        }
 +
 +        table_f[(i-1)] = -0.5*dc;
 +    }
 +    /* Currently the last value only contains half the force: double it */
 +    table_f[0] *= 2;
 +
 +    if(table_v!=NULL && table_fdv0!=NULL)
 +    {
 +        /* Copy to FDV0 table too. Allocation occurs in forcerec.c,
 +         * init_ewald_f_table().
 +         */
 +        for(i=0;i<ntab-1;i++)
 +        {
 +            table_fdv0[4*i]     = table_f[i];
 +            table_fdv0[4*i+1]   = table_f[i+1]-table_f[i];
 +            table_fdv0[4*i+2]   = table_v[i];
 +            table_fdv0[4*i+3]   = 0.0;
 +        }
 +        table_fdv0[4*(ntab-1)]    = table_f[(ntab-1)];
 +        table_fdv0[4*(ntab-1)+1]  = -table_f[(ntab-1)];
 +        table_fdv0[4*(ntab-1)+2]  = table_v[(ntab-1)];
 +        table_fdv0[4*(ntab-1)+3]  = 0.0;
 +    }
 +}
 +
 +/* The scale (1/spacing) for third order spline interpolation
 + * of the Ewald mesh contribution which needs to be subtracted
 + * from the non-bonded interactions.
 + */
 +/* Return the table scale (points per nm) needed so the cubic-spline
 + * Ewald table meets both a force tolerance (single precision epsilon)
 + * and an energy tolerance (10x tighter than the cut-off jump at rc);
 + * the larger of the two required scales wins. */
 +real ewald_spline3_table_scale(real ewaldcoeff,real rc)
 +{
 +    double erf_x_d3=1.0522; /* max of (erf(x)/x)''' */
 +    double ftol,etol;
 +    double sc_f,sc_e;
 +
 +    /* Force tolerance: single precision accuracy */
 +    ftol = GMX_FLOAT_EPS;
 +    sc_f = sqrt(erf_x_d3/(6*4*ftol*ewaldcoeff))*ewaldcoeff;
 +
 +    /* Energy tolerance: 10x more accurate than the cut-off jump */
 +    etol = 0.1*gmx_erfc(ewaldcoeff*rc);
 +    etol = max(etol,GMX_REAL_EPS);
 +    sc_e = pow(erf_x_d3/(6*12*sqrt(3)*etol),1.0/3.0)*ewaldcoeff;
 +
 +    return max(sc_f,sc_e);
 +}
 +
 +/* Calculate the potential and force for an r value
 + * in exactly the same way it is done in the inner loop.
 + * VFtab is a pointer to the table data, offset is
 + * the point where we should begin and stride is 
 + * 4 if we have a buckingham table, 3 otherwise.
 + * If you want to evaluate table no N, set offset to 4*N.
 + *  
 + * We use normal precision here, since that is what we
 + * will use in the inner loops.
 + */
 +/* Evaluate potential *y and derivative *yp at distance r from a cubic
 + * interpolation table, using exactly the same arithmetic (and precision)
 + * as the nonbonded inner loops. See the comment block above for the
 + * offset/stride conventions. */
 +static void evaluate_table(real VFtab[], int offset, int stride, 
 +                         real tabscale, real r, real *y, real *yp)
 +{
 +  int n;
 +  real rt,eps,eps2;
 +  real Y,F,Geps,Heps2,Fp;
 +
 +  /* Split r*tabscale into table index n and fractional part eps */
 +  rt       =  r*tabscale;
 +  n        =  (int)rt;
 +  eps      =  rt - n;
 +  eps2     =  eps*eps;
 +  n        =  offset+stride*n;
 +  Y        =  VFtab[n];
 +  F        =  VFtab[n+1];
 +  Geps     =  eps*VFtab[n+2];
 +  Heps2    =  eps2*VFtab[n+3];
 +  Fp       =  F+Geps+Heps2;
 +  *y       =  Y+eps*Fp;
 +  *yp      =  (Fp+Geps+2.0*Heps2)*tabscale;
 +}
 +
 +/* Convert n points of (x, V, F) data into the packed cubic table format
 + * (V, F', G, H per point) expected by the inner loops, scaling all
 + * entries by scalefactor and writing them into dest starting at offset
 + * with the given stride. */
 +static void copy2table(int n,int offset,int stride,
 +                     double x[],double Vtab[],double Ftab[],real scalefactor,
 +                     real dest[])
 +{
 +/* Use double prec. for the intermediary variables
 + * and temporary x/vtab/vtab2 data to avoid unnecessary 
 + * loss of precision.
 + */
 +  int  i,nn0;
 +  double F,G,H,h;
 +
 +  h = 0;
 +  for(i=0; (i<n); i++) {
 +    if (i < n-1) {
 +      h   = x[i+1] - x[i];
 +      F   = -Ftab[i]*h;
 +      G   =  3*(Vtab[i+1] - Vtab[i]) + (Ftab[i+1] + 2*Ftab[i])*h;
 +      H   = -2*(Vtab[i+1] - Vtab[i]) - (Ftab[i+1] +   Ftab[i])*h;
 +    } else {
 +      /* Fill the last entry with a linear potential,
 +       * this is mainly for rounding issues with angle and dihedral potentials.
 +       * Note: h still holds the spacing of the previous interval here.
 +       */
 +      F   = -Ftab[i]*h;
 +      G   = 0;
 +      H   = 0;
 +    }
 +    nn0 = offset + i*stride;
 +    dest[nn0]   = scalefactor*Vtab[i];
 +    dest[nn0+1] = scalefactor*F;
 +    dest[nn0+2] = scalefactor*G;
 +    dest[nn0+3] = scalefactor*H;
 +  }
 +}
 +
 +/* Initialize a t_tabledata: set sizes and scale, optionally allocate
 + * the x/v/f arrays, and fill x with equidistant points i/tabscale.
 + * fp is currently unused here. */
 +static void init_table(FILE *fp,int n,int nx0,
 +                     double tabscale,t_tabledata *td,gmx_bool bAlloc)
 +{
 +  int i;
 +  
 +  td->nx  = n;
 +  td->nx0 = nx0;
 +  td->tabscale = tabscale;
 +  if (bAlloc) {
 +    snew(td->x,td->nx);
 +    snew(td->v,td->nx);
 +    snew(td->f,td->nx);
 +  }
 +  for(i=0; (i<td->nx); i++)
 +    td->x[i] = i/tabscale;
 +}
 +
 +/* Generate forces f[] from potential values v[] by natural cubic spline
 + * fitting (tridiagonal solve via forward elimination + back substitution).
 + * bS3/bE3 select a fitted-V''' boundary condition at the start/end;
 + * otherwise the existing f[0]/f[nx-1] (or V'=0 at the end) are used.
 + * The spacing h is factored out and corrected for at the end.
 + * Fatal error if nx < 4 with a third-derivative boundary condition. */
 +static void spline_forces(int nx,double h,double v[],gmx_bool bS3,gmx_bool bE3,
 +                        double f[])
 +{
 +  int    start,end,i;
 +  double v3,b_s,b_e,b;
 +  double beta,*gamma;
 +
 +  /* Formulas can be found in:
 +   * H.J.C. Berendsen, Simulating the Physical World, Cambridge 2007
 +   */
 +
 +  if (nx < 4 && (bS3 || bE3))
 +    gmx_fatal(FARGS,"Can not generate splines with third derivative boundary conditions with less than 4 (%d) points",nx);
 +  
 +  /* To make life easy we initially set the spacing to 1
 +   * and correct for this at the end.
 +   */
 +  beta = 2;
 +  if (bS3) {
 +    /* Fit V''' at the start */
 +    v3  = v[3] - 3*v[2] + 3*v[1] - v[0];
 +    if (debug)
 +      fprintf(debug,"The left third derivative is %g\n",v3/(h*h*h));
 +    b_s = 2*(v[1] - v[0]) + v3/6;
 +    start = 0;
 +    
 +    if (FALSE) {
 +      /* Fit V'' at the start (disabled alternative kept for reference) */
 +      real v2;
 +      
 +      v2  = -v[3] + 4*v[2] - 5*v[1] + 2*v[0];
 +      /* v2  = v[2] - 2*v[1] + v[0]; */
 +      if (debug)
 +      fprintf(debug,"The left second derivative is %g\n",v2/(h*h));
 +      b_s = 3*(v[1] - v[0]) - v2/2;
 +      start = 0;
 +    }
 +  } else {
 +    b_s = 3*(v[2] - v[0]) + f[0]*h;
 +    start = 1;
 +  }
 +  if (bE3) {
 +    /* Fit V''' at the end */
 +    v3  = v[nx-1] - 3*v[nx-2] + 3*v[nx-3] - v[nx-4];
 +    if (debug)
 +      fprintf(debug,"The right third derivative is %g\n",v3/(h*h*h));
 +    b_e = 2*(v[nx-1] - v[nx-2]) + v3/6;
 +    end = nx;
 +  } else {
 +    /* V'=0 at the end */
 +    b_e = 3*(v[nx-1] - v[nx-3]) + f[nx-1]*h;
 +    end = nx - 1;
 +  }
 +
 +  snew(gamma,nx);
 +  beta = (bS3 ? 1 : 4);
 +
 +  /* For V'' fitting */
 +  /* beta = (bS3 ? 2 : 4); */
 +
 +  /* Forward elimination of the tridiagonal system */
 +  f[start] = b_s/beta;
 +  for(i=start+1; i<end; i++) {
 +    gamma[i] = 1/beta;
 +    beta = 4 - gamma[i];
 +    b    =  3*(v[i+1] - v[i-1]);
 +    f[i] = (b - f[i-1])/beta;
 +  }
 +  gamma[end-1] = 1/beta;
 +  beta = (bE3 ? 1 : 4) - gamma[end-1];
 +  f[end-1] = (b_e - f[end-2])/beta;
 +
 +  /* Back substitution */
 +  for(i=end-2; i>=start; i--)
 +    f[i] -= gamma[i+1]*f[i+1];
 +  sfree(gamma);
 +
 +  /* Correct for the minus sign and the spacing */
 +  for(i=start; i<end; i++)
 +    f[i] = -f[i]/h;
 +}
 +
 +/* Generate forces for one user table column by spline-fitting the
 + * potential v[] over its non-zero range; leading/trailing zero entries
 + * are skipped. Not implemented for dihedral tables (angle == 2).
 + * table is the zero-based table number, used only for logging. */
 +static void set_forces(FILE *fp,int angle,
 +                     int nx,double h,double v[],double f[],
 +                     int table)
 +{
 +  int start,end;
 +
 +  if (angle == 2)
 +    gmx_fatal(FARGS,
 +            "Force generation for dihedral tables is not (yet) implemented");
 +
 +  /* Trim leading zeros of the potential */
 +  start = 0;
 +  while (v[start] == 0)
 +    start++;
 +  
 +  /* Trim trailing zeros, but keep one point past the data if possible */
 +  end = nx;
 +  while(v[end-1] == 0)
 +    end--;
 +  if (end > nx - 2)
 +    end = nx;
 +  else
 +    end++;
 +
 +  if (fp)
 +    fprintf(fp,"Generating forces for table %d, boundary conditions: V''' at %g, %s at %g\n",
 +          table+1,start*h,end==nx ? "V'''" : "V'=0",(end-1)*h);
 +  spline_forces(end-start,h,v+start,TRUE,end==nx,f+start);
 +}
 +
 +/* Read ntab user tables from the xvg file fn (searched via the GROMACS
 + * library path) into td[]. The file must have 2*ntab+1 columns:
 + * x, then (V, F) pairs per table. Validates column count, start/end
 + * values (distance vs. angle tables), even spacing, and value ranges;
 + * generates forces by spline fitting when a table has V but all-zero F,
 + * and warns when given forces deviate strongly from -dV/dx.
 + * angle: 0 = distance table, 1 = angle (0..180), else dihedral (-180..180).
 + */
 +static void read_tables(FILE *fp,const char *fn,
 +                      int ntab,int angle,t_tabledata td[])
 +{
 +  char *libfn;
 +  char buf[STRLEN];
 +  double **yy=NULL,start,end,dx0,dx1,ssd,vm,vp,f,numf;
 +  int  k,i,nx,nx0=0,ny,nny,ns;
 +  gmx_bool bAllZero,bZeroV,bZeroF;
 +  double tabscale;
 +
 +  nny = 2*ntab+1;  
 +  libfn = gmxlibfn(fn);
 +  nx  = read_xvg(libfn,&yy,&ny);
 +  if (ny != nny)
 +    gmx_fatal(FARGS,"Trying to read file %s, but nr columns = %d, should be %d",
 +              libfn,ny,nny);
 +  if (angle == 0) {
 +    if (yy[0][0] != 0.0)
 +      gmx_fatal(FARGS,
 +              "The first distance in file %s is %f nm instead of %f nm",
 +              libfn,yy[0][0],0.0);
 +  } else {
 +    if (angle == 1)
 +      start = 0.0;
 +    else
 +      start = -180.0;
 +    end = 180.0;
 +    if (yy[0][0] != start || yy[0][nx-1] != end)
 +      gmx_fatal(FARGS,"The angles in file %s should go from %f to %f instead of %f to %f\n",
 +              libfn,start,end,yy[0][0],yy[0][nx-1]);
 +  }
 +
 +  tabscale = (nx-1)/(yy[0][nx-1] - yy[0][0]);
 +  
 +  if (fp) {
 +    fprintf(fp,"Read user tables from %s with %d data points.\n",libfn,nx);
 +    if (angle == 0)
 +      fprintf(fp,"Tabscale = %g points/nm\n",tabscale);
 +  }
 +
 +  bAllZero = TRUE;
 +  for(k=0; k<ntab; k++) {
 +    bZeroV = TRUE;
 +    bZeroF = TRUE;
 +    for(i=0; (i < nx); i++) {
 +      if (i >= 2) {
 +      dx0 = yy[0][i-1] - yy[0][i-2];
 +      dx1 = yy[0][i]   - yy[0][i-1];
 +      /* Check for 1% deviation in spacing */
 +      if (fabs(dx1 - dx0) >= 0.005*(fabs(dx0) + fabs(dx1))) {
 +        gmx_fatal(FARGS,"In table file '%s' the x values are not equally spaced: %f %f %f",fn,yy[0][i-2],yy[0][i-1],yy[0][i]);
 +      }
 +      }
 +      if (yy[1+k*2][i] != 0) {
 +      bZeroV = FALSE;
 +      if (bAllZero) {
 +        bAllZero = FALSE;
 +        nx0 = i;
 +      }
 +      if (yy[1+k*2][i] >  0.01*GMX_REAL_MAX ||
 +          yy[1+k*2][i] < -0.01*GMX_REAL_MAX) {
 +        gmx_fatal(FARGS,"Out of range potential value %g in file '%s'",
 +                  yy[1+k*2][i],fn);
 +      }
 +      }
 +      if (yy[1+k*2+1][i] != 0) {
 +      bZeroF = FALSE;
 +      if (bAllZero) {
 +        bAllZero = FALSE;
 +        nx0 = i;
 +      }
 +      if (yy[1+k*2+1][i] >  0.01*GMX_REAL_MAX ||
 +          yy[1+k*2+1][i] < -0.01*GMX_REAL_MAX) {
 +        gmx_fatal(FARGS,"Out of range force value %g in file '%s'",
 +                  yy[1+k*2+1][i],fn);
 +      }
 +      }
 +    }
 +
 +    if (!bZeroV && bZeroF) {
 +      /* Potential given but no forces: generate them by spline fitting */
 +      set_forces(fp,angle,nx,1/tabscale,yy[1+k*2],yy[1+k*2+1],k);
 +    } else {
 +      /* Check if the second column is close to minus the numerical
 +       * derivative of the first column.
 +       */
 +      ssd = 0;
 +      ns = 0;
 +      for(i=1; (i < nx-1); i++) {
 +      vm = yy[1+2*k][i-1];
 +      vp = yy[1+2*k][i+1];
 +      f  = yy[1+2*k+1][i];
 +      if (vm != 0 && vp != 0 && f != 0) {
 +        /* Take the centered difference */
 +        numf = -(vp - vm)*0.5*tabscale;
 +        ssd += fabs(2*(f - numf)/(f + numf));
 +        ns++;
 +      }
 +      }
 +      if (ns > 0) {
 +      ssd /= ns;
 +      sprintf(buf,"For the %d non-zero entries for table %d in %s the forces deviate on average %d%% from minus the numerical derivative of the potential\n",ns,k,libfn,(int)(100*ssd+0.5));
 +      if (debug)
 +        fprintf(debug,"%s",buf);
 +      if (ssd > 0.2) {
 +        if (fp)
 +          fprintf(fp,"\nWARNING: %s\n",buf);
 +        fprintf(stderr,"\nWARNING: %s\n",buf);
 +      }
 +      }
 +    }
 +  }
 +  if (bAllZero && fp) {
 +    fprintf(fp,"\nNOTE: All elements in table %s are zero\n\n",libfn);
 +  }
 +
 +  /* Copy the columns into the per-table t_tabledata structures */
 +  for(k=0; (k<ntab); k++) {
 +    init_table(fp,nx,nx0,tabscale,&(td[k]),TRUE);
 +    for(i=0; (i<nx); i++) {
 +      td[k].x[i] = yy[0][i];
 +      td[k].v[i] = yy[2*k+1][i];
 +      td[k].f[i] = yy[2*k+2][i];
 +    }
 +  }
 +  for(i=0; (i<ny); i++)
 +    sfree(yy[i]);
 +  sfree(yy);
 +  sfree(libfn);
 +}
 +
 +/* Free the arrays of a t_tabledata; safe to call with td == NULL.
 + * Does not free the struct itself. */
 +static void done_tabledata(t_tabledata *td)
 +{
 +  int i;
 +  
 +  if (!td)
 +    return;
 +    
 +  sfree(td->x);
 +  sfree(td->v);
 +  sfree(td->f);
 +}
 +
 +/* Fill td->v and td->f for table type tp (an etab* value) according to
 + * the analytic formulas in the manual, using parameters from fr
 + * (cut-offs, reaction-field and Ewald constants, repulsion power).
 + * Handles the shift/switch modifiers and adds user data for user tables;
 + * points below td->nx0 are continued linearly afterwards.
 + * NOTE(review): this block contains merge-diff conflict lines ('-'/'++'
 + * prefixed) from replacing the isp constant with M_2_SQRTPI = 2/sqrt(pi).
 + */
 +static void fill_table(t_tabledata *td,int tp,const t_forcerec *fr)
 +{
 +  /* Fill the table according to the formulas in the manual.
 +   * In principle, we only need the potential and the second
 +   * derivative, but then we would have to do lots of calculations
 +   * in the inner loop. By precalculating some terms (see manual)
 +   * we get better eventual performance, despite a larger table.
 +   *
 +   * Since some of these higher-order terms are very small,
 +   * we always use double precision to calculate them here, in order
 +   * to avoid unnecessary loss of precision.
 +   */
 +#ifdef DEBUG_SWITCH
 +  FILE *fp;
 +#endif
 +  int  i;
 +  double reppow,p;
 +  double r1,rc,r12,r13;
 +  double r,r2,r6,rc6;
 +  double expr,Vtab,Ftab;
 +  /* Parameters for David's function */
 +  double A=0,B=0,C=0,A_3=0,B_4=0;
 +  /* Parameters for the switching function */
 +  double ksw,swi,swi1;
 +  /* Temporary parameters */
 +  gmx_bool bSwitch,bShift;
 +  double ewc=fr->ewaldcoeff;
-       Ftab  = gmx_erfc(ewc*r)/r2+2*exp(-(ewc*ewc*r2))*ewc*isp/r;
 +   
 +  bSwitch = ((tp == etabLJ6Switch) || (tp == etabLJ12Switch) || 
 +           (tp == etabCOULSwitch) ||
 +           (tp == etabEwaldSwitch) || (tp == etabEwaldUserSwitch));
 +  bShift  = ((tp == etabLJ6Shift) || (tp == etabLJ12Shift) || 
 +           (tp == etabShift));
 +
 +  reppow = fr->reppow;
 +
 +  if (tprops[tp].bCoulomb) {
 +    r1 = fr->rcoulomb_switch;
 +    rc = fr->rcoulomb;
 +  } 
 +  else {
 +    r1 = fr->rvdw_switch;
 +    rc = fr->rvdw;
 +  }
 +  if (bSwitch)
 +    ksw  = 1.0/(pow5(rc-r1));
 +  else
 +    ksw  = 0.0;
 +  if (bShift) {
 +    if (tp == etabShift)
 +      p = 1;
 +    else if (tp == etabLJ6Shift) 
 +      p = 6; 
 +    else 
 +      p = reppow;
 +    
 +    A = p * ((p+1)*r1-(p+4)*rc)/(pow(rc,p+2)*pow2(rc-r1));
 +    B = -p * ((p+1)*r1-(p+3)*rc)/(pow(rc,p+2)*pow3(rc-r1));
 +    C = 1.0/pow(rc,p)-A/3.0*pow3(rc-r1)-B/4.0*pow4(rc-r1);
 +    if (tp == etabLJ6Shift) {
 +      A=-A;
 +      B=-B;
 +      C=-C;
 +    }
 +    A_3=A/3.0;
 +    B_4=B/4.0;
 +  }
 +  if (debug) { fprintf(debug,"Setting up tables\n"); fflush(debug); }
 +    
 +#ifdef DEBUG_SWITCH
 +  fp=xvgropen("switch.xvg","switch","r","s");
 +#endif
 +  
 +  for(i=td->nx0; (i<td->nx); i++) {
 +    r     = td->x[i];
 +    r2    = r*r;
 +    r6    = 1.0/(r2*r2*r2);
 +    if (gmx_within_tol(reppow,12.0,10*GMX_DOUBLE_EPS)) {
 +      r12 = r6*r6;
 +    } else {
 +      r12 = pow(r,-reppow);   
 +    }
 +    Vtab  = 0.0;
 +    Ftab  = 0.0;
 +    if (bSwitch) {
 +      /* swi is function, swi1 1st derivative and swi2 2nd derivative */
 +      /* The switch function is 1 for r<r1, 0 for r>rc, and smooth for
 +       * r1<=r<=rc. The 1st and 2nd derivatives are both zero at
 +       * r1 and rc.
 +       * ksw is just the constant 1/(rc-r1)^5, to save some calculations...
 +       */ 
 +      if(r<=r1) {
 +      swi  = 1.0;
 +      swi1 = 0.0;
 +      } else if (r>=rc) {
 +      swi  = 0.0;
 +      swi1 = 0.0;
 +      } else {
 +      swi      = 1 - 10*pow3(r-r1)*ksw*pow2(rc-r1) 
 +        + 15*pow4(r-r1)*ksw*(rc-r1) - 6*pow5(r-r1)*ksw;
 +      swi1     = -30*pow2(r-r1)*ksw*pow2(rc-r1) 
 +        + 60*pow3(r-r1)*ksw*(rc-r1) - 30*pow4(r-r1)*ksw;
 +      }
 +    }
 +    else { /* not really needed, but avoids compiler warnings... */
 +      swi  = 1.0;
 +      swi1 = 0.0;
 +    }
 +#ifdef DEBUG_SWITCH
 +    fprintf(fp,"%10g  %10g  %10g  %10g\n",r,swi,swi1,swi2);
 +#endif
 +
 +    rc6 = rc*rc*rc;
 +    rc6 = 1.0/(rc6*rc6);
 +
 +    switch (tp) {
 +    case etabLJ6:
 +            /* Dispersion */
 +            Vtab = -r6;
 +            Ftab = 6.0*Vtab/r;
 +            break;
 +    case etabLJ6Switch:
 +    case etabLJ6Shift:
 +      /* Dispersion */
 +      if (r < rc) {      
 +          Vtab = -r6;
 +          Ftab = 6.0*Vtab/r;
 +          break;
 +      }
 +      break;
 +    case etabLJ12:
 +            /* Repulsion */
 +            Vtab  = r12;
 +            Ftab  = reppow*Vtab/r;
 +      break;
 +    case etabLJ12Switch:
 +    case etabLJ12Shift:
 +      /* Repulsion */
 +      if (r < rc) {                
 +          Vtab  = r12;
 +          Ftab  = reppow*Vtab/r;
 +      }
 +      break;
 +      case etabLJ6Encad:
 +        if(r < rc) {
 +            Vtab  = -(r6-6.0*(rc-r)*rc6/rc-rc6);
 +            Ftab  = -(6.0*r6/r-6.0*rc6/rc);
 +        } else { /* r>rc */ 
 +            Vtab  = 0;
 +            Ftab  = 0;
 +        } 
 +        break;
 +    case etabLJ12Encad:
 +        if(r < rc) {
 +            Vtab  = -(r6-6.0*(rc-r)*rc6/rc-rc6);
 +            Ftab  = -(6.0*r6/r-6.0*rc6/rc);
 +        } else { /* r>rc */
 +            Vtab  = 0;
 +            Ftab  = 0;
 +        } 
 +        break;        
 +    case etabCOUL:
 +      Vtab  = 1.0/r;
 +      Ftab  = 1.0/r2;
 +      break;
 +    case etabCOULSwitch:
 +    case etabShift:
 +      if (r < rc) { 
 +      Vtab  = 1.0/r;
 +      Ftab  = 1.0/r2;
 +      }
 +      break;
 +    case etabEwald:
 +    case etabEwaldSwitch:
 +      Vtab  = gmx_erfc(ewc*r)/r;
-       Ftab  = -gmx_erf(ewc*r)/r2+2*exp(-(ewc*ewc*r2))*ewc*isp/r;
++      Ftab  = gmx_erfc(ewc*r)/r2+exp(-(ewc*ewc*r2))*ewc*M_2_SQRTPI/r;
 +      break;
 +    case etabEwaldUser:
 +    case etabEwaldUserSwitch:
 +      /* Only calculate minus the reciprocal space contribution */
 +      Vtab  = -gmx_erf(ewc*r)/r;
++      Ftab  = -gmx_erf(ewc*r)/r2+exp(-(ewc*ewc*r2))*ewc*M_2_SQRTPI/r;
 +      break;
 +    case etabRF:
 +    case etabRF_ZERO:
 +      Vtab  = 1.0/r      +   fr->k_rf*r2 - fr->c_rf;
 +      Ftab  = 1.0/r2     - 2*fr->k_rf*r;
 +      if (tp == etabRF_ZERO && r >= rc) {
 +      Vtab = 0;
 +      Ftab = 0;
 +      }
 +      break;
 +    case etabEXPMIN:
 +      expr  = exp(-r);
 +      Vtab  = expr;
 +      Ftab  = expr;
 +      break;
 +    case etabCOULEncad:
 +        if(r < rc) {
 +            Vtab  = 1.0/r-(rc-r)/(rc*rc)-1.0/rc;
 +            Ftab  = 1.0/r2-1.0/(rc*rc);
 +        } else { /* r>rc */ 
 +            Vtab  = 0;
 +            Ftab  = 0;
 +        } 
 +        break;
 +    default:
 +      gmx_fatal(FARGS,"Table type %d not implemented yet. (%s,%d)",
 +                tp,__FILE__,__LINE__);
 +    }
 +    if (bShift) {
 +      /* Normal coulomb with cut-off correction for potential */
 +      if (r < rc) {
 +      Vtab -= C;
 +      /* If in Shifting range add something to it */
 +      if (r > r1) {
 +        r12 = (r-r1)*(r-r1);
 +        r13 = (r-r1)*r12;
 +        Vtab  += - A_3*r13 - B_4*r12*r12;
 +        Ftab  +=   A*r12 + B*r13;
 +      }
 +      }
 +    }
 +
 +    if (ETAB_USER(tp)) {
 +      Vtab += td->v[i];
 +      Ftab += td->f[i];
 +    }
 +
 +    if ((r > r1) && bSwitch) {
 +      Ftab = Ftab*swi - Vtab*swi1;
 +      Vtab = Vtab*swi;
 +    }
 +
 +    /* Convert to single precision when we store to mem */
 +    td->v[i]  = Vtab;
 +    td->f[i]  = Ftab;
 +  }
 +
 +  /* Continue the table linearly from nx0 to 0.
 +   * These values are only required for energy minimization with overlap or TPI.
 +   */
 +  for(i=td->nx0-1; i>=0; i--) {
 +    td->v[i] = td->v[i+1] + td->f[i+1]*(td->x[i+1] - td->x[i]);
 +    td->f[i] = td->f[i+1];
 +  }
 +
 +#ifdef DEBUG_SWITCH
 +  gmx_fio_fclose(fp);
 +#endif
 +}
 +
 +/* Select which table function (etab* value) to use for each of the
 + * etiCOUL/etiLJ6/etiLJ12 slots in tabsel[], based on the electrostatics
 + * and VdW settings in fr. With b14only, 1-4 interactions fall back to
 + * plain cut-off unless user tables are requested; Buckingham replaces
 + * the LJ12 slot with an exponential table. Fatal error on unknown types. */
 +static void set_table_type(int tabsel[],const t_forcerec *fr,gmx_bool b14only)
 +{
 +  int eltype,vdwtype;
 +
 +  /* Set the different table indices.
 +   * Coulomb first.
 +   */
 +
 +
 +  if (b14only) {
 +    switch (fr->eeltype) {
 +    case eelRF_NEC:
 +      eltype = eelRF;
 +      break;
 +    case eelUSER:
 +    case eelPMEUSER:
 +    case eelPMEUSERSWITCH:
 +      eltype = eelUSER;
 +      break;
 +    default:
 +      eltype = eelCUT;
 +    }
 +  } else {
 +    eltype = fr->eeltype;
 +  }
 +  
 +  switch (eltype) {
 +  case eelCUT:
 +    tabsel[etiCOUL] = etabCOUL;
 +    break;
 +  case eelPOISSON:
 +    tabsel[etiCOUL] = etabShift;
 +    break;
 +  case eelSHIFT:
 +    if (fr->rcoulomb > fr->rcoulomb_switch)
 +      tabsel[etiCOUL] = etabShift;
 +    else
 +      tabsel[etiCOUL] = etabCOUL;
 +    break;
 +  case eelEWALD:
 +  case eelPME:
 +  case eelP3M_AD:
 +    tabsel[etiCOUL] = etabEwald;
 +    break;
 +  case eelPMESWITCH:
 +    tabsel[etiCOUL] = etabEwaldSwitch;
 +    break;
 +  case eelPMEUSER:
 +    tabsel[etiCOUL] = etabEwaldUser;
 +    break;
 +  case eelPMEUSERSWITCH:
 +    tabsel[etiCOUL] = etabEwaldUserSwitch;
 +    break;
 +  case eelRF:
 +  case eelGRF:
 +  case eelRF_NEC:
 +    tabsel[etiCOUL] = etabRF;
 +    break;
 +  case eelRF_ZERO:
 +    tabsel[etiCOUL] = etabRF_ZERO;
 +    break;
 +  case eelSWITCH:
 +    tabsel[etiCOUL] = etabCOULSwitch;
 +    break;
 +  case eelUSER:
 +    tabsel[etiCOUL] = etabUSER;
 +    break;
 +  case eelENCADSHIFT:
 +    tabsel[etiCOUL] = etabCOULEncad;
 +    break;      
 +  default:
 +    gmx_fatal(FARGS,"Invalid eeltype %d",eltype);
 +  }
 +  
 +  /* Van der Waals time */
 +  if (fr->bBHAM && !b14only) {
 +    /* Buckingham: dispersion table plus exponential repulsion */
 +    tabsel[etiLJ6]  = etabLJ6;
 +    tabsel[etiLJ12] = etabEXPMIN;
 +  } else {
 +    if (b14only && fr->vdwtype != evdwUSER)
 +      vdwtype = evdwCUT;
 +    else
 +      vdwtype = fr->vdwtype;
 +
 +    switch (vdwtype) {
 +    case evdwSWITCH:
 +      tabsel[etiLJ6]  = etabLJ6Switch;
 +      tabsel[etiLJ12] = etabLJ12Switch;
 +      break;
 +    case evdwSHIFT:
 +      tabsel[etiLJ6]  = etabLJ6Shift;
 +      tabsel[etiLJ12] = etabLJ12Shift;
 +      break;
 +    case evdwUSER:
 +      tabsel[etiLJ6]  = etabUSER;
 +      tabsel[etiLJ12] = etabUSER;
 +      break;
 +    case evdwCUT:
 +      tabsel[etiLJ6]  = etabLJ6;
 +      tabsel[etiLJ12] = etabLJ12;
 +      break;
 +    case evdwENCADSHIFT:
 +      tabsel[etiLJ6]  = etabLJ6Encad;
 +      tabsel[etiLJ12] = etabLJ12Encad;
 +      break;
 +    default:
 +      gmx_fatal(FARGS,"Invalid vdwtype %d in %s line %d",vdwtype,
 +                __FILE__,__LINE__);
 +    } 
 +  }
 +}
 +
 +t_forcetable make_tables(FILE *out,const output_env_t oenv,
 +                         const t_forcerec *fr,
 +                       gmx_bool bVerbose,const char *fn,
 +                       real rtab,int flags)
 +{
 +  const char *fns[3] = { "ctab.xvg", "dtab.xvg", "rtab.xvg" };
 +  const char *fns14[3] = { "ctab14.xvg", "dtab14.xvg", "rtab14.xvg" };
 +  FILE        *fp;
 +  t_tabledata *td;
 +  gmx_bool        b14only,bReadTab,bGenTab;
 +  real        x0,y0,yp;
 +  int         i,j,k,nx,nx0,tabsel[etiNR];
 +  real        scalefactor;
 +
 +  t_forcetable table;
 +
 +  b14only = (flags & GMX_MAKETABLES_14ONLY);
 +
 +  if (flags & GMX_MAKETABLES_FORCEUSER) {
 +    tabsel[etiCOUL] = etabUSER;
 +    tabsel[etiLJ6]  = etabUSER;
 +    tabsel[etiLJ12] = etabUSER;
 +  } else {
 +    set_table_type(tabsel,fr,b14only);
 +  }
 +  snew(td,etiNR);
 +  table.r         = rtab;
 +  table.scale     = 0;
 +  table.n         = 0;
 +  table.scale_exp = 0;
 +  nx0             = 10;
 +  nx              = 0;
 +  
 +  table.interaction   = GMX_TABLE_INTERACTION_ELEC_VDWREP_VDWDISP;
 +  table.format        = GMX_TABLE_FORMAT_CUBICSPLINE_YFGH;
 +  table.formatsize    = 4;
 +  table.ninteractions = 3;
 +  table.stride        = table.formatsize*table.ninteractions;
 +
 +  /* Check whether we have to read or generate */
 +  bReadTab = FALSE;
 +  bGenTab  = FALSE;
 +  for(i=0; (i<etiNR); i++) {
 +    if (ETAB_USER(tabsel[i]))
 +      bReadTab = TRUE;
 +    if (tabsel[i] != etabUSER)
 +      bGenTab  = TRUE;
 +  }
 +  if (bReadTab) {
 +    read_tables(out,fn,etiNR,0,td);
 +    if (rtab == 0 || (flags & GMX_MAKETABLES_14ONLY)) {
 +      rtab      = td[0].x[td[0].nx-1];
 +      table.n   = td[0].nx;
 +      nx        = table.n;
 +    } else {
 +      if (td[0].x[td[0].nx-1] < rtab) 
 +      gmx_fatal(FARGS,"Tables in file %s not long enough for cut-off:\n"
 +                "\tshould be at least %f nm\n",fn,rtab);
 +      nx        = table.n = (int)(rtab*td[0].tabscale + 0.5);
 +    }
 +    table.scale = td[0].tabscale;
 +    nx0         = td[0].nx0;
 +  }
 +  if (bGenTab) {
 +    if (!bReadTab) {
 +#ifdef GMX_DOUBLE
 +      table.scale = 2000.0;
 +#else
 +      table.scale = 500.0;
 +#endif
 +      nx = table.n = rtab*table.scale;
 +    }
 +  }
 +  if (fr->bBHAM) {
 +    if(fr->bham_b_max!=0)
 +      table.scale_exp = table.scale/fr->bham_b_max;
 +    else
 +      table.scale_exp = table.scale;
 +  }
 +
 +  /* Each table type (e.g. coul,lj6,lj12) requires four 
 +   * numbers per nx+1 data points. For performance reasons we want
 +   * the table data to be aligned to 16-byte.
 +   */
 +  snew_aligned(table.data, 12*(nx+1)*sizeof(real),16);
 +
 +  for(k=0; (k<etiNR); k++) {
 +    if (tabsel[k] != etabUSER) {
 +      init_table(out,nx,nx0,
 +               (tabsel[k] == etabEXPMIN) ? table.scale_exp : table.scale,
 +               &(td[k]),!bReadTab);
 +      fill_table(&(td[k]),tabsel[k],fr);
 +      if (out) 
 +      fprintf(out,"%s table with %d data points for %s%s.\n"
 +              "Tabscale = %g points/nm\n",
 +              ETAB_USER(tabsel[k]) ? "Modified" : "Generated",
 +              td[k].nx,b14only?"1-4 ":"",tprops[tabsel[k]].name,
 +              td[k].tabscale);
 +    }
 +
 +    /* Set scalefactor for c6/c12 tables. This is because we save flops in the non-table kernels
 +     * by including the derivative constants (6.0 or 12.0) in the parameters, since
 +     * we no longer calculate force in most steps. This means the c6/c12 parameters
 +     * have been scaled up, so we need to scale down the table interactions too.
 +     * It comes here since we need to scale user tables too.
 +     */
 +      if(k==etiLJ6)
 +      {
 +          scalefactor = 1.0/6.0;
 +      }
 +      else if(k==etiLJ12 && tabsel[k]!=etabEXPMIN)
 +      {
 +          scalefactor = 1.0/12.0;
 +      }
 +      else
 +      {
 +          scalefactor = 1.0;
 +      }
 +
 +    copy2table(table.n,k*4,12,td[k].x,td[k].v,td[k].f,scalefactor,table.data);
 +    
 +    if (bDebugMode() && bVerbose) {
 +      if (b14only)
 +      fp=xvgropen(fns14[k],fns14[k],"r","V",oenv);
 +      else
 +      fp=xvgropen(fns[k],fns[k],"r","V",oenv);
 +      /* plot the output 5 times denser than the table data */
 +      for(i=5*((nx0+1)/2); i<5*table.n; i++) {
 +      x0 = i*table.r/(5*(table.n-1));
 +      evaluate_table(table.data,4*k,12,table.scale,x0,&y0,&yp);
 +      fprintf(fp,"%15.10e  %15.10e  %15.10e\n",x0,y0,yp);
 +      }
 +      gmx_fio_fclose(fp);
 +    }
 +    done_tabledata(&(td[k]));
 +  }
 +  sfree(td);
 +
 +  return table;
 +}
 +
 +t_forcetable make_gb_table(FILE *out,const output_env_t oenv,
 +                           const t_forcerec *fr,
 +                           const char *fn,
 +                           real rtab)
 +{
 +      const char *fns[3] = { "gbctab.xvg", "gbdtab.xvg", "gbrtab.xvg" };
 +      const char *fns14[3] = { "gbctab14.xvg", "gbdtab14.xvg", "gbrtab14.xvg" };
 +      FILE        *fp;
 +      t_tabledata *td;
 +      gmx_bool        bReadTab,bGenTab;
 +      real        x0,y0,yp;
 +      int         i,j,k,nx,nx0,tabsel[etiNR];
 +      double      r,r2,Vtab,Ftab,expterm;
 +      
 +      t_forcetable table;
 +      
 +      double abs_error_r, abs_error_r2;
 +      double rel_error_r, rel_error_r2;
 +      double rel_error_r_old=0, rel_error_r2_old=0;
 +      double x0_r_error, x0_r2_error;
 +      
 +      
 +      /* Only set a Coulomb table for GB */
 +      /* 
 +       tabsel[0]=etabGB;
 +       tabsel[1]=-1;
 +       tabsel[2]=-1;
 +      */
 +      
 +      /* Set the table dimensions for GB, not really necessary to
 +       * use etiNR (since we only have one table, but ...) 
 +       */
 +      snew(td,1);
 +    table.interaction   = GMX_TABLE_INTERACTION_ELEC;
 +    table.format        = GMX_TABLE_FORMAT_CUBICSPLINE_YFGH;
 +      table.r             = fr->gbtabr;
 +      table.scale         = fr->gbtabscale;
 +      table.scale_exp     = 0;
 +      table.n             = table.scale*table.r;
 +    table.formatsize    = 4;
 +    table.ninteractions = 1;
 +    table.stride        = table.formatsize*table.ninteractions;
 +      nx0                 = 0;
 +      nx                  = table.scale*table.r;
 +      
 +      /* Check whether we have to read or generate 
 +       * We will always generate a table, so remove the read code
 +       * (Compare with original make_table function
 +       */
 +      bReadTab = FALSE;
 +      bGenTab  = TRUE;
 +      
 +      /* Each table type (e.g. coul,lj6,lj12) requires four 
 +       * numbers per datapoint. For performance reasons we want
 +       * the table data to be aligned to 16-byte. This is accomplished
 +       * by allocating 16 bytes extra to a temporary pointer, and then
 +       * calculating an aligned pointer. This new pointer must not be
 +       * used in a free() call, but thankfully we're sloppy enough not
 +       * to do this :-)
 +       */
 +      
 +      snew_aligned(table.data,4*nx,16);
 +      
 +      init_table(out,nx,nx0,table.scale,&(td[0]),!bReadTab);
 +      
 +      /* Local implementation so we don't have to use the etabGB
 +       * enum above, which will cause problems later when
 +       * making the other tables (right now even though we are using
 +       * GB, the normal Coulomb tables will be created, but this
 +       * will cause a problem since fr->eeltype==etabGB which will not
 +       * be defined in fill_table and set_table_type
 +       */
 +      
 +      for(i=nx0;i<nx;i++)
 +    {
 +              Vtab    = 0.0;
 +              Ftab    = 0.0;
 +              r       = td->x[i];
 +              r2      = r*r;
 +              expterm = exp(-0.25*r2);
 +              
 +              Vtab = 1/sqrt(r2+expterm);
 +              Ftab = (r-0.25*r*expterm)/((r2+expterm)*sqrt(r2+expterm));
 +              
 +              /* Convert to single precision when we store to mem */
 +              td->v[i]  = Vtab;
 +              td->f[i]  = Ftab;
 +              
 +    }
 +      
 +      copy2table(table.n,0,4,td[0].x,td[0].v,td[0].f,1.0,table.data);
 +      
 +      if(bDebugMode())
 +    {
 +              fp=xvgropen(fns[0],fns[0],"r","V",oenv);
 +              /* plot the output 5 times denser than the table data */
 +              /* for(i=5*nx0;i<5*table.n;i++) */
 +              for(i=nx0;i<table.n;i++)
 +              {
 +                      /* x0=i*table.r/(5*table.n); */
 +                      x0=i*table.r/table.n;
 +                      evaluate_table(table.data,0,4,table.scale,x0,&y0,&yp);
 +                      fprintf(fp,"%15.10e  %15.10e  %15.10e\n",x0,y0,yp);
 +                      
 +              }
 +              gmx_fio_fclose(fp);
 +    }
 +      
 +      /*
 +       for(i=100*nx0;i<99.81*table.n;i++)
 +       {
 +       r = i*table.r/(100*table.n);
 +       r2      = r*r;
 +       expterm = exp(-0.25*r2);
 +       
 +       Vtab = 1/sqrt(r2+expterm);
 +       Ftab = (r-0.25*r*expterm)/((r2+expterm)*sqrt(r2+expterm));
 +       
 +       
 +       evaluate_table(table.data,0,4,table.scale,r,&y0,&yp);
 +       printf("gb: i=%d, x0=%g, y0=%15.15f, Vtab=%15.15f, yp=%15.15f, Ftab=%15.15f\n",i,r, y0, Vtab, yp, Ftab);
 +       
 +       abs_error_r=fabs(y0-Vtab);
 +       abs_error_r2=fabs(yp-(-1)*Ftab);
 +       
 +       rel_error_r=abs_error_r/y0;
 +       rel_error_r2=fabs(abs_error_r2/yp);
 +       
 +       
 +       if(rel_error_r>rel_error_r_old)
 +       {
 +       rel_error_r_old=rel_error_r;
 +       x0_r_error=x0;
 +       }
 +       
 +       if(rel_error_r2>rel_error_r2_old)
 +       {
 +       rel_error_r2_old=rel_error_r2;
 +       x0_r2_error=x0;        
 +       }
 +       }
 +       
 +       printf("gb: MAX REL ERROR IN R=%15.15f, MAX REL ERROR IN R2=%15.15f\n",rel_error_r_old, rel_error_r2_old);
 +       printf("gb: XO_R=%g, X0_R2=%g\n",x0_r_error, x0_r2_error);
 +       
 +       exit(1); */
 +      done_tabledata(&(td[0]));
 +      sfree(td);
 +      
 +      return table;
 +      
 +      
 +}
 +
 +t_forcetable make_atf_table(FILE *out,const output_env_t oenv,
 +                          const t_forcerec *fr,
 +                          const char *fn,
 +                            matrix box)
 +{
 +      const char *fns[3] = { "tf_tab.xvg", "atfdtab.xvg", "atfrtab.xvg" };
 +      FILE        *fp;
 +      t_tabledata *td;
 +      real        x0,y0,yp,rtab;
 +      int         i,nx,nx0;
 +        real        rx, ry, rz, box_r;
 +      
 +      t_forcetable table;
 +      
 +      
 +      /* Set the table dimensions for ATF, not really necessary to
 +       * use etiNR (since we only have one table, but ...) 
 +       */
 +      snew(td,1);
 +        
 +        if (fr->adress_type == eAdressSphere){
 +            /* take half box diagonal direction as tab range */
 +               rx = 0.5*box[0][0]+0.5*box[1][0]+0.5*box[2][0];
 +               ry = 0.5*box[0][1]+0.5*box[1][1]+0.5*box[2][1];
 +               rz = 0.5*box[0][2]+0.5*box[1][2]+0.5*box[2][2];
 +               box_r = sqrt(rx*rx+ry*ry+rz*rz);
 +               
 +        }else{
 +            /* xsplit: take half box x direction as tab range */
 +               box_r        = box[0][0]/2;
 +        }
 +        table.r         = box_r;
 +      table.scale     = 0;
 +      table.n         = 0;
 +      table.scale_exp = 0;
 +      nx0             = 10;
 +      nx              = 0;
 +      
 +        read_tables(out,fn,1,0,td);
 +        rtab      = td[0].x[td[0].nx-1];
 +
 +       if (fr->adress_type == eAdressXSplit && (rtab < box[0][0]/2)){
 +           gmx_fatal(FARGS,"AdResS full box therm force table in file %s extends to %f:\n"
 +                        "\tshould extend to at least half the length of the box in x-direction"
 +                        "%f\n",fn,rtab, box[0][0]/2);
 +       }
 +       if (rtab < box_r){
 +               gmx_fatal(FARGS,"AdResS full box therm force table in file %s extends to %f:\n"
 +                "\tshould extend to at least for spherical adress"
 +                "%f (=distance from center to furthermost point in box \n",fn,rtab, box_r);
 +       }
 +
 +
 +        table.n   = td[0].nx;
 +        nx        = table.n;
 +        table.scale = td[0].tabscale;
 +        nx0         = td[0].nx0;
 +
 +      /* Each table type (e.g. coul,lj6,lj12) requires four 
 +       * numbers per datapoint. For performance reasons we want
 +       * the table data to be aligned to 16-byte. This is accomplished
 +       * by allocating 16 bytes extra to a temporary pointer, and then
 +       * calculating an aligned pointer. This new pointer must not be
 +       * used in a free() call, but thankfully we're sloppy enough not
 +       * to do this :-)
 +       */
 +      
 +    snew_aligned(table.data,4*nx,16);
 +
 +      copy2table(table.n,0,4,td[0].x,td[0].v,td[0].f,1.0,table.data);
 +      
 +      if(bDebugMode())
 +        {
 +          fp=xvgropen(fns[0],fns[0],"r","V",oenv);
 +          /* plot the output 5 times denser than the table data */
 +          /* for(i=5*nx0;i<5*table.n;i++) */
 +         
 +            for(i=5*((nx0+1)/2); i<5*table.n; i++)
 +            {
 +              /* x0=i*table.r/(5*table.n); */
 +              x0 = i*table.r/(5*(table.n-1));
 +              evaluate_table(table.data,0,4,table.scale,x0,&y0,&yp);
 +              fprintf(fp,"%15.10e  %15.10e  %15.10e\n",x0,y0,yp);
 +              
 +            }
 +          ffclose(fp);
 +        }
 +
 +      done_tabledata(&(td[0]));
 +      sfree(td);
 +
 +    table.interaction   = GMX_TABLE_INTERACTION_ELEC_VDWREP_VDWDISP;
 +    table.format        = GMX_TABLE_FORMAT_CUBICSPLINE_YFGH;
 +    table.formatsize    = 4;
 +    table.ninteractions = 3;
 +    table.stride        = table.formatsize*table.ninteractions;
 +
 +      
 +      return table;
 +}
 +
 +bondedtable_t make_bonded_table(FILE *fplog,char *fn,int angle)
 +{
 +  t_tabledata td;
 +  double start;
 +  int    i;
 +  bondedtable_t tab;
 +  
 +  if (angle < 2)
 +    start = 0;
 +  else
 +    start = -180.0;
 +  read_tables(fplog,fn,1,angle,&td);
 +  if (angle > 0) {
 +    /* Convert the table from degrees to radians */
 +    for(i=0; i<td.nx; i++) {
 +      td.x[i] *= DEG2RAD;
 +      td.f[i] *= RAD2DEG;
 +    }
 +    td.tabscale *= RAD2DEG;
 +  }
 +  tab.n = td.nx;
 +  tab.scale = td.tabscale;
 +  snew(tab.data,tab.n*4);
 +  copy2table(tab.n,0,4,td.x,td.v,td.f,1.0,tab.data);
 +  done_tabledata(&td);
 +
 +  return tab;
 +}
 +
 +
index 8e0096bb1bbed4c7697c2605ec7ced97d2d411a7,0000000000000000000000000000000000000000..faa968121be391c11210220999f76cc5bf7c528c
mode 100644,000000..100644
--- /dev/null
@@@ -1,1937 -1,0 +1,2005 @@@
- #if defined(HAVE_SCHED_H) && (defined(HAVE_SCHED_GETAFFINITY) || defined(HAVE_SCHED_SETAFFINITY))
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
- #ifdef GMX_OPENMP /* TODO: actually we could do this even without OpenMP?! */
- #ifdef HAVE_SCHED_SETAFFINITY
++#if defined(HAVE_SCHED_H) && defined(HAVE_SCHED_GETAFFINITY)
 +#define _GNU_SOURCE
 +#include <sched.h>
 +#include <sys/syscall.h>
 +#endif
 +#include <signal.h>
 +#include <stdlib.h>
 +#ifdef HAVE_UNISTD_H
 +#include <unistd.h>
 +#endif
 +#include <string.h>
 +#include <assert.h>
 +
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "sysstuff.h"
 +#include "statutil.h"
 +#include "mdrun.h"
 +#include "md_logging.h"
 +#include "md_support.h"
 +#include "network.h"
 +#include "pull.h"
 +#include "pull_rotation.h"
 +#include "names.h"
 +#include "disre.h"
 +#include "orires.h"
 +#include "pme.h"
 +#include "mdatoms.h"
 +#include "repl_ex.h"
 +#include "qmmm.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "coulomb.h"
 +#include "constr.h"
 +#include "mvdata.h"
 +#include "checkpoint.h"
 +#include "mtop_util.h"
 +#include "sighandler.h"
 +#include "tpxio.h"
 +#include "txtdump.h"
 +#include "gmx_detect_hardware.h"
 +#include "gmx_omp_nthreads.h"
 +#include "pull_rotation.h"
 +#include "calc_verletbuf.h"
 +#include "../mdlib/nbnxn_search.h"
 +#include "../mdlib/nbnxn_consts.h"
 +#include "gmx_fatal_collective.h"
 +#include "membed.h"
 +#include "macros.h"
 +#include "gmx_omp.h"
 +
++#include "thread_mpi/threads.h"
++
 +#ifdef GMX_LIB_MPI
 +#include <mpi.h>
 +#endif
 +#ifdef GMX_THREAD_MPI
 +#include "tmpi.h"
 +#endif
 +
 +#ifdef GMX_FAHCORE
 +#include "corewrap.h"
 +#endif
 +
 +#ifdef GMX_OPENMM
 +#include "md_openmm.h"
 +#endif
 +
 +#include "gpu_utils.h"
 +#include "nbnxn_cuda_data_mgmt.h"
 +
 +typedef struct { 
 +    gmx_integrator_t *func;
 +} gmx_intp_t;
 +
 +/* The array should match the eI array in include/types/enums.h */
 +#ifdef GMX_OPENMM  /* FIXME do_md_openmm needs fixing */
 +const gmx_intp_t integrator[eiNR] = { {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm},{do_md_openmm}};
 +#else
 +const gmx_intp_t integrator[eiNR] = { {do_md}, {do_steep}, {do_cg}, {do_md}, {do_md}, {do_nm}, {do_lbfgs}, {do_tpi}, {do_tpi}, {do_md}, {do_md},{do_md}};
 +#endif
 +
 +gmx_large_int_t     deform_init_init_step_tpx;
 +matrix              deform_init_box_tpx;
 +#ifdef GMX_THREAD_MPI
 +tMPI_Thread_mutex_t deform_init_box_mutex=TMPI_THREAD_MUTEX_INITIALIZER;
 +#endif
 +
 +
 +#ifdef GMX_THREAD_MPI
 +struct mdrunner_arglist
 +{
 +    gmx_hw_opt_t *hw_opt;
 +    FILE *fplog;
 +    t_commrec *cr;
 +    int nfile;
 +    const t_filenm *fnm;
 +    output_env_t oenv;
 +    gmx_bool bVerbose;
 +    gmx_bool bCompact;
 +    int nstglobalcomm;
 +    ivec ddxyz;
 +    int dd_node_order;
 +    real rdd;
 +    real rconstr;
 +    const char *dddlb_opt;
 +    real dlb_scale;
 +    const char *ddcsx;
 +    const char *ddcsy;
 +    const char *ddcsz;
 +    const char *nbpu_opt;
 +    int nsteps_cmdline;
 +    int nstepout;
 +    int resetstep;
 +    int nmultisim;
 +    int repl_ex_nst;
 +    int repl_ex_nex;
 +    int repl_ex_seed;
 +    real pforce;
 +    real cpt_period;
 +    real max_hours;
 +    const char *deviceOptions;
 +    unsigned long Flags;
 +    int ret; /* return value */
 +};
 +
 +
 +/* The function used for spawning threads. Extracts the mdrunner() 
 +   arguments from its one argument and calls mdrunner(), after making
 +   a commrec. */
 +static void mdrunner_start_fn(void *arg)
 +{
 +    struct mdrunner_arglist *mda=(struct mdrunner_arglist*)arg;
 +    struct mdrunner_arglist mc=*mda; /* copy the arg list to make sure 
 +                                        that it's thread-local. This doesn't
 +                                        copy pointed-to items, of course,
 +                                        but those are all const. */
 +    t_commrec *cr;  /* we need a local version of this */
 +    FILE *fplog=NULL;
 +    t_filenm *fnm;
 +
 +    fnm = dup_tfn(mc.nfile, mc.fnm);
 +
 +    cr = init_par_threads(mc.cr);
 +
 +    if (MASTER(cr))
 +    {
 +        fplog=mc.fplog;
 +    }
 +
 +    mda->ret=mdrunner(mc.hw_opt, fplog, cr, mc.nfile, fnm, mc.oenv, 
 +                      mc.bVerbose, mc.bCompact, mc.nstglobalcomm, 
 +                      mc.ddxyz, mc.dd_node_order, mc.rdd,
 +                      mc.rconstr, mc.dddlb_opt, mc.dlb_scale, 
 +                      mc.ddcsx, mc.ddcsy, mc.ddcsz,
 +                      mc.nbpu_opt,
 +                      mc.nsteps_cmdline, mc.nstepout, mc.resetstep,
 +                      mc.nmultisim, mc.repl_ex_nst, mc.repl_ex_nex, mc.repl_ex_seed, mc.pforce, 
 +                      mc.cpt_period, mc.max_hours, mc.deviceOptions, mc.Flags);
 +}
 +
 +/* called by mdrunner() to start a specific number of threads (including 
 +   the main thread) for thread-parallel runs. This in turn calls mdrunner()
 +   for each thread. 
 +   All options besides nthreads are the same as for mdrunner(). */
 +static t_commrec *mdrunner_start_threads(gmx_hw_opt_t *hw_opt, 
 +              FILE *fplog,t_commrec *cr,int nfile, 
 +              const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
 +              gmx_bool bCompact, int nstglobalcomm,
 +              ivec ddxyz,int dd_node_order,real rdd,real rconstr,
 +              const char *dddlb_opt,real dlb_scale,
 +              const char *ddcsx,const char *ddcsy,const char *ddcsz,
 +              const char *nbpu_opt,
 +              int nsteps_cmdline, int nstepout,int resetstep,
 +              int nmultisim,int repl_ex_nst,int repl_ex_nex, int repl_ex_seed,
 +              real pforce,real cpt_period, real max_hours, 
 +              const char *deviceOptions, unsigned long Flags)
 +{
 +    int ret;
 +    struct mdrunner_arglist *mda;
 +    t_commrec *crn; /* the new commrec */
 +    t_filenm *fnmn;
 +
 +    /* first check whether we even need to start tMPI */
 +    if (hw_opt->nthreads_tmpi < 2)
 +    {
 +        return cr;
 +    }
 +
 +    /* a few small, one-time, almost unavoidable memory leaks: */
 +    snew(mda,1);
 +    fnmn=dup_tfn(nfile, fnm);
 +
 +    /* fill the data structure to pass as void pointer to thread start fn */
 +    mda->hw_opt=hw_opt;
 +    mda->fplog=fplog;
 +    mda->cr=cr;
 +    mda->nfile=nfile;
 +    mda->fnm=fnmn;
 +    mda->oenv=oenv;
 +    mda->bVerbose=bVerbose;
 +    mda->bCompact=bCompact;
 +    mda->nstglobalcomm=nstglobalcomm;
 +    mda->ddxyz[XX]=ddxyz[XX];
 +    mda->ddxyz[YY]=ddxyz[YY];
 +    mda->ddxyz[ZZ]=ddxyz[ZZ];
 +    mda->dd_node_order=dd_node_order;
 +    mda->rdd=rdd;
 +    mda->rconstr=rconstr;
 +    mda->dddlb_opt=dddlb_opt;
 +    mda->dlb_scale=dlb_scale;
 +    mda->ddcsx=ddcsx;
 +    mda->ddcsy=ddcsy;
 +    mda->ddcsz=ddcsz;
 +    mda->nbpu_opt=nbpu_opt;
 +    mda->nsteps_cmdline=nsteps_cmdline;
 +    mda->nstepout=nstepout;
 +    mda->resetstep=resetstep;
 +    mda->nmultisim=nmultisim;
 +    mda->repl_ex_nst=repl_ex_nst;
 +    mda->repl_ex_nex=repl_ex_nex;
 +    mda->repl_ex_seed=repl_ex_seed;
 +    mda->pforce=pforce;
 +    mda->cpt_period=cpt_period;
 +    mda->max_hours=max_hours;
 +    mda->deviceOptions=deviceOptions;
 +    mda->Flags=Flags;
 +
 +    fprintf(stderr, "Starting %d tMPI threads\n",hw_opt->nthreads_tmpi);
 +    fflush(stderr);
 +    /* now spawn new threads that start mdrunner_start_fn(), while 
 +       the main thread returns */
 +    ret=tMPI_Init_fn(TRUE, hw_opt->nthreads_tmpi,
 +                     (hw_opt->bThreadPinning ? TMPI_AFFINITY_ALL_CORES : TMPI_AFFINITY_NONE),
 +                     mdrunner_start_fn, (void*)(mda) );
 +    if (ret!=TMPI_SUCCESS)
 +        return NULL;
 +
 +    /* make a new comm_rec to reflect the new situation */
 +    crn=init_par_threads(cr);
 +    return crn;
 +}
 +
 +
 +static int get_tmpi_omp_thread_division(const gmx_hw_info_t *hwinfo,
 +                                        const gmx_hw_opt_t *hw_opt,
 +                                        int nthreads_tot,
 +                                        int ngpu)
 +{
 +    int nthreads_tmpi;
 +
 +    /* There are no separate PME nodes here, as we ensured in
 +     * check_and_update_hw_opt that nthreads_tmpi>0 with PME nodes
 +     * and a conditional ensures we would not have ended up here.
 +     * Note that separate PME nodes might be switched on later.
 +     */
 +    if (ngpu > 0)
 +    {
 +        nthreads_tmpi = ngpu;
 +        if (nthreads_tot > 0 && nthreads_tot < nthreads_tmpi)
 +        {
 +            nthreads_tmpi = nthreads_tot;
 +        }
 +    }
 +    else if (hw_opt->nthreads_omp > 0)
 +    {
 +        /* Here we could oversubscribe, when we do, we issue a warning later */
 +        nthreads_tmpi = max(1,nthreads_tot/hw_opt->nthreads_omp);
 +    }
 +    else
 +    {
 +        /* TODO choose nthreads_omp based on hardware topology
 +           when we have a hardware topology detection library */
 +        /* In general, when running up to 4 threads, OpenMP should be faster.
 +         * Note: on AMD Bulldozer we should avoid running OpenMP over two dies.
 +         * On Intel>=Nehalem running OpenMP on a single CPU is always faster,
 +         * even on two CPUs it's usually faster (but with many OpenMP threads
 +         * it could be faster not to use HT, currently we always use HT).
 +         * On Nehalem/Westmere we want to avoid running 16 threads over
 +         * two CPUs with HT, so we need a limit<16; thus we use 12.
 +         * A reasonable limit for Intel Sandy and Ivy bridge,
 +         * not knowing the topology, is 16 threads.
 +         */
 +        const int nthreads_omp_always_faster             =  4;
 +        const int nthreads_omp_always_faster_Nehalem     = 12;
 +        const int nthreads_omp_always_faster_SandyBridge = 16;
 +        const int first_model_Nehalem     = 0x1A;
 +        const int first_model_SandyBridge = 0x2A;
 +        gmx_bool bIntel_Family6;
 +
 +        bIntel_Family6 =
 +            (gmx_cpuid_vendor(hwinfo->cpuid_info) == GMX_CPUID_VENDOR_INTEL &&
 +             gmx_cpuid_family(hwinfo->cpuid_info) == 6);
 +
 +        if (nthreads_tot <= nthreads_omp_always_faster ||
 +            (bIntel_Family6 &&
 +             ((gmx_cpuid_model(hwinfo->cpuid_info) >= nthreads_omp_always_faster_Nehalem && nthreads_tot <= nthreads_omp_always_faster_Nehalem) ||
 +              (gmx_cpuid_model(hwinfo->cpuid_info) >= nthreads_omp_always_faster_SandyBridge && nthreads_tot <= nthreads_omp_always_faster_SandyBridge))))
 +        {
 +            /* Use pure OpenMP parallelization */
 +            nthreads_tmpi = 1;
 +        }
 +        else
 +        {
 +            /* Don't use OpenMP parallelization */
 +            nthreads_tmpi = nthreads_tot;
 +        }
 +    }
 +
 +    return nthreads_tmpi;
 +}
 +
 +
 +/* Get the number of threads to use for thread-MPI based on how many
 + * were requested, which algorithms we're using,
 + * and how many particles there are.
 + * At the point we have already called check_and_update_hw_opt.
 + * Thus all options should be internally consistent and consistent
 + * with the hardware, except that ntmpi could be larger than #GPU.
 + */
 +static int get_nthreads_mpi(gmx_hw_info_t *hwinfo,
 +                            gmx_hw_opt_t *hw_opt,
 +                            t_inputrec *inputrec, gmx_mtop_t *mtop,
 +                            const t_commrec *cr,
 +                            FILE *fplog)
 +{
 +    int nthreads_hw,nthreads_tot_max,nthreads_tmpi,nthreads_new,ngpu;
 +    int min_atoms_per_mpi_thread;
 +    char *env;
 +    char sbuf[STRLEN];
 +    gmx_bool bCanUseGPU;
 +
 +    if (hw_opt->nthreads_tmpi > 0)
 +    {
 +        /* Trivial, return right away */
 +        return hw_opt->nthreads_tmpi;
 +    }
 +
 +    nthreads_hw = hwinfo->nthreads_hw_avail;
 +
 +    /* How many total (#tMPI*#OpenMP) threads can we start? */ 
 +    if (hw_opt->nthreads_tot > 0)
 +    {
 +        nthreads_tot_max = hw_opt->nthreads_tot;
 +    }
 +    else
 +    {
 +        nthreads_tot_max = nthreads_hw;
 +    }
 +
 +    bCanUseGPU = (inputrec->cutoff_scheme == ecutsVERLET && hwinfo->bCanUseGPU);
 +    if (bCanUseGPU)
 +    {
 +        ngpu = hwinfo->gpu_info.ncuda_dev_use;
 +    }
 +    else
 +    {
 +        ngpu = 0;
 +    }
 +
 +    nthreads_tmpi =
 +        get_tmpi_omp_thread_division(hwinfo,hw_opt,nthreads_tot_max,ngpu);
 +
 +    if (inputrec->eI == eiNM || EI_TPI(inputrec->eI))
 +    {
 +        /* Steps are divided over the nodes iso splitting the atoms */
 +        min_atoms_per_mpi_thread = 0;
 +    }
 +    else
 +    {
 +        if (bCanUseGPU)
 +        {
 +            min_atoms_per_mpi_thread = MIN_ATOMS_PER_GPU;
 +        }
 +        else
 +        {
 +            min_atoms_per_mpi_thread = MIN_ATOMS_PER_MPI_THREAD;
 +        }
 +    }
 +
 +    /* Check if an algorithm does not support parallel simulation.  */
 +    if (nthreads_tmpi != 1 &&
 +        ( inputrec->eI == eiLBFGS ||
 +          inputrec->coulombtype == eelEWALD ) )
 +    {
 +        nthreads_tmpi = 1;
 +
 +        md_print_warn(cr,fplog,"The integration or electrostatics algorithm doesn't support parallel runs. Using a single thread-MPI thread.\n");
 +        if (hw_opt->nthreads_tmpi > nthreads_tmpi)
 +        {
 +            gmx_fatal(FARGS,"You asked for more than 1 thread-MPI thread, but an algorithm doesn't support that");
 +        }
 +    }
 +    else if (mtop->natoms/nthreads_tmpi < min_atoms_per_mpi_thread)
 +    {
 +        /* the thread number was chosen automatically, but there are too many
 +           threads (too few atoms per thread) */
 +        nthreads_new = max(1,mtop->natoms/min_atoms_per_mpi_thread);
 +
 +        /* Avoid partial use of Hyper-Threading */
 +        if (gmx_cpuid_x86_smt(hwinfo->cpuid_info) == GMX_CPUID_X86_SMT_ENABLED &&
 +            nthreads_new > nthreads_hw/2 && nthreads_new < nthreads_hw)
 +        {
 +            nthreads_new = nthreads_hw/2;
 +        }
 +
 +        /* Avoid large prime numbers in the thread count */
 +        if (nthreads_new >= 6)
 +        {
 +            /* Use only 6,8,10 with additional factors of 2 */
 +            int fac;
 +
 +            fac = 2;
 +            while (3*fac*2 <= nthreads_new)
 +            {
 +                fac *= 2;
 +            }
 +
 +            nthreads_new = (nthreads_new/fac)*fac;
 +        }
 +        else
 +        {
 +            /* Avoid 5 */
 +            if (nthreads_new == 5)
 +            {
 +                nthreads_new = 4;
 +            }
 +        }
 +
 +        nthreads_tmpi = nthreads_new;
 +
 +        fprintf(stderr,"\n");
 +        fprintf(stderr,"NOTE: Parallelization is limited by the small number of atoms,\n");
 +        fprintf(stderr,"      only starting %d thread-MPI threads.\n",nthreads_tmpi);
 +        fprintf(stderr,"      You can use the -nt and/or -ntmpi option to optimize the number of threads.\n\n");
 +    }
 +
 +    return nthreads_tmpi;
 +}
 +#endif /* GMX_THREAD_MPI */
 +
 +
 +/* Environment variable for setting nstlist */
 +static const char*  NSTLIST_ENVVAR          =  "GMX_NSTLIST";
 +/* Try to increase nstlist when using a GPU with nstlist less than this */
 +static const int    NSTLIST_GPU_ENOUGH      = 20;
 +/* Increase nstlist until the non-bonded cost increases more than this factor */
 +static const float  NBNXN_GPU_LIST_OK_FAC   = 1.25;
 +/* Don't increase nstlist beyond a non-bonded cost increases of this factor */
 +static const float  NBNXN_GPU_LIST_MAX_FAC  = 1.40;
 +
 +/* Try to increase nstlist when running on a GPU */
 +static void increase_nstlist(FILE *fp,t_commrec *cr,
 +                             t_inputrec *ir,const gmx_mtop_t *mtop,matrix box)
 +{
 +    char *env;
 +    int  nstlist_orig,nstlist_prev;
 +    verletbuf_list_setup_t ls;
 +    real rlist_inc,rlist_ok,rlist_max,rlist_new,rlist_prev;
 +    int  i;
 +    t_state state_tmp;
 +    gmx_bool bBox,bDD,bCont;
 +    const char *nstl_fmt="\nFor optimal performance with a GPU nstlist (now %d) should be larger.\nThe optimum depends on your CPU and GPU resources.\nYou might want to try several nstlist values.\n";
 +    const char *vbd_err="Can not increase nstlist for GPU run because verlet-buffer-drift is not set or used";
 +    const char *box_err="Can not increase nstlist for GPU run because the box is too small";
 +    const char *dd_err ="Can not increase nstlist for GPU run because of domain decomposition limitations";
 +    char buf[STRLEN];
 +
 +    /* Number of + nstlist alternative values to try when switching  */
 +    const int nstl[]={ 20, 25, 40, 50 };
 +#define NNSTL  sizeof(nstl)/sizeof(nstl[0])
 +
 +    env = getenv(NSTLIST_ENVVAR);
 +    if (env == NULL)
 +    {
 +        if (fp != NULL)
 +        {
 +            fprintf(fp,nstl_fmt,ir->nstlist);
 +        }
 +    }
 +
 +    if (ir->verletbuf_drift == 0)
 +    {
 +        gmx_fatal(FARGS,"You are using an old tpr file with a GPU, please generate a new tpr file with an up to date version of grompp");
 +    }
 +
 +    if (ir->verletbuf_drift < 0)
 +    {
 +        if (MASTER(cr))
 +        {
 +            fprintf(stderr,"%s\n",vbd_err);
 +        }
 +        if (fp != NULL)
 +        {
 +            fprintf(fp,"%s\n",vbd_err);
 +        }
 +
 +        return;
 +    }
 +
 +    nstlist_orig = ir->nstlist;
 +    if (env != NULL)
 +    {
 +        sprintf(buf,"Getting nstlist from environment variable GMX_NSTLIST=%s",env);
 +        if (MASTER(cr))
 +        {
 +            fprintf(stderr,"%s\n",buf);
 +        }
 +        if (fp != NULL)
 +        {
 +            fprintf(fp,"%s\n",buf);
 +        }
 +        sscanf(env,"%d",&ir->nstlist);
 +    }
 +
 +    verletbuf_get_list_setup(TRUE,&ls);
 +
 +    /* Allow rlist to make the list double the size of the cut-off sphere */
 +    rlist_inc = nbnxn_get_rlist_effective_inc(NBNXN_GPU_CLUSTER_SIZE,mtop->natoms/det(box));
 +    rlist_ok  = (max(ir->rvdw,ir->rcoulomb) + rlist_inc)*pow(NBNXN_GPU_LIST_OK_FAC,1.0/3.0) - rlist_inc;
 +    rlist_max = (max(ir->rvdw,ir->rcoulomb) + rlist_inc)*pow(NBNXN_GPU_LIST_MAX_FAC,1.0/3.0) - rlist_inc;
 +    if (debug)
 +    {
 +        fprintf(debug,"GPU nstlist tuning: rlist_inc %.3f rlist_max %.3f\n",
 +                rlist_inc,rlist_max);
 +    }
 +
 +    i = 0;
 +    nstlist_prev = nstlist_orig;
 +    rlist_prev   = ir->rlist;
 +    do
 +    {
 +        if (env == NULL)
 +        {
 +            ir->nstlist = nstl[i];
 +        }
 +
 +        /* Set the pair-list buffer size in ir */
 +        calc_verlet_buffer_size(mtop,det(box),ir,ir->verletbuf_drift,&ls,
 +                                NULL,&rlist_new);
 +
 +        /* Does rlist fit in the box? */
 +        bBox = (sqr(rlist_new) < max_cutoff2(ir->ePBC,box));
 +        bDD  = TRUE;
 +        if (bBox && DOMAINDECOMP(cr))
 +        {
 +            /* Check if rlist fits in the domain decomposition */
 +            if (inputrec2nboundeddim(ir) < DIM)
 +            {
 +                gmx_incons("Changing nstlist with domain decomposition and unbounded dimensions is not implemented yet");
 +            }
 +            copy_mat(box,state_tmp.box);
 +            bDD = change_dd_cutoff(cr,&state_tmp,ir,rlist_new);
 +        }
 +
 +        bCont = FALSE;
 +
 +        if (env == NULL)
 +        {
 +            if (bBox && bDD && rlist_new <= rlist_max)
 +            {
 +                /* Increase nstlist */
 +                nstlist_prev = ir->nstlist;
 +                rlist_prev   = rlist_new;
 +                bCont = (i+1 < NNSTL && rlist_new < rlist_ok);
 +            }
 +            else
 +            {
 +                /* Stick with the previous nstlist */
 +                ir->nstlist = nstlist_prev;
 +                rlist_new   = rlist_prev;
 +                bBox = TRUE;
 +                bDD  = TRUE;
 +            }
 +        }
 +
 +        i++;
 +    }
 +    while (bCont);
 +
 +    if (!bBox || !bDD)
 +    {
 +        gmx_warning(!bBox ? box_err : dd_err);
 +        if (fp != NULL)
 +        {
 +            fprintf(fp,"\n%s\n",bBox ? box_err : dd_err);
 +        }
 +        ir->nstlist = nstlist_orig;
 +    }
 +    else if (ir->nstlist != nstlist_orig || rlist_new != ir->rlist)
 +    {
 +        sprintf(buf,"Changing nstlist from %d to %d, rlist from %g to %g",
 +                nstlist_orig,ir->nstlist,
 +                ir->rlist,rlist_new);
 +        if (MASTER(cr))
 +        {
 +            fprintf(stderr,"%s\n\n",buf);
 +        }
 +        if (fp != NULL)
 +        {
 +            fprintf(fp,"%s\n\n",buf);
 +        }
 +        ir->rlist     = rlist_new;
 +        ir->rlistlong = rlist_new;
 +    }
 +}
 +
 +static void prepare_verlet_scheme(FILE *fplog,
 +                                  gmx_hw_info_t *hwinfo,
 +                                  t_commrec *cr,
 +                                  gmx_hw_opt_t *hw_opt,
 +                                  const char *nbpu_opt,
 +                                  t_inputrec *ir,
 +                                  const gmx_mtop_t *mtop,
 +                                  matrix box,
 +                                  gmx_bool *bUseGPU)
 +{
 +    /* Here we only check for GPU usage on the MPI master process,
 +     * as here we don't know how many GPUs we will use yet.
 +     * We check for a GPU on all processes later.
 +     */
 +    *bUseGPU = hwinfo->bCanUseGPU || (getenv("GMX_EMULATE_GPU") != NULL);
 +
 +    if (ir->verletbuf_drift > 0)
 +    {
 +        /* Update the Verlet buffer size for the current run setup */
 +        verletbuf_list_setup_t ls;
 +        real rlist_new;
 +
 +        /* Here we assume CPU acceleration is on. But as currently
 +         * calc_verlet_buffer_size gives the same results for 4x8 and 4x4
 +         * and 4x2 gives a larger buffer than 4x4, this is ok.
 +         */
 +        verletbuf_get_list_setup(*bUseGPU,&ls);
 +
 +        calc_verlet_buffer_size(mtop,det(box),ir,
 +                                ir->verletbuf_drift,&ls,
 +                                NULL,&rlist_new);
 +        if (rlist_new != ir->rlist)
 +        {
 +            if (fplog != NULL)
 +            {
 +                fprintf(fplog,"\nChanging rlist from %g to %g for non-bonded %dx%d atom kernels\n\n",
 +                        ir->rlist,rlist_new,
 +                        ls.cluster_size_i,ls.cluster_size_j);
 +            }
 +            ir->rlist     = rlist_new;
 +            ir->rlistlong = rlist_new;
 +        }
 +    }
 +
 +    /* With GPU or emulation we should check nstlist for performance */
 +    if ((EI_DYNAMICS(ir->eI) &&
 +         *bUseGPU &&
 +         ir->nstlist < NSTLIST_GPU_ENOUGH) ||
 +        getenv(NSTLIST_ENVVAR) != NULL)
 +    {
 +        /* Choose a better nstlist */
 +        increase_nstlist(fplog,cr,ir,mtop,box);
 +    }
 +}
 +
 +static void convert_to_verlet_scheme(FILE *fplog,
 +                                     t_inputrec *ir,
 +                                     gmx_mtop_t *mtop,real box_vol)
 +{
 +    char *conv_mesg="Converting input file with group cut-off scheme to the Verlet cut-off scheme";
 +
 +    md_print_warn(NULL,fplog,"%s\n",conv_mesg);
 +
 +    ir->cutoff_scheme   = ecutsVERLET;
 +    ir->verletbuf_drift = 0.005;
 +
 +    if (ir->rcoulomb != ir->rvdw)
 +    {
 +        gmx_fatal(FARGS,"The VdW and Coulomb cut-offs are different, whereas the Verlet scheme only supports equal cut-offs");
 +    }
 +
 +    if (ir->vdwtype == evdwUSER || EEL_USER(ir->coulombtype))
 +    {
 +        gmx_fatal(FARGS,"User non-bonded potentials are not (yet) supported with the Verlet scheme");
 +    }
 +    else if (EVDW_SWITCHED(ir->vdwtype) || EEL_SWITCHED(ir->coulombtype))
 +    {
 +        md_print_warn(NULL,fplog,"Converting switched or shifted interactions to a shifted potential (without force shift), this will lead to slightly different interaction potentials");
 +
 +        if (EVDW_SWITCHED(ir->vdwtype))
 +        {
 +            ir->vdwtype = evdwCUT;
 +        }
 +        if (EEL_SWITCHED(ir->coulombtype))
 +        {
 +            if (EEL_FULL(ir->coulombtype))
 +            {
 +                /* With full electrostatic only PME can be switched */
 +                ir->coulombtype = eelPME;
 +            }
 +            else
 +            {
 +                md_print_warn(NULL,fplog,"NOTE: Replacing %s electrostatics with reaction-field with epsilon-rf=inf\n",eel_names[ir->coulombtype]);
 +                ir->coulombtype = eelRF;
 +                ir->epsilon_rf  = 0.0;
 +            }
 +        }
 +
 +        /* We set the target energy drift to a small number.
 +         * Note that this is only for testing. For production the user
 +         * should think about this and set the mdp options.
 +         */
 +        ir->verletbuf_drift = 1e-4;
 +    }
 +
 +    if (inputrec2nboundeddim(ir) != 3)
 +    {
 +        gmx_fatal(FARGS,"Can only convert old tpr files to the Verlet cut-off scheme with 3D pbc");
 +    }
 +
 +    if (ir->efep != efepNO || ir->implicit_solvent != eisNO)
 +    {
 +        gmx_fatal(FARGS,"Will not convert old tpr files to the Verlet cut-off scheme with free-energy calculations or implicit solvent");
 +    }
 +
 +    if (EI_DYNAMICS(ir->eI) && !(EI_MD(ir->eI) && ir->etc == etcNO))
 +    {
 +        verletbuf_list_setup_t ls;
 +
 +        verletbuf_get_list_setup(FALSE,&ls);
 +        calc_verlet_buffer_size(mtop,box_vol,ir,ir->verletbuf_drift,&ls,
 +                                NULL,&ir->rlist);
 +    }
 +    else
 +    {
 +        ir->verletbuf_drift = -1;
 +        ir->rlist           = 1.05*max(ir->rvdw,ir->rcoulomb);
 +    }
 +
 +    gmx_mtop_remove_chargegroups(mtop);
 +}
 +
 +/* Check the process affinity mask and if it is found to be non-zero,
 + * will honor it and disable mdrun internal affinity setting.
 + * This function should be called first before the OpenMP library gets
 + * initialized with the last argument FALSE (which will detect affinity
 + * set by external tools like taskset), and later, after the OpenMP
 + * initialization, with the last argument TRUE to detect affinity changes
 + * made by the OpenMP library.
 + *
 + * Note that this will only work on Linux as we use a GNU feature. */
 +static void check_cpu_affinity_set(FILE *fplog, const t_commrec *cr,
 +                                   gmx_hw_opt_t *hw_opt, int ncpus,
 +                                   gmx_bool bAfterOpenmpInit)
 +{
 +#ifdef HAVE_SCHED_GETAFFINITY
 +    cpu_set_t mask_current;
 +    int       i, ret, cpu_count, cpu_set;
 +    gmx_bool  bAllSet;
 +
 +    assert(hw_opt);
 +    if (!hw_opt->bThreadPinning)
 +    {
 +        /* internal affinity setting is off, don't bother checking process affinity */
 +        return;
 +    }
 +
 +    CPU_ZERO(&mask_current);
 +    if ((ret = sched_getaffinity(0, sizeof(cpu_set_t), &mask_current)) != 0)
 +    {
 +        /* failed to query affinity mask, will just return */
 +        if (debug)
 +        {
 +            fprintf(debug, "Failed to query affinity mask (error %d)", ret);
 +        }
 +        return;
 +    }
 +
 +    /* Before proceeding with the actual check, make sure that the number of
 +     * detected CPUs is >= the CPUs in the current set.
 +     * We need to check for CPU_COUNT as it was added only in glibc 2.6. */
 +#ifdef CPU_COUNT
 +    if (ncpus < CPU_COUNT(&mask_current))
 +    {
 +        if (debug)
 +        {
 +            fprintf(debug, "%d CPUs detected, but %d was returned by CPU_COUNT",
 +                    ncpus, CPU_COUNT(&mask_current));
 +        }
 +        return;
 +    }
 +#endif /* CPU_COUNT */
 +
 +    bAllSet = TRUE;
 +    for (i = 0; (i < ncpus && i < CPU_SETSIZE); i++)
 +    {
 +        bAllSet = bAllSet && (CPU_ISSET(i, &mask_current) != 0);
 +    }
 +
 +    if (!bAllSet)
 +    {
 +        if (!bAfterOpenmpInit)
 +        {
 +            md_print_warn(cr, fplog,
 +                          "Non-default process affinity set, disabling internal affinity");
 +        }
 +        else
 +        {
 +            md_print_warn(cr, fplog,
 +                          "Non-default process affinity set probably by the OpenMP library, "
 +                          "disabling internal affinity");
 +        }
 +        hw_opt->bThreadPinning = FALSE;
 +
 +        if (debug)
 +        {
 +            fprintf(debug, "Non-default affinity mask found\n");
 +        }
 +    }
 +    else
 +    {
 +        if (debug)
 +        {
 +            fprintf(debug, "Default affinity mask found\n");
 +        }
 +    }
 +#endif /* HAVE_SCHED_GETAFFINITY */
 +}
 +
 +/* Set CPU affinity. Can be important for performance.
 +   On some systems (e.g. Cray) CPU Affinity is set by default.
 +   But default assigning doesn't work (well) with only some ranks
 +   having threads. This causes very low performance.
 +   External tools have cumbersome syntax for setting affinity
 +   in the case that only some ranks have threads.
 +   Thus it is important that GROMACS sets the affinity internally
 +   if only PME is using threads.
 +*/
 +static void set_cpu_affinity(FILE *fplog,
 +                             const t_commrec *cr,
 +                             gmx_hw_opt_t *hw_opt,
 +                             int nthreads_pme,
 +                             const gmx_hw_info_t *hwinfo,
 +                             const t_inputrec *inputrec)
 +{
 +#if defined GMX_THREAD_MPI
 +    /* With the number of TMPI threads equal to the number of cores
 +     * we already pinned in thread-MPI, so don't pin again here.
 +     */
 +    if (hw_opt->nthreads_tmpi == tMPI_Thread_get_hw_number())
 +    {
 +        return;
 +    }
 +#endif
 +
-         int thread, nthread_local, nthread_node, nthread_hw_max, nphyscore;
++#ifndef __APPLE__
++    /* If the tMPI thread affinity setting is not supported encourage the user
++     * to report it as it's either a bug or an exotic platform which we might
++     * want to support. */
++    if (tMPI_Thread_setaffinity_support() != TMPI_SETAFFINITY_SUPPORT_YES)
++    {
++        md_print_warn(NULL, fplog,
++                      "Can not set thread affinities on the current plarform. On NUMA systems this\n"
++                      "can cause performance degradation. If you think your platform should support\n"
++                      "setting affinities, contact the GROMACS developers.");
++        return;
++    }
++#endif /* __APPLE__ */
++
 +    if (hw_opt->bThreadPinning)
 +    {
-         thread = 0;
++        int nth_affinity_set, thread_id_node, thread_id,
++            nthread_local, nthread_node, nthread_hw_max, nphyscore;
 +        int offset;
 +        char *env;
 +
 +        /* threads on this MPI process or TMPI thread */
 +        if (cr->duty & DUTY_PP)
 +        {
 +            nthread_local = gmx_omp_nthreads_get(emntNonbonded);
 +        }
 +        else
 +        {
 +            nthread_local = gmx_omp_nthreads_get(emntPME);
 +        }
 +
 +        /* map the current process to cores */
-             MPI_Comm_split(MPI_COMM_WORLD,gmx_hostname_num(),cr->nodeid_intra,
++        thread_id_node = 0;
 +        nthread_node = nthread_local;
 +#ifdef GMX_MPI
 +        if (PAR(cr) || MULTISIM(cr))
 +        {
 +            /* We need to determine a scan of the thread counts in this
 +             * compute node.
 +             */
++            int process_index;
 +            MPI_Comm comm_intra;
 +
-             MPI_Scan(&nthread_local,&thread,1,MPI_INT,MPI_SUM,comm_intra);
++            process_index = cr->nodeid_intra;
++            if (MULTISIM(cr))
++            {
++                /* To simplify the code, we shift process indices by nnodes.
++                 * There might be far less processes, but that doesn't matter.
++                 */
++                process_index += cr->ms->sim*cr->nnodes;
++            }
++            MPI_Comm_split(MPI_COMM_WORLD,gmx_hostname_num(),process_index,
 +                           &comm_intra);
-             thread -= nthread_local;
++            MPI_Scan(&nthread_local,&thread_id_node,1,MPI_INT,MPI_SUM,comm_intra);
 +            /* MPI_Scan is inclusive, but here we need exclusive */
-         /* set the per-thread affinity */
- #pragma omp parallel firstprivate(thread) num_threads(nthread_local)
++            thread_id_node -= nthread_local;
 +            /* Get the total number of threads on this physical node */
 +            MPI_Allreduce(&nthread_local,&nthread_node,1,MPI_INT,MPI_SUM,comm_intra);
 +            MPI_Comm_free(&comm_intra);
 +        }
 +#endif
 +
 +        offset = 0;
 +        if (hw_opt->core_pinning_offset > 0)
 +        {
 +            offset = hw_opt->core_pinning_offset;
 +            if (SIMMASTER(cr))
 +            {
 +                fprintf(stderr, "Applying core pinning offset %d\n", offset);
 +            }
 +            if (fplog)
 +            {
 +                fprintf(fplog, "Applying core pinning offset %d\n", offset);
 +            }
 +        }
 +
 +        /* With Intel Hyper-Threading enabled, we want to pin consecutive
 +         * threads to physical cores when using more threads than physical
 +         * cores or when the user requests so.
 +         */
 +        nthread_hw_max = hwinfo->nthreads_hw_avail;
 +        nphyscore = -1;
 +        if (hw_opt->bPinHyperthreading ||
 +            (gmx_cpuid_x86_smt(hwinfo->cpuid_info) == GMX_CPUID_X86_SMT_ENABLED &&
 +             nthread_node > nthread_hw_max/2 && getenv("GMX_DISABLE_PINHT") == NULL))
 +        {
 +            if (gmx_cpuid_x86_smt(hwinfo->cpuid_info) != GMX_CPUID_X86_SMT_ENABLED)
 +            {
 +                /* We print to stderr on all processes, as we might have
 +                 * different settings on different physical nodes.
 +                 */
 +                if (gmx_cpuid_vendor(hwinfo->cpuid_info) != GMX_CPUID_VENDOR_INTEL)
 +                {
 +                    md_print_warn(NULL, fplog, "Pinning for Hyper-Threading layout requested, "
 +                                  "but non-Intel CPU detected (vendor: %s)\n",
 +                                  gmx_cpuid_vendor_string[gmx_cpuid_vendor(hwinfo->cpuid_info)]);
 +                }
 +                else
 +                {
 +                    md_print_warn(NULL, fplog, "Pinning for Hyper-Threading layout requested, "
 +                                  "but the CPU detected does not have Intel Hyper-Threading support "
 +                                  "(or it is turned off)\n");
 +                }
 +            }
 +            nphyscore = nthread_hw_max/2;
 +
 +            if (SIMMASTER(cr))
 +            {
 +                fprintf(stderr, "Pinning to Hyper-Threading cores with %d physical cores in a compute node\n",
 +                        nphyscore);
 +            }
 +            if (fplog)
 +            {
 +                fprintf(fplog, "Pinning to Hyper-Threading cores with %d physical cores in a compute node\n",
 +                        nphyscore);
 +            }
 +        }
 +
-             cpu_set_t mask;
-             int core;
++        /* Set the per-thread affinity. In order to be able to check the success
++         * of affinity settings, we will set nth_affinity_set to 1 on threads
++         * where the affinity setting succeded and to 0 where it failed.
++         * Reducing these 0/1 values over the threads will give the total number
++         * of threads on which we succeeded.
++         */
++         nth_affinity_set = 0;
++#pragma omp parallel firstprivate(thread_id_node) num_threads(nthread_local) \
++                     reduction(+:nth_affinity_set)
 +        {
-             CPU_ZERO(&mask);
-             thread += gmx_omp_get_thread_num();
++            int      core;
++            gmx_bool setaffinity_ret;
 +
-                 core = offset + thread;
++            thread_id       = gmx_omp_get_thread_num();
++            thread_id_node += thread_id;
 +            if (nphyscore <= 0)
 +            {
-                 core = offset + thread/2 + (thread % 2)*nphyscore;
++                core = offset + thread_id_node;
 +            }
 +            else
 +            {
 +                /* Lock pairs of threads to the same hyperthreaded core */
-             CPU_SET(core, &mask);
-             sched_setaffinity((pid_t) syscall (SYS_gettid), sizeof(cpu_set_t), &mask);
++                core = offset + thread_id_node/2 + (thread_id_node % 2)*nphyscore;
++            }
++
++            setaffinity_ret = tMPI_Thread_setaffinity_single(tMPI_Thread_self(), core);
++
++            /* store the per-thread success-values of the setaffinity */
++            nth_affinity_set = (setaffinity_ret == 0);
++
++            if (debug)
++            {
++                fprintf(debug, "On node %d, thread %d the affinity setting returned %d\n",
++                        cr->nodeid, gmx_omp_get_thread_num(), setaffinity_ret);
++            }
++        }
++
++        if (nth_affinity_set > nthread_local)
++        {
++            char msg[STRLEN];
++
++            sprintf(msg, "Looks like we have set affinity for more threads than "
++                    "we have (%d > %d)!\n", nth_affinity_set, nthread_local);
++            gmx_incons(msg);
++        }
++        else
++        {
++            /* check if some threads failed to set their affinities */
++            if (nth_affinity_set != nthread_local)
++            {
++                char sbuf[STRLEN];
++                sbuf[0] = '\0';
++#ifdef GMX_MPI
++#ifdef GMX_THREAD_MPI
++                sprintf(sbuf, "In thread-MPI thread #%d", cr->nodeid);
++#else /* GMX_LIB_MPI */
++#endif
++                sprintf(sbuf, "In MPI process #%d", cr->nodeid);
++#endif /* GMX_MPI */
++                md_print_warn(NULL, fplog,
++                              "%s%d/%d thread%s failed to set their affinities. "
++                              "This can cause performance degradation!",
++                              sbuf, nthread_local - nth_affinity_set, nthread_local,
++                              (nthread_local - nth_affinity_set) > 1 ? "s" : "");
 +            }
- #endif /* HAVE_SCHED_SETAFFINITY */
- #endif /* GMX_OPENMP */
 +        }
 +    }
 +}
 +
 +
 +static void check_and_update_hw_opt(gmx_hw_opt_t *hw_opt,
 +                                    int cutoff_scheme)
 +{
 +    gmx_omp_nthreads_read_env(&hw_opt->nthreads_omp);
 +
 +#ifndef GMX_THREAD_MPI
 +    if (hw_opt->nthreads_tot > 0)
 +    {
 +        gmx_fatal(FARGS,"Setting the total number of threads is only supported with thread-MPI and Gromacs was compiled without thread-MPI");
 +    }
 +    if (hw_opt->nthreads_tmpi > 0)
 +    {
 +        gmx_fatal(FARGS,"Setting the number of thread-MPI threads is only supported with thread-MPI and Gromacs was compiled without thread-MPI");
 +    }
 +#endif
 +
 +    if (hw_opt->nthreads_tot > 0 && hw_opt->nthreads_omp_pme <= 0)
 +    {
 +        /* We have the same number of OpenMP threads for PP and PME processes,
 +         * thus we can perform several consistency checks.
 +         */
 +        if (hw_opt->nthreads_tmpi > 0 &&
 +            hw_opt->nthreads_omp > 0 &&
 +            hw_opt->nthreads_tot != hw_opt->nthreads_tmpi*hw_opt->nthreads_omp)
 +        {
 +            gmx_fatal(FARGS,"The total number of threads requested (%d) does not match the thread-MPI threads (%d) times the OpenMP threads (%d) requested",
 +                      hw_opt->nthreads_tot,hw_opt->nthreads_tmpi,hw_opt->nthreads_omp);
 +        }
 +
 +        if (hw_opt->nthreads_tmpi > 0 &&
 +            hw_opt->nthreads_tot % hw_opt->nthreads_tmpi != 0)
 +        {
 +            gmx_fatal(FARGS,"The total number of threads requested (%d) is not divisible by the number of thread-MPI threads requested (%d)",
 +                      hw_opt->nthreads_tot,hw_opt->nthreads_tmpi);
 +        }
 +
 +        if (hw_opt->nthreads_omp > 0 &&
 +            hw_opt->nthreads_tot % hw_opt->nthreads_omp != 0)
 +        {
 +            gmx_fatal(FARGS,"The total number of threads requested (%d) is not divisible by the number of OpenMP threads requested (%d)",
 +                      hw_opt->nthreads_tot,hw_opt->nthreads_omp);
 +        }
 +
 +        if (hw_opt->nthreads_tmpi > 0 &&
 +            hw_opt->nthreads_omp <= 0)
 +        {
 +            hw_opt->nthreads_omp = hw_opt->nthreads_tot/hw_opt->nthreads_tmpi;
 +        }
 +    }
 +
 +#ifndef GMX_OPENMP
 +    if (hw_opt->nthreads_omp > 1)
 +    {
 +        gmx_fatal(FARGS,"OpenMP threads are requested, but Gromacs was compiled without OpenMP support");
 +    }
 +#endif
 +
 +    if (cutoff_scheme == ecutsGROUP)
 +    {
 +        /* We only have OpenMP support for PME only nodes */
 +        if (hw_opt->nthreads_omp > 1)
 +        {
 +            gmx_fatal(FARGS,"OpenMP threads have been requested with cut-off scheme %s, but these are only supported with cut-off scheme %s",
 +                      ecutscheme_names[cutoff_scheme],
 +                      ecutscheme_names[ecutsVERLET]);
 +        }
 +        hw_opt->nthreads_omp = 1;
 +    }
 +
 +    if (hw_opt->nthreads_omp_pme > 0 && hw_opt->nthreads_omp <= 0)
 +    {
 +        gmx_fatal(FARGS,"You need to specify -ntomp in addition to -ntomp_pme");
 +    }
 +
 +    if (hw_opt->nthreads_tot == 1)
 +    {
 +        hw_opt->nthreads_tmpi = 1;
 +
 +        if (hw_opt->nthreads_omp > 1)
 +        {
 +            gmx_fatal(FARGS,"You requested %d OpenMP threads with %d total threads",
 +                      hw_opt->nthreads_tmpi,hw_opt->nthreads_tot);
 +        }
 +        hw_opt->nthreads_omp = 1;
 +    }
 +
 +    if (hw_opt->nthreads_omp_pme <= 0 && hw_opt->nthreads_omp > 0)
 +    {
 +        hw_opt->nthreads_omp_pme = hw_opt->nthreads_omp;
 +    }
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"hw_opt: nt %d ntmpi %d ntomp %d ntomp_pme %d gpu_id '%s'\n",
 +                hw_opt->nthreads_tot,
 +                hw_opt->nthreads_tmpi,
 +                hw_opt->nthreads_omp,
 +                hw_opt->nthreads_omp_pme,
 +                hw_opt->gpu_id!=NULL ? hw_opt->gpu_id : "");
 +                
 +    }
 +}
 +
 +
 +/* Override the value in inputrec with value passed on the command line (if any) */
 +static void override_nsteps_cmdline(FILE *fplog,
 +                                    int nsteps_cmdline,
 +                                    t_inputrec *ir,
 +                                    const t_commrec *cr)
 +{
 +    assert(ir);
 +    assert(cr);
 +
 +    /* override with anything else than the default -2 */
 +    if (nsteps_cmdline > -2)
 +    {
 +        char stmp[STRLEN];
 +
 +        ir->nsteps = nsteps_cmdline;
 +        if (EI_DYNAMICS(ir->eI))
 +        {
 +            sprintf(stmp, "Overriding nsteps with value passed on the command line: %d steps, %.3f ps",
 +                    nsteps_cmdline, nsteps_cmdline*ir->delta_t);
 +        }
 +        else
 +        {
 +            sprintf(stmp, "Overriding nsteps with value passed on the command line: %d steps",
 +                    nsteps_cmdline);
 +        }
 +
 +        md_print_warn(cr, fplog, "%s\n", stmp);
 +    }
 +}
 +
 +/* Data structure set by SIMMASTER which needs to be passed to all nodes
 + * before the other nodes have read the tpx file and called gmx_detect_hardware.
 + */
 +typedef struct {
 +    int cutoff_scheme; /* The cutoff scheme from inputrec_t */
 +    gmx_bool bUseGPU;       /* Use GPU or GPU emulation          */
 +} master_inf_t;
 +
 +int mdrunner(gmx_hw_opt_t *hw_opt,
 +             FILE *fplog,t_commrec *cr,int nfile,
 +             const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
 +             gmx_bool bCompact, int nstglobalcomm,
 +             ivec ddxyz,int dd_node_order,real rdd,real rconstr,
 +             const char *dddlb_opt,real dlb_scale,
 +             const char *ddcsx,const char *ddcsy,const char *ddcsz,
 +             const char *nbpu_opt,
 +             int nsteps_cmdline, int nstepout,int resetstep,
 +             int nmultisim,int repl_ex_nst,int repl_ex_nex,
 +             int repl_ex_seed, real pforce,real cpt_period,real max_hours,
 +             const char *deviceOptions, unsigned long Flags)
 +{
 +    gmx_bool   bForceUseGPU,bTryUseGPU;
 +    double     nodetime=0,realtime;
 +    t_inputrec *inputrec;
 +    t_state    *state=NULL;
 +    matrix     box;
 +    gmx_ddbox_t ddbox={0};
 +    int        npme_major,npme_minor;
 +    real       tmpr1,tmpr2;
 +    t_nrnb     *nrnb;
 +    gmx_mtop_t *mtop=NULL;
 +    t_mdatoms  *mdatoms=NULL;
 +    t_forcerec *fr=NULL;
 +    t_fcdata   *fcd=NULL;
 +    real       ewaldcoeff=0;
 +    gmx_pme_t  *pmedata=NULL;
 +    gmx_vsite_t *vsite=NULL;
 +    gmx_constr_t constr;
 +    int        i,m,nChargePerturbed=-1,status,nalloc;
 +    char       *gro;
 +    gmx_wallcycle_t wcycle;
 +    gmx_bool       bReadRNG,bReadEkin;
 +    int        list;
 +    gmx_runtime_t runtime;
 +    int        rc;
 +    gmx_large_int_t reset_counters;
 +    gmx_edsam_t ed=NULL;
 +    t_commrec   *cr_old=cr; 
 +    int         nthreads_pme=1;
 +    int         nthreads_pp=1;
 +    gmx_membed_t membed=NULL;
 +    gmx_hw_info_t *hwinfo=NULL;
 +    master_inf_t minf={-1,FALSE};
 +
 +    /* CAUTION: threads may be started later on in this function, so
 +       cr doesn't reflect the final parallel state right now */
 +    snew(inputrec,1);
 +    snew(mtop,1);
 +    
 +    if (Flags & MD_APPENDFILES) 
 +    {
 +        fplog = NULL;
 +    }
 +
 +    bForceUseGPU = (strncmp(nbpu_opt, "gpu", 3) == 0);
 +    bTryUseGPU   = (strncmp(nbpu_opt, "auto", 4) == 0) || bForceUseGPU;
 +
 +    snew(state,1);
 +    if (SIMMASTER(cr)) 
 +    {
 +        /* Read (nearly) all data required for the simulation */
 +        read_tpx_state(ftp2fn(efTPX,nfile,fnm),inputrec,state,NULL,mtop);
 +
 +        if (inputrec->cutoff_scheme != ecutsVERLET &&
 +            ((Flags & MD_TESTVERLET) || getenv("GMX_VERLET_SCHEME") != NULL))
 +        {
 +            convert_to_verlet_scheme(fplog,inputrec,mtop,det(state->box));
 +        }
 +
 +        /* Detect hardware, gather information. With tMPI only thread 0 does it
 +         * and after threads are started broadcasts hwinfo around. */
 +        snew(hwinfo, 1);
 +        gmx_detect_hardware(fplog, hwinfo, cr,
 +                            bForceUseGPU, bTryUseGPU, hw_opt->gpu_id);
 +
 +        minf.cutoff_scheme = inputrec->cutoff_scheme;
 +        minf.bUseGPU       = FALSE;
 +
 +        if (inputrec->cutoff_scheme == ecutsVERLET)
 +        {
 +            prepare_verlet_scheme(fplog,hwinfo,cr,hw_opt,nbpu_opt,
 +                                  inputrec,mtop,state->box,
 +                                  &minf.bUseGPU);
 +        }
 +        else if (hwinfo->bCanUseGPU)
 +        {
 +            md_print_warn(cr,fplog,
 +                          "NOTE: GPU(s) found, but the current simulation can not use GPUs\n"
 +                          "      To use a GPU, set the mdp option: cutoff-scheme = Verlet\n"
 +                          "      (for quick performance testing you can use the -testverlet option)\n");
 +
 +            if (bForceUseGPU)
 +            {
 +                gmx_fatal(FARGS,"GPU requested, but can't be used without cutoff-scheme=Verlet");
 +            }
 +        }
 +    }
 +#ifndef GMX_THREAD_MPI
 +    if (PAR(cr))
 +    {
 +        gmx_bcast_sim(sizeof(minf),&minf,cr);
 +    }
 +#endif
 +    if (minf.bUseGPU && cr->npmenodes == -1)
 +    {
 +        /* Don't automatically use PME-only nodes with GPUs */
 +        cr->npmenodes = 0;
 +    }
 +
 +    /* Check for externally set OpenMP affinity and turn off internal
 +     * pinning if any is found. We need to do this check early to tell
 +     * thread-MPI whether it should do pinning when spawning threads.
 +     */
 +    gmx_omp_check_thread_affinity(fplog, cr, hw_opt);
 +
 +#ifdef GMX_THREAD_MPI
 +    /* With thread-MPI inputrec is only set here on the master thread */
 +    if (SIMMASTER(cr))
 +#endif
 +    {
 +        check_and_update_hw_opt(hw_opt,minf.cutoff_scheme);
 +
 +#ifdef GMX_THREAD_MPI
 +        /* Early check for externally set process affinity. Can't do over all
 +         * MPI processes because hwinfo is not available everywhere, but with
 +         * thread-MPI it's needed as pinning might get turned off which needs
 +         * to be known before starting thread-MPI. */
 +        check_cpu_affinity_set(fplog,
 +                               NULL,
 +                               hw_opt, hwinfo->nthreads_hw_avail, FALSE);
 +#endif
 +
 +#ifdef GMX_THREAD_MPI
 +        if (cr->npmenodes > 0 && hw_opt->nthreads_tmpi <= 0)
 +        {
 +            gmx_fatal(FARGS,"You need to explicitly specify the number of MPI threads (-ntmpi) when using separate PME nodes");
 +        }
 +#endif
 +
 +        if (hw_opt->nthreads_omp_pme != hw_opt->nthreads_omp &&
 +            cr->npmenodes <= 0)
 +        {
 +            gmx_fatal(FARGS,"You need to explicitly specify the number of PME nodes (-npme) when using different number of OpenMP threads for PP and PME nodes");
 +        }
 +    }
 +
 +#ifdef GMX_THREAD_MPI
 +    if (SIMMASTER(cr))
 +    {
 +        /* NOW the threads will be started: */
 +        hw_opt->nthreads_tmpi = get_nthreads_mpi(hwinfo,
 +                                                 hw_opt,
 +                                                 inputrec, mtop,
 +                                                 cr, fplog);
 +        if (hw_opt->nthreads_tot > 0 && hw_opt->nthreads_omp <= 0)
 +        {
 +            hw_opt->nthreads_omp = hw_opt->nthreads_tot/hw_opt->nthreads_tmpi;
 +        }
 +
 +        if (hw_opt->nthreads_tmpi > 1)
 +        {
 +            /* now start the threads. */
 +            cr=mdrunner_start_threads(hw_opt, fplog, cr_old, nfile, fnm, 
 +                                      oenv, bVerbose, bCompact, nstglobalcomm, 
 +                                      ddxyz, dd_node_order, rdd, rconstr, 
 +                                      dddlb_opt, dlb_scale, ddcsx, ddcsy, ddcsz,
 +                                      nbpu_opt,
 +                                      nsteps_cmdline, nstepout, resetstep, nmultisim, 
 +                                      repl_ex_nst, repl_ex_nex, repl_ex_seed, pforce,
 +                                      cpt_period, max_hours, deviceOptions, 
 +                                      Flags);
 +            /* the main thread continues here with a new cr. We don't deallocate
 +               the old cr because other threads may still be reading it. */
 +            if (cr == NULL)
 +            {
 +                gmx_comm("Failed to spawn threads");
 +            }
 +        }
 +    }
 +#endif
 +    /* END OF CAUTION: cr is now reliable */
 +
 +    /* g_membed initialisation *
 +     * Because we change the mtop, init_membed is called before the init_parallel *
 +     * (in case we ever want to make it run in parallel) */
 +    if (opt2bSet("-membed",nfile,fnm))
 +    {
 +        if (MASTER(cr))
 +        {
 +            fprintf(stderr,"Initializing membed");
 +        }
 +        membed = init_membed(fplog,nfile,fnm,mtop,inputrec,state,cr,&cpt_period);
 +    }
 +
 +    if (PAR(cr))
 +    {
 +        /* now broadcast everything to the non-master nodes/threads: */
 +        init_parallel(fplog, cr, inputrec, mtop);
 +
 +        /* This check needs to happen after get_nthreads_mpi() */
 +        if (inputrec->cutoff_scheme == ecutsVERLET && (Flags & MD_PARTDEC))
 +        {
 +            gmx_fatal_collective(FARGS,cr,NULL,
 +                                 "The Verlet cut-off scheme is not supported with particle decomposition.\n"
 +                                 "You can achieve the same effect as particle decomposition by running in parallel using only OpenMP threads.");
 +        }
 +    }
 +    if (fplog != NULL)
 +    {
 +        pr_inputrec(fplog,0,"Input Parameters",inputrec,FALSE);
 +    }
 +
 +#if defined GMX_THREAD_MPI
 +    /* With tMPI we detected on thread 0 and we'll just pass the hwinfo pointer
 +     * to the other threads  -- slightly uncool, but works fine, just need to
 +     * make sure that the data doesn't get freed twice. */
 +    if (cr->nnodes > 1)
 +    {
 +        if (!SIMMASTER(cr))
 +        {
 +            snew(hwinfo, 1);
 +        }
 +        gmx_bcast(sizeof(&hwinfo), &hwinfo, cr);
 +    }
 +#else
 +    if (PAR(cr) && !SIMMASTER(cr))
 +    {
 +        /* now we have inputrec on all nodes, can run the detection */
 +        /* TODO: perhaps it's better to propagate within a node instead? */
 +        snew(hwinfo, 1);
 +        gmx_detect_hardware(fplog, hwinfo, cr,
 +                                 bForceUseGPU, bTryUseGPU, hw_opt->gpu_id);
 +    }
 +
 +    /* Now do the affinity check with MPI/no-MPI (done earlier with thread-MPI). */
 +    check_cpu_affinity_set(fplog, cr,
 +                           hw_opt, hwinfo->nthreads_hw_avail, FALSE);
 +#endif
 +
 +    /* now make sure the state is initialized and propagated */
 +    set_state_entries(state,inputrec,cr->nnodes);
 +
 +    /* remove when vv and rerun works correctly! */
 +    if (PAR(cr) && EI_VV(inputrec->eI) && ((Flags & MD_RERUN) || (Flags & MD_RERUN_VSITE)))
 +    {
 +        gmx_fatal(FARGS,
 +                  "Currently can't do velocity verlet with rerun in parallel.");
 +    }
 +
 +    /* A parallel command line option consistency check that we can
 +       only do after any threads have started. */
 +    if (!PAR(cr) &&
 +        (ddxyz[XX] > 1 || ddxyz[YY] > 1 || ddxyz[ZZ] > 1 || cr->npmenodes > 0))
 +    {
 +        gmx_fatal(FARGS,
 +                  "The -dd or -npme option request a parallel simulation, "
 +#ifndef GMX_MPI
 +                  "but %s was compiled without threads or MPI enabled"
 +#else
 +#ifdef GMX_THREAD_MPI
 +                  "but the number of threads (option -nt) is 1"
 +#else
 +                  "but %s was not started through mpirun/mpiexec or only one process was requested through mpirun/mpiexec"
 +#endif
 +#endif
 +                  , ShortProgram()
 +            );
 +    }
 +
 +    if ((Flags & MD_RERUN) &&
 +        (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI))
 +    {
 +        gmx_fatal(FARGS, "The .mdp file specified an energy mininization or normal mode algorithm, and these are not compatible with mdrun -rerun");
 +    }
 +
 +    if (can_use_allvsall(inputrec,mtop,TRUE,cr,fplog) && PAR(cr))
 +    {
 +        /* All-vs-all loops do not work with domain decomposition */
 +        Flags |= MD_PARTDEC;
 +    }
 +
 +    if (!EEL_PME(inputrec->coulombtype) || (Flags & MD_PARTDEC))
 +    {
 +        if (cr->npmenodes > 0)
 +        {
 +            if (!EEL_PME(inputrec->coulombtype))
 +            {
 +                gmx_fatal_collective(FARGS,cr,NULL,
 +                                     "PME nodes are requested, but the system does not use PME electrostatics");
 +            }
 +            if (Flags & MD_PARTDEC)
 +            {
 +                gmx_fatal_collective(FARGS,cr,NULL,
 +                                     "PME nodes are requested, but particle decomposition does not support separate PME nodes");
 +            }
 +        }
 +
 +        cr->npmenodes = 0;
 +    }
 +
 +#ifdef GMX_FAHCORE
 +    fcRegisterSteps(inputrec->nsteps,inputrec->init_step);
 +#endif
 +
 +    /* NMR restraints must be initialized before load_checkpoint,
 +     * since with time averaging the history is added to t_state.
 +     * For proper consistency check we therefore need to extend
 +     * t_state here.
 +     * So the PME-only nodes (if present) will also initialize
 +     * the distance restraints.
 +     */
 +    snew(fcd,1);
 +
 +    /* This needs to be called before read_checkpoint to extend the state */
 +    init_disres(fplog,mtop,inputrec,cr,Flags & MD_PARTDEC,fcd,state);
 +
 +    if (gmx_mtop_ftype_count(mtop,F_ORIRES) > 0)
 +    {
 +        if (PAR(cr) && !(Flags & MD_PARTDEC))
 +        {
 +            gmx_fatal(FARGS,"Orientation restraints do not work (yet) with domain decomposition, use particle decomposition (mdrun option -pd)");
 +        }
 +        /* Orientation restraints */
 +        if (MASTER(cr))
 +        {
 +            init_orires(fplog,mtop,state->x,inputrec,cr->ms,&(fcd->orires),
 +                        state);
 +        }
 +    }
 +
 +    if (DEFORM(*inputrec))
 +    {
 +        /* Store the deform reference box before reading the checkpoint */
 +        if (SIMMASTER(cr))
 +        {
 +            copy_mat(state->box,box);
 +        }
 +        if (PAR(cr))
 +        {
 +            gmx_bcast(sizeof(box),box,cr);
 +        }
 +        /* Because we do not have the update struct available yet
 +         * in which the reference values should be stored,
 +         * we store them temporarily in static variables.
 +         * This should be thread safe, since they are only written once
 +         * and with identical values.
 +         */
 +#ifdef GMX_THREAD_MPI
 +        tMPI_Thread_mutex_lock(&deform_init_box_mutex);
 +#endif
 +        deform_init_init_step_tpx = inputrec->init_step;
 +        copy_mat(box,deform_init_box_tpx);
 +#ifdef GMX_THREAD_MPI
 +        tMPI_Thread_mutex_unlock(&deform_init_box_mutex);
 +#endif
 +    }
 +
 +    if (opt2bSet("-cpi",nfile,fnm)) 
 +    {
 +        /* Check if checkpoint file exists before doing continuation.
 +         * This way we can use identical input options for the first and subsequent runs...
 +         */
 +        if( gmx_fexist_master(opt2fn_master("-cpi",nfile,fnm,cr),cr) )
 +        {
 +            load_checkpoint(opt2fn_master("-cpi",nfile,fnm,cr),&fplog,
 +                            cr,Flags & MD_PARTDEC,ddxyz,
 +                            inputrec,state,&bReadRNG,&bReadEkin,
 +                            (Flags & MD_APPENDFILES),
 +                            (Flags & MD_APPENDFILESSET));
 +            
 +            if (bReadRNG)
 +            {
 +                Flags |= MD_READ_RNG;
 +            }
 +            if (bReadEkin)
 +            {
 +                Flags |= MD_READ_EKIN;
 +            }
 +        }
 +    }
 +
 +    if (((MASTER(cr) || (Flags & MD_SEPPOT)) && (Flags & MD_APPENDFILES))
 +#ifdef GMX_THREAD_MPI
 +        /* With thread MPI only the master node/thread exists in mdrun.c,
 +         * therefore non-master nodes need to open the "seppot" log file here.
 +         */
 +        || (!MASTER(cr) && (Flags & MD_SEPPOT))
 +#endif
 +        )
 +    {
 +        gmx_log_open(ftp2fn(efLOG,nfile,fnm),cr,!(Flags & MD_SEPPOT),
 +                             Flags,&fplog);
 +    }
 +
 +    /* override nsteps with value from cmdline */
 +    override_nsteps_cmdline(fplog, nsteps_cmdline, inputrec, cr);
 +
 +    if (SIMMASTER(cr)) 
 +    {
 +        copy_mat(state->box,box);
 +    }
 +
 +    if (PAR(cr)) 
 +    {
 +        gmx_bcast(sizeof(box),box,cr);
 +    }
 +
 +    /* Essential dynamics */
 +    if (opt2bSet("-ei",nfile,fnm))
 +    {
 +        /* Open input and output files, allocate space for ED data structure */
 +        ed = ed_open(nfile,fnm,Flags,cr);
 +    }
 +
 +    if (PAR(cr) && !((Flags & MD_PARTDEC) ||
 +                     EI_TPI(inputrec->eI) ||
 +                     inputrec->eI == eiNM))
 +    {
 +        cr->dd = init_domain_decomposition(fplog,cr,Flags,ddxyz,rdd,rconstr,
 +                                           dddlb_opt,dlb_scale,
 +                                           ddcsx,ddcsy,ddcsz,
 +                                           mtop,inputrec,
 +                                           box,state->x,
 +                                           &ddbox,&npme_major,&npme_minor);
 +
 +        make_dd_communicators(fplog,cr,dd_node_order);
 +
 +        /* Set overallocation to avoid frequent reallocation of arrays */
 +        set_over_alloc_dd(TRUE);
 +    }
 +    else
 +    {
 +        /* PME, if used, is done on all nodes with 1D decomposition */
 +        cr->npmenodes = 0;
 +        cr->duty = (DUTY_PP | DUTY_PME);
 +        npme_major = 1;
 +        npme_minor = 1;
 +        if (!EI_TPI(inputrec->eI))
 +        {
 +            npme_major = cr->nnodes;
 +        }
 +        
 +        if (inputrec->ePBC == epbcSCREW)
 +        {
 +            gmx_fatal(FARGS,
 +                      "pbc=%s is only implemented with domain decomposition",
 +                      epbc_names[inputrec->ePBC]);
 +        }
 +    }
 +
 +    if (PAR(cr))
 +    {
 +        /* After possible communicator splitting in make_dd_communicators.
 +         * we can set up the intra/inter node communication.
 +         */
 +        gmx_setup_nodecomm(fplog,cr);
 +    }
 +
 +    /* Initialize per-node process ID and counters. */
 +    gmx_init_intra_counters(cr);
 +
 +#ifdef GMX_MPI
 +    md_print_info(cr,fplog,"Using %d MPI %s\n",
 +                  cr->nnodes,
 +#ifdef GMX_THREAD_MPI
 +                  cr->nnodes==1 ? "thread" : "threads"
 +#else
 +                  cr->nnodes==1 ? "process" : "processes"
 +#endif
 +                  );
 +#endif
 +
 +    gmx_omp_nthreads_init(fplog, cr,
 +                          hwinfo->nthreads_hw_avail,
 +                          hw_opt->nthreads_omp,
 +                          hw_opt->nthreads_omp_pme,
 +                          (cr->duty & DUTY_PP) == 0,
 +                          inputrec->cutoff_scheme == ecutsVERLET);
 +
 +    gmx_check_hw_runconf_consistency(fplog, hwinfo, cr, hw_opt->nthreads_tmpi, minf.bUseGPU);
 +
 +    /* getting number of PP/PME threads
 +       PME: env variable should be read only on one node to make sure it is 
 +       identical everywhere;
 +     */
 +    /* TODO nthreads_pp is only used for pinning threads.
 +     * This is a temporary solution until we have a hw topology library.
 +     */
 +    nthreads_pp  = gmx_omp_nthreads_get(emntNonbonded);
 +    nthreads_pme = gmx_omp_nthreads_get(emntPME);
 +
 +    wcycle = wallcycle_init(fplog,resetstep,cr,nthreads_pp,nthreads_pme);
 +
 +    if (PAR(cr))
 +    {
 +        /* Master synchronizes its value of reset_counters with all nodes 
 +         * including PME only nodes */
 +        reset_counters = wcycle_get_reset_counters(wcycle);
 +        gmx_bcast_sim(sizeof(reset_counters),&reset_counters,cr);
 +        wcycle_set_reset_counters(wcycle, reset_counters);
 +    }
 +
 +    snew(nrnb,1);
 +    if (cr->duty & DUTY_PP)
 +    {
 +        /* For domain decomposition we allocate dynamically
 +         * in dd_partition_system.
 +         */
 +        if (DOMAINDECOMP(cr))
 +        {
 +            bcast_state_setup(cr,state);
 +        }
 +        else
 +        {
 +            if (PAR(cr))
 +            {
 +                bcast_state(cr,state,TRUE);
 +            }
 +        }
 +
 +        /* Initiate forcerecord */
 +        fr = mk_forcerec();
 +        fr->hwinfo = hwinfo;
 +        init_forcerec(fplog,oenv,fr,fcd,inputrec,mtop,cr,box,FALSE,
 +                      opt2fn("-table",nfile,fnm),
 +                      opt2fn("-tabletf",nfile,fnm),
 +                      opt2fn("-tablep",nfile,fnm),
 +                      opt2fn("-tableb",nfile,fnm),
 +                      nbpu_opt,
 +                      FALSE,pforce);
 +
 +        /* version for PCA_NOT_READ_NODE (see md.c) */
 +        /*init_forcerec(fplog,fr,fcd,inputrec,mtop,cr,box,FALSE,
 +          "nofile","nofile","nofile","nofile",FALSE,pforce);
 +          */        
 +        fr->bSepDVDL = ((Flags & MD_SEPPOT) == MD_SEPPOT);
 +
 +        /* Initialize QM-MM */
 +        if(fr->bQMMM)
 +        {
 +            init_QMMMrec(cr,box,mtop,inputrec,fr);
 +        }
 +
 +        /* Initialize the mdatoms structure.
 +         * mdatoms is not filled with atom data,
 +         * as this can not be done now with domain decomposition.
 +         */
 +        mdatoms = init_mdatoms(fplog,mtop,inputrec->efep!=efepNO);
 +
 +        /* Initialize the virtual site communication */
 +        vsite = init_vsite(mtop,cr,FALSE);
 +
 +        calc_shifts(box,fr->shift_vec);
 +
 +        /* With periodic molecules the charge groups should be whole at start up
 +         * and the virtual sites should not be far from their proper positions.
 +         */
 +        if (!inputrec->bContinuation && MASTER(cr) &&
 +            !(inputrec->ePBC != epbcNONE && inputrec->bPeriodicMols))
 +        {
 +            /* Make molecules whole at start of run */
 +            if (fr->ePBC != epbcNONE)
 +            {
 +                do_pbc_first_mtop(fplog,inputrec->ePBC,box,mtop,state->x);
 +            }
 +            if (vsite)
 +            {
 +                /* Correct initial vsite positions are required
 +                 * for the initial distribution in the domain decomposition
 +                 * and for the initial shell prediction.
 +                 */
 +                construct_vsites_mtop(fplog,vsite,mtop,state->x);
 +            }
 +        }
 +
 +        if (EEL_PME(fr->eeltype))
 +        {
 +            ewaldcoeff = fr->ewaldcoeff;
 +            pmedata = &fr->pmedata;
 +        }
 +        else
 +        {
 +            pmedata = NULL;
 +        }
 +    }
 +    else
 +    {
 +        /* This is a PME only node */
 +
 +        /* We don't need the state */
 +        done_state(state);
 +
 +        ewaldcoeff = calc_ewaldcoeff(inputrec->rcoulomb, inputrec->ewald_rtol);
 +        snew(pmedata,1);
 +    }
 +
 +    /* Before setting affinity, check whether the affinity has changed
 +     * - which indicates that probably the OpenMP library has changed it since
 +     * we first checked). */
 +    check_cpu_affinity_set(fplog, cr, hw_opt, hwinfo->nthreads_hw_avail, TRUE);
 +
 +    /* Set the CPU affinity */
 +    set_cpu_affinity(fplog,cr,hw_opt,nthreads_pme,hwinfo,inputrec);
 +
 +    /* Initiate PME if necessary,
 +     * either on all nodes or on dedicated PME nodes only. */
 +    if (EEL_PME(inputrec->coulombtype))
 +    {
 +        if (mdatoms)
 +        {
 +            nChargePerturbed = mdatoms->nChargePerturbed;
 +        }
 +        if (cr->npmenodes > 0)
 +        {
 +            /* The PME only nodes need to know nChargePerturbed */
 +            gmx_bcast_sim(sizeof(nChargePerturbed),&nChargePerturbed,cr);
 +        }
 +
 +        if (cr->duty & DUTY_PME)
 +        {
 +            status = gmx_pme_init(pmedata,cr,npme_major,npme_minor,inputrec,
 +                                  mtop ? mtop->natoms : 0,nChargePerturbed,
 +                                  (Flags & MD_REPRODUCIBLE),nthreads_pme);
 +            if (status != 0) 
 +            {
 +                gmx_fatal(FARGS,"Error %d initializing PME",status);
 +            }
 +        }
 +    }
 +
 +
 +    if (integrator[inputrec->eI].func == do_md
 +#ifdef GMX_OPENMM
 +        ||
 +        integrator[inputrec->eI].func == do_md_openmm
 +#endif
 +        )
 +    {
 +        /* Turn on signal handling on all nodes */
 +        /*
 +         * (A user signal from the PME nodes (if any)
 +         * is communicated to the PP nodes.
 +         */
 +        signal_handler_install();
 +    }
 +
 +    if (cr->duty & DUTY_PP)
 +    {
 +        if (inputrec->ePull != epullNO)
 +        {
 +            /* Initialize pull code */
 +            init_pull(fplog,inputrec,nfile,fnm,mtop,cr,oenv, inputrec->fepvals->init_lambda,
 +                      EI_DYNAMICS(inputrec->eI) && MASTER(cr),Flags);
 +        }
 +        
 +        if (inputrec->bRot)
 +        {
 +           /* Initialize enforced rotation code */
 +           init_rot(fplog,inputrec,nfile,fnm,cr,state->x,box,mtop,oenv,
 +                    bVerbose,Flags);
 +        }
 +
 +        constr = init_constraints(fplog,mtop,inputrec,ed,state,cr);
 +
 +        if (DOMAINDECOMP(cr))
 +        {
 +            dd_init_bondeds(fplog,cr->dd,mtop,vsite,constr,inputrec,
 +                            Flags & MD_DDBONDCHECK,fr->cginfo_mb);
 +
 +            set_dd_parameters(fplog,cr->dd,dlb_scale,inputrec,fr,&ddbox);
 +
 +            setup_dd_grid(fplog,cr->dd);
 +        }
 +
 +        /* Now do whatever the user wants us to do (how flexible...) */
 +        integrator[inputrec->eI].func(fplog,cr,nfile,fnm,
 +                                      oenv,bVerbose,bCompact,
 +                                      nstglobalcomm,
 +                                      vsite,constr,
 +                                      nstepout,inputrec,mtop,
 +                                      fcd,state,
 +                                      mdatoms,nrnb,wcycle,ed,fr,
 +                                      repl_ex_nst,repl_ex_nex,repl_ex_seed,
 +                                      membed,
 +                                      cpt_period,max_hours,
 +                                      deviceOptions,
 +                                      Flags,
 +                                      &runtime);
 +
 +        if (inputrec->ePull != epullNO)
 +        {
 +            finish_pull(fplog,inputrec->pull);
 +        }
 +        
 +        if (inputrec->bRot)
 +        {
 +            finish_rot(fplog,inputrec->rot);
 +        }
 +
 +    } 
 +    else 
 +    {
 +        /* do PME only */
 +        gmx_pmeonly(*pmedata,cr,nrnb,wcycle,ewaldcoeff,FALSE,inputrec);
 +    }
 +
 +    if (EI_DYNAMICS(inputrec->eI) || EI_TPI(inputrec->eI))
 +    {
 +        /* Some timing stats */  
 +        if (SIMMASTER(cr))
 +        {
 +            if (runtime.proc == 0)
 +            {
 +                runtime.proc = runtime.real;
 +            }
 +        }
 +        else
 +        {
 +            runtime.real = 0;
 +        }
 +    }
 +
 +    wallcycle_stop(wcycle,ewcRUN);
 +
 +    /* Finish up, write some stuff
 +     * if rerunMD, don't write last frame again 
 +     */
 +    finish_run(fplog,cr,ftp2fn(efSTO,nfile,fnm),
 +               inputrec,nrnb,wcycle,&runtime,
 +               fr != NULL && fr->nbv != NULL && fr->nbv->bUseGPU ?
 +                 nbnxn_cuda_get_timings(fr->nbv->cu_nbv) : NULL,
 +               nthreads_pp, 
 +               EI_DYNAMICS(inputrec->eI) && !MULTISIM(cr));
 +
 +    if ((cr->duty & DUTY_PP) && fr->nbv != NULL && fr->nbv->bUseGPU)
 +    {
 +        char gpu_err_str[STRLEN];
 +
 +        /* free GPU memory and uninitialize GPU (by destroying the context) */
 +        nbnxn_cuda_free(fplog, fr->nbv->cu_nbv);
 +
 +        if (!free_gpu(gpu_err_str))
 +        {
 +            gmx_warning("On node %d failed to free GPU #%d: %s",
 +                        cr->nodeid, get_current_gpu_device_id(), gpu_err_str);
 +        }
 +    }
 +
 +    if (opt2bSet("-membed",nfile,fnm))
 +    {
 +        sfree(membed);
 +    }
 +
 +#ifdef GMX_THREAD_MPI
 +    if (PAR(cr) && SIMMASTER(cr))
 +#endif
 +    {
 +        gmx_hardware_info_free(hwinfo);
 +    }
 +
 +    /* Does what it says */  
 +    print_date_and_time(fplog,cr->nodeid,"Finished mdrun",&runtime);
 +
 +    /* Close logfile already here if we were appending to it */
 +    if (MASTER(cr) && (Flags & MD_APPENDFILES))
 +    {
 +        gmx_log_close(fplog);
 +    } 
 +
 +    rc=(int)gmx_get_stop_condition();
 +
 +#ifdef GMX_THREAD_MPI
 +    /* we need to join all threads. The sub-threads join when they
 +       exit this function, but the master thread needs to be told to 
 +       wait for that. */
 +    if (PAR(cr) && MASTER(cr))
 +    {
 +        tMPI_Finalize();
 +    }
 +#endif
 +
 +    return rc;
 +}
Simple merge
Simple merge
Simple merge