Merge branch 'release-4-5-patches' into rotation-4-5
authorCarsten Kutzner <ckutzne@gwdg.de>
Thu, 4 Nov 2010 09:33:40 +0000 (10:33 +0100)
committerCarsten Kutzner <ckutzne@gwdg.de>
Thu, 4 Nov 2010 09:33:40 +0000 (10:33 +0100)
127 files changed:
CMakeLists.txt
Makefile.am
README
acinclude.m4
cmake/FindMPI.cmake
cmake/TestMPI.c [new file with mode: 0644]
cmake/Toolchain-BlueGeneL-xlc.cmake [new file with mode: 0644]
configure.ac
include/CMakeLists.txt
include/copyrite.h
include/gmx_sse2_double.h
include/gstat.h
include/index.h
include/pdb2top.h
include/pme.h
include/string2.h
include/types/forcerec.h
include/types/nblist.h
include/vec.h
man/CMakeLists.txt
scripts/CMakeLists.txt
scripts/GMXRC.bash.cmakein
scripts/GMXRC.bash.in
scripts/GMXRC.csh.cmakein
scripts/GMXRC.csh.in
scripts/GMXRC.zsh.cmakein
scripts/GMXRC.zsh.in
share/CMakeLists.txt
share/html/online/mdp_opt.html
share/top/charmm27.ff/Makefile.am
share/top/charmm27.ff/aminoacids.r2b
share/top/charmm27.ff/dna.arn [new file with mode: 0644]
share/top/charmm27.ff/dna.c.tdb [new file with mode: 0644]
share/top/charmm27.ff/dna.hdb [new file with mode: 0644]
share/top/charmm27.ff/dna.n.tdb [new file with mode: 0644]
share/top/charmm27.ff/dna.r2b [deleted file]
share/top/charmm27.ff/dna.rtp
share/top/charmm27.ff/ffnonbonded.itp
share/top/charmm27.ff/gb.itp
share/top/charmm27.ff/rna.arn [new file with mode: 0644]
share/top/charmm27.ff/rna.c.tdb [new file with mode: 0644]
share/top/charmm27.ff/rna.hdb [new file with mode: 0644]
share/top/charmm27.ff/rna.n.tdb [new file with mode: 0644]
share/top/charmm27.ff/rna.r2b
share/top/charmm27.ff/rna.rtp
share/top/gromos43a1.ff/aminoacids.r2b
share/top/gromos43a1.ff/methanol.itp
share/top/gromos43a2.ff/aminoacids.r2b
share/top/gromos45a3.ff/aminoacids.r2b
share/top/gromos53a5.ff/aminoacids.r2b
share/top/gromos53a6.ff/aminoacids.r2b
share/top/gurgle.dat
share/top/ions.itp
src/config.h.cmakein
src/gmxlib/CMakeLists.txt
src/gmxlib/Makefile.am
src/gmxlib/checkpoint.c
src/gmxlib/copyrite.c
src/gmxlib/enxio.c
src/gmxlib/ftocstr.c [deleted file]
src/gmxlib/gmxfio.c
src/gmxlib/index.c
src/gmxlib/names.c
src/gmxlib/nonbonded/nb_kernel_bluegene/interaction.h
src/gmxlib/nonbonded/nb_kernel_ia32_sse2/nb_kernel400_ia32_sse2.c
src/gmxlib/nonbonded/nb_kernel_ia32_sse2/nb_kernel410_ia32_sse2.c
src/gmxlib/nonbonded/nb_kernel_ia32_sse2/nb_kernel430_ia32_sse2.c
src/gmxlib/nonbonded/nb_kernel_ia32_sse2/nb_kernel_allvsallgb_sse2_double.c
src/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel400_sse2_double.c
src/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel410_sse2_double.c
src/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel430_sse2_double.c
src/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel400_sse2_single.c
src/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel410_sse2_single.c
src/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel430_sse2_single.c
src/gmxlib/nonbonded/nb_kernel_x86_64_sse2/nb_kernel400_x86_64_sse2.c
src/gmxlib/nonbonded/nb_kernel_x86_64_sse2/nb_kernel410_x86_64_sse2.c
src/gmxlib/nonbonded/nb_kernel_x86_64_sse2/nb_kernel430_x86_64_sse2.c
src/gmxlib/nonbonded/nb_kernel_x86_64_sse2/nb_kernel_allvsallgb_sse2_double.c
src/gmxlib/selection/compiler.c
src/gmxlib/selection/evaluate.c
src/gmxlib/selection/parser.c
src/gmxlib/selection/parser.h
src/gmxlib/selection/parser.y
src/gmxlib/selection/scanner.c
src/gmxlib/selection/scanner.l
src/gmxlib/selection/selhelp.c
src/gmxlib/vmdio.c
src/kernel/CMakeLists.txt
src/kernel/gmx_gpu_utils/CMakeLists.txt
src/kernel/grompp.c
src/kernel/md.c
src/kernel/md_openmm.c
src/kernel/openmm_wrapper.cpp
src/kernel/pdb2gmx.c
src/kernel/pdb2top.c
src/kernel/readir.c
src/kernel/runner.c
src/mdlib/CMakeLists.txt
src/mdlib/clincs.c
src/mdlib/coupling.c
src/mdlib/ebin.c
src/mdlib/force.c
src/mdlib/forcerec.c
src/mdlib/genborn.c
src/mdlib/genborn_allvsall.c
src/mdlib/genborn_sse2_double.c
src/mdlib/genborn_sse2_single.c
src/mdlib/gmx_fft_fftw3.c
src/mdlib/gmx_wallcycle.c
src/mdlib/mdebin.c
src/mdlib/pme.c
src/ngmx/CMakeLists.txt
src/tools/CMakeLists.txt
src/tools/Makefile.am
src/tools/dlist.c
src/tools/gmx_bar.c
src/tools/gmx_chi.c
src/tools/gmx_density.c
src/tools/gmx_genbox.c
src/tools/gmx_hbond.c
src/tools/gmx_membed.c
src/tools/gmx_potential.c
src/tools/gmx_tune_pme.c
src/tools/gmx_velacc.c
src/tools/gmx_wham.c
src/tools/make_ndx.c
src/tools/pp2shift.h

index efde6dba111c0e4c4da6da36de5a04d6962bddfd..0d49a828f7584c5d3a90a3d5747161c2fea02e68 100644 (file)
@@ -3,9 +3,16 @@ cmake_minimum_required(VERSION 2.6)
 project(Gromacs)
 include(Dart)
 mark_as_advanced(DART_ROOT)
+
 # PROJECT_VERSION should have the following structure: 
-# VERSION[-dev-SUFFIX] where the VERSION can have any form and the suffix
-set(PROJECT_VERSION "4.5.1")
+# VERSION-dev[-SUFFIX] where the VERSION should have the form: vMajor.vMinor.vPatch
+#
+# The "-dev" suffix is important to keep because it makes it possible to distinguish
+# between a build from an official release and a build from the git release branch
+# on a machine with no git.
+#
+# NOTE: when releasing the "-dev" suffix needs to be stripped off!
+set(PROJECT_VERSION "4.5.2-dev")
 set(CUSTOM_VERSION_STRING ""
     CACHE STRING "Custom version string (if empty, use hard-coded default)")
 mark_as_advanced(CUSTOM_VERSION_STRING)
@@ -16,7 +23,7 @@ set(SOVERSION 6)
 # It is a bit irritating, but this has to be set separately for now!
 SET(CPACK_PACKAGE_VERSION_MAJOR "4")
 SET(CPACK_PACKAGE_VERSION_MINOR "5")
-SET(CPACK_PACKAGE_VERSION_PATCH "1")
+SET(CPACK_PACKAGE_VERSION_PATCH "2")
 
 
 # Cmake modules/macros are in a subdirectory to keep this file cleaner
@@ -81,11 +88,13 @@ option(GMX_MPI    "Build a parallel (message-passing) version of GROMACS" OFF)
 option(GMX_THREADS    "Build a parallel (thread-based) version of GROMACS (cannot be combined with MPI yet)" ON)
 option(GMX_SOFTWARE_INVSQRT "Use GROMACS software 1/sqrt" ON)
 mark_as_advanced(GMX_SOFTWARE_INVSQRT)
+option(GMX_POWERPC_INVSQRT "Use PowerPC hardware 1/sqrt" ON)
+mark_as_advanced(GMX_POWERPC_INVSQRT)
 option(GMX_FAHCORE "Build a library with mdrun functionality" OFF)
 mark_as_advanced(GMX_FAHCORE)
 option(GMX_OPENMM "Accelerated execution on GPUs through the OpenMM library (rerun cmake after changing to see relevant options)" OFF)
 set(GMX_ACCELERATION "auto" 
-    CACHE STRING "Accelerated kernels. Pick one of: auto, none, SSE, BlueGene, Power6, ia64, altivec")
+    CACHE STRING "Accelerated kernels. Pick one of: auto, none, SSE, BlueGene, Power6, ia64, altivec, fortran")
 
 set(GMX_FFT_LIBRARY "fftw3" 
     CACHE STRING "FFT library choices: fftw3,fftw2,mkl,fftpack[built-in]")
@@ -112,6 +121,15 @@ option(USE_VERSION_H "Generate development version string/information" ON)
 mark_as_advanced(USE_VERSION_H)
 
 option(GMX_DEFAULT_SUFFIX "Use default suffixes for GROMACS binaries and libs (_d for double, _mpi for MPI; rerun cmake after changing to see relevant options)" ON)
+
+if(UNIX AND NOT APPLE)
+    option(GMX_PREFER_STATIC_LIBS "When finding libraries prefer \".a\" static archives (NOTE: this is enabled only for UNIX (excluding APPLE) platforms but it might not always work!" OFF)
+    mark_as_advanced(GMX_PREFER_STATIC_LIBS)
+endif()
+
+########################################################################
+# Set up binary and library suffixing 
+########################################################################
 set(GMX_BINARY_SUFFIX "" CACHE STRING "Suffix for GROMACS binaries (default: _d for double, _mpi for MPI, _mpi_d for MPI and double).")
 set(GMX_LIBS_SUFFIX "" 
   CACHE STRING "Suffix for GROMACS libs (default: _d for double, _mpi for MPI, _mpi_d for MPI and double).")
@@ -126,6 +144,10 @@ if (GMX_DEFAULT_SUFFIX)
     set (GMX_BINARY_SUFFIX "${GMX_BINARY_SUFFIX}_d")
     set (GMX_LIBS_SUFFIX "${GMX_LIBS_SUFFIX}_d")
   endif(GMX_DOUBLE)
+  if (GMX_OPENMM)
+    set (GMX_BINARY_SUFFIX "-gpu")
+    set (GMX_LIBS_SUFFIX "_gpu")
+  endif(GMX_OPENMM)
   mark_as_advanced(FORCE GMX_BINARY_SUFFIX GMX_LIBS_SUFFIX)
   message(STATUS "Using default binary suffix: \"${GMX_BINARY_SUFFIX}\"")    
   message(STATUS "Using default library suffix: \"${GMX_LIBS_SUFFIX}\"") 
@@ -137,11 +159,14 @@ endif(GMX_DEFAULT_SUFFIX)
 
 set(PKG_CFLAGS "")
 if(GMX_DOUBLE)
-  set(PKG_CFLAGS "${PKG_CFLAGS} -DGMX_DOUBLE")
+    set(PKG_CFLAGS "${PKG_CFLAGS} -DGMX_DOUBLE")
 endif(GMX_DOUBLE)
 if(GMX_SOFTWARE_INVSQRT)
   set(PKG_CFLAGS "${PKG_CFLAGS} -DGMX_SOFTWARE_INVSQRT")
 endif(GMX_SOFTWARE_INVSQRT)
+if(GMX_POWERPC_INVSQRT)
+  set(PKG_CFLAGS "${PKG_CFLAGS} -DGMX_POWERPC_INVSQRT")
+endif(GMX_POWERPC_INVSQRT)
 
 ########################################################################
 #Process MPI settings
@@ -189,30 +214,36 @@ endif(GMX_MPI)
 if(GMX_OPENMM)
     cmake_minimum_required(VERSION 2.6.4)
     # we'll use the built-in fft to avoid unnecessary dependencies
-    message(STATUS "No external FFT libraries needed for the OpenMM build, using fftpack!")
-    set (GMX_FFT_LIBRARY "fftpack" CACHE STRING 
-               "No external FFT libraries needed for the OpenMM build, using fftpack!" FORCE)
+    string(TOUPPER ${GMX_FFT_LIBRARY} GMX_FFT_LIBRARY)
+    if(NOT ${GMX_FFT_LIBRARY} STREQUAL "FFTPACK")
+        message(STATUS "No external FFT libraries needed for the OpenMM build, switching to fftpack!")
+        set(GMX_FFT_LIBRARY "fftpack" CACHE STRING 
+               "No external FFT libraries needed for the OpenMM build, switching to  fftpack!" FORCE)
+    endif()
     if(GMX_MPI)
         message(FATAL_ERROR "The OpenMM build is not compatible with MPI!")
     endif(GMX_MPI)
     if(GMX_THREADS)
-        message(STATUS "Threads not compatible with OpenMM build, disabled!")
+        message(STATUS "Threads are  not compatible with OpenMM build, disabled!")
         set(GMX_THREADS OFF CACHE BOOL 
-               "Threads not compatible with OpenMM build, disabled!" FORCE)
+               "Threads are not compatible with OpenMM build, disabled!" FORCE)
     endif(GMX_THREADS)
     if(GMX_SOFTWARE_INVSQRT)
-        set(GMX_SOFTWARE_INVSQRT OFF FORCE)
+        set(GMX_SOFTWARE_INVSQRT OFF CACHE STRING 
+                "The OpenMM build does not need GROMACS software 1/sqrt!" FORCE)
     endif(GMX_SOFTWARE_INVSQRT)
-    if(NOT GMX_ACCELERATION MATCHES "^(none|None|NONE)")
-        message(WARNING "CPU-based acceleration turned off, OpenMM does not support/need any!")        
-       unset(GMX_ACCELERATION CACHE)
+    string(TOUPPER ${GMX_ACCELERATION} GMX_ACCELERATION)
+    if(NOT GMX_ACCELERATION STREQUAL "NONE")
+        message(STATUS "Switching off CPU-based acceleration, the OpenMM build does not support/need any!")    
         set(GMX_ACCELERATION "none" CACHE STRING 
-               "CPU-based acceleration turned off, OpenMM does not support/need any!" FORCE)
+               "Switching off CPU-based acceleration, the OpenMM build does not support/need any!" FORCE)
     endif()
     if(GMX_FAHCORE)
         message(FATAL_ERROR "The OpenMM build does not support FAH build!")
     endif(GMX_FAHCORE)
-    set(GMX_DOUBLE OFF FORCE )
+    if(GMX_DOUBLE)
+        message(FATAL_ERROR  "The OpenMM-build does not support double precision calculations!")
+    endif()
     # mark as advanced the unused variables
     mark_as_advanced(FORCE GMX_ACCELERATION GMX_MPI GMX_FFT_LIBRARY 
         GMX_QMMM_PROGRAM GMX_THREADS GMX_DOUBLE)
@@ -327,6 +358,15 @@ test_big_endian(GMX_INTEGER_BIG_ENDIAN)
 ########################################################################
 # Find external packages                                               #
 ########################################################################
+if(UNIX AND NOT APPLE)
+    if(GMX_PREFER_STATIC_LIBS)
+        SET(CMAKE_FIND_LIBRARY_SUFFIXES .a ${CMAKE_FIND_LIBRARY_SUFFIXES})
+        if(BUILD_SHARED_LIBS)
+            message(WARNING "Static libraries requested, the GROMACS libraries will also be build static (BUILD_SHARED_LIBS=OFF)")
+            set(BUILD_SHARED_LIBS OFF CACHE BOOL "Enable shared libraries (can be problematic with MPI, Windows)" FORCE)
+        endif()
+    endif()
+endif()
 
 find_package(LibXml2)
 set(PKG_XML "")
@@ -439,7 +479,7 @@ gmx_test_inline_asm_msvc_x86(GMX_X86_MSVC_INLINE_ASM)
 if (${GMX_ACCELERATION} STREQUAL "auto" AND NOT GMX_OPENMM)
   if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(i.86|x86|x64|x86_64|AMD64|amd64)")
 
-    set(GMX_ACCELERATION "SSE" CACHE STRING "Accelerated kernels. Pick one of: auto, none, SSE, BlueGene, Power6, ia64, altivec" FORCE)
+    set(GMX_ACCELERATION "SSE" CACHE STRING "Accelerated kernels. Pick one of: auto, none, SSE, BlueGene, Power6, ia64, altivec, fortran" FORCE)
     
     if (GMX_64_BIT)
       set(GMX_X86_64_ASM ON CACHE BOOL "Add SSE assembly files for x86_64" FORCE)
@@ -449,11 +489,10 @@ if (${GMX_ACCELERATION} STREQUAL "auto" AND NOT GMX_OPENMM)
 
     # Determine the assembler/compiler to use
   else()
-    set(GMX_ACCELERATION "none" CACHE STRING "Accelerated kernels. Pick one of: auto, none, SSE, BlueGene, Power6, ia64, altivec" FORCE)
+    set(GMX_ACCELERATION "none" CACHE STRING "Accelerated kernels. Pick one of: auto, none, SSE, BlueGene, Power6, ia64, altivec, fortran" FORCE)
   endif()
 endif (${GMX_ACCELERATION} STREQUAL "auto" AND NOT GMX_OPENMM)
 
-
 include(gmxTestXDR)
 gmx_test_xdr(GMX_SYSTEM_XDR)
 if(NOT GMX_SYSTEM_XDR)
@@ -461,27 +500,6 @@ if(NOT GMX_SYSTEM_XDR)
     set(PKG_CFLAGS "${PKG_CFLAGS} -DGMX_INTERNAL_XDR")
 endif(NOT GMX_SYSTEM_XDR)
 
-if(GMX_FORTRAN OR GMX_POWER6)
-    enable_language(Fortran)
-    include(FortranCInterface)
-    discover_fortran_mangling(prefix isupper suffix extra_under_score found)
-    if(extra_under_score)
-        set(extrasuffix "_")
-    endif(extra_under_score)
-
-    if(isupper)
-        set(F77_FUNCDEF   "${prefix} ## NAME ## ${suffix}")
-        set(F77_FUNCDEF_  "${prefix} ## NAME ## ${suffix}${extrasuffix}")
-    else(isupper)
-        set(F77_FUNCDEF   "${prefix} ## name ## ${suffix}")
-        set(F77_FUNCDEF_  "${prefix} ## name ## ${suffix}${extrasuffix}")
-    endif(isupper)
-else(GMX_FORTRAN OR GMX_POWER6)
-        set(F77_FUNCDEF   "name ## _")
-        set(F77_FUNCDEF_  "name ## _")
-endif(GMX_FORTRAN OR GMX_POWER6)
-
-
 # Process nonbonded accelerated kernels settings
 string(TOUPPER ${GMX_ACCELERATION} ${GMX_ACCELERATION})
 if(${GMX_ACCELERATION} STREQUAL "NONE")
@@ -511,11 +529,11 @@ elseif(${GMX_ACCELERATION} STREQUAL "SSE")
       if(GMX_DOUBLE)
         set(GMX_IA32_SSE2 1)
       else()
-       set(GMX_IA32_SSE 1)
+       set(GMX_IA32_SSE 1)
       endif()
     elseif(GMX_X86_64_ASM)
       if(GMX_DOUBLE)
-       set(GMX_X86_64_SSE2 1)
+       set(GMX_X86_64_SSE2 1)
       else()
         set(GMX_X86_64_SSE 1)
       endif()
@@ -530,10 +548,29 @@ elseif(${GMX_ACCELERATION} STREQUAL "SSE")
 
 elseif(${GMX_ACCELERATION} STREQUAL "FORTRAN")
     set(GMX_FORTRAN 1)
+    # these are sometimes switched on by default
+    set(GMX_IA32_ASM 0)
+    set(GMX_GMX_X86_64_ASM 0)
 elseif(${GMX_ACCELERATION} STREQUAL "BLUEGENE")
+# GMX_ACCELERATION=BlueGene should be set in the Toolchain-BlueGene?-???.cmake file
+    message(STATUS "Configuring for BlueGene")
     set(GMX_BLUEGENE 1)
+    if (${CMAKE_SYSTEM_NAME} STREQUAL "BlueGeneL")
+        set(SHARED_LIBS_DEFAULT OFF CACHE BOOL "Shared libraries not compatible with BlueGene/L, disabled!" FORCE)
+        set(BUILD_SHARED_LIBS OFF CACHE BOOL "Shared libraries not compatible with BlueGene/L, disabled!" FORCE)
+    endif (${CMAKE_SYSTEM_NAME} STREQUAL "BlueGeneL")
+    set(GMX_SOFTWARE_INVSQRT OFF CACHE BOOL "Do not use software reciprocal square root on BlueGene" FORCE)
+    set(GMX_POWERPC_INVSQRT ON CACHE BOOL "Use hardware reciprocal square root on BlueGene" FORCE)
+    set(GMX_X11 OFF CACHE BOOL "X11 not compatible with BlueGene, disabled!" FORCE)
+    set(GMX_THREADS OFF CACHE BOOL "Threads not compatible with BlueGene, disabled!" FORCE)
+    set(GMX_MPI ON CACHE BOOL "Use MPI on BlueGene" FORCE)
+    set(GMX_EXTERNAL_BLAS TRUE CACHE BOOL "Use MASSV for BLAS on BlueGene" FORCE)
+    set(GMX_EXTERNAL_LAPACK TRUE CACHE BOOL "Use MASSV for LAPACK on BlueGene" FORCE)
+    list(APPEND GMX_EXTRA_LIBRARIES massv)
 elseif(${GMX_ACCELERATION} STREQUAL "POWER6")
     set(GMX_POWER6 1)
+    set(GMX_SOFTWARE_INVSQRT OFF CACHE BOOL "Do not use software reciprocal square root on Power6" FORCE)
+    set(GMX_POWERPC_INVSQRT ON CACHE BOOL "Use hardware reciprocal square root on Power6" FORCE)
 elseif(${GMX_ACCELERATION} STREQUAL "IA64")
     set(GMX_IA64_ASM 1)
     set(DISABLE_WATERWATER_NLIST 1)
@@ -547,11 +584,31 @@ else(${GMX_ACCELERATION} STREQUAL "NONE")
     MESSAGE(FATAL_ERROR "Unrecognized option for accelerated kernels: ${GMX_ACCELERATION}. Pick one of auto, none, SSE, Fortran, BlueGene, Power6, ia64, altivec")
 endif(${GMX_ACCELERATION} STREQUAL "NONE")
 
+if(GMX_FORTRAN OR GMX_POWER6)
+    if (GMX_THREADS)
+        message(FATAL_ERROR "FORTRAN/POWER6 is incompatible with threads and only provides a speed-up on certain IBM compilers. Disable FORTRAN (or threads if you really want to use FORTRAN kernels).")
+    endif(GMX_THREADS)
+    enable_language(Fortran)
+    include(FortranCInterface)
+    discover_fortran_mangling(prefix isupper suffix extra_under_score found)
+    if(extra_under_score)
+        set(extrasuffix "_")
+    endif(extra_under_score)
+    if(prefix)
+      set(prefix "${prefix} ##")
+    endif(prefix)
 
-
-
-
-
+    if(isupper)
+        set(F77_FUNCDEF   "${prefix} NAME ## ${suffix}")
+        set(F77_FUNCDEF_  "${prefix} NAME ## ${suffix}${extrasuffix}")
+    else(isupper)
+        set(F77_FUNCDEF   "${prefix} name ## ${suffix}")
+        set(F77_FUNCDEF_  "${prefix} name ## ${suffix}${extrasuffix}")
+    endif(isupper)
+else(GMX_FORTRAN OR GMX_POWER6)
+        set(F77_FUNCDEF   "name ## _")
+        set(F77_FUNCDEF_  "name ## _")
+endif(GMX_FORTRAN OR GMX_POWER6)
 
 # Process QM/MM Settings
 string(TOUPPER ${GMX_QMMM_PROGRAM} ${GMX_QMMM_PROGRAM})
@@ -678,7 +735,11 @@ endif (NOT DEFINED GROMACS_C_FLAGS_SET)
 ########################################################################
 # Specify install locations and which subdirectories to process        #
 ########################################################################
-set(LIB_INSTALL_DIR  ${CMAKE_INSTALL_PREFIX}/lib)
+if ( DEFINED LIB )
+    set(LIB_INSTALL_DIR "${CMAKE_INSTALL_PREFIX}/${LIB}")
+else()
+    set(LIB_INSTALL_DIR  ${CMAKE_INSTALL_PREFIX}/lib)
+endif()
 set(BIN_INSTALL_DIR  ${CMAKE_INSTALL_PREFIX}/bin)
 set(DATA_INSTALL_DIR ${CMAKE_INSTALL_PREFIX}/share/gromacs)
 set(MAN_INSTALL_DIR  ${CMAKE_INSTALL_PREFIX}/share/man)
index 52abb079ab6eb2221d62cb97585d5c4b4a100060..febdec51c310a36a9ef8eb66a387886ab4a2489e 100644 (file)
@@ -44,6 +44,9 @@ EXTRA_DIST = config/depcomp \
 
 
 install-exec-hook:
+if NO_LA_FILES
+       cd $(DESTDIR)$(libdir) && rm -f libmd@LIBSUFFIX@.la libgmx@LIBSUFFIX@.la libgmxpreprocess@LIBSUFFIX@.la libgmxana@LIBSUFFIX@.la
+endif
        @echo ""
        @echo "GROMACS is installed under $(prefix)."
        @echo "Make sure to update your PATH and MANPATH to find the"
@@ -67,6 +70,9 @@ install-mdrun:
        (cd $(top_builddir)/src/mdlib && $(MAKE) install ; exit 0)
        (cd $(top_builddir)/src/kernel && $(MAKE) install-libLTLIBRARIES ; exit 0)
        (cd $(top_builddir)/src/kernel && $(MAKE) install-mdrun ; exit 0)
+if NO_LA_FILES
+       cd $(DESTDIR)$(libdir) && rm -f libmd@LIBSUFFIX@.la libgmx@LIBSUFFIX@.la libgmxpreprocess@LIBSUFFIX@.la libgmxana@LIBSUFFIX@.la
+endif
 
 fahcore:
        (cd $(top_builddir)/src/gmxlib && $(MAKE) ; exit 0)
diff --git a/README b/README
index d24640e3c902f648c0ef821d59b405fa543d81b1..398c9fca27ec27446c4c5b32b695e0c5bc7d9769 100644 (file)
--- a/README
+++ b/README
@@ -2,8 +2,10 @@
                Welcome to the official version of GROMACS!
 
 If you are familiar with unix, it should be fairly trivial to compile and
-install GROMACS. Installation instructions are available in the INSTALL file,
-and a more extended step-by-step guide can be found on http://www.gromacs.org .
+install GROMACS. Installation instructions are available in the INSTALL.* 
+files (there is one for automake users, INSTALL.automake and one for cmake
+users, INSTALL.cmake). A more extended step-by-step guide can be found 
+on our website http://www.gromacs.org .
 
 Of course we will do our utmost to help you with any problems, but PLEASE 
 READ THE INSTALLATION INSTRUCTIONS BEFORE CONTACTING US!
index 6e2f396d3aefbce82b4c8e024f88b716a27515ec..8c5fb69f768be0e4daab6401240bafbd1e1e8bb5 100644 (file)
@@ -2075,6 +2075,7 @@ else
     CPPFLAGS="$save_CPPFLAGS"
     LDFLAGS="$save_LDFLAGS"
     LIBS="$save_LIBS"
+    DLOPEN_LIBS="$lt_cv_dlopen_libs"
     ;;
   esac
 
index 481b0e994dfa5d6c73c3a888444c90637a869a19..a0c36ed77af5dd34443a3886296d45098bffda5c 100644 (file)
 # Microsoft HPC SDK is automatically added to the system path
 # Argonne National Labs MPICH2 sets a registry key that we can use.
 
+TRY_COMPILE(MPI_FOUND ${CMAKE_BINARY_DIR}
+  "${CMAKE_SOURCE_DIR}/cmake/TestMPI.c"
+  COMPILE_DEFINITIONS )
+
+if(MPI_FOUND)
+  return()
+endif()
+
 set(_MPI_PACKAGE_DIR
   mpi
   mpich
diff --git a/cmake/TestMPI.c b/cmake/TestMPI.c
new file mode 100644 (file)
index 0000000..4eab044
--- /dev/null
@@ -0,0 +1,6 @@
+#include <mpi.h> 
+
+int main(int argc, char **argv)
+{
+  MPI_Init(&argc,&argv);
+}
diff --git a/cmake/Toolchain-BlueGeneL-xlc.cmake b/cmake/Toolchain-BlueGeneL-xlc.cmake
new file mode 100644 (file)
index 0000000..5e0eadc
--- /dev/null
@@ -0,0 +1,51 @@
+# derived from http://cmake.org/Wiki/CmakeBlueGene
+
+# the name of the target operating system
+set(CMAKE_SYSTEM_NAME BlueGeneL CACHE STRING "Cross-compiling for BlueGene/L")
+
+# adjust to suit your machine's versions
+#    /bgl/BlueLight/V1R3M2_140_2007-070424/ppc/bglsys
+set(BLRTS_PATH /bgl/BlueLight/V1R3M4_300_2008-080728/ppc/bglsys CACHE STRING "Path to the BlueGene/L system libraries and includes")
+
+# set the compiler
+set(CMAKE_C_COMPILER  /opt/ibmcmp/vac/bg/8.0/bin/blrts_xlc)
+set(CMAKE_C_FLAGS "-O3 -qbgl -qarch=auto -qtune=auto -qnoautoconfig -qfloat=norngchk -qhot")
+set(CMAKE_EXE_LINKER_FLAGS "-L${BLRTS_PATH}/lib")
+set(CMAKE_CXX_COMPILER  /opt/ibmcmp/vacpp/bg/8.0/bin/blrts_xlC)
+
+set(MPI_LIBRARY mpich.rts CACHE STRING "MPI library for BlueGene" FORCE)
+set(MPI_EXTRA_LIBRARY msglayer.rts devices.rts rts.rts devices.rts CACHE STRING "Extra MPI libraries for BlueGene" FORCE)
+set(MPI_INCLUDE_PATH ${BLRTS_PATH}/include  CACHE STRING "MPI include path for BlueGene" FORCE)
+
+# This adds directories that find commands should specifically ignore for cross compiles.
+# Most of these directories are the include and lib directories for the frontend on BG/P systems.
+# Not ignoring these can cause things like FindX11 to find a frontend PPC version mistakenly.
+# We use this on BG instead of re-rooting because backend libraries are typically strewn about
+# the filesystem, and we can't re-root ALL backend libraries to a single place.
+
+set(CMAKE_SYSTEM_IGNORE_PATH
+  /lib             /lib64             /include
+  /usr/lib         /usr/lib64         /usr/include
+  /usr/local/lib   /usr/local/lib64   /usr/local/include
+  /usr/X11/lib     /usr/X11/lib64     /usr/X11/include
+  /usr/lib/X11     /usr/lib64/X11     /usr/include/X11
+  /usr/X11R6/lib   /usr/X11R6/lib64   /usr/X11R6/include
+  /usr/X11R7/lib   /usr/X11R7/lib64   /usr/X11R7/include
+)
+
+# set the search path for the environment coming with the compiler
+# and a directory where you can install your own compiled software
+set(CMAKE_FIND_ROOT_PATH
+    /bgl/BlueLight/ppcfloor/
+    ${BLRTS_PATH}
+    /opt/ibmcmp/xlmass/bg
+)
+
+# adjust the default behaviour of the FIND_XXX() commands:
+# search headers and libraries in the target environment, search 
+# programs in the host environment
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+
+set(GMX_ACCELERATION "BlueGene" CACHE STRING "Forcing BlueGene acceleration when using BlueGene toolchain")
index ff1ca8f34e9528be2169cfa09bafaf92b78afdd0..f9b9ef1d5b45b70b598829e8958dbaf9c86d087f 100644 (file)
@@ -3,7 +3,13 @@
 #######################################################################
  
 AC_PREREQ(2.50)
-AC_INIT(gromacs, 4.5.1, [gmx-users@gromacs.org])
+
+# The "-dev" suffix is important to keep because it makes it possible to distinguish
+# between a build from an official release and a build from the git release branch
+# on a machine with no git.
+#
+# NOTE: when releasing the "-dev" suffix needs to be stripped off!
+AC_INIT(gromacs, 4.5.2-dev, [gmx-users@gromacs.org])
 AC_CONFIG_SRCDIR(src/gmxlib/3dview.c)
 AC_CONFIG_AUX_DIR(config)
 AC_CANONICAL_HOST
@@ -54,6 +60,12 @@ AC_ARG_ENABLE(fortran,
 # always use CC for linking:
 AC_SUBST(F77LINK,"\$(LINK)")
 
+#On some systems GNU libtool's la files are more confusing for libtool than helpful due to the 
+#hard coded paths/libs in the files. And for better linking we have pkg-config files (.pc).
+AC_ARG_ENABLE(la-files,
+       [AS_HELP_STRING([--disable-la-files],[Do NOT install GNU libtool's la files])],,
+       [enable_la_files=yes])
+AM_CONDITIONAL(NO_LA_FILES,[test "$enable_la_files" = no])
 
 
 
@@ -147,9 +159,9 @@ esac
 # IBM Power6-specific optimization
 AC_ARG_ENABLE(power6,
              [AC_HELP_STRING([--enable-power6],
-                             [Use IBM Pwr6/PPC440/PPC450-specific F77 kernels])],,enable_power6=no)
+                             [Use IBM Power6-specific F77 kernels])],,enable_power6=no)
 if test "$enable_power6" = "yes"; then
-  AC_DEFINE(GMX_POWER6,,[Enable IBM Pwr6/PPC440/PPC450-specific F77 kernels])
+  AC_DEFINE(GMX_POWER6,,[Enable IBM Power6-specific F77 kernels])
 fi
 
 AC_ARG_ENABLE(bluegene,
@@ -168,11 +180,11 @@ AC_ARG_ENABLE(software-invsqrt,
               [AC_HELP_STRING([--disable-software-invsqrt],
                               [No software 1/sqrt (disabled on sgi,ibm,ia64)])],,
 [case "${host_cpu}-${host_os}" in
-  mips*-irix* | rs6000*-aix* | powerpc*-aix | ia64*-*) enable_software_invsqrt=no ;;
+  mips*-irix* | rs6000*-aix* | powerpc*-aix | powerpc*-none | ia64*-*) enable_software_invsqrt=no ;;
   *) enable_software_invsqrt=yes ;;
 esac])
 if test "$enable_software_invsqrt" = "yes"; then
-  AC_DEFINE(GMX_SOFTWARE_INVSQRT,,[Use the GROMACS sGMX_INTERNAL_XDRsqrt(x)])
+  AC_DEFINE(GMX_SOFTWARE_INVSQRT,,[Use the GROMACS software 1/sqrt(x)])
   PKG_CFLAGS="$PKG_CFLAGS -DGMX_SOFTWARE_INVSQRT"
 fi
 AM_CONDITIONAL([GMX_SOFTWARE_INVSQRT],[test "$enable_software_invsqrt" = "yes"])
@@ -461,6 +473,9 @@ if test "$enable_threads" = "yes"; then
   if test "$with_fft" = "fftw2"; then
     AC_MSG_ERROR([fftw2 can't be used with threads. Use fftw3 or mkl.])
   fi 
+  if test "$enable_fortran" = "yes"; then
+    AC_MSG_ERROR([FORTRAN is incompatible with threads and only provides a speed-up on certain IBM compilers. Use --disable-threads if you really want to use FORTRAN kernels.])
+  fi
   AC_CHECK_HEADERS(unistd.h)
   AC_CHECK_HEADERS(sys/time.h)
   AC_CHECK_HEADERS(sched.h)
@@ -567,6 +582,7 @@ AC_PROG_INSTALL
 AC_PROG_LN_S
 AC_PROG_MAKE_SET
 AC_LIBTOOL_WIN32_DLL
+AC_LIBTOOL_DLOPEN
 AC_PROG_LIBTOOL
 AC_SYS_LARGEFILE
 #
@@ -1116,6 +1132,8 @@ fi
 
 if test "$enable_bluegene" = "yes"; then
   AC_DEFINE(GMX_BLUEGENE,,[Use assembly intrinsics kernels for BlueGene])
+  AC_DEFINE_UNQUOTED(GMX_POWERPC_INVSQRT,,[Use the PowerPC hardware 1/sqrt(x)])
+  PKG_CFLAGS="$PKG_CFLAGS -DGMX_POWERPC_INVSQRT"
 fi
 
 if test "$enable_fortran" = "yes"; then
@@ -1168,13 +1186,22 @@ if test "$with_dlopen" = "yes"; then
       CFLAGS_RET=$CFLAGS
       #LDFLAGS="$lt_cv_dlopen_libs $LDFLAGS" #can't make the macro, which is getting lt_cv_dlopen_libs, to work
       LDFLAGS_RET=$LDFLAGS
+      LIBS_RET=$LIBS
       if test "$enable_all_static" = "yes"; then  #make sure we test also whether it works static
         LDFLAGS="$LDFLAGS -static"
       fi
       CFLAGS="-I$srcdir/include -DGMX_DLOPEN $CFLAGS"
-      AC_TRY_LINK([#include "$srcdir/src/gmxlib/vmddlopen.c"],,[AC_MSG_RESULT([yes])\
-       AC_DEFINE(GMX_DLOPEN,,[Compile with dlopen])],AC_MSG_RESULT([no]))
+      LIBS="$DLOPEN_LIBS $LIBS"
+      AC_TRY_LINK([#include "$srcdir/src/gmxlib/vmddlopen.c"],,[
+        AC_MSG_RESULT([yes])
+       AC_DEFINE(GMX_DLOPEN,,[Compile with dlopen])
+       AC_SUBST(DLOPEN_LIBS)
+      ],[
+       AC_MSG_RESULT([no])
+       AC_SUBST(DLOPEN_LIBS,"")
+      ])
       CFLAGS=$CFLAGS_RET
+      LIBS=$LIBS_RET
       LDFLAGS=$LDFLAGS_RET
 fi
 
index c0248b9c2cf257c181499e4a4b480953da3b21ad..05b41fd8366783345984428c0d0b928a2e767cdc 100644 (file)
@@ -1,5 +1,6 @@
 # includes: Nothing to build, just installation
 install(DIRECTORY . DESTINATION ${INCL_INSTALL_DIR}/gromacs
+  COMPONENT development
   PATTERN "Makefile*" EXCLUDE
   PATTERN "CMake*" EXCLUDE
   PATTERN "cmake*" EXCLUDE
index cafabf2c893c4804ebedcb5438749b658ba3bc40..487bdba60a758cc01b441236ae508c910a17e0ae 100644 (file)
@@ -56,7 +56,7 @@ CopyrightText[] = {
   "Written by Emile Apol, Rossen Apostolov, Herman J.C. Berendsen,",
   "Aldert van Buuren, Pär Bjelkmar, Rudi van Drunen, Anton Feenstra, ",
   "Gerrit Groenhof, Peter Kasson, Per Larsson, Pieter Meulenhoff, ",
-  "Teemu Murtola, Szilard Pall, Sander Pronk, Roland Schultz, ",
+  "Teemu Murtola, Szilard Pall, Sander Pronk, Roland Schulz, ",
   "Michael Shirts, Alfons Sijbers, Peter Tieleman,\n",
   "Berk Hess, David van der Spoel, and Erik Lindahl.\n",
   "Copyright (c) 1991-2000, University of Groningen, The Netherlands.",
index cf9ee16c7805bac5d9504b43eaae0d256e73fbd7..d6635af150dd37dec874ce96c189e44c363cbb84 100644 (file)
@@ -476,7 +476,7 @@ gmx_mm_sincos_pd(__m128d x,
     };
 #endif
     
-    const __m128d signmask    = _mm_castsi128_pd( _mm_set_epi32(0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF) );
+    const __m128d signmask    = gmx_mm_castsi128_pd( _mm_set_epi32(0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF) );
     const __m128d tabscale    = _mm_set1_pd(32.0/M_PI);
     const __m128d invtabscale = _mm_set1_pd(M_PI/32.0);
     const __m128d one         = _mm_set1_pd(1.0);
@@ -533,10 +533,10 @@ gmx_mm_sincos_pd(__m128d x,
     cswapsign = _mm_shuffle_epi32(cswapsign,_MM_SHUFFLE(1,1,0,0));
     minusone  = _mm_sub_pd(_mm_setzero_pd(),one);
     
-    ssign     = _mm_or_pd(_mm_and_pd( _mm_castsi128_pd(sswapsign),minusone ),
-                          _mm_andnot_pd( _mm_castsi128_pd(sswapsign),one ));
-    csign     = _mm_or_pd(_mm_and_pd( _mm_castsi128_pd(cswapsign),minusone ),
-                          _mm_andnot_pd( _mm_castsi128_pd(cswapsign),one ));
+    ssign     = _mm_or_pd(_mm_and_pd( gmx_mm_castsi128_pd(sswapsign),minusone ),
+                          _mm_andnot_pd( gmx_mm_castsi128_pd(sswapsign),one ));
+    csign     = _mm_or_pd(_mm_and_pd( gmx_mm_castsi128_pd(cswapsign),minusone ),
+                          _mm_andnot_pd( gmx_mm_castsi128_pd(cswapsign),one ));
     
     /* First lookup into table */
 #ifdef _MSC_VER
index f2d2b3761ebc850a8e4b1e04f8df213f1480de06..a9ac497c6873248df9b21d4458ce8d9d769fdbfc 100644 (file)
@@ -392,7 +392,7 @@ gmx_bool has_dihedral(int Dih,t_dlist *dl);
 t_dlist *mk_dlist(FILE *log, 
                         t_atoms *atoms, int *nlist,
                         gmx_bool bPhi, gmx_bool bPsi, gmx_bool bChi, gmx_bool bHChi,
-                        int maxchi,int r0,int naa,char **aa);
+                        int maxchi, int r0, gmx_residuetype_t rt);
                         
 void pr_dlist(FILE *fp,int nl,t_dlist dl[],real dt,  int printtype,
                     gmx_bool bPhi, gmx_bool bPsi,gmx_bool bChi,gmx_bool bOmega, int maxchi);
index b327bd34d974302e96b45cdda3f4537bf5a5bc19..ad91baa98d7eaeaea1992e5a7e877da7e008f668 100644 (file)
@@ -123,6 +123,15 @@ gmx_residuetype_is_dna(gmx_residuetype_t rt, const char *resnm);
 gmx_bool 
 gmx_residuetype_is_rna(gmx_residuetype_t rt, const char *resnm);
 
+int
+gmx_residuetype_get_size(gmx_residuetype_t rt);
+
+int
+gmx_residuetype_get_index(gmx_residuetype_t rt, const char *resnm);
+
+const char *
+gmx_residuetype_get_name(gmx_residuetype_t rt, int index);
+
 
 
 
index 0fc4ab5d197a6ecd9ad2e7555e79589c400f0bc4..a58f6199faa5c90e8e6fc063551837b30b24c5a5 100644 (file)
@@ -107,7 +107,7 @@ void pdb2top(FILE *top_file, char *posre_fn, char *molname,
                    int nrtp, t_restp rtp[],
                    t_restp *restp, t_hackblock *hb,
                    int nterpairs, t_hackblock **ntdb, t_hackblock **ctdb,
-                   int *rn, int *rc, gmx_bool bAllowMissing,
+                   gmx_bool bAllowMissing,
                    gmx_bool bVsites, gmx_bool bVsiteAromatics,
                    const char *ff, const char *ffdir,
                    real mHmult,
index 75c76443e8d16ac92593975b6289a732464666ca..9c6ee1215ea9fa7f18eab9f70ebf454eb2312d60 100644 (file)
@@ -63,6 +63,8 @@ int gmx_pme_destroy(FILE *log,gmx_pme_t *pmedata);
 #define GMX_PME_SOLVE         (1<<1)
 #define GMX_PME_CALC_F        (1<<2)
 #define GMX_PME_CALC_ENER_VIR (1<<3)
+/* This forces the grid to be backtransformed even without GMX_PME_CALC_F */
+#define GMX_PME_CALC_POT      (1<<4)
 #define GMX_PME_DO_ALL_F  (GMX_PME_SPREAD_Q | GMX_PME_SOLVE | GMX_PME_CALC_F)
 
 int gmx_pme_do(gmx_pme_t pme,
index 6d949384c437659478161dcea0e0e054b44b7c7a..184c82205969c988ea47a61086e581c8cfa8ca00 100644 (file)
@@ -123,6 +123,10 @@ char **split(char sep,char *str);
 
 gmx_large_int_t str_to_large_int_t(const char *str, char **endptr);
 
+#if ((defined WIN32 || defined _WIN32 || defined WIN64 || defined _WIN64) && !defined __CYGWIN__ && !defined __CYGWIN32__)
+#define snprintf _snprintf
+#endif
+
 #ifdef __cplusplus
 }
 #endif
index 405b503bd0a7b39ee62af37ddc399387ef3ce914..70b9e0d2ff02272c94a0f5bbaed92171592befea 100644 (file)
@@ -68,10 +68,12 @@ typedef struct {
 } t_nblists;
 
 /* macros for the cginfo data in forcerec */
-/* The maximum cg size is 255, because we only have space for 8 bits in cginfo,
+/* The maximum cg size in cginfo is 255,
+ * because we only have space for 8 bits in cginfo,
  * this cg size entry is actually only read with domain decomposition.
+ * But there is a smaller limit due to the t_excl data structure
+ * which is defined in nblist.h.
  */
-#define MAX_CHARGEGROUP_SIZE 256
 #define SET_CGINFO_GID(cgi,gid)      (cgi) = (((cgi)  &  ~65535)  |  (gid)   )
 #define GET_CGINFO_GID(cgi)        ( (cgi)            &   65535)
 #define SET_CGINFO_EXCL_INTRA(cgi)   (cgi) =  ((cgi)  |  (1<<16))
index 843435e001bd16e9c11b7d96ca6e3c68e35ea5d0..ae5aea1853383ac0d009469cb3e8842694d8ea22 100644 (file)
@@ -50,6 +50,11 @@ enum {
 
 typedef unsigned long t_excl;
 
+/* The maximum charge group size is 32, because t_excl (unsigned long)
+ * is only guaranteed to be at least 32 bits wide.
+ */
+#define MAX_CHARGEGROUP_SIZE 32
+
 /* The maximum charge group size for CG-CG nblists.
  * The excl entry in t_nblist uses blocks of this size.
  */
index dd916bc149bc1352bf1e05882b819c3b0d5a62ee..139a9c4514f339210ce8be0b2edd756f960f0284 100644 (file)
@@ -785,7 +785,7 @@ static gmx_inline void mvmul(matrix a,const rvec src,rvec dest)
 static gmx_inline void mvmul_ur0(matrix a,const rvec src,rvec dest)
 {
   dest[ZZ]=a[ZZ][XX]*src[XX]+a[ZZ][YY]*src[YY]+a[ZZ][ZZ]*src[ZZ];
-  dest[YY]=a[YY][XX]*src[XX]+a[YY][YY];
+  dest[YY]=a[YY][XX]*src[XX]+a[YY][YY]*src[YY];
   dest[XX]=a[XX][XX]*src[XX];
 }
 
index 18cf1d1c47bffb116e7f37ebb6f4680a9e3f8fc3..82953fe841083efd853c2467e993a63dd2608863 100644 (file)
@@ -1,5 +1,6 @@
 # Man pages: Nothing to build, just installation
 install(DIRECTORY . DESTINATION ${MAN_INSTALL_DIR}
+  COMPONENT data
   PATTERN "Makefile*" EXCLUDE
   PATTERN "CMake*" EXCLUDE
   PATTERN "cmake*" EXCLUDE
index 87b51fce0dc33ec9ab3ae4114ad827f128f5ae9f..a89c958615d23f5134f0b84ee013035be0cea526 100644 (file)
@@ -3,14 +3,14 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/GMXRC.bash.cmakein ${CMAKE_CURRENT_BI
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/GMXRC.csh.cmakein  ${CMAKE_CURRENT_BINARY_DIR}/GMXRC.csh @ONLY)
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/GMXRC.zsh.cmakein  ${CMAKE_CURRENT_BINARY_DIR}/GMXRC.zsh @ONLY)
 
-install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/GMXRC      DESTINATION ${BIN_INSTALL_DIR})
-install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/GMXRC.bash DESTINATION ${BIN_INSTALL_DIR})
-install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/GMXRC.zsh  DESTINATION ${BIN_INSTALL_DIR})
-install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/GMXRC.csh  DESTINATION ${BIN_INSTALL_DIR})
+install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/GMXRC      DESTINATION ${BIN_INSTALL_DIR} COMPONENT runtime)
+install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/GMXRC.bash DESTINATION ${BIN_INSTALL_DIR} COMPONENT runtime)
+install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/GMXRC.zsh  DESTINATION ${BIN_INSTALL_DIR} COMPONENT runtime)
+install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/GMXRC.csh  DESTINATION ${BIN_INSTALL_DIR} COMPONENT runtime)
 
 file(GLOB EXTRA_SCRIPTS completion.*)
-install(FILES ${EXTRA_SCRIPTS} DESTINATION ${BIN_INSTALL_DIR})
+install(FILES ${EXTRA_SCRIPTS} DESTINATION ${BIN_INSTALL_DIR} COMPONENT runtime)
 
-install(PROGRAMS ${CMAKE_CURRENT_SOURCE_DIR}/demux.pl      DESTINATION ${BIN_INSTALL_DIR})
-install(PROGRAMS ${CMAKE_CURRENT_SOURCE_DIR}/xplor2gmx.pl  DESTINATION ${BIN_INSTALL_DIR})
+install(PROGRAMS ${CMAKE_CURRENT_SOURCE_DIR}/demux.pl      DESTINATION ${BIN_INSTALL_DIR} COMPONENT runtime)
+install(PROGRAMS ${CMAKE_CURRENT_SOURCE_DIR}/xplor2gmx.pl  DESTINATION ${BIN_INSTALL_DIR} COMPONENT runtime)
 
index 293530b14a9e61abe9931169e272c290d1267343..52717cc5fb4c2175b159cc81c4d313445c966eca 100644 (file)
@@ -8,8 +8,12 @@
 tmppath=""
 for i in `echo $LD_LIBRARY_PATH | sed "s/:/ /g"`; do
   if test "$i" != "$GMXLDLIB"; then
+    if test "${tmppath}" == ""; then
+      tmppath=$i
+    else
     tmppath=${tmppath}:$i
   fi
+  fi
 done
 LD_LIBRARY_PATH=$tmppath
 
@@ -38,6 +42,9 @@ for i in `echo $MANPATH | sed "s/:/ /g"`; do
     tmppath=${tmppath}:$i
   fi
 done
+if test "$tmppath" == ""; then
+    tmppath=":"
+fi
 MANPATH=$tmppath
 
 ##########################################################
index fcad8c712410164a6fcbc82669004735d899f778..e0b69b472df1de5a6d34aac207f4f69bf11922f8 100644 (file)
@@ -8,8 +8,12 @@
 tmppath=""
 for i in `echo $LD_LIBRARY_PATH | sed "s/:/ /g"`; do
   if test "$i" != "$GMXLDLIB"; then
+    if test "${tmppath}" == ""; then
+      tmppath=$i
+    else
     tmppath=${tmppath}:$i
   fi
+  fi
 done
 LD_LIBRARY_PATH=$tmppath
 
@@ -38,6 +42,9 @@ for i in `echo $MANPATH | sed "s/:/ /g"`; do
     tmppath=${tmppath}:$i
   fi
 done
+if test "$tmppath" == ""; then
+    tmppath=":"
+fi
 MANPATH=$tmppath
 
 ##########################################################
index ff9eba8d6b8c7ace39d1b31ad770cd9ec9cbe363..0e18ba3dbe516b025bb8de4802daa7382c8e4744 100644 (file)
@@ -16,7 +16,13 @@ if (! $?GMXMAN) setenv GMXMAN ""
 # remove previous gromacs part from ld_library_path
 set tmppath = ""
 foreach i ( `echo $LD_LIBRARY_PATH | sed "s/:/ /g"` )
-  if ( "$i" != "$GMXLDLIB" ) set tmppath = "${tmppath}:$i"
+  if ( "$i" != "$GMXLDLIB" ) then
+    if ("${tmppath}" == "") then
+      set tmppath = "$i"
+    else
+      set tmppath = "${tmppath}:$i"
+    endif
+  endif
 end
 setenv LD_LIBRARY_PATH $tmppath
 
@@ -39,6 +45,9 @@ set tmppath = ""
 foreach i ( `echo $MANPATH | sed "s/:/ /g"` )
   if ( "$i" != "$GMXMAN" ) set tmppath = "${tmppath}:$i"
 end
+if ("$tmppath" == "") then
+    set tmppath = ":"
+endif
 setenv MANPATH $tmppath
 
 ##########################################################
index f4dc0df8046c7abc36ea7489aa41c94b94dce486..766446010275e7be80aa9ab133d17b6caa4bde14 100644 (file)
@@ -16,7 +16,13 @@ if (! $?GMXMAN) setenv GMXMAN ""
 # remove previous gromacs part from ld_library_path
 set tmppath = ""
 foreach i ( `echo $LD_LIBRARY_PATH | sed "s/:/ /g"` )
-  if ( "$i" != "$GMXLDLIB" ) set tmppath = "${tmppath}:$i"
+  if ( "$i" != "$GMXLDLIB" ) then
+    if ("${tmppath}" == "") then
+      set tmppath = "$i"
+    else
+      set tmppath = "${tmppath}:$i"
+    endif
+  endif
 end
 setenv LD_LIBRARY_PATH $tmppath
 
@@ -39,6 +45,9 @@ set tmppath = ""
 foreach i ( `echo $MANPATH | sed "s/:/ /g"` )
   if ( "$i" != "$GMXMAN" ) set tmppath = "${tmppath}:$i"
 end
+if ("$tmppath" == "") then
+    set tmppath = ":"
+endif
 setenv MANPATH $tmppath
 
 ##########################################################
index fef40840b0f6c99a79f9bc742f41332bef1359ea..0a29da2418cc26a67967057f3f036f0aad8b204d 100644 (file)
@@ -8,8 +8,12 @@
 tmppath=""
 for i in `echo $LD_LIBRARY_PATH | sed "s/:/ /g"`; do
   if test "$i" != "$GMXLDLIB"; then
+    if test "${tmppath}" = ""; then
+      tmppath=$i
+    else
     tmppath=${tmppath}:$i
   fi
+  fi
 done
 LD_LIBRARY_PATH=$tmppath
 
@@ -38,6 +42,9 @@ for i in `echo $MANPATH | sed "s/:/ /g"`; do
     tmppath=${tmppath}:$i
   fi
 done
+if test "$tmppath" = ""; then
+    tmppath=":"
+fi
 MANPATH=$tmppath
 
 ##########################################################
index 079c29bb35d4b3f5168b686cf221322df8826212..45522bbdfcdba76cd423c8fedbd1d356a9bf24c5 100644 (file)
@@ -8,8 +8,12 @@
 tmppath=""
 for i in `echo $LD_LIBRARY_PATH | sed "s/:/ /g"`; do
   if test "$i" != "$GMXLDLIB"; then
+    if test "${tmppath}" = ""; then
+      tmppath=$i
+    else
     tmppath=${tmppath}:$i
   fi
+  fi
 done
 LD_LIBRARY_PATH=$tmppath
 
@@ -38,6 +42,9 @@ for i in `echo $MANPATH | sed "s/:/ /g"`; do
     tmppath=${tmppath}:$i
   fi
 done
+if test "$tmppath" = ""; then
+    tmppath=":"
+fi
 MANPATH=$tmppath
 
 ##########################################################
index 86f1a85517612e0c2b44243cd499f8eb32ecd4c9..dfebb7f8419c0108c7327b49f5605676f14d90d5 100644 (file)
@@ -1,9 +1,13 @@
 # Data: Nothing to build, just installation
 install(DIRECTORY . DESTINATION ${DATA_INSTALL_DIR}
+  COMPONENT data
   PATTERN "Makefile*" EXCLUDE
   PATTERN "CMake*" EXCLUDE
   PATTERN "cmake*" EXCLUDE
   PATTERN "*~" EXCLUDE
 )
 
-install(FILES template/CMakeLists.txt.template DESTINATION ${DATA_INSTALL_DIR} RENAME template/CMakeLists.txt)
+install(FILES template/CMakeLists.txt.template
+        DESTINATION ${DATA_INSTALL_DIR}
+        RENAME template/CMakeLists.txt
+        COMPONENT data)
index 9cbf19db3326bebe106e5f6a3061a540794a31cc..a510d3e84bf6c3d04bd4f63c96d50cd0567c2916 100644 (file)
@@ -45,7 +45,7 @@ IF YOU'RE NOT SURE ABOUT WHAT YOU'RE DOING, DON'T DO IT!
   nstpcouple, tau_p, compressibility, ref_p, refcoord_scaling)
 <li><A HREF="#sa"><b>simulated annealing</b></A> (annealing, annealing_npoints, annealing_time, annealing_temp)
 <li><A HREF="#vel"><b>velocity generation</b></A> (gen_vel, gen_temp, gen_seed)
-<li><A HREF="#bond"><b>bonds</b></A> (constraints, constraint_algorithm, unconstrained_start, shake_tol, lincs_order, lincs_iter, lincs_warnangle, morse)
+<li><A HREF="#bond"><b>bonds</b></A> (constraints, constraint_algorithm, continuation, shake_tol, lincs_order, lincs_iter, lincs_warnangle, morse)
 <li><A HREF="#egexcl"><b>Energy group exclusions</b></A> (energygrp_excl)
 <li><A HREF="#walls"><b>Walls</b></A> (nwall, wall_type, wall_r_linpot, wall_atomtype,
 wall_density, wall_ewald_zfac)
@@ -1047,7 +1047,8 @@ when inter charge-group constraints are present.
 SHAKE can not be used with energy minimization.
 </dd>
 </dl></dd>
-<dt><b>unconstrained_start:</b></dt>
+<dt><b>continuation:</b></dt>
+<dd>This option was formerly known as <tt>unconstrained_start</tt>.</dd>
 <dd><dl compact>
 <dt><b>no</b></dt>
 <dd>apply constraints to the start configuration and reset shells</dd>
@@ -1115,11 +1116,17 @@ there is also a wall at z=z_box. Walls can only be used with <b>pbc=xy</b>.
 When set to <b>2</b> pressure coupling and Ewald summation can be used
 (it is usually best to use semiisotropic pressure coupling with
 the x/y compressibility set to 0, as otherwise the surface area will change).
+Walls interact with the rest of the system through an optional <tt>wall_atomtype</tt>.
 Energy groups <tt>wall0</tt> and <tt>wall1</tt> (for <b>nwall=2</b>) are
 added automatically to monitor the interaction of energy groups
 with each wall.
 The <A HREF="#run">center of mass motion removal</A> will be turned
 off in the z-direction.</dd>
+<dt><b>wall_atomtype:</b></dt>
+<dd>the atom type name in the force field for each wall. 
+By (for example) defining a special wall atom type in the topology with its 
+own combination rules, this allows for independent tuning of the interaction 
+of each atomtype with the walls.</dd>
 <dt><b>wall_type:</b></dt>
 <dl>
 <dt><b>9-3</b></dt>
@@ -1142,9 +1149,6 @@ are beyond a wall.
 When the value is &le;0 (&lt;0 for <b>wall_type=table</b>),
 a fatal error is generated when atoms are beyond a wall.
 </dd>
-<dt><b>wall_atomtype:</b></dt>
-<dd>the atom type name in the force field for each wall, this allows
-for independent tuning of the interaction of each atomtype with the walls</dd>
 <dt><b>wall_density: [nm<sup>-3</sup>/nm<sup>-2</sup>]</b></dt>
 <dd>the number density of the atoms for each wall for wall types
 <b>9-3</b> and <b>10-4</b>
@@ -1646,7 +1650,7 @@ for the corresponding atom</dd>
 
 <dt></dt><b>sa_algorithm</b>
 <dd><dl compact="compact">
-<dt><b>Ace-approx</b></dt>
+<dt><b>Ace-approximation</b></dt>
 <dd>Use an Ace-type approximation (default)</dd>
 <dt><b>None</b></dt>
 <dd>No non-polar solvation calculation done. For GBSA only the polar part gets 
@@ -1800,7 +1804,7 @@ reals to your subroutine. Check the inputrec definition in
 <A HREF="#tc">tc_grps</A><br>
 <A HREF="#tc">tcoupl</A><br>
 <A HREF="#run">tinit</A><br>
-<A HREF="#bond">unconstrained_start</A><br>
+<A HREF="#bond">continuation</A><br>
 <A HREF="#user">user1_grps</A><br>
 <A HREF="#user">user2_grps</A><br>
 <A HREF="#user">userint1</A><br>
index 454f75649d23617489cd08e1e1963d311805e2d4..b09c02ac9157c03938c732c285acddda059bb6a4 100644 (file)
@@ -11,7 +11,10 @@ aminoacids.hdb   cmap.itp         forcefield.doc   rna.rtp   \
 aminoacids.n.tdb dna.rtp          forcefield.itp   spc.itp     \
 aminoacids.r2b   ffbonded.itp     gb.itp           tip3p.itp   \
 aminoacids.rtp   ffnabonded.itp   ions.itp         tip4p.itp   \
-spce.itp       tips3p.itp      watermodels.dat    tip5p.itp
+spce.itp       tips3p.itp      watermodels.dat    tip5p.itp    \
+dna.hdb                dna.n.tdb       dna.c.tdb       dna.arn \
+rna.hdb                rna.n.tdb       rna.c.tdb       rna.arn \
+rna.r2b
 
 EXTRA_DIST = ${topol_DATA}
 
index 0f4f982055ce66543c751b0039e2a891f389c053..9296d0194d9bb9d011ba1b0e0eed5584ac66f729 100644 (file)
@@ -6,3 +6,4 @@ HISH    HSP
 LYSN   LSN
 ASPH   ASPP
 GLUH   GLUP
+HEM     HEME
diff --git a/share/top/charmm27.ff/dna.arn b/share/top/charmm27.ff/dna.arn
new file mode 100644 (file)
index 0000000..1d12b6a
--- /dev/null
@@ -0,0 +1,12 @@
+DNA  C7   C5M
+DNA  H71  H51
+DNA  H72  H52
+DNA  H73  H53
+DNA  OP1  O1P
+DNA  OP2  O2P
+DNA  H2'  H2'1
+DNA  H2'' H2'2
+DNA  H5'  H5'1
+DNA  H5'' H5'2
+DNA  HO5' H5T
+DNA  HO3' H3T
diff --git a/share/top/charmm27.ff/dna.c.tdb b/share/top/charmm27.ff/dna.c.tdb
new file mode 100644 (file)
index 0000000..892bc18
--- /dev/null
@@ -0,0 +1,10 @@
+[ None ]
+
+[ 3' ]
+[ replace ]
+C3'    C3'     CN7     12.011     0.14 
+H3'    H3'     HN7     1.008      0.09
+O3'    O3'     ON5     15.9994   -0.66
+[ Add ]
+1      2       H3T     O3'     C3'     C4'
+               HN5     1.008     0.43 
diff --git a/share/top/charmm27.ff/dna.hdb b/share/top/charmm27.ff/dna.hdb
new file mode 100644 (file)
index 0000000..12af372
--- /dev/null
@@ -0,0 +1,36 @@
+DA     8
+2      6       H5'     C5'     O5'     C4'     
+1      5       H4'     C4'     C5'     O4'     C3'     
+1      5       H1'     C1'     O4'     N9      C2'     
+1      1       H8      C8      N9      N7      
+2      3       H6      N6      C6      C5      
+1      1       H2      C2      N1      N3      
+1      5       H3'     C3'     C4'     C2'     O3'     
+2      6       H2'     C2'     C1'     C3'     
+DT     8
+2      6       H5'     C5'     O5'     C4'     
+1      5       H4'     C4'     C5'     O4'     C3'     
+1      5       H1'     C1'     O4'     N1      C2'     
+1      1       H6      C6      N1      C5      
+3      4       H5      C5M     C5      C6      
+1      1       H3      N3      C4      C2      
+1      5       H3'     C3'     C4'     C2'     O3'     
+2      6       H2'     C2'     C1'     C3'     
+DG     8
+2      6       H5'     C5'     O5'     C4'     
+1      5       H4'     C4'     C5'     O4'     C3'     
+1      5       H1'     C1'     O4'     N9      C2'     
+1      1       H8      C8      N9      N7      
+1      1       H1      N1      C6      C2      
+2      3       H2      N2      C2      N1      
+1      5       H3'     C3'     C4'     C2'     O3'     
+2      6       H2'     C2'     C1'     C3'     
+DC     8
+2      6       H5'     C5'     O5'     C4'     
+1      5       H4'     C4'     C5'     O4'     C3'     
+1      5       H1'     C1'     O4'     N1      C2'     
+1      1       H6      C6      N1      C5      
+1      1       H5      C5      C6      C4      
+2      3       H4      N4      C4      C5      
+1      5       H3'     C3'     C4'     C2'     O3'     
+2      6       H2'     C2'     C1'     C3'     
diff --git a/share/top/charmm27.ff/dna.n.tdb b/share/top/charmm27.ff/dna.n.tdb
new file mode 100644 (file)
index 0000000..26faf9c
--- /dev/null
@@ -0,0 +1,16 @@
+[ None ]
+
+[ 5' ]
+[ delete ]
+ P
+ O1P
+ O2P
+[ replace ]
+O5'    O5'     ON5     15.9994   -0.66   
+C5'    C5'     CN8B    12.011     0.05
+[ Add ]
+ 1  2  H5T     O5'     C5'     C4'
+       HN5     1.008   0.43
+
+
+
diff --git a/share/top/charmm27.ff/dna.r2b b/share/top/charmm27.ff/dna.r2b
deleted file mode 100644 (file)
index a7c195c..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-; rtp residue to rtp building block table
-;GMX   Force-field
-DA    DA       DA5     DA3     -
-DG    DG       DG5     DG3     -
-DC    DC       DC5     DC3     -
-DT    DT       DT5     DT3     -
-
index 983b381a57babeba0eb106098ff099139fa9aad4..485ea0182d202fe88d61938281bcbb52bcb1aa25 100644 (file)
@@ -112,200 +112,6 @@ N7        C8
 N6     C6      H61     H62
 C6     N1      C5      N6
 
-[ DA5 ]
-;             H61  H62;
-;               \  /
-;                N6
-;                |
-;                C6
-;              //  \
-;              N1   C5--N7\\
-;              |    ||     C8-H8
-;              C2   C4--N9/
-;             / \\ /     \
-;           H2   N3       \
-;                          \
-;                           \
-;                            \
-;               H5'1H4'  O4'  \
-;                |    \ /   \  \
-;        H5T-O5'-C5'---C4'    C1'
-;                |     \     / \
-;               H5'2  C3'--C2' H1'
-;                    / \    / \
-;                 O3' H3' H2'1 H2'2
-;                  |  
-;                     
-;
-[ atoms ]
-H5T     HN5      0.43   0
-O5'    ON5     -0.66   1
-C5'    CN8B     0.05   2
-H5'1   HN8      0.09   3
-H5'2   HN8      0.09   4
-C4'    CN7      0.16   5
-H4'    HN7      0.09   6
-O4'    ON6     -0.50   7
-C1'    CN7B     0.16   8
-H1'    HN7      0.09   9
-N9     NN2     -0.05   10
-C5     CN5      0.28   11
-N7     NN4     -0.71   12
-C8     CN4      0.34   13
-H8     HN3      0.12   14
-N1     NN3A    -0.74   15
-C2     CN4      0.50   16
-H2     HN3      0.13   17
-N3     NN3A    -0.75   18
-C4     CN5      0.43   19
-C6     CN2      0.46   20
-N6     NN1     -0.77   21
-H61    HN1      0.38   22
-H62    HN1      0.38   23
-C2'    CN8     -0.18   24
-H2'1   HN8      0.09   25
-H2'2   HN8      0.09   26
-C3'    CN7      0.01   27
-H3'    HN7      0.09   28
-O3'    ON2     -0.57   29
-[ bonds ]
-H5T    O5'
-O5'    C5'
-C5'    C4'
-C4'    O4'
-C4'    C3'
-O4'    C1'
-C1'    N9
-C1'    C2'
-N9     C4
-N9     C8
-C4     N3
-C2     N1
-C6     N6
-N6     H61
-N6     H62
-C6     C5
-C5     N7
-C2'    C3'
-C3'    O3'
-C1'    H1'
-C2'    H2'1
-C2'    H2'2
-C3'    H3'
-C4'    H4'
-C5'    H5'1
-C5'    H5'2
-C8     H8
-C2     H2
-N1     C6
-C2     N3
-C4     C5
-N7     C8
-[ impropers ]
-N6     C6      H61     H62
-C6     N1      C5      N6
-
-
-[ DA3 ]
-;             H61  H62;
-;               \  /
-;                N6
-;                |
-;                C6
-;              //  \
-;              N1   C5--N7\\
-;              |    ||     C8-H8
-;              C2   C4--N9/
-;             / \\ /     \
-;           H2   N3       \
-;                          \
-;                           \
-;                            \
-;        O1P   H5'1 H4'  O4'  \
-;         |      |    \ /   \  \
-;        -P-O5'-C5'---C4'    C1'
-;         |      |     \     / \
-;        O2P    H5'2   C3'--C2' H1'
-;                     / \    / \
-;                  O3' H3' H2'1 H2'2
-;                   |  
-;                  H3T
-
-[ atoms ]
-P      P        1.50   0
-O1P    ON3     -0.78   1
-O2P    ON3     -0.78   2
-O5'    ON2     -0.57   3
-C5'    CN8B    -0.08   4
-H5'1   HN8      0.09   5
-H5'2   HN8      0.09   6
-C4'    CN7      0.16   7
-H4'    HN7      0.09   8
-O4'    ON6     -0.50   9
-C1'    CN7B     0.16   10
-H1'    HN7      0.09   11
-N9     NN2     -0.05   12
-C5     CN5      0.28   13
-N7     NN4     -0.71   14
-C8     CN4      0.34   15
-H8     HN3      0.12   16
-N1     NN3A    -0.74   17
-C2     CN4      0.50   18
-H2     HN3      0.13   19
-N3     NN3A    -0.75   20
-C4     CN5      0.43   21
-C6     CN2      0.46   22
-N6     NN1     -0.77   23
-H61    HN1      0.38   24
-H62    HN1      0.38   25
-C2'    CN8     -0.18   26
-H2'1   HN8      0.09   27
-H2'2   HN8      0.09   28
-C3'    CN7      0.14   29
-H3'    HN7      0.09   30
-O3'    ON5     -0.66   31
-H3T     HN5      0.43   32
-[ bonds ]
--O3'     P
-P      O1P
-P      O2P
-P      O5'
-O5'    C5'
-C5'    C4'
-C4'    O4'
-C4'    C3'
-O4'    C1'
-C1'    N9
-C1'    C2'
-N9     C4
-N9     C8
-C4     N3
-C2     N1
-C6     N6
-N6     H61
-N6     H62
-C6     C5
-C5     N7
-C2'    C3'
-C3'    O3'
-O3'    H3T
-C1'    H1'
-C2'    H2'1
-C2'    H2'2
-C3'    H3'
-C4'    H4'
-C5'    H5'1
-C5'    H5'2
-C8     H8
-C2     H2
-N1     C6
-C2     N3
-C4     C5
-N7     C8
-[ impropers ]
-N6     C6      H61     H62
-C6     N1      C5      N6
-
 ;--------------------------------------------------------------------------
 [ DC ]
 ;
@@ -353,296 +159,16 @@ H5       HN3      0.07   16
 C2     CN1      0.52   17
 O2     ON1C    -0.49   18
 N3     NN3     -0.66   19
-C4     CN2      0.65   20
-N4     NN1     -0.75   21
-H41    HN1      0.37   22
-H42    HN1      0.33   23
-C2'    CN8     -0.18   24
-H2'1   HN8      0.09   25
-H2'2   HN8      0.09   26
-C3'    CN7      0.01   27
-H3'    HN7      0.09   28
-O3'    ON2     -0.57   29
-[ bonds ]
--O3'     P
-P      O1P
-P      O2P
-P      O5'
-O5'    C5'
-C5'    C4'
-C4'    O4'
-C4'    C3'
-O4'    C1'
-C1'    N1
-C1'    C2'
-N1     C2
-N1     C6
-C2     N3
-C4     N4
-N4     H41
-N4     H42
-C4     C5
-C2'    C3'
-C3'    O3'
-C1'    H1'
-C2'    H2'1
-C2'    H2'2
-C3'    H3'
-C4'    H4'
-C5'    H5'1
-C5'    H5'2
-C5     H5
-C6     H6
-C2     O2
-C5     C6
-N3     C4
-[ impropers ]
-C2     N1      N3      O2
-C4     N3      C5      N4
-N4     C4      H41     H42
-
-[ DC5 ]
-;
-;                      H42  H41
-;                        \  /    
-;                         N4
-;                         |
-;                         C4
-;                        /  \\
-;                    H5-C5   N3
-;                       ||   |
-;                    H6-C6   C2
-;                        \  / \\
-;                         N1   O2
-;                          \
-;                           \
-;                            \ 
-;               H5'1H4'  O4'  \
-;                |    \ /   \  \
-;       H5T-O5'-C5'---C4'    C1'
-;                |     \     / \
-;               H5'2  C3'--C2' H1'
-;                    / \    / \
-;                 O3' H3' H2'1 H2'2
-;                   |  
-;   
-[ atoms ]
-H5T     HN5      0.43   0
-O5'    ON5     -0.66   1
-C5'    CN8B     0.05   2
-H5'1   HN8      0.09   3
-H5'2   HN8      0.09   4
-C4'    CN7      0.16   5
-H4'    HN7      0.09   6
-O4'    ON6     -0.50   7
-C1'    CN7B     0.16   8
-H1'    HN7      0.09   9
-N1     NN2     -0.13   10
-C6     CN3      0.05   11
-H6     HN3      0.17   12
-C5     CN3     -0.13   13
-H5     HN3      0.07   14
-C2     CN1      0.52   15
-O2     ON1C    -0.49   16
-N3     NN3     -0.66   17
-C4     CN2      0.65   18
-N4     NN1     -0.75   19
-H41    HN1      0.37   20
-H42    HN1      0.33   21
-C2'    CN8     -0.18   22
-H2'1   HN8      0.09   23
-H2'2   HN8      0.09   24
-C3'    CN7      0.01   25
-H3'    HN7      0.09   26
-O3'    ON2     -0.57   27
-[ bonds ]
-H5T    O5'
-O5'    C5'
-C5'    C4'
-C4'    O4'
-C4'    C3'
-O4'    C1'
-C1'    N1
-C1'    C2'
-N1     C2
-N1     C6
-C2     N3
-C4     N4
-N4     H41
-N4     H42
-C4     C5
-C2'    C3'
-C3'    O3'
-C1'    H1'
-C2'    H2'1
-C2'    H2'2
-C3'    H3'
-C4'    H4'
-C5'    H5'1
-C5'    H5'2
-C5     H5
-C6     H6
-C2     O2
-C5     C6
-N3     C4
-[ impropers ]
-C2     N1      N3      O2
-C4     N3      C5      N4
-N4     C4      H41     H42
-
-[ DC3 ]
-;
-;                      H42  H41
-;                        \  /    
-;                         N4
-;                         |
-;                         C4
-;                        /  \\
-;                    H5-C5   N3
-;                       ||   |
-;                    H6-C6   C2
-;                        \  / \\
-;                         N1   O2
-;                          \
-;                           \
-;                            \ 
-;        O1P    H5'1H4'  O4'  \
-;         |      |    \ /   \  \
-;        -P-O5'-C5'---C4'    C1'
-;         |      |     \     / \
-;        O2P    H5'2  C3'--C2' H1'
-;                    / \    / \
-;                 O3' H3' H2'1 H2'2
-;                   |  
-;                  H3T
-[ atoms ]
-P      P        1.50   0
-O1P    ON3     -0.78   1
-O2P    ON3     -0.78   2
-O5'    ON2     -0.57   3
-C5'    CN8B    -0.08   4
-H5'1   HN8      0.09   5
-H5'2   HN8      0.09   6
-C4'    CN7      0.16   7
-H4'    HN7      0.09   8
-O4'    ON6     -0.50   9
-C1'    CN7B     0.16   10
-H1'    HN7      0.09   11
-N1     NN2     -0.13   12
-C6     CN3      0.05   13
-H6     HN3      0.17   14
-C5     CN3     -0.13   15
-H5     HN3      0.07   16
-C2     CN1      0.52   17
-O2     ON1C    -0.49   18
-N3     NN3     -0.66   19
-C4     CN2      0.65   20
-N4     NN1     -0.75   21
-H41    HN1      0.37   22
-H42    HN1      0.33   23
-C2'    CN8     -0.18   24
-H2'1   HN8      0.09   25
-H2'2   HN8      0.09   26
-C3'    CN7      0.14   27
-H3'    HN7      0.09   28
-O3'    ON5     -0.66   29
-H3T     HN5      0.43   30
-[ bonds ]
--O3'     P
-P      O1P
-P      O2P
-P      O5'
-O5'    C5'
-C5'    C4'
-C4'    O4'
-C4'    C3'
-O4'    C1'
-C1'    N1
-C1'    C2'
-N1     C2
-N1     C6
-C2     N3
-C4     N4
-N4     H41
-N4     H42
-C4     C5
-C2'    C3'
-C3'    O3'
-O3'    H3T
-C1'    H1'
-C2'    H2'1
-C2'    H2'2
-C3'    H3'
-C4'    H4'
-C5'    H5'1
-C5'    H5'2
-C5     H5
-C6     H6
-C2     O2
-C5     C6
-N3     C4
-[ impropers ]
-C2     N1      N3      O2
-C4     N3      C5      N4
-N4     C4      H41     H42
-
-; --------------------------------------------------------------------
-[ DG ]
-;               O6
-;               ||   
-;               C6    
-;              /  \  
-;          H1-N1   C5--N7\\
-;             |    ||     C8-H8
-;             C2   C4--N9/
-;            / \\ /      \
-;      H21-N2   N3        \
-;          |               \
-;         H22               \
-;                            \
-;        O1P    H5'1 H4'  O4'  \
-;         |      |    \ /   \  \
-;        -P-O5'-C5'---C4'    C1'
-;         |      |     \     / \
-;        O2P    H5'2   C3'--C2' H1'
-;                    / \    / \
-;                 O3' H3' H2'1 H2'2
-;                  |  
-;                     
-[ atoms ]
-P      P        1.50   0
-O1P    ON3     -0.78   1
-O2P    ON3     -0.78   2
-O5'    ON2     -0.57   3
-C5'    CN8B    -0.08   4
-H5'1   HN8      0.09   5
-H5'2   HN8      0.09   6
-C4'    CN7      0.16   7
-H4'    HN7      0.09   8
-O4'    ON6     -0.50   9
-C1'    CN7B     0.16   10
-H1'    HN7      0.09   11
-N9     NN2B    -0.02   12
-C4     CN5      0.26   13
-N2     NN1     -0.68   14
-H21    HN1      0.32   15
-H22    HN1      0.35   16
-N3     NN3G    -0.74   17
-C2     CN2      0.75   18
-N1     NN2G    -0.34   19
-H1     HN2      0.26   20
-C6     CN1      0.54   21
-O6     ON1     -0.51   22
-C5     CN5G     0.00   23
-N7     NN4     -0.60   24
-C8     CN4      0.25   25
-H8     HN3      0.16   26
-C2'    CN8     -0.18   27
-H2'1   HN8      0.09   28
-H2'2   HN8      0.09   29
-C3'    CN7      0.01   30
-H3'    HN7      0.09   31
-O3'    ON2     -0.57   32
+C4     CN2      0.65   20
+N4     NN1     -0.75   21
+H41    HN1      0.37   22
+H42    HN1      0.33   23
+C2'    CN8     -0.18   24
+H2'1   HN8      0.09   25
+H2'2   HN8      0.09   26
+C3'    CN7      0.01   27
+H3'    HN7      0.09   28
+O3'    ON2     -0.57   29
 [ bonds ]
 -O3'     P
 P      O1P
@@ -653,112 +179,15 @@ C5'      C4'
 C4'    O4'
 C4'    C3'
 O4'    C1'
-C1'    N9
+C1'    N1
 C1'    C2'
-N9     C4
-N9     C8
-C4     N3
-C2     N2
-C2     N1
-N2     H21
-N2     H22
-N1     H1
+N1     C2
 N1     C6
-C6     C5
-C5     N7
-C2'    C3'
-C3'    O3'
-C1'    H1'
-C2'    H2'1
-C2'    H2'2
-C3'    H3'
-C4'    H4'
-C5'    H5'1
-C5'    H5'2
-C8     H8
 C2     N3
+C4     N4
+N4     H41
+N4     H42
 C4     C5
-N7     C8
-C6     O6
-[ impropers ]
-C2     N3      N1      N2
-C6     N1      C5      O6
-N2     H21     C2      H22
-
-[ DG5 ]
-;               O6
-;               ||   
-;               C6    
-;              /  \  
-;          H1-N1   C5--N7\\
-;             |    ||     C8-H8
-;             C2   C4--N9/
-;            / \\ /      \
-;      H21-N2   N3        \
-;          |               \
-;         H22               \
-;                            \
-;               H5'1H4'  O4'  \
-;                |    \ /   \  \
-;        H5T-O5'-C5'---C4'    C1'
-;                |     \     / \
-;               H5'2  C3'--C2' H1'
-;                    / \    / \
-;                 O3' H3' H2'1 H2'2
-;                  |  
-;                     
-[ atoms ]
-H5T     HN5      0.43   0
-O5'    ON5     -0.66   1
-C5'    CN8B     0.05   2
-H5'1   HN8      0.09   3
-H5'2   HN8      0.09   4
-C4'    CN7      0.16   5
-H4'    HN7      0.09   6
-O4'    ON6     -0.50   7
-C1'    CN7B     0.16   8
-H1'    HN7      0.09   9
-N9     NN2B    -0.02   10
-C4     CN5      0.26   11
-N2     NN1     -0.68   12
-H21    HN1      0.32   13
-H22    HN1      0.35   14
-N3     NN3G    -0.74   15
-C2     CN2      0.75   16
-N1     NN2G    -0.34   17
-H1     HN2      0.26   18
-C6     CN1      0.54   19
-O6     ON1     -0.51   20
-C5     CN5G     0.00   21
-N7     NN4     -0.60   22
-C8     CN4      0.25   23
-H8     HN3      0.16   24
-C2'    CN8     -0.18   25
-H2'1   HN8      0.09   26
-H2'2   HN8      0.09   27
-C3'    CN7      0.01   28
-H3'    HN7      0.09   29
-O3'    ON2     -0.57   30
-[ bonds ]
-H5T     O5'
-O5'    C5'
-C5'    C4'
-C4'    O4'
-C4'    C3'
-O4'    C1'
-C1'    N9
-C1'    C2'
-N9     C4
-N9     C8
-C4     N3
-C2     N2
-C2     N1
-N2     H21
-N2     H22
-N1     H1
-N1     C6
-C6     C5
-C5     N7
 C2'    C3'
 C3'    O3'
 C1'    H1'
@@ -768,17 +197,18 @@ C3'       H3'
 C4'    H4'
 C5'    H5'1
 C5'    H5'2
-C8     H8
-C2     N3
-C4     C5
-N7     C8
-C6     O6
+C5     H5
+C6     H6
+C2     O2
+C5     C6
+N3     C4
 [ impropers ]
-C2     N3      N1      N2
-C6     N1      C5      O6
-N2     H21     C2      H22
-[ DG3 ]
+C2     N1      N3      O2
+C4     N3      C5      N4
+N4     C4      H41     H42
+
+; --------------------------------------------------------------------
+[ DG ]
 ;               O6
 ;               ||   
 ;               C6    
@@ -797,9 +227,9 @@ N2  H21     C2      H22
 ;         |      |     \     / \
 ;        O2P    H5'2   C3'--C2' H1'
 ;                    / \    / \
-;                  O3' H3' H2'1 H2'2
-;                   |  
-;                  H3T
+;                 O3' H3' H2'1 H2'2
+;                  |  
+;                     
 [ atoms ]
 P      P        1.50   0
 O1P    ON3     -0.78   1
@@ -831,10 +261,9 @@ H8 HN3      0.16   26
 C2'    CN8     -0.18   27
 H2'1   HN8      0.09   28
 H2'2   HN8      0.09   29
-C3'    CN7      0.14   30
+C3'    CN7      0.01   30
 H3'    HN7      0.09   31
-O3'    ON5     -0.66   32
-H3T     HN5      0.43   33
+O3'    ON2     -0.57   32
 [ bonds ]
 -O3'     P
 P      O1P
@@ -860,7 +289,6 @@ C6  C5
 C5     N7
 C2'    C3'
 C3'    O3'
-O3'    H3T
 C1'    H1'
 C2'    H2'1
 C2'    H2'2
@@ -975,192 +403,3 @@ C2        N1      N3      O2
 C4     N3      C5      O4
 C5     C4      C6      C5M
 
-[ DT5 ]
-;                  H51    O4
-;                   |     ||
-;               H52-C5M   C4    H3
-;                   |  \ /  \  /
-;                  H53  C5   N3
-;                       ||   |
-;                    H6-C6   C2
-;                        \  / \\  
-;                         N1   O2
-;                          \
-;                           \
-;                            \ 
-;               H5'1H4'  O4'  \
-;                |    \ /   \  \
-;       H5T-O5'-C5'---C4'    C1'
-;                |     \     / \
-;               H5'2  C3'--C2' H1'
-;                    / \    / \
-;                 O3' H3' H2'1 H2'2
-;                   |  
-;                     
-;
-[ atoms ]
-H5T     HN5      0.43   0
-O5'    ON5     -0.66   1
-C5'    CN8B     0.05   2
-H5'1   HN8      0.09   3
-H5'2   HN8      0.09   4
-C4'    CN7      0.16   5
-H4'    HN7      0.09   6
-O4'    ON6     -0.50   7
-C1'    CN7B     0.16   8
-H1'    HN7      0.09   9
-N1     NN2B    -0.34   10
-C6     CN3      0.17   11
-H6     HN3      0.17   12
-C2     CN1T     0.51   13
-O2     ON1     -0.41   14
-N3     NN2U    -0.46   15
-H3     HN2      0.36   16
-C4     CN1      0.50   17
-O4     ON1     -0.45   18
-C5     CN3T    -0.15   19
-C5M    CN9     -0.11   20
-H51    HN9      0.07   21
-H52    HN9      0.07   22
-H53    HN9      0.07   23
-C2'    CN8     -0.18   24
-H2'1   HN8      0.09   25
-H2'2   HN8      0.09   26
-C3'    CN7      0.01   27
-H3'    HN7      0.09   28
-O3'    ON2     -0.57   29
-[ bonds ]
-H5T    O5'
-O5'    C5'
-C5'    C4'
-C4'    O4'
-C4'    C3'
-O4'    C1'
-C1'    N1
-C1'    C2'
-N1     C2
-N1     C6
-C2     N3
-N3     H3
-N3     C4
-C4     C5
-C5     C5M
-C2'    C3'
-C3'    O3'
-C1'    H1'
-C2'    H2'1
-C2'    H2'2
-C3'    H3'
-C4'    H4'
-C5'    H5'1
-C5'    H5'2
-C6     H6
-C5M    H51
-C5M    H52
-C5M    H53
-C2     O2
-C4     O4
-C5     C6
-[ impropers ]
-C2     N1      N3      O2
-C4     N3      C5      O4
-C5     C4      C6      C5M
-
-[ DT3 ]
-;                  H51    O4
-;                   |     ||
-;               H52-C5M   C4    H3
-;                   |  \ /  \  /
-;                  H53  C5   N3
-;                       ||   |
-;                    H6-C6   C2
-;                        \  / \\  
-;                         N1   O2
-;                          \
-;                           \
-;                            \ 
-;        O1P    H5'1H4'  O4'  \
-;         |      |    \ /   \  \
-;        -P-O5'-C5'---C4'    C1'
-;         |      |     \     / \
-;        O2P    H5'2  C3'--C2' H1'
-;                    / \    / \
-;                 O3' H3' H2'1 H2'2
-;                   |  
-;                  H3T
-;
-[ atoms ]
-P      P        1.50   0
-O1P    ON3     -0.78   1
-O2P    ON3     -0.78   2
-O5'    ON2     -0.57   3
-C5'    CN8B    -0.08   4
-H5'1   HN8      0.09   5
-H5'2   HN8      0.09   6
-C4'    CN7      0.16   7
-H4'    HN7      0.09   8
-O4'    ON6     -0.50   9
-C1'    CN7B     0.16   10
-H1'    HN7      0.09   11
-N1     NN2B    -0.34   12
-C6     CN3      0.17   13
-H6     HN3      0.17   14
-C2     CN1T     0.51   15
-O2     ON1     -0.41   16
-N3     NN2U    -0.46   17
-H3     HN2      0.36   18
-C4     CN1      0.50   19
-O4     ON1     -0.45   20
-C5     CN3T    -0.15   21
-C5M    CN9     -0.11   22
-H51    HN9      0.07   23
-H52    HN9      0.07   24
-H53    HN9      0.07   25
-C2'    CN8     -0.18   26
-H2'1   HN8      0.09   27
-H2'2   HN8      0.09   28
-C3'    CN7      0.14   29
-H3'    HN7      0.09   30
-O3'    ON5     -0.66   31
-H3T     HN5      0.43   32
-[ bonds ]
--O3'     P
-P      O1P
-P      O2P
-P      O5'
-O5'    C5'
-C5'    C4'
-C4'    O4'
-C4'    C3'
-O4'    C1'
-C1'    N1
-C1'    C2'
-N1     C2
-N1     C6
-C2     N3
-N3     H3
-N3     C4
-C4     C5
-C5     C5M
-C2'    C3'
-C3'    O3'
-O3'    H3T
-C1'    H1'
-C2'    H2'1
-C2'    H2'2
-C3'    H3'
-C4'    H4'
-C5'    H5'1
-C5'    H5'2
-C6     H6
-C5M    H51
-C5M    H52
-C5M    H53
-C2     O2
-C4     O4
-C5     C6
-[ impropers ]
-C2     N1      N3      O2
-C4     N3      C5      O4
-C5     C4      C6      C5M
-
index fd6223814c18cf711d995682cd9ad3ce044330a0..41a6f2058c38c8d9d1c02c9e5cdb2b386aace2bf 100644 (file)
@@ -155,6 +155,7 @@ HW  1       1.008000        0.41    A       0.0     0.0     ; SPC H
 MNH3   0       0.000000        0.00    A       0.0     0.0
 MNH2   0       0.000000        0.00    A       0.0     0.0
 MCH3   0       0.000000        0.00    A       0.0     0.0
+MCH3S  0       0.000000        0.00    A       0.0     0.0
 ; Ions and noble gases (useful for tutorials)
 Cu2+   29      63.54600        2.00    A       2.08470e-01     4.76976e+00
 Ar     18      39.94800        0.00    A       3.41000e-01     2.74580e-02
index a4f29e117c2fb5b4d99ef3949085394730fb4e95..bdd87ef10b710f6200255c012a3fb1a71d178e24 100644 (file)
@@ -41,6 +41,7 @@
  HP             0.1     1       1       0.125    0.85 ; H
  NY             0.155   1       1.028   0.17063  0.79 ; N
  CPT            0.172   0.012   1.554   0.1875   0.72 ; C     
- MNH3          0       0       0       0        0    ; vsite (rigid tetrahedrical NH3 group)
- MNH2          0       0       0       0        0    ; vsite
- MCH3          0       0       0       0        0    ; vsite (rigid CH3 group)
\ No newline at end of file
+ MNH3          0       0       0       0        0    ; dummy mass
+ MNH2          0       0       0       0        0    ; dummy mass
+ MCH3          0       0       0       0        0    ; dummy mass
+ MCH3S         0       0       0       0        0    ; dummy mass
diff --git a/share/top/charmm27.ff/rna.arn b/share/top/charmm27.ff/rna.arn
new file mode 100644 (file)
index 0000000..ed58c40
--- /dev/null
@@ -0,0 +1,8 @@
+RNA  OP1  O1P
+RNA  OP2  O2P
+RNA  H2'  H2'1
+RNA  H2'' H2'2
+RNA  H5'  H5'1
+RNA  H5'' H5'2
+RNA  HO5' H5T
+RNA  HO3' H3T
diff --git a/share/top/charmm27.ff/rna.c.tdb b/share/top/charmm27.ff/rna.c.tdb
new file mode 100644 (file)
index 0000000..892bc18
--- /dev/null
@@ -0,0 +1,10 @@
+[ None ]
+
+[ 3' ]
+[ replace ]
+C3'    C3'     CN7     12.011     0.14 
+H3'    H3'     HN7     1.008      0.09
+O3'    O3'     ON5     15.9994   -0.66
+[ Add ]
+1      2       H3T     O3'     C3'     C4'
+               HN5     1.008     0.43 
diff --git a/share/top/charmm27.ff/rna.hdb b/share/top/charmm27.ff/rna.hdb
new file mode 100644 (file)
index 0000000..d3e4546
--- /dev/null
@@ -0,0 +1,40 @@
+RA     9
+2      6       H5'     C5'     O5'     C4'     
+1      5       H4'     C4'     C5'     O4'     C3'     
+1      5       H1'     C1'     O4'     N9      C2'     
+1      1       H8      C8      N9      N7      
+2      3       H6      N6      C6      C5      
+1      1       H2      C2      N1      N3      
+1      5       H3'     C3'     C4'     C2'     O3'     
+1      5       H2'2    C2'     C1'     C3'     O2'     
+1      2       H2'1    O2'     C2'     C1'     
+RU     9
+2      6       H5'     C5'     O5'     C4'     
+1      5       H4'     C4'     C5'     O4'     C3'     
+1      5       H1'     C1'     O4'     N1      C2'     
+1      1       H6      C6      N1      C5      
+1      1       H5      C5      C6      C4      
+1      1       H3      N3      C4      C2      
+1      5       H3'     C3'     C4'     C2'     O3'     
+1      5       H2'2    C2'     C1'     C3'     O2'     
+1      2       H2'1    O2'     C2'     C1'     
+RG     9
+2      6       H5'     C5'     O5'     C4'     
+1      5       H4'     C4'     C5'     O4'     C3'     
+1      5       H1'     C1'     O4'     N9      C2'     
+1      1       H8      C8      N9      N7      
+1      1       H1      N1      C6      C2      
+2      3       H2      N2      C2      N1      
+1      5       H3'     C3'     C4'     C2'     O3'     
+1      5       H2'2    C2'     C1'     C3'     O2'     
+1      2       H2'1    O2'     C2'     C1'     
+RC     9
+2      6       H5'     C5'     O5'     C4'     
+1      5       H4'     C4'     C5'     O4'     C3'     
+1      5       H1'     C1'     O4'     N1      C2'     
+1      1       H6      C6      N1      C5      
+1      1       H5      C5      C6      C4      
+2      3       H4      N4      C4      C5      
+1      5       H3'     C3'     C4'     C2'     O3'     
+1      5       H2'2    C2'     C1'     C3'     O2'     
+1      2       H2'1    O2'     C2'     C1'     
diff --git a/share/top/charmm27.ff/rna.n.tdb b/share/top/charmm27.ff/rna.n.tdb
new file mode 100644 (file)
index 0000000..26faf9c
--- /dev/null
@@ -0,0 +1,16 @@
+[ None ]
+
+[ 5' ]
+[ delete ]
+ P
+ O1P
+ O2P
+[ replace ]
+O5'    O5'     ON5     15.9994   -0.66   
+C5'    C5'     CN8B    12.011     0.05
+[ Add ]
+ 1  2  H5T     O5'     C5'     C4'
+       HN5     1.008   0.43
+
+
+
index 10cf12155a00ba1aa4078a7694c0e87a7358b4c6..d30b5290fedcffc4252305621bfd3c3d75a34a51 100644 (file)
@@ -1,6 +1,6 @@
 ; rtp residue to rtp building block table
 ;GMX   Force-field
-A     RA       RA5     RA3     RA
-G     RG       RG5     RG3     RG
-C     RC       RC5     RC3     RC
-U     RU       RU5     RU3     RU
+A     RA
+G     RG
+C     RC
+U     RU
index dd53e919c1364601f6aca61abb510cbc437b94d4..3abc189d2a173b16f7aaf05f595456dd68362019 100644 (file)
@@ -114,204 +114,6 @@ N7        C8
 N6     C6      H61     H62
 C6     N1      C5      N6
 
-[ RA5 ]
- ;             H61  H62;
- ;               \  /
- ;                N6
- ;                |
- ;                C6
- ;              //  \
- ;              N1   C5--N7\\
- ;              |    ||     C8-H8
- ;              C2   C4--N9/
- ;             / \\ /     \
- ;           H2   N3       \
- ;                          \
- ;                           \
- ;                            \
- ;               H5'1H4'  O4'  \
- ;                |    \ /   \  \
- ;        H5T-O5'-C5'---C4'    C1'
- ;                |     \     / \
- ;               H5'2  C3'--C2' H1'
- ;                     / \   / \
- ;                  O3' H3' O2' H2'2
- ;                   |       |
- ;                          H2'1
- ;
-[ atoms ]
-H5T     HN5      0.43   0
-O5'    ON5     -0.66   1
-C5'    CN8B     0.05   2
-H5'1   HN8      0.09   3
-H5'2   HN8      0.09   4
-C4'    CN7      0.16   5
-H4'    HN7      0.09   6
-O4'    ON6B    -0.50   7
-C1'    CN7B     0.16   8
-H1'    HN7      0.09   9
-N9     NN2     -0.05   10
-C5     CN5      0.28   11
-N7     NN4     -0.71   12
-C8     CN4      0.34   13
-H8     HN3      0.12   14
-N1     NN3A    -0.74   15
-C2     CN4      0.50   16
-H2     HN3      0.13   17
-N3     NN3A    -0.75   18
-C4     CN5      0.43   19
-C6     CN2      0.46   20
-N6     NN1     -0.77   21
-H61    HN1      0.38   22
-H62    HN1      0.38   23
-C2'    CN7B     0.14   24
-H2'2   HN7      0.09   25
-O2'    ON5     -0.66   26
-H2'1   HN5      0.43   27
-C3'    CN7      0.01   28
-H3'    HN7      0.09   29
-O3'    ON2     -0.57   30
-[ bonds ]
-H5T    O5'
-O5'    C5'
-C5'    C4'
-C4'    O4'
-C4'    C3'
-O4'    C1'
-C1'    N9
-C1'    C2'
-N9     C4
-N9     C8
-C4     N3
-C2     N1
-C6     N6
-N6     H61
-N6     H62
-C6     C5
-C5     N7
-C2'    C3'
-C2'    O2'
-O2'    H2'1
-C3'    O3'
-C1'    H1'
-C2'    H2'2
-C3'    H3'
-C4'    H4'
-C5'    H5'1
-C5'    H5'2
-C8     H8
-C2     H2
-N1     C6
-C2     N3
-C4     C5
-N7     C8
-[ impropers ]
-N6     C6      H61     H62
-C6     N1      C5      N6
-
-
-[ RA3 ]
- ;             H61  H62;
- ;               \  /
- ;                N6
- ;                |
- ;                C6
- ;              //  \
- ;              N1   C5--N7\\
- ;              |    ||     C8-H8
- ;              C2   C4--N9/
- ;             / \\ /     \
- ;           H2   N3       \
- ;                          \
- ;                           \
- ;                            \
- ;        O1P   H5'1 H4'  O4'  \
- ;         |      |    \ /   \  \
- ;        -P-O5'-C5'---C4'    C1'
- ;         |      |     \     / \
- ;        O2P    H5'2   C3'--C2' H1'
- ;                     / \   / \
- ;                  O3' H3' O2' H2'2
- ;                   |       |
- ;                  H3T     H2'1
- ;
-[ atoms ]
-P      P        1.50   0
-O1P    ON3     -0.78   1
-O2P    ON3     -0.78   2
-O5'    ON2     -0.57   3
-C5'    CN8B    -0.08   4
-H5'1   HN8      0.09   5
-H5'2   HN8      0.09   6
-C4'    CN7      0.16   7
-H4'    HN7      0.09   8
-O4'    ON6B    -0.50   9
-C1'    CN7B     0.16   10
-H1'    HN7      0.09   11
-N9     NN2     -0.05   12
-C5     CN5      0.28   13
-N7     NN4     -0.71   14
-C8     CN4      0.34   15
-H8     HN3      0.12   16
-N1     NN3A    -0.74   17
-C2     CN4      0.50   18
-H2     HN3      0.13   19
-N3     NN3A    -0.75   20
-C4     CN5      0.43   21
-C6     CN2      0.46   22
-N6     NN1     -0.77   23
-H61    HN1      0.38   24
-H62    HN1      0.38   25
-C2'    CN7B     0.14   26
-H2'2   HN7      0.09   27
-O2'    ON5     -0.66   28
-H2'1   HN5      0.43   29
-C3'    CN7      0.14   30
-H3'    HN7      0.09   31
-O3'    ON5     -0.66   32
-H3T     HN5      0.43   33
-[ bonds ]
--O3'     P
-P      O1P
-P      O2P
-P      O5'
-O5'    C5'
-C5'    C4'
-C4'    O4'
-C4'    C3'
-O4'    C1'
-C1'    N9
-C1'    C2'
-N9     C4
-N9     C8
-C4     N3
-C2     N1
-C6     N6
-N6     H61
-N6     H62
-C6     C5
-C5     N7
-C2'    C3'
-C2'    O2'
-O2'    H2'1
-C3'    O3'
-O3'    H3T
-C1'    H1'
-C2'    H2'2
-C3'    H3'
-C4'    H4'
-C5'    H5'1
-C5'    H5'2
-C8     H8
-C2     H2
-N1     C6
-C2     N3
-C4     C5
-N7     C8
-[ impropers ]
-N6     C6      H61     H62
-C6     N1      C5      N6
-
 ;--------------------------------------------------------------------------
 [ RC ]
 ;
@@ -339,323 +141,37 @@ C6       N1      C5      N6
 ;                   |       |
 ;                          H2'1
 [ atoms ]
-P      P        1.50   0
-O1P    ON3     -0.78   1
-O2P    ON3     -0.78   2
-O5'    ON2     -0.57   3
-C5'    CN8B    -0.08   4
-H5'1   HN8      0.09   5
-H5'2   HN8      0.09   6
-C4'    CN7      0.16   7
-H4'    HN7      0.09   8
-O4'    ON6B    -0.50   9
-C1'    CN7B     0.16   10
-H1'    HN7      0.09   11
-N1     NN2     -0.13   12
-C6     CN3      0.05   13
-H6     HN3      0.17   14
-C5     CN3     -0.13   15
-H5     HN3      0.07   16
-C2     CN1      0.52   17
-O2     ON1C    -0.49   18
-N3     NN3     -0.66   19
-C4     CN2      0.65   20
-N4     NN1     -0.75   21
-H41    HN1      0.37   22
-H42    HN1      0.33   23
-C2'    CN7B     0.14   24
-H2'2   HN7      0.09   25
-O2'    ON5     -0.66   26
-H2'1   HN5      0.43   27
-C3'    CN7      0.01   28
-H3'    HN7      0.09   29
-O3'    ON2     -0.57   30
-[ bonds ]
--O3'     P
-P      O1P
-P      O2P
-P      O5'
-O5'    C5'
-C5'    C4'
-C4'    O4'
-C4'    C3'
-O4'    C1'
-C1'    N1
-C1'    C2'
-N1     C2
-N1     C6
-C2     N3
-C4     N4
-N4     H41
-N4     H42
-C4     C5
-C2'    C3'
-C3'    O3'
-C2'    O2'
-O2'    H2'1
-C1'    H1'
-C2'    H2'2
-C3'    H3'
-C4'    H4'
-C5'    H5'1
-C5'    H5'2
-C5     H5
-C6     H6
-C2     O2
-C5     C6
-N3     C4
-[ impropers ]
-C2     N1      N3      O2
-C4     N3      C5      N4
-N4     C4      H41     H42
-
-[ RC5 ]
-;
-;                      H42  H41
-;                        \  /    
-;                         N4
-;                         |
-;                         C4
-;                        /  \\
-;                    H5-C5   N3
-;                       ||   |
-;                    H6-C6   C2
-;                        \  / \\
-;                         N1   O2
-;                          \
-;                           \
-;                            \ 
-;               H5'1H4'  O4'  \
-;                |    \ /   \  \
-;       H5T-O5'-C5'---C4'    C1'
-;                |     \     / \
-;               H5'2  C3'--C2' H1'
-;                     / \   / \ 
-;                  O3' H3' O2' H2'2 
-;                   |       |
-;                          H2'1
-[ atoms ]
-H5T     HN5      0.43   0
-O5'    ON5     -0.66   1
-C5'    CN8B     0.05   2
-H5'1   HN8      0.09   3
-H5'2   HN8      0.09   4
-C4'    CN7      0.16   5
-H4'    HN7      0.09   6
-O4'    ON6B    -0.50   7
-C1'    CN7B     0.16   8
-H1'    HN7      0.09   9
-N1     NN2     -0.13   10
-C6     CN3      0.05   11
-H6     HN3      0.17   12
-C5     CN3     -0.13   13
-H5     HN3      0.07   14
-C2     CN1      0.52   15
-O2     ON1C    -0.49   16
-N3     NN3     -0.66   17
-C4     CN2      0.65   18
-N4     NN1     -0.75   19
-H41    HN1      0.37   20
-H42    HN1      0.33   21
-C2'    CN7B     0.14   22
-H2'2   HN7      0.09   23
-O2'    ON5     -0.66   24
-H2'1   HN5      0.43   25
-C3'    CN7      0.01   26
-H3'    HN7      0.09   27
-O3'    ON2     -0.57   28
-[ bonds ]
-H5T    O5'
-O5'    C5'
-C5'    C4'
-C4'    O4'
-C4'    C3'
-O4'    C1'
-C1'    N1
-C1'    C2'
-N1     C2
-N1     C6
-C2     N3
-C4     N4
-N4     H41
-N4     H42
-C4     C5
-C2'    C3'
-C3'    O3'
-C2'    O2'
-O2'    H2'1
-C1'    H1'
-C2'    H2'2
-C3'    H3'
-C4'    H4'
-C5'    H5'1
-C5'    H5'2
-C5     H5
-C6     H6
-C2     O2
-C5     C6
-N3     C4
-[ impropers ]
-C2     N1      N3      O2
-C4     N3      C5      N4
-N4     C4      H41     H42
-
-[ RC3 ]
-;
-;                      H42  H41
-;                        \  /    
-;                         N4
-;                         |
-;                         C4
-;                        /  \\
-;                    H5-C5   N3
-;                       ||   |
-;                    H6-C6   C2
-;                        \  / \\
-;                         N1   O2
-;                          \
-;                           \
-;                            \ 
-;        O1P    H5'1H4'  O4'  \
-;         |      |    \ /   \  \
-;        -P-O5'-C5'---C4'    C1'
-;         |      |     \     / \
-;        O2P    H5'2  C3'--C2' H1'
-;                     / \   / \ 
-;                  O3' H3' O2' H2'2 
-;                   |       |
-;                  H3T     H2'1
-[ atoms ]
-P      P        1.50   0
-O1P    ON3     -0.78   1
-O2P    ON3     -0.78   2
-O5'    ON2     -0.57   3
-C5'    CN8B    -0.08   4
-H5'1   HN8      0.09   5
-H5'2   HN8      0.09   6
-C4'    CN7      0.16   7
-H4'    HN7      0.09   8
-O4'    ON6B    -0.50   9
-C1'    CN7B     0.16   10
-H1'    HN7      0.09   11
-N1     NN2     -0.13   12
-C6     CN3      0.05   13
-H6     HN3      0.17   14
-C5     CN3     -0.13   15
-H5     HN3      0.07   16
-C2     CN1      0.52   17
-O2     ON1C    -0.49   18
-N3     NN3     -0.66   19
-C4     CN2      0.65   20
-N4     NN1     -0.75   21
-H41    HN1      0.37   22
-H42    HN1      0.33   23
-C2'    CN7B     0.14   24
-H2'2   HN7      0.09   25
-O2'    ON5     -0.66   26
-H2'1   HN5      0.43   27
-C3'    CN7      0.14   28
-H3'    HN7      0.09   29
-O3'    ON5     -0.66   30
-H3T     HN5      0.43   31
-[ bonds ]
--O3'     P
-P      O1P
-P      O2P
-P      O5'
-O5'    C5'
-C5'    C4'
-C4'    O4'
-C4'    C3'
-O4'    C1'
-C1'    N1
-C1'    C2'
-N1     C2
-N1     C6
-C2     N3
-C4     N4
-N4     H41
-N4     H42
-C4     C5
-C2'    C3'
-C3'    O3'
-O3'    H3T
-C2'    O2'
-O2'    H2'1
-C1'    H1'
-C2'    H2'2
-C3'    H3'
-C4'    H4'
-C5'    H5'1
-C5'    H5'2
-C5     H5
-C6     H6
-C2     O2
-C5     C6
-N3     C4
-[ impropers ]
-C2     N1      N3      O2
-C4     N3      C5      N4
-N4     C4      H41     H42
-
-; --------------------------------------------------------------------
-[ RG ]
-;               O6
-;               ||   
-;               C6    
-;              /  \  
-;          H1-N1   C5--N7\\
-;             |    ||     C8-H8
-;             C2   C4--N9/
-;            / \\ /      \
-;      H21-N2   N3        \
-;          |               \
-;         H22               \
-;                            \
-;        O1P    H5'1 H4'  O4'  \
-;         |      |    \ /   \  \
-;        -P-O5'-C5'---C4'    C1'
-;         |      |     \     / \
-;        O2P    H5'2   C3'--C2' H1'
-;                      / \   / \
-;                     O3' H3' O2' H2'2 
-;                     |       |
-;                             H2'1
-[ atoms ]
-P      P        1.50   0
-O1P    ON3     -0.78   1
-O2P    ON3     -0.78   2
-O5'    ON2     -0.57   3
-C5'    CN8B    -0.08   4
-H5'1   HN8      0.09   5
-H5'2   HN8      0.09   6
-C4'    CN7      0.16   7
-H4'    HN7      0.09   8
-O4'    ON6B    -0.50   9
-C1'    CN7B     0.16   10
-H1'    HN7      0.09   11
-N9     NN2B    -0.02   12
-C4     CN5      0.26   13
-N2     NN1     -0.68   14
-H21    HN1      0.32   15
-H22    HN1      0.35   16
-N3     NN3G    -0.74   17
-C2     CN2      0.75   18
-N1     NN2G    -0.34   19
-H1     HN2      0.26   20
-C6     CN1      0.54   21
-O6     ON1     -0.51   22
-C5     CN5G     0.00   23
-N7     NN4     -0.60   24
-C8     CN4      0.25   25
-H8     HN3      0.16   26
-C2'    CN7B     0.14   27
-H2'2   HN7      0.09   28
-O2'    ON5     -0.66   29
-H2'1   HN5      0.43   30
-C3'    CN7      0.01   31
-H3'    HN7      0.09   32
-O3'    ON2     -0.57   33
+P      P        1.50   0
+O1P    ON3     -0.78   1
+O2P    ON3     -0.78   2
+O5'    ON2     -0.57   3
+C5'    CN8B    -0.08   4
+H5'1   HN8      0.09   5
+H5'2   HN8      0.09   6
+C4'    CN7      0.16   7
+H4'    HN7      0.09   8
+O4'    ON6B    -0.50   9
+C1'    CN7B     0.16   10
+H1'    HN7      0.09   11
+N1     NN2     -0.13   12
+C6     CN3      0.05   13
+H6     HN3      0.17   14
+C5     CN3     -0.13   15
+H5     HN3      0.07   16
+C2     CN1      0.52   17
+O2     ON1C    -0.49   18
+N3     NN3     -0.66   19
+C4     CN2      0.65   20
+N4     NN1     -0.75   21
+H41    HN1      0.37   22
+H42    HN1      0.33   23
+C2'    CN7B     0.14   24
+H2'2   HN7      0.09   25
+O2'    ON5     -0.66   26
+H2'1   HN5      0.43   27
+C3'    CN7      0.01   28
+H3'    HN7      0.09   29
+O3'    ON2     -0.57   30
 [ bonds ]
 -O3'     P
 P      O1P
@@ -666,114 +182,15 @@ C5'      C4'
 C4'    O4'
 C4'    C3'
 O4'    C1'
-C1'    N9
+C1'    N1
 C1'    C2'
-N9     C4
-N9     C8
-C4     N3
-C2     N2
-C2     N1
-N2     H21
-N2     H22
-N1     H1
+N1     C2
 N1     C6
-C6     C5
-C5     N7
-C2'    C3'
-C3'    O3'
-C2'    O2'
-O2'    H2'1
-C1'    H1'
-C2'    H2'2
-C3'    H3'
-C4'    H4'
-C5'    H5'1
-C5'    H5'2
-C8     H8
 C2     N3
+C4     N4
+N4     H41
+N4     H42
 C4     C5
-N7     C8
-C6     O6
-[ impropers ]
-C2     N3      N1      N2
-C6     N1      C5      O6
-N2     H21     C2      H22
-
-[ RG5 ]
-;               O6
-;               ||   
-;               C6    
-;              /  \  
-;          H1-N1   C5--N7\\
-;             |    ||     C8-H8
-;             C2   C4--N9/
-;            / \\ /      \
-;      H21-N2   N3        \
-;          |               \
-;         H22               \
-;                            \
-;               H5'1H4'  O4'  \
-;                |    \ /   \  \
-;        H5T-O5'-C5'---C4'    C1'
-;                |     \     / \
-;               H5'2  C3'--C2' H1'
-;                     / \   / \
-;                   O3' H3' O2' H2'2 
-;                    |       |
-;                            H2'1
-[ atoms ]
-H5T     HN5      0.43   0
-O5'    ON5     -0.66   1
-C5'    CN8B     0.05   2
-H5'1   HN8      0.09   3
-H5'2   HN8      0.09   4
-C4'    CN7      0.16   5
-H4'    HN7      0.09   6
-O4'    ON6B    -0.50   7
-C1'    CN7B     0.16   8
-H1'    HN7      0.09   9
-N9     NN2B    -0.02   10
-C4     CN5      0.26   11
-N2     NN1     -0.68   12
-H21    HN1      0.32   13
-H22    HN1      0.35   14
-N3     NN3G    -0.74   15
-C2     CN2      0.75   16
-N1     NN2G    -0.34   17
-H1     HN2      0.26   18
-C6     CN1      0.54   19
-O6     ON1     -0.51   20
-C5     CN5G     0.00   21
-N7     NN4     -0.60   22
-C8     CN4      0.25   23
-H8     HN3      0.16   24
-C2'    CN7B     0.14   25
-H2'2   HN7      0.09   26
-O2'    ON5     -0.66   27
-H2'1   HN5      0.43   28
-C3'    CN7      0.01   29
-H3'    HN7      0.09   30
-O3'    ON2     -0.57   31
-[ bonds ]
-H5T     O5'
-O5'    C5'
-C5'    C4'
-C4'    O4'
-C4'    C3'
-O4'    C1'
-C1'    N9
-C1'    C2'
-N9     C4
-N9     C8
-C4     N3
-C2     N2
-C2     N1
-N2     H21
-N2     H22
-N1     H1
-N1     C6
-C6     C5
-C5     N7
 C2'    C3'
 C3'    O3'
 C2'    O2'
@@ -784,17 +201,18 @@ C3'       H3'
 C4'    H4'
 C5'    H5'1
 C5'    H5'2
-C8     H8
-C2     N3
-C4     C5
-N7     C8
-C6     O6
+C5     H5
+C6     H6
+C2     O2
+C5     C6
+N3     C4
 [ impropers ]
-C2     N3      N1      N2
-C6     N1      C5      O6
-N2     H21     C2      H22
-[ RG3 ]
+C2     N1      N3      O2
+C4     N3      C5      N4
+N4     C4      H41     H42
+
+; --------------------------------------------------------------------
+[ RG ]
 ;               O6
 ;               ||   
 ;               C6    
@@ -815,7 +233,7 @@ N2  H21     C2      H22
 ;                      / \   / \
 ;                     O3' H3' O2' H2'2 
 ;                     |       |
-;                    H3T     H2'1
+;                             H2'1
 [ atoms ]
 P      P        1.50   0
 O1P    ON3     -0.78   1
@@ -848,10 +266,9 @@ C2'        CN7B     0.14   27
 H2'2   HN7      0.09   28
 O2'    ON5     -0.66   29
 H2'1   HN5      0.43   30
-C3'    CN7      0.14   31
+C3'    CN7      0.01   31
 H3'    HN7      0.09   32
-O3'    ON5     -0.66   33
-H3T     HN5      0.43   34
+O3'    ON2     -0.57   33
 [ bonds ]
 -O3'     P
 P      O1P
@@ -877,7 +294,6 @@ C6  C5
 C5     N7
 C2'    C3'
 C3'    O3'
-O3'    H3T
 C2'    O2'
 O2'    H2'1
 C1'    H1'
@@ -987,183 +403,4 @@ C5        C6
 [ impropers ]
 C2     N1      N3      O2
 C4     N3      C5      O4
-[ RU5 ]
- ;                         O4                               
- ;                         ||
- ;                         C4    H3
- ;                        /  \  /
- ;                    H5-C5   N3
- ;                       ||   |
- ;                    H6-C6   C2
- ;                        \  / \\    
- ;                         N1   O2
- ;                          \
- ;                           \
- ;                            \ 
- ;              H5'1 H4'  O4'  \
- ;                |    \ /   \  \
- ;       H5T-O5'-C5'---C4'    C1'
- ;                |     \     / \
- ;               H5'2  C3'--C2' H1'
- ;                     / \   / \
- ;                   O3' H3' O2' H2'2
- ;                    |       | 
- ;                           H2'1
- ;
-[ atoms ]
-H5T     HN5      0.43   0
-O5'    ON5     -0.66   1
-C5'    CN8B     0.05   2
-H5'1   HN8      0.09   3
-H5'2   HN8      0.09   4
-C4'    CN7      0.16   5
-H4'    HN7      0.09   6
-O4'    ON6B    -0.50   7
-C1'    CN7B     0.16   8
-H1'    HN7      0.09   9
-N1     NN2B    -0.34   10
-C6     CN3      0.20   11
-H6     HN3      0.14   12
-C2     CN1T     0.55   13
-O2     ON1     -0.45   14
-N3     NN2U    -0.46   15
-H3     HN2      0.36   16
-C4     CN1      0.53   17
-O4     ON1     -0.48   18
-C5     CN3     -0.15   19
-H5     HN3      0.10   20
-C2'    CN7B     0.14   21
-H2'2   HN7      0.09   22
-O2'    ON5     -0.66   23
-H2'1   HN5      0.43   24
-C3'    CN7      0.01   25
-H3'    HN7      0.09   26
-O3'    ON2     -0.57   27
-[ bonds ]
-H5T    O5'
-O5'    C5'
-C5'    C4'
-C4'    O4'
-C4'    C3'
-O4'    C1'
-C1'    N1
-C1'    C2'
-N1     C2
-N1     C6
-C2     N3
-N3     H3
-N3     C4
-C4     C5
-C2'    C3'
-C3'    O3'
-C2'    O2'
-O2'    H2'1
-C1'    H1'
-C2'    H2'2
-C3'    H3'
-C4'    H4'
-C5'    H5'1
-C5'    H5'2
-C5     H5
-C6     H6
-C2     O2
-C4     O4
-C5     C6
-[ impropers ]
-C2     N1      N3      O2
-C4     N3      C5      O4
-
-[ RU3 ]
- ;                         O4                               
- ;                         ||
- ;                         C4    H3
- ;                        /  \  /
- ;                    H5-C5   N3
- ;                       ||   |
- ;                    H6-C6   C2
- ;                        \  / \\    
- ;                         N1   O2
- ;                          \
- ;                           \
- ;                            \ 
- ;        O1P    H5'1H4'  O4'  \
- ;         |      |    \ /   \  \
- ;        -P-O5'-C5'---C4'    C1'
- ;         |      |     \     / \
- ;        O2P    H5'2  C3'--C2' H1'
- ;                     / \   / \
- ;                   O3' H3' O2' H2'2
- ;                    |       | 
- ;                   H3T     H2'1
- ;
-[ atoms ]
-P      P        1.50   0
-O1P    ON3     -0.78   1
-O2P    ON3     -0.78   2
-O5'    ON2     -0.57   3
-C5'    CN8B    -0.08   4
-H5'1   HN8      0.09   5
-H5'2   HN8      0.09   6
-C4'    CN7      0.16   7
-H4'    HN7      0.09   8
-O4'    ON6B    -0.50   9
-C1'    CN7B     0.16   10
-H1'    HN7      0.09   11
-N1     NN2B    -0.34   12
-C6     CN3      0.20   13
-H6     HN3      0.14   14
-C2     CN1T     0.55   15
-O2     ON1     -0.45   16
-N3     NN2U    -0.46   17
-H3     HN2      0.36   18
-C4     CN1      0.53   19
-O4     ON1     -0.48   20
-C5     CN3     -0.15   21
-H5     HN3      0.10   22
-C2'    CN7B     0.14   23
-H2'2   HN7      0.09   24
-O2'    ON5     -0.66   25
-H2'1   HN5      0.43   26
-C3'    CN7      0.14   27
-H3'    HN7      0.09   28
-O3'    ON5     -0.66   29
-H3T     HN5      0.43   30
-[ bonds ]
--O3'     P
-P      O1P
-P      O2P
-P      O5'
-O5'    C5'
-C5'    C4'
-C4'    O4'
-C4'    C3'
-O4'    C1'
-C1'    N1
-C1'    C2'
-N1     C2
-N1     C6
-C2     N3
-N3     H3
-N3     C4
-C4     C5
-C2'    C3'
-C3'    O3'
-O3'    H3T
-C2'    O2'
-O2'    H2'1
-C1'    H1'
-C2'    H2'2
-C3'    H3'
-C4'    H4'
-C5'    H5'1
-C5'    H5'2
-C5     H5
-C6     H6
-C2     O2
-C4     O4
-C5     C6
-[ impropers ]
-C2     N1      N3      O2
-C4     N3      C5      O4
 
index 823db9041c643335d7f9a9148250de288c7c7beb..551bb3517459c8ba361695d782347caedf9359a2 100644 (file)
@@ -5,3 +5,4 @@ HISD   HISA
 HISE   HISB
 LYS    LYSH
 LYSN   LYS
+HEM    HEME
index e31e651f8ca0ed85875849321bcd18f2afca9874..61b545b1d3c1a2041c4a962580928ce9599ad399 100644 (file)
@@ -2,9 +2,9 @@
 
 [ atomtypes ]
 ;   type      mass    charge    ptype       c6            c12
-    OMET    15.999    -0.69     A      2.6169e-3      2.5231e-6
+    OMet    15.999    -0.69     A      2.6169e-3      2.5231e-6
       OW    15.999    -0.82     A      2.6170e-3      2.6330e-6
-    CMET    15.035     0.29     A      8.8758e-3     17.8426e-6
+    CMet    15.035     0.29     A      8.8758e-3     17.8426e-6
        H     1.008     0.4      A      0.0            0.0
       HW     1.008     0.41     A      0.0            0.0
 #endif
@@ -16,12 +16,12 @@ Methanol        2
 [ atoms ]
 ;   nr  type    resnr   residu  atom    cgnr    charge mass
 #ifdef _FF_GROMOS96
-1       CMET     1       MeOH    Me1     1        0.176 15.035   
-2       OMET     1       MeOH    O2      1       -0.574 15.999 
+1       CMet     1       MeOH    Me1     1        0.176 15.035   
+2       OMet     1       MeOH    O2      1       -0.574 15.999 
 3       H        1       MeOH    H3      1        0.398  1.008 
 #else
-1       CMET     1       MeOH    Me1     1        0.29  15.035
-2       OMET     1       MeOH    O2      1       -0.69  15.999
+1       CMet     1       MeOH    Me1     1        0.29  15.035
+2       OMet     1       MeOH    O2      1       -0.69  15.999
 3       H        1       MeOH    H3      1        0.40   1.008
 #endif
 
index 823db9041c643335d7f9a9148250de288c7c7beb..551bb3517459c8ba361695d782347caedf9359a2 100644 (file)
@@ -5,3 +5,4 @@ HISD   HISA
 HISE   HISB
 LYS    LYSH
 LYSN   LYS
+HEM    HEME
index 823db9041c643335d7f9a9148250de288c7c7beb..551bb3517459c8ba361695d782347caedf9359a2 100644 (file)
@@ -5,3 +5,4 @@ HISD   HISA
 HISE   HISB
 LYS    LYSH
 LYSN   LYS
+HEM    HEME
index 823db9041c643335d7f9a9148250de288c7c7beb..551bb3517459c8ba361695d782347caedf9359a2 100644 (file)
@@ -5,3 +5,4 @@ HISD   HISA
 HISE   HISB
 LYS    LYSH
 LYSN   LYS
+HEM    HEME
index 823db9041c643335d7f9a9148250de288c7c7beb..551bb3517459c8ba361695d782347caedf9359a2 100644 (file)
@@ -5,3 +5,4 @@ HISD   HISA
 HISE   HISB
 LYS    LYSH
 LYSN   LYS
+HEM    HEME
index fa17ed1c9d01dcef432363017da6cff43740f4c0..e14729550dc75b38ee8f1c52fd6fa1156ea4e39a 100644 (file)
@@ -1,4 +1,4 @@
-363
+364
 ��ߦ��ߨ���߬��������߻���ߦ��߷���ߋ�߻�߶�ߦ��������׷���������߶��
 �߳���ߋ��߳���߫���ߨ���߫���߻���׫������
 �����߽����߲�߷������ײ��������
 �����،ߑ�߈��߆��ߜ��ߍ���ߐ�ߞ�ߚ����������׸�����߸��������
 �ߓ���ߋ�߈����ߋ���߶ߙ���ߓ���߶ߛ�ߌ���������׼���߼�������
 ���߶ߗ���ߚ���������ߓ�����ߋ���ߚ���������ߚ����׻���߯������
+��،ߊ����ߜ���ߋ���ߐ���߆��ߞ��߶ߔ����״���߽����
index a0939261188ae5483b4017c814f8c322a3069197..48c3d7af658f24e22601e3def9fd2116c57b62ca 100644 (file)
@@ -84,7 +84,7 @@ CU2+          1
 
 [ atoms ]
 ; id   at type res nr  residu name     at name  cg nr  charge   mass
-1      CU2+    1       CU2+            CU       1      2        63.54600
+1      Cu2+    1       CU2+            CU       1      2        63.54600
 
 [ moleculetype ]
 ; molname      nrexcl
@@ -92,7 +92,7 @@ ZN2+          1
 
 [ atoms ]
 ; id   at type res nr  residu name     at name  cg nr  charge   mass
-1      ZN2+    1       ZN2+            ZN       1      2        65.37000
+1      Zn2+    1       ZN2+            ZN       1      2        65.37000
 
 [ moleculetype ]
 ; molname      nrexcl
index a8c687baa02d537c7fb29863310276fc07047d50..6b51da84d4f6af7397cb6e3220cc936a732fd0c4 100644 (file)
 /* Use the GROMACS software 1/sqrt(x) */
 #cmakedefine GMX_SOFTWARE_INVSQRT
 
+/* Use the PowerPC hardware 1/sqrt(x) */
+#cmakedefine GMX_POWERPC_INVSQRT
+
 /* Compile with dlopen */
 #cmakedefine GMX_DLOPEN
 
index b78966cbfac45c27f7634ecf462d5d6c751409d5..21bdd4c4236583e43d039fbd1ca368b7b935bac4 100644 (file)
@@ -62,6 +62,22 @@ if(GMX_X86_64_ASM)
   endif()
 endif(GMX_X86_64_ASM)
 
+if(GMX_FORTRAN)
+  if (GMX_DOUBLE)
+    file(GLOB FORTRAN_SOURCES nonbonded/nb_kernel_f77_double/*.[cf])
+  else(GMX_DOUBLE)
+    file(GLOB FORTRAN_SOURCES nonbonded/nb_kernel_f77_single/*.[cf])
+  endif(GMX_DOUBLE)
+endif(GMX_FORTRAN)
+
+if(GMX_POWER6)
+  file(GLOB FORTRAN_SOURCES nonbonded/nb_kernel_power6/*.[cF])
+endif(GMX_POWER6)
+
+if(GMX_BLUEGENE)
+  file(GLOB GMX_BLUEGENE_C_SRC nonbonded/nb_kernel_bluegene/*.c)
+endif(GMX_BLUEGENE)
+
 if(NOT GMX_EXTERNAL_BLAS)
   file(GLOB BLAS_SOURCES gmx_blas/*.c)
 endif(NOT GMX_EXTERNAL_BLAS)
@@ -118,13 +134,18 @@ else(GMX_ASM_USEASM-NASM)
 endif(GMX_ASM_USEASM-NASM)
 endif(NOT GMX_OPENMM)
 
-add_library(gmx ${GMXLIB_SOURCES} ${BLAS_SOURCES} ${LAPACK_SOURCES} ${GMX_SSEKERNEL_C_SRC} ${GMX_SSEKERNEL_ASM_SRC} ${THREAD_MPI_SRC})
+add_library(gmx ${GMXLIB_SOURCES} ${BLAS_SOURCES} ${LAPACK_SOURCES} ${GMX_SSEKERNEL_C_SRC} ${GMX_SSEKERNEL_ASM_SRC} ${FORTRAN_SOURCES} ${GMX_BLUEGENE_C_SRC} ${THREAD_MPI_SRC})
 target_link_libraries(gmx ${GMX_EXTRA_LIBRARIES}  ${THREAD_LIB})
-add_dependencies(gmx gmx_version) 
+if(USE_VERSION_H)
+       add_dependencies(gmx gmx_version) 
+endif()
 set_target_properties(gmx PROPERTIES OUTPUT_NAME "gmx${GMX_LIBS_SUFFIX}" SOVERSION ${SOVERSION} INSTALL_NAME_DIR "${LIB_INSTALL_DIR}")
 
-install(TARGETS gmx DESTINATION ${LIB_INSTALL_DIR})
+install(TARGETS gmx DESTINATION ${LIB_INSTALL_DIR} COMPONENT libraries)
 
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/libgmx.pc.cmakein ${CMAKE_CURRENT_BINARY_DIR}/libgmx.pc @ONLY)
-install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libgmx.pc DESTINATION ${LIB_INSTALL_DIR}/pkgconfig RENAME "libgmx${GMX_LIBS_SUFFIX}.pc")
+install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libgmx.pc
+        DESTINATION ${LIB_INSTALL_DIR}/pkgconfig
+        RENAME "libgmx${GMX_LIBS_SUFFIX}.pc"
+        COMPONENT development)
 
index 0beb0ad2590ae5f394111dbd7288ebdd9fb6e164..ce3323560c04aed040adcddce14c24924e5255c0 100644 (file)
@@ -45,7 +45,7 @@ libgmx@LIBSUFFIX@_la_DEPENDENCIES = nonbonded/libnonbonded.la         \
 
 #      
 #
-libgmx@LIBSUFFIX@_la_LDFLAGS = -no-undefined -version-info @SHARED_VERSION_INFO@ $(PTHREAD_LIBS)
+libgmx@LIBSUFFIX@_la_LDFLAGS = -no-undefined -version-info @SHARED_VERSION_INFO@ @DLOPEN_LIBS@ $(PTHREAD_LIBS)
 
 libgmx@LIBSUFFIX@_la_SOURCES = \
        3dview.c        atomprop.c      bondfree.c      \
index 0d0b74a5db4e2b86fc8840dbd46499c0bb418991..c31137433a262beef2f9b0c62ec2206ee72fed9b 100644 (file)
@@ -1457,23 +1457,32 @@ static void read_checkpoint(const char *fn,FILE **pfplog,
     int  natoms,ngtc,nnhpres,nhchainlength,fflags,flags_eks,flags_enh;
     int  d;
     int  ret;
-       gmx_file_position_t *outputfiles;
-       int  nfiles;
-       t_fileio *chksum_file;
-       FILE* fplog = *pfplog;
-       unsigned char digest[16];
+    gmx_file_position_t *outputfiles;
+    int  nfiles;
+    t_fileio *chksum_file;
+    FILE* fplog = *pfplog;
+    unsigned char digest[16];
 #if !((defined WIN32 || defined _WIN32 || defined WIN64 || defined _WIN64) && !defined __CYGWIN__ && !defined __CYGWIN32__)
-       struct flock fl = { 0, SEEK_SET, 0,       F_WRLCK,     0 }; 
+    struct flock fl;  /* don't initialize here: the struct order is OS 
+                         dependent! */
 #endif
-       
+
     const char *int_warn=
-        "WARNING: The checkpoint file was generator with integrator %s,\n"
-        "         while the simulation uses integrator %s\n\n";
+              "WARNING: The checkpoint file was generated with integrator %s,\n"
+              "         while the simulation uses integrator %s\n\n";
     const char *sd_note=
         "NOTE: The checkpoint file was for %d nodes doing SD or BD,\n"
         "      while the simulation uses %d SD or BD nodes,\n"
         "      continuation will be exact, except for the random state\n\n";
     
+#if !((defined WIN32 || defined _WIN32 || defined WIN64 || defined _WIN64) && !defined __CYGWIN__ && !defined __CYGWIN32__) 
+    fl.l_type=F_WRLCK;
+    fl.l_whence=SEEK_SET;
+    fl.l_start=0;
+    fl.l_len=0;
+    fl.l_pid=0;
+#endif
+
     if (PARTDECOMP(cr))
     {
         gmx_fatal(FARGS,
@@ -1745,16 +1754,19 @@ static void read_checkpoint(const char *fn,FILE **pfplog,
             if (outputfiles[i].chksum_size != -1)
             {
                 if (gmx_fio_get_file_md5(chksum_file,outputfiles[i].offset,
-                                     digest) != outputfiles[i].chksum_size)
+                                     digest) != outputfiles[i].chksum_size)  /*at the end of the call the file position is at the end of the file*/
                 {
                     gmx_fatal(FARGS,"Can't read %d bytes of '%s' to compute checksum. The file has been replaced or its contents has been modified.",
                               outputfiles[i].chksum_size, 
                               outputfiles[i].filename);
                 }
             } 
-            else if (i==0)  /*log file need to be seeked even when not reading md5*/
+            if (i==0)  /*log file needs to be seeked in case we need to truncate (other files are truncated below)*/
             {
-                gmx_fio_seek(chksum_file,outputfiles[i].offset);
+                if (gmx_fio_seek(chksum_file,outputfiles[i].offset))
+                {
+                       gmx_fatal(FARGS,"Seek error! Failed to truncate log-file: %s.", strerror(errno));
+                }
             }
 #endif
             
index 353fba1781ef6a25a612bdfe53133f60bb2e8e80..e9c9790f8f05c1231d26bf32a5c59a1c6052547b 100644 (file)
@@ -145,7 +145,7 @@ static void ster_print(FILE *out,const char *s)
   int  slen;
   char buf[128];
   
-  sprintf(buf,":-)  %s  (-:",s);
+  snprintf(buf,128,":-)  %s  (-:",s);
   slen=strlen(buf);
   space(out,(80-slen)/2);
   fprintf(out,"%s\n",buf);
@@ -258,7 +258,7 @@ void CopyRight(FILE *out,const char *szProgram)
 
   fprintf(out,"\n");
 
-  sprintf(buf,"%s",Program());
+  snprintf(buf,256,"%s",Program());
 #ifdef GMX_DOUBLE
   strcat(buf," (double precision)");
 #endif
@@ -358,6 +358,11 @@ void please_cite(FILE *fp,const char *key)
       "GROMACS 4: Algorithms for highly efficient, load-balanced, and scalable molecular simulation",
       "J. Chem. Theory Comput.",
       4, 2008, "435-447" },
+    { "Hub2010",
+      "J. S. Hub, B. L. de Groot and D. van der Spoel",
+      "g_wham - A free weighted histogram analysis implementation including robust error and autocorrelation estimates",
+      "J. Chem. Theory Comput.",
+      0, 2010, "0000-0000"}, 
     { "In-Chul99a",
       "Y. In-Chul and M. L. Berkowitz",
       "Ewald summation for systems with slab geometry",
index fef5f69ee58682a8ea24242c672cbf7b509a5e89..4999c6a63209316737b68947013f1f755ebb2181 100644 (file)
@@ -207,7 +207,7 @@ static void enxsubblock_alloc(t_enxsubblock *sb)
             }
             break;
         default:
-            gmx_incons("Unknown block type");
+            gmx_incons("Unknown block type: this file is corrupted or from the future");
     }
 }
 
@@ -314,6 +314,19 @@ void add_subblocks_enxblock(t_enxblock *eb, int n)
     }
 }
 
+/* Report a problem detected while reading an energy file.
+ * msg is the plain-text description of the problem (it is printed
+ * verbatim, never interpreted as a format string).
+ * When the environment variable GMX_ENX_NO_FATAL is set, msg is only
+ * issued through gmx_warning; otherwise gmx_fatal is called with msg
+ * followed by a hint about GMX_ENX_NO_FATAL.
+ */
+static void enx_warning(const char *msg)
+{
+    if (getenv("GMX_ENX_NO_FATAL") != NULL)
+    {
+        /* pass msg as data: a '%' inside it must not be expanded */
+        gmx_warning("%s",msg);
+    }
+    else
+    {
+        gmx_fatal(FARGS,"%s\n%s",
+                  msg,
+                  "If you want to use the correct frames before the corrupted frame and avoid this fatal error set the env.var. GMX_ENX_NO_FATAL");
+    }
+}
 
 static void edr_strings(XDR *xdr,gmx_bool bRead,int file_version,
                         int n,gmx_enxnm_t **nms)
@@ -429,7 +442,7 @@ static gmx_bool do_eheader(ener_file_t ef,int *file_version,t_enxframe *fr,
                        int nre_test,gmx_bool *bWrongPrecision,gmx_bool *bOK)
 {
     int  magic=-7777777;
-    real r;
+    real first_real_to_check;
     int  b,i,zero=0,dum=0;
     gmx_bool bRead = gmx_fio_getread(ef->fio);
     int  tempfix_nr=0;
@@ -457,16 +470,16 @@ static gmx_bool do_eheader(ener_file_t ef,int *file_version,t_enxframe *fr,
      * (which is the case for for instance the block sizes for variable
      * number of blocks, where this number is read before).
      */
-    r = -2e10;
-    if (!gmx_fio_do_real(ef->fio, r))
+    first_real_to_check = -2e10;
+    if (!gmx_fio_do_real(ef->fio, first_real_to_check))
     {
         return FALSE;
     }
-    if (r > -1e10)
+    if (first_real_to_check > -1e10)
     {
         /* Assume we are reading an old format */
         *file_version = 1;
-        fr->t = r;
+        fr->t = first_real_to_check;
         if (!gmx_fio_do_int(ef->fio, dum))   *bOK = FALSE;
         fr->step = dum;
     }
@@ -475,7 +488,9 @@ static gmx_bool do_eheader(ener_file_t ef,int *file_version,t_enxframe *fr,
         if (!gmx_fio_do_int(ef->fio, magic))       *bOK = FALSE;
         if (magic != -7777777)
         {
-            gmx_fatal(FARGS,"Energy header magic number mismatch, this is not a GROMACS edr file");
+            enx_warning("Energy header magic number mismatch, this is not a GROMACS edr file");
+            *bOK=FALSE;
+            return FALSE;
         }
         *file_version = enx_version;
         if (!gmx_fio_do_int(ef->fio, *file_version)) *bOK = FALSE;
@@ -522,11 +537,16 @@ static gmx_bool do_eheader(ener_file_t ef,int *file_version,t_enxframe *fr,
     }
 
     if (!gmx_fio_do_int(ef->fio, fr->nblock))  *bOK = FALSE;
+    if (fr->nblock < 0) *bOK=FALSE;
 
     if (ndisre!=0)
     {
         if (*file_version >= 4)
-            gmx_incons("Distance restraint blocks in old style in new style file");
+        {
+            enx_warning("Distance restraint blocks in old style in new style file");
+            *bOK=FALSE;
+            return FALSE;
+        }
         fr->nblock+=1;
     }
 
@@ -540,8 +560,20 @@ static gmx_bool do_eheader(ener_file_t ef,int *file_version,t_enxframe *fr,
         return *bOK;
     }
 
+    /* we now know what these should be, or we've already bailed out because
+       of wrong precision */
+    if ( *file_version==1 && (fr->t < 0 || fr->t > 1e20 || fr->step < 0 ) )
+    {
+        enx_warning("edr file with negative step number or unreasonable time (and without version number).");
+        *bOK=FALSE;
+        return FALSE;
+    }
+
+
     if (*bOK && bRead)
+    {
         add_blocks_enxframe(fr, fr->nblock);
+    }
 
     startb=0;
     if (ndisre>0)
@@ -572,7 +604,9 @@ static gmx_bool do_eheader(ener_file_t ef,int *file_version,t_enxframe *fr,
             else
             {
                 if (fr->block[b].nsub != 1)
+                {
                     gmx_incons("Writing an old version .edr file with too many subblocks");
+                }
                 if (fr->block[b].sub[0].type != dtreal)
                 {
                     gmx_incons("Writing an old version .edr file the wrong subblock type");
@@ -690,7 +724,7 @@ ener_file_t open_enx(const char *fn,const char *mode)
     gmx_enxnm_t *nms=NULL;
     int        file_version=-1;
     t_enxframe *fr;
-    gmx_bool       bWrongPrecision,bDum=TRUE;
+    gmx_bool       bWrongPrecision,bOK=TRUE;
     struct ener_file *ef;
 
     snew(ef,1);
@@ -701,8 +735,8 @@ ener_file_t open_enx(const char *fn,const char *mode)
         gmx_fio_setprecision(ef->fio,FALSE);
         do_enxnms(ef,&nre,&nms);
         snew(fr,1);
-        do_eheader(ef,&file_version,fr,nre,&bWrongPrecision,&bDum);
-        if(!bDum)
+        do_eheader(ef,&file_version,fr,nre,&bWrongPrecision,&bOK);
+        if(!bOK)
         {
             gmx_file("Cannot read energy file header. Corrupt file?");
         }
@@ -721,8 +755,8 @@ ener_file_t open_enx(const char *fn,const char *mode)
             gmx_fio_checktype(ef->fio);
             gmx_fio_setprecision(ef->fio,TRUE);
             do_enxnms(ef,&nre,&nms);
-            do_eheader(ef,&file_version,fr,nre,&bWrongPrecision,&bDum);
-            if(!bDum)
+            do_eheader(ef,&file_version,fr,nre,&bWrongPrecision,&bOK);
+            if(!bOK)
             {
                 gmx_file("Cannot write energy file header; maybe you are out of quota?");
             }
@@ -973,7 +1007,7 @@ gmx_bool do_enx(ener_file_t ef,t_enxframe *fr)
                     bOK1=gmx_fio_ndo_string(ef->fio, sub->sval, sub->nr);
                     break;
                 default:
-                    gmx_incons("Reading unknown block type");
+                    gmx_incons("Reading unknown block data type: this file is corrupted or from the future");
             }
             bOK = bOK && bOK1;
         }
diff --git a/src/gmxlib/ftocstr.c b/src/gmxlib/ftocstr.c
deleted file mode 100644 (file)
index b874ee0..0000000
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * 
- *                This source code is part of
- * 
- *                 G   R   O   M   A   C   S
- * 
- *          GROningen MAchine for Chemical Simulations
- * 
- *                        VERSION 3.2.0
- * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2004, The GROMACS development team,
- * check out http://www.gromacs.org for more information.
-
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- * 
- * If you want to redistribute modifications, please consider that
- * scientific software is very special. Version control is crucial -
- * bugs must be traceable. We will be happy to consider code for
- * inclusion in the official distribution, but derived work must not
- * be called official GROMACS. Details are found in the README & COPYING
- * files - if they are missing, get the official version at www.gromacs.org.
- * 
- * To help us fund GROMACS development, we humbly ask that you cite
- * the papers on the package - you can find them in the top README file.
- * 
- * For more info, check our website at http://www.gromacs.org
- * 
- * And Hey:
- * GROningen Mixture of Alchemy and Childrens' Stories
- */
-/* This file is completely threadsafe - keep it that way! */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-int ftocstr(char *ds, int dl, char *ss, int sl)
-    /* dst, src ptrs */
-    /* dst max len */
-    /* src len */
-{
-    char *p;
-
-    p = ss + sl;
-    while ( --p >= ss && *p == ' ' );
-    sl = p - ss + 1;
-    dl--;
-    ds[0] = 0;
-    if (sl > dl)
-      return 1;
-    while (sl--)
-      (*ds++ = *ss++);
-    *ds = '\0';
-    return 0;
-}
-
-
-int ctofstr(char *ds, int dl, char *ss)
-     /* dest space */
-     /* max dest length */
-     /* src string (0-term) */
-{
-    while (dl && *ss) {
-       *ds++ = *ss++;
-       dl--;
-    }
-    while (dl--)
-       *ds++ = ' ';
-    return 0;
-}
index 587a554a923564c8462822f3c342c7f1c83ace5d..e83788d870ac83321ee78ea4fc77a642e185470f 100644 (file)
@@ -184,12 +184,7 @@ const char *gmx_fio_dbgstr(t_fileio *fio, const char *desc, char *buf)
     }
     else
     {
-#if (defined( _WIN32 ) || defined( _WIN64 ) )
-        /* windows doesn't do standard C */
-#define snprintf sprintf_s
-#endif
-        snprintf(buf, GMX_FIO_BUFLEN, "  ; %s %s", 
-                 fio->comment ? fio->comment : "", desc);
+        snprintf(buf, GMX_FIO_BUFLEN, "  ; %s %s", fio->comment ? fio->comment : "", desc);
     }
     return buf;
 }
@@ -1087,7 +1082,7 @@ int gmx_fio_seek(t_fileio* fio, gmx_off_t fpos)
     gmx_fio_lock(fio);
     if (fio->fp)
     {
-        gmx_fseek(fio->fp, fpos, SEEK_SET);
+        rc = gmx_fseek(fio->fp, fpos, SEEK_SET);
     }
     else
     {
index 4156d70405edbf6f9d30b6dedb597edab3cc0df8..0cb96f65d215944c381281358b8eeaea56edd5c1 100644 (file)
@@ -662,6 +662,42 @@ gmx_residuetype_is_rna(gmx_residuetype_t rt, const char *resnm)
     return rc;
 }
 
+/* Return rt->n, the number of entries held in the residuetype arrays of rt */
+int
+gmx_residuetype_get_size(gmx_residuetype_t rt)
+{
+    return rt->n;
+}
+
+/* Find resnm in the gmx_residuetype database rt, comparing names
+ * with gmx_strcasecmp. Return the index of the first match, or -1.
+ */
+int
+gmx_residuetype_get_index(gmx_residuetype_t rt, const char *resnm)
+{
+    int idx;
+
+    for(idx=0;idx<rt->n;idx++)
+    {
+        if (0 == gmx_strcasecmp(rt->resname[idx],resnm))
+        {
+            return idx;
+        }
+    }
+    return -1;
+}
+
+/* Return the residue name stored at position index in rt, or
+ * NULL when index lies outside the range 0 .. rt->n-1. */
+const char *
+gmx_residuetype_get_name(gmx_residuetype_t rt, int index)
+{
+    if (index < 0 || index >= rt->n)
+    {
+        return NULL;
+    }
+    return rt->resname[index];
+}
 
 
 
index 6365da6a937f8a8137946a6ec2d8efbce63dd16e..df1ad2ff3abc8f5c202890b8b68b572f71a3197b 100644 (file)
@@ -173,7 +173,7 @@ const char *egb_names[egbNR+1] = {
 };
 
 const char *esa_names[esaNR+1] = {
-  "Ace-approx", "None", "Still", NULL
+  "Ace-approximation", "None", "Still", NULL
 };
 
 const char *ewt_names[ewtNR+1] = {
index 9d37b2465b7ad42b360777365b2bef35b3b5d555..68b4fb39ddc4dc016aaa96efff2d5c9b1c5a5489 100644 (file)
 
 */
 
-/* The optimized version of converts2ints is disabled
+/* The optimized version of converts2ints is disabled on BG/P
  * because of issues on BG/P reported in bugzilla 429
  */
-/* #if (defined __IBMC__ || defined __IBMCPP__) */
-#if (0)
+#if defined __blrts__
 
 #define convert2ints(x,xi,conv,i1,i2)                      \
     xi      = __fpctiwz(x);                                \
index ec24e41fc0c8c41bd97230570f0285649b1b5cca..1aa96e4fbbac954325cbf8c522c6237033c82b55 100644 (file)
@@ -61,11 +61,11 @@ void nb_kernel400_ia32_sse2(int *           p_nri,
                               int *           inneriter,
                               double *         work)
 {
-    int           nri,nthreads;
-    int           n,ii,is3,ii3,k,nj0,nj1,ggid;
-    double        shX,shY,shZ;
-    int           jnrA,jnrB;
-    int           j3A,j3B;
+  int           nri,nthreads;
+  int           n,ii,is3,ii3,k,nj0,nj1,ggid;
+  double        shX,shY,shZ;
+  int           jnrA,jnrB;
+  int           j3A,j3B;
        gmx_gbdata_t *gbdata;
        double *      gpol;
     
@@ -93,35 +93,35 @@ void nb_kernel400_ia32_sse2(int *           p_nri,
     
        nri        = *p_nri;
     
-    gbfactor   = _mm_set1_pd( - ((1.0/gbdata->epsilon_r) - (1.0/gbdata->gb_epsilon_solvent)));     
-    gbtabscale = _mm_load1_pd(p_gbtabscale);  
-    facel      = _mm_load1_pd(p_facel);
-    
-    nj1         = 0;
-    jnrA = jnrB = 0;
-    j3A = j3B   = 0;
-    jx          = _mm_setzero_pd();
-    jy          = _mm_setzero_pd();
-    jz          = _mm_setzero_pd();
+  gbfactor   = _mm_set1_pd( - ((1.0/gbdata->epsilon_r) - (1.0/gbdata->gb_epsilon_solvent)));     
+  gbtabscale = _mm_load1_pd(p_gbtabscale);  
+  facel      = _mm_load1_pd(p_facel);
+  
+  nj1         = 0;
+  jnrA = jnrB = 0;
+  j3A = j3B   = 0;
+  jx          = _mm_setzero_pd();
+  jy          = _mm_setzero_pd();
+  jz          = _mm_setzero_pd();
        
        for(n=0;n<nri;n++)
        {
-        is3              = 3*shift[n];     
-        shX              = shiftvec[is3];  
-        shY              = shiftvec[is3+1];
-        shZ              = shiftvec[is3+2];
-        nj0              = jindex[n];      
-        nj1              = jindex[n+1];    
-        ii               = iinr[n];        
-        ii3              = 3*ii;           
+    is3              = 3*shift[n];     
+    shX              = shiftvec[is3];  
+    shY              = shiftvec[is3+1];
+    shZ              = shiftvec[is3+2];
+    nj0              = jindex[n];      
+    nj1              = jindex[n+1];    
+    ii               = iinr[n];        
+    ii3              = 3*ii;           
                
                ix               = _mm_set1_pd(shX+pos[ii3+0]);
                iy               = _mm_set1_pd(shY+pos[ii3+1]);
                iz               = _mm_set1_pd(shZ+pos[ii3+2]);
-        
+    
                iq               = _mm_load1_pd(charge+ii);
                iq               = _mm_mul_pd(iq,facel);
-        
+    
                isai             = _mm_load1_pd(invsqrta+ii);
                        
                vctot            = _mm_setzero_pd();
@@ -138,18 +138,18 @@ void nb_kernel400_ia32_sse2(int *           p_nri,
                        
                        j3A     = jnrA * 3;
                        j3B     = jnrB * 3;
-            
-            GMX_MM_LOAD_1RVEC_2POINTERS_PD(pos+j3A,pos+j3B,jx,jy,jz);
+      
+      GMX_MM_LOAD_1RVEC_2POINTERS_PD(pos+j3A,pos+j3B,jx,jy,jz);
             
                        dx           = _mm_sub_pd(ix,jx);
                        dy           = _mm_sub_pd(iy,jy);
                        dz           = _mm_sub_pd(iz,jz);
             
-            rsq          = gmx_mm_calc_rsq_pd(dx,dy,dz);
-            
-            rinv         = gmx_mm_invsqrt_pd(rsq);
+      rsq          = gmx_mm_calc_rsq_pd(dx,dy,dz);
+      
+      rinv         = gmx_mm_invsqrt_pd(rsq);
                        rinvsq       = _mm_mul_pd(rinv,rinv);
-            
+      
                        /***********************************/
                        /* INTERACTION SECTION STARTS HERE */
                        /***********************************/
@@ -160,7 +160,7 @@ void nb_kernel400_ia32_sse2(int *           p_nri,
                        qq           = _mm_mul_pd(iq,jq);            
                        vcoul        = _mm_mul_pd(qq,rinv);
                        fscal        = _mm_mul_pd(vcoul,rinv);                                 
-            vctot        = _mm_add_pd(vctot,vcoul);
+      vctot        = _mm_add_pd(vctot,vcoul);
             
             /* Polarization interaction */
                        qq           = _mm_mul_pd(qq,_mm_mul_pd(isaprod,gbfactor));
@@ -174,48 +174,48 @@ void nb_kernel400_ia32_sse2(int *           p_nri,
                        eps              = _mm_sub_pd(rtab,_mm_cvtepi32_pd(n0));
                        nnn                  = _mm_slli_epi32(n0,2);
                        
-            /* the tables are 16-byte aligned, so we can use _mm_load_pd */                    
-            Y            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))); 
-            F            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,1)));
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))+2); 
-            H            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,1))+2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            
-            G       = _mm_mul_pd(G,eps);
-            H       = _mm_mul_pd(H, _mm_mul_pd(eps,eps) );
-            F       = _mm_add_pd(F, _mm_add_pd( G , H ) );
-            Y       = _mm_add_pd(Y, _mm_mul_pd(F, eps));
-            F       = _mm_add_pd(F, _mm_add_pd(G , _mm_mul_pd(H,two)));
-            vgb     = _mm_mul_pd(Y, qq);           
-            fijGB   = _mm_mul_pd(F, _mm_mul_pd(qq,gbscale));
-            
-            dvdatmp = _mm_mul_pd(_mm_add_pd(vgb, _mm_mul_pd(fijGB,r)) , minushalf);
-
-            vgbtot  = _mm_add_pd(vgbtot, vgb);
-            
-            dvdasum = _mm_add_pd(dvdasum, dvdatmp);
-            dvdatmp = _mm_mul_pd(dvdatmp, _mm_mul_pd(isaj,isaj));
-            
-            GMX_MM_INCREMENT_2VALUES_PD(dvda+jnrA,dvda+jnrB,dvdatmp);
-                                   
-            fscal        = _mm_mul_pd( _mm_sub_pd( fscal, fijGB),rinv );
-            
-            /***********************************/
+      /* the tables are 16-byte aligned, so we can use _mm_load_pd */                  
+      Y            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))); 
+      F            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,1)));
+      GMX_MM_TRANSPOSE2_PD(Y,F);
+      G            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))+2); 
+      H            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,1))+2);
+      GMX_MM_TRANSPOSE2_PD(G,H);
+      
+      G       = _mm_mul_pd(G,eps);
+      H       = _mm_mul_pd(H, _mm_mul_pd(eps,eps) );
+      F       = _mm_add_pd(F, _mm_add_pd( G , H ) );
+      Y       = _mm_add_pd(Y, _mm_mul_pd(F, eps));
+      F       = _mm_add_pd(F, _mm_add_pd(G , _mm_mul_pd(H,two)));
+      vgb     = _mm_mul_pd(Y, qq);           
+      fijGB   = _mm_mul_pd(F, _mm_mul_pd(qq,gbscale));
+      
+      dvdatmp = _mm_mul_pd(_mm_add_pd(vgb, _mm_mul_pd(fijGB,r)) , minushalf);
+      
+      vgbtot  = _mm_add_pd(vgbtot, vgb);
+      
+      dvdasum = _mm_add_pd(dvdasum, dvdatmp);
+      dvdatmp = _mm_mul_pd(dvdatmp, _mm_mul_pd(isaj,isaj));
+      
+      GMX_MM_INCREMENT_2VALUES_PD(dvda+jnrA,dvda+jnrB,dvdatmp);
+      
+      fscal        = _mm_mul_pd( _mm_sub_pd( fscal, fijGB),rinv );
+      
+      /***********************************/
                        /*  INTERACTION SECTION ENDS HERE  */
                        /***********************************/
-            
-            /* Calculate temporary vectorial force */
-            tx           = _mm_mul_pd(fscal,dx);
-            ty           = _mm_mul_pd(fscal,dy);
-            tz           = _mm_mul_pd(fscal,dz);
-            
-            /* Increment i atom force */
-            fix          = _mm_add_pd(fix,tx);
-            fiy          = _mm_add_pd(fiy,ty);
-            fiz          = _mm_add_pd(fiz,tz);
-            
-            /* Store j forces back */
+      
+      /* Calculate temporary vectorial force */
+      tx           = _mm_mul_pd(fscal,dx);
+      ty           = _mm_mul_pd(fscal,dy);
+      tz           = _mm_mul_pd(fscal,dz);
+      
+      /* Increment i atom force */
+      fix          = _mm_add_pd(fix,tx);
+      fiy          = _mm_add_pd(fiy,ty);
+      fiz          = _mm_add_pd(fiz,tz);
+      
+      /* Store j forces back */
                        GMX_MM_DECREMENT_1RVEC_2POINTERS_PD(faction+j3A,faction+j3B,tx,ty,tz);
                }
                
@@ -224,96 +224,109 @@ void nb_kernel400_ia32_sse2(int *           p_nri,
                {
                        jnrA    = jjnr[k];
                        j3A     = jnrA * 3;
-            
-            GMX_MM_LOAD_1RVEC_1POINTER_PD(pos+j3A,jx,jy,jz);
-            
+      
+      GMX_MM_LOAD_1RVEC_1POINTER_PD(pos+j3A,jx,jy,jz);
+      
                        dx           = _mm_sub_sd(ix,jx);
                        dy           = _mm_sub_sd(iy,jy);
                        dz           = _mm_sub_sd(iz,jz);
-            
-            rsq          = gmx_mm_calc_rsq_pd(dx,dy,dz);
-            
-            rinv         = gmx_mm_invsqrt_pd(rsq);
+      
+      rsq          = gmx_mm_calc_rsq_pd(dx,dy,dz);
+      
+      rinv         = gmx_mm_invsqrt_pd(rsq);
                        rinvsq       = _mm_mul_sd(rinv,rinv);
-            
+      
+      /* The reason for zeroing these variables here is to fix bug 585
+       * What happens is that __m128d _mm_add_sd(a,b) gives back r0=a[0]+b[0],
+       * and r1=0, but it should be r1=a[1]. 
+       * This might be a compiler issue (tested with gcc-4.1.3 and -O3).
+       * To work around it, we zero these variables and use _mm_add_pd (**) instead
+       * Note that the only variables that get affected are the energies since
+       * the total sum needs to be correct 
+       */
+      vgb          = _mm_setzero_pd();
+      vcoul        = _mm_setzero_pd();
+      dvdatmp      = _mm_setzero_pd();
+      
                        /***********************************/
                        /* INTERACTION SECTION STARTS HERE */
                        /***********************************/
                        GMX_MM_LOAD_1VALUE_PD(charge+jnrA,jq);
                        GMX_MM_LOAD_1VALUE_PD(invsqrta+jnrA,isaj);
-                               
+      
                        isaprod      = _mm_mul_sd(isai,isaj);
-                       qq           = _mm_mul_sd(iq,jq);            
+                       qq           = _mm_mul_sd(jq,iq);            
                        vcoul        = _mm_mul_sd(qq,rinv);
                        fscal        = _mm_mul_sd(vcoul,rinv);                                 
-            vctot        = _mm_add_sd(vctot,vcoul);
-            
-            /* Polarization interaction */
+      vctot        = _mm_add_pd(vctot,vcoul); /* (**) */
+      
+      /* Polarization interaction */
                        qq           = _mm_mul_sd(qq,_mm_mul_sd(isaprod,gbfactor));
                        gbscale      = _mm_mul_sd(isaprod,gbtabscale);
-            
+      
                        /* Calculate GB table index */
                        r            = _mm_mul_sd(rsq,rinv);
                        rtab         = _mm_mul_sd(r,gbscale);
-
+      
                        n0                   = _mm_cvttpd_epi32(rtab);
                        eps              = _mm_sub_sd(rtab,_mm_cvtepi32_pd(n0));
                        nnn                  = _mm_slli_epi32(n0,2);
                        
-            /* the tables are 16-byte aligned, so we can use _mm_load_pd */                    
-            Y            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))); 
-            F            = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))+2); 
-            H            = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            
-            G       = _mm_mul_sd(G,eps);
-            H       = _mm_mul_sd(H, _mm_mul_sd(eps,eps) );
-            F       = _mm_add_sd(F, _mm_add_sd( G , H ) );
-            Y       = _mm_add_sd(Y, _mm_mul_sd(F, eps));
-            F       = _mm_add_sd(F, _mm_add_sd(G , _mm_mul_sd(H,two)));
-            vgb     = _mm_mul_sd(Y, qq);           
-            fijGB   = _mm_mul_sd(F, _mm_mul_sd(qq,gbscale));
-
-            dvdatmp = _mm_mul_sd(_mm_add_sd(vgb, _mm_mul_sd(fijGB,r)) , minushalf);
-            
-            vgbtot  = _mm_add_sd(vgbtot, vgb);
-            
-            dvdasum = _mm_add_sd(dvdasum, dvdatmp);
-            dvdatmp = _mm_mul_sd(dvdatmp, _mm_mul_sd(isaj,isaj));
-            
-            GMX_MM_INCREMENT_1VALUE_PD(dvda+jnrA,dvdatmp);
+      /* the tables are 16-byte aligned, so we can use _mm_load_pd */                  
+      Y            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))); 
+      F            = _mm_setzero_pd();
+      GMX_MM_TRANSPOSE2_PD(Y,F);
+      G            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))+2); 
+      H            = _mm_setzero_pd();
+      GMX_MM_TRANSPOSE2_PD(G,H);
+      
+      G       = _mm_mul_sd(G,eps);
+      H       = _mm_mul_sd(H, _mm_mul_sd(eps,eps) );
+      F       = _mm_add_sd(F, _mm_add_sd( G , H ) );
+      Y       = _mm_add_sd(Y, _mm_mul_sd(F, eps));
+      F       = _mm_add_sd(F, _mm_add_sd(G , _mm_mul_sd(H,two)));
+      vgb     = _mm_mul_sd(Y, qq);           
+      fijGB   = _mm_mul_sd(F, _mm_mul_sd(qq,gbscale));
+      
+      dvdatmp = _mm_mul_sd(_mm_add_sd(vgb, _mm_mul_sd(fijGB,r)) , minushalf);
+      
+      vgbtot  = _mm_add_pd(vgbtot, vgb); /* (**) */
+      
+      dvdasum = _mm_add_pd(dvdasum, dvdatmp); /* (**) */
+      dvdatmp = _mm_mul_sd(dvdatmp, _mm_mul_sd(isaj,isaj));
+      
+      GMX_MM_INCREMENT_1VALUE_PD(dvda+jnrA,dvdatmp);
                        
-            fscal        = _mm_mul_sd( _mm_sub_sd( fscal, fijGB),rinv );
-            
-            /***********************************/
+      fscal        = _mm_mul_sd( _mm_sub_sd( fscal, fijGB),rinv );
+      
+      /***********************************/
                        /*  INTERACTION SECTION ENDS HERE  */
                        /***********************************/
-            
-            /* Calculate temporary vectorial force */
-            tx           = _mm_mul_sd(fscal,dx);
-            ty           = _mm_mul_sd(fscal,dy);
-            tz           = _mm_mul_sd(fscal,dz);
-            
-            /* Increment i atom force */
-            fix          = _mm_add_sd(fix,tx);
-            fiy          = _mm_add_sd(fiy,ty);
-            fiz          = _mm_add_sd(fiz,tz);
-            
-            /* Store j forces back */
+      
+      /* Calculate temporary vectorial force */
+      tx           = _mm_mul_sd(fscal,dx);
+      ty           = _mm_mul_sd(fscal,dy);
+      tz           = _mm_mul_sd(fscal,dz);
+      
+      /* Increment i atom force */
+      fix          = _mm_add_sd(fix,tx);
+      fiy          = _mm_add_sd(fiy,ty);
+      fiz          = _mm_add_sd(fiz,tz);
+      
+      /* Store j forces back */
                        GMX_MM_DECREMENT_1RVEC_1POINTER_PD(faction+j3A,tx,ty,tz);
                }
                
-        dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai,isai));
-        gmx_mm_update_iforce_1atom_pd(&fix,&fiy,&fiz,faction+ii3,fshift+is3);
-        
-        ggid     = gid[n];         
-        
-        gmx_mm_update_1pot_pd(vctot,vc+ggid);
-        gmx_mm_update_2pot_pd(vgbtot,gpol+ggid,dvdasum,dvda+ii);
-       }
+    dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai,isai));
+    gmx_mm_update_iforce_1atom_pd(&fix,&fiy,&fiz,faction+ii3,fshift+is3);
+    
+    ggid     = gid[n];         
     
+    gmx_mm_update_1pot_pd(vctot,vc+ggid);
+    gmx_mm_update_1pot_pd(vgbtot,gpol+ggid);
+    gmx_mm_update_1pot_pd(dvdasum,dvda+ii);
+  }
+  
        *outeriter   = nri;            
-    *inneriter   = nj1;        
+  *inneriter   = nj1;  
 }
index 759916a8e620d9905766fe3913b5d11baf484a6d..17ea0c5b3ececa81991b1208a47643dc5a0b7f50 100644 (file)
@@ -62,12 +62,12 @@ void nb_kernel410_ia32_sse2(int *           p_nri,
                                                        int *           inneriter,
                                                        double *         work)
 {
-    int           nri,ntype,nthreads;
-    int           n,ii,is3,ii3,k,nj0,nj1,ggid;
-    double        shX,shY,shZ;
+  int           nri,ntype,nthreads;
+  int           n,ii,is3,ii3,k,nj0,nj1,ggid;
+  double        shX,shY,shZ;
        int                       offset,nti;
-    int           jnrA,jnrB;
-    int           j3A,j3B;
+  int           jnrA,jnrB;
+  int           j3A,j3B;
        int           tjA,tjB;
        gmx_gbdata_t *gbdata;
        double *      gpol;
@@ -100,37 +100,37 @@ void nb_kernel410_ia32_sse2(int *           p_nri,
        nri        = *p_nri;
        ntype      = *p_ntype;
     
-    gbfactor   = _mm_set1_pd( - ((1.0/gbdata->epsilon_r) - (1.0/gbdata->gb_epsilon_solvent)));     
-    gbtabscale = _mm_load1_pd(p_gbtabscale);  
-    facel      = _mm_load1_pd(p_facel);
-
-    nj1         = 0;
-    jnrA = jnrB = 0;
-    j3A = j3B   = 0;
-    jx          = _mm_setzero_pd();
-    jy          = _mm_setzero_pd();
-    jz          = _mm_setzero_pd();
-    c6          = _mm_setzero_pd();
-    c12         = _mm_setzero_pd();
+  gbfactor   = _mm_set1_pd( - ((1.0/gbdata->epsilon_r) - (1.0/gbdata->gb_epsilon_solvent)));     
+  gbtabscale = _mm_load1_pd(p_gbtabscale);  
+  facel      = _mm_load1_pd(p_facel);
+  
+  nj1         = 0;
+  jnrA = jnrB = 0;
+  j3A = j3B   = 0;
+  jx          = _mm_setzero_pd();
+  jy          = _mm_setzero_pd();
+  jz          = _mm_setzero_pd();
+  c6          = _mm_setzero_pd();
+  c12         = _mm_setzero_pd();
        
        for(n=0;n<nri;n++)
        {
-        is3              = 3*shift[n];     
-        shX              = shiftvec[is3];  
-        shY              = shiftvec[is3+1];
-        shZ              = shiftvec[is3+2];
-        nj0              = jindex[n];      
-        nj1              = jindex[n+1];    
-        ii               = iinr[n];        
-        ii3              = 3*ii;           
+    is3              = 3*shift[n];     
+    shX              = shiftvec[is3];  
+    shY              = shiftvec[is3+1];
+    shZ              = shiftvec[is3+2];
+    nj0              = jindex[n];      
+    nj1              = jindex[n+1];    
+    ii               = iinr[n];        
+    ii3              = 3*ii;           
                
                ix               = _mm_set1_pd(shX+pos[ii3+0]);
                iy               = _mm_set1_pd(shY+pos[ii3+1]);
                iz               = _mm_set1_pd(shZ+pos[ii3+2]);
-        
+    
                iq               = _mm_load1_pd(charge+ii);
                iq               = _mm_mul_pd(iq,facel);
-        
+    
                isai             = _mm_load1_pd(invsqrta+ii);
         
                nti              = 2*ntype*type[ii];
@@ -151,39 +151,39 @@ void nb_kernel410_ia32_sse2(int *           p_nri,
                        j3A     = jnrA * 3;
                        j3B     = jnrB * 3;
 
-            GMX_MM_LOAD_1RVEC_2POINTERS_PD(pos+j3A,pos+j3B,jx,jy,jz);
+      GMX_MM_LOAD_1RVEC_2POINTERS_PD(pos+j3A,pos+j3B,jx,jy,jz);
 
                        dx           = _mm_sub_pd(ix,jx);
                        dy           = _mm_sub_pd(iy,jy);
                        dz           = _mm_sub_pd(iz,jz);
 
-            rsq          = gmx_mm_calc_rsq_pd(dx,dy,dz);
-
-            rinv         = gmx_mm_invsqrt_pd(rsq);
+      rsq          = gmx_mm_calc_rsq_pd(dx,dy,dz);
+      
+      rinv         = gmx_mm_invsqrt_pd(rsq);
                        rinvsq       = _mm_mul_pd(rinv,rinv);
-            
+      
                        /***********************************/
                        /* INTERACTION SECTION STARTS HERE */
                        /***********************************/
                        GMX_MM_LOAD_2VALUES_PD(charge+jnrA,charge+jnrB,jq);
                        GMX_MM_LOAD_2VALUES_PD(invsqrta+jnrA,invsqrta+jnrB,isaj);
             
-            /* Lennard-Jones */
-            tjA          = nti+2*type[jnrA];
+      /* Lennard-Jones */
+      tjA          = nti+2*type[jnrA];
                        tjB          = nti+2*type[jnrB];
-            
-            GMX_MM_LOAD_2PAIRS_PD(vdwparam+tjA,vdwparam+tjB,c6,c12);
+      
+      GMX_MM_LOAD_2PAIRS_PD(vdwparam+tjA,vdwparam+tjB,c6,c12);
                        
                        isaprod      = _mm_mul_pd(isai,isaj);
                        qq           = _mm_mul_pd(iq,jq);            
                        vcoul        = _mm_mul_pd(qq,rinv);
                        fscal        = _mm_mul_pd(vcoul,rinv);                                 
-            vctot        = _mm_add_pd(vctot,vcoul);
+      vctot        = _mm_add_pd(vctot,vcoul);
             
-            /* Polarization interaction */
+      /* Polarization interaction */
                        qq           = _mm_mul_pd(qq,_mm_mul_pd(isaprod,gbfactor));
                        gbscale      = _mm_mul_pd(isaprod,gbtabscale);
-            
+      
                        /* Calculate GB table index */
                        r            = _mm_mul_pd(rsq,rinv);
                        rtab         = _mm_mul_pd(r,gbscale);
@@ -192,30 +192,30 @@ void nb_kernel410_ia32_sse2(int *           p_nri,
                        eps              = _mm_sub_pd(rtab,_mm_cvtepi32_pd(n0));
                        nnn                  = _mm_slli_epi32(n0,2);
                        
-            /* the tables are 16-byte aligned, so we can use _mm_load_pd */                    
-            Y            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))); 
-            F            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,1)));
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))+2); 
-            H            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,1))+2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            
-            G       = _mm_mul_pd(G,eps);
-            H       = _mm_mul_pd(H, _mm_mul_pd(eps,eps) );
-            F       = _mm_add_pd(F, _mm_add_pd( G , H ) );
-            Y       = _mm_add_pd(Y, _mm_mul_pd(F, eps));
-            F       = _mm_add_pd(F, _mm_add_pd(G , _mm_mul_pd(H,two)));
-            vgb     = _mm_mul_pd(Y, qq);           
-            fijGB   = _mm_mul_pd(F, _mm_mul_pd(qq,gbscale));
-            
-            dvdatmp = _mm_mul_pd(_mm_add_pd(vgb, _mm_mul_pd(fijGB,r)) , minushalf);
-            
-            vgbtot  = _mm_add_pd(vgbtot, vgb);
-            
-            dvdasum = _mm_add_pd(dvdasum, dvdatmp);
-            dvdatmp = _mm_mul_pd(dvdatmp, _mm_mul_pd(isaj,isaj));
-            
-            GMX_MM_INCREMENT_2VALUES_PD(dvda+jnrA,dvda+jnrB,dvdatmp);
+      /* the tables are 16-byte aligned, so we can use _mm_load_pd */                  
+      Y            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))); 
+      F            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,1)));
+      GMX_MM_TRANSPOSE2_PD(Y,F);
+      G            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))+2); 
+      H            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,1))+2);
+      GMX_MM_TRANSPOSE2_PD(G,H);
+      
+      G       = _mm_mul_pd(G,eps);
+      H       = _mm_mul_pd(H, _mm_mul_pd(eps,eps) );
+      F       = _mm_add_pd(F, _mm_add_pd( G , H ) );
+      Y       = _mm_add_pd(Y, _mm_mul_pd(F, eps));
+      F       = _mm_add_pd(F, _mm_add_pd(G , _mm_mul_pd(H,two)));
+      vgb     = _mm_mul_pd(Y, qq);           
+      fijGB   = _mm_mul_pd(F, _mm_mul_pd(qq,gbscale));
+      
+      dvdatmp = _mm_mul_pd(_mm_add_pd(vgb, _mm_mul_pd(fijGB,r)) , minushalf);
+      
+      vgbtot  = _mm_add_pd(vgbtot, vgb);
+      
+      dvdasum = _mm_add_pd(dvdasum, dvdatmp);
+      dvdatmp = _mm_mul_pd(dvdatmp, _mm_mul_pd(isaj,isaj));
+      
+      GMX_MM_INCREMENT_2VALUES_PD(dvda+jnrA,dvda+jnrB,dvdatmp);
                        
                        rinvsix      = _mm_mul_pd(rinvsq,rinvsq);
                        rinvsix      = _mm_mul_pd(rinvsix,rinvsq);
@@ -224,26 +224,26 @@ void nb_kernel410_ia32_sse2(int *           p_nri,
                        vvdw12       = _mm_mul_pd(c12, _mm_mul_pd(rinvsix,rinvsix));
                        vvdwtot      = _mm_add_pd(vvdwtot,_mm_sub_pd(vvdw12,vvdw6));
             
-            fscal        = _mm_sub_pd(_mm_mul_pd(rinvsq, 
-                                                 _mm_sub_pd(_mm_mul_pd(twelve,vvdw12),
-                                                            _mm_mul_pd(six,vvdw6))),
-                                      _mm_mul_pd( _mm_sub_pd( fijGB,fscal),rinv ));
-                        
-            /***********************************/
+      fscal        = _mm_sub_pd(_mm_mul_pd(rinvsq, 
+                                           _mm_sub_pd(_mm_mul_pd(twelve,vvdw12),
+                                                      _mm_mul_pd(six,vvdw6))),
+                                _mm_mul_pd( _mm_sub_pd( fijGB,fscal),rinv ));
+      
+      /***********************************/
                        /*  INTERACTION SECTION ENDS HERE  */
                        /***********************************/
-            
-            /* Calculate temporary vectorial force */
-            tx           = _mm_mul_pd(fscal,dx);
-            ty           = _mm_mul_pd(fscal,dy);
-            tz           = _mm_mul_pd(fscal,dz);
-            
-            /* Increment i atom force */
-            fix          = _mm_add_pd(fix,tx);
-            fiy          = _mm_add_pd(fiy,ty);
-            fiz          = _mm_add_pd(fiz,tz);
-            
-            /* Store j forces back */
+      
+      /* Calculate temporary vectorial force */
+      tx           = _mm_mul_pd(fscal,dx);
+      ty           = _mm_mul_pd(fscal,dy);
+      tz           = _mm_mul_pd(fscal,dz);
+      
+      /* Increment i atom force */
+      fix          = _mm_add_pd(fix,tx);
+      fiy          = _mm_add_pd(fiy,ty);
+      fiz          = _mm_add_pd(fiz,tz);
+      
+      /* Store j forces back */
                        GMX_MM_DECREMENT_1RVEC_2POINTERS_PD(faction+j3A,faction+j3B,tx,ty,tz);
                }
                
@@ -253,39 +253,53 @@ void nb_kernel410_ia32_sse2(int *           p_nri,
                        jnrA    = jjnr[k];
                        
                        j3A     = jnrA * 3;
-            
-            GMX_MM_LOAD_1RVEC_1POINTER_PD(pos+j3A,jx,jy,jz);
+      
+      GMX_MM_LOAD_1RVEC_1POINTER_PD(pos+j3A,jx,jy,jz);
             
                        dx           = _mm_sub_sd(ix,jx);
                        dy           = _mm_sub_sd(iy,jy);
                        dz           = _mm_sub_sd(iz,jz);
             
-            rsq          = gmx_mm_calc_rsq_pd(dx,dy,dz);
-            
-            rinv         = gmx_mm_invsqrt_pd(rsq);
+      rsq          = gmx_mm_calc_rsq_pd(dx,dy,dz);
+      
+      rinv         = gmx_mm_invsqrt_pd(rsq);
                        rinvsq       = _mm_mul_sd(rinv,rinv);
-            
+      
+      /* The reason for zeroing these variables here is to fix bug 585.
+       * What happens is that __m128d _mm_add_sd(a,b) gives back r0=a[0]+b[0],
+       * and r1=0, but it should be r1=a[1].
+       * This might be a compiler issue (tested with gcc-4.1.3 and -O3).
+       * To work around it, we zero these variables and use _mm_add_pd (**) instead.
+       * Note that the only variables that get affected are the energies, since
+       * the total sum needs to be correct.
+       */
+      vgb          = _mm_setzero_pd();
+      vcoul        = _mm_setzero_pd();
+      dvdatmp      = _mm_setzero_pd();
+      vvdw6        = _mm_setzero_pd();
+      vvdw12       = _mm_setzero_pd();
+      
                        /***********************************/
                        /* INTERACTION SECTION STARTS HERE */
                        /***********************************/
                        GMX_MM_LOAD_1VALUE_PD(charge+jnrA,jq);
                        GMX_MM_LOAD_1VALUE_PD(invsqrta+jnrA,isaj);
-            
-            /* Lennard-Jones */
-            tjA          = nti+2*type[jnrA];
-            
-            GMX_MM_LOAD_1PAIR_PD(vdwparam+tjA,c6,c12);
+      
+      /* Lennard-Jones */
+      tjA          = nti+2*type[jnrA];
+      
+      GMX_MM_LOAD_1PAIR_PD(vdwparam+tjA,c6,c12);
                        
                        isaprod      = _mm_mul_sd(isai,isaj);
-                       qq           = _mm_mul_sd(iq,jq);            
+                       qq           = _mm_mul_sd(jq,iq);            
                        vcoul        = _mm_mul_sd(qq,rinv);
                        fscal        = _mm_mul_sd(vcoul,rinv);                                 
-            vctot        = _mm_add_sd(vctot,vcoul);
-            
-            /* Polarization interaction */
+      vctot        = _mm_add_pd(vctot,vcoul); /* (**) */
+      
+      /* Polarization interaction */
                        qq           = _mm_mul_sd(qq,_mm_mul_sd(isaprod,gbfactor));
                        gbscale      = _mm_mul_sd(isaprod,gbtabscale);
-            
+      
                        /* Calculate GB table index */
                        r            = _mm_mul_sd(rsq,rinv);
                        rtab         = _mm_mul_sd(r,gbscale);
@@ -294,70 +308,73 @@ void nb_kernel410_ia32_sse2(int *           p_nri,
                        eps              = _mm_sub_sd(rtab,_mm_cvtepi32_pd(n0));
                        nnn                  = _mm_slli_epi32(n0,2);
                        
-            /* the tables are 16-byte aligned, so we can use _mm_load_pd */                    
-            Y            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))); 
-            F            = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))+2); 
-            H            = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            
-            G       = _mm_mul_sd(G,eps);
-            H       = _mm_mul_sd(H, _mm_mul_sd(eps,eps) );
-            F       = _mm_add_sd(F, _mm_add_sd( G , H ) );
-            Y       = _mm_add_sd(Y, _mm_mul_sd(F, eps));
-            F       = _mm_add_sd(F, _mm_add_sd(G , _mm_mul_sd(H,two)));
-            vgb     = _mm_mul_sd(Y, qq);           
-            fijGB   = _mm_mul_sd(F, _mm_mul_sd(qq,gbscale));
-            
-            dvdatmp = _mm_mul_sd(_mm_add_sd(vgb, _mm_mul_sd(fijGB,r)) , minushalf);
-            
-            vgbtot  = _mm_add_sd(vgbtot, vgb);
-            
-            dvdasum = _mm_add_sd(dvdasum, dvdatmp);
-            dvdatmp = _mm_mul_sd(dvdatmp, _mm_mul_sd(isaj,isaj));
-            
-            GMX_MM_INCREMENT_1VALUE_PD(dvda+jnrA,dvdatmp);
+      /* the tables are 16-byte aligned, so we can use _mm_load_pd */                  
+      Y            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))); 
+      F            = _mm_setzero_pd();
+      GMX_MM_TRANSPOSE2_PD(Y,F);
+      G            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))+2); 
+      H            = _mm_setzero_pd();
+      GMX_MM_TRANSPOSE2_PD(G,H);
+      
+      G       = _mm_mul_sd(G,eps);
+      H       = _mm_mul_sd(H, _mm_mul_sd(eps,eps) );
+      F       = _mm_add_sd(F, _mm_add_sd( G , H ) );
+      Y       = _mm_add_sd(Y, _mm_mul_sd(F, eps));
+      F       = _mm_add_sd(F, _mm_add_sd(G , _mm_mul_sd(H,two)));
+      vgb     = _mm_mul_sd(Y, qq);           
+      fijGB   = _mm_mul_sd(F, _mm_mul_sd(qq,gbscale));
+      
+      dvdatmp = _mm_mul_sd(_mm_add_sd(vgb, _mm_mul_sd(fijGB,r)) , minushalf);
+      
+      vgbtot  = _mm_add_pd(vgbtot, vgb); /* (**) */
+      
+      dvdasum = _mm_add_pd(dvdasum, dvdatmp); /* (**) */
+      dvdatmp = _mm_mul_sd(dvdatmp, _mm_mul_sd(isaj,isaj));
+      
+      GMX_MM_INCREMENT_1VALUE_PD(dvda+jnrA,dvdatmp);
                        
                        rinvsix      = _mm_mul_sd(rinvsq,rinvsq);
                        rinvsix      = _mm_mul_sd(rinvsix,rinvsq);
                        
                        vvdw6        = _mm_mul_sd(c6,rinvsix);
                        vvdw12       = _mm_mul_sd(c12, _mm_mul_sd(rinvsix,rinvsix));
-                       vvdwtot      = _mm_add_sd(vvdwtot,_mm_sub_sd(vvdw12,vvdw6));
-        
-            fscal        = _mm_sub_sd(_mm_mul_sd(rinvsq, 
-                                                 _mm_sub_sd(_mm_mul_sd(twelve,vvdw12),
-                                                            _mm_mul_sd(six,vvdw6))),
-                                      _mm_mul_sd( _mm_sub_sd( fijGB,fscal),rinv ));
-            
-            /***********************************/
+                       vvdwtot      = _mm_add_pd(vvdwtot,_mm_sub_sd(vvdw12,vvdw6)); /* (**) */
+      
+      fscal        = _mm_sub_sd(_mm_mul_sd(rinvsq, 
+                                           _mm_sub_sd(_mm_mul_sd(twelve,vvdw12),
+                                                      _mm_mul_sd(six,vvdw6))),
+                                _mm_mul_sd( _mm_sub_sd( fijGB,fscal),rinv ));
+      
+      /***********************************/
                        /*  INTERACTION SECTION ENDS HERE  */
                        /***********************************/
-            
-            /* Calculate temporary vectorial force */
-            tx           = _mm_mul_sd(fscal,dx);
-            ty           = _mm_mul_sd(fscal,dy);
-            tz           = _mm_mul_sd(fscal,dz);
-            
-            /* Increment i atom force */
-            fix          = _mm_add_sd(fix,tx);
-            fiy          = _mm_add_sd(fiy,ty);
-            fiz          = _mm_add_sd(fiz,tz);
-            
-            /* Store j forces back */
+      
+      /* Calculate temporary vectorial force */
+      tx           = _mm_mul_sd(fscal,dx);
+      ty           = _mm_mul_sd(fscal,dy);
+      tz           = _mm_mul_sd(fscal,dz);
+      
+      /* Increment i atom force */
+      fix          = _mm_add_sd(fix,tx);
+      fiy          = _mm_add_sd(fiy,ty);
+      fiz          = _mm_add_sd(fiz,tz);
+      
+      /* Store j forces back */
                        GMX_MM_DECREMENT_1RVEC_1POINTER_PD(faction+j3A,tx,ty,tz);
                }
                
-        dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai,isai));
-        gmx_mm_update_iforce_1atom_pd(&fix,&fiy,&fiz,faction+ii3,fshift+is3);
-
-        ggid     = gid[n];         
-        
-        gmx_mm_update_2pot_pd(vctot,vc+ggid,vvdwtot,vvdw+ggid);
-        gmx_mm_update_2pot_pd(vgbtot,gpol+ggid,dvdasum,dvda+ii);
+    dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai,isai));
+    gmx_mm_update_iforce_1atom_pd(&fix,&fiy,&fiz,faction+ii3,fshift+is3);
+    
+    ggid     = gid[n];         
+    
+    gmx_mm_update_1pot_pd(vctot,vc+ggid);
+    gmx_mm_update_1pot_pd(vgbtot,gpol+ggid);
+    gmx_mm_update_1pot_pd(dvdasum,dvda+ii);
+    gmx_mm_update_1pot_pd(vvdwtot,vvdw+ggid);
+    
        }
-
+  
        *outeriter   = nri;            
-    *inneriter   = nj1;        
+  *inneriter   = nj1;  
 }
index 94f1957748b208cf39509fdff7acd31013893426..d0ed80c135b7305e8a1ec88301f677a5d4d0e0a8 100644 (file)
@@ -60,12 +60,12 @@ void nb_kernel430_ia32_sse2(int *           p_nri,
                               int *           inneriter,
                               double *         work)
 {
-    int           nri,ntype,nthreads;
-    int           n,ii,is3,ii3,k,nj0,nj1,ggid;
-    double        shX,shY,shZ;
+  int           nri,ntype,nthreads;
+  int           n,ii,is3,ii3,k,nj0,nj1,ggid;
+  double        shX,shY,shZ;
        int                       offset,nti;
-    int           jnrA,jnrB;
-    int           j3A,j3B;
+  int           jnrA,jnrB;
+  int           j3A,j3B;
        int           tjA,tjB;
        gmx_gbdata_t *gbdata;
        double *      gpol;
@@ -80,12 +80,12 @@ void nb_kernel430_ia32_sse2(int *           p_nri,
        __m128d  vcoul,fscal,gbscale,c6,c12;
        __m128d  rinvsq,r,rtab;
        __m128d  eps,Y,F,G,H;
-    __m128d  VV,FF,Fp;
+  __m128d  VV,FF,Fp;
        __m128d  vgb,fijGB,dvdatmp;
        __m128d  rinvsix,vvdw6,vvdw12,vvdwtmp;
        __m128d  facel,gbtabscale,dvdaj;
-    __m128d  fijD,fijR;
-    __m128d  xmm1,tabscale,eps2;
+  __m128d  fijD,fijR;
+  __m128d  xmm1,tabscale,eps2;
        __m128i  n0, nnn;
     
        
@@ -100,40 +100,40 @@ void nb_kernel430_ia32_sse2(int *           p_nri,
        nri        = *p_nri;
        ntype      = *p_ntype;
     
-    gbfactor   = _mm_set1_pd( - ((1.0/gbdata->epsilon_r) - (1.0/gbdata->gb_epsilon_solvent)));     
-    gbtabscale = _mm_load1_pd(p_gbtabscale);  
-    facel      = _mm_load1_pd(p_facel);
-    tabscale   = _mm_load1_pd(p_tabscale);
-    
-    nj1         = 0;
-    jnrA = jnrB = 0;
-    j3A = j3B   = 0;
-    jx          = _mm_setzero_pd();
-    jy          = _mm_setzero_pd();
-    jz          = _mm_setzero_pd();
-    c6          = _mm_setzero_pd();
-    c12         = _mm_setzero_pd();
+  gbfactor   = _mm_set1_pd( - ((1.0/gbdata->epsilon_r) - (1.0/gbdata->gb_epsilon_solvent)));     
+  gbtabscale = _mm_load1_pd(p_gbtabscale);  
+  facel      = _mm_load1_pd(p_facel);
+  tabscale   = _mm_load1_pd(p_tabscale);
+  
+  nj1         = 0;
+  jnrA = jnrB = 0;
+  j3A = j3B   = 0;
+  jx          = _mm_setzero_pd();
+  jy          = _mm_setzero_pd();
+  jz          = _mm_setzero_pd();
+  c6          = _mm_setzero_pd();
+  c12         = _mm_setzero_pd();
        
        for(n=0;n<nri;n++)
        {
-        is3              = 3*shift[n];     
-        shX              = shiftvec[is3];  
-        shY              = shiftvec[is3+1];
-        shZ              = shiftvec[is3+2];
-        nj0              = jindex[n];      
-        nj1              = jindex[n+1];    
-        ii               = iinr[n];        
-        ii3              = 3*ii;           
+    is3              = 3*shift[n];     
+    shX              = shiftvec[is3];  
+    shY              = shiftvec[is3+1];
+    shZ              = shiftvec[is3+2];
+    nj0              = jindex[n];      
+    nj1              = jindex[n+1];    
+    ii               = iinr[n];        
+    ii3              = 3*ii;           
                
                ix               = _mm_set1_pd(shX+pos[ii3+0]);
                iy               = _mm_set1_pd(shY+pos[ii3+1]);
                iz               = _mm_set1_pd(shZ+pos[ii3+2]);
-        
+    
                iq               = _mm_load1_pd(charge+ii);
                iq               = _mm_mul_pd(iq,facel);
-        
+    
                isai             = _mm_load1_pd(invsqrta+ii);
-        
+    
                nti              = 2*ntype*type[ii];
                
                vctot            = _mm_setzero_pd();
@@ -152,39 +152,39 @@ void nb_kernel430_ia32_sse2(int *           p_nri,
                        j3A     = jnrA * 3;
                        j3B     = jnrB * 3;
             
-            GMX_MM_LOAD_1RVEC_2POINTERS_PD(pos+j3A,pos+j3B,jx,jy,jz);
+      GMX_MM_LOAD_1RVEC_2POINTERS_PD(pos+j3A,pos+j3B,jx,jy,jz);
             
                        dx           = _mm_sub_pd(ix,jx);
                        dy           = _mm_sub_pd(iy,jy);
                        dz           = _mm_sub_pd(iz,jz);
             
-            rsq          = gmx_mm_calc_rsq_pd(dx,dy,dz);
-            
-            rinv         = gmx_mm_invsqrt_pd(rsq);
+      rsq          = gmx_mm_calc_rsq_pd(dx,dy,dz);
+      
+      rinv         = gmx_mm_invsqrt_pd(rsq);
                        rinvsq       = _mm_mul_pd(rinv,rinv);
-            
+      
                        /***********************************/
                        /* INTERACTION SECTION STARTS HERE */
                        /***********************************/
                        GMX_MM_LOAD_2VALUES_PD(charge+jnrA,charge+jnrB,jq);
                        GMX_MM_LOAD_2VALUES_PD(invsqrta+jnrA,invsqrta+jnrB,isaj);
             
-            /* Lennard-Jones */
-            tjA          = nti+2*type[jnrA];
+      /* Lennard-Jones */
+      tjA          = nti+2*type[jnrA];
                        tjB          = nti+2*type[jnrB];
-            
-            GMX_MM_LOAD_2PAIRS_PD(vdwparam+tjA,vdwparam+tjB,c6,c12);
+      
+      GMX_MM_LOAD_2PAIRS_PD(vdwparam+tjA,vdwparam+tjB,c6,c12);
                        
                        isaprod      = _mm_mul_pd(isai,isaj);
                        qq           = _mm_mul_pd(iq,jq);            
                        vcoul        = _mm_mul_pd(qq,rinv);
                        fscal        = _mm_mul_pd(vcoul,rinv);                                 
-            vctot        = _mm_add_pd(vctot,vcoul);
-            
-            /* Polarization interaction */
+      vctot        = _mm_add_pd(vctot,vcoul);
+      
+      /* Polarization interaction */
                        qq           = _mm_mul_pd(qq,_mm_mul_pd(isaprod,gbfactor));
                        gbscale      = _mm_mul_pd(isaprod,gbtabscale);
-            
+      
                        /* Calculate GB table index */
                        r            = _mm_mul_pd(rsq,rinv);
                        rtab         = _mm_mul_pd(r,gbscale);
@@ -193,47 +193,47 @@ void nb_kernel430_ia32_sse2(int *           p_nri,
                        eps              = _mm_sub_pd(rtab,_mm_cvtepi32_pd(n0));
                        nnn                  = _mm_slli_epi32(n0,2);
                        
-            /* the tables are 16-byte aligned, so we can use _mm_load_pd */                    
-            Y            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))); 
-            F            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,1)));
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))+2); 
-            H            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,1))+2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            
-            G       = _mm_mul_pd(G,eps);
-            H       = _mm_mul_pd(H, _mm_mul_pd(eps,eps) );
-            F       = _mm_add_pd(F, _mm_add_pd( G , H ) );
-            Y       = _mm_add_pd(Y, _mm_mul_pd(F, eps));
-            F       = _mm_add_pd(F, _mm_add_pd(G , _mm_mul_pd(H,two)));
-            vgb     = _mm_mul_pd(Y, qq);           
-            fijGB   = _mm_mul_pd(F, _mm_mul_pd(qq,gbscale));
-            
-            dvdatmp = _mm_mul_pd(_mm_add_pd(vgb, _mm_mul_pd(fijGB,r)) , minushalf);
-            
-            vgbtot  = _mm_add_pd(vgbtot, vgb);
-            
-            dvdasum = _mm_add_pd(dvdasum, dvdatmp);
-            dvdatmp = _mm_mul_pd(dvdatmp, _mm_mul_pd(isaj,isaj));
-            
-            GMX_MM_INCREMENT_2VALUES_PD(dvda+jnrA,dvda+jnrB,dvdatmp);
+      /* the tables are 16-byte aligned, so we can use _mm_load_pd */                  
+      Y            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))); 
+      F            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,1)));
+      GMX_MM_TRANSPOSE2_PD(Y,F);
+      G            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))+2); 
+      H            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,1))+2);
+      GMX_MM_TRANSPOSE2_PD(G,H);
+      
+      G       = _mm_mul_pd(G,eps);
+      H       = _mm_mul_pd(H, _mm_mul_pd(eps,eps) );
+      F       = _mm_add_pd(F, _mm_add_pd( G , H ) );
+      Y       = _mm_add_pd(Y, _mm_mul_pd(F, eps));
+      F       = _mm_add_pd(F, _mm_add_pd(G , _mm_mul_pd(H,two)));
+      vgb     = _mm_mul_pd(Y, qq);           
+      fijGB   = _mm_mul_pd(F, _mm_mul_pd(qq,gbscale));
+      
+      dvdatmp = _mm_mul_pd(_mm_add_pd(vgb, _mm_mul_pd(fijGB,r)) , minushalf);
+      
+      vgbtot  = _mm_add_pd(vgbtot, vgb);
+      
+      dvdasum = _mm_add_pd(dvdasum, dvdatmp);
+      dvdatmp = _mm_mul_pd(dvdatmp, _mm_mul_pd(isaj,isaj));
+      
+      GMX_MM_INCREMENT_2VALUES_PD(dvda+jnrA,dvda+jnrB,dvdatmp);
                        
-            /* Calculate VDW table index */
+      /* Calculate VDW table index */
                        rtab    = _mm_mul_pd(r,tabscale);
                        n0      = _mm_cvttpd_epi32(rtab);
                        eps     = _mm_sub_pd(rtab,_mm_cvtepi32_pd(n0));
                        eps2    = _mm_mul_pd(eps,eps);
                        nnn     = _mm_slli_epi32(n0,3);
                        
-            /* Dispersion */
-            Y            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))); 
-            F            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,1)));
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+2); 
-            H            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,1))+2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            
-            G       = _mm_mul_pd(G,eps);
+      /* Dispersion */
+      Y            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))); 
+      F            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,1)));
+      GMX_MM_TRANSPOSE2_PD(Y,F);
+      G            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+2); 
+      H            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,1))+2);
+      GMX_MM_TRANSPOSE2_PD(G,H);
+      
+      G       = _mm_mul_pd(G,eps);
                        H       = _mm_mul_pd(H,eps2);
                        Fp      = _mm_add_pd(F,G);
                        Fp      = _mm_add_pd(Fp,H);
@@ -245,16 +245,16 @@ void nb_kernel430_ia32_sse2(int *           p_nri,
                        
                        vvdw6   = _mm_mul_pd(c6,VV);
                        fijD    = _mm_mul_pd(c6,FF);
-            
-            /* Dispersion */
-            Y            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+4); 
-            F            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,1))+4);
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+6); 
-            H            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,1))+6);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            
-            G       = _mm_mul_pd(G,eps);
+      
+      /* Repulsion */
+      Y            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+4); 
+      F            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,1))+4);
+      GMX_MM_TRANSPOSE2_PD(Y,F);
+      G            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+6); 
+      H            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,1))+6);
+      GMX_MM_TRANSPOSE2_PD(G,H);
+      
+      G       = _mm_mul_pd(G,eps);
                        H       = _mm_mul_pd(H,eps2);
                        Fp      = _mm_add_pd(F,G);
                        Fp      = _mm_add_pd(Fp,H);
@@ -269,29 +269,29 @@ void nb_kernel430_ia32_sse2(int *           p_nri,
                        
                        vvdwtmp = _mm_add_pd(vvdw12,vvdw6);
                        vvdwtot = _mm_add_pd(vvdwtot,vvdwtmp);
-            
+      
                        xmm1    = _mm_add_pd(fijD,fijR);
                        xmm1    = _mm_mul_pd(xmm1,tabscale);
                        xmm1    = _mm_add_pd(xmm1,fijGB);
                        xmm1    = _mm_sub_pd(xmm1,fscal);
                        fscal   = _mm_mul_pd(xmm1,neg);
                        fscal   = _mm_mul_pd(fscal,rinv);
-            
-            /***********************************/
+      
+      /***********************************/
                        /*  INTERACTION SECTION ENDS HERE  */
                        /***********************************/
-            
-            /* Calculate temporary vectorial force */
-            tx           = _mm_mul_pd(fscal,dx);
-            ty           = _mm_mul_pd(fscal,dy);
-            tz           = _mm_mul_pd(fscal,dz);
-            
-            /* Increment i atom force */
-            fix          = _mm_add_pd(fix,tx);
-            fiy          = _mm_add_pd(fiy,ty);
-            fiz          = _mm_add_pd(fiz,tz);
-            
-            /* Store j forces back */
+      
+      /* Calculate temporary vectorial force */
+      tx           = _mm_mul_pd(fscal,dx);
+      ty           = _mm_mul_pd(fscal,dy);
+      tz           = _mm_mul_pd(fscal,dz);
+      
+      /* Increment i atom force */
+      fix          = _mm_add_pd(fix,tx);
+      fiy          = _mm_add_pd(fiy,ty);
+      fiz          = _mm_add_pd(fiz,tz);
+      
+      /* Store j forces back */
                        GMX_MM_DECREMENT_1RVEC_2POINTERS_PD(faction+j3A,faction+j3B,tx,ty,tz);
                }
                
@@ -300,39 +300,53 @@ void nb_kernel430_ia32_sse2(int *           p_nri,
                {
                        jnrA    = jjnr[k];
                        j3A     = jnrA * 3;
-            
-            GMX_MM_LOAD_1RVEC_1POINTER_PD(pos+j3A,jx,jy,jz);
-            
+      
+      GMX_MM_LOAD_1RVEC_1POINTER_PD(pos+j3A,jx,jy,jz);
+      
                        dx           = _mm_sub_sd(ix,jx);
                        dy           = _mm_sub_sd(iy,jy);
                        dz           = _mm_sub_sd(iz,jz);
             
-            rsq          = gmx_mm_calc_rsq_pd(dx,dy,dz);
-            
-            rinv         = gmx_mm_invsqrt_pd(rsq);
+      rsq          = gmx_mm_calc_rsq_pd(dx,dy,dz);
+      
+      rinv         = gmx_mm_invsqrt_pd(rsq);
                        rinvsq       = _mm_mul_sd(rinv,rinv);
-            
-                       /***********************************/
+      
+      /* The reason for zeroing these variables here is to fix bug 585.
+       * What happens is that __m128d _mm_add_sd(a,b) gives back r0=a[0]+b[0],
+       * and r1=0, but it should be r1=a[1]. 
+       * This might be a compiler issue (tested with gcc-4.1.3 and -O3).
+       * To work around it, we zero these variables and use _mm_add_pd (**) instead
+       * Note that the only variables that get affected are the energies since
+       * the total sum needs to be correct 
+       */
+      vgb          = _mm_setzero_pd();
+      vcoul        = _mm_setzero_pd();
+      dvdatmp      = _mm_setzero_pd();
+      vvdw6        = _mm_setzero_pd();
+      vvdw12       = _mm_setzero_pd();
+
+      /***********************************/
                        /* INTERACTION SECTION STARTS HERE */
                        /***********************************/
                        GMX_MM_LOAD_1VALUE_PD(charge+jnrA,jq);
                        GMX_MM_LOAD_1VALUE_PD(invsqrta+jnrA,isaj);
             
-            /* Lennard-Jones */
-            tjA          = nti+2*type[jnrA];
-            
-            GMX_MM_LOAD_1PAIR_PD(vdwparam+tjA,c6,c12);
+      /* Lennard-Jones */
+      tjA          = nti+2*type[jnrA];
+      
+      GMX_MM_LOAD_1PAIR_PD(vdwparam+tjA,c6,c12);
                        
                        isaprod      = _mm_mul_sd(isai,isaj);
-                       qq           = _mm_mul_sd(iq,jq);            
+                       qq           = _mm_mul_sd(jq,iq);            
                        vcoul        = _mm_mul_sd(qq,rinv);
                        fscal        = _mm_mul_sd(vcoul,rinv);                                 
-            vctot        = _mm_add_sd(vctot,vcoul);
-            
-            /* Polarization interaction */
+      vctot        = _mm_add_pd(vctot,vcoul); /* (**) */
+      
+      /* Polarization interaction */
                        qq           = _mm_mul_sd(qq,_mm_mul_sd(isaprod,gbfactor));
                        gbscale      = _mm_mul_sd(isaprod,gbtabscale);
-            
+      
                        /* Calculate GB table index */
                        r            = _mm_mul_sd(rsq,rinv);
                        rtab         = _mm_mul_sd(r,gbscale);
@@ -341,47 +355,47 @@ void nb_kernel430_ia32_sse2(int *           p_nri,
                        eps              = _mm_sub_sd(rtab,_mm_cvtepi32_pd(n0));
                        nnn                  = _mm_slli_epi32(n0,2);
                        
-            /* the tables are 16-byte aligned, so we can use _mm_load_pd */                    
-            Y            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))); 
-            F            = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))+2); 
-            H            = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            
-            G       = _mm_mul_sd(G,eps);
-            H       = _mm_mul_sd(H, _mm_mul_sd(eps,eps) );
-            F       = _mm_add_sd(F, _mm_add_sd( G , H ) );
-            Y       = _mm_add_sd(Y, _mm_mul_sd(F, eps));
-            F       = _mm_add_sd(F, _mm_add_sd(G , _mm_mul_sd(H,two)));
-            vgb     = _mm_mul_sd(Y, qq);           
-            fijGB   = _mm_mul_sd(F, _mm_mul_sd(qq,gbscale));
-            
-            dvdatmp = _mm_mul_sd(_mm_add_sd(vgb, _mm_mul_sd(fijGB,r)) , minushalf);
-            
-            vgbtot  = _mm_add_sd(vgbtot, vgb);
-            
-            dvdasum = _mm_add_sd(dvdasum, dvdatmp);
-            dvdatmp = _mm_mul_sd(dvdatmp, _mm_mul_sd(isaj,isaj));
-            
-            GMX_MM_INCREMENT_1VALUE_PD(dvda+jnrA,dvdatmp);
+      /* the tables are 16-byte aligned, so we can use _mm_load_pd */                  
+      Y            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))); 
+      F            = _mm_setzero_pd();
+      GMX_MM_TRANSPOSE2_PD(Y,F);
+      G            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))+2); 
+      H            = _mm_setzero_pd();
+      GMX_MM_TRANSPOSE2_PD(G,H);
+      
+      G       = _mm_mul_sd(G,eps);
+      H       = _mm_mul_sd(H, _mm_mul_sd(eps,eps) );
+      F       = _mm_add_sd(F, _mm_add_sd( G , H ) );
+      Y       = _mm_add_sd(Y, _mm_mul_sd(F, eps));
+      F       = _mm_add_sd(F, _mm_add_sd(G , _mm_mul_sd(H,two)));
+      vgb     = _mm_mul_sd(Y, qq);           
+      fijGB   = _mm_mul_sd(F, _mm_mul_sd(qq,gbscale));
+      
+      dvdatmp = _mm_mul_sd(_mm_add_sd(vgb, _mm_mul_sd(fijGB,r)) , minushalf);
+      
+      vgbtot  = _mm_add_pd(vgbtot, vgb); /* (**) */
+      
+      dvdasum = _mm_add_pd(dvdasum, dvdatmp); /* (**) */
+      dvdatmp = _mm_mul_sd(dvdatmp, _mm_mul_sd(isaj,isaj));
+      
+      GMX_MM_INCREMENT_1VALUE_PD(dvda+jnrA,dvdatmp);
                        
-            /* Calculate VDW table index */
+      /* Calculate VDW table index */
                        rtab    = _mm_mul_sd(r,tabscale);
                        n0      = _mm_cvttpd_epi32(rtab);
                        eps     = _mm_sub_sd(rtab,_mm_cvtepi32_pd(n0));
                        eps2    = _mm_mul_sd(eps,eps);
                        nnn     = _mm_slli_epi32(n0,3);
                        
-            /* Dispersion */
-            Y            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))); 
-            F            = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+2); 
-            H            = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            
-            G       = _mm_mul_sd(G,eps);
+      /* Dispersion */
+      Y            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))); 
+      F            = _mm_setzero_pd();
+      GMX_MM_TRANSPOSE2_PD(Y,F);
+      G            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+2); 
+      H            = _mm_setzero_pd();
+      GMX_MM_TRANSPOSE2_PD(G,H);
+      
+      G       = _mm_mul_sd(G,eps);
                        H       = _mm_mul_sd(H,eps2);
                        Fp      = _mm_add_sd(F,G);
                        Fp      = _mm_add_sd(Fp,H);
@@ -393,16 +407,16 @@ void nb_kernel430_ia32_sse2(int *           p_nri,
                        
                        vvdw6   = _mm_mul_sd(c6,VV);
                        fijD    = _mm_mul_sd(c6,FF);
-            
-            /* Dispersion */
-            Y            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+4); 
-            F            = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+6); 
-            H            = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            
-            G       = _mm_mul_sd(G,eps);
+      
+      /* Dispersion */
+      Y            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+4); 
+      F            = _mm_setzero_pd();
+      GMX_MM_TRANSPOSE2_PD(Y,F);
+      G            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+6); 
+      H            = _mm_setzero_pd();
+      GMX_MM_TRANSPOSE2_PD(G,H);
+      
+      G       = _mm_mul_sd(G,eps);
                        H       = _mm_mul_sd(H,eps2);
                        Fp      = _mm_add_sd(F,G);
                        Fp      = _mm_add_sd(Fp,H);
@@ -416,7 +430,7 @@ void nb_kernel430_ia32_sse2(int *           p_nri,
                        fijR    = _mm_mul_sd(c12,FF);
                        
                        vvdwtmp = _mm_add_sd(vvdw12,vvdw6);
-                       vvdwtot = _mm_add_sd(vvdwtot,vvdwtmp);
+                       vvdwtot = _mm_add_pd(vvdwtot,vvdwtmp); /* (**) */
             
                        xmm1    = _mm_add_sd(fijD,fijR);
                        xmm1    = _mm_mul_sd(xmm1,tabscale);
@@ -425,34 +439,37 @@ void nb_kernel430_ia32_sse2(int *           p_nri,
                        fscal   = _mm_mul_sd(xmm1,neg);
                        fscal   = _mm_mul_sd(fscal,rinv);
 
-            /***********************************/
+      /***********************************/
                        /*  INTERACTION SECTION ENDS HERE  */
                        /***********************************/
-            
-            /* Calculate temporary vectorial force */
-            tx           = _mm_mul_sd(fscal,dx);
-            ty           = _mm_mul_sd(fscal,dy);
-            tz           = _mm_mul_sd(fscal,dz);
-            
-            /* Increment i atom force */
-            fix          = _mm_add_sd(fix,tx);
-            fiy          = _mm_add_sd(fiy,ty);
-            fiz          = _mm_add_sd(fiz,tz);
-            
-            /* Store j forces back */
+      
+      /* Calculate temporary vectorial force */
+      tx           = _mm_mul_sd(fscal,dx);
+      ty           = _mm_mul_sd(fscal,dy);
+      tz           = _mm_mul_sd(fscal,dz);
+      
+      /* Increment i atom force */
+      fix          = _mm_add_sd(fix,tx);
+      fiy          = _mm_add_sd(fiy,ty);
+      fiz          = _mm_add_sd(fiz,tz);
+      
+      /* Store j forces back */
                        GMX_MM_DECREMENT_1RVEC_1POINTER_PD(faction+j3A,tx,ty,tz);
                }
                
-        dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai,isai));
-        gmx_mm_update_iforce_1atom_pd(&fix,&fiy,&fiz,faction+ii3,fshift+is3);
-        
-        ggid     = gid[n];         
-        
-        gmx_mm_update_2pot_pd(vctot,vc+ggid,vvdwtot,vvdw+ggid);
-        gmx_mm_update_2pot_pd(vgbtot,gpol+ggid,dvdasum,dvda+ii);
-       }
+    dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai,isai));
+    gmx_mm_update_iforce_1atom_pd(&fix,&fiy,&fiz,faction+ii3,fshift+is3);
     
+    ggid     = gid[n];         
+    
+    gmx_mm_update_1pot_pd(vctot,vc+ggid);
+    gmx_mm_update_1pot_pd(vgbtot,gpol+ggid);
+    gmx_mm_update_1pot_pd(dvdasum,dvda+ii);
+    gmx_mm_update_1pot_pd(vvdwtot,vvdw+ggid);
+    
+       }
+  
        *outeriter   = nri;            
-    *inneriter   = nj1;        
+  *inneriter   = nj1;  
 }
 
index 88ae4d32362d8b641b531f80e1fb0c5426ca1fa9..a84a67946b5e96c23a27f2226d0692ed581da88f 100644 (file)
@@ -715,7 +715,7 @@ nb_kernel_allvsallgb_sse2_double(t_forcerec *           fr,
         pmask1           = prologue_mask[i+1];
         emask0           = epilogue_mask[i];
         emask1           = epilogue_mask[i+1];
-        imask_SSE0        = _mm_load1_pd((double *)(imask+i));
+        imask_SSE0        = _mm_load1_pd((double *)(imask+2*i));
         imask_SSE1        = _mm_load1_pd((double *)(imask+2*i+2));
         
          for(j=nj0; j<nj1; j+=UNROLLJ)
index e6c64c70731d6934fc0d35b28f56fa0ebaec03c8..760860bde5ca75502dbe02466c2b1f082d8e27af 100644 (file)
@@ -18,7 +18,6 @@
 #include<math.h>
 #include<vec.h>
 
-
 #include <xmmintrin.h>
 #include <emmintrin.h>
 
 #include "../nb_kerneltype.h"
 
 
-
 void nb_kernel400_sse2_double(int *           p_nri,
-                                                       int *           iinr,
-                                                       int *           jindex,
-                                                       int *           jjnr,
-                                                       int *           shift,
-                                                       double *         shiftvec,
-                                                       double *         fshift,
-                                                       int *           gid,
-                                                       double *         pos,
-                                                       double *         faction,
-                                                       double *         charge,
-                                                       double *         p_facel,
-                                                       double *         p_krf,
-                                                       double *         p_crf,
-                                                       double *         Vc,
-                                                       int *           type,
-                                                       int *           p_ntype,
-                                                       double *         vdwparam,
-                                                       double *         Vvdw,
-                                                       double *         p_tabscale,
-                                                       double *         VFtab,
-                                                       double *         invsqrta,
-                                                       double *         dvda,
-                                                       double *         p_gbtabscale,
-                                                       double *         GBtab,
-                                                       int *           p_nthreads,
-                                                       int *           count,
-                                                       void *          mtx,
-                                                       int *           outeriter,
-                                                       int *           inneriter,
-                                                       double *         work)
+                              int *           iinr,
+                              int *           jindex,
+                              int *           jjnr,
+                              int *           shift,
+                              double *         shiftvec,
+                              double *         fshift,
+                              int *           gid,
+                              double *         pos,
+                              double *         faction,
+                              double *         charge,
+                              double *         p_facel,
+                              double *         p_krf,
+                              double *         p_crf,
+                              double *         vc,
+                              int *           type,
+                              int *           p_ntype,
+                              double *         vdwparam,
+                              double *         vvdw,
+                              double *         p_tabscale,
+                              double *         VFtab,
+                              double *         invsqrta,
+                              double *         dvda,
+                              double *         p_gbtabscale,
+                              double *         GBtab,
+                              int *           p_nthreads,
+                              int *           count,
+                              void *          mtx,
+                              int *           outeriter,
+                              int *           inneriter,
+                              double *         work)
 {
-       int           nri,ntype,nthreads,offset;
-       int           n,ii,is3,ii3,k,nj0,nj1,jnr1,jnr2,j13,j23,ggid;
-       double        facel,krf,crf,tabscl,gbtabscl,vct,vgbt;
-       double        shX,shY,shZ,isai_d,dva;
+  int           nri,nthreads;
+  int           n,ii,is3,ii3,k,nj0,nj1,ggid;
+  double        shX,shY,shZ;
+  int           jnrA,jnrB;
+  int           j3A,j3B;
        gmx_gbdata_t *gbdata;
-       float *        gpol;
-       
-       __m128d       ix,iy,iz,jx,jy,jz;
-       __m128d           dx,dy,dz,t1,t2,t3;
-       __m128d           fix,fiy,fiz,rsq11,rinv,r,fscal,rt,eps,eps2;
-       __m128d           q,iq,qq,isai,isaj,isaprod,vcoul,gbscale,dvdai,dvdaj;
-       __m128d       Y,F,G,H,Fp,VV,FF,vgb,fijC,dvdatmp,dvdasum,vctot,vgbtot,n0d;
-       __m128d           xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7,xmm8;
-       __m128d       fac,tabscale,gbtabscale;
-       __m128i       n0,nnn;
+       double *      gpol;
+    
+       __m128d  iq,qq,jq,isai;
+       __m128d  ix,iy,iz;
+       __m128d  jx,jy,jz;
+       __m128d  dx,dy,dz;
+       __m128d  vctot,vgbtot,dvdasum,gbfactor;
+       __m128d  fix,fiy,fiz,tx,ty,tz,rsq;
+       __m128d  rinv,isaj,isaprod;
+       __m128d  vcoul,fscal,gbscale;
+       __m128d  rinvsq,r,rtab;
+       __m128d  eps,Y,F,G,H;
+       __m128d  vgb,fijGB,dvdatmp;
+       __m128d  facel,gbtabscale,dvdaj;
+       __m128i  n0, nnn;
        
-       const __m128d neg    = {-1.0,-1.0};
-       const __m128d zero   = {0.0,0.0};
-       const __m128d half   = {0.5,0.5};
-       const __m128d two    = {2.0,2.0};
-       const __m128d three  = {3.0,3.0};
+       const __m128d neg        = _mm_set1_pd(-1.0);
+       const __m128d zero       = _mm_set1_pd(0.0);
+       const __m128d minushalf  = _mm_set1_pd(-0.5);
+       const __m128d two        = _mm_set1_pd(2.0);
        
        gbdata     = (gmx_gbdata_t *)work;
        gpol       = gbdata->gpol;
-       
+    
        nri        = *p_nri;
-       ntype      = *p_ntype;
-       nthreads   = *p_nthreads; 
-    facel      = (*p_facel) * ((1.0/gbdata->epsilon_r) - (1.0/gbdata->gb_epsilon_solvent));       
-       krf        = *p_krf;
-       crf        = *p_crf;
-       tabscl     = *p_tabscale;
-       gbtabscl   = *p_gbtabscale;
-       nj1        = 0;
-       
-       /* Splat variables */
-       fac        = _mm_load1_pd(&facel);
-       tabscale   = _mm_load1_pd(&tabscl);
-       gbtabscale = _mm_load1_pd(&gbtabscl);
-       
-       /* Keep compiler happy */
-       dvdatmp = _mm_setzero_pd();
-       vgb     = _mm_setzero_pd();
-       dvdaj   = _mm_setzero_pd();
-       isaj    = _mm_setzero_pd();
-       vcoul   = _mm_setzero_pd();
-       t1      = _mm_setzero_pd();
-       t2      = _mm_setzero_pd();
-       t3      = _mm_setzero_pd();
-       
-       jnr1=jnr2=0;
-       j13=j23=0;
+    
+  gbfactor   = _mm_set1_pd( - ((1.0/gbdata->epsilon_r) - (1.0/gbdata->gb_epsilon_solvent)));     
+  gbtabscale = _mm_load1_pd(p_gbtabscale);  
+  facel      = _mm_load1_pd(p_facel);
+  
+  nj1         = 0;
+  jnrA = jnrB = 0;
+  j3A = j3B   = 0;
+  jx          = _mm_setzero_pd();
+  jy          = _mm_setzero_pd();
+  jz          = _mm_setzero_pd();
        
        for(n=0;n<nri;n++)
        {
-               is3     = 3*shift[n];
-               shX     = shiftvec[is3];
-               shY     = shiftvec[is3+1];
-               shZ     = shiftvec[is3+2];
-               
-               nj0     = jindex[n];      
-        nj1     = jindex[n+1];  
-               offset  = (nj1-nj0)%2;
+    is3              = 3*shift[n];     
+    shX              = shiftvec[is3];  
+    shY              = shiftvec[is3+1];
+    shZ              = shiftvec[is3+2];
+    nj0              = jindex[n];      
+    nj1              = jindex[n+1];    
+    ii               = iinr[n];        
+    ii3              = 3*ii;           
                
-               ii      = iinr[n];
-               ii3     = ii*3;
-               
-               ix      = _mm_set1_pd(shX+pos[ii3+0]);
-               iy      = _mm_set1_pd(shX+pos[ii3+1]);
-               iz      = _mm_set1_pd(shX+pos[ii3+2]); 
-               q       = _mm_set1_pd(charge[ii]);
-               
-               iq      = _mm_mul_pd(fac,q); 
-               isai_d  = invsqrta[ii];
-               isai    = _mm_load1_pd(&isai_d);
-               
-               fix     = _mm_setzero_pd();
-               fiy     = _mm_setzero_pd();
-               fiz     = _mm_setzero_pd();
-               dvdasum = _mm_setzero_pd();
-               vctot   = _mm_setzero_pd();
-               vgbtot  = _mm_setzero_pd();
-               
-               for(k=nj0;k<nj1-offset; k+=2)
+               ix               = _mm_set1_pd(shX+pos[ii3+0]);
+               iy               = _mm_set1_pd(shY+pos[ii3+1]);
+               iz               = _mm_set1_pd(shZ+pos[ii3+2]);
+    
+               iq               = _mm_load1_pd(charge+ii);
+               iq               = _mm_mul_pd(iq,facel);
+    
+               isai             = _mm_load1_pd(invsqrta+ii);
+                       
+               vctot            = _mm_setzero_pd();
+               vgbtot           = _mm_setzero_pd();
+               dvdasum          = _mm_setzero_pd();
+               fix              = _mm_setzero_pd();
+               fiy              = _mm_setzero_pd();
+               fiz              = _mm_setzero_pd();
+                
+               for(k=nj0;k<nj1-1; k+=2)
                {
-                       jnr1    = jjnr[k];
-                       jnr2    = jjnr[k+1];
-                       
-                       j13     = jnr1 * 3;
-                       j23     = jnr2 * 3;
-                       
-                       /* Load coordinates */
-                       xmm1    = _mm_loadu_pd(pos+j13); /* x1 y1 */
-                       xmm2    = _mm_loadu_pd(pos+j23); /* x2 y2 */
-                       
-                       xmm5    = _mm_load_sd(pos+j13+2); /* z1 - */
-                       xmm6    = _mm_load_sd(pos+j23+2); /* z2 - */
-                       
-                       /* transpose */
-                       jx      = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(0,0)); 
-                       jy      = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(1,1)); 
-                       jz      = _mm_shuffle_pd(xmm5,xmm6,_MM_SHUFFLE2(0,0)); 
-                       
-                       /* distances */
-                       dx      = _mm_sub_pd(ix,jx);
-                       dy              = _mm_sub_pd(iy,jy);
-                       dz              = _mm_sub_pd(iz,jz);
-                       
-                       rsq11   = _mm_add_pd( _mm_add_pd( _mm_mul_pd(dx,dx) , _mm_mul_pd(dy,dy) ) , _mm_mul_pd(dz,dz) );
-                       rinv    = gmx_mm_invsqrt_pd(rsq11);
-                       
-                       /* Load invsqrta */
-                       isaj    = _mm_loadl_pd(isaj,invsqrta+jnr1);
-                       isaj    = _mm_loadh_pd(isaj,invsqrta+jnr2);
-                       isaprod = _mm_mul_pd(isai,isaj);
-                       
-                       /* Load charges */
-                       q               = _mm_loadl_pd(q,charge+jnr1);
-                       q               = _mm_loadh_pd(q,charge+jnr2);
-                       qq              = _mm_mul_pd(iq,q);
-                       
-                       vcoul   = _mm_mul_pd(qq,rinv);
-                       fscal   = _mm_mul_pd(vcoul,rinv);
-                       qq              = _mm_mul_pd(isaprod,qq);
-                       qq              = _mm_mul_pd(qq,neg);
-                       gbscale = _mm_mul_pd(isaprod,gbtabscale);
-                       
-                       /* Load dvdaj */
-                       dvdaj   = _mm_loadl_pd(dvdaj, dvda+jnr1);
-                       dvdaj   = _mm_loadh_pd(dvdaj, dvda+jnr2);
-                       
-                       r               = _mm_mul_pd(rsq11,rinv);
-                       rt              = _mm_mul_pd(r,gbscale);
-                       n0              = _mm_cvttpd_epi32(rt);
-                       n0d             = _mm_cvtepi32_pd(n0);
-                       eps             = _mm_sub_pd(rt,n0d);
-                       eps2    = _mm_mul_pd(eps,eps);
-                       
-                       nnn             = _mm_slli_epi64(n0,2);
-                       
-                       xmm1    = _mm_load_pd(GBtab+(gmx_mm_extract_epi64(nnn,0)));   /* Y1 F1 */
-                       xmm2    = _mm_load_pd(GBtab+(gmx_mm_extract_epi64(nnn,1)));   /* Y2 F2 */
-                       xmm3    = _mm_load_pd(GBtab+(gmx_mm_extract_epi64(nnn,0))+2); /* G1 H1 */
-                       xmm4    = _mm_load_pd(GBtab+(gmx_mm_extract_epi64(nnn,1))+2); /* G2 H2 */
-                       
-                       Y               = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(0,0)); /* Y1 Y2 */
-                       F               = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(1,1)); /* F1 F2 */
-                       G               = _mm_shuffle_pd(xmm3,xmm4,_MM_SHUFFLE2(0,0)); /* G1 G2 */
-                       H               = _mm_shuffle_pd(xmm3,xmm4,_MM_SHUFFLE2(1,1)); /* H1 H2 */
-                       
-                       G               = _mm_mul_pd(G,eps);
-                       H               = _mm_mul_pd(H,eps2);
-                       Fp              = _mm_add_pd(F,G);
-                       Fp              = _mm_add_pd(Fp,H);
-                       VV              = _mm_mul_pd(Fp,eps);
-                       VV              = _mm_add_pd(Y,VV);
-                       H               = _mm_mul_pd(two,H);
-                       FF              = _mm_add_pd(Fp,G);
-                       FF              = _mm_add_pd(FF,H);
-                       vgb             = _mm_mul_pd(qq,VV);
-                       fijC    = _mm_mul_pd(qq,FF);
-                       fijC    = _mm_mul_pd(fijC,gbscale);
-                       
-                       dvdatmp = _mm_mul_pd(fijC,r);
-                       dvdatmp = _mm_add_pd(vgb,dvdatmp);
-                       dvdatmp = _mm_mul_pd(dvdatmp,neg);
-                       dvdatmp = _mm_mul_pd(dvdatmp,half);
-                       dvdasum = _mm_add_pd(dvdasum,dvdatmp);
-                       
-                       xmm1    = _mm_mul_pd(dvdatmp,isaj);
-                       xmm1    = _mm_mul_pd(xmm1,isaj);
-                       dvdaj   = _mm_add_pd(dvdaj,xmm1);
-                       
-                       /* store dvda */
-                       _mm_storel_pd(dvda+jnr1,dvdaj);
-                       _mm_storeh_pd(dvda+jnr2,dvdaj);
-                       
-                       vctot   = _mm_add_pd(vctot,vcoul);
-                       vgbtot  = _mm_add_pd(vgbtot,vgb);
-                       
-                       fscal   = _mm_sub_pd(fijC,fscal);
-                       fscal   = _mm_mul_pd(fscal,neg);
-                       fscal   = _mm_mul_pd(fscal,rinv);
-                       
-                       /* calculate partial force terms */
-                       t1              = _mm_mul_pd(fscal,dx);
-                       t2              = _mm_mul_pd(fscal,dy);
-                       t3              = _mm_mul_pd(fscal,dz);
-                       
-                       /* update the i force */
-                       fix             = _mm_add_pd(fix,t1);
-                       fiy             = _mm_add_pd(fiy,t2);
-                       fiz             = _mm_add_pd(fiz,t3);
-                       
-                       /* accumulate forces from memory */
-                       xmm1    = _mm_loadu_pd(faction+j13); /* fx1 fy1 */
-                       xmm2    = _mm_loadu_pd(faction+j23); /* fx2 fy2 */
-                       
-                       xmm5    = _mm_load1_pd(faction+j13+2); /* fz1 fz1 */
-                       xmm6    = _mm_load1_pd(faction+j23+2); /* fz2 fz2 */
-                       
-                       /* transpose */
-                       xmm7    = _mm_shuffle_pd(xmm5,xmm6,_MM_SHUFFLE2(0,0)); /* fz1 fz2 */
-                       xmm5    = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(0,0)); /* fx1 fx2 */
-                       xmm6    = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(1,1)); /* fy1 fy2 */
-                       
-                       /* subtract partial forces */
-                       xmm5    = _mm_sub_pd(xmm5,t1);
-                       xmm6    = _mm_sub_pd(xmm6,t2);
-                       xmm7    = _mm_sub_pd(xmm7,t3);
-                       
-                       xmm1    = _mm_shuffle_pd(xmm5,xmm6,_MM_SHUFFLE2(0,0)); /* fx1 fy1 */
-                       xmm2    = _mm_shuffle_pd(xmm5,xmm6,_MM_SHUFFLE2(1,1)); /* fy1 fy2 */
-                       
-                       /* store fx and fy */
-                       _mm_storeu_pd(faction+j13,xmm1);
-                       _mm_storeu_pd(faction+j23,xmm2);
-                       
-                       /* .. then fz */
-                       _mm_storel_pd(faction+j13+2,xmm7);
-                       _mm_storeh_pd(faction+j23+2,xmm7);
+                       jnrA    = jjnr[k];
+                       jnrB    = jjnr[k+1];
+                       
+                       j3A     = jnrA * 3;
+                       j3B     = jnrB * 3;
+      
+      GMX_MM_LOAD_1RVEC_2POINTERS_PD(pos+j3A,pos+j3B,jx,jy,jz);
+            
+                       dx           = _mm_sub_pd(ix,jx);
+                       dy           = _mm_sub_pd(iy,jy);
+                       dz           = _mm_sub_pd(iz,jz);
+            
+      rsq          = gmx_mm_calc_rsq_pd(dx,dy,dz);
+      
+      rinv         = gmx_mm_invsqrt_pd(rsq);
+                       rinvsq       = _mm_mul_pd(rinv,rinv);
+      
+                       /***********************************/
+                       /* INTERACTION SECTION STARTS HERE */
+                       /***********************************/
+                       GMX_MM_LOAD_2VALUES_PD(charge+jnrA,charge+jnrB,jq);
+                       GMX_MM_LOAD_2VALUES_PD(invsqrta+jnrA,invsqrta+jnrB,isaj);
+            
+                       isaprod      = _mm_mul_pd(isai,isaj);
+                       qq           = _mm_mul_pd(iq,jq);            
+                       vcoul        = _mm_mul_pd(qq,rinv);
+                       fscal        = _mm_mul_pd(vcoul,rinv);                                 
+      vctot        = _mm_add_pd(vctot,vcoul);
+            
+            /* Polarization interaction */
+                       qq           = _mm_mul_pd(qq,_mm_mul_pd(isaprod,gbfactor));
+                       gbscale      = _mm_mul_pd(isaprod,gbtabscale);
+            
+                       /* Calculate GB table index */
+                       r            = _mm_mul_pd(rsq,rinv);
+                       rtab         = _mm_mul_pd(r,gbscale);
+                       
+                       n0                   = _mm_cvttpd_epi32(rtab);
+                       eps              = _mm_sub_pd(rtab,_mm_cvtepi32_pd(n0));
+                       nnn                  = _mm_slli_epi32(n0,2);
+                       
+      /* the tables are 16-byte aligned, so we can use _mm_load_pd */                  
+      Y            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))); 
+      F            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,1)));
+      GMX_MM_TRANSPOSE2_PD(Y,F);
+      G            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))+2); 
+      H            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,1))+2);
+      GMX_MM_TRANSPOSE2_PD(G,H);
+      
+      G       = _mm_mul_pd(G,eps);
+      H       = _mm_mul_pd(H, _mm_mul_pd(eps,eps) );
+      F       = _mm_add_pd(F, _mm_add_pd( G , H ) );
+      Y       = _mm_add_pd(Y, _mm_mul_pd(F, eps));
+      F       = _mm_add_pd(F, _mm_add_pd(G , _mm_mul_pd(H,two)));
+      vgb     = _mm_mul_pd(Y, qq);           
+      fijGB   = _mm_mul_pd(F, _mm_mul_pd(qq,gbscale));
+      
+      dvdatmp = _mm_mul_pd(_mm_add_pd(vgb, _mm_mul_pd(fijGB,r)) , minushalf);
+      
+      vgbtot  = _mm_add_pd(vgbtot, vgb);
+      
+      dvdasum = _mm_add_pd(dvdasum, dvdatmp);
+      dvdatmp = _mm_mul_pd(dvdatmp, _mm_mul_pd(isaj,isaj));
+      
+      GMX_MM_INCREMENT_2VALUES_PD(dvda+jnrA,dvda+jnrB,dvdatmp);
+      
+      fscal        = _mm_mul_pd( _mm_sub_pd( fscal, fijGB),rinv );
+      
+      /***********************************/
+                       /*  INTERACTION SECTION ENDS HERE  */
+                       /***********************************/
+      
+      /* Calculate temporary vectorial force */
+      tx           = _mm_mul_pd(fscal,dx);
+      ty           = _mm_mul_pd(fscal,dy);
+      tz           = _mm_mul_pd(fscal,dz);
+      
+      /* Increment i atom force */
+      fix          = _mm_add_pd(fix,tx);
+      fiy          = _mm_add_pd(fiy,ty);
+      fiz          = _mm_add_pd(fiz,tz);
+      
+      /* Store j forces back */
+                       GMX_MM_DECREMENT_1RVEC_2POINTERS_PD(faction+j3A,faction+j3B,tx,ty,tz);
                }
                
                /* In double precision, offset can only be either 0 or 1 */
-               if(offset!=0)
+               if(k<nj1)
                {
-                       jnr1    = jjnr[k];
-                       j13             = jnr1*3;
-                       
-                       jx      = _mm_load_sd(pos+j13);
-                       jy      = _mm_load_sd(pos+j13+1);
-                       jz      = _mm_load_sd(pos+j13+2);
-                       
-                       isaj    = _mm_load_sd(invsqrta+jnr1);
-                       isaprod = _mm_mul_sd(isai,isaj);
-                       dvdaj   = _mm_load_sd(dvda+jnr1);
-                       q               = _mm_load_sd(charge+jnr1);
-                       qq      = _mm_mul_sd(iq,q);
-                       
-                       dx      = _mm_sub_sd(ix,jx);
-                       dy              = _mm_sub_sd(iy,jy);
-                       dz              = _mm_sub_sd(iz,jz);
-                       
-                       rsq11   = _mm_add_pd( _mm_add_pd( _mm_mul_pd(dx,dx) , _mm_mul_pd(dy,dy) ) , _mm_mul_pd(dz,dz) );
-                       rinv    = gmx_mm_invsqrt_pd(rsq11);
-                       
-                       vcoul   = _mm_mul_sd(qq,rinv);
-                       fscal   = _mm_mul_sd(vcoul,rinv);
-                       qq              = _mm_mul_sd(isaprod,qq);
-                       qq              = _mm_mul_sd(qq,neg);
-                       gbscale = _mm_mul_sd(isaprod,gbtabscale);
-                       
-                       r               = _mm_mul_sd(rsq11,rinv);
-                       rt              = _mm_mul_sd(r,gbscale);
-                       n0              = _mm_cvttpd_epi32(rt);
-                       n0d             = _mm_cvtepi32_pd(n0);
-                       eps             = _mm_sub_sd(rt,n0d);
-                       eps2    = _mm_mul_sd(eps,eps);
-                       
-                       nnn             = _mm_slli_epi64(n0,2);
-                       
-                       xmm1    = _mm_load_pd(GBtab+(gmx_mm_extract_epi64(nnn,0))); 
-                       xmm2    = _mm_load_pd(GBtab+(gmx_mm_extract_epi64(nnn,1))); 
-                       xmm3    = _mm_load_pd(GBtab+(gmx_mm_extract_epi64(nnn,0))+2); 
-                       xmm4    = _mm_load_pd(GBtab+(gmx_mm_extract_epi64(nnn,1))+2); 
-                       
-                       Y               = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(0,0)); 
-                       F               = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(1,1)); 
-                       G               = _mm_shuffle_pd(xmm3,xmm4,_MM_SHUFFLE2(0,0)); 
-                       H               = _mm_shuffle_pd(xmm3,xmm4,_MM_SHUFFLE2(1,1)); 
-                       
-                       G               = _mm_mul_sd(G,eps);
-                       H               = _mm_mul_sd(H,eps2);
-                       Fp              = _mm_add_sd(F,G);
-                       Fp              = _mm_add_sd(Fp,H);
-                       VV              = _mm_mul_sd(Fp,eps);
-                       VV              = _mm_add_sd(Y,VV);
-                       H               = _mm_mul_sd(two,H);
-                       FF              = _mm_add_sd(Fp,G);
-                       FF              = _mm_add_sd(FF,H);
-                       vgb             = _mm_mul_sd(qq,VV);
-                       fijC    = _mm_mul_sd(qq,FF);
-                       fijC    = _mm_mul_sd(fijC,gbscale);
-                       
-                       dvdatmp = _mm_mul_sd(fijC,r);
-                       dvdatmp = _mm_add_sd(vgb,dvdatmp);
-                       dvdatmp = _mm_mul_sd(dvdatmp,neg);
-                       dvdatmp = _mm_mul_sd(dvdatmp,half);
-                       dvdasum = _mm_add_sd(dvdasum,dvdatmp);
-                       
-                       xmm1    = _mm_mul_sd(dvdatmp,isaj);
-                       xmm1    = _mm_mul_sd(xmm1,isaj);
-                       dvdaj   = _mm_add_sd(dvdaj,xmm1);
-                       
-                       /* store dvda */
-                       _mm_storel_pd(dvda+jnr1,dvdaj);
-                       
-                       vctot   = _mm_add_sd(vctot,vcoul);
-                       vgbtot  = _mm_add_sd(vgbtot,vgb);
-                       
-                       fscal   = _mm_sub_sd(fijC,fscal);
-                       fscal   = _mm_mul_sd(fscal,neg);
-                       fscal   = _mm_mul_sd(fscal,rinv);
-                       
-                       /* calculate partial force terms */
-                       t1              = _mm_mul_sd(fscal,dx);
-                       t2              = _mm_mul_sd(fscal,dy);
-                       t3              = _mm_mul_sd(fscal,dz);
-                       
-                       /* update the i force */
-                       fix             = _mm_add_sd(fix,t1);
-                       fiy             = _mm_add_sd(fiy,t2);
-                       fiz             = _mm_add_sd(fiz,t3);
-                       
-                       /* accumulate forces from memory */
-                       xmm5    = _mm_load_sd(faction+j13);   /* fx */
-                       xmm6    = _mm_load_sd(faction+j13+1); /* fy */
-                       xmm7    = _mm_load_sd(faction+j13+2); /* fz */
-                       
-                       /* subtract partial forces */
-                       xmm5    = _mm_sub_sd(xmm5,t1);
-                       xmm6    = _mm_sub_sd(xmm6,t2);
-                       xmm7    = _mm_sub_sd(xmm7,t3);
-                       
-                       /* store forces */
-                       _mm_store_sd(faction+j13,xmm5);
-                       _mm_store_sd(faction+j13+1,xmm6);
-                       _mm_store_sd(faction+j13+2,xmm7);
+                       jnrA    = jjnr[k];
+                       j3A     = jnrA * 3;
+      
+      GMX_MM_LOAD_1RVEC_1POINTER_PD(pos+j3A,jx,jy,jz);
+      
+                       dx           = _mm_sub_sd(ix,jx);
+                       dy           = _mm_sub_sd(iy,jy);
+                       dz           = _mm_sub_sd(iz,jz);
+      
+      rsq          = gmx_mm_calc_rsq_pd(dx,dy,dz);
+      
+      rinv         = gmx_mm_invsqrt_pd(rsq);
+                       rinvsq       = _mm_mul_sd(rinv,rinv);
+      
+      /* The reason for zeroing these variables here is to fix bug 585.
+       * What happens is that __m128d _mm_add_sd(a,b) gives back r0=a[0]+b[0]
+       * and r1=0, but it should be r1=a[1].
+       * This might be a compiler issue (tested with gcc-4.1.3 and -O3).
+       * To work around it, we zero these variables and use _mm_add_pd (**) instead.
+       * Note that the only variables that get affected are the energies, since
+       * the total sum needs to be correct.
+       */
+      vgb          = _mm_setzero_pd();
+      vcoul        = _mm_setzero_pd();
+      dvdatmp      = _mm_setzero_pd();
+      
+                       /***********************************/
+                       /* INTERACTION SECTION STARTS HERE */
+                       /***********************************/
+                       GMX_MM_LOAD_1VALUE_PD(charge+jnrA,jq);
+                       GMX_MM_LOAD_1VALUE_PD(invsqrta+jnrA,isaj);
+      
+                       isaprod      = _mm_mul_sd(isai,isaj);
+                       qq           = _mm_mul_sd(jq,iq);            
+                       vcoul        = _mm_mul_sd(qq,rinv);
+                       fscal        = _mm_mul_sd(vcoul,rinv);                                 
+      vctot        = _mm_add_pd(vctot,vcoul); /* (**) */
+      
+      /* Polarization interaction */
+                       qq           = _mm_mul_sd(qq,_mm_mul_sd(isaprod,gbfactor));
+                       gbscale      = _mm_mul_sd(isaprod,gbtabscale);
+      
+                       /* Calculate GB table index */
+                       r            = _mm_mul_sd(rsq,rinv);
+                       rtab         = _mm_mul_sd(r,gbscale);
+      
+                       n0                   = _mm_cvttpd_epi32(rtab);
+                       eps              = _mm_sub_sd(rtab,_mm_cvtepi32_pd(n0));
+                       nnn                  = _mm_slli_epi32(n0,2);
+                       
+      /* the tables are 16-byte aligned, so we can use _mm_load_pd */                  
+      Y            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))); 
+      F            = _mm_setzero_pd();
+      GMX_MM_TRANSPOSE2_PD(Y,F);
+      G            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))+2); 
+      H            = _mm_setzero_pd();
+      GMX_MM_TRANSPOSE2_PD(G,H);
+      
+      G       = _mm_mul_sd(G,eps);
+      H       = _mm_mul_sd(H, _mm_mul_sd(eps,eps) );
+      F       = _mm_add_sd(F, _mm_add_sd( G , H ) );
+      Y       = _mm_add_sd(Y, _mm_mul_sd(F, eps));
+      F       = _mm_add_sd(F, _mm_add_sd(G , _mm_mul_sd(H,two)));
+      vgb     = _mm_mul_sd(Y, qq);           
+      fijGB   = _mm_mul_sd(F, _mm_mul_sd(qq,gbscale));
+      
+      dvdatmp = _mm_mul_sd(_mm_add_sd(vgb, _mm_mul_sd(fijGB,r)) , minushalf);
+      
+      vgbtot  = _mm_add_pd(vgbtot, vgb); /* (**) */
+      
+      dvdasum = _mm_add_pd(dvdasum, dvdatmp); /* (**) */
+      dvdatmp = _mm_mul_sd(dvdatmp, _mm_mul_sd(isaj,isaj));
+      
+      GMX_MM_INCREMENT_1VALUE_PD(dvda+jnrA,dvdatmp);
+                       
+      fscal        = _mm_mul_sd( _mm_sub_sd( fscal, fijGB),rinv );
+      
+      /***********************************/
+                       /*  INTERACTION SECTION ENDS HERE  */
+                       /***********************************/
+      
+      /* Calculate temporary vectorial force */
+      tx           = _mm_mul_sd(fscal,dx);
+      ty           = _mm_mul_sd(fscal,dy);
+      tz           = _mm_mul_sd(fscal,dz);
+      
+      /* Increment i atom force */
+      fix          = _mm_add_sd(fix,tx);
+      fiy          = _mm_add_sd(fiy,ty);
+      fiz          = _mm_add_sd(fiz,tz);
+      
+      /* Store j forces back */
+                       GMX_MM_DECREMENT_1RVEC_1POINTER_PD(faction+j3A,tx,ty,tz);
                }
                
-               /* fix/fiy/fiz now contain four partial terms, that all should be
-                * added to the i particle forces
-                */
-               t1               = _mm_unpacklo_pd(t1,fix);
-               t2               = _mm_unpacklo_pd(t2,fiy);
-               t3               = _mm_unpacklo_pd(t3,fiz);
-               
-               fix              = _mm_add_pd(fix,t1);
-               fiy              = _mm_add_pd(fiy,t2);
-               fiz              = _mm_add_pd(fiz,t3);
-               
-               fix      = _mm_shuffle_pd(fix,fix,_MM_SHUFFLE2(1,1));
-               fiy      = _mm_shuffle_pd(fiy,fiy,_MM_SHUFFLE2(1,1));
-               fiz      = _mm_shuffle_pd(fiz,fiz,_MM_SHUFFLE2(1,1));
-               
-               /* Load i forces from memory */
-               xmm1     = _mm_load_sd(faction+ii3);
-               xmm2     = _mm_load_sd(faction+ii3+1);
-               xmm3     = _mm_load_sd(faction+ii3+2);
-               
-               /* Add to i force */
-               fix      = _mm_add_sd(fix,xmm1);
-               fiy      = _mm_add_sd(fiy,xmm2);
-               fiz      = _mm_add_sd(fiz,xmm3);
-               
-               /* store i forces to memory */
-               _mm_store_sd(faction+ii3,fix);
-               _mm_store_sd(faction+ii3+1,fiy);
-               _mm_store_sd(faction+ii3+2,fiz);
-               
-               /* now do dvda */
-               dvdatmp  = _mm_unpacklo_pd(dvdatmp,dvdasum);
-               dvdasum  = _mm_add_pd(dvdasum,dvdatmp);
-               _mm_storeh_pd(&dva,dvdasum);
-               dvda[ii] = dvda[ii] + dva*isai_d*isai_d;
-               
-               ggid     = gid[n];
-               
-               /* Coulomb potential */
-               vcoul    = _mm_unpacklo_pd(vcoul,vctot);
-               vctot    = _mm_add_pd(vctot,vcoul);
-               _mm_storeh_pd(&vct,vctot);
-               Vc[ggid] = Vc[ggid] + vct;
-               
-               /* GB potential */
-               vgb      = _mm_unpacklo_pd(vgb,vgbtot);
-               vgbtot   = _mm_add_pd(vgbtot,vgb);
-               _mm_storeh_pd(&vgbt,vgbtot);
-               gpol[ggid] = gpol[ggid] + vgbt;
-       }
-       
-       *outeriter   = nri;            
-    *inneriter   = nj1;        
-}
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel400nf
- * Coulomb interaction:     Generalized-Born
- * VdW interaction:         Not calculated
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel400nf_sse2_double(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    double *         shiftvec,
-                    double *         fshift,
-                    int *           gid,
-                    double *         pos,
-                    double *         faction,
-                    double *         charge,
-                    double *         p_facel,
-                    double *         p_krf,
-                    double *         p_crf,
-                    double *         Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    double *         vdwparam,
-                    double *         Vvdw,
-                    double *         p_tabscale,
-                    double *         VFtab,
-                    double *         invsqrta,
-                    double *         dvda,
-                    double *         p_gbtabscale,
-                    double *         GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    double *         work)
-{
-    int           nri,ntype,nthreads;
-    double         facel,krf,crf,tabscale,gbtabscale,vgb,fgb;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    double         shX,shY,shZ;
-    double         iq;
-    double         qq,vcoul,vctot;
-    double         r,rt,eps,eps2;
-    int           n0,nnn;
-    double         Y,F,Geps,Heps2,Fp,VV;
-    double         isai,isaj,isaprod,gbscale;
-    double         ix1,iy1,iz1;
-    double         jx1,jy1,jz1;
-    double         dx11,dy11,dz11,rsq11,rinv11;
-    const int     fractshift = 12;
-    const int     fractmask = 8388607;
-    const int     expshift = 23;
-    const int     expmask = 2139095040;
-    const int     explsb = 8388608;
-    double         lu;
-    int           iexp,addr;
-    union { unsigned int bval; double fval; } bitpattern,result;
-       
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-    gbtabscale       = *p_gbtabscale;  
-    nj1              = 0;              
+    dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai,isai));
+    gmx_mm_update_iforce_1atom_pd(&fix,&fiy,&fiz,faction+ii3,fshift+is3);
     
-    for(n=0; (n<nri); n++)
-    {
-        is3              = 3*shift[n];     
-        shX              = shiftvec[is3];  
-        shY              = shiftvec[is3+1];
-        shZ              = shiftvec[is3+2];
-        nj0              = jindex[n];      
-        nj1              = jindex[n+1];    
-        ii               = iinr[n];        
-        ii3              = 3*ii;           
-        ix1              = shX + pos[ii3+0];
-        iy1              = shY + pos[ii3+1];
-        iz1              = shZ + pos[ii3+2];
-        iq               = facel*charge[ii];
-        isai             = invsqrta[ii];   
-        vctot            = 0;              
-        
-        for(k=nj0; (k<nj1); k++)
-        {
-            jnr              = jjnr[k];        
-            j3               = 3*jnr;          
-            jx1              = pos[j3+0];      
-            jy1              = pos[j3+1];      
-            jz1              = pos[j3+2];      
-            dx11             = ix1 - jx1;      
-            dy11             = iy1 - jy1;      
-            dz11             = iz1 - jz1;      
-            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-            bitpattern.fval  = rsq11;          
-            iexp             = (((bitpattern.bval)&expmask)>>expshift);
-            addr             = (((bitpattern.bval)&(fractmask|explsb))>>fractshift);
-            result.bval      = gmx_invsqrt_exptab[iexp] | gmx_invsqrt_fracttab[addr];
-            lu               = result.fval;    
-            rinv11           = (0.5*lu*(3.0-((rsq11*lu)*lu)));
-            isaj             = invsqrta[jnr];  
-            isaprod          = isai*isaj;      
-            qq               = iq*charge[jnr]; 
-            vcoul            = qq*rinv11;      
-            qq               = isaprod*(-qq);  
-            gbscale          = isaprod*gbtabscale;
-            r                = rsq11*rinv11;   
-            rt               = r*gbscale;      
-            n0               = rt;             
-            eps              = rt-n0;          
-            eps2             = eps*eps;        
-            nnn              = 4*n0;           
-            Y                = GBtab[nnn];     
-            F                = GBtab[nnn+1];   
-            Geps             = eps*GBtab[nnn+2];
-            Heps2            = eps2*GBtab[nnn+3];
-            Fp               = F+Geps+Heps2;   
-            VV               = Y+eps*Fp;       
-            vgb              = qq*VV;          
-            vctot            = vctot + vcoul;  
-        }
-        
-        ggid             = gid[n];         
-        Vc[ggid]         = Vc[ggid] + vctot;
-    }
+    ggid     = gid[n];         
     
-    *outeriter       = nri;            
-    *inneriter       = nj1;            
+    gmx_mm_update_1pot_pd(vctot,vc+ggid);
+    gmx_mm_update_1pot_pd(vgbtot,gpol+ggid);
+    gmx_mm_update_1pot_pd(dvdasum,dvda+ii);
+  }
+  
+       *outeriter   = nri;            
+  *inneriter   = nj1;  
 }
-
-
index d5e17e7f790598ec1b8edf8c7a4c4468e898ea29..2a65f5208be7cf501a0f9b338e2b8a7375cfa64b 100644 (file)
 #include "../nb_kerneltype.h"
 
 
+
 void nb_kernel410_sse2_double(int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    double *         shiftvec,
-                    double *         fshift,
-                    int *           gid,
-                    double *         pos,
-                    double *         faction,
-                    double *         charge,
-                    double *         p_facel,
-                    double *         p_krf,
-                    double *         p_crf,
-                    double *         Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    double *         vdwparam,
-                    double *         Vvdw,
-                    double *         p_tabscale,
-                    double *         VFtab,
-                    double *         invsqrta,
-                    double *         dvda,
-                    double *         p_gbtabscale,
-                    double *         GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    double *         work)
+                                                       int *           iinr,
+                                                       int *           jindex,
+                                                       int *           jjnr,
+                                                       int *           shift,
+                                                       double *         shiftvec,
+                                                       double *         fshift,
+                                                       int *           gid,
+                                                       double *         pos,
+                                                       double *         faction,
+                                                       double *         charge,
+                                                       double *         p_facel,
+                                                       double *         p_krf,
+                                                       double *         p_crf,
+                                                       double *         vc,
+                                                       int *           type,
+                                                       int *           p_ntype,
+                                                       double *         vdwparam,
+                                                       double *         vvdw,
+                                                       double *         p_tabscale,
+                                                       double *         VFtab,
+                                                       double *         invsqrta,
+                                                       double *         dvda,
+                                                       double *         p_gbtabscale,
+                                                       double *         GBtab,
+                                                       int *           p_nthreads,
+                                                       int *           count,
+                                                       void *          mtx,
+                                                       int *           outeriter,
+                                                       int *           inneriter,
+                                                       double *         work)
 {
-       int           nri,ntype,nthreads,offset,tj,tj2,nti;
-       int           n,ii,is3,ii3,k,nj0,nj1,jnr1,jnr2,j13,j23,ggid;
-       double        facel,krf,crf,tabscl,gbtabscl,vct,vdwt,nt1,nt2;
-       double        shX,shY,shZ,isai_d,dva,vgbt;
+  int           nri,ntype,nthreads;
+  int           n,ii,is3,ii3,k,nj0,nj1,ggid;
+  double        shX,shY,shZ;
+       int                       offset,nti;
+  int           jnrA,jnrB;
+  int           j3A,j3B;
+       int           tjA,tjB;
        gmx_gbdata_t *gbdata;
-       float *        gpol;
-
-       __m128d       ix,iy,iz,jx,jy,jz;
-       __m128d           dx,dy,dz,t1,t2,t3;
-       __m128d           fix,fiy,fiz,rsq11,rinv,r,fscal,rt,eps,eps2;
-       __m128d           q,iq,qq,isai,isaj,isaprod,vcoul,gbscale,dvdai,dvdaj;
-       __m128d       Y,F,G,H,Fp,VV,FF,vgb,fijC,dvdatmp,dvdasum,vctot,vgbtot,n0d;
-       __m128d           xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7,xmm8;
-       __m128d       c6,c12,Vvdw6,Vvdw12,Vvdwtmp,Vvdwtot,rinvsq,rinvsix;
-       __m128d       fac,tabscale,gbtabscale;
-       __m128i       n0,nnn;
+       double *      gpol;
+    
+       __m128d  iq,qq,jq,isai;
+       __m128d  ix,iy,iz;
+       __m128d  jx,jy,jz;
+       __m128d  dx,dy,dz;
+       __m128d  vctot,vvdwtot,vgbtot,dvdasum,gbfactor;
+       __m128d  fix,fiy,fiz,tx,ty,tz,rsq;
+       __m128d  rinv,isaj,isaprod;
+       __m128d  vcoul,fscal,gbscale,c6,c12;
+       __m128d  rinvsq,r,rtab;
+       __m128d  eps,Y,F,G,H;
+       __m128d  vgb,fijGB,dvdatmp;
+       __m128d  rinvsix,vvdw6,vvdw12;
+       __m128d  facel,gbtabscale,dvdaj;
+       __m128i  n0, nnn;
        
-       const __m128d neg    = {-1.0,-1.0};
-       const __m128d zero   = {0.0,0.0};
-       const __m128d half   = {0.5,0.5};
-       const __m128d two    = {2.0,2.0};
-       const __m128d three  = {3.0,3.0};
-       const __m128d six    = {6.0,6.0};
-       const __m128d twelwe = {12.0,12.0};
+       const __m128d neg        = _mm_set1_pd(-1.0);
+       const __m128d zero       = _mm_set1_pd(0.0);
+       const __m128d minushalf  = _mm_set1_pd(-0.5);
+       const __m128d two        = _mm_set1_pd(2.0);
+       const __m128d six        = _mm_set1_pd(6.0);
+       const __m128d twelve     = _mm_set1_pd(12.0);
        
        gbdata     = (gmx_gbdata_t *)work;
        gpol       = gbdata->gpol;
-       
+
        nri        = *p_nri;
        ntype      = *p_ntype;
-       nthreads   = *p_nthreads; 
-    facel      = (*p_facel) * ((1.0/gbdata->epsilon_r) - (1.0/gbdata->gb_epsilon_solvent));       
-       krf        = *p_krf;
-       crf        = *p_crf;
-       tabscl     = *p_tabscale;
-       gbtabscl   = *p_gbtabscale;
-       nj1        = 0;
-       
-       /* Splat variables */
-       fac        = _mm_load1_pd(&facel);
-       tabscale   = _mm_load1_pd(&tabscl);
-       gbtabscale = _mm_load1_pd(&gbtabscl);
-       
-       /* Keep compiler happy */
-       Vvdwtmp = _mm_setzero_pd();
-       Vvdwtot = _mm_setzero_pd();
-       dvdatmp = _mm_setzero_pd();
-       dvdaj   = _mm_setzero_pd();
-       isaj    = _mm_setzero_pd();
-       vcoul   = _mm_setzero_pd();
-       vgb     = _mm_setzero_pd();
-       t1      = _mm_setzero_pd();
-       t2      = _mm_setzero_pd();
-       t3      = _mm_setzero_pd();
-       xmm1    = _mm_setzero_pd();
-       xmm2    = _mm_setzero_pd();
-       xmm3    = _mm_setzero_pd();
-       xmm4    = _mm_setzero_pd();
-       jnr1    = jnr2 = 0;
-       j13     = j23  = 0;
+    
+  gbfactor   = _mm_set1_pd( - ((1.0/gbdata->epsilon_r) - (1.0/gbdata->gb_epsilon_solvent)));     
+  gbtabscale = _mm_load1_pd(p_gbtabscale);  
+  facel      = _mm_load1_pd(p_facel);
+  
+  nj1         = 0;
+  jnrA = jnrB = 0;
+  j3A = j3B   = 0;
+  jx          = _mm_setzero_pd();
+  jy          = _mm_setzero_pd();
+  jz          = _mm_setzero_pd();
+  c6          = _mm_setzero_pd();
+  c12         = _mm_setzero_pd();
        
        for(n=0;n<nri;n++)
        {
-               is3     = 3*shift[n];
-               shX     = shiftvec[is3];
-               shY     = shiftvec[is3+1];
-               shZ     = shiftvec[is3+2];
-               
-               nj0     = jindex[n];      
-        nj1     = jindex[n+1];  
-               offset  = (nj1-nj0)%2;
-               
-               ii      = iinr[n];
-               ii3     = ii*3;
-               
-               ix      = _mm_set1_pd(shX+pos[ii3+0]);
-               iy      = _mm_set1_pd(shX+pos[ii3+1]);
-               iz      = _mm_set1_pd(shX+pos[ii3+2]); 
-               q       = _mm_set1_pd(charge[ii]);
+    is3              = 3*shift[n];     
+    shX              = shiftvec[is3];  
+    shY              = shiftvec[is3+1];
+    shZ              = shiftvec[is3+2];
+    nj0              = jindex[n];      
+    nj1              = jindex[n+1];    
+    ii               = iinr[n];        
+    ii3              = 3*ii;           
                
-               iq      = _mm_mul_pd(fac,q); 
-               isai_d  = invsqrta[ii];
-               isai    = _mm_load1_pd(&isai_d);
-               
-               nti      = 2*ntype*type[ii];
-               
-               fix     = _mm_setzero_pd();
-               fiy     = _mm_setzero_pd();
-               fiz     = _mm_setzero_pd();
-               dvdasum = _mm_setzero_pd();
-               vctot   = _mm_setzero_pd();
-               vgbtot  = _mm_setzero_pd();
-               Vvdwtot = _mm_setzero_pd();
+               ix               = _mm_set1_pd(shX+pos[ii3+0]);
+               iy               = _mm_set1_pd(shY+pos[ii3+1]);
+               iz               = _mm_set1_pd(shZ+pos[ii3+2]);
+    
+               iq               = _mm_load1_pd(charge+ii);
+               iq               = _mm_mul_pd(iq,facel);
+    
+               isai             = _mm_load1_pd(invsqrta+ii);
+        
+               nti              = 2*ntype*type[ii];
                
-               for(k=nj0;k<nj1-offset; k+=2)
+               vctot            = _mm_setzero_pd();
+               vvdwtot          = _mm_setzero_pd();
+               vgbtot           = _mm_setzero_pd();
+               dvdasum          = _mm_setzero_pd();
+               fix              = _mm_setzero_pd();
+               fiy              = _mm_setzero_pd();
+               fiz              = _mm_setzero_pd();
+        
+               for(k=nj0;k<nj1-1; k+=2)
                {
-                       jnr1    = jjnr[k];
-                       jnr2    = jjnr[k+1];
-                       
-                       j13     = jnr1 * 3;
-                       j23     = jnr2 * 3;
-                       
-                       /* Load coordinates */
-                       xmm1    = _mm_loadu_pd(pos+j13); /* x1 y1 */
-                       xmm2    = _mm_loadu_pd(pos+j23); /* x2 y2 */
-                       
-                       xmm5    = _mm_load_sd(pos+j13+2); /* z1 - */
-                       xmm6    = _mm_load_sd(pos+j23+2); /* z2 - */
-                       
-                       /* transpose */
-                       jx      = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(0,0)); 
-                       jy      = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(1,1)); 
-                       jz      = _mm_shuffle_pd(xmm5,xmm6,_MM_SHUFFLE2(0,0)); 
-                       
-                       /* distances */
-                       dx      = _mm_sub_pd(ix,jx);
-                       dy              = _mm_sub_pd(iy,jy);
-                       dz              = _mm_sub_pd(iz,jz);
-                       
-                       rsq11   = _mm_add_pd( _mm_add_pd( _mm_mul_pd(dx,dx) , _mm_mul_pd(dy,dy) ) , _mm_mul_pd(dz,dz) );
-                       rinv    = gmx_mm_invsqrt_pd(rsq11);
-                       
-                       /* Load invsqrta */
-                       isaj    = _mm_loadl_pd(isaj,invsqrta+jnr1);
-                       isaj    = _mm_loadh_pd(isaj,invsqrta+jnr2);
-                       isaprod = _mm_mul_pd(isai,isaj);
-                       
-                       /* Load charges */
-                       q               = _mm_loadl_pd(q,charge+jnr1);
-                       q               = _mm_loadh_pd(q,charge+jnr2);
-                       qq              = _mm_mul_pd(iq,q);
-                       
-                       vcoul   = _mm_mul_pd(qq,rinv);
-                       fscal   = _mm_mul_pd(vcoul,rinv);
-                       qq              = _mm_mul_pd(isaprod,qq);
-                       qq              = _mm_mul_pd(qq,neg);
-                       gbscale = _mm_mul_pd(isaprod,gbtabscale);
-                       
-                       /* Load VdW parameters */
-                       tj      = nti+2*type[jnr1];
-                       tj2     = nti+2*type[jnr2];
+                       jnrA    = jjnr[k];
+                       jnrB    = jjnr[k+1];
                        
-                       xmm1      = _mm_loadu_pd(vdwparam+tj);
-                       xmm2     = _mm_loadu_pd(vdwparam+tj2);
-                       c6      = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(0,0));
-                       c12     = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(1,1));
-                       
-                       rinvsq  = _mm_mul_pd(rinv,rinv);
-                       
-                       /* Load dvdaj */
-                       dvdaj   = _mm_loadl_pd(dvdaj, dvda+jnr1);
-                       dvdaj   = _mm_loadh_pd(dvdaj, dvda+jnr2);
-                       
-                       r               = _mm_mul_pd(rsq11,rinv);
-                       rt              = _mm_mul_pd(r,gbscale);
-                       n0              = _mm_cvttpd_epi32(rt);
-                       n0d             = _mm_cvtepi32_pd(n0);
-                       eps             = _mm_sub_pd(rt,n0d);
-                       eps2    = _mm_mul_pd(eps,eps);
-                       
-                       nnn             = _mm_slli_epi64(n0,2);
-                       
-                       xmm1    = _mm_load_pd(GBtab+(gmx_mm_extract_epi64(nnn,0)));   /* Y1 F1 */
-                       xmm2    = _mm_load_pd(GBtab+(gmx_mm_extract_epi64(nnn,1)));   /* Y2 F2 */
-                       xmm3    = _mm_load_pd(GBtab+(gmx_mm_extract_epi64(nnn,0))+2); /* G1 H1 */
-                       xmm4    = _mm_load_pd(GBtab+(gmx_mm_extract_epi64(nnn,1))+2); /* G2 H2 */
-                       
-                       Y               = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(0,0)); /* Y1 Y2 */
-                       F               = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(1,1)); /* F1 F2 */
-                       G               = _mm_shuffle_pd(xmm3,xmm4,_MM_SHUFFLE2(0,0)); /* G1 G2 */
-                       H               = _mm_shuffle_pd(xmm3,xmm4,_MM_SHUFFLE2(1,1)); /* H1 H2 */
-                       
-                       G               = _mm_mul_pd(G,eps);
-                       H               = _mm_mul_pd(H,eps2);
-                       Fp              = _mm_add_pd(F,G);
-                       Fp              = _mm_add_pd(Fp,H);
-                       VV              = _mm_mul_pd(Fp,eps);
-                       VV              = _mm_add_pd(Y,VV);
-                       H               = _mm_mul_pd(two,H);
-                       FF              = _mm_add_pd(Fp,G);
-                       FF              = _mm_add_pd(FF,H);
-                       vgb             = _mm_mul_pd(qq,VV);
-                       fijC    = _mm_mul_pd(qq,FF);
-                       fijC    = _mm_mul_pd(fijC,gbscale);
-                       
-                       dvdatmp = _mm_mul_pd(fijC,r);
-                       dvdatmp = _mm_add_pd(vgb,dvdatmp);
-                       dvdatmp = _mm_mul_pd(dvdatmp,neg);
-                       dvdatmp = _mm_mul_pd(dvdatmp,half);
-                       dvdasum = _mm_add_pd(dvdasum,dvdatmp);
-                       
-                       xmm1    = _mm_mul_pd(dvdatmp,isaj);
-                       xmm1    = _mm_mul_pd(xmm1,isaj);
-                       dvdaj   = _mm_add_pd(dvdaj,xmm1);
-                       
-                       /* store dvda */
-                       _mm_storel_pd(dvda+jnr1,dvdaj);
-                       _mm_storeh_pd(dvda+jnr2,dvdaj);
-                       
-                       vctot   = _mm_add_pd(vctot,vcoul);
-                       vgbtot  = _mm_add_pd(vgbtot,vgb);
-                       
-                       /* VdW interaction */
-                       rinvsix = _mm_mul_pd(rinvsq,rinvsq);
-                       rinvsix = _mm_mul_pd(rinvsix,rinvsq);
-                       
-                       Vvdw6   = _mm_mul_pd(c6,rinvsix);
-                       Vvdw12  = _mm_mul_pd(c12,rinvsix);
-                       Vvdw12  = _mm_mul_pd(Vvdw12,rinvsix);
-                       Vvdwtmp = _mm_sub_pd(Vvdw12,Vvdw6);
-                       Vvdwtot = _mm_add_pd(Vvdwtot,Vvdwtmp);
-                       
-                       xmm1    = _mm_mul_pd(twelwe,Vvdw12);
-                       xmm2    = _mm_mul_pd(six,Vvdw6);
-                       xmm1    = _mm_sub_pd(xmm1,xmm2);
-                       xmm1    = _mm_mul_pd(xmm1,rinvsq);
-                       
-                       /* Scalar force */
-                       fscal   = _mm_sub_pd(fijC,fscal);
-                       fscal   = _mm_mul_pd(fscal,rinv);
-                       fscal   = _mm_sub_pd(xmm1,fscal);
-                       
-                       /* calculate partial force terms */
-                       t1              = _mm_mul_pd(fscal,dx);
-                       t2              = _mm_mul_pd(fscal,dy);
-                       t3              = _mm_mul_pd(fscal,dz);
-                       
-                       /* update the i force */
-                       fix             = _mm_add_pd(fix,t1);
-                       fiy             = _mm_add_pd(fiy,t2);
-                       fiz             = _mm_add_pd(fiz,t3);
-                       
-                       /* accumulate forces from memory */
-                       xmm1    = _mm_loadu_pd(faction+j13); /* fx1 fy1 */
-                       xmm2    = _mm_loadu_pd(faction+j23); /* fx2 fy2 */
-                       
-                       xmm5    = _mm_load1_pd(faction+j13+2); /* fz1 fz1 */
-                       xmm6    = _mm_load1_pd(faction+j23+2); /* fz2 fz2 */
-                       
-                       /* transpose */
-                       xmm7    = _mm_shuffle_pd(xmm5,xmm6,_MM_SHUFFLE2(0,0)); /* fz1 fz2 */
-                       xmm5    = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(0,0)); /* fx1 fx2 */
-                       xmm6    = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(1,1)); /* fy1 fy2 */
-                       
-                       /* subtract partial forces */
-                       xmm5    = _mm_sub_pd(xmm5,t1);
-                       xmm6    = _mm_sub_pd(xmm6,t2);
-                       xmm7    = _mm_sub_pd(xmm7,t3);
-                       
-                       xmm1    = _mm_shuffle_pd(xmm5,xmm6,_MM_SHUFFLE2(0,0)); /* fx1 fy1 */
-                       xmm2    = _mm_shuffle_pd(xmm5,xmm6,_MM_SHUFFLE2(1,1)); /* fy1 fy2 */
-                       
-                       /* store fx and fy */
-                       _mm_storeu_pd(faction+j13,xmm1);
-                       _mm_storeu_pd(faction+j23,xmm2);
-                       
-                       /* .. then fz */
-                       _mm_storel_pd(faction+j13+2,xmm7);
-                       _mm_storeh_pd(faction+j23+2,xmm7);
+                       j3A     = jnrA * 3;
+                       j3B     = jnrB * 3;
+
+      GMX_MM_LOAD_1RVEC_2POINTERS_PD(pos+j3A,pos+j3B,jx,jy,jz);
+
+                       dx           = _mm_sub_pd(ix,jx);
+                       dy           = _mm_sub_pd(iy,jy);
+                       dz           = _mm_sub_pd(iz,jz);
+
+      rsq          = gmx_mm_calc_rsq_pd(dx,dy,dz);
+      
+      rinv         = gmx_mm_invsqrt_pd(rsq);
+                       rinvsq       = _mm_mul_pd(rinv,rinv);
+      
+                       /***********************************/
+                       /* INTERACTION SECTION STARTS HERE */
+                       /***********************************/
+                       GMX_MM_LOAD_2VALUES_PD(charge+jnrA,charge+jnrB,jq);
+                       GMX_MM_LOAD_2VALUES_PD(invsqrta+jnrA,invsqrta+jnrB,isaj);
+            
+      /* Lennard-Jones */
+      tjA          = nti+2*type[jnrA];
+                       tjB          = nti+2*type[jnrB];
+      
+      GMX_MM_LOAD_2PAIRS_PD(vdwparam+tjA,vdwparam+tjB,c6,c12);
+                       
+                       isaprod      = _mm_mul_pd(isai,isaj);
+                       qq           = _mm_mul_pd(iq,jq);            
+                       vcoul        = _mm_mul_pd(qq,rinv);
+                       fscal        = _mm_mul_pd(vcoul,rinv);                                 
+      vctot        = _mm_add_pd(vctot,vcoul);
+            
+      /* Polarization interaction */
+                       qq           = _mm_mul_pd(qq,_mm_mul_pd(isaprod,gbfactor));
+                       gbscale      = _mm_mul_pd(isaprod,gbtabscale);
+      
+                       /* Calculate GB table index */
+                       r            = _mm_mul_pd(rsq,rinv);
+                       rtab         = _mm_mul_pd(r,gbscale);
+                       
+                       n0                   = _mm_cvttpd_epi32(rtab);
+                       eps              = _mm_sub_pd(rtab,_mm_cvtepi32_pd(n0));
+                       nnn                  = _mm_slli_epi32(n0,2);
+                       
+      /* the tables are 16-byte aligned, so we can use _mm_load_pd */                  
+      Y            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))); 
+      F            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,1)));
+      GMX_MM_TRANSPOSE2_PD(Y,F);
+      G            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))+2); 
+      H            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,1))+2);
+      GMX_MM_TRANSPOSE2_PD(G,H);
+      
+      G       = _mm_mul_pd(G,eps);
+      H       = _mm_mul_pd(H, _mm_mul_pd(eps,eps) );
+      F       = _mm_add_pd(F, _mm_add_pd( G , H ) );
+      Y       = _mm_add_pd(Y, _mm_mul_pd(F, eps));
+      F       = _mm_add_pd(F, _mm_add_pd(G , _mm_mul_pd(H,two)));
+      vgb     = _mm_mul_pd(Y, qq);           
+      fijGB   = _mm_mul_pd(F, _mm_mul_pd(qq,gbscale));
+      
+      dvdatmp = _mm_mul_pd(_mm_add_pd(vgb, _mm_mul_pd(fijGB,r)) , minushalf);
+      
+      vgbtot  = _mm_add_pd(vgbtot, vgb);
+      
+      dvdasum = _mm_add_pd(dvdasum, dvdatmp);
+      dvdatmp = _mm_mul_pd(dvdatmp, _mm_mul_pd(isaj,isaj));
+      
+      GMX_MM_INCREMENT_2VALUES_PD(dvda+jnrA,dvda+jnrB,dvdatmp);
+                       
+                       rinvsix      = _mm_mul_pd(rinvsq,rinvsq);
+                       rinvsix      = _mm_mul_pd(rinvsix,rinvsq);
+                       
+                       vvdw6        = _mm_mul_pd(c6,rinvsix);
+                       vvdw12       = _mm_mul_pd(c12, _mm_mul_pd(rinvsix,rinvsix));
+                       vvdwtot      = _mm_add_pd(vvdwtot,_mm_sub_pd(vvdw12,vvdw6));
+            
+      fscal        = _mm_sub_pd(_mm_mul_pd(rinvsq, 
+                                           _mm_sub_pd(_mm_mul_pd(twelve,vvdw12),
+                                                      _mm_mul_pd(six,vvdw6))),
+                                _mm_mul_pd( _mm_sub_pd( fijGB,fscal),rinv ));
+      
+      /***********************************/
+                       /*  INTERACTION SECTION ENDS HERE  */
+                       /***********************************/
+      
+      /* Calculate temporary vectorial force */
+      tx           = _mm_mul_pd(fscal,dx);
+      ty           = _mm_mul_pd(fscal,dy);
+      tz           = _mm_mul_pd(fscal,dz);
+      
+      /* Increment i atom force */
+      fix          = _mm_add_pd(fix,tx);
+      fiy          = _mm_add_pd(fiy,ty);
+      fiz          = _mm_add_pd(fiz,tz);
+      
+      /* Store j forces back */
+                       GMX_MM_DECREMENT_1RVEC_2POINTERS_PD(faction+j3A,faction+j3B,tx,ty,tz);
                }
                
                /* In double precision, offset can only be either 0 or 1 */
-               if(offset!=0)
+               if(k<nj1)
                {
-                       jnr1    = jjnr[k];
-                       j13             = jnr1*3;
-                       
-                       jx      = _mm_load_sd(pos+j13);
-                       jy      = _mm_load_sd(pos+j13+1);
-                       jz      = _mm_load_sd(pos+j13+2);
-                       
-                       isaj    = _mm_load_sd(invsqrta+jnr1);
-                       isaprod = _mm_mul_sd(isai,isaj);
-                       dvdaj   = _mm_load_sd(dvda+jnr1);
-                       q               = _mm_load_sd(charge+jnr1);
-                       qq      = _mm_mul_sd(iq,q);
-                       
-                       dx      = _mm_sub_sd(ix,jx);
-                       dy              = _mm_sub_sd(iy,jy);
-                       dz              = _mm_sub_sd(iz,jz);
-                       
-                       rsq11   = _mm_add_pd( _mm_add_pd( _mm_mul_pd(dx,dx) , _mm_mul_pd(dy,dy) ) , _mm_mul_pd(dz,dz) );
-                       rinv    = gmx_mm_invsqrt_pd(rsq11);
-                       
-                       vcoul   = _mm_mul_sd(qq,rinv);
-                       fscal   = _mm_mul_sd(vcoul,rinv);
-                       qq              = _mm_mul_sd(isaprod,qq);
-                       qq              = _mm_mul_sd(qq,neg);
-                       gbscale = _mm_mul_sd(isaprod,gbtabscale);
-                       
-                       /* Load VdW parameters */
-                       tj      = nti+2*type[jnr1];
-                       
-                       c6      = _mm_load_sd(vdwparam+tj);
-                       c12     = _mm_load_sd(vdwparam+tj+1);
-                       
-                       rinvsq  = _mm_mul_sd(rinv,rinv);
-                       
-                       r               = _mm_mul_sd(rsq11,rinv);
-                       rt              = _mm_mul_sd(r,gbscale);
-                       n0              = _mm_cvttpd_epi32(rt);
-                       n0d             = _mm_cvtepi32_pd(n0);
-                       eps             = _mm_sub_sd(rt,n0d);
-                       eps2    = _mm_mul_sd(eps,eps);
-                       
-                       nnn             = _mm_slli_epi64(n0,2);
-                       
-                       xmm1    = _mm_load_pd(GBtab+(gmx_mm_extract_epi64(nnn,0))); 
-                       xmm2    = _mm_load_pd(GBtab+(gmx_mm_extract_epi64(nnn,1))); 
-                       xmm3    = _mm_load_pd(GBtab+(gmx_mm_extract_epi64(nnn,0))+2); 
-                       xmm4    = _mm_load_pd(GBtab+(gmx_mm_extract_epi64(nnn,1))+2); 
-                       
-                       Y               = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(0,0)); 
-                       F               = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(1,1)); 
-                       G               = _mm_shuffle_pd(xmm3,xmm4,_MM_SHUFFLE2(0,0)); 
-                       H               = _mm_shuffle_pd(xmm3,xmm4,_MM_SHUFFLE2(1,1)); 
-                       
-                       G               = _mm_mul_sd(G,eps);
-                       H               = _mm_mul_sd(H,eps2);
-                       Fp              = _mm_add_sd(F,G);
-                       Fp              = _mm_add_sd(Fp,H);
-                       VV              = _mm_mul_sd(Fp,eps);
-                       VV              = _mm_add_sd(Y,VV);
-                       H               = _mm_mul_sd(two,H);
-                       FF              = _mm_add_sd(Fp,G);
-                       FF              = _mm_add_sd(FF,H);
-                       vgb             = _mm_mul_sd(qq,VV);
-                       fijC    = _mm_mul_sd(qq,FF);
-                       fijC    = _mm_mul_sd(fijC,gbscale);
-                       
-                       dvdatmp = _mm_mul_sd(fijC,r);
-                       dvdatmp = _mm_add_sd(vgb,dvdatmp);
-                       dvdatmp = _mm_mul_sd(dvdatmp,neg);
-                       dvdatmp = _mm_mul_sd(dvdatmp,half);
-                       dvdasum = _mm_add_sd(dvdasum,dvdatmp);
-                       
-                       xmm1    = _mm_mul_sd(dvdatmp,isaj);
-                       xmm1    = _mm_mul_sd(xmm1,isaj);
-                       dvdaj   = _mm_add_sd(dvdaj,xmm1);
-                       
-                       /* store dvda */
-                       _mm_storel_pd(dvda+jnr1,dvdaj);
-                       
-                       vctot   = _mm_add_sd(vctot,vcoul);
-                       vgbtot  = _mm_add_sd(vgbtot,vgb);
-                       
-                       /* VdW interaction */
-                       rinvsix = _mm_mul_sd(rinvsq,rinvsq);
-                       rinvsix = _mm_mul_sd(rinvsix,rinvsq);
-                       
-                       Vvdw6   = _mm_mul_sd(c6,rinvsix);
-                       Vvdw12  = _mm_mul_sd(c12,rinvsix);
-                       Vvdw12  = _mm_mul_sd(Vvdw12,rinvsix);
-                       Vvdwtmp = _mm_sub_sd(Vvdw12,Vvdw6);
-                       Vvdwtot = _mm_add_sd(Vvdwtot,Vvdwtmp);
-                       
-                       xmm1    = _mm_mul_sd(twelwe,Vvdw12);
-                       xmm2    = _mm_mul_sd(six,Vvdw6);
-                       xmm1    = _mm_sub_sd(xmm1,xmm2);
-                       xmm1    = _mm_mul_sd(xmm1,rinvsq);
-                       
-                       /* Scalar force */
-                       fscal   = _mm_sub_sd(fijC,fscal);
-                       fscal   = _mm_mul_sd(fscal,rinv);
-                       fscal   = _mm_sub_sd(xmm1,fscal);
-                       
-                       /* calculate partial force terms */
-                       t1              = _mm_mul_sd(fscal,dx);
-                       t2              = _mm_mul_sd(fscal,dy);
-                       t3              = _mm_mul_sd(fscal,dz);
-                       
-                       /* update the i force */
-                       fix             = _mm_add_sd(fix,t1);
-                       fiy             = _mm_add_sd(fiy,t2);
-                       fiz             = _mm_add_sd(fiz,t3);
-                       
-                       /* accumulate forces from memory */
-                       xmm5    = _mm_load_sd(faction+j13);   /* fx */
-                       xmm6    = _mm_load_sd(faction+j13+1); /* fy */
-                       xmm7    = _mm_load_sd(faction+j13+2); /* fz */
-                       
-                       /* subtract partial forces */
-                       xmm5    = _mm_sub_sd(xmm5,t1);
-                       xmm6    = _mm_sub_sd(xmm6,t2);
-                       xmm7    = _mm_sub_sd(xmm7,t3);
-                       
-                       /* store forces */
-                       _mm_store_sd(faction+j13,xmm5);
-                       _mm_store_sd(faction+j13+1,xmm6);
-                       _mm_store_sd(faction+j13+2,xmm7);
+                       jnrA    = jjnr[k];
+                       
+                       j3A     = jnrA * 3;
+      
+      GMX_MM_LOAD_1RVEC_1POINTER_PD(pos+j3A,jx,jy,jz);
+            
+                       dx           = _mm_sub_sd(ix,jx);
+                       dy           = _mm_sub_sd(iy,jy);
+                       dz           = _mm_sub_sd(iz,jz);
+            
+      rsq          = gmx_mm_calc_rsq_pd(dx,dy,dz);
+      
+      rinv         = gmx_mm_invsqrt_pd(rsq);
+                       rinvsq       = _mm_mul_sd(rinv,rinv);
+      
+      /* The reason for zeroing these variables here is to fix bug 585.
+       * What happens is that __m128d _mm_add_sd(a,b) gives back r0=a[0]+b[0]
+       * and r1=0, but it should be r1=a[1].
+       * This might be a compiler issue (tested with gcc-4.1.3 and -O3).
+       * To work around it, we zero these variables and use _mm_add_pd (**) instead.
+       * Note that the only variables affected are the energies, since
+       * the total sum needs to be correct.
+       */
+      vgb          = _mm_setzero_pd();
+      vcoul        = _mm_setzero_pd();
+      dvdatmp      = _mm_setzero_pd();
+      vvdw6        = _mm_setzero_pd();
+      vvdw12       = _mm_setzero_pd();
+      
+                       /***********************************/
+                       /* INTERACTION SECTION STARTS HERE */
+                       /***********************************/
+                       GMX_MM_LOAD_1VALUE_PD(charge+jnrA,jq);
+                       GMX_MM_LOAD_1VALUE_PD(invsqrta+jnrA,isaj);
+      
+      /* Lennard-Jones */
+      tjA          = nti+2*type[jnrA];
+      
+      GMX_MM_LOAD_1PAIR_PD(vdwparam+tjA,c6,c12);
+                       
+                       isaprod      = _mm_mul_sd(isai,isaj);
+                       qq           = _mm_mul_sd(jq,iq);            
+                       vcoul        = _mm_mul_sd(qq,rinv);
+                       fscal        = _mm_mul_sd(vcoul,rinv);                                 
+      vctot        = _mm_add_pd(vctot,vcoul); /* (**) */
+      
+      /* Polarization interaction */
+                       qq           = _mm_mul_sd(qq,_mm_mul_sd(isaprod,gbfactor));
+                       gbscale      = _mm_mul_sd(isaprod,gbtabscale);
+      
+                       /* Calculate GB table index */
+                       r            = _mm_mul_sd(rsq,rinv);
+                       rtab         = _mm_mul_sd(r,gbscale);
+                       
+                       n0                   = _mm_cvttpd_epi32(rtab);
+                       eps              = _mm_sub_sd(rtab,_mm_cvtepi32_pd(n0));
+                       nnn                  = _mm_slli_epi32(n0,2);
+                       
+      /* the tables are 16-byte aligned, so we can use _mm_load_pd */                  
+      Y            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))); 
+      F            = _mm_setzero_pd();
+      GMX_MM_TRANSPOSE2_PD(Y,F);
+      G            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))+2); 
+      H            = _mm_setzero_pd();
+      GMX_MM_TRANSPOSE2_PD(G,H);
+      
+      G       = _mm_mul_sd(G,eps);
+      H       = _mm_mul_sd(H, _mm_mul_sd(eps,eps) );
+      F       = _mm_add_sd(F, _mm_add_sd( G , H ) );
+      Y       = _mm_add_sd(Y, _mm_mul_sd(F, eps));
+      F       = _mm_add_sd(F, _mm_add_sd(G , _mm_mul_sd(H,two)));
+      vgb     = _mm_mul_sd(Y, qq);           
+      fijGB   = _mm_mul_sd(F, _mm_mul_sd(qq,gbscale));
+      
+      dvdatmp = _mm_mul_sd(_mm_add_sd(vgb, _mm_mul_sd(fijGB,r)) , minushalf);
+      
+      vgbtot  = _mm_add_pd(vgbtot, vgb); /* (**) */
+      
+      dvdasum = _mm_add_pd(dvdasum, dvdatmp); /* (**) */
+      dvdatmp = _mm_mul_sd(dvdatmp, _mm_mul_sd(isaj,isaj));
+      
+      GMX_MM_INCREMENT_1VALUE_PD(dvda+jnrA,dvdatmp);
+                       
+                       rinvsix      = _mm_mul_sd(rinvsq,rinvsq);
+                       rinvsix      = _mm_mul_sd(rinvsix,rinvsq);
+                       
+                       vvdw6        = _mm_mul_sd(c6,rinvsix);
+                       vvdw12       = _mm_mul_sd(c12, _mm_mul_sd(rinvsix,rinvsix));
+                       vvdwtot      = _mm_add_pd(vvdwtot,_mm_sub_sd(vvdw12,vvdw6)); /* (**) */
+      
+      fscal        = _mm_sub_sd(_mm_mul_sd(rinvsq, 
+                                           _mm_sub_sd(_mm_mul_sd(twelve,vvdw12),
+                                                      _mm_mul_sd(six,vvdw6))),
+                                _mm_mul_sd( _mm_sub_sd( fijGB,fscal),rinv ));
+      
+      /***********************************/
+                       /*  INTERACTION SECTION ENDS HERE  */
+                       /***********************************/
+      
+      /* Calculate temporary vectorial force */
+      tx           = _mm_mul_sd(fscal,dx);
+      ty           = _mm_mul_sd(fscal,dy);
+      tz           = _mm_mul_sd(fscal,dz);
+      
+      /* Increment i atom force */
+      fix          = _mm_add_sd(fix,tx);
+      fiy          = _mm_add_sd(fiy,ty);
+      fiz          = _mm_add_sd(fiz,tz);
+      
+      /* Store j forces back */
+                       GMX_MM_DECREMENT_1RVEC_1POINTER_PD(faction+j3A,tx,ty,tz);
                }
                
-               /* fix/fiy/fiz now contain four partial terms, that all should be
-                * added to the i particle forces
-                */
-               t1               = _mm_unpacklo_pd(t1,fix);
-               t2               = _mm_unpacklo_pd(t2,fiy);
-               t3               = _mm_unpacklo_pd(t3,fiz);
-               
-               fix              = _mm_add_pd(fix,t1);
-               fiy              = _mm_add_pd(fiy,t2);
-               fiz              = _mm_add_pd(fiz,t3);
-               
-               fix      = _mm_shuffle_pd(fix,fix,_MM_SHUFFLE2(1,1));
-               fiy      = _mm_shuffle_pd(fiy,fiy,_MM_SHUFFLE2(1,1));
-               fiz      = _mm_shuffle_pd(fiz,fiz,_MM_SHUFFLE2(1,1));
-               
-               /* Load i forces from memory */
-               xmm1     = _mm_load_sd(faction+ii3);
-               xmm2     = _mm_load_sd(faction+ii3+1);
-               xmm3     = _mm_load_sd(faction+ii3+2);
-               
-               /* Add to i force */
-               fix      = _mm_add_sd(fix,xmm1);
-               fiy      = _mm_add_sd(fiy,xmm2);
-               fiz      = _mm_add_sd(fiz,xmm3);
-               
-               /* store i forces to memory */
-               _mm_store_sd(faction+ii3,fix);
-               _mm_store_sd(faction+ii3+1,fiy);
-               _mm_store_sd(faction+ii3+2,fiz);
-               
-               /* now do dvda */
-               dvdatmp  = _mm_unpacklo_pd(dvdatmp,dvdasum);
-               dvdasum  = _mm_add_pd(dvdasum,dvdatmp);
-               _mm_storeh_pd(&dva,dvdasum);
-               dvda[ii] = dvda[ii] + dva*isai_d*isai_d;
-               
-               ggid     = gid[n];
-               
-               /* Coulomb potential */
-               vcoul    = _mm_unpacklo_pd(vcoul,vctot);
-               vctot    = _mm_add_pd(vctot,vcoul);
-               _mm_storeh_pd(&vct,vctot);
-               Vc[ggid] = Vc[ggid] + vct;
-               
-               /* VdW potential */
-               Vvdwtmp  = _mm_unpacklo_pd(Vvdwtmp,Vvdwtot);
-               Vvdwtot  = _mm_add_pd(Vvdwtot,Vvdwtmp);
-               _mm_storeh_pd(&vdwt,Vvdwtot);
-               Vvdw[ggid] = Vvdw[ggid] + vdwt;
-               
-               /* GB potential */
-               vgb      = _mm_unpacklo_pd(vgb,vgbtot);
-               vgbtot   = _mm_add_pd(vgbtot,vgb);
-               _mm_storeh_pd(&vgbt,vgbtot);
-               gpol[ggid] = gpol[ggid] + vgbt;
+    dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai,isai));
+    gmx_mm_update_iforce_1atom_pd(&fix,&fiy,&fiz,faction+ii3,fshift+is3);
+    
+    ggid     = gid[n];         
+    
+    gmx_mm_update_1pot_pd(vctot,vc+ggid);
+    gmx_mm_update_1pot_pd(vgbtot,gpol+ggid);
+    gmx_mm_update_1pot_pd(dvdasum,dvda+ii);
+    gmx_mm_update_1pot_pd(vvdwtot,vvdw+ggid);
+    
        }
-       
+  
        *outeriter   = nri;            
-    *inneriter   = nj1;        
+  *inneriter   = nj1;  
 }
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel410nf
- * Coulomb interaction:     Generalized-Born
- * VdW interaction:         Lennard-Jones
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel410nf_sse2_double(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    double *         shiftvec,
-                    double *         fshift,
-                    int *           gid,
-                    double *         pos,
-                    double *         faction,
-                    double *         charge,
-                    double *         p_facel,
-                    double *         p_krf,
-                    double *         p_crf,
-                    double *         Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    double *         vdwparam,
-                    double *         Vvdw,
-                    double *         p_tabscale,
-                    double *         VFtab,
-                    double *         invsqrta,
-                    double *         dvda,
-                    double *         p_gbtabscale,
-                    double *         GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    double *         work)
-{
-    int           nri,ntype,nthreads;
-    double         facel,krf,crf,tabscale,gbtabscale,vgb,fgb;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    double         shX,shY,shZ;
-    double         rinvsq;
-    double         iq;
-    double         qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    double         rinvsix;
-    double         Vvdw6,Vvdwtot;
-    double         Vvdw12;
-    double         r,rt,eps,eps2;
-    int           n0,nnn;
-    double         Y,F,Geps,Heps2,Fp,VV;
-    double         isai,isaj,isaprod,gbscale;
-    double         ix1,iy1,iz1;
-    double         jx1,jy1,jz1;
-    double         dx11,dy11,dz11,rsq11,rinv11;
-    double         c6,c12;
-    const int     fractshift = 12;
-    const int     fractmask = 8388607;
-    const int     expshift = 23;
-    const int     expmask = 2139095040;
-    const int     explsb = 8388608;
-    double         lu;
-    int           iexp,addr;
-    union { unsigned int bval; double fval; } bitpattern,result;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-    gbtabscale       = *p_gbtabscale;  
-    nj1              = 0;              
-
-    for(n=0; (n<nri); n++)
-    {
-        is3              = 3*shift[n];     
-        shX              = shiftvec[is3];  
-        shY              = shiftvec[is3+1];
-        shZ              = shiftvec[is3+2];
-        nj0              = jindex[n];      
-        nj1              = jindex[n+1];    
-        ii               = iinr[n];        
-        ii3              = 3*ii;           
-        ix1              = shX + pos[ii3+0];
-        iy1              = shY + pos[ii3+1];
-        iz1              = shZ + pos[ii3+2];
-        iq               = facel*charge[ii];
-        isai             = invsqrta[ii];   
-        nti              = 2*ntype*type[ii];
-        vctot            = 0;              
-        Vvdwtot          = 0;              
-        
-        for(k=nj0; (k<nj1); k++)
-        {
-            jnr              = jjnr[k];        
-            j3               = 3*jnr;          
-            jx1              = pos[j3+0];      
-            jy1              = pos[j3+1];      
-            jz1              = pos[j3+2];      
-            dx11             = ix1 - jx1;      
-            dy11             = iy1 - jy1;      
-            dz11             = iz1 - jz1;      
-            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-            bitpattern.fval  = rsq11;          
-            iexp             = (((bitpattern.bval)&expmask)>>expshift);
-            addr             = (((bitpattern.bval)&(fractmask|explsb))>>fractshift);
-            result.bval      = gmx_invsqrt_exptab[iexp] | gmx_invsqrt_fracttab[addr];
-            lu               = result.fval;    
-            rinv11           = (0.5*lu*(3.0-((rsq11*lu)*lu)));
-            isaj             = invsqrta[jnr];  
-            isaprod          = isai*isaj;      
-            qq               = iq*charge[jnr]; 
-            vcoul            = qq*rinv11;      
-            qq               = isaprod*(-qq);  
-            gbscale          = isaprod*gbtabscale;
-            tj               = nti+2*type[jnr];
-            c6               = vdwparam[tj];   
-            c12              = vdwparam[tj+1]; 
-            rinvsq           = rinv11*rinv11;  
-            r                = rsq11*rinv11;   
-            rt               = r*gbscale;      
-            n0               = rt;             
-            eps              = rt-n0;          
-            eps2             = eps*eps;        
-            nnn              = 4*n0;           
-            Y                = GBtab[nnn];     
-            F                = GBtab[nnn+1];   
-            Geps             = eps*GBtab[nnn+2];
-            Heps2            = eps2*GBtab[nnn+3];
-            Fp               = F+Geps+Heps2;   
-            VV               = Y+eps*Fp;       
-            vgb              = qq*VV;          
-            vctot            = vctot + vcoul;  
-            rinvsix          = rinvsq*rinvsq*rinvsq;
-            Vvdw6            = c6*rinvsix;     
-            Vvdw12           = c12*rinvsix*rinvsix;
-            Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-        }
-        
-        ggid             = gid[n];         
-        Vc[ggid]         = Vc[ggid] + vctot;
-        Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-    }
-    
-    *outeriter       = nri;            
-    *inneriter       = nj1;            
-}
-
-
index 2532a8576650ce79d794a1e51784a8489388a831..2df2ed04c32857d9fe973dabe5cfdb941c071fce 100644 (file)
@@ -18,7 +18,6 @@
 #include<math.h>
 #include<vec.h>
 
-
 #include <xmmintrin.h>
 #include <emmintrin.h>
 
 /* get gmx_gbdata_t */
 #include "../nb_kerneltype.h"
 
-
+#include "nb_kernel430_x86_64_sse2.h"
 
 void nb_kernel430_sse2_double(int *           p_nri,
-                                                       int *           iinr,
-                                                       int *           jindex,
-                                                       int *           jjnr,
-                                                       int *           shift,
-                                                       double *         shiftvec,
-                                                       double *         fshift,
-                                                       int *           gid,
-                                                       double *         pos,
-                                                       double *         faction,
-                                                       double *         charge,
-                                                       double *         p_facel,
-                                                       double *         p_krf,
-                                                       double *         p_crf,
-                                                       double *         Vc,
-                                                       int *           type,
-                                                       int *           p_ntype,
-                                                       double *         vdwparam,
-                                                       double *         Vvdw,
-                                                       double *         p_tabscale,
-                                                       double *         VFtab,
-                                                       double *         invsqrta,
-                                                       double *         dvda,
-                                                       double *         p_gbtabscale,
-                                                       double *         GBtab,
-                                                       int *           p_nthreads,
-                                                       int *           count,
-                                                       void *          mtx,
-                                                       int *           outeriter,
-                                                       int *           inneriter,
-                                                       double *         work)
+                              int *           iinr,
+                              int *           jindex,
+                              int *           jjnr,
+                              int *           shift,
+                              double *         shiftvec,
+                              double *         fshift,
+                              int *           gid,
+                              double *         pos,
+                              double *         faction,
+                              double *         charge,
+                              double *         p_facel,
+                              double *         p_krf,
+                              double *         p_crf,
+                              double *         vc,
+                              int *           type,
+                              int *           p_ntype,
+                              double *         vdwparam,
+                              double *         vvdw,
+                              double *         p_tabscale,
+                              double *         VFtab,
+                              double *         invsqrta,
+                              double *         dvda,
+                              double *         p_gbtabscale,
+                              double *         GBtab,
+                              int *           p_nthreads,
+                              int *           count,
+                              void *          mtx,
+                              int *           outeriter,
+                              int *           inneriter,
+                              double *         work)
 {
-       int           nri,ntype,nthreads,offset,tj,tj2,nti;
-       int           n,ii,is3,ii3,k,nj0,nj1,jnr1,jnr2,j13,j23,ggid;
-       double        facel,krf,crf,tabscl,gbtabscl,vct,vdwt,vgbt,nt1,nt2;
-       double        shX,shY,shZ,isai_d,dva;
+  int           nri,ntype,nthreads;
+  int           n,ii,is3,ii3,k,nj0,nj1,ggid;
+  double        shX,shY,shZ;
+       int                       offset,nti;
+  int           jnrA,jnrB;
+  int           j3A,j3B;
+       int           tjA,tjB;
        gmx_gbdata_t *gbdata;
-       float *        gpol;
-
-       __m128d       ix,iy,iz,jx,jy,jz;
-       __m128d           dx,dy,dz,t1,t2,t3;
-       __m128d           fix,fiy,fiz,rsq11,rinv,r,fscal,rt,eps,eps2;
-       __m128d           q,iq,qq,isai,isaj,isaprod,vcoul,gbscale,dvdai,dvdaj;
-       __m128d       Y,F,G,H,Fp,VV,FF,vgb,fijC,fijD,fijR,dvdatmp,dvdasum,vctot,n0d;
-       __m128d           xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7,xmm8;
-       __m128d       c6,c12,Vvdw6,Vvdw12,Vvdwtmp,Vvdwtot,vgbtot,rinvsq,rinvsix;
-       __m128d       fac,tabscale,gbtabscale;
-       __m128i       n0,nnn;
-       
-       const __m128d neg    = {-1.0,-1.0};
-       const __m128d zero   = {0.0,0.0};
-       const __m128d half   = {0.5,0.5};
-       const __m128d two    = {2.0,2.0};
-       const __m128d three  = {3.0,3.0};
-       const __m128d six    = {6.0,6.0};
-       const __m128d twelwe = {12.0,12.0};
+       double *      gpol;
+    
+       __m128d  iq,qq,jq,isai;
+       __m128d  ix,iy,iz;
+       __m128d  jx,jy,jz;
+       __m128d  dx,dy,dz;
+       __m128d  vctot,vvdwtot,vgbtot,dvdasum,gbfactor;
+       __m128d  fix,fiy,fiz,tx,ty,tz,rsq;
+       __m128d  rinv,isaj,isaprod;
+       __m128d  vcoul,fscal,gbscale,c6,c12;
+       __m128d  rinvsq,r,rtab;
+       __m128d  eps,Y,F,G,H;
+  __m128d  VV,FF,Fp;
+       __m128d  vgb,fijGB,dvdatmp;
+       __m128d  rinvsix,vvdw6,vvdw12,vvdwtmp;
+       __m128d  facel,gbtabscale,dvdaj;
+  __m128d  fijD,fijR;
+  __m128d  xmm1,tabscale,eps2;
+       __m128i  n0, nnn;
+    
        
-       const __m128i four   = _mm_set_epi32(4,4,4,4);
+       const __m128d neg        = _mm_set1_pd(-1.0);
+       const __m128d zero       = _mm_set1_pd(0.0);
+       const __m128d minushalf  = _mm_set1_pd(-0.5);
+       const __m128d two        = _mm_set1_pd(2.0);
        
        gbdata     = (gmx_gbdata_t *)work;
        gpol       = gbdata->gpol;
-       
+    
        nri        = *p_nri;
        ntype      = *p_ntype;
-       nthreads   = *p_nthreads; 
-    facel      = (*p_facel) * ((1.0/gbdata->epsilon_r) - (1.0/gbdata->gb_epsilon_solvent));       
-       krf        = *p_krf;
-       crf        = *p_crf;
-       tabscl     = *p_tabscale;
-       gbtabscl   = *p_gbtabscale;
-       nj1        = 0;
-       
-       /* Splat variables */
-       fac        = _mm_load1_pd(&facel);
-       tabscale   = _mm_load1_pd(&tabscl);
-       gbtabscale = _mm_load1_pd(&gbtabscl);
-       
-       /* Keep compiler happy */
-       Vvdwtmp = _mm_setzero_pd();
-       Vvdwtot = _mm_setzero_pd();
-       dvdatmp = _mm_setzero_pd();
-       dvdaj   = _mm_setzero_pd();
-       isaj    = _mm_setzero_pd();
-       vcoul   = _mm_setzero_pd();
-       vgb     = _mm_setzero_pd();
-       t1      = _mm_setzero_pd();
-       t2      = _mm_setzero_pd();
-       t3      = _mm_setzero_pd();
-       xmm1    = _mm_setzero_pd();
-       xmm2    = _mm_setzero_pd();
-       xmm3    = _mm_setzero_pd();
-       xmm4    = _mm_setzero_pd();
-       jnr1    = jnr2 = 0;
-       j13     = j23  = 0;
+    
+  gbfactor   = _mm_set1_pd( - ((1.0/gbdata->epsilon_r) - (1.0/gbdata->gb_epsilon_solvent)));     
+  gbtabscale = _mm_load1_pd(p_gbtabscale);  
+  facel      = _mm_load1_pd(p_facel);
+  tabscale   = _mm_load1_pd(p_tabscale);
+  
+  nj1         = 0;
+  jnrA = jnrB = 0;
+  j3A = j3B   = 0;
+  jx          = _mm_setzero_pd();
+  jy          = _mm_setzero_pd();
+  jz          = _mm_setzero_pd();
+  c6          = _mm_setzero_pd();
+  c12         = _mm_setzero_pd();
        
        for(n=0;n<nri;n++)
        {
-               is3     = 3*shift[n];
-               shX     = shiftvec[is3];
-               shY     = shiftvec[is3+1];
-               shZ     = shiftvec[is3+2];
-               
-               nj0     = jindex[n];      
-        nj1     = jindex[n+1];  
-               offset  = (nj1-nj0)%2;
-               
-               ii      = iinr[n];
-               ii3     = ii*3;
-               
-               ix      = _mm_set1_pd(shX+pos[ii3+0]);
-               iy      = _mm_set1_pd(shX+pos[ii3+1]);
-               iz      = _mm_set1_pd(shX+pos[ii3+2]); 
-               q       = _mm_set1_pd(charge[ii]);
-               
-               iq      = _mm_mul_pd(fac,q); 
-               isai_d  = invsqrta[ii];
-               isai    = _mm_load1_pd(&isai_d);
+    is3              = 3*shift[n];     
+    shX              = shiftvec[is3];  
+    shY              = shiftvec[is3+1];
+    shZ              = shiftvec[is3+2];
+    nj0              = jindex[n];      
+    nj1              = jindex[n+1];    
+    ii               = iinr[n];        
+    ii3              = 3*ii;           
                
-               nti      = 2*ntype*type[ii];
-               
-               fix     = _mm_setzero_pd();
-               fiy     = _mm_setzero_pd();
-               fiz     = _mm_setzero_pd();
-               dvdasum = _mm_setzero_pd();
-               vctot   = _mm_setzero_pd();
-               vgbtot  = _mm_setzero_pd();
-               Vvdwtot = _mm_setzero_pd();
+               ix               = _mm_set1_pd(shX+pos[ii3+0]);
+               iy               = _mm_set1_pd(shY+pos[ii3+1]);
+               iz               = _mm_set1_pd(shZ+pos[ii3+2]);
+    
+               iq               = _mm_load1_pd(charge+ii);
+               iq               = _mm_mul_pd(iq,facel);
+    
+               isai             = _mm_load1_pd(invsqrta+ii);
+    
+               nti              = 2*ntype*type[ii];
                
-               for(k=nj0;k<nj1-offset; k+=2)
+               vctot            = _mm_setzero_pd();
+               vvdwtot          = _mm_setzero_pd();
+               vgbtot           = _mm_setzero_pd();
+               dvdasum          = _mm_setzero_pd();
+               fix              = _mm_setzero_pd();
+               fiy              = _mm_setzero_pd();
+               fiz              = _mm_setzero_pd();
+        
+               for(k=nj0;k<nj1-1; k+=2)
                {
-                       jnr1    = jjnr[k];
-                       jnr2    = jjnr[k+1];
-                       
-                       j13     = jnr1 * 3;
-                       j23     = jnr2 * 3;
-                       
-                       /* Load coordinates */
-                       xmm1    = _mm_loadu_pd(pos+j13); /* x1 y1 */
-                       xmm2    = _mm_loadu_pd(pos+j23); /* x2 y2 */
-                       
-                       xmm5    = _mm_load_sd(pos+j13+2); /* z1 - */
-                       xmm6    = _mm_load_sd(pos+j23+2); /* z2 - */
-                       
-                       /* transpose */
-                       jx      = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(0,0)); 
-                       jy      = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(1,1)); 
-                       jz      = _mm_shuffle_pd(xmm5,xmm6,_MM_SHUFFLE2(0,0)); 
-                       
-                       /* distances */
-                       dx      = _mm_sub_pd(ix,jx);
-                       dy              = _mm_sub_pd(iy,jy);
-                       dz              = _mm_sub_pd(iz,jz);
-                       
-                       rsq11   = _mm_add_pd( _mm_add_pd( _mm_mul_pd(dx,dx) , _mm_mul_pd(dy,dy) ) , _mm_mul_pd(dz,dz) );
-                       rinv    = gmx_mm_invsqrt_pd(rsq11);
-                       
-                       /* Load invsqrta */
-                       isaj    = _mm_loadl_pd(isaj,invsqrta+jnr1);
-                       isaj    = _mm_loadh_pd(isaj,invsqrta+jnr2);
-                       isaprod = _mm_mul_pd(isai,isaj);
-                       
-                       /* Load charges */
-                       q               = _mm_loadl_pd(q,charge+jnr1);
-                       q               = _mm_loadh_pd(q,charge+jnr2);
-                       qq              = _mm_mul_pd(iq,q);
-                       
-                       vcoul   = _mm_mul_pd(qq,rinv);
-                       fscal   = _mm_mul_pd(vcoul,rinv);
-                       qq              = _mm_mul_pd(isaprod,qq);
-                       qq              = _mm_mul_pd(qq,neg);
-                       gbscale = _mm_mul_pd(isaprod,gbtabscale);
-                       
-                       /* Load VdW parameters */
-                       tj      = nti+2*type[jnr1];
-                       tj2     = nti+2*type[jnr2];
-                       
-                       xmm1      = _mm_loadu_pd(vdwparam+tj);
-                       xmm2     = _mm_loadu_pd(vdwparam+tj2);
-                       c6      = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(0,0));
-                       c12     = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(1,1));
-                       
-                       /* Load dvdaj */
-                       dvdaj   = _mm_loadl_pd(dvdaj, dvda+jnr1);
-                       dvdaj   = _mm_loadh_pd(dvdaj, dvda+jnr2);
-                       
-                       /* Calculate GB table index */
-                       r               = _mm_mul_pd(rsq11,rinv);
-                       rt              = _mm_mul_pd(r,gbscale);
-                       n0              = _mm_cvttpd_epi32(rt);
-                       n0d             = _mm_cvtepi32_pd(n0);
-                       eps             = _mm_sub_pd(rt,n0d);
-                       eps2    = _mm_mul_pd(eps,eps);
-                       
-                       nnn             = _mm_slli_epi64(n0,2);
-                       
-                       xmm1    = _mm_load_pd(GBtab+(gmx_mm_extract_epi64(nnn,0)));   /* Y1 F1 */
-                       xmm2    = _mm_load_pd(GBtab+(gmx_mm_extract_epi64(nnn,1)));   /* Y2 F2 */
-                       xmm3    = _mm_load_pd(GBtab+(gmx_mm_extract_epi64(nnn,0))+2); /* G1 H1 */
-                       xmm4    = _mm_load_pd(GBtab+(gmx_mm_extract_epi64(nnn,1))+2); /* G2 H2 */
-                       
-                       Y               = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(0,0)); /* Y1 Y2 */
-                       F               = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(1,1)); /* F1 F2 */
-                       G               = _mm_shuffle_pd(xmm3,xmm4,_MM_SHUFFLE2(0,0)); /* G1 G2 */
-                       H               = _mm_shuffle_pd(xmm3,xmm4,_MM_SHUFFLE2(1,1)); /* H1 H2 */
-                       
-                       G               = _mm_mul_pd(G,eps);
-                       H               = _mm_mul_pd(H,eps2);
-                       Fp              = _mm_add_pd(F,G);
-                       Fp              = _mm_add_pd(Fp,H);
-                       VV              = _mm_mul_pd(Fp,eps);
-                       VV              = _mm_add_pd(Y,VV);
-                       H               = _mm_mul_pd(two,H);
-                       FF              = _mm_add_pd(Fp,G);
-                       FF              = _mm_add_pd(FF,H);
-                       vgb             = _mm_mul_pd(qq,VV);
-                       fijC    = _mm_mul_pd(qq,FF);
-                       fijC    = _mm_mul_pd(fijC,gbscale);
-                       
-                       dvdatmp = _mm_mul_pd(fijC,r);
-                       dvdatmp = _mm_add_pd(vgb,dvdatmp);
-                       dvdatmp = _mm_mul_pd(dvdatmp,neg);
-                       dvdatmp = _mm_mul_pd(dvdatmp,half);
-                       dvdasum = _mm_add_pd(dvdasum,dvdatmp);
-                       
-                       xmm1    = _mm_mul_pd(dvdatmp,isaj);
-                       xmm1    = _mm_mul_pd(xmm1,isaj);
-                       dvdaj   = _mm_add_pd(dvdaj,xmm1);
-                       
-                       /* store dvda */
-                       _mm_storel_pd(dvda+jnr1,dvdaj);
-                       _mm_storeh_pd(dvda+jnr2,dvdaj);
-                       
-                       vctot   = _mm_add_pd(vctot,vcoul);
-                       vgbtot  = _mm_add_pd(vgbtot,vgb);
-                       
-                       /* Calculate VDW table index */
-                       rt      = _mm_mul_pd(r,tabscale);
-                       n0      = _mm_cvttpd_epi32(rt);
-                       n0d     = _mm_cvtepi32_pd(n0);
-                       eps     = _mm_sub_pd(rt,n0d);
+                       jnrA    = jjnr[k];
+                       jnrB    = jjnr[k+1];
+                       
+                       j3A     = jnrA * 3;
+                       j3B     = jnrB * 3;
+            
+      GMX_MM_LOAD_1RVEC_2POINTERS_PD(pos+j3A,pos+j3B,jx,jy,jz);
+            
+                       dx           = _mm_sub_pd(ix,jx);
+                       dy           = _mm_sub_pd(iy,jy);
+                       dz           = _mm_sub_pd(iz,jz);
+            
+      rsq          = gmx_mm_calc_rsq_pd(dx,dy,dz);
+      
+      rinv         = gmx_mm_invsqrt_pd(rsq);
+                       rinvsq       = _mm_mul_pd(rinv,rinv);
+      
+                       /***********************************/
+                       /* INTERACTION SECTION STARTS HERE */
+                       /***********************************/
+                       GMX_MM_LOAD_2VALUES_PD(charge+jnrA,charge+jnrB,jq);
+                       GMX_MM_LOAD_2VALUES_PD(invsqrta+jnrA,invsqrta+jnrB,isaj);
+            
+      /* Lennard-Jones */
+      tjA          = nti+2*type[jnrA];
+                       tjB          = nti+2*type[jnrB];
+      
+      GMX_MM_LOAD_2PAIRS_PD(vdwparam+tjA,vdwparam+tjB,c6,c12);
+                       
+                       isaprod      = _mm_mul_pd(isai,isaj);
+                       qq           = _mm_mul_pd(iq,jq);            
+                       vcoul        = _mm_mul_pd(qq,rinv);
+                       fscal        = _mm_mul_pd(vcoul,rinv);                                 
+      vctot        = _mm_add_pd(vctot,vcoul);
+      
+      /* Polarization interaction */
+                       qq           = _mm_mul_pd(qq,_mm_mul_pd(isaprod,gbfactor));
+                       gbscale      = _mm_mul_pd(isaprod,gbtabscale);
+      
+                       /* Calculate GB table index */
+                       r            = _mm_mul_pd(rsq,rinv);
+                       rtab         = _mm_mul_pd(r,gbscale);
+                       
+                       n0                   = _mm_cvttpd_epi32(rtab);
+                       eps              = _mm_sub_pd(rtab,_mm_cvtepi32_pd(n0));
+                       nnn                  = _mm_slli_epi32(n0,2);
+                       
+      /* the tables are 16-byte aligned, so we can use _mm_load_pd */                  
+      Y            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))); 
+      F            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,1)));
+      GMX_MM_TRANSPOSE2_PD(Y,F);
+      G            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))+2); 
+      H            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,1))+2);
+      GMX_MM_TRANSPOSE2_PD(G,H);
+      
+      G       = _mm_mul_pd(G,eps);
+      H       = _mm_mul_pd(H, _mm_mul_pd(eps,eps) );
+      F       = _mm_add_pd(F, _mm_add_pd( G , H ) );
+      Y       = _mm_add_pd(Y, _mm_mul_pd(F, eps));
+      F       = _mm_add_pd(F, _mm_add_pd(G , _mm_mul_pd(H,two)));
+      vgb     = _mm_mul_pd(Y, qq);           
+      fijGB   = _mm_mul_pd(F, _mm_mul_pd(qq,gbscale));
+      
+      dvdatmp = _mm_mul_pd(_mm_add_pd(vgb, _mm_mul_pd(fijGB,r)) , minushalf);
+      
+      vgbtot  = _mm_add_pd(vgbtot, vgb);
+      
+      dvdasum = _mm_add_pd(dvdasum, dvdatmp);
+      dvdatmp = _mm_mul_pd(dvdatmp, _mm_mul_pd(isaj,isaj));
+      
+      GMX_MM_INCREMENT_2VALUES_PD(dvda+jnrA,dvda+jnrB,dvdatmp);
+                       
+      /* Calculate VDW table index */
+                       rtab    = _mm_mul_pd(r,tabscale);
+                       n0      = _mm_cvttpd_epi32(rtab);
+                       eps     = _mm_sub_pd(rtab,_mm_cvtepi32_pd(n0));
                        eps2    = _mm_mul_pd(eps,eps);
                        nnn     = _mm_slli_epi32(n0,3);
                        
-                       /* Tabulated VdW interaction - dispersion */
-                       xmm1    = _mm_load_pd(VFtab+(gmx_mm_extract_epi64(nnn,0)));   /* Y1 F1 */
-                       xmm2    = _mm_load_pd(VFtab+(gmx_mm_extract_epi64(nnn,1)));   /* Y2 F2 */
-                       xmm3    = _mm_load_pd(VFtab+(gmx_mm_extract_epi64(nnn,0))+2); /* G1 H1 */
-                       xmm4    = _mm_load_pd(VFtab+(gmx_mm_extract_epi64(nnn,1))+2); /* G2 H2 */
-                       
-                       Y               = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(0,0)); /* Y1 Y2 */
-                       F               = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(1,1)); /* F1 F2 */
-                       G               = _mm_shuffle_pd(xmm3,xmm4,_MM_SHUFFLE2(0,0)); /* G1 G2 */
-                       H               = _mm_shuffle_pd(xmm3,xmm4,_MM_SHUFFLE2(1,1)); /* H1 H2 */
-                       
-                       G       = _mm_mul_pd(G,eps);
+      /* Dispersion */
+      Y            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))); 
+      F            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,1)));
+      GMX_MM_TRANSPOSE2_PD(Y,F);
+      G            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+2); 
+      H            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,1))+2);
+      GMX_MM_TRANSPOSE2_PD(G,H);
+      
+      G       = _mm_mul_pd(G,eps);
                        H       = _mm_mul_pd(H,eps2);
                        Fp      = _mm_add_pd(F,G);
                        Fp      = _mm_add_pd(Fp,H);
@@ -293,23 +243,18 @@ void nb_kernel430_sse2_double(int *           p_nri,
                        FF      = _mm_add_pd(Fp,G);
                        FF      = _mm_add_pd(FF,xmm1);
                        
-                       Vvdw6   = _mm_mul_pd(c6,VV);
+                       vvdw6   = _mm_mul_pd(c6,VV);
                        fijD    = _mm_mul_pd(c6,FF);
-                       
-                       /* Tabulated VdW interaction - repulsion */
-                       nnn     = _mm_add_epi32(nnn,four);
-                       
-                       xmm1    = _mm_load_pd(VFtab+(gmx_mm_extract_epi64(nnn,0)));   /* Y1 F1 */
-                       xmm2    = _mm_load_pd(VFtab+(gmx_mm_extract_epi64(nnn,1)));   /* Y2 F2 */
-                       xmm3    = _mm_load_pd(VFtab+(gmx_mm_extract_epi64(nnn,0))+2); /* G1 H1 */
-                       xmm4    = _mm_load_pd(VFtab+(gmx_mm_extract_epi64(nnn,1))+2); /* G2 H2 */
-                       
-                       Y               = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(0,0)); /* Y1 Y2 */
-                       F               = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(1,1)); /* F1 F2 */
-                       G               = _mm_shuffle_pd(xmm3,xmm4,_MM_SHUFFLE2(0,0)); /* G1 G2 */
-                       H               = _mm_shuffle_pd(xmm3,xmm4,_MM_SHUFFLE2(1,1)); /* H1 H2 */
-                       
-                       G       = _mm_mul_pd(G,eps);
+      
+      /* Repulsion */
+      Y            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+4); 
+      F            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,1))+4);
+      GMX_MM_TRANSPOSE2_PD(Y,F);
+      G            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+6); 
+      H            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,1))+6);
+      GMX_MM_TRANSPOSE2_PD(G,H);
+      
+      G       = _mm_mul_pd(G,eps);
                        H       = _mm_mul_pd(H,eps2);
                        Fp      = _mm_add_pd(F,G);
                        Fp      = _mm_add_pd(Fp,H);
@@ -319,162 +264,138 @@ void nb_kernel430_sse2_double(int *           p_nri,
                        FF      = _mm_add_pd(Fp,G);
                        FF      = _mm_add_pd(FF,xmm1);
                        
-                       Vvdw12  = _mm_mul_pd(c12,VV);
+                       vvdw12  = _mm_mul_pd(c12,VV);
                        fijR    = _mm_mul_pd(c12,FF);
                        
-                       Vvdwtmp = _mm_add_pd(Vvdw12,Vvdw6);
-                       Vvdwtot = _mm_add_pd(Vvdwtot,Vvdwtmp);
-                       
+                       vvdwtmp = _mm_add_pd(vvdw12,vvdw6);
+                       vvdwtot = _mm_add_pd(vvdwtot,vvdwtmp);
+      
                        xmm1    = _mm_add_pd(fijD,fijR);
                        xmm1    = _mm_mul_pd(xmm1,tabscale);
-                       xmm1    = _mm_add_pd(xmm1,fijC);
+                       xmm1    = _mm_add_pd(xmm1,fijGB);
                        xmm1    = _mm_sub_pd(xmm1,fscal);
                        fscal   = _mm_mul_pd(xmm1,neg);
                        fscal   = _mm_mul_pd(fscal,rinv);
-                       
-                       /* calculate partial force terms */
-                       t1              = _mm_mul_pd(fscal,dx);
-                       t2              = _mm_mul_pd(fscal,dy);
-                       t3              = _mm_mul_pd(fscal,dz);
-                       
-                       /* update the i force */
-                       fix             = _mm_add_pd(fix,t1);
-                       fiy             = _mm_add_pd(fiy,t2);
-                       fiz             = _mm_add_pd(fiz,t3);
-                       
-                       /* accumulate forces from memory */
-                       xmm1    = _mm_loadu_pd(faction+j13); /* fx1 fy1 */
-                       xmm2    = _mm_loadu_pd(faction+j23); /* fx2 fy2 */
-                       
-                       xmm5    = _mm_load1_pd(faction+j13+2); /* fz1 fz1 */
-                       xmm6    = _mm_load1_pd(faction+j23+2); /* fz2 fz2 */
-                       
-                       /* transpose */
-                       xmm7    = _mm_shuffle_pd(xmm5,xmm6,_MM_SHUFFLE2(0,0)); /* fz1 fz2 */
-                       xmm5    = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(0,0)); /* fx1 fx2 */
-                       xmm6    = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(1,1)); /* fy1 fy2 */
-                       
-                       /* subtract partial forces */
-                       xmm5    = _mm_sub_pd(xmm5,t1);
-                       xmm6    = _mm_sub_pd(xmm6,t2);
-                       xmm7    = _mm_sub_pd(xmm7,t3);
-                       
-                       xmm1    = _mm_shuffle_pd(xmm5,xmm6,_MM_SHUFFLE2(0,0)); /* fx1 fy1 */
-                       xmm2    = _mm_shuffle_pd(xmm5,xmm6,_MM_SHUFFLE2(1,1)); /* fy1 fy2 */
-                       
-                       /* store fx and fy */
-                       _mm_storeu_pd(faction+j13,xmm1);
-                       _mm_storeu_pd(faction+j23,xmm2);
-                       
-                       /* .. then fz */
-                       _mm_storel_pd(faction+j13+2,xmm7);
-                       _mm_storel_pd(faction+j23+2,xmm7);
+      
+      /***********************************/
+                       /*  INTERACTION SECTION ENDS HERE  */
+                       /***********************************/
+      
+      /* Calculate temporary vectorial force */
+      tx           = _mm_mul_pd(fscal,dx);
+      ty           = _mm_mul_pd(fscal,dy);
+      tz           = _mm_mul_pd(fscal,dz);
+      
+      /* Increment i atom force */
+      fix          = _mm_add_pd(fix,tx);
+      fiy          = _mm_add_pd(fiy,ty);
+      fiz          = _mm_add_pd(fiz,tz);
+      
+      /* Store j forces back */
+                       GMX_MM_DECREMENT_1RVEC_2POINTERS_PD(faction+j3A,faction+j3B,tx,ty,tz);
                }
                
                /* In double precision, offset can only be either 0 or 1 */
-               if(offset!=0)
+               if(k<nj1)
                {
-                       jnr1    = jjnr[k];
-                       j13             = jnr1*3;
-                       
-                       jx      = _mm_load_sd(pos+j13);
-                       jy      = _mm_load_sd(pos+j13+1);
-                       jz      = _mm_load_sd(pos+j13+2);
-                       
-                       isaj    = _mm_load_sd(invsqrta+jnr1);
-                       isaprod = _mm_mul_sd(isai,isaj);
-                       dvdaj   = _mm_load_sd(dvda+jnr1);
-                       q               = _mm_load_sd(charge+jnr1);
-                       qq      = _mm_mul_sd(iq,q);
-                       
-                       dx      = _mm_sub_sd(ix,jx);
-                       dy              = _mm_sub_sd(iy,jy);
-                       dz              = _mm_sub_sd(iz,jz);
-                       
-                       rsq11   = _mm_add_pd( _mm_add_pd( _mm_mul_pd(dx,dx) , _mm_mul_pd(dy,dy) ) , _mm_mul_pd(dz,dz) );
-                       rinv    = gmx_mm_invsqrt_pd(rsq11);
-                       
-                       vcoul   = _mm_mul_sd(qq,rinv);
-                       fscal   = _mm_mul_sd(vcoul,rinv);
-                       qq              = _mm_mul_sd(isaprod,qq);
-                       qq              = _mm_mul_sd(qq,neg);
-                       gbscale = _mm_mul_sd(isaprod,gbtabscale);
-                       
-                       /* Load VdW parameters */
-                       tj      = nti+2*type[jnr1];
-                       
-                       c6      = _mm_load_sd(vdwparam+tj);
-                       c12     = _mm_load_sd(vdwparam+tj+1);
-                       
-                       /* Calculate GB table index */
-                       r               = _mm_mul_sd(rsq11,rinv);
-                       rt              = _mm_mul_sd(r,gbscale);
-                       n0              = _mm_cvttpd_epi32(rt);
-                       n0d             = _mm_cvtepi32_pd(n0);
-                       eps             = _mm_sub_sd(rt,n0d);
-                       eps2    = _mm_mul_sd(eps,eps);
-                       
-                       nnn             = _mm_slli_epi64(n0,2);
-                       
-                       xmm1    = _mm_load_pd(GBtab+(gmx_mm_extract_epi64(nnn,0))); 
-                       xmm2    = _mm_load_pd(GBtab+(gmx_mm_extract_epi64(nnn,1))); 
-                       xmm3    = _mm_load_pd(GBtab+(gmx_mm_extract_epi64(nnn,0))+2); 
-                       xmm4    = _mm_load_pd(GBtab+(gmx_mm_extract_epi64(nnn,1))+2); 
-                       
-                       Y               = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(0,0)); 
-                       F               = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(1,1)); 
-                       G               = _mm_shuffle_pd(xmm3,xmm4,_MM_SHUFFLE2(0,0)); 
-                       H               = _mm_shuffle_pd(xmm3,xmm4,_MM_SHUFFLE2(1,1)); 
-                       
-                       G               = _mm_mul_sd(G,eps);
-                       H               = _mm_mul_sd(H,eps2);
-                       Fp              = _mm_add_sd(F,G);
-                       Fp              = _mm_add_sd(Fp,H);
-                       VV              = _mm_mul_sd(Fp,eps);
-                       VV              = _mm_add_sd(Y,VV);
-                       H               = _mm_mul_sd(two,H);
-                       FF              = _mm_add_sd(Fp,G);
-                       FF              = _mm_add_sd(FF,H);
-                       vgb             = _mm_mul_sd(qq,VV);
-                       fijC    = _mm_mul_sd(qq,FF);
-                       fijC    = _mm_mul_sd(fijC,gbscale);
-                       
-                       dvdatmp = _mm_mul_sd(fijC,r);
-                       dvdatmp = _mm_add_sd(vgb,dvdatmp);
-                       dvdatmp = _mm_mul_sd(dvdatmp,neg);
-                       dvdatmp = _mm_mul_sd(dvdatmp,half);
-                       dvdasum = _mm_add_sd(dvdasum,dvdatmp);
-                       
-                       xmm1    = _mm_mul_sd(dvdatmp,isaj);
-                       xmm1    = _mm_mul_sd(xmm1,isaj);
-                       dvdaj   = _mm_add_sd(dvdaj,xmm1);
-                       
-                       /* store dvda */
-                       _mm_storel_pd(dvda+jnr1,dvdaj);
-                       
-                       vctot   = _mm_add_sd(vctot,vcoul);
-                       vgbtot  = _mm_add_sd(vgbtot,vgb);
-                       
-                       /* Calculate VDW table index */
-                       rt      = _mm_mul_sd(r,tabscale);
-                       n0      = _mm_cvttpd_epi32(rt);
-                       n0d     = _mm_cvtepi32_pd(n0);
-                       eps     = _mm_sub_sd(rt,n0d);
+                       jnrA    = jjnr[k];
+                       j3A     = jnrA * 3;
+      
+      GMX_MM_LOAD_1RVEC_1POINTER_PD(pos+j3A,jx,jy,jz);
+      
+                       dx           = _mm_sub_sd(ix,jx);
+                       dy           = _mm_sub_sd(iy,jy);
+                       dz           = _mm_sub_sd(iz,jz);
+            
+      rsq          = gmx_mm_calc_rsq_pd(dx,dy,dz);
+      
+      rinv         = gmx_mm_invsqrt_pd(rsq);
+                       rinvsq       = _mm_mul_sd(rinv,rinv);
+      
+      /* The reason for zeroing these variables here is to fix bug 585.
+       * What happens is that __m128d _mm_add_sd(a,b) gives back r0=a[0]+b[0],
+       * and r1=0, but it should be r1=a[1].
+       * This might be a compiler issue (tested with gcc-4.1.3 and -O3).
+       * To work around it, we zero these variables and use _mm_add_pd (**) instead.
+       * Note that the only variables that get affected are the energies, since
+       * the total sum needs to be correct.
+       */
+      vgb          = _mm_setzero_pd();
+      vcoul        = _mm_setzero_pd();
+      dvdatmp      = _mm_setzero_pd();
+      vvdw6        = _mm_setzero_pd();
+      vvdw12       = _mm_setzero_pd();
+
+      /***********************************/
+                       /* INTERACTION SECTION STARTS HERE */
+                       /***********************************/
+                       GMX_MM_LOAD_1VALUE_PD(charge+jnrA,jq);
+                       GMX_MM_LOAD_1VALUE_PD(invsqrta+jnrA,isaj);
+            
+      /* Lennard-Jones */
+      tjA          = nti+2*type[jnrA];
+      
+      GMX_MM_LOAD_1PAIR_PD(vdwparam+tjA,c6,c12);
+                       
+                       isaprod      = _mm_mul_sd(isai,isaj);
+                       qq           = _mm_mul_sd(jq,iq);            
+                       vcoul        = _mm_mul_sd(qq,rinv);
+                       fscal        = _mm_mul_sd(vcoul,rinv);                                 
+      vctot        = _mm_add_pd(vctot,vcoul); /* (**) */
+      
+      /* Polarization interaction */
+                       qq           = _mm_mul_sd(qq,_mm_mul_sd(isaprod,gbfactor));
+                       gbscale      = _mm_mul_sd(isaprod,gbtabscale);
+      
+                       /* Calculate GB table index */
+                       r            = _mm_mul_sd(rsq,rinv);
+                       rtab         = _mm_mul_sd(r,gbscale);
+                       
+                       n0                   = _mm_cvttpd_epi32(rtab);
+                       eps              = _mm_sub_sd(rtab,_mm_cvtepi32_pd(n0));
+                       nnn                  = _mm_slli_epi32(n0,2);
+                       
+      /* the tables are 16-byte aligned, so we can use _mm_load_pd */                  
+      Y            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))); 
+      F            = _mm_setzero_pd();
+      GMX_MM_TRANSPOSE2_PD(Y,F);
+      G            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))+2); 
+      H            = _mm_setzero_pd();
+      GMX_MM_TRANSPOSE2_PD(G,H);
+      
+      G       = _mm_mul_sd(G,eps);
+      H       = _mm_mul_sd(H, _mm_mul_sd(eps,eps) );
+      F       = _mm_add_sd(F, _mm_add_sd( G , H ) );
+      Y       = _mm_add_sd(Y, _mm_mul_sd(F, eps));
+      F       = _mm_add_sd(F, _mm_add_sd(G , _mm_mul_sd(H,two)));
+      vgb     = _mm_mul_sd(Y, qq);           
+      fijGB   = _mm_mul_sd(F, _mm_mul_sd(qq,gbscale));
+      
+      dvdatmp = _mm_mul_sd(_mm_add_sd(vgb, _mm_mul_sd(fijGB,r)) , minushalf);
+      
+      vgbtot  = _mm_add_pd(vgbtot, vgb); /* (**) */
+      
+      dvdasum = _mm_add_pd(dvdasum, dvdatmp); /* (**) */
+      dvdatmp = _mm_mul_sd(dvdatmp, _mm_mul_sd(isaj,isaj));
+      
+      GMX_MM_INCREMENT_1VALUE_PD(dvda+jnrA,dvdatmp);
+                       
+      /* Calculate VDW table index */
+                       rtab    = _mm_mul_sd(r,tabscale);
+                       n0      = _mm_cvttpd_epi32(rtab);
+                       eps     = _mm_sub_sd(rtab,_mm_cvtepi32_pd(n0));
                        eps2    = _mm_mul_sd(eps,eps);
                        nnn     = _mm_slli_epi32(n0,3);
                        
-                       /* Tabulated VdW interaction - dispersion */
-                       xmm1    = _mm_load_pd(VFtab+(gmx_mm_extract_epi64(nnn,0)));   /* Y1 F1 */
-                       xmm2    = _mm_load_pd(VFtab+(gmx_mm_extract_epi64(nnn,1)));   /* Y2 F2 */
-                       xmm3    = _mm_load_pd(VFtab+(gmx_mm_extract_epi64(nnn,0))+2); /* G1 H1 */
-                       xmm4    = _mm_load_pd(VFtab+(gmx_mm_extract_epi64(nnn,1))+2); /* G2 H2 */
-                       
-                       Y               = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(0,0)); /* Y1 Y2 */
-                       F               = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(1,1)); /* F1 F2 */
-                       G               = _mm_shuffle_pd(xmm3,xmm4,_MM_SHUFFLE2(0,0)); /* G1 G2 */
-                       H               = _mm_shuffle_pd(xmm3,xmm4,_MM_SHUFFLE2(1,1)); /* H1 H2 */
-                       
-                       G       = _mm_mul_sd(G,eps);
+      /* Dispersion */
+      Y            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))); 
+      F            = _mm_setzero_pd();
+      GMX_MM_TRANSPOSE2_PD(Y,F);
+      G            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+2); 
+      H            = _mm_setzero_pd();
+      GMX_MM_TRANSPOSE2_PD(G,H);
+      
+      G       = _mm_mul_sd(G,eps);
                        H       = _mm_mul_sd(H,eps2);
                        Fp      = _mm_add_sd(F,G);
                        Fp      = _mm_add_sd(Fp,H);
@@ -484,23 +405,18 @@ void nb_kernel430_sse2_double(int *           p_nri,
                        FF      = _mm_add_sd(Fp,G);
                        FF      = _mm_add_sd(FF,xmm1);
                        
-                       Vvdw6   = _mm_mul_sd(c6,VV);
+                       vvdw6   = _mm_mul_sd(c6,VV);
                        fijD    = _mm_mul_sd(c6,FF);
-                       
-                       /* Tabulated VdW interaction - repulsion */
-                       nnn     = _mm_add_epi32(nnn,four);
-                       
-                       xmm1    = _mm_load_pd(VFtab+(gmx_mm_extract_epi64(nnn,0)));   /* Y1 F1 */
-                       xmm2    = _mm_load_pd(VFtab+(gmx_mm_extract_epi64(nnn,1)));   /* Y2 F2 */
-                       xmm3    = _mm_load_pd(VFtab+(gmx_mm_extract_epi64(nnn,0))+2); /* G1 H1 */
-                       xmm4    = _mm_load_pd(VFtab+(gmx_mm_extract_epi64(nnn,1))+2); /* G2 H2 */
-                       
-                       Y               = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(0,0)); /* Y1 Y2 */
-                       F               = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(1,1)); /* F1 F2 */
-                       G               = _mm_shuffle_pd(xmm3,xmm4,_MM_SHUFFLE2(0,0)); /* G1 G2 */
-                       H               = _mm_shuffle_pd(xmm3,xmm4,_MM_SHUFFLE2(1,1)); /* H1 H2 */
-                       
-                       G       = _mm_mul_sd(G,eps);
+      
+      /* Repulsion */
+      Y            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+4); 
+      F            = _mm_setzero_pd();
+      GMX_MM_TRANSPOSE2_PD(Y,F);
+      G            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+6); 
+      H            = _mm_setzero_pd();
+      GMX_MM_TRANSPOSE2_PD(G,H);
+      
+      G       = _mm_mul_sd(G,eps);
                        H       = _mm_mul_sd(H,eps2);
                        Fp      = _mm_add_sd(F,G);
                        Fp      = _mm_add_sd(Fp,H);
@@ -510,261 +426,50 @@ void nb_kernel430_sse2_double(int *           p_nri,
                        FF      = _mm_add_sd(Fp,G);
                        FF      = _mm_add_sd(FF,xmm1);
                        
-                       Vvdw12  = _mm_mul_sd(c12,VV);
+                       vvdw12  = _mm_mul_sd(c12,VV);
                        fijR    = _mm_mul_sd(c12,FF);
                        
-                       Vvdwtmp = _mm_add_sd(Vvdw12,Vvdw6);
-                       Vvdwtot = _mm_add_sd(Vvdwtot,Vvdwtmp);
-                       
+                       vvdwtmp = _mm_add_sd(vvdw12,vvdw6);
+                       vvdwtot = _mm_add_pd(vvdwtot,vvdwtmp); /* (**) */
+            
                        xmm1    = _mm_add_sd(fijD,fijR);
                        xmm1    = _mm_mul_sd(xmm1,tabscale);
-                       xmm1    = _mm_add_sd(xmm1,fijC);
+                       xmm1    = _mm_add_sd(xmm1,fijGB);
                        xmm1    = _mm_sub_sd(xmm1,fscal);
                        fscal   = _mm_mul_sd(xmm1,neg);
                        fscal   = _mm_mul_sd(fscal,rinv);
-                       
-                       /* calculate partial force terms */
-                       t1              = _mm_mul_sd(fscal,dx);
-                       t2              = _mm_mul_sd(fscal,dy);
-                       t3              = _mm_mul_sd(fscal,dz);
-                       
-                       /* update the i force */
-                       fix             = _mm_add_sd(fix,t1);
-                       fiy             = _mm_add_sd(fiy,t2);
-                       fiz             = _mm_add_sd(fiz,t3);
-                       
-                       /* accumulate forces from memory */
-                       xmm5    = _mm_load_sd(faction+j13);   /* fx */
-                       xmm6    = _mm_load_sd(faction+j13+1); /* fy */
-                       xmm7    = _mm_load_sd(faction+j13+2); /* fz */
-                       
-                       /* subtract partial forces */
-                       xmm5    = _mm_sub_sd(xmm5,t1);
-                       xmm6    = _mm_sub_sd(xmm6,t2);
-                       xmm7    = _mm_sub_sd(xmm7,t3);
-                       
-                       /* store forces */
-                       _mm_store_sd(faction+j13,xmm5);
-                       _mm_store_sd(faction+j13+1,xmm6);
-                       _mm_store_sd(faction+j13+2,xmm7);
+
+      /***********************************/
+                       /*  INTERACTION SECTION ENDS HERE  */
+                       /***********************************/
+      
+      /* Calculate temporary vectorial force */
+      tx           = _mm_mul_sd(fscal,dx);
+      ty           = _mm_mul_sd(fscal,dy);
+      tz           = _mm_mul_sd(fscal,dz);
+      
+      /* Increment i atom force */
+      fix          = _mm_add_sd(fix,tx);
+      fiy          = _mm_add_sd(fiy,ty);
+      fiz          = _mm_add_sd(fiz,tz);
+      
+      /* Store j forces back */
+                       GMX_MM_DECREMENT_1RVEC_1POINTER_PD(faction+j3A,tx,ty,tz);
                }
                
-               /* fix/fiy/fiz now contain four partial terms, that all should be
-                * added to the i particle forces
-                */
-               t1               = _mm_unpacklo_pd(t1,fix);
-               t2               = _mm_unpacklo_pd(t2,fiy);
-               t3               = _mm_unpacklo_pd(t3,fiz);
-               
-               fix              = _mm_add_pd(fix,t1);
-               fiy              = _mm_add_pd(fiy,t2);
-               fiz              = _mm_add_pd(fiz,t3);
-               
-               fix      = _mm_shuffle_pd(fix,fix,_MM_SHUFFLE2(1,1));
-               fiy      = _mm_shuffle_pd(fiy,fiy,_MM_SHUFFLE2(1,1));
-               fiz      = _mm_shuffle_pd(fiz,fiz,_MM_SHUFFLE2(1,1));
-               
-               /* Load i forces from memory */
-               xmm1     = _mm_load_sd(faction+ii3);
-               xmm2     = _mm_load_sd(faction+ii3+1);
-               xmm3     = _mm_load_sd(faction+ii3+2);
-               
-               /* Add to i force */
-               fix      = _mm_add_sd(fix,xmm1);
-               fiy      = _mm_add_sd(fiy,xmm2);
-               fiz      = _mm_add_sd(fiz,xmm3);
-               
-               /* store i forces to memory */
-               _mm_store_sd(faction+ii3,fix);
-               _mm_store_sd(faction+ii3+1,fiy);
-               _mm_store_sd(faction+ii3+2,fiz);
-               
-               /* now do dvda */
-               dvdatmp  = _mm_unpacklo_pd(dvdatmp,dvdasum);
-               dvdasum  = _mm_add_pd(dvdasum,dvdatmp);
-               _mm_storeh_pd(&dva,dvdasum);
-               dvda[ii] = dvda[ii] + dva*isai_d*isai_d;
-               
-               ggid     = gid[n];
-               
-               /* Coulomb potential */
-               vcoul    = _mm_unpacklo_pd(vcoul,vctot);
-               vctot    = _mm_add_pd(vctot,vcoul);
-               _mm_storeh_pd(&vct,vctot);
-               Vc[ggid] = Vc[ggid] + vct;
-               
-               /* VdW potential */
-               Vvdwtmp  = _mm_unpacklo_pd(Vvdwtmp,Vvdwtot);
-               Vvdwtot  = _mm_add_pd(Vvdwtot,Vvdwtmp);
-               _mm_storeh_pd(&vdwt,Vvdwtot);
-               Vvdw[ggid] = Vvdw[ggid] + vdwt;
-               
-               /* GB potential */
-               vgb      = _mm_unpacklo_pd(vgb,vgbtot);
-               vgbtot   = _mm_add_pd(vgbtot,vgb);
-               _mm_storeh_pd(&vgbt,vgbtot);
-               gpol[ggid] = gpol[ggid] + vgbt;
-       }
-       
-       *outeriter   = nri;            
-    *inneriter   = nj1;        
-}
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel430nf
- * Coulomb interaction:     Generalized-Born
- * VdW interaction:         Tabulated
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel430nf_sse2_double(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    double *         shiftvec,
-                    double *         fshift,
-                    int *           gid,
-                    double *         pos,
-                    double *         faction,
-                    double *         charge,
-                    double *         p_facel,
-                    double *         p_krf,
-                    double *         p_crf,
-                    double *         Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    double *         vdwparam,
-                    double *         Vvdw,
-                    double *         p_tabscale,
-                    double *         VFtab,
-                    double *         invsqrta,
-                    double *         dvda,
-                    double *         p_gbtabscale,
-                    double *         GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    double *         work)
-{
-    int           nri,ntype,nthreads;
-    double         facel,krf,crf,tabscale,gbtabscale,vgb,fgb;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    double         shX,shY,shZ;
-    double         iq;
-    double         qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    double         Vvdw6,Vvdwtot;
-    double         Vvdw12;
-    double         r,rt,eps,eps2;
-    int           n0,nnn;
-    double         Y,F,Geps,Heps2,Fp,VV;
-    double         isai,isaj,isaprod,gbscale;
-    double         ix1,iy1,iz1;
-    double         jx1,jy1,jz1;
-    double         dx11,dy11,dz11,rsq11,rinv11;
-    double         c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-    gbtabscale       = *p_gbtabscale;  
-    nj1              = 0;              
+    dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai,isai));
+    gmx_mm_update_iforce_1atom_pd(&fix,&fiy,&fiz,faction+ii3,fshift+is3);
     
-    for(n=0; (n<nri); n++)
-    {
-        is3              = 3*shift[n];     
-        shX              = shiftvec[is3];  
-        shY              = shiftvec[is3+1];
-        shZ              = shiftvec[is3+2];
-        nj0              = jindex[n];      
-        nj1              = jindex[n+1];    
-        ii               = iinr[n];        
-        ii3              = 3*ii;           
-        ix1              = shX + pos[ii3+0];
-        iy1              = shY + pos[ii3+1];
-        iz1              = shZ + pos[ii3+2];
-        iq               = facel*charge[ii];
-        isai             = invsqrta[ii];   
-        nti              = 2*ntype*type[ii];
-        vctot            = 0;              
-        Vvdwtot          = 0;              
-        
-        for(k=nj0; (k<nj1); k++)
-        {
-            jnr              = jjnr[k];        
-            j3               = 3*jnr;          
-            jx1              = pos[j3+0];      
-            jy1              = pos[j3+1];      
-            jz1              = pos[j3+2];      
-            dx11             = ix1 - jx1;      
-            dy11             = iy1 - jy1;      
-            dz11             = iz1 - jz1;      
-            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-            rinv11           = gmx_invsqrt(rsq11);
-            isaj             = invsqrta[jnr];  
-            isaprod          = isai*isaj;      
-            qq               = iq*charge[jnr]; 
-            vcoul            = qq*rinv11;      
-            qq               = isaprod*(-qq);  
-            gbscale          = isaprod*gbtabscale;
-            tj               = nti+2*type[jnr];
-            c6               = vdwparam[tj];   
-            c12              = vdwparam[tj+1]; 
-            r                = rsq11*rinv11;   
-            rt               = r*gbscale;      
-            n0               = rt;             
-            eps              = rt-n0;          
-            eps2             = eps*eps;        
-            nnn              = 4*n0;           
-            Y                = GBtab[nnn];     
-            F                = GBtab[nnn+1];   
-            Geps             = eps*GBtab[nnn+2];
-            Heps2            = eps2*GBtab[nnn+3];
-            Fp               = F+Geps+Heps2;   
-            VV               = Y+eps*Fp;       
-            vgb              = qq*VV;          
-            vctot            = vctot + vcoul;  
-            r                = rsq11*rinv11;   
-            rt               = r*tabscale;     
-            n0               = rt;             
-            eps              = rt-n0;          
-            eps2             = eps*eps;        
-            nnn              = 8*n0;           
-            Y                = VFtab[nnn];     
-            F                = VFtab[nnn+1];   
-            Geps             = eps*VFtab[nnn+2];
-            Heps2            = eps2*VFtab[nnn+3];
-            Fp               = F+Geps+Heps2;   
-            VV               = Y+eps*Fp;       
-            Vvdw6            = c6*VV;          
-            nnn              = nnn+4;          
-            Y                = VFtab[nnn];     
-            F                = VFtab[nnn+1];   
-            Geps             = eps*VFtab[nnn+2];
-            Heps2            = eps2*VFtab[nnn+3];
-            Fp               = F+Geps+Heps2;   
-            VV               = Y+eps*Fp;       
-            Vvdw12           = c12*VV;         
-            Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-        }
-        
-        ggid             = gid[n];         
-        Vc[ggid]         = Vc[ggid] + vctot;
-        Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-    }
+    ggid     = gid[n];         
+    
+    gmx_mm_update_1pot_pd(vctot,vc+ggid);
+    gmx_mm_update_1pot_pd(vgbtot,gpol+ggid);
+    gmx_mm_update_1pot_pd(dvdasum,dvda+ii);
+    gmx_mm_update_1pot_pd(vvdwtot,vvdw+ggid);
     
-    *outeriter       = nri;            
-    *inneriter       = nj1;            
+       }
+  
+       *outeriter   = nri;            
+  *inneriter   = nj1;  
 }
 
-
index 72b318e5da7623f657aaa3e456690538fabcf86c..26aa82f8d8354bbde19552ec3b4a86374d504aa6 100644 (file)
@@ -26,9 +26,6 @@
 /* get gmx_gbdata_t */
 #include "../nb_kerneltype.h"
 
-#include "nb_kernel400_sse2_single.h"
-
-
 
 void nb_kernel400_sse2_single(int *           p_nri,
                     int *           iinr,
@@ -144,7 +141,7 @@ void nb_kernel400_sse2_single(int *           p_nri,
                fix              = _mm_setzero_ps();
                fiy              = _mm_setzero_ps();
                fiz              = _mm_setzero_ps();
-       
+
         for(k=nj0; k<nj1-7; k+=8)
                {
                        jnrA        = jjnr[k];   
@@ -213,7 +210,7 @@ void nb_kernel400_sse2_single(int *           p_nri,
                        rB           = _mm_mul_ps(rsqB,rinvB);
                        rtab         = _mm_mul_ps(r,gbscale);
                        rtabB        = _mm_mul_ps(rB,gbscaleB);
-                       
+
                        n0           = _mm_cvttps_epi32(rtab);
                        n0B          = _mm_cvttps_epi32(rtabB);
             eps          = _mm_sub_ps(rtab , _mm_cvtepi32_ps(n0) );
@@ -248,8 +245,7 @@ void nb_kernel400_sse2_single(int *           p_nri,
             FB      = _mm_add_ps(FB, _mm_add_ps(GB , _mm_mul_ps(HB,two)));
             vgbB    = _mm_mul_ps(YB, qqB);           
             fijGBB  = _mm_mul_ps(FB, _mm_mul_ps(qqB,gbscaleB));
-            
-            
+           
             dvdatmp = _mm_mul_ps(_mm_add_ps(vgb, _mm_mul_ps(fijGB,r)) , minushalf);
             dvdatmpB = _mm_mul_ps(_mm_add_ps(vgbB, _mm_mul_ps(fijGBB,rB)) , minushalf);
 
@@ -333,7 +329,7 @@ void nb_kernel400_sse2_single(int *           p_nri,
                        /* Calculate GB table index */
                        r            = _mm_mul_ps(rsq,rinv);
                        rtab         = _mm_mul_ps(r,gbscale);
-                       
+
                        n0           = _mm_cvttps_epi32(rtab);
             eps          = _mm_sub_ps(rtab , _mm_cvtepi32_ps(n0) );
                        nnn          = _mm_slli_epi32(n0,2);
@@ -352,7 +348,7 @@ void nb_kernel400_sse2_single(int *           p_nri,
             F       = _mm_add_ps(F, _mm_add_ps(G , _mm_mul_ps(H,two)));
             vgb     = _mm_mul_ps(Y, qq);           
             fijGB   = _mm_mul_ps(F, _mm_mul_ps(qq,gbscale));
-                        
+   
             dvdatmp = _mm_mul_ps(_mm_add_ps(vgb, _mm_mul_ps(fijGB,r)) , minushalf);
             
             vgbtot  = _mm_add_ps(vgbtot, vgb);
@@ -448,7 +444,7 @@ void nb_kernel400_sse2_single(int *           p_nri,
                        /* Calculate GB table index */
                        r            = _mm_mul_ps(rsq,rinv);
                        rtab         = _mm_mul_ps(r,gbscale);
-                       
+
                        n0           = _mm_cvttps_epi32(rtab);
             eps          = _mm_sub_ps(rtab , _mm_cvtepi32_ps(n0) );
                        nnn          = _mm_slli_epi32(n0,2);
@@ -467,7 +463,7 @@ void nb_kernel400_sse2_single(int *           p_nri,
             F       = _mm_add_ps(F, _mm_add_ps(G , _mm_mul_ps(H,two)));
             vgb     = _mm_mul_ps(Y, qq);           
             fijGB   = _mm_mul_ps(F, _mm_mul_ps(qq,gbscale));
-            
+
             dvdatmp = _mm_mul_ps(_mm_add_ps(vgb, _mm_mul_ps(fijGB,r)) , minushalf);            
             vgbtot  = _mm_add_ps(vgbtot, vgb);
             
@@ -531,7 +527,7 @@ void nb_kernel400_sse2_single(int *           p_nri,
  * water optimization:      No
  * Calculate forces:        no
  */
-void nb_kernel400nf_sse2_single(
+void nb_kernel400nf_x86_64_sse(
                                int *           p_nri,
                                int *           iinr,
                                int *           jindex,
index b55fc2e0a6a75cad60f110dacf4cc50d0bd97f11..edeb677eb61538d69ca595390df212d6d1ba10bd 100644 (file)
@@ -26,9 +26,6 @@
 /* get gmx_gbdata_t */
 #include "../nb_kerneltype.h"
 
-#include "nb_kernel410_sse2_single.h"
-
-
 
 void nb_kernel410_sse2_single(int *           p_nri,
                     int *           iinr,
@@ -101,12 +98,12 @@ void nb_kernel410_sse2_single(int *           p_nri,
        __m128i  n0, nnn;
        __m128i  n0B, nnnB;
        
-       const __m128 neg        = {-1.0f,-1.0f,-1.0f,-1.0f};
-       const __m128 zero       = {0.0f,0.0f,0.0f,0.0f};
-       const __m128 minushalf  = {-0.5f,-0.5f,-0.5f,-0.5f};
-       const __m128 two        = {2.0f,2.0f,2.0f,2.0f};
-       const __m128 six        = {6.0f,6.0f,6.0f,6.0f};
-       const __m128 twelve     = {12.0f,12.0f,12.0f,12.0f};  
+       const __m128 neg        = _mm_set1_ps(-1.0f);
+       const __m128 zero       = _mm_set1_ps(0.0f);
+       const __m128 minushalf  = _mm_set1_ps(-0.5f);
+       const __m128 two        = _mm_set1_ps(2.0f);
+       const __m128 six        = _mm_set1_ps(6.0f);
+       const __m128 twelve     = _mm_set1_ps(12.0f);
 
        gbdata          = (gmx_gbdata_t *)work;
        gpol            = gbdata->gpol;
@@ -620,7 +617,7 @@ void nb_kernel410_sse2_single(int *           p_nri,
  * water optimization:      No
  * Calculate forces:        no
  */
-void nb_kernel410nf_sse2_single(
+void nb_kernel410nf_x86_64_sse(
                     int *           p_nri,
                     int *           iinr,
                     int *           jindex,
index 307f9726a2c91a24db18e85396a0b5c66f9dcdff..10646126956901876c4cf96fadde6720798272ee 100644 (file)
 
 #include <xmmintrin.h>
 #include <emmintrin.h>
+
 #include <gmx_sse2_single.h>
 
 /* get gmx_gbdata_t */
 #include "../nb_kerneltype.h"
 
-#include "nb_kernel430_sse2_single.h"
-
-/* to extract single integers from a __m128i datatype */
-#define _mm_extract_epi32(x, imm) \
-_mm_cvtsi128_si32(_mm_srli_si128((x), 4 * (imm)))
 
 void nb_kernel430_sse2_single(int *           p_nri,
                                                   int *           iinr,
@@ -91,15 +87,15 @@ void nb_kernel430_sse2_single(int *           p_nri,
        __m128   fac_sse,tabscale_sse,gbtabscale_sse;
        
        __m128i  n0, nnn;
-       const __m128 neg    = {-1.0f,-1.0f,-1.0f,-1.0f};
-       const __m128 zero   = {0.0f,0.0f,0.0f,0.0f};
-       const __m128 half   = {0.5f,0.5f,0.5f,0.5f};
-       const __m128 two    = {2.0f,2.0f,2.0f,2.0f};
-       const __m128 three  = {3.0f,3.0f,3.0f,3.0f};
-       const __m128 six    = {6.0f,6.0f,6.0f,6.0f};
-       const __m128 twelwe = {12.0f,12.0f,12.0f,12.0f};
+       const __m128 neg    = _mm_set1_ps(-1.0f);
+       const __m128 zero   = _mm_set1_ps(0.0f);
+    const __m128 half   = _mm_set1_ps(0.5f);
+       const __m128 two    = _mm_set1_ps(2.0f);
+       const __m128 three  = _mm_set1_ps(3.0f);
+       const __m128 six    = _mm_set1_ps(6.0f);
+    const __m128 twelwe = _mm_set1_ps(12.0f);
        
-       __m128i four        = _mm_set_epi32(4,4,4,4); 
+       __m128i four        = _mm_set1_epi32(4);
        __m128i maski       = _mm_set_epi32(0, 0xffffffff, 0xffffffff, 0xffffffff);     
        __m128i mask        = _mm_set_epi32(0, 0xffffffff, 0xffffffff, 0xffffffff);   
        
@@ -109,14 +105,14 @@ void nb_kernel430_sse2_single(int *           p_nri,
        gpol            = gbdata->gpol;
                
        nri              = *p_nri;         
-  ntype            = *p_ntype;       
-  nthreads         = *p_nthreads;    
-  facel            = (*p_facel) * ((1.0/gbdata->epsilon_r) - (1.0/gbdata->gb_epsilon_solvent));       
-  krf              = *p_krf;         
-  crf              = *p_crf;         
-  tabscale         = *p_tabscale;    
-  gbtabscale       = *p_gbtabscale;  
-  nj1              = 0;
+    ntype            = *p_ntype;       
+    nthreads         = *p_nthreads;    
+    facel            = (*p_facel) * ((1.0/gbdata->epsilon_r) - (1.0/gbdata->gb_epsilon_solvent));       
+    krf              = *p_krf;         
+    crf              = *p_crf;         
+    tabscale         = *p_tabscale;    
+    gbtabscale       = *p_gbtabscale;  
+    nj1              = 0;
 
        /* Splat variables */
        fac_sse        = _mm_load1_ps(&facel);
@@ -282,10 +278,10 @@ void nb_kernel430_sse2_single(int *           p_nri,
                        nnn     = _mm_slli_epi32(n0,2);
                
                        /* the tables are 16-byte aligned, so we can use _mm_load_ps */                 
-                       xmm1    = _mm_load_ps(GBtab+(_mm_extract_epi32(nnn,0)));  /* Y1,F1,G1,H1 */
-                       xmm2    = _mm_load_ps(GBtab+(_mm_extract_epi32(nnn,1)));  /* Y2,F2,G2,H2 */
-                       xmm3    = _mm_load_ps(GBtab+(_mm_extract_epi32(nnn,2)));  /* Y3,F3,G3,H3 */
-                       xmm4    = _mm_load_ps(GBtab+(_mm_extract_epi32(nnn,3)));  /* Y4,F4,G4,H4 */
+                       xmm1    = _mm_load_ps(GBtab+(gmx_mm_extract_epi32(nnn,0)));  /* Y1,F1,G1,H1 */
+                       xmm2    = _mm_load_ps(GBtab+(gmx_mm_extract_epi32(nnn,1)));  /* Y2,F2,G2,H2 */
+                       xmm3    = _mm_load_ps(GBtab+(gmx_mm_extract_epi32(nnn,2)));  /* Y3,F3,G3,H3 */
+                       xmm4    = _mm_load_ps(GBtab+(gmx_mm_extract_epi32(nnn,3)));  /* Y4,F4,G4,H4 */
                        
                        /* transpose 4*4 */
                        xmm5    = _mm_unpacklo_ps(xmm1,xmm2); /* Y1,Y2,F1,F2 */
@@ -345,10 +341,10 @@ void nb_kernel430_sse2_single(int *           p_nri,
                        nnn     = _mm_slli_epi32(n0,3);
 
                        /* Tabulated VdW interaction - disperion */                     
-                       xmm1    = _mm_load_ps(VFtab+(_mm_extract_epi32(nnn,0)));  /* Y1,F1,G1,H1 */
-                       xmm2    = _mm_load_ps(VFtab+(_mm_extract_epi32(nnn,1)));  /* Y2,F2,G2,H2 */
-                       xmm3    = _mm_load_ps(VFtab+(_mm_extract_epi32(nnn,2)));  /* Y3,F3,G3,H3 */
-                       xmm4    = _mm_load_ps(VFtab+(_mm_extract_epi32(nnn,3)));  /* Y4,F4,G4,H4 */
+                       xmm1    = _mm_load_ps(VFtab+(gmx_mm_extract_epi32(nnn,0)));  /* Y1,F1,G1,H1 */
+                       xmm2    = _mm_load_ps(VFtab+(gmx_mm_extract_epi32(nnn,1)));  /* Y2,F2,G2,H2 */
+                       xmm3    = _mm_load_ps(VFtab+(gmx_mm_extract_epi32(nnn,2)));  /* Y3,F3,G3,H3 */
+                       xmm4    = _mm_load_ps(VFtab+(gmx_mm_extract_epi32(nnn,3)));  /* Y4,F4,G4,H4 */
                        
                        /* transpose 4*4 */
                        xmm5    = _mm_unpacklo_ps(xmm1,xmm2); /* Y1,Y2,F1,F2 */
@@ -377,10 +373,10 @@ void nb_kernel430_sse2_single(int *           p_nri,
                        /* Tabulated VdW interaction - repulsion */
                        nnn     = _mm_add_epi32(nnn,four);
                        
-                       xmm1    = _mm_load_ps(VFtab+(_mm_extract_epi32(nnn,0)));  /* Y1,F1,G1,H1 */
-                       xmm2    = _mm_load_ps(VFtab+(_mm_extract_epi32(nnn,1)));  /* Y2,F2,G2,H2 */
-                       xmm3    = _mm_load_ps(VFtab+(_mm_extract_epi32(nnn,2)));  /* Y3,F3,G3,H3 */
-                       xmm4    = _mm_load_ps(VFtab+(_mm_extract_epi32(nnn,3)));  /* Y4,F4,G4,H4 */
+                       xmm1    = _mm_load_ps(VFtab+(gmx_mm_extract_epi32(nnn,0)));  /* Y1,F1,G1,H1 */
+                       xmm2    = _mm_load_ps(VFtab+(gmx_mm_extract_epi32(nnn,1)));  /* Y2,F2,G2,H2 */
+                       xmm3    = _mm_load_ps(VFtab+(gmx_mm_extract_epi32(nnn,2)));  /* Y3,F3,G3,H3 */
+                       xmm4    = _mm_load_ps(VFtab+(gmx_mm_extract_epi32(nnn,3)));  /* Y4,F4,G4,H4 */
                        
                        /* transpose 4*4 */
                        xmm5    = _mm_unpacklo_ps(xmm1,xmm2); /* Y1,Y2,F1,F2 */
@@ -654,10 +650,10 @@ void nb_kernel430_sse2_single(int *           p_nri,
                        nnn     = _mm_slli_epi32(n0,2);
                        
                        /* the tables are 16-byte aligned, so we can use _mm_load_ps */                 
-                       xmm1    = _mm_load_ps(GBtab+(_mm_extract_epi32(nnn,0)));  /* Y1,F1,G1,H1 */
-                       xmm2    = _mm_load_ps(GBtab+(_mm_extract_epi32(nnn,1)));  /* Y2,F2,G2,H2 */
-                       xmm3    = _mm_load_ps(GBtab+(_mm_extract_epi32(nnn,2)));  /* Y3,F3,G3,H3 */
-                       xmm4    = _mm_load_ps(GBtab+(_mm_extract_epi32(nnn,3)));  /* Y4,F4,G4,H4 */
+                       xmm1    = _mm_load_ps(GBtab+(gmx_mm_extract_epi32(nnn,0)));  /* Y1,F1,G1,H1 */
+                       xmm2    = _mm_load_ps(GBtab+(gmx_mm_extract_epi32(nnn,1)));  /* Y2,F2,G2,H2 */
+                       xmm3    = _mm_load_ps(GBtab+(gmx_mm_extract_epi32(nnn,2)));  /* Y3,F3,G3,H3 */
+                       xmm4    = _mm_load_ps(GBtab+(gmx_mm_extract_epi32(nnn,3)));  /* Y4,F4,G4,H4 */
                        
                        /* transpose 4*4 */
                        xmm5    = _mm_unpacklo_ps(xmm1,xmm2); /* Y1,Y2,F1,F2 */
@@ -710,10 +706,10 @@ void nb_kernel430_sse2_single(int *           p_nri,
                        nnn     = _mm_slli_epi32(n0,3);
                        
                        /* Tabulated VdW interaction - disperion */     
-                       xmm1    = _mm_load_ps(VFtab+(_mm_extract_epi32(nnn,0)));  /* Y1,F1,G1,H1 */
-                       xmm2    = _mm_load_ps(VFtab+(_mm_extract_epi32(nnn,1)));  /* Y2,F2,G2,H2 */
-                       xmm3    = _mm_load_ps(VFtab+(_mm_extract_epi32(nnn,2)));  /* Y3,F3,G3,H3 */
-                       xmm4    = _mm_load_ps(VFtab+(_mm_extract_epi32(nnn,3)));  /* Y4,F4,G4,H4 */
+                       xmm1    = _mm_load_ps(VFtab+(gmx_mm_extract_epi32(nnn,0)));  /* Y1,F1,G1,H1 */
+                       xmm2    = _mm_load_ps(VFtab+(gmx_mm_extract_epi32(nnn,1)));  /* Y2,F2,G2,H2 */
+                       xmm3    = _mm_load_ps(VFtab+(gmx_mm_extract_epi32(nnn,2)));  /* Y3,F3,G3,H3 */
+                       xmm4    = _mm_load_ps(VFtab+(gmx_mm_extract_epi32(nnn,3)));  /* Y4,F4,G4,H4 */
                
                        /* transpose 4*4 */
                        xmm5    = _mm_unpacklo_ps(xmm1,xmm2); /* Y1,Y2,F1,F2 */
@@ -742,10 +738,10 @@ void nb_kernel430_sse2_single(int *           p_nri,
                        /* Tabulated VdW interaction - repulsion */
                        nnn     = _mm_add_epi32(nnn,four);
                                        
-                       xmm1    = _mm_load_ps(VFtab+(_mm_extract_epi32(nnn,0)));  /* Y1,F1,G1,H1 */
-                       xmm2    = _mm_load_ps(VFtab+(_mm_extract_epi32(nnn,1)));  /* Y2,F2,G2,H2 */
-                       xmm3    = _mm_load_ps(VFtab+(_mm_extract_epi32(nnn,2)));  /* Y3,F3,G3,H3 */
-                       xmm4    = _mm_load_ps(VFtab+(_mm_extract_epi32(nnn,3)));  /* Y4,F4,G4,H4 */
+                       xmm1    = _mm_load_ps(VFtab+(gmx_mm_extract_epi32(nnn,0)));  /* Y1,F1,G1,H1 */
+                       xmm2    = _mm_load_ps(VFtab+(gmx_mm_extract_epi32(nnn,1)));  /* Y2,F2,G2,H2 */
+                       xmm3    = _mm_load_ps(VFtab+(gmx_mm_extract_epi32(nnn,2)));  /* Y3,F3,G3,H3 */
+                       xmm4    = _mm_load_ps(VFtab+(gmx_mm_extract_epi32(nnn,3)));  /* Y4,F4,G4,H4 */
                        
                        /* transpose 4*4 */
                        xmm5    = _mm_unpacklo_ps(xmm1,xmm2); /* Y1,Y2,F1,F2 */
@@ -985,7 +981,7 @@ void nb_kernel430_sse2_single(int *           p_nri,
  * water optimization:      No
  * Calculate forces:        no
  */
-void nb_kernel430nf_sse2_single(
+void nb_kernel430nf_x86_64_sse(
                     int *           p_nri,
                     int *           iinr,
                     int *           jindex,
@@ -1077,7 +1073,7 @@ void nb_kernel430nf_sse2_single(
             dy11             = iy1 - jy1;      
             dz11             = iz1 - jz1;      
             rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-            rinv11           = gmx_mm_invsqrt(rsq11);
+            rinv11           = gmx_invsqrt(rsq11);
             isaj             = invsqrta[jnr];  
             isaprod          = isai*isaj;      
             qq               = iq*charge[jnr]; 
index badc0c9b26b456d2625a496e9aff9b01aa34d957..a420fffe4c27cc657a56da942ca49e476dae7e68 100644 (file)
@@ -61,11 +61,11 @@ void nb_kernel400_x86_64_sse2(int *           p_nri,
                               int *           inneriter,
                               double *         work)
 {
-    int           nri,nthreads;
-    int           n,ii,is3,ii3,k,nj0,nj1,ggid;
-    double        shX,shY,shZ;
-    int           jnrA,jnrB;
-    int           j3A,j3B;
+  int           nri,nthreads;
+  int           n,ii,is3,ii3,k,nj0,nj1,ggid;
+  double        shX,shY,shZ;
+  int           jnrA,jnrB;
+  int           j3A,j3B;
        gmx_gbdata_t *gbdata;
        double *      gpol;
     
@@ -93,35 +93,35 @@ void nb_kernel400_x86_64_sse2(int *           p_nri,
     
        nri        = *p_nri;
     
-    gbfactor   = _mm_set1_pd( - ((1.0/gbdata->epsilon_r) - (1.0/gbdata->gb_epsilon_solvent)));     
-    gbtabscale = _mm_load1_pd(p_gbtabscale);  
-    facel      = _mm_load1_pd(p_facel);
-    
-    nj1         = 0;
-    jnrA = jnrB = 0;
-    j3A = j3B   = 0;
-    jx          = _mm_setzero_pd();
-    jy          = _mm_setzero_pd();
-    jz          = _mm_setzero_pd();
+  gbfactor   = _mm_set1_pd( - ((1.0/gbdata->epsilon_r) - (1.0/gbdata->gb_epsilon_solvent)));     
+  gbtabscale = _mm_load1_pd(p_gbtabscale);  
+  facel      = _mm_load1_pd(p_facel);
+  
+  nj1         = 0;
+  jnrA = jnrB = 0;
+  j3A = j3B   = 0;
+  jx          = _mm_setzero_pd();
+  jy          = _mm_setzero_pd();
+  jz          = _mm_setzero_pd();
        
        for(n=0;n<nri;n++)
        {
-        is3              = 3*shift[n];     
-        shX              = shiftvec[is3];  
-        shY              = shiftvec[is3+1];
-        shZ              = shiftvec[is3+2];
-        nj0              = jindex[n];      
-        nj1              = jindex[n+1];    
-        ii               = iinr[n];        
-        ii3              = 3*ii;           
+    is3              = 3*shift[n];     
+    shX              = shiftvec[is3];  
+    shY              = shiftvec[is3+1];
+    shZ              = shiftvec[is3+2];
+    nj0              = jindex[n];      
+    nj1              = jindex[n+1];    
+    ii               = iinr[n];        
+    ii3              = 3*ii;           
                
                ix               = _mm_set1_pd(shX+pos[ii3+0]);
                iy               = _mm_set1_pd(shY+pos[ii3+1]);
                iz               = _mm_set1_pd(shZ+pos[ii3+2]);
-        
+    
                iq               = _mm_load1_pd(charge+ii);
                iq               = _mm_mul_pd(iq,facel);
-        
+    
                isai             = _mm_load1_pd(invsqrta+ii);
                        
                vctot            = _mm_setzero_pd();
@@ -138,18 +138,18 @@ void nb_kernel400_x86_64_sse2(int *           p_nri,
                        
                        j3A     = jnrA * 3;
                        j3B     = jnrB * 3;
-            
-            GMX_MM_LOAD_1RVEC_2POINTERS_PD(pos+j3A,pos+j3B,jx,jy,jz);
+      
+      GMX_MM_LOAD_1RVEC_2POINTERS_PD(pos+j3A,pos+j3B,jx,jy,jz);
             
                        dx           = _mm_sub_pd(ix,jx);
                        dy           = _mm_sub_pd(iy,jy);
                        dz           = _mm_sub_pd(iz,jz);
             
-            rsq          = gmx_mm_calc_rsq_pd(dx,dy,dz);
-            
-            rinv         = gmx_mm_invsqrt_pd(rsq);
+      rsq          = gmx_mm_calc_rsq_pd(dx,dy,dz);
+      
+      rinv         = gmx_mm_invsqrt_pd(rsq);
                        rinvsq       = _mm_mul_pd(rinv,rinv);
-            
+      
                        /***********************************/
                        /* INTERACTION SECTION STARTS HERE */
                        /***********************************/
@@ -160,7 +160,7 @@ void nb_kernel400_x86_64_sse2(int *           p_nri,
                        qq           = _mm_mul_pd(iq,jq);            
                        vcoul        = _mm_mul_pd(qq,rinv);
                        fscal        = _mm_mul_pd(vcoul,rinv);                                 
-            vctot        = _mm_add_pd(vctot,vcoul);
+      vctot        = _mm_add_pd(vctot,vcoul);
             
             /* Polarization interaction */
                        qq           = _mm_mul_pd(qq,_mm_mul_pd(isaprod,gbfactor));
@@ -174,48 +174,48 @@ void nb_kernel400_x86_64_sse2(int *           p_nri,
                        eps              = _mm_sub_pd(rtab,_mm_cvtepi32_pd(n0));
                        nnn                  = _mm_slli_epi32(n0,2);
                        
-            /* the tables are 16-byte aligned, so we can use _mm_load_pd */                    
-            Y            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))); 
-            F            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,1)));
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))+2); 
-            H            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,1))+2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            
-            G       = _mm_mul_pd(G,eps);
-            H       = _mm_mul_pd(H, _mm_mul_pd(eps,eps) );
-            F       = _mm_add_pd(F, _mm_add_pd( G , H ) );
-            Y       = _mm_add_pd(Y, _mm_mul_pd(F, eps));
-            F       = _mm_add_pd(F, _mm_add_pd(G , _mm_mul_pd(H,two)));
-            vgb     = _mm_mul_pd(Y, qq);           
-            fijGB   = _mm_mul_pd(F, _mm_mul_pd(qq,gbscale));
-            
-            dvdatmp = _mm_mul_pd(_mm_add_pd(vgb, _mm_mul_pd(fijGB,r)) , minushalf);
-
-            vgbtot  = _mm_add_pd(vgbtot, vgb);
-            
-            dvdasum = _mm_add_pd(dvdasum, dvdatmp);
-            dvdatmp = _mm_mul_pd(dvdatmp, _mm_mul_pd(isaj,isaj));
-            
-            GMX_MM_INCREMENT_2VALUES_PD(dvda+jnrA,dvda+jnrB,dvdatmp);
-                                   
-            fscal        = _mm_mul_pd( _mm_sub_pd( fscal, fijGB),rinv );
-            
-            /***********************************/
+      /* the tables are 16-byte aligned, so we can use _mm_load_pd */                  
+      Y            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))); 
+      F            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,1)));
+      GMX_MM_TRANSPOSE2_PD(Y,F);
+      G            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))+2); 
+      H            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,1))+2);
+      GMX_MM_TRANSPOSE2_PD(G,H);
+      
+      G       = _mm_mul_pd(G,eps);
+      H       = _mm_mul_pd(H, _mm_mul_pd(eps,eps) );
+      F       = _mm_add_pd(F, _mm_add_pd( G , H ) );
+      Y       = _mm_add_pd(Y, _mm_mul_pd(F, eps));
+      F       = _mm_add_pd(F, _mm_add_pd(G , _mm_mul_pd(H,two)));
+      vgb     = _mm_mul_pd(Y, qq);           
+      fijGB   = _mm_mul_pd(F, _mm_mul_pd(qq,gbscale));
+      
+      dvdatmp = _mm_mul_pd(_mm_add_pd(vgb, _mm_mul_pd(fijGB,r)) , minushalf);
+      
+      vgbtot  = _mm_add_pd(vgbtot, vgb);
+      
+      dvdasum = _mm_add_pd(dvdasum, dvdatmp);
+      dvdatmp = _mm_mul_pd(dvdatmp, _mm_mul_pd(isaj,isaj));
+      
+      GMX_MM_INCREMENT_2VALUES_PD(dvda+jnrA,dvda+jnrB,dvdatmp);
+      
+      fscal        = _mm_mul_pd( _mm_sub_pd( fscal, fijGB),rinv );
+      
+      /***********************************/
                        /*  INTERACTION SECTION ENDS HERE  */
                        /***********************************/
-            
-            /* Calculate temporary vectorial force */
-            tx           = _mm_mul_pd(fscal,dx);
-            ty           = _mm_mul_pd(fscal,dy);
-            tz           = _mm_mul_pd(fscal,dz);
-            
-            /* Increment i atom force */
-            fix          = _mm_add_pd(fix,tx);
-            fiy          = _mm_add_pd(fiy,ty);
-            fiz          = _mm_add_pd(fiz,tz);
-            
-            /* Store j forces back */
+      
+      /* Calculate temporary vectorial force */
+      tx           = _mm_mul_pd(fscal,dx);
+      ty           = _mm_mul_pd(fscal,dy);
+      tz           = _mm_mul_pd(fscal,dz);
+      
+      /* Increment i atom force */
+      fix          = _mm_add_pd(fix,tx);
+      fiy          = _mm_add_pd(fiy,ty);
+      fiz          = _mm_add_pd(fiz,tz);
+      
+      /* Store j forces back */
                        GMX_MM_DECREMENT_1RVEC_2POINTERS_PD(faction+j3A,faction+j3B,tx,ty,tz);
                }
                
@@ -224,96 +224,109 @@ void nb_kernel400_x86_64_sse2(int *           p_nri,
                {
                        jnrA    = jjnr[k];
                        j3A     = jnrA * 3;
-            
-            GMX_MM_LOAD_1RVEC_1POINTER_PD(pos+j3A,jx,jy,jz);
-            
+      
+      GMX_MM_LOAD_1RVEC_1POINTER_PD(pos+j3A,jx,jy,jz);
+      
                        dx           = _mm_sub_sd(ix,jx);
                        dy           = _mm_sub_sd(iy,jy);
                        dz           = _mm_sub_sd(iz,jz);
-            
-            rsq          = gmx_mm_calc_rsq_pd(dx,dy,dz);
-            
-            rinv         = gmx_mm_invsqrt_pd(rsq);
+      
+      rsq          = gmx_mm_calc_rsq_pd(dx,dy,dz);
+      
+      rinv         = gmx_mm_invsqrt_pd(rsq);
                        rinvsq       = _mm_mul_sd(rinv,rinv);
-            
+      
+      /* The reason for zeroing these variables here is to fix bug 585.
+       * What happens is that __m128d _mm_add_sd(a,b) gives back r0=a[0]+b[0]
+       * and r1=0, but it should be r1=a[1].
+       * This might be a compiler issue (tested with gcc-4.1.3 and -O3).
+       * To work around it, we zero these variables and use _mm_add_pd (**) instead.
+       * Note that the only variables that get affected are the energies, since
+       * the total sum needs to be correct.
+       */
+      vcoul        = _mm_setzero_pd();
+      dvdatmp      = _mm_setzero_pd();
+      vgb          = _mm_setzero_pd();
+      
                        /***********************************/
                        /* INTERACTION SECTION STARTS HERE */
                        /***********************************/
                        GMX_MM_LOAD_1VALUE_PD(charge+jnrA,jq);
                        GMX_MM_LOAD_1VALUE_PD(invsqrta+jnrA,isaj);
-                               
+      
                        isaprod      = _mm_mul_sd(isai,isaj);
-                       qq           = _mm_mul_sd(iq,jq);            
-                       vcoul        = _mm_mul_sd(qq,rinv);
-                       fscal        = _mm_mul_sd(vcoul,rinv);                                 
-            vctot        = _mm_add_sd(vctot,vcoul);
-            
-            /* Polarization interaction */
+      /* Since we need _mm_add_pd below, the order of jq,iq here becomes important */
+                       qq           = _mm_mul_sd(jq,iq);  
+      vcoul        = _mm_mul_sd(qq,rinv);
+      fscal        = _mm_mul_sd(vcoul,rinv);                                 
+      vctot        = _mm_add_pd(vctot,vcoul); /* (**) */
+      
+      /* Polarization interaction */
                        qq           = _mm_mul_sd(qq,_mm_mul_sd(isaprod,gbfactor));
                        gbscale      = _mm_mul_sd(isaprod,gbtabscale);
-            
+      
                        /* Calculate GB table index */
                        r            = _mm_mul_sd(rsq,rinv);
                        rtab         = _mm_mul_sd(r,gbscale);
-
+      
                        n0                   = _mm_cvttpd_epi32(rtab);
                        eps              = _mm_sub_sd(rtab,_mm_cvtepi32_pd(n0));
                        nnn                  = _mm_slli_epi32(n0,2);
                        
-            /* the tables are 16-byte aligned, so we can use _mm_load_pd */                    
-            Y            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))); 
-            F            = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))+2); 
-            H            = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            
-            G       = _mm_mul_sd(G,eps);
-            H       = _mm_mul_sd(H, _mm_mul_sd(eps,eps) );
-            F       = _mm_add_sd(F, _mm_add_sd( G , H ) );
-            Y       = _mm_add_sd(Y, _mm_mul_sd(F, eps));
-            F       = _mm_add_sd(F, _mm_add_sd(G , _mm_mul_sd(H,two)));
-            vgb     = _mm_mul_sd(Y, qq);           
-            fijGB   = _mm_mul_sd(F, _mm_mul_sd(qq,gbscale));
+      /* the tables are 16-byte aligned, so we can use _mm_load_pd */                  
+      Y            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))); 
+      F            = _mm_setzero_pd();
+      GMX_MM_TRANSPOSE2_PD(Y,F);
+      G            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))+2); 
+      H            = _mm_setzero_pd();
+      GMX_MM_TRANSPOSE2_PD(G,H);
 
-            dvdatmp = _mm_mul_sd(_mm_add_sd(vgb, _mm_mul_sd(fijGB,r)) , minushalf);
-            
-            vgbtot  = _mm_add_sd(vgbtot, vgb);
-            
-            dvdasum = _mm_add_sd(dvdasum, dvdatmp);
-            dvdatmp = _mm_mul_sd(dvdatmp, _mm_mul_sd(isaj,isaj));
-            
-            GMX_MM_INCREMENT_1VALUE_PD(dvda+jnrA,dvdatmp);
+      G       = _mm_mul_sd(G,eps);
+      H       = _mm_mul_sd(H, _mm_mul_sd(eps,eps) );
+      F       = _mm_add_sd(F, _mm_add_sd( G , H ) );
+      Y       = _mm_add_sd(Y, _mm_mul_sd(F, eps));
+      F       = _mm_add_sd(F, _mm_add_sd(G , _mm_mul_sd(H,two)));
+      vgb     = _mm_mul_sd(Y, qq);           
+      fijGB   = _mm_mul_sd(F, _mm_mul_sd(qq,gbscale));
+      dvdatmp = _mm_mul_sd(_mm_add_sd(vgb, _mm_mul_sd(fijGB,r)) , minushalf);
+      
+      vgbtot  = _mm_add_pd(vgbtot, vgb); /* (**) */
+      
+      dvdasum = _mm_add_pd(dvdasum, dvdatmp); /* (**) */
+      dvdatmp = _mm_mul_sd(dvdatmp, _mm_mul_sd(isaj,isaj));
+      
+      GMX_MM_INCREMENT_1VALUE_PD(dvda+jnrA,dvdatmp);
                        
-            fscal        = _mm_mul_sd( _mm_sub_sd( fscal, fijGB),rinv );
-            
-            /***********************************/
+      fscal        = _mm_mul_sd( _mm_sub_sd( fscal, fijGB),rinv );
+      
+      /***********************************/
                        /*  INTERACTION SECTION ENDS HERE  */
                        /***********************************/
-            
-            /* Calculate temporary vectorial force */
-            tx           = _mm_mul_sd(fscal,dx);
-            ty           = _mm_mul_sd(fscal,dy);
-            tz           = _mm_mul_sd(fscal,dz);
-            
-            /* Increment i atom force */
-            fix          = _mm_add_sd(fix,tx);
-            fiy          = _mm_add_sd(fiy,ty);
-            fiz          = _mm_add_sd(fiz,tz);
-            
-            /* Store j forces back */
+      
+      /* Calculate temporary vectorial force */
+      tx           = _mm_mul_sd(fscal,dx);
+      ty           = _mm_mul_sd(fscal,dy);
+      tz           = _mm_mul_sd(fscal,dz);
+      
+      /* Increment i atom force */
+      fix          = _mm_add_sd(fix,tx);
+      fiy          = _mm_add_sd(fiy,ty);
+      fiz          = _mm_add_sd(fiz,tz);
+      
+      /* Store j forces back */
                        GMX_MM_DECREMENT_1RVEC_1POINTER_PD(faction+j3A,tx,ty,tz);
                }
                
-        dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai,isai));
-        gmx_mm_update_iforce_1atom_pd(&fix,&fiy,&fiz,faction+ii3,fshift+is3);
-        
-        ggid     = gid[n];         
-        
-        gmx_mm_update_1pot_pd(vctot,vc+ggid);
-        gmx_mm_update_2pot_pd(vgbtot,gpol+ggid,dvdasum,dvda+ii);
-       }
+    dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai,isai));
+    gmx_mm_update_iforce_1atom_pd(&fix,&fiy,&fiz,faction+ii3,fshift+is3);
     
+    ggid     = gid[n];         
+   
+    gmx_mm_update_1pot_pd(vctot,vc+ggid);
+    gmx_mm_update_1pot_pd(vgbtot,gpol+ggid);
+    gmx_mm_update_1pot_pd(dvdasum,dvda+ii);
+  }
+  
        *outeriter   = nri;            
-    *inneriter   = nj1;        
+  *inneriter   = nj1;  
 }
index c1d89282396b600839704cc511fbfb711b283dbb..60706bcea5c7f5e8da85af66f0673f996c5a7677 100644 (file)
@@ -62,16 +62,16 @@ void nb_kernel410_x86_64_sse2(int *           p_nri,
                                                        int *           inneriter,
                                                        double *         work)
 {
-    int           nri,ntype,nthreads;
-    int           n,ii,is3,ii3,k,nj0,nj1,ggid;
-    double        shX,shY,shZ;
+  int           nri,ntype,nthreads;
+  int           n,ii,is3,ii3,k,nj0,nj1,ggid;
+  double        shX,shY,shZ;
        int                       offset,nti;
-    int           jnrA,jnrB;
-    int           j3A,j3B;
+  int           jnrA,jnrB;
+  int           j3A,j3B;
        int           tjA,tjB;
        gmx_gbdata_t *gbdata;
        double *      gpol;
-    
        __m128d  iq,qq,jq,isai;
        __m128d  ix,iy,iz;
        __m128d  jx,jy,jz;
@@ -100,37 +100,37 @@ void nb_kernel410_x86_64_sse2(int *           p_nri,
        nri        = *p_nri;
        ntype      = *p_ntype;
     
-    gbfactor   = _mm_set1_pd( - ((1.0/gbdata->epsilon_r) - (1.0/gbdata->gb_epsilon_solvent)));     
-    gbtabscale = _mm_load1_pd(p_gbtabscale);  
-    facel      = _mm_load1_pd(p_facel);
-
-    nj1         = 0;
-    jnrA = jnrB = 0;
-    j3A = j3B   = 0;
-    jx          = _mm_setzero_pd();
-    jy          = _mm_setzero_pd();
-    jz          = _mm_setzero_pd();
-    c6          = _mm_setzero_pd();
-    c12         = _mm_setzero_pd();
+  gbfactor   = _mm_set1_pd( - ((1.0/gbdata->epsilon_r) - (1.0/gbdata->gb_epsilon_solvent)));     
+  gbtabscale = _mm_load1_pd(p_gbtabscale);  
+  facel      = _mm_load1_pd(p_facel);
+  
+  nj1         = 0;
+  jnrA = jnrB = 0;
+  j3A = j3B   = 0;
+  jx          = _mm_setzero_pd();
+  jy          = _mm_setzero_pd();
+  jz          = _mm_setzero_pd();
+  c6          = _mm_setzero_pd();
+  c12         = _mm_setzero_pd();
        
        for(n=0;n<nri;n++)
        {
-        is3              = 3*shift[n];     
-        shX              = shiftvec[is3];  
-        shY              = shiftvec[is3+1];
-        shZ              = shiftvec[is3+2];
-        nj0              = jindex[n];      
-        nj1              = jindex[n+1];    
-        ii               = iinr[n];        
-        ii3              = 3*ii;           
+    is3              = 3*shift[n];     
+    shX              = shiftvec[is3];  
+    shY              = shiftvec[is3+1];
+    shZ              = shiftvec[is3+2];
+    nj0              = jindex[n];      
+    nj1              = jindex[n+1];    
+    ii               = iinr[n];        
+    ii3              = 3*ii;           
                
                ix               = _mm_set1_pd(shX+pos[ii3+0]);
                iy               = _mm_set1_pd(shY+pos[ii3+1]);
                iz               = _mm_set1_pd(shZ+pos[ii3+2]);
-        
+    
                iq               = _mm_load1_pd(charge+ii);
                iq               = _mm_mul_pd(iq,facel);
-        
+    
                isai             = _mm_load1_pd(invsqrta+ii);
         
                nti              = 2*ntype*type[ii];
@@ -151,39 +151,39 @@ void nb_kernel410_x86_64_sse2(int *           p_nri,
                        j3A     = jnrA * 3;
                        j3B     = jnrB * 3;
 
-            GMX_MM_LOAD_1RVEC_2POINTERS_PD(pos+j3A,pos+j3B,jx,jy,jz);
+      GMX_MM_LOAD_1RVEC_2POINTERS_PD(pos+j3A,pos+j3B,jx,jy,jz);
 
                        dx           = _mm_sub_pd(ix,jx);
                        dy           = _mm_sub_pd(iy,jy);
                        dz           = _mm_sub_pd(iz,jz);
 
-            rsq          = gmx_mm_calc_rsq_pd(dx,dy,dz);
-
-            rinv         = gmx_mm_invsqrt_pd(rsq);
+      rsq          = gmx_mm_calc_rsq_pd(dx,dy,dz);
+      
+      rinv         = gmx_mm_invsqrt_pd(rsq);
                        rinvsq       = _mm_mul_pd(rinv,rinv);
-            
+      
                        /***********************************/
                        /* INTERACTION SECTION STARTS HERE */
                        /***********************************/
                        GMX_MM_LOAD_2VALUES_PD(charge+jnrA,charge+jnrB,jq);
                        GMX_MM_LOAD_2VALUES_PD(invsqrta+jnrA,invsqrta+jnrB,isaj);
             
-            /* Lennard-Jones */
-            tjA          = nti+2*type[jnrA];
+      /* Lennard-Jones */
+      tjA          = nti+2*type[jnrA];
                        tjB          = nti+2*type[jnrB];
-            
-            GMX_MM_LOAD_2PAIRS_PD(vdwparam+tjA,vdwparam+tjB,c6,c12);
+      
+      GMX_MM_LOAD_2PAIRS_PD(vdwparam+tjA,vdwparam+tjB,c6,c12);
                        
                        isaprod      = _mm_mul_pd(isai,isaj);
                        qq           = _mm_mul_pd(iq,jq);            
                        vcoul        = _mm_mul_pd(qq,rinv);
                        fscal        = _mm_mul_pd(vcoul,rinv);                                 
-            vctot        = _mm_add_pd(vctot,vcoul);
+      vctot        = _mm_add_pd(vctot,vcoul);
             
-            /* Polarization interaction */
+      /* Polarization interaction */
                        qq           = _mm_mul_pd(qq,_mm_mul_pd(isaprod,gbfactor));
                        gbscale      = _mm_mul_pd(isaprod,gbtabscale);
-            
+      
                        /* Calculate GB table index */
                        r            = _mm_mul_pd(rsq,rinv);
                        rtab         = _mm_mul_pd(r,gbscale);
@@ -192,30 +192,30 @@ void nb_kernel410_x86_64_sse2(int *           p_nri,
                        eps              = _mm_sub_pd(rtab,_mm_cvtepi32_pd(n0));
                        nnn                  = _mm_slli_epi32(n0,2);
                        
-            /* the tables are 16-byte aligned, so we can use _mm_load_pd */                    
-            Y            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))); 
-            F            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,1)));
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))+2); 
-            H            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,1))+2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            
-            G       = _mm_mul_pd(G,eps);
-            H       = _mm_mul_pd(H, _mm_mul_pd(eps,eps) );
-            F       = _mm_add_pd(F, _mm_add_pd( G , H ) );
-            Y       = _mm_add_pd(Y, _mm_mul_pd(F, eps));
-            F       = _mm_add_pd(F, _mm_add_pd(G , _mm_mul_pd(H,two)));
-            vgb     = _mm_mul_pd(Y, qq);           
-            fijGB   = _mm_mul_pd(F, _mm_mul_pd(qq,gbscale));
-            
-            dvdatmp = _mm_mul_pd(_mm_add_pd(vgb, _mm_mul_pd(fijGB,r)) , minushalf);
-            
-            vgbtot  = _mm_add_pd(vgbtot, vgb);
-            
-            dvdasum = _mm_add_pd(dvdasum, dvdatmp);
-            dvdatmp = _mm_mul_pd(dvdatmp, _mm_mul_pd(isaj,isaj));
-            
-            GMX_MM_INCREMENT_2VALUES_PD(dvda+jnrA,dvda+jnrB,dvdatmp);
+      /* the tables are 16-byte aligned, so we can use _mm_load_pd */                  
+      Y            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))); 
+      F            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,1)));
+      GMX_MM_TRANSPOSE2_PD(Y,F);
+      G            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))+2); 
+      H            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,1))+2);
+      GMX_MM_TRANSPOSE2_PD(G,H);
+      
+      G       = _mm_mul_pd(G,eps);
+      H       = _mm_mul_pd(H, _mm_mul_pd(eps,eps) );
+      F       = _mm_add_pd(F, _mm_add_pd( G , H ) );
+      Y       = _mm_add_pd(Y, _mm_mul_pd(F, eps));
+      F       = _mm_add_pd(F, _mm_add_pd(G , _mm_mul_pd(H,two)));
+      vgb     = _mm_mul_pd(Y, qq);           
+      fijGB   = _mm_mul_pd(F, _mm_mul_pd(qq,gbscale));
+      
+      dvdatmp = _mm_mul_pd(_mm_add_pd(vgb, _mm_mul_pd(fijGB,r)) , minushalf);
+      
+      vgbtot  = _mm_add_pd(vgbtot, vgb);
+      
+      dvdasum = _mm_add_pd(dvdasum, dvdatmp);
+      dvdatmp = _mm_mul_pd(dvdatmp, _mm_mul_pd(isaj,isaj));
+      
+      GMX_MM_INCREMENT_2VALUES_PD(dvda+jnrA,dvda+jnrB,dvdatmp);
                        
                        rinvsix      = _mm_mul_pd(rinvsq,rinvsq);
                        rinvsix      = _mm_mul_pd(rinvsix,rinvsq);
@@ -224,26 +224,26 @@ void nb_kernel410_x86_64_sse2(int *           p_nri,
                        vvdw12       = _mm_mul_pd(c12, _mm_mul_pd(rinvsix,rinvsix));
                        vvdwtot      = _mm_add_pd(vvdwtot,_mm_sub_pd(vvdw12,vvdw6));
             
-            fscal        = _mm_sub_pd(_mm_mul_pd(rinvsq, 
-                                                 _mm_sub_pd(_mm_mul_pd(twelve,vvdw12),
-                                                            _mm_mul_pd(six,vvdw6))),
-                                      _mm_mul_pd( _mm_sub_pd( fijGB,fscal),rinv ));
-                        
-            /***********************************/
+      fscal        = _mm_sub_pd(_mm_mul_pd(rinvsq, 
+                                           _mm_sub_pd(_mm_mul_pd(twelve,vvdw12),
+                                                      _mm_mul_pd(six,vvdw6))),
+                                _mm_mul_pd( _mm_sub_pd( fijGB,fscal),rinv ));
+      
+      /***********************************/
                        /*  INTERACTION SECTION ENDS HERE  */
                        /***********************************/
-            
-            /* Calculate temporary vectorial force */
-            tx           = _mm_mul_pd(fscal,dx);
-            ty           = _mm_mul_pd(fscal,dy);
-            tz           = _mm_mul_pd(fscal,dz);
-            
-            /* Increment i atom force */
-            fix          = _mm_add_pd(fix,tx);
-            fiy          = _mm_add_pd(fiy,ty);
-            fiz          = _mm_add_pd(fiz,tz);
-            
-            /* Store j forces back */
+      
+      /* Calculate temporary vectorial force */
+      tx           = _mm_mul_pd(fscal,dx);
+      ty           = _mm_mul_pd(fscal,dy);
+      tz           = _mm_mul_pd(fscal,dz);
+      
+      /* Increment i atom force */
+      fix          = _mm_add_pd(fix,tx);
+      fiy          = _mm_add_pd(fiy,ty);
+      fiz          = _mm_add_pd(fiz,tz);
+      
+      /* Store j forces back */
                        GMX_MM_DECREMENT_1RVEC_2POINTERS_PD(faction+j3A,faction+j3B,tx,ty,tz);
                }
                
@@ -253,39 +253,53 @@ void nb_kernel410_x86_64_sse2(int *           p_nri,
                        jnrA    = jjnr[k];
                        
                        j3A     = jnrA * 3;
-            
-            GMX_MM_LOAD_1RVEC_1POINTER_PD(pos+j3A,jx,jy,jz);
+      
+      GMX_MM_LOAD_1RVEC_1POINTER_PD(pos+j3A,jx,jy,jz);
             
                        dx           = _mm_sub_sd(ix,jx);
                        dy           = _mm_sub_sd(iy,jy);
                        dz           = _mm_sub_sd(iz,jz);
             
-            rsq          = gmx_mm_calc_rsq_pd(dx,dy,dz);
-            
-            rinv         = gmx_mm_invsqrt_pd(rsq);
+      rsq          = gmx_mm_calc_rsq_pd(dx,dy,dz);
+      
+      rinv         = gmx_mm_invsqrt_pd(rsq);
                        rinvsq       = _mm_mul_sd(rinv,rinv);
-            
+      
+      /* The reason for zeroing these variables here is to fix bug 585.
+       * What happens is that __m128d _mm_add_sd(a,b) gives back r0=a[0]+b[0]
+       * and r1=0, but it should be r1=a[1].
+       * This might be a compiler issue (tested with gcc-4.1.3 and -O3).
+       * To work around it, we zero these variables and use _mm_add_pd (**) instead.
+       * Note that the only variables affected are the energies, since
+       * the total sum needs to be correct.
+       */
+      vgb          = _mm_setzero_pd();
+      vcoul        = _mm_setzero_pd();
+      dvdatmp      = _mm_setzero_pd();
+      vvdw6        = _mm_setzero_pd();
+      vvdw12       = _mm_setzero_pd();
+      
                        /***********************************/
                        /* INTERACTION SECTION STARTS HERE */
                        /***********************************/
                        GMX_MM_LOAD_1VALUE_PD(charge+jnrA,jq);
                        GMX_MM_LOAD_1VALUE_PD(invsqrta+jnrA,isaj);
-            
-            /* Lennard-Jones */
-            tjA          = nti+2*type[jnrA];
-            
-            GMX_MM_LOAD_1PAIR_PD(vdwparam+tjA,c6,c12);
+      
+      /* Lennard-Jones */
+      tjA          = nti+2*type[jnrA];
+      
+      GMX_MM_LOAD_1PAIR_PD(vdwparam+tjA,c6,c12);
                        
                        isaprod      = _mm_mul_sd(isai,isaj);
-                       qq           = _mm_mul_sd(iq,jq);            
+                       qq           = _mm_mul_sd(jq,iq);            
                        vcoul        = _mm_mul_sd(qq,rinv);
                        fscal        = _mm_mul_sd(vcoul,rinv);                                 
-            vctot        = _mm_add_sd(vctot,vcoul);
-            
-            /* Polarization interaction */
+      vctot        = _mm_add_pd(vctot,vcoul); /* (**) */
+      
+      /* Polarization interaction */
                        qq           = _mm_mul_sd(qq,_mm_mul_sd(isaprod,gbfactor));
                        gbscale      = _mm_mul_sd(isaprod,gbtabscale);
-            
+      
                        /* Calculate GB table index */
                        r            = _mm_mul_sd(rsq,rinv);
                        rtab         = _mm_mul_sd(r,gbscale);
@@ -294,70 +308,73 @@ void nb_kernel410_x86_64_sse2(int *           p_nri,
                        eps              = _mm_sub_sd(rtab,_mm_cvtepi32_pd(n0));
                        nnn                  = _mm_slli_epi32(n0,2);
                        
-            /* the tables are 16-byte aligned, so we can use _mm_load_pd */                    
-            Y            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))); 
-            F            = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))+2); 
-            H            = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            
-            G       = _mm_mul_sd(G,eps);
-            H       = _mm_mul_sd(H, _mm_mul_sd(eps,eps) );
-            F       = _mm_add_sd(F, _mm_add_sd( G , H ) );
-            Y       = _mm_add_sd(Y, _mm_mul_sd(F, eps));
-            F       = _mm_add_sd(F, _mm_add_sd(G , _mm_mul_sd(H,two)));
-            vgb     = _mm_mul_sd(Y, qq);           
-            fijGB   = _mm_mul_sd(F, _mm_mul_sd(qq,gbscale));
-            
-            dvdatmp = _mm_mul_sd(_mm_add_sd(vgb, _mm_mul_sd(fijGB,r)) , minushalf);
-            
-            vgbtot  = _mm_add_sd(vgbtot, vgb);
-            
-            dvdasum = _mm_add_sd(dvdasum, dvdatmp);
-            dvdatmp = _mm_mul_sd(dvdatmp, _mm_mul_sd(isaj,isaj));
-            
-            GMX_MM_INCREMENT_1VALUE_PD(dvda+jnrA,dvdatmp);
+      /* the tables are 16-byte aligned, so we can use _mm_load_pd */                  
+      Y            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))); 
+      F            = _mm_setzero_pd();
+      GMX_MM_TRANSPOSE2_PD(Y,F);
+      G            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))+2); 
+      H            = _mm_setzero_pd();
+      GMX_MM_TRANSPOSE2_PD(G,H);
+      
+      G       = _mm_mul_sd(G,eps);
+      H       = _mm_mul_sd(H, _mm_mul_sd(eps,eps) );
+      F       = _mm_add_sd(F, _mm_add_sd( G , H ) );
+      Y       = _mm_add_sd(Y, _mm_mul_sd(F, eps));
+      F       = _mm_add_sd(F, _mm_add_sd(G , _mm_mul_sd(H,two)));
+      vgb     = _mm_mul_sd(Y, qq);           
+      fijGB   = _mm_mul_sd(F, _mm_mul_sd(qq,gbscale));
+      
+      dvdatmp = _mm_mul_sd(_mm_add_sd(vgb, _mm_mul_sd(fijGB,r)) , minushalf);
+      
+      vgbtot  = _mm_add_pd(vgbtot, vgb); /* (**) */
+      
+      dvdasum = _mm_add_pd(dvdasum, dvdatmp); /* (**) */
+      dvdatmp = _mm_mul_sd(dvdatmp, _mm_mul_sd(isaj,isaj));
+      
+      GMX_MM_INCREMENT_1VALUE_PD(dvda+jnrA,dvdatmp);
                        
                        rinvsix      = _mm_mul_sd(rinvsq,rinvsq);
                        rinvsix      = _mm_mul_sd(rinvsix,rinvsq);
                        
                        vvdw6        = _mm_mul_sd(c6,rinvsix);
                        vvdw12       = _mm_mul_sd(c12, _mm_mul_sd(rinvsix,rinvsix));
-                       vvdwtot      = _mm_add_sd(vvdwtot,_mm_sub_sd(vvdw12,vvdw6));
-        
-            fscal        = _mm_sub_sd(_mm_mul_sd(rinvsq, 
-                                                 _mm_sub_sd(_mm_mul_sd(twelve,vvdw12),
-                                                            _mm_mul_sd(six,vvdw6))),
-                                      _mm_mul_sd( _mm_sub_sd( fijGB,fscal),rinv ));
-            
-            /***********************************/
+                       vvdwtot      = _mm_add_pd(vvdwtot,_mm_sub_sd(vvdw12,vvdw6)); /* (**) */
+      
+      fscal        = _mm_sub_sd(_mm_mul_sd(rinvsq, 
+                                           _mm_sub_sd(_mm_mul_sd(twelve,vvdw12),
+                                                      _mm_mul_sd(six,vvdw6))),
+                                _mm_mul_sd( _mm_sub_sd( fijGB,fscal),rinv ));
+      
+      /***********************************/
                        /*  INTERACTION SECTION ENDS HERE  */
                        /***********************************/
-            
-            /* Calculate temporary vectorial force */
-            tx           = _mm_mul_sd(fscal,dx);
-            ty           = _mm_mul_sd(fscal,dy);
-            tz           = _mm_mul_sd(fscal,dz);
-            
-            /* Increment i atom force */
-            fix          = _mm_add_sd(fix,tx);
-            fiy          = _mm_add_sd(fiy,ty);
-            fiz          = _mm_add_sd(fiz,tz);
-            
-            /* Store j forces back */
+      
+      /* Calculate temporary vectorial force */
+      tx           = _mm_mul_sd(fscal,dx);
+      ty           = _mm_mul_sd(fscal,dy);
+      tz           = _mm_mul_sd(fscal,dz);
+      
+      /* Increment i atom force */
+      fix          = _mm_add_sd(fix,tx);
+      fiy          = _mm_add_sd(fiy,ty);
+      fiz          = _mm_add_sd(fiz,tz);
+      
+      /* Store j forces back */
                        GMX_MM_DECREMENT_1RVEC_1POINTER_PD(faction+j3A,tx,ty,tz);
                }
                
-        dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai,isai));
-        gmx_mm_update_iforce_1atom_pd(&fix,&fiy,&fiz,faction+ii3,fshift+is3);
-
-        ggid     = gid[n];         
-        
-        gmx_mm_update_2pot_pd(vctot,vc+ggid,vvdwtot,vvdw+ggid);
-        gmx_mm_update_2pot_pd(vgbtot,gpol+ggid,dvdasum,dvda+ii);
+    dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai,isai));
+    gmx_mm_update_iforce_1atom_pd(&fix,&fiy,&fiz,faction+ii3,fshift+is3);
+    
+    ggid     = gid[n];         
+    
+    gmx_mm_update_1pot_pd(vctot,vc+ggid);
+    gmx_mm_update_1pot_pd(vgbtot,gpol+ggid);
+    gmx_mm_update_1pot_pd(dvdasum,dvda+ii);
+    gmx_mm_update_1pot_pd(vvdwtot,vvdw+ggid);
+    
        }
-
+  
        *outeriter   = nri;            
-    *inneriter   = nj1;        
+  *inneriter   = nj1;  
 }
index f2335aa1a4308e511ffccbfc296118c836efbe9c..895c383b90bdf106c00f5af0cf15685f8680f094 100644 (file)
@@ -60,12 +60,12 @@ void nb_kernel430_x86_64_sse2(int *           p_nri,
                               int *           inneriter,
                               double *         work)
 {
-    int           nri,ntype,nthreads;
-    int           n,ii,is3,ii3,k,nj0,nj1,ggid;
-    double        shX,shY,shZ;
+  int           nri,ntype,nthreads;
+  int           n,ii,is3,ii3,k,nj0,nj1,ggid;
+  double        shX,shY,shZ;
        int                       offset,nti;
-    int           jnrA,jnrB;
-    int           j3A,j3B;
+  int           jnrA,jnrB;
+  int           j3A,j3B;
        int           tjA,tjB;
        gmx_gbdata_t *gbdata;
        double *      gpol;
@@ -80,12 +80,12 @@ void nb_kernel430_x86_64_sse2(int *           p_nri,
        __m128d  vcoul,fscal,gbscale,c6,c12;
        __m128d  rinvsq,r,rtab;
        __m128d  eps,Y,F,G,H;
-    __m128d  VV,FF,Fp;
+  __m128d  VV,FF,Fp;
        __m128d  vgb,fijGB,dvdatmp;
        __m128d  rinvsix,vvdw6,vvdw12,vvdwtmp;
        __m128d  facel,gbtabscale,dvdaj;
-    __m128d  fijD,fijR;
-    __m128d  xmm1,tabscale,eps2;
+  __m128d  fijD,fijR;
+  __m128d  xmm1,tabscale,eps2;
        __m128i  n0, nnn;
     
        
@@ -100,40 +100,40 @@ void nb_kernel430_x86_64_sse2(int *           p_nri,
        nri        = *p_nri;
        ntype      = *p_ntype;
     
-    gbfactor   = _mm_set1_pd( - ((1.0/gbdata->epsilon_r) - (1.0/gbdata->gb_epsilon_solvent)));     
-    gbtabscale = _mm_load1_pd(p_gbtabscale);  
-    facel      = _mm_load1_pd(p_facel);
-    tabscale   = _mm_load1_pd(p_tabscale);
-    
-    nj1         = 0;
-    jnrA = jnrB = 0;
-    j3A = j3B   = 0;
-    jx          = _mm_setzero_pd();
-    jy          = _mm_setzero_pd();
-    jz          = _mm_setzero_pd();
-    c6          = _mm_setzero_pd();
-    c12         = _mm_setzero_pd();
+  gbfactor   = _mm_set1_pd( - ((1.0/gbdata->epsilon_r) - (1.0/gbdata->gb_epsilon_solvent)));     
+  gbtabscale = _mm_load1_pd(p_gbtabscale);  
+  facel      = _mm_load1_pd(p_facel);
+  tabscale   = _mm_load1_pd(p_tabscale);
+  
+  nj1         = 0;
+  jnrA = jnrB = 0;
+  j3A = j3B   = 0;
+  jx          = _mm_setzero_pd();
+  jy          = _mm_setzero_pd();
+  jz          = _mm_setzero_pd();
+  c6          = _mm_setzero_pd();
+  c12         = _mm_setzero_pd();
        
        for(n=0;n<nri;n++)
        {
-        is3              = 3*shift[n];     
-        shX              = shiftvec[is3];  
-        shY              = shiftvec[is3+1];
-        shZ              = shiftvec[is3+2];
-        nj0              = jindex[n];      
-        nj1              = jindex[n+1];    
-        ii               = iinr[n];        
-        ii3              = 3*ii;           
+    is3              = 3*shift[n];     
+    shX              = shiftvec[is3];  
+    shY              = shiftvec[is3+1];
+    shZ              = shiftvec[is3+2];
+    nj0              = jindex[n];      
+    nj1              = jindex[n+1];    
+    ii               = iinr[n];        
+    ii3              = 3*ii;           
                
                ix               = _mm_set1_pd(shX+pos[ii3+0]);
                iy               = _mm_set1_pd(shY+pos[ii3+1]);
                iz               = _mm_set1_pd(shZ+pos[ii3+2]);
-        
+    
                iq               = _mm_load1_pd(charge+ii);
                iq               = _mm_mul_pd(iq,facel);
-        
+    
                isai             = _mm_load1_pd(invsqrta+ii);
-        
+    
                nti              = 2*ntype*type[ii];
                
                vctot            = _mm_setzero_pd();
@@ -152,39 +152,39 @@ void nb_kernel430_x86_64_sse2(int *           p_nri,
                        j3A     = jnrA * 3;
                        j3B     = jnrB * 3;
             
-            GMX_MM_LOAD_1RVEC_2POINTERS_PD(pos+j3A,pos+j3B,jx,jy,jz);
+      GMX_MM_LOAD_1RVEC_2POINTERS_PD(pos+j3A,pos+j3B,jx,jy,jz);
             
                        dx           = _mm_sub_pd(ix,jx);
                        dy           = _mm_sub_pd(iy,jy);
                        dz           = _mm_sub_pd(iz,jz);
             
-            rsq          = gmx_mm_calc_rsq_pd(dx,dy,dz);
-            
-            rinv         = gmx_mm_invsqrt_pd(rsq);
+      rsq          = gmx_mm_calc_rsq_pd(dx,dy,dz);
+      
+      rinv         = gmx_mm_invsqrt_pd(rsq);
                        rinvsq       = _mm_mul_pd(rinv,rinv);
-            
+      
                        /***********************************/
                        /* INTERACTION SECTION STARTS HERE */
                        /***********************************/
                        GMX_MM_LOAD_2VALUES_PD(charge+jnrA,charge+jnrB,jq);
                        GMX_MM_LOAD_2VALUES_PD(invsqrta+jnrA,invsqrta+jnrB,isaj);
             
-            /* Lennard-Jones */
-            tjA          = nti+2*type[jnrA];
+      /* Lennard-Jones */
+      tjA          = nti+2*type[jnrA];
                        tjB          = nti+2*type[jnrB];
-            
-            GMX_MM_LOAD_2PAIRS_PD(vdwparam+tjA,vdwparam+tjB,c6,c12);
+      
+      GMX_MM_LOAD_2PAIRS_PD(vdwparam+tjA,vdwparam+tjB,c6,c12);
                        
                        isaprod      = _mm_mul_pd(isai,isaj);
                        qq           = _mm_mul_pd(iq,jq);            
                        vcoul        = _mm_mul_pd(qq,rinv);
                        fscal        = _mm_mul_pd(vcoul,rinv);                                 
-            vctot        = _mm_add_pd(vctot,vcoul);
-            
-            /* Polarization interaction */
+      vctot        = _mm_add_pd(vctot,vcoul);
+      
+      /* Polarization interaction */
                        qq           = _mm_mul_pd(qq,_mm_mul_pd(isaprod,gbfactor));
                        gbscale      = _mm_mul_pd(isaprod,gbtabscale);
-            
+      
                        /* Calculate GB table index */
                        r            = _mm_mul_pd(rsq,rinv);
                        rtab         = _mm_mul_pd(r,gbscale);
@@ -193,47 +193,47 @@ void nb_kernel430_x86_64_sse2(int *           p_nri,
                        eps              = _mm_sub_pd(rtab,_mm_cvtepi32_pd(n0));
                        nnn                  = _mm_slli_epi32(n0,2);
                        
-            /* the tables are 16-byte aligned, so we can use _mm_load_pd */                    
-            Y            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))); 
-            F            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,1)));
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))+2); 
-            H            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,1))+2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            
-            G       = _mm_mul_pd(G,eps);
-            H       = _mm_mul_pd(H, _mm_mul_pd(eps,eps) );
-            F       = _mm_add_pd(F, _mm_add_pd( G , H ) );
-            Y       = _mm_add_pd(Y, _mm_mul_pd(F, eps));
-            F       = _mm_add_pd(F, _mm_add_pd(G , _mm_mul_pd(H,two)));
-            vgb     = _mm_mul_pd(Y, qq);           
-            fijGB   = _mm_mul_pd(F, _mm_mul_pd(qq,gbscale));
-            
-            dvdatmp = _mm_mul_pd(_mm_add_pd(vgb, _mm_mul_pd(fijGB,r)) , minushalf);
-            
-            vgbtot  = _mm_add_pd(vgbtot, vgb);
-            
-            dvdasum = _mm_add_pd(dvdasum, dvdatmp);
-            dvdatmp = _mm_mul_pd(dvdatmp, _mm_mul_pd(isaj,isaj));
-            
-            GMX_MM_INCREMENT_2VALUES_PD(dvda+jnrA,dvda+jnrB,dvdatmp);
+      /* the tables are 16-byte aligned, so we can use _mm_load_pd */                  
+      Y            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))); 
+      F            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,1)));
+      GMX_MM_TRANSPOSE2_PD(Y,F);
+      G            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))+2); 
+      H            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,1))+2);
+      GMX_MM_TRANSPOSE2_PD(G,H);
+      
+      G       = _mm_mul_pd(G,eps);
+      H       = _mm_mul_pd(H, _mm_mul_pd(eps,eps) );
+      F       = _mm_add_pd(F, _mm_add_pd( G , H ) );
+      Y       = _mm_add_pd(Y, _mm_mul_pd(F, eps));
+      F       = _mm_add_pd(F, _mm_add_pd(G , _mm_mul_pd(H,two)));
+      vgb     = _mm_mul_pd(Y, qq);           
+      fijGB   = _mm_mul_pd(F, _mm_mul_pd(qq,gbscale));
+      
+      dvdatmp = _mm_mul_pd(_mm_add_pd(vgb, _mm_mul_pd(fijGB,r)) , minushalf);
+      
+      vgbtot  = _mm_add_pd(vgbtot, vgb);
+      
+      dvdasum = _mm_add_pd(dvdasum, dvdatmp);
+      dvdatmp = _mm_mul_pd(dvdatmp, _mm_mul_pd(isaj,isaj));
+      
+      GMX_MM_INCREMENT_2VALUES_PD(dvda+jnrA,dvda+jnrB,dvdatmp);
                        
-            /* Calculate VDW table index */
+      /* Calculate VDW table index */
                        rtab    = _mm_mul_pd(r,tabscale);
                        n0      = _mm_cvttpd_epi32(rtab);
                        eps     = _mm_sub_pd(rtab,_mm_cvtepi32_pd(n0));
                        eps2    = _mm_mul_pd(eps,eps);
                        nnn     = _mm_slli_epi32(n0,3);
                        
-            /* Dispersion */
-            Y            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))); 
-            F            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,1)));
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+2); 
-            H            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,1))+2);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            
-            G       = _mm_mul_pd(G,eps);
+      /* Dispersion */
+      Y            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))); 
+      F            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,1)));
+      GMX_MM_TRANSPOSE2_PD(Y,F);
+      G            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+2); 
+      H            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,1))+2);
+      GMX_MM_TRANSPOSE2_PD(G,H);
+      
+      G       = _mm_mul_pd(G,eps);
                        H       = _mm_mul_pd(H,eps2);
                        Fp      = _mm_add_pd(F,G);
                        Fp      = _mm_add_pd(Fp,H);
@@ -245,16 +245,16 @@ void nb_kernel430_x86_64_sse2(int *           p_nri,
                        
                        vvdw6   = _mm_mul_pd(c6,VV);
                        fijD    = _mm_mul_pd(c6,FF);
-            
-            /* Dispersion */
-            Y            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+4); 
-            F            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,1))+4);
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+6); 
-            H            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,1))+6);
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            
-            G       = _mm_mul_pd(G,eps);
+      
+      /* Repulsion */
+      Y            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+4); 
+      F            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,1))+4);
+      GMX_MM_TRANSPOSE2_PD(Y,F);
+      G            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+6); 
+      H            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,1))+6);
+      GMX_MM_TRANSPOSE2_PD(G,H);
+      
+      G       = _mm_mul_pd(G,eps);
                        H       = _mm_mul_pd(H,eps2);
                        Fp      = _mm_add_pd(F,G);
                        Fp      = _mm_add_pd(Fp,H);
@@ -269,29 +269,29 @@ void nb_kernel430_x86_64_sse2(int *           p_nri,
                        
                        vvdwtmp = _mm_add_pd(vvdw12,vvdw6);
                        vvdwtot = _mm_add_pd(vvdwtot,vvdwtmp);
-            
+      
                        xmm1    = _mm_add_pd(fijD,fijR);
                        xmm1    = _mm_mul_pd(xmm1,tabscale);
                        xmm1    = _mm_add_pd(xmm1,fijGB);
                        xmm1    = _mm_sub_pd(xmm1,fscal);
                        fscal   = _mm_mul_pd(xmm1,neg);
                        fscal   = _mm_mul_pd(fscal,rinv);
-            
-            /***********************************/
+      
+      /***********************************/
                        /*  INTERACTION SECTION ENDS HERE  */
                        /***********************************/
-            
-            /* Calculate temporary vectorial force */
-            tx           = _mm_mul_pd(fscal,dx);
-            ty           = _mm_mul_pd(fscal,dy);
-            tz           = _mm_mul_pd(fscal,dz);
-            
-            /* Increment i atom force */
-            fix          = _mm_add_pd(fix,tx);
-            fiy          = _mm_add_pd(fiy,ty);
-            fiz          = _mm_add_pd(fiz,tz);
-            
-            /* Store j forces back */
+      
+      /* Calculate temporary vectorial force */
+      tx           = _mm_mul_pd(fscal,dx);
+      ty           = _mm_mul_pd(fscal,dy);
+      tz           = _mm_mul_pd(fscal,dz);
+      
+      /* Increment i atom force */
+      fix          = _mm_add_pd(fix,tx);
+      fiy          = _mm_add_pd(fiy,ty);
+      fiz          = _mm_add_pd(fiz,tz);
+      
+      /* Store j forces back */
                        GMX_MM_DECREMENT_1RVEC_2POINTERS_PD(faction+j3A,faction+j3B,tx,ty,tz);
                }
                
@@ -300,39 +300,53 @@ void nb_kernel430_x86_64_sse2(int *           p_nri,
                {
                        jnrA    = jjnr[k];
                        j3A     = jnrA * 3;
-            
-            GMX_MM_LOAD_1RVEC_1POINTER_PD(pos+j3A,jx,jy,jz);
-            
+      
+      GMX_MM_LOAD_1RVEC_1POINTER_PD(pos+j3A,jx,jy,jz);
+      
                        dx           = _mm_sub_sd(ix,jx);
                        dy           = _mm_sub_sd(iy,jy);
                        dz           = _mm_sub_sd(iz,jz);
             
-            rsq          = gmx_mm_calc_rsq_pd(dx,dy,dz);
-            
-            rinv         = gmx_mm_invsqrt_pd(rsq);
+      rsq          = gmx_mm_calc_rsq_pd(dx,dy,dz);
+      
+      rinv         = gmx_mm_invsqrt_pd(rsq);
                        rinvsq       = _mm_mul_sd(rinv,rinv);
-            
-                       /***********************************/
+      
+      /* The reason for zeroing these variables here is to fix bug 585.
+       * What happens is that __m128d _mm_add_sd(a,b) gives back r0=a[0]+b[0]
+       * and r1=0, but it should be r1=a[1].
+       * This might be a compiler issue (tested with gcc-4.1.3 and -O3).
+       * To work around it, we zero these variables and use _mm_add_pd (**) instead.
+       * Note that the only variables affected are the energies, since
+       * the total sum needs to be correct.
+       */
+      vgb          = _mm_setzero_pd();
+      vcoul        = _mm_setzero_pd();
+      dvdatmp      = _mm_setzero_pd();
+      vvdw6        = _mm_setzero_pd();
+      vvdw12       = _mm_setzero_pd();
+
+      /***********************************/
                        /* INTERACTION SECTION STARTS HERE */
                        /***********************************/
                        GMX_MM_LOAD_1VALUE_PD(charge+jnrA,jq);
                        GMX_MM_LOAD_1VALUE_PD(invsqrta+jnrA,isaj);
             
-            /* Lennard-Jones */
-            tjA          = nti+2*type[jnrA];
-            
-            GMX_MM_LOAD_1PAIR_PD(vdwparam+tjA,c6,c12);
+      /* Lennard-Jones */
+      tjA          = nti+2*type[jnrA];
+      
+      GMX_MM_LOAD_1PAIR_PD(vdwparam+tjA,c6,c12);
                        
                        isaprod      = _mm_mul_sd(isai,isaj);
-                       qq           = _mm_mul_sd(iq,jq);            
+                       qq           = _mm_mul_sd(jq,iq);            
                        vcoul        = _mm_mul_sd(qq,rinv);
                        fscal        = _mm_mul_sd(vcoul,rinv);                                 
-            vctot        = _mm_add_sd(vctot,vcoul);
-            
-            /* Polarization interaction */
+      vctot        = _mm_add_pd(vctot,vcoul); /* (**) */
+      
+      /* Polarization interaction */
                        qq           = _mm_mul_sd(qq,_mm_mul_sd(isaprod,gbfactor));
                        gbscale      = _mm_mul_sd(isaprod,gbtabscale);
-            
+      
                        /* Calculate GB table index */
                        r            = _mm_mul_sd(rsq,rinv);
                        rtab         = _mm_mul_sd(r,gbscale);
@@ -341,47 +355,47 @@ void nb_kernel430_x86_64_sse2(int *           p_nri,
                        eps              = _mm_sub_sd(rtab,_mm_cvtepi32_pd(n0));
                        nnn                  = _mm_slli_epi32(n0,2);
                        
-            /* the tables are 16-byte aligned, so we can use _mm_load_pd */                    
-            Y            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))); 
-            F            = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))+2); 
-            H            = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            
-            G       = _mm_mul_sd(G,eps);
-            H       = _mm_mul_sd(H, _mm_mul_sd(eps,eps) );
-            F       = _mm_add_sd(F, _mm_add_sd( G , H ) );
-            Y       = _mm_add_sd(Y, _mm_mul_sd(F, eps));
-            F       = _mm_add_sd(F, _mm_add_sd(G , _mm_mul_sd(H,two)));
-            vgb     = _mm_mul_sd(Y, qq);           
-            fijGB   = _mm_mul_sd(F, _mm_mul_sd(qq,gbscale));
-            
-            dvdatmp = _mm_mul_sd(_mm_add_sd(vgb, _mm_mul_sd(fijGB,r)) , minushalf);
-            
-            vgbtot  = _mm_add_sd(vgbtot, vgb);
-            
-            dvdasum = _mm_add_sd(dvdasum, dvdatmp);
-            dvdatmp = _mm_mul_sd(dvdatmp, _mm_mul_sd(isaj,isaj));
-            
-            GMX_MM_INCREMENT_1VALUE_PD(dvda+jnrA,dvdatmp);
+      /* the tables are 16-byte aligned, so we can use _mm_load_pd */                  
+      Y            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))); 
+      F            = _mm_setzero_pd();
+      GMX_MM_TRANSPOSE2_PD(Y,F);
+      G            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))+2); 
+      H            = _mm_setzero_pd();
+      GMX_MM_TRANSPOSE2_PD(G,H);
+      
+      G       = _mm_mul_sd(G,eps);
+      H       = _mm_mul_sd(H, _mm_mul_sd(eps,eps) );
+      F       = _mm_add_sd(F, _mm_add_sd( G , H ) );
+      Y       = _mm_add_sd(Y, _mm_mul_sd(F, eps));
+      F       = _mm_add_sd(F, _mm_add_sd(G , _mm_mul_sd(H,two)));
+      vgb     = _mm_mul_sd(Y, qq);           
+      fijGB   = _mm_mul_sd(F, _mm_mul_sd(qq,gbscale));
+      
+      dvdatmp = _mm_mul_sd(_mm_add_sd(vgb, _mm_mul_sd(fijGB,r)) , minushalf);
+      
+      vgbtot  = _mm_add_pd(vgbtot, vgb); /* (**) */
+      
+      dvdasum = _mm_add_pd(dvdasum, dvdatmp); /* (**) */
+      dvdatmp = _mm_mul_sd(dvdatmp, _mm_mul_sd(isaj,isaj));
+      
+      GMX_MM_INCREMENT_1VALUE_PD(dvda+jnrA,dvdatmp);
                        
-            /* Calculate VDW table index */
+      /* Calculate VDW table index */
                        rtab    = _mm_mul_sd(r,tabscale);
                        n0      = _mm_cvttpd_epi32(rtab);
                        eps     = _mm_sub_sd(rtab,_mm_cvtepi32_pd(n0));
                        eps2    = _mm_mul_sd(eps,eps);
                        nnn     = _mm_slli_epi32(n0,3);
                        
-            /* Dispersion */
-            Y            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))); 
-            F            = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+2); 
-            H            = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            
-            G       = _mm_mul_sd(G,eps);
+      /* Dispersion */
+      Y            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))); 
+      F            = _mm_setzero_pd();
+      GMX_MM_TRANSPOSE2_PD(Y,F);
+      G            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+2); 
+      H            = _mm_setzero_pd();
+      GMX_MM_TRANSPOSE2_PD(G,H);
+      
+      G       = _mm_mul_sd(G,eps);
                        H       = _mm_mul_sd(H,eps2);
                        Fp      = _mm_add_sd(F,G);
                        Fp      = _mm_add_sd(Fp,H);
@@ -393,16 +407,16 @@ void nb_kernel430_x86_64_sse2(int *           p_nri,
                        
                        vvdw6   = _mm_mul_sd(c6,VV);
                        fijD    = _mm_mul_sd(c6,FF);
-            
-            /* Dispersion */
-            Y            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+4); 
-            F            = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(Y,F);
-            G            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+6); 
-            H            = _mm_setzero_pd();
-            GMX_MM_TRANSPOSE2_PD(G,H);
-            
-            G       = _mm_mul_sd(G,eps);
+      
+      /* Repulsion */
+      Y            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+4); 
+      F            = _mm_setzero_pd();
+      GMX_MM_TRANSPOSE2_PD(Y,F);
+      G            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+6); 
+      H            = _mm_setzero_pd();
+      GMX_MM_TRANSPOSE2_PD(G,H);
+      
+      G       = _mm_mul_sd(G,eps);
                        H       = _mm_mul_sd(H,eps2);
                        Fp      = _mm_add_sd(F,G);
                        Fp      = _mm_add_sd(Fp,H);
@@ -416,7 +430,7 @@ void nb_kernel430_x86_64_sse2(int *           p_nri,
                        fijR    = _mm_mul_sd(c12,FF);
                        
                        vvdwtmp = _mm_add_sd(vvdw12,vvdw6);
-                       vvdwtot = _mm_add_sd(vvdwtot,vvdwtmp);
+                       vvdwtot = _mm_add_pd(vvdwtot,vvdwtmp); /* (**) */
             
                        xmm1    = _mm_add_sd(fijD,fijR);
                        xmm1    = _mm_mul_sd(xmm1,tabscale);
@@ -425,34 +439,37 @@ void nb_kernel430_x86_64_sse2(int *           p_nri,
                        fscal   = _mm_mul_sd(xmm1,neg);
                        fscal   = _mm_mul_sd(fscal,rinv);
 
-            /***********************************/
+      /***********************************/
                        /*  INTERACTION SECTION ENDS HERE  */
                        /***********************************/
-            
-            /* Calculate temporary vectorial force */
-            tx           = _mm_mul_sd(fscal,dx);
-            ty           = _mm_mul_sd(fscal,dy);
-            tz           = _mm_mul_sd(fscal,dz);
-            
-            /* Increment i atom force */
-            fix          = _mm_add_sd(fix,tx);
-            fiy          = _mm_add_sd(fiy,ty);
-            fiz          = _mm_add_sd(fiz,tz);
-            
-            /* Store j forces back */
+      
+      /* Calculate temporary vectorial force */
+      tx           = _mm_mul_sd(fscal,dx);
+      ty           = _mm_mul_sd(fscal,dy);
+      tz           = _mm_mul_sd(fscal,dz);
+      
+      /* Increment i atom force */
+      fix          = _mm_add_sd(fix,tx);
+      fiy          = _mm_add_sd(fiy,ty);
+      fiz          = _mm_add_sd(fiz,tz);
+      
+      /* Store j forces back */
                        GMX_MM_DECREMENT_1RVEC_1POINTER_PD(faction+j3A,tx,ty,tz);
                }
                
-        dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai,isai));
-        gmx_mm_update_iforce_1atom_pd(&fix,&fiy,&fiz,faction+ii3,fshift+is3);
-        
-        ggid     = gid[n];         
-        
-        gmx_mm_update_2pot_pd(vctot,vc+ggid,vvdwtot,vvdw+ggid);
-        gmx_mm_update_2pot_pd(vgbtot,gpol+ggid,dvdasum,dvda+ii);
-       }
+    dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai,isai));
+    gmx_mm_update_iforce_1atom_pd(&fix,&fiy,&fiz,faction+ii3,fshift+is3);
     
+    ggid     = gid[n];         
+    
+    gmx_mm_update_1pot_pd(vctot,vc+ggid);
+    gmx_mm_update_1pot_pd(vgbtot,gpol+ggid);
+    gmx_mm_update_1pot_pd(dvdasum,dvda+ii);
+    gmx_mm_update_1pot_pd(vvdwtot,vvdw+ggid);
+    
+       }
+  
        *outeriter   = nri;            
-    *inneriter   = nj1;        
+  *inneriter   = nj1;  
 }
 
index 2c3d8d43687cd340b77262dbc05d136f170baf0f..e43d28db6731968d656802416ff590fd995a2098 100644 (file)
@@ -715,7 +715,7 @@ nb_kernel_allvsallgb_sse2_double(t_forcerec *           fr,
         pmask1           = prologue_mask[i+1];
         emask0           = epilogue_mask[i];
         emask1           = epilogue_mask[i+1];
-        imask_SSE0       = _mm_load1_pd((double *)(imask+i));
+        imask_SSE0       = _mm_load1_pd((double *)(imask+2*i));
         imask_SSE1       = _mm_load1_pd((double *)(imask+2*i+2));
         
         for(j=nj0; j<nj1; j+=UNROLLJ)
index 76f40036d7ba203ff0c9f93b9cab5fe86020b6e5..45ca192cf6e5fabedec6e18961dbf9145b20cd55 100644 (file)
@@ -1770,6 +1770,16 @@ init_method(t_selelem *sel, t_topology *top, int isize)
                 }
             }
         }
+        /* Clear the values for dynamic output to avoid valgrind warnings. */
+        if ((sel->flags & SEL_DYNAMIC) && sel->v.type == REAL_VALUE)
+        {
+            int i;
+
+            for (i = 0; i < sel->v.nr; ++i)
+            {
+                sel->v.u.r[i] = 0.0;
+            }
+        }
     }
 
     return 0;
index 06a4d8b7062b13aa2af56492cedaf1714a092530..9fbe3c064fd6872f689514f905c8f9e9449e9eff 100644 (file)
@@ -1079,13 +1079,16 @@ _gmx_sel_evaluate_arithmetic(gmx_sel_evaluate_t *data, t_selelem *sel,
     if (left->mempool)
     {
         _gmx_selvalue_setstore(&left->v, sel->v.u.ptr);
-        rc = _gmx_selelem_mempool_reserve(right, g->isize);
-        if (rc != 0)
+        if (right)
         {
-            return rc;
+            rc = _gmx_selelem_mempool_reserve(right, g->isize);
+            if (rc != 0)
+            {
+                return rc;
+            }
         }
     }
-    else if (right->mempool)
+    else if (right && right->mempool)
     {
         _gmx_selvalue_setstore(&right->v, sel->v.u.ptr);
     }
@@ -1123,9 +1126,12 @@ _gmx_sel_evaluate_arithmetic(gmx_sel_evaluate_t *data, t_selelem *sel,
     if (left->mempool)
     {
         _gmx_selvalue_setstore(&left->v, NULL);
-        _gmx_selelem_mempool_release(right);
+        if (right)
+        {
+            _gmx_selelem_mempool_release(right);
+        }
     }
-    else if (right->mempool)
+    else if (right && right->mempool)
     {
         _gmx_selvalue_setstore(&right->v, NULL);
     }
index e8aac56206f8d79b83ac9a7ae4e84f7a8a54ef5f..3e57a98218e146464191d14b3073035b2f722fbc 100644 (file)
      OR = 286,
      AND = 287,
      NOT = 288,
-     UNARY_NEG = 289
+     UNARY_NEG = 289,
+     NUM_REDUCT = 290
    };
 #endif
 /* Tokens.  */
 #define AND 287
 #define NOT 288
 #define UNARY_NEG 289
+#define NUM_REDUCT 290
 
 
 
@@ -208,7 +210,7 @@ typedef union YYSTYPE
     struct t_selexpr_param     *param;
 }
 /* Line 187 of yacc.c.  */
-#line 212 "parser.c"
+#line 214 "parser.c"
        YYSTYPE;
 # define yystype YYSTYPE /* obsolescent; will be withdrawn */
 # define YYSTYPE_IS_DECLARED 1
@@ -221,7 +223,7 @@ typedef union YYSTYPE
 
 
 /* Line 216 of yacc.c.  */
-#line 225 "parser.c"
+#line 227 "parser.c"
 
 #ifdef short
 # undef short
@@ -436,20 +438,20 @@ union yyalloc
 /* YYFINAL -- State number of the termination state.  */
 #define YYFINAL  2
 /* YYLAST -- Last index in YYTABLE.  */
-#define YYLAST   329
+#define YYLAST   417
 
 /* YYNTOKENS -- Number of terminals.  */
-#define YYNTOKENS  44
+#define YYNTOKENS  49
 /* YYNNTS -- Number of nonterminals.  */
-#define YYNNTS  20
+#define YYNNTS  26
 /* YYNRULES -- Number of rules.  */
-#define YYNRULES  76
+#define YYNRULES  91
 /* YYNRULES -- Number of states.  */
-#define YYNSTATES  125
+#define YYNSTATES  150
 
 /* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX.  */
 #define YYUNDEFTOK  2
-#define YYMAXUTOK   289
+#define YYMAXUTOK   290
 
 #define YYTRANSLATE(YYX)                                               \
   ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK)
@@ -461,15 +463,15 @@ static const yytype_uint8 yytranslate[] =
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-      41,    42,    36,    34,    43,    35,     2,    37,     2,     2,
+      42,    43,    36,    34,    45,    35,     2,    37,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       2,    40,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,    41,     2,     2,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       2,     2,     2,     2,    39,     2,     2,     2,     2,     2,
-       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,    44,     2,    46,    39,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,    47,     2,    48,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
@@ -485,63 +487,72 @@ static const yytype_uint8 yytranslate[] =
        2,     2,     2,     2,     2,     2,     1,     2,     3,     4,
        5,     6,     7,     8,     9,    10,    11,    12,    13,    14,
       15,    16,    17,    18,    19,    20,    21,    22,    23,    24,
-      25,    26,    27,    28,    29,    30,    31,    32,    33,    38
+      25,    26,    27,    28,    29,    30,    31,    32,    33,    38,
+      40
 };
 
 #if YYDEBUG
 /* YYPRHS[YYN] -- Index of the first RHS symbol of rule number YYN in
    YYRHS.  */
-static const yytype_uint8 yyprhs[] =
+static const yytype_uint16 yyprhs[] =
 {
        0,     0,     3,     4,     7,    10,    13,    14,    16,    18,
       20,    22,    25,    29,    33,    37,    39,    41,    44,    47,
-      49,    51,    55,    59,    61,    63,    65,    67,    70,    74,
-      78,    82,    86,    89,    92,    94,    96,    99,   103,   107,
-     111,   113,   115,   118,   122,   126,   130,   134,   138,   141,
-     145,   149,   151,   154,   162,   166,   169,   173,   175,   177,
-     179,   181,   184,   185,   188,   191,   192,   194,   196,   199,
-     203,   205,   207,   209,   211,   215,   219
+      49,    51,    55,    59,    61,    64,    66,    69,    71,    73,
+      75,    77,    80,    84,    88,    92,    96,    99,   102,   104,
+     106,   109,   113,   117,   121,   123,   125,   128,   132,   136,
+     140,   144,   148,   151,   155,   159,   161,   164,   172,   176,
+     179,   183,   185,   187,   189,   191,   194,   195,   198,   201,
+     202,   204,   208,   210,   213,   217,   219,   223,   225,   228,
+     232,   234,   236,   238,   240,   242,   244,   246,   248,   250,
+     254,   258
 };
 
 /* YYRHS -- A `-1'-separated list of the rules' RHS.  */
 static const yytype_int8 yyrhs[] =
 {
-      45,     0,    -1,    -1,    45,    46,    -1,    47,    10,    -1,
-       1,    10,    -1,    -1,    48,    -1,     6,    -1,    52,    -1,
-      50,    -1,    52,    50,    -1,     9,    40,    53,    -1,     9,
-      40,    55,    -1,     9,    40,    57,    -1,     4,    -1,    49,
-      -1,     4,     5,    -1,    49,     5,    -1,    57,    -1,    53,
-      -1,    41,    50,    42,    -1,    50,    23,    58,    -1,     6,
-      -1,     7,    -1,     8,    -1,     9,    -1,    33,    53,    -1,
-      53,    32,    53,    -1,    53,    31,    53,    -1,    41,    53,
-      42,    -1,    55,    28,    55,    -1,    11,    52,    -1,    11,
-       6,    -1,    24,    -1,    18,    -1,    54,    19,    -1,    54,
-      17,    62,    -1,    54,    16,    62,    -1,    54,    21,    58,
-      -1,     6,    -1,     7,    -1,    54,    16,    -1,    54,    20,
-      58,    -1,    55,    34,    55,    -1,    55,    35,    55,    -1,
-      55,    36,    55,    -1,    55,    37,    55,    -1,    35,    55,
-      -1,    55,    39,    55,    -1,    41,    55,    42,    -1,    52,
-      -1,    54,    17,    -1,    41,    51,    43,    51,    43,    51,
-      42,    -1,    41,    57,    42,    -1,    22,    58,    -1,    18,
-      27,    53,    -1,    14,    -1,    13,    -1,    15,    -1,    59,
-      -1,    59,    26,    -1,    -1,    59,    60,    -1,    25,    61,
-      -1,    -1,    62,    -1,    63,    -1,    62,    63,    -1,    62,
-      43,    63,    -1,    53,    -1,    57,    -1,    55,    -1,    56,
-      -1,     6,    12,     6,    -1,     6,    12,     7,    -1,     7,
-      12,    51,    -1
+      50,     0,    -1,    -1,    50,    51,    -1,    52,    10,    -1,
+       1,    10,    -1,    -1,    53,    -1,     6,    -1,    59,    -1,
+      55,    -1,    59,    55,    -1,     9,    41,    60,    -1,     9,
+      41,    62,    -1,     9,    41,    64,    -1,     4,    -1,    54,
+      -1,     4,     5,    -1,    54,     5,    -1,    64,    -1,    60,
+      -1,    42,    55,    43,    -1,    55,    23,    65,    -1,     6,
+      -1,    35,     6,    -1,     7,    -1,    35,     7,    -1,    56,
+      -1,    57,    -1,     8,    -1,     9,    -1,    33,    60,    -1,
+      60,    32,    60,    -1,    60,    31,    60,    -1,    42,    60,
+      43,    -1,    62,    28,    62,    -1,    11,    59,    -1,    11,
+       6,    -1,    24,    -1,    18,    -1,    61,    19,    -1,    61,
+      17,    70,    -1,    61,    16,    70,    -1,    61,    21,    65,
+      -1,     6,    -1,     7,    -1,    61,    16,    -1,    61,    20,
+      65,    -1,    62,    34,    62,    -1,    62,    35,    62,    -1,
+      62,    36,    62,    -1,    62,    37,    62,    -1,    35,    62,
+      -1,    62,    39,    62,    -1,    42,    62,    43,    -1,    59,
+      -1,    61,    17,    -1,    44,    58,    45,    58,    45,    58,
+      46,    -1,    42,    64,    43,    -1,    22,    65,    -1,    18,
+      27,    60,    -1,    14,    -1,    13,    -1,    15,    -1,    66,
+      -1,    66,    26,    -1,    -1,    66,    67,    -1,    25,    68,
+      -1,    -1,    69,    -1,    47,    69,    48,    -1,    72,    -1,
+      69,    72,    -1,    69,    45,    72,    -1,    71,    -1,    47,
+      71,    48,    -1,    73,    -1,    71,    73,    -1,    71,    45,
+      73,    -1,    60,    -1,    64,    -1,    62,    -1,    63,    -1,
+      74,    -1,    56,    -1,    57,    -1,    59,    -1,    74,    -1,
+      56,    12,    56,    -1,    56,    12,    57,    -1,    57,    12,
+      58,    -1
 };
 
 /* YYRLINE[YYN] -- source line where rule number YYN was defined.  */
 static const yytype_uint16 yyrline[] =
 {
-       0,   178,   178,   179,   188,   189,   209,   213,   214,   223,
-     233,   235,   237,   239,   241,   247,   248,   251,   252,   256,
-     257,   262,   263,   274,   275,   278,   279,   287,   293,   299,
-     311,   315,   323,   329,   337,   338,   342,   347,   352,   360,
-     372,   379,   389,   394,   402,   404,   406,   408,   410,   412,
-     414,   421,   428,   440,   445,   449,   457,   468,   472,   476,
-     485,   487,   492,   493,   498,   505,   506,   510,   511,   513,
-     517,   519,   521,   523,   525,   530,   535
+       0,   182,   182,   183,   192,   193,   213,   217,   218,   227,
+     237,   239,   241,   243,   245,   251,   252,   255,   256,   260,
+     261,   266,   267,   279,   280,   284,   285,   288,   289,   292,
+     293,   301,   307,   313,   325,   329,   337,   343,   351,   352,
+     356,   361,   366,   374,   386,   393,   403,   408,   416,   418,
+     420,   422,   424,   426,   428,   435,   442,   454,   459,   463,
+     471,   482,   486,   490,   499,   501,   506,   507,   512,   519,
+     520,   521,   525,   526,   528,   533,   534,   538,   539,   541,
+     545,   547,   549,   551,   553,   557,   562,   567,   572,   576,
+     581,   586
 };
 #endif
 
@@ -556,12 +567,14 @@ static const char *const yytname[] =
   "KEYWORD_STR", "KEYWORD_POS", "KEYWORD_GROUP", "METHOD_NUMERIC",
   "METHOD_GROUP", "METHOD_POS", "MODIFIER", "EMPTY_POSMOD", "PARAM",
   "END_OF_METHOD", "OF", "CMP_OP", "PARAM_REDUCT", "XOR", "OR", "AND",
-  "NOT", "'+'", "'-'", "'*'", "'/'", "UNARY_NEG", "'^'", "'='", "'('",
-  "')'", "','", "$accept", "commands", "command", "cmd_plain",
-  "help_request", "help_topic", "selection", "number", "string",
+  "NOT", "'+'", "'-'", "'*'", "'/'", "UNARY_NEG", "'^'", "NUM_REDUCT",
+  "'='", "'('", "')'", "'['", "','", "']'", "'{'", "'}'", "$accept",
+  "commands", "command", "cmd_plain", "help_request", "help_topic",
+  "selection", "integer_number", "real_number", "number", "string",
   "sel_expr", "pos_mod", "num_expr", "str_expr", "pos_expr",
   "method_params", "method_param_list", "method_param", "value_list",
-  "value_list_nonempty", "value_item", 0
+  "value_list_contents", "basic_value_list", "basic_value_list_contents",
+  "value_item", "basic_value_item", "value_item_range", 0
 };
 #endif
 
@@ -574,21 +587,23 @@ static const yytype_uint16 yytoknum[] =
      265,   266,   267,   268,   269,   270,   271,   272,   273,   274,
      275,   276,   277,   278,   279,   280,   281,   282,   283,   284,
      285,   286,   287,   288,    43,    45,    42,    47,   289,    94,
-      61,    40,    41,    44
+     290,    61,    40,    41,    91,    44,    93,   123,   125
 };
 # endif
 
 /* YYR1[YYN] -- Symbol number of symbol that rule YYN derives.  */
 static const yytype_uint8 yyr1[] =
 {
-       0,    44,    45,    45,    46,    46,    47,    47,    47,    47,
-      47,    47,    47,    47,    47,    48,    48,    49,    49,    50,
-      50,    50,    50,    51,    51,    52,    52,    53,    53,    53,
-      53,    53,    53,    53,    54,    54,    53,    53,    53,    53,
-      55,    55,    55,    55,    55,    55,    55,    55,    55,    55,
-      55,    56,    56,    57,    57,    57,    57,    53,    55,    57,
-      58,    58,    59,    59,    60,    61,    61,    62,    62,    62,
-      63,    63,    63,    63,    63,    63,    63
+       0,    49,    50,    50,    51,    51,    52,    52,    52,    52,
+      52,    52,    52,    52,    52,    53,    53,    54,    54,    55,
+      55,    55,    55,    56,    56,    57,    57,    58,    58,    59,
+      59,    60,    60,    60,    60,    60,    60,    60,    61,    61,
+      60,    60,    60,    60,    62,    62,    62,    62,    62,    62,
+      62,    62,    62,    62,    62,    63,    63,    64,    64,    64,
+      64,    60,    62,    64,    65,    65,    66,    66,    67,    68,
+      68,    68,    69,    69,    69,    70,    70,    71,    71,    71,
+      72,    72,    72,    72,    72,    73,    73,    73,    73,    74,
+      74,    74
 };
 
 /* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN.  */
@@ -596,12 +611,14 @@ static const yytype_uint8 yyr2[] =
 {
        0,     2,     0,     2,     2,     2,     0,     1,     1,     1,
        1,     2,     3,     3,     3,     1,     1,     2,     2,     1,
-       1,     3,     3,     1,     1,     1,     1,     2,     3,     3,
-       3,     3,     2,     2,     1,     1,     2,     3,     3,     3,
-       1,     1,     2,     3,     3,     3,     3,     3,     2,     3,
-       3,     1,     2,     7,     3,     2,     3,     1,     1,     1,
-       1,     2,     0,     2,     2,     0,     1,     1,     2,     3,
-       1,     1,     1,     1,     3,     3,     3
+       1,     3,     3,     1,     2,     1,     2,     1,     1,     1,
+       1,     2,     3,     3,     3,     3,     2,     2,     1,     1,
+       2,     3,     3,     3,     1,     1,     2,     3,     3,     3,
+       3,     3,     2,     3,     3,     1,     2,     7,     3,     2,
+       3,     1,     1,     1,     1,     2,     0,     2,     2,     0,
+       1,     3,     1,     2,     3,     1,     3,     1,     2,     3,
+       1,     1,     1,     1,     1,     1,     1,     1,     1,     3,
+       3,     3
 };
 
 /* YYDEFACT[STATE-NAME] -- Default rule to reduce with in state
@@ -609,151 +626,177 @@ static const yytype_uint8 yyr2[] =
    means the default is an error.  */
 static const yytype_uint8 yydefact[] =
 {
-       2,     0,     1,     0,    15,    40,    41,    25,    26,     0,
-      58,    57,    59,    35,    62,    34,     0,     0,     0,     3,
-       0,     7,    16,    10,     9,    20,     0,     0,    19,     5,
-      17,     0,    33,    26,    32,     0,    55,    60,    40,    35,
-       0,    27,     0,     0,    48,    40,    41,     0,     0,    20,
-       0,    19,     4,    18,    62,    11,     0,     0,    42,     0,
-      36,    62,    62,     0,     0,     0,     0,     0,     0,     0,
-      12,    13,    14,    56,    65,    61,    63,     0,     0,    42,
-      21,     0,    30,    50,    54,    22,    29,    28,    40,    41,
-      51,    70,     0,    72,    73,    71,    38,    67,    37,    43,
-      39,    31,    44,    45,    46,    47,    49,     0,    64,    66,
-      23,    24,     0,     0,     0,    52,     0,    68,     0,    74,
-      75,    76,    69,     0,    53
+       2,     0,     1,     0,    15,    44,    45,    29,    30,     0,
+      62,    61,    63,    39,    66,    38,     0,     0,     0,     0,
+       3,     0,     7,    16,    10,     9,    20,     0,     0,    19,
+       5,    17,     0,    37,    30,    36,     0,    59,    64,    44,
+      39,     0,    31,     0,     0,    52,     0,    20,     0,    19,
+      23,    25,     0,    27,    28,     0,     4,    18,    66,    11,
+       0,     0,    46,     0,    40,    66,    66,     0,     0,     0,
+       0,     0,     0,     0,    12,    13,    14,    60,    69,    65,
+      67,     0,     0,    46,    21,    34,    54,    58,    24,    26,
+       0,    22,    33,    32,     0,    85,    86,    87,    42,    75,
+      77,    88,    41,    47,    43,    35,    48,    49,    50,    51,
+      53,     0,    44,    45,     0,     0,     0,     0,    55,    80,
+       0,    82,    83,    81,    68,    70,    72,    84,     0,     0,
+       0,     0,     0,    78,    44,    45,     0,    56,     0,    73,
+       0,    76,    89,    90,    91,    79,    71,    74,     0,    57
 };
 
 /* YYDEFGOTO[NTERM-NUM].  */
 static const yytype_int8 yydefgoto[] =
 {
-      -1,     1,    19,    20,    21,    22,    23,    48,    90,    91,
-      26,    93,    94,    95,    36,    37,    76,   108,    98,    97
+      -1,     1,    20,    21,    22,    23,    24,    95,    96,    55,
+      97,   119,    27,    28,   122,   123,    37,    38,    80,   124,
+     125,   102,    99,   126,   100,   101
 };
 
 /* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
    STATE-NUM.  */
-#define YYPACT_NINF -87
+#define YYPACT_NINF -93
 static const yytype_int16 yypact[] =
 {
-     -87,   124,   -87,    -1,     6,     8,   -87,   -87,    12,    56,
-     -87,   -87,   -87,    17,   -87,   -87,   278,   288,   200,   -87,
-      53,   -87,    75,    66,   225,     0,   133,    84,   -87,   -87,
-     -87,   239,   -87,   -87,   -87,   278,   -87,    57,   -87,   -87,
-     278,   -87,   288,   -14,    55,    54,    58,   -16,    63,   -18,
-      68,    69,   -87,   -87,   -87,    66,   278,   278,   186,   186,
-     -87,   -87,   -87,   288,   288,   288,   288,   288,   288,   264,
-       0,    84,   -87,     0,   186,   -87,   -87,   -18,    51,   -87,
-     -87,   102,   -87,   -87,   -87,   -87,    83,   -87,   110,   114,
-     -87,     0,   164,    19,   -87,   -87,   155,   -87,   155,   -87,
-     -87,    19,    38,    38,    55,    55,    55,    69,   -87,   155,
-     -87,   -87,    93,   107,   102,   186,   186,   -87,   102,   -87,
-     -87,   -87,   -87,    85,   -87
+     -93,   155,   -93,    10,    19,    26,   -93,   -93,    -3,    73,
+     -93,   -93,   -93,    22,   -93,   -93,   356,   372,   317,    11,
+     -93,    79,   -93,    86,    70,   317,    29,   384,   180,   -93,
+     -93,   -93,   342,   -93,   -93,   -93,   356,   -93,     6,   -93,
+     -93,   356,   -93,   372,   -10,    57,   -20,   -17,   256,    54,
+     -93,   -93,    88,   -93,   -93,    55,   -93,   -93,   -93,    70,
+     356,   356,   197,   174,   -93,   -93,   -93,   372,   372,   372,
+     372,   372,   372,   342,    29,   180,   -93,    29,   221,   -93,
+     -93,   -17,   223,   -93,   -93,   -93,   -93,   -93,   -93,   -93,
+      11,   -93,    69,   -93,    78,    90,    94,   -93,   -93,   244,
+     -93,   -93,   -93,   -93,   -93,   267,    36,    36,    57,    57,
+      57,    54,    95,    96,   375,   303,    90,    94,   -93,    29,
+     392,   267,   -93,   -93,   -93,   263,   -93,   -93,    71,    35,
+      11,    11,    78,   -93,   105,   106,   178,   174,   303,   -93,
+      11,   -93,   -93,   -93,   -93,   -93,   -93,   -93,    80,   -93
 };
 
 /* YYPGOTO[NTERM-NUM].  */
 static const yytype_int8 yypgoto[] =
 {
-     -87,   -87,   -87,   -87,   -87,   -87,    15,   -78,    28,    60,
-     -17,     3,   -87,     4,   -46,   -87,   -87,   -87,   -57,   -86
+     -93,   -93,   -93,   -93,   -93,   -93,   -13,     0,    14,   -81,
+      -1,    87,    -4,   -16,   -93,     3,   -36,   -93,   -93,   -93,
+      12,    81,    39,   -91,   -92,   -67
 };
 
 /* YYTABLE[YYPACT[STATE-NUM]].  What to do in state STATE-NUM.  If
    positive, shift that token.  If negative, reduce the rule which
    number is the opposite.  If zero, do what YYDEFACT says.
    If YYTABLE_NINF, syntax error.  */
-#define YYTABLE_NINF -25
-static const yytype_int8 yytable[] =
+#define YYTABLE_NINF -27
+static const yytype_int16 yytable[] =
 {
-      43,    96,    79,   112,    27,    28,    61,    54,    85,    29,
-     117,    30,   117,    56,    57,    99,   100,   109,    -8,    27,
-      44,    50,    51,   117,    82,    43,    80,    27,    28,    24,
-     122,    56,    57,    47,    71,    72,   121,    34,    27,    55,
-     123,    92,    92,    50,    35,    78,    43,    43,    43,    43,
-      43,    43,    31,    64,    65,    66,    67,    92,    68,    27,
-      27,    25,    32,    52,     7,    33,   101,   102,   103,   104,
-     105,   106,    50,   107,    66,    67,    41,    68,    49,    92,
-      53,    92,    74,    75,    25,    64,    65,    66,    67,    54,
-      68,    70,    92,    83,    68,    73,    63,   -23,    92,    92,
-      77,   -24,    64,    65,    66,    67,    81,    68,   110,   111,
-      83,    84,    63,   119,   120,    57,    86,    87,    64,    65,
-      66,    67,   113,    68,     2,     3,   114,   124,     4,    77,
-       5,     6,     7,     8,    -6,     9,   118,    10,    11,    12,
-       0,     0,    13,     0,     0,     0,    14,     0,    15,    58,
-      59,     0,    60,    61,    62,     0,     0,    16,     0,    17,
-       0,    88,    89,     7,    33,    18,     9,     0,    10,    11,
+      25,    45,    48,    58,    29,    46,    83,   133,    35,   128,
+      65,   127,    59,    44,    60,    61,    75,    50,    51,    53,
+      30,    49,    91,    84,    31,    48,    85,    82,    29,   103,
+     104,    78,    79,    54,   139,    76,    -8,   133,    32,    44,
+     145,    50,    51,     7,    34,   139,    52,   147,   127,    36,
+     144,   105,   106,   107,   108,   109,   110,    48,   127,   148,
+      60,    61,   121,    44,    44,    44,    44,    44,    44,   127,
+      52,   127,    70,    71,   120,    72,   111,   118,   116,    33,
+     132,     7,    34,   141,    50,    51,     7,    34,    26,    56,
+      53,    57,   117,    58,    88,    89,    72,    87,    45,   121,
+      90,    61,   130,    42,    54,    47,   131,   -23,   -25,   121,
+      44,   120,    26,    52,   118,   116,   140,   -24,   -26,    74,
+     121,   120,   121,    77,   118,   116,   149,   136,    81,   117,
+     142,    53,   120,   129,   120,   118,   116,   118,   116,   117,
+      53,     0,     0,    98,   143,    54,     0,    92,    93,     0,
+     117,     0,   117,     0,    54,     2,     3,     0,     0,     4,
+      81,     5,     6,     7,     8,    -6,     9,     0,    10,    11,
       12,     0,     0,    13,     0,     0,     0,    14,     0,    15,
-      58,   115,     0,    60,    61,    62,     0,     0,    16,     0,
-      17,     0,    88,    89,     7,    33,    69,     9,   116,    10,
-      11,    12,     0,     0,    13,     0,    45,    46,    14,     0,
-      15,     9,     0,    10,    11,    12,     0,     0,    13,    16,
-       0,    17,    14,     0,    15,     0,     0,    69,     0,     0,
-       0,    38,     6,    16,     0,    17,     9,     0,    10,    11,
-      12,    18,     0,    13,     0,    38,     6,    14,     0,    15,
-       9,     0,    10,    11,    12,     0,     0,    13,    16,     0,
-      17,    14,     0,    15,     0,     0,    18,     0,     0,     0,
-      45,    46,    16,     0,    17,     9,     0,    10,    11,    12,
-      69,     0,    13,     0,    38,     6,    14,     0,    15,     9,
-       0,    10,    11,     0,    38,     6,    39,    16,     0,    17,
-       0,    10,    15,     0,     0,    69,    39,     0,     0,     0,
-       0,    16,    15,    17,     0,     0,     0,     0,     0,    40,
-       0,     0,     0,    17,     0,     0,     0,     0,     0,    42
+      50,    51,     7,    34,   112,   113,     7,    34,    16,     9,
+      17,    10,    11,    12,     0,     0,    13,    18,     0,    19,
+      14,     0,    15,    50,    51,     7,    34,     0,    67,    52,
+       0,    16,     0,   114,    68,    69,    70,    71,     0,    72,
+      73,    94,    19,   138,     0,     0,   146,   112,   113,     7,
+      34,     0,     9,     0,    10,    11,    12,     0,     0,    13,
+       0,     0,     0,    14,    94,    15,     0,     0,     0,     0,
+      50,    51,     7,    34,    16,     0,   114,    68,    69,    70,
+      71,     0,    72,    73,     0,    19,    86,     0,   115,   112,
+     113,     7,    34,     0,     9,     0,    10,    11,    12,    52,
+       0,    13,     0,     0,    67,    14,     0,    15,     0,   132,
+      68,    69,    70,    71,     0,    72,    16,     0,   114,    86,
+       0,    68,    69,    70,    71,    73,    72,    19,   138,   112,
+     113,     7,    34,     0,     9,     0,    10,    11,    12,     0,
+       0,    13,     0,    39,     6,    14,     0,    15,     9,     0,
+      10,    11,    12,     0,     0,    13,    16,     0,   114,    14,
+       0,    15,     0,     0,     0,    73,     0,    19,    39,     6,
+      16,     0,    17,     9,     0,    10,    11,    12,     0,    18,
+      13,    19,    39,     6,    14,     0,    15,     9,     0,    10,
+      11,     0,     0,     0,    40,    16,     0,    17,    39,     6,
+      15,   134,   135,     0,    73,    10,    19,     0,    10,    16,
+      40,    17,     0,    40,     0,     0,    15,     0,    41,    15,
+      62,    63,     0,    64,    65,    66,     0,    17,    62,   137,
+      17,    64,    65,    66,    43,     0,     0,    43
 };
 
-static const yytype_int8 yycheck[] =
+static const yytype_int16 yycheck[] =
 {
-      17,    58,    16,    81,     1,     1,    20,    23,    54,    10,
-      96,     5,    98,    31,    32,    61,    62,    74,    10,    16,
-      17,    18,    18,   109,    42,    42,    42,    24,    24,     1,
-     116,    31,    32,    18,    31,    31,   114,     9,    35,    24,
-     118,    58,    59,    40,    27,    42,    63,    64,    65,    66,
-      67,    68,    40,    34,    35,    36,    37,    74,    39,    56,
-      57,     1,     6,    10,     8,     9,    63,    64,    65,    66,
-      67,    68,    69,    69,    36,    37,    16,    39,    18,    96,
-       5,    98,    25,    26,    24,    34,    35,    36,    37,    23,
-      39,    31,   109,    42,    39,    35,    28,    43,   115,   116,
-      40,    43,    34,    35,    36,    37,    43,    39,     6,     7,
-      42,    42,    28,     6,     7,    32,    56,    57,    34,    35,
-      36,    37,    12,    39,     0,     1,    12,    42,     4,    69,
-       6,     7,     8,     9,    10,    11,    43,    13,    14,    15,
-      -1,    -1,    18,    -1,    -1,    -1,    22,    -1,    24,    16,
-      17,    -1,    19,    20,    21,    -1,    -1,    33,    -1,    35,
-      -1,     6,     7,     8,     9,    41,    11,    -1,    13,    14,
+       1,    17,    18,    23,     1,    18,    16,    99,     9,    90,
+      20,    78,    25,    17,    31,    32,    32,     6,     7,    19,
+      10,    18,    58,    43,     5,    41,    43,    43,    25,    65,
+      66,    25,    26,    19,   125,    32,    10,   129,    41,    43,
+     132,     6,     7,     8,     9,   136,    35,   138,   115,    27,
+     131,    67,    68,    69,    70,    71,    72,    73,   125,   140,
+      31,    32,    78,    67,    68,    69,    70,    71,    72,   136,
+      35,   138,    36,    37,    78,    39,    73,    78,    78,     6,
+      45,     8,     9,    48,     6,     7,     8,     9,     1,    10,
+      90,     5,    78,    23,     6,     7,    39,    43,   114,   115,
+      45,    32,    12,    16,    90,    18,    12,    12,    12,   125,
+     114,   115,    25,    35,   115,   115,    45,    12,    12,    32,
+     136,   125,   138,    36,   125,   125,    46,   115,    41,   115,
+     130,   131,   136,    94,   138,   136,   136,   138,   138,   125,
+     140,    -1,    -1,    62,   130,   131,    -1,    60,    61,    -1,
+     136,    -1,   138,    -1,   140,     0,     1,    -1,    -1,     4,
+      73,     6,     7,     8,     9,    10,    11,    -1,    13,    14,
       15,    -1,    -1,    18,    -1,    -1,    -1,    22,    -1,    24,
-      16,    17,    -1,    19,    20,    21,    -1,    -1,    33,    -1,
-      35,    -1,     6,     7,     8,     9,    41,    11,    43,    13,
-      14,    15,    -1,    -1,    18,    -1,     6,     7,    22,    -1,
-      24,    11,    -1,    13,    14,    15,    -1,    -1,    18,    33,
-      -1,    35,    22,    -1,    24,    -1,    -1,    41,    -1,    -1,
-      -1,     6,     7,    33,    -1,    35,    11,    -1,    13,    14,
-      15,    41,    -1,    18,    -1,     6,     7,    22,    -1,    24,
-      11,    -1,    13,    14,    15,    -1,    -1,    18,    33,    -1,
-      35,    22,    -1,    24,    -1,    -1,    41,    -1,    -1,    -1,
-       6,     7,    33,    -1,    35,    11,    -1,    13,    14,    15,
-      41,    -1,    18,    -1,     6,     7,    22,    -1,    24,    11,
-      -1,    13,    14,    -1,     6,     7,    18,    33,    -1,    35,
-      -1,    13,    24,    -1,    -1,    41,    18,    -1,    -1,    -1,
-      -1,    33,    24,    35,    -1,    -1,    -1,    -1,    -1,    41,
-      -1,    -1,    -1,    35,    -1,    -1,    -1,    -1,    -1,    41
+       6,     7,     8,     9,     6,     7,     8,     9,    33,    11,
+      35,    13,    14,    15,    -1,    -1,    18,    42,    -1,    44,
+      22,    -1,    24,     6,     7,     8,     9,    -1,    28,    35,
+      -1,    33,    -1,    35,    34,    35,    36,    37,    -1,    39,
+      42,    47,    44,    45,    -1,    -1,    48,     6,     7,     8,
+       9,    -1,    11,    -1,    13,    14,    15,    -1,    -1,    18,
+      -1,    -1,    -1,    22,    47,    24,    -1,    -1,    -1,    -1,
+       6,     7,     8,     9,    33,    -1,    35,    34,    35,    36,
+      37,    -1,    39,    42,    -1,    44,    43,    -1,    47,     6,
+       7,     8,     9,    -1,    11,    -1,    13,    14,    15,    35,
+      -1,    18,    -1,    -1,    28,    22,    -1,    24,    -1,    45,
+      34,    35,    36,    37,    -1,    39,    33,    -1,    35,    43,
+      -1,    34,    35,    36,    37,    42,    39,    44,    45,     6,
+       7,     8,     9,    -1,    11,    -1,    13,    14,    15,    -1,
+      -1,    18,    -1,     6,     7,    22,    -1,    24,    11,    -1,
+      13,    14,    15,    -1,    -1,    18,    33,    -1,    35,    22,
+      -1,    24,    -1,    -1,    -1,    42,    -1,    44,     6,     7,
+      33,    -1,    35,    11,    -1,    13,    14,    15,    -1,    42,
+      18,    44,     6,     7,    22,    -1,    24,    11,    -1,    13,
+      14,    -1,    -1,    -1,    18,    33,    -1,    35,     6,     7,
+      24,     6,     7,    -1,    42,    13,    44,    -1,    13,    33,
+      18,    35,    -1,    18,    -1,    -1,    24,    -1,    42,    24,
+      16,    17,    -1,    19,    20,    21,    -1,    35,    16,    17,
+      35,    19,    20,    21,    42,    -1,    -1,    42
 };
 
 /* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
    symbol of state STATE-NUM.  */
 static const yytype_uint8 yystos[] =
 {
-       0,    45,     0,     1,     4,     6,     7,     8,     9,    11,
-      13,    14,    15,    18,    22,    24,    33,    35,    41,    46,
-      47,    48,    49,    50,    52,    53,    54,    55,    57,    10,
-       5,    40,     6,     9,    52,    27,    58,    59,     6,    18,
-      41,    53,    41,    54,    55,     6,     7,    50,    51,    53,
-      55,    57,    10,     5,    23,    50,    31,    32,    16,    17,
-      19,    20,    21,    28,    34,    35,    36,    37,    39,    41,
-      53,    55,    57,    53,    25,    26,    60,    53,    55,    16,
-      42,    43,    42,    42,    42,    58,    53,    53,     6,     7,
-      52,    53,    54,    55,    56,    57,    62,    63,    62,    58,
-      58,    55,    55,    55,    55,    55,    55,    57,    61,    62,
-       6,     7,    51,    12,    12,    17,    43,    63,    43,     6,
-       7,    51,    63,    51,    42
+       0,    50,     0,     1,     4,     6,     7,     8,     9,    11,
+      13,    14,    15,    18,    22,    24,    33,    35,    42,    44,
+      51,    52,    53,    54,    55,    59,    60,    61,    62,    64,
+      10,     5,    41,     6,     9,    59,    27,    65,    66,     6,
+      18,    42,    60,    42,    61,    62,    55,    60,    62,    64,
+       6,     7,    35,    56,    57,    58,    10,     5,    23,    55,
+      31,    32,    16,    17,    19,    20,    21,    28,    34,    35,
+      36,    37,    39,    42,    60,    62,    64,    60,    25,    26,
+      67,    60,    62,    16,    43,    43,    43,    43,     6,     7,
+      45,    65,    60,    60,    47,    56,    57,    59,    70,    71,
+      73,    74,    70,    65,    65,    62,    62,    62,    62,    62,
+      62,    64,     6,     7,    35,    47,    56,    57,    59,    60,
+      61,    62,    63,    64,    68,    69,    72,    74,    58,    71,
+      12,    12,    45,    73,     6,     7,    69,    17,    45,    72,
+      45,    48,    56,    57,    58,    73,    48,    72,    58,    46
 };
 
 #define yyerrok                (yyerrstatus = 0)
@@ -1268,99 +1311,119 @@ yydestruct (yymsg, yytype, yyvaluep, scanner)
   switch (yytype)
     {
       case 5: /* "HELP_TOPIC" */
-#line 158 "parser.y"
+#line 161 "parser.y"
        { free((yyvaluep->str));                     };
-#line 1274 "parser.c"
+#line 1317 "parser.c"
        break;
       case 8: /* "STR" */
-#line 158 "parser.y"
+#line 161 "parser.y"
        { free((yyvaluep->str));                     };
-#line 1279 "parser.c"
+#line 1322 "parser.c"
        break;
       case 9: /* "IDENTIFIER" */
-#line 158 "parser.y"
+#line 161 "parser.y"
        { free((yyvaluep->str));                     };
-#line 1284 "parser.c"
+#line 1327 "parser.c"
        break;
       case 25: /* "PARAM" */
-#line 159 "parser.y"
+#line 162 "parser.y"
        { if((yyvaluep->str)) free((yyvaluep->str));              };
-#line 1289 "parser.c"
+#line 1332 "parser.c"
        break;
       case 28: /* "CMP_OP" */
-#line 158 "parser.y"
+#line 161 "parser.y"
        { free((yyvaluep->str));                     };
-#line 1294 "parser.c"
+#line 1337 "parser.c"
        break;
-      case 46: /* "command" */
-#line 160 "parser.y"
+      case 51: /* "command" */
+#line 163 "parser.y"
        { if((yyvaluep->sel)) _gmx_selelem_free((yyvaluep->sel)); };
-#line 1299 "parser.c"
+#line 1342 "parser.c"
        break;
-      case 47: /* "cmd_plain" */
-#line 160 "parser.y"
+      case 52: /* "cmd_plain" */
+#line 163 "parser.y"
        { if((yyvaluep->sel)) _gmx_selelem_free((yyvaluep->sel)); };
-#line 1304 "parser.c"
+#line 1347 "parser.c"
        break;
-      case 50: /* "selection" */
-#line 161 "parser.y"
+      case 55: /* "selection" */
+#line 164 "parser.y"
        { _gmx_selelem_free_chain((yyvaluep->sel));  };
-#line 1309 "parser.c"
+#line 1352 "parser.c"
        break;
-      case 52: /* "string" */
-#line 158 "parser.y"
+      case 59: /* "string" */
+#line 161 "parser.y"
        { free((yyvaluep->str));                     };
-#line 1314 "parser.c"
+#line 1357 "parser.c"
        break;
-      case 53: /* "sel_expr" */
-#line 162 "parser.y"
+      case 60: /* "sel_expr" */
+#line 165 "parser.y"
        { _gmx_selelem_free((yyvaluep->sel));        };
-#line 1319 "parser.c"
+#line 1362 "parser.c"
        break;
-      case 55: /* "num_expr" */
-#line 162 "parser.y"
+      case 62: /* "num_expr" */
+#line 165 "parser.y"
        { _gmx_selelem_free((yyvaluep->sel));        };
-#line 1324 "parser.c"
+#line 1367 "parser.c"
        break;
-      case 56: /* "str_expr" */
-#line 162 "parser.y"
+      case 63: /* "str_expr" */
+#line 165 "parser.y"
        { _gmx_selelem_free((yyvaluep->sel));        };
-#line 1329 "parser.c"
+#line 1372 "parser.c"
        break;
-      case 57: /* "pos_expr" */
-#line 162 "parser.y"
+      case 64: /* "pos_expr" */
+#line 165 "parser.y"
        { _gmx_selelem_free((yyvaluep->sel));        };
-#line 1334 "parser.c"
+#line 1377 "parser.c"
        break;
-      case 58: /* "method_params" */
-#line 163 "parser.y"
+      case 65: /* "method_params" */
+#line 166 "parser.y"
        { _gmx_selexpr_free_params((yyvaluep->param)); };
-#line 1339 "parser.c"
+#line 1382 "parser.c"
        break;
-      case 59: /* "method_param_list" */
-#line 163 "parser.y"
+      case 66: /* "method_param_list" */
+#line 166 "parser.y"
        { _gmx_selexpr_free_params((yyvaluep->param)); };
-#line 1344 "parser.c"
+#line 1387 "parser.c"
        break;
-      case 60: /* "method_param" */
-#line 163 "parser.y"
+      case 67: /* "method_param" */
+#line 166 "parser.y"
        { _gmx_selexpr_free_params((yyvaluep->param)); };
-#line 1349 "parser.c"
+#line 1392 "parser.c"
        break;
-      case 61: /* "value_list" */
-#line 164 "parser.y"
+      case 68: /* "value_list" */
+#line 167 "parser.y"
        { _gmx_selexpr_free_values((yyvaluep->val)); };
-#line 1354 "parser.c"
+#line 1397 "parser.c"
        break;
-      case 62: /* "value_list_nonempty" */
-#line 164 "parser.y"
+      case 69: /* "value_list_contents" */
+#line 167 "parser.y"
        { _gmx_selexpr_free_values((yyvaluep->val)); };
-#line 1359 "parser.c"
+#line 1402 "parser.c"
        break;
-      case 63: /* "value_item" */
-#line 164 "parser.y"
+      case 70: /* "basic_value_list" */
+#line 168 "parser.y"
+       { _gmx_selexpr_free_values((yyvaluep->val)); };
+#line 1407 "parser.c"
+       break;
+      case 71: /* "basic_value_list_contents" */
+#line 168 "parser.y"
+       { _gmx_selexpr_free_values((yyvaluep->val)); };
+#line 1412 "parser.c"
+       break;
+      case 72: /* "value_item" */
+#line 167 "parser.y"
+       { _gmx_selexpr_free_values((yyvaluep->val)); };
+#line 1417 "parser.c"
+       break;
+      case 73: /* "basic_value_item" */
+#line 168 "parser.y"
+       { _gmx_selexpr_free_values((yyvaluep->val)); };
+#line 1422 "parser.c"
+       break;
+      case 74: /* "value_item_range" */
+#line 167 "parser.y"
        { _gmx_selexpr_free_values((yyvaluep->val)); };
-#line 1364 "parser.c"
+#line 1427 "parser.c"
        break;
 
       default:
@@ -1669,12 +1732,12 @@ yyreduce:
   switch (yyn)
     {
         case 2:
-#line 178 "parser.y"
+#line 182 "parser.y"
     { (yyval.sel) = NULL ;}
     break;
 
   case 3:
-#line 180 "parser.y"
+#line 184 "parser.y"
     {
                  (yyval.sel) = _gmx_sel_append_selection((yyvsp[(2) - (2)].sel), (yyvsp[(1) - (2)].sel), scanner);
                  if (_gmx_sel_parser_should_finish(scanner))
@@ -1683,12 +1746,12 @@ yyreduce:
     break;
 
   case 4:
-#line 188 "parser.y"
+#line 192 "parser.y"
     { (yyval.sel) = (yyvsp[(1) - (2)].sel); ;}
     break;
 
   case 5:
-#line 190 "parser.y"
+#line 194 "parser.y"
     {
                  (yyval.sel) = NULL;
                  _gmx_selparser_error("invalid selection '%s'",
@@ -1707,7 +1770,7 @@ yyreduce:
     break;
 
   case 6:
-#line 209 "parser.y"
+#line 213 "parser.y"
     {
                  (yyval.sel) = NULL;
                  _gmx_sel_handle_empty_cmd(scanner);
@@ -1715,12 +1778,12 @@ yyreduce:
     break;
 
   case 7:
-#line 213 "parser.y"
+#line 217 "parser.y"
     { (yyval.sel) = NULL; ;}
     break;
 
   case 8:
-#line 215 "parser.y"
+#line 219 "parser.y"
     {
                  t_selelem *s, *p;
                  s = _gmx_sel_init_group_by_id((yyvsp[(1) - (1)].i), scanner);
@@ -1732,7 +1795,7 @@ yyreduce:
     break;
 
   case 9:
-#line 224 "parser.y"
+#line 228 "parser.y"
     {
                  t_selelem *s, *p;
                  s = _gmx_sel_init_group_by_name((yyvsp[(1) - (1)].str), scanner);
@@ -1745,52 +1808,52 @@ yyreduce:
     break;
 
   case 10:
-#line 234 "parser.y"
+#line 238 "parser.y"
     { (yyval.sel) = _gmx_sel_init_selection(NULL, (yyvsp[(1) - (1)].sel), scanner); ;}
     break;
 
   case 11:
-#line 236 "parser.y"
+#line 240 "parser.y"
     { (yyval.sel) = _gmx_sel_init_selection((yyvsp[(1) - (2)].str), (yyvsp[(2) - (2)].sel), scanner);   ;}
     break;
 
   case 12:
-#line 238 "parser.y"
+#line 242 "parser.y"
     { (yyval.sel) = _gmx_sel_assign_variable((yyvsp[(1) - (3)].str), (yyvsp[(3) - (3)].sel), scanner);  ;}
     break;
 
   case 13:
-#line 240 "parser.y"
+#line 244 "parser.y"
     { (yyval.sel) = _gmx_sel_assign_variable((yyvsp[(1) - (3)].str), (yyvsp[(3) - (3)].sel), scanner);  ;}
     break;
 
   case 14:
-#line 242 "parser.y"
+#line 246 "parser.y"
     { (yyval.sel) = _gmx_sel_assign_variable((yyvsp[(1) - (3)].str), (yyvsp[(3) - (3)].sel), scanner);  ;}
     break;
 
   case 15:
-#line 247 "parser.y"
+#line 251 "parser.y"
     { _gmx_sel_handle_help_cmd(NULL, scanner); ;}
     break;
 
   case 17:
-#line 251 "parser.y"
+#line 255 "parser.y"
     { _gmx_sel_handle_help_cmd((yyvsp[(2) - (2)].str), scanner); ;}
     break;
 
   case 18:
-#line 252 "parser.y"
+#line 256 "parser.y"
     { _gmx_sel_handle_help_cmd((yyvsp[(2) - (2)].str), scanner); ;}
     break;
 
   case 19:
-#line 256 "parser.y"
+#line 260 "parser.y"
     { (yyval.sel) = (yyvsp[(1) - (1)].sel); ;}
     break;
 
   case 20:
-#line 258 "parser.y"
+#line 262 "parser.y"
     {
                  (yyval.sel) = _gmx_sel_init_position((yyvsp[(1) - (1)].sel), NULL, scanner);
                  if ((yyval.sel) == NULL) YYERROR;
@@ -1798,12 +1861,12 @@ yyreduce:
     break;
 
   case 21:
-#line 262 "parser.y"
+#line 266 "parser.y"
     { (yyval.sel) = (yyvsp[(2) - (3)].sel); ;}
     break;
 
   case 22:
-#line 264 "parser.y"
+#line 268 "parser.y"
     {
                  (yyval.sel) = _gmx_sel_init_modifier((yyvsp[(2) - (3)].meth), (yyvsp[(3) - (3)].param), (yyvsp[(1) - (3)].sel), scanner);
                  if ((yyval.sel) == NULL) YYERROR;
@@ -1811,27 +1874,47 @@ yyreduce:
     break;
 
   case 23:
-#line 274 "parser.y"
+#line 279 "parser.y"
     { (yyval.r) = (yyvsp[(1) - (1)].i); ;}
     break;
 
   case 24:
-#line 275 "parser.y"
-    { (yyval.r) = (yyvsp[(1) - (1)].r); ;}
+#line 280 "parser.y"
+    { (yyval.r) = -(yyvsp[(2) - (2)].i); ;}
     break;
 
   case 25:
-#line 278 "parser.y"
-    { (yyval.str) = (yyvsp[(1) - (1)].str); ;}
+#line 284 "parser.y"
+    { (yyval.r) = (yyvsp[(1) - (1)].r); ;}
     break;
 
   case 26:
-#line 279 "parser.y"
-    { (yyval.str) = (yyvsp[(1) - (1)].str); ;}
+#line 285 "parser.y"
+    { (yyval.r) = -(yyvsp[(2) - (2)].r); ;}
     break;
 
   case 27:
 #line 288 "parser.y"
+    { (yyval.r) = (yyvsp[(1) - (1)].r); ;}
+    break;
+
+  case 28:
+#line 289 "parser.y"
+    { (yyval.r) = (yyvsp[(1) - (1)].r); ;}
+    break;
+
+  case 29:
+#line 292 "parser.y"
+    { (yyval.str) = (yyvsp[(1) - (1)].str); ;}
+    break;
+
+  case 30:
+#line 293 "parser.y"
+    { (yyval.str) = (yyvsp[(1) - (1)].str); ;}
+    break;
+
+  case 31:
+#line 302 "parser.y"
     {
                  (yyval.sel) = _gmx_selelem_create(SEL_BOOLEAN);
                  (yyval.sel)->u.boolt = BOOL_NOT;
@@ -1839,8 +1922,8 @@ yyreduce:
              ;}
     break;
 
-  case 28:
-#line 294 "parser.y"
+  case 32:
+#line 308 "parser.y"
     {
                  (yyval.sel) = _gmx_selelem_create(SEL_BOOLEAN);
                  (yyval.sel)->u.boolt = BOOL_AND;
@@ -1848,8 +1931,8 @@ yyreduce:
              ;}
     break;
 
-  case 29:
-#line 300 "parser.y"
+  case 33:
+#line 314 "parser.y"
     {
                  (yyval.sel) = _gmx_selelem_create(SEL_BOOLEAN);
                  (yyval.sel)->u.boolt = BOOL_OR;
@@ -1857,21 +1940,21 @@ yyreduce:
              ;}
     break;
 
-  case 30:
-#line 311 "parser.y"
+  case 34:
+#line 325 "parser.y"
     { (yyval.sel) = (yyvsp[(2) - (3)].sel); ;}
     break;
 
-  case 31:
-#line 316 "parser.y"
+  case 35:
+#line 330 "parser.y"
     {
                  (yyval.sel) = _gmx_sel_init_comparison((yyvsp[(1) - (3)].sel), (yyvsp[(3) - (3)].sel), (yyvsp[(2) - (3)].str), scanner);
                  if ((yyval.sel) == NULL) YYERROR;
              ;}
     break;
 
-  case 32:
-#line 324 "parser.y"
+  case 36:
+#line 338 "parser.y"
     {
                  (yyval.sel) = _gmx_sel_init_group_by_name((yyvsp[(2) - (2)].str), scanner);
                  free((yyvsp[(2) - (2)].str));
@@ -1879,58 +1962,58 @@ yyreduce:
              ;}
     break;
 
-  case 33:
-#line 330 "parser.y"
+  case 37:
+#line 344 "parser.y"
     {
                  (yyval.sel) = _gmx_sel_init_group_by_id((yyvsp[(2) - (2)].i), scanner);
                  if ((yyval.sel) == NULL) YYERROR;
              ;}
     break;
 
-  case 34:
-#line 337 "parser.y"
+  case 38:
+#line 351 "parser.y"
     { (yyval.str) = NULL; ;}
     break;
 
-  case 35:
-#line 338 "parser.y"
+  case 39:
+#line 352 "parser.y"
     { (yyval.str) = (yyvsp[(1) - (1)].str);   ;}
     break;
 
-  case 36:
-#line 343 "parser.y"
+  case 40:
+#line 357 "parser.y"
     {
                  (yyval.sel) = _gmx_sel_init_keyword((yyvsp[(2) - (2)].meth), NULL, (yyvsp[(1) - (2)].str), scanner);
                  if ((yyval.sel) == NULL) YYERROR;
              ;}
     break;
 
-  case 37:
-#line 348 "parser.y"
+  case 41:
+#line 362 "parser.y"
     {
                  (yyval.sel) = _gmx_sel_init_keyword((yyvsp[(2) - (3)].meth), process_value_list((yyvsp[(3) - (3)].val), NULL), (yyvsp[(1) - (3)].str), scanner);
                  if ((yyval.sel) == NULL) YYERROR;
              ;}
     break;
 
-  case 38:
-#line 353 "parser.y"
+  case 42:
+#line 367 "parser.y"
     {
                  (yyval.sel) = _gmx_sel_init_keyword((yyvsp[(2) - (3)].meth), process_value_list((yyvsp[(3) - (3)].val), NULL), (yyvsp[(1) - (3)].str), scanner);
                  if ((yyval.sel) == NULL) YYERROR;
              ;}
     break;
 
-  case 39:
-#line 361 "parser.y"
+  case 43:
+#line 375 "parser.y"
     {
                  (yyval.sel) = _gmx_sel_init_method((yyvsp[(2) - (3)].meth), (yyvsp[(3) - (3)].param), (yyvsp[(1) - (3)].str), scanner);
                  if ((yyval.sel) == NULL) YYERROR;
              ;}
     break;
 
-  case 40:
-#line 373 "parser.y"
+  case 44:
+#line 387 "parser.y"
     {
                  (yyval.sel) = _gmx_selelem_create(SEL_CONST);
                  _gmx_selelem_set_vtype((yyval.sel), INT_VALUE);
@@ -1939,8 +2022,8 @@ yyreduce:
              ;}
     break;
 
-  case 41:
-#line 380 "parser.y"
+  case 45:
+#line 394 "parser.y"
     {
                  (yyval.sel) = _gmx_selelem_create(SEL_CONST);
                  _gmx_selelem_set_vtype((yyval.sel), REAL_VALUE);
@@ -1949,59 +2032,59 @@ yyreduce:
              ;}
     break;
 
-  case 42:
-#line 390 "parser.y"
+  case 46:
+#line 404 "parser.y"
     {
                  (yyval.sel) = _gmx_sel_init_keyword((yyvsp[(2) - (2)].meth), NULL, (yyvsp[(1) - (2)].str), scanner);
                  if ((yyval.sel) == NULL) YYERROR;
              ;}
     break;
 
-  case 43:
-#line 395 "parser.y"
+  case 47:
+#line 409 "parser.y"
     {
                  (yyval.sel) = _gmx_sel_init_method((yyvsp[(2) - (3)].meth), (yyvsp[(3) - (3)].param), (yyvsp[(1) - (3)].str), scanner);
                  if ((yyval.sel) == NULL) YYERROR;
              ;}
     break;
 
-  case 44:
-#line 403 "parser.y"
+  case 48:
+#line 417 "parser.y"
     { (yyval.sel) = _gmx_sel_init_arithmetic((yyvsp[(1) - (3)].sel), (yyvsp[(3) - (3)].sel), '+', scanner); ;}
     break;
 
-  case 45:
-#line 405 "parser.y"
+  case 49:
+#line 419 "parser.y"
     { (yyval.sel) = _gmx_sel_init_arithmetic((yyvsp[(1) - (3)].sel), (yyvsp[(3) - (3)].sel), '-', scanner); ;}
     break;
 
-  case 46:
-#line 407 "parser.y"
+  case 50:
+#line 421 "parser.y"
     { (yyval.sel) = _gmx_sel_init_arithmetic((yyvsp[(1) - (3)].sel), (yyvsp[(3) - (3)].sel), '*', scanner); ;}
     break;
 
-  case 47:
-#line 409 "parser.y"
+  case 51:
+#line 423 "parser.y"
     { (yyval.sel) = _gmx_sel_init_arithmetic((yyvsp[(1) - (3)].sel), (yyvsp[(3) - (3)].sel), '/', scanner); ;}
     break;
 
-  case 48:
-#line 411 "parser.y"
+  case 52:
+#line 425 "parser.y"
     { (yyval.sel) = _gmx_sel_init_arithmetic((yyvsp[(2) - (2)].sel), NULL, '-', scanner); ;}
     break;
 
-  case 49:
-#line 413 "parser.y"
+  case 53:
+#line 427 "parser.y"
     { (yyval.sel) = _gmx_sel_init_arithmetic((yyvsp[(1) - (3)].sel), (yyvsp[(3) - (3)].sel), '^', scanner); ;}
     break;
 
-  case 50:
-#line 414 "parser.y"
+  case 54:
+#line 428 "parser.y"
     { (yyval.sel) = (yyvsp[(2) - (3)].sel); ;}
     break;
 
-  case 51:
-#line 422 "parser.y"
+  case 55:
+#line 436 "parser.y"
     {
                  (yyval.sel) = _gmx_selelem_create(SEL_CONST);
                  _gmx_selelem_set_vtype((yyval.sel), STR_VALUE);
@@ -2010,146 +2093,210 @@ yyreduce:
              ;}
     break;
 
-  case 52:
-#line 429 "parser.y"
+  case 56:
+#line 443 "parser.y"
     {
                  (yyval.sel) = _gmx_sel_init_keyword((yyvsp[(2) - (2)].meth), NULL, (yyvsp[(1) - (2)].str), scanner);
                  if ((yyval.sel) == NULL) YYERROR;
              ;}
     break;
 
-  case 53:
-#line 441 "parser.y"
+  case 57:
+#line 455 "parser.y"
     { (yyval.sel) = _gmx_sel_init_const_position((yyvsp[(2) - (7)].r), (yyvsp[(4) - (7)].r), (yyvsp[(6) - (7)].r)); ;}
     break;
 
-  case 54:
-#line 445 "parser.y"
+  case 58:
+#line 459 "parser.y"
     { (yyval.sel) = (yyvsp[(2) - (3)].sel); ;}
     break;
 
-  case 55:
-#line 450 "parser.y"
+  case 59:
+#line 464 "parser.y"
     {
                  (yyval.sel) = _gmx_sel_init_method((yyvsp[(1) - (2)].meth), (yyvsp[(2) - (2)].param), NULL, scanner);
                  if ((yyval.sel) == NULL) YYERROR;
              ;}
     break;
 
-  case 56:
-#line 458 "parser.y"
+  case 60:
+#line 472 "parser.y"
     {
                  (yyval.sel) = _gmx_sel_init_position((yyvsp[(3) - (3)].sel), (yyvsp[(1) - (3)].str), scanner);
                  if ((yyval.sel) == NULL) YYERROR;
              ;}
     break;
 
-  case 57:
-#line 469 "parser.y"
+  case 61:
+#line 483 "parser.y"
     { (yyval.sel) = _gmx_sel_init_variable_ref((yyvsp[(1) - (1)].sel)); ;}
     break;
 
-  case 58:
-#line 473 "parser.y"
+  case 62:
+#line 487 "parser.y"
     { (yyval.sel) = _gmx_sel_init_variable_ref((yyvsp[(1) - (1)].sel)); ;}
     break;
 
-  case 59:
-#line 477 "parser.y"
+  case 63:
+#line 491 "parser.y"
     { (yyval.sel) = _gmx_sel_init_variable_ref((yyvsp[(1) - (1)].sel)); ;}
     break;
 
-  case 60:
-#line 486 "parser.y"
+  case 64:
+#line 500 "parser.y"
     { (yyval.param) = process_param_list((yyvsp[(1) - (1)].param)); ;}
     break;
 
-  case 61:
-#line 488 "parser.y"
+  case 65:
+#line 502 "parser.y"
     { (yyval.param) = process_param_list((yyvsp[(1) - (2)].param)); ;}
     break;
 
-  case 62:
-#line 492 "parser.y"
+  case 66:
+#line 506 "parser.y"
     { (yyval.param) = NULL;              ;}
     break;
 
-  case 63:
-#line 494 "parser.y"
+  case 67:
+#line 508 "parser.y"
     { (yyvsp[(2) - (2)].param)->next = (yyvsp[(1) - (2)].param); (yyval.param) = (yyvsp[(2) - (2)].param); ;}
     break;
 
-  case 64:
-#line 499 "parser.y"
+  case 68:
+#line 513 "parser.y"
     {
                  (yyval.param) = _gmx_selexpr_create_param((yyvsp[(1) - (2)].str));
                  (yyval.param)->value = process_value_list((yyvsp[(2) - (2)].val), &(yyval.param)->nval);
              ;}
     break;
 
-  case 65:
-#line 505 "parser.y"
+  case 69:
+#line 519 "parser.y"
     { (yyval.val) = NULL; ;}
     break;
 
-  case 66:
-#line 506 "parser.y"
+  case 70:
+#line 520 "parser.y"
     { (yyval.val) = (yyvsp[(1) - (1)].val);   ;}
     break;
 
-  case 67:
-#line 510 "parser.y"
+  case 71:
+#line 521 "parser.y"
+    { (yyval.val) = (yyvsp[(2) - (3)].val);   ;}
+    break;
+
+  case 72:
+#line 525 "parser.y"
     { (yyval.val) = (yyvsp[(1) - (1)].val); ;}
     break;
 
-  case 68:
-#line 512 "parser.y"
+  case 73:
+#line 527 "parser.y"
     { (yyvsp[(2) - (2)].val)->next = (yyvsp[(1) - (2)].val); (yyval.val) = (yyvsp[(2) - (2)].val); ;}
     break;
 
-  case 69:
-#line 514 "parser.y"
+  case 74:
+#line 529 "parser.y"
     { (yyvsp[(3) - (3)].val)->next = (yyvsp[(1) - (3)].val); (yyval.val) = (yyvsp[(3) - (3)].val); ;}
     break;
 
-  case 70:
-#line 518 "parser.y"
+  case 75:
+#line 533 "parser.y"
+    { (yyval.val) = (yyvsp[(1) - (1)].val); ;}
+    break;
+
+  case 76:
+#line 534 "parser.y"
+    { (yyval.val) = (yyvsp[(2) - (3)].val); ;}
+    break;
+
+  case 77:
+#line 538 "parser.y"
+    { (yyval.val) = (yyvsp[(1) - (1)].val); ;}
+    break;
+
+  case 78:
+#line 540 "parser.y"
+    { (yyvsp[(2) - (2)].val)->next = (yyvsp[(1) - (2)].val); (yyval.val) = (yyvsp[(2) - (2)].val); ;}
+    break;
+
+  case 79:
+#line 542 "parser.y"
+    { (yyvsp[(3) - (3)].val)->next = (yyvsp[(1) - (3)].val); (yyval.val) = (yyvsp[(3) - (3)].val); ;}
+    break;
+
+  case 80:
+#line 546 "parser.y"
     { (yyval.val) = _gmx_selexpr_create_value_expr((yyvsp[(1) - (1)].sel)); ;}
     break;
 
-  case 71:
-#line 520 "parser.y"
+  case 81:
+#line 548 "parser.y"
     { (yyval.val) = _gmx_selexpr_create_value_expr((yyvsp[(1) - (1)].sel)); ;}
     break;
 
-  case 72:
-#line 522 "parser.y"
+  case 82:
+#line 550 "parser.y"
     { (yyval.val) = _gmx_selexpr_create_value_expr((yyvsp[(1) - (1)].sel)); ;}
     break;
 
-  case 73:
-#line 524 "parser.y"
+  case 83:
+#line 552 "parser.y"
     { (yyval.val) = _gmx_selexpr_create_value_expr((yyvsp[(1) - (1)].sel)); ;}
     break;
 
-  case 74:
-#line 526 "parser.y"
+  case 84:
+#line 553 "parser.y"
+    { (yyval.val) = (yyvsp[(1) - (1)].val); ;}
+    break;
+
+  case 85:
+#line 558 "parser.y"
     {
                  (yyval.val) = _gmx_selexpr_create_value(INT_VALUE);
-                 (yyval.val)->u.i.i1 = (yyvsp[(1) - (3)].i); (yyval.val)->u.i.i2 = (yyvsp[(3) - (3)].i);
+                 (yyval.val)->u.i.i1 = (yyval.val)->u.i.i2 = (yyvsp[(1) - (1)].r);
              ;}
     break;
 
-  case 75:
-#line 531 "parser.y"
+  case 86:
+#line 563 "parser.y"
     {
                  (yyval.val) = _gmx_selexpr_create_value(REAL_VALUE);
-                 (yyval.val)->u.r.r1 = (yyvsp[(1) - (3)].i); (yyval.val)->u.r.r2 = (yyvsp[(3) - (3)].r);
+                 (yyval.val)->u.r.r1 = (yyval.val)->u.r.r2 = (yyvsp[(1) - (1)].r);
              ;}
     break;
 
-  case 76:
-#line 536 "parser.y"
+  case 87:
+#line 568 "parser.y"
+    {
+                 (yyval.val) = _gmx_selexpr_create_value(STR_VALUE);
+                 (yyval.val)->u.s = (yyvsp[(1) - (1)].str);
+             ;}
+    break;
+
+  case 88:
+#line 572 "parser.y"
+    { (yyval.val) = (yyvsp[(1) - (1)].val); ;}
+    break;
+
+  case 89:
+#line 577 "parser.y"
+    {
+                 (yyval.val) = _gmx_selexpr_create_value(INT_VALUE);
+                 (yyval.val)->u.i.i1 = (yyvsp[(1) - (3)].r); (yyval.val)->u.i.i2 = (yyvsp[(3) - (3)].r);
+             ;}
+    break;
+
+  case 90:
+#line 582 "parser.y"
+    {
+                 (yyval.val) = _gmx_selexpr_create_value(REAL_VALUE);
+                 (yyval.val)->u.r.r1 = (yyvsp[(1) - (3)].r); (yyval.val)->u.r.r2 = (yyvsp[(3) - (3)].r);
+             ;}
+    break;
+
+  case 91:
+#line 587 "parser.y"
     {
                  (yyval.val) = _gmx_selexpr_create_value(REAL_VALUE);
                  (yyval.val)->u.r.r1 = (yyvsp[(1) - (3)].r); (yyval.val)->u.r.r2 = (yyvsp[(3) - (3)].r);
@@ -2158,7 +2305,7 @@ yyreduce:
 
 
 /* Line 1267 of yacc.c.  */
-#line 2162 "parser.c"
+#line 2309 "parser.c"
       default: break;
     }
   YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc);
@@ -2372,7 +2519,7 @@ yyreturn:
 }
 
 
-#line 542 "parser.y"
+#line 593 "parser.y"
 
 
 static t_selexpr_value *
index 088158bd72fd40369d7a2b874d7348dba2db6c7e..e240cb388f0cda4034603de307bddadcb15b9de2 100644 (file)
@@ -70,7 +70,8 @@
      OR = 286,
      AND = 287,
      NOT = 288,
-     UNARY_NEG = 289
+     UNARY_NEG = 289,
+     NUM_REDUCT = 290
    };
 #endif
 /* Tokens.  */
 #define AND 287
 #define NOT 288
 #define UNARY_NEG 289
+#define NUM_REDUCT 290
 
 
 
@@ -125,7 +127,7 @@ typedef union YYSTYPE
     struct t_selexpr_param     *param;
 }
 /* Line 1489 of yacc.c.  */
-#line 129 "parser.h"
+#line 131 "parser.h"
        YYSTYPE;
 # define yystype YYSTYPE /* obsolescent; will be withdrawn */
 # define YYSTYPE_IS_DECLARED 1
index 6738ff11e500ac23d7b7a11686166e80a4527669..551e0cf7abe10d55da34d2f0c907c3744182e093 100644 (file)
@@ -137,9 +137,11 @@ yyerror(yyscan_t, char const *s);
 %left           '*' '/'
 %right          UNARY_NEG   /* Dummy token for unary negation precedence */
 %right          '^'
+%nonassoc       NUM_REDUCT  /* Dummy token for numerical keyword reduction precedence */
 
 /* Simple non-terminals */
-%type <r>     number
+%type <r>     integer_number
+%type <r>     real_number number
 %type <str>   string
 %type <str>   pos_mod
 
@@ -153,7 +155,8 @@ yyerror(yyscan_t, char const *s);
 
 /* Parameter/value non-terminals */
 %type <param> method_params method_param_list method_param
-%type <val>   value_list value_list_nonempty value_item
+%type <val>   value_list value_list_contents value_item value_item_range
+%type <val>   basic_value_list basic_value_list_contents basic_value_item
 
 %destructor { free($$);                     } HELP_TOPIC STR IDENTIFIER CMP_OP string
 %destructor { if($$) free($$);              } PARAM
@@ -161,9 +164,10 @@ yyerror(yyscan_t, char const *s);
 %destructor { _gmx_selelem_free_chain($$);  } selection
 %destructor { _gmx_selelem_free($$);        } sel_expr num_expr str_expr pos_expr
 %destructor { _gmx_selexpr_free_params($$); } method_params method_param_list method_param
-%destructor { _gmx_selexpr_free_values($$); } value_list value_list_nonempty value_item
+%destructor { _gmx_selexpr_free_values($$); } value_list value_list_contents value_item value_item_range
+%destructor { _gmx_selexpr_free_values($$); } basic_value_list basic_value_list_contents basic_value_item
 
-%expect 91
+%expect 50
 %debug
 %pure-parser
 
@@ -271,8 +275,18 @@ selection:   pos_expr           { $$ = $1; }
  * BASIC NON-TERMINAL SYMBOLS
  ********************************************************************/
 
-number:      TOK_INT            { $$ = $1; }
-           | TOK_REAL               { $$ = $1; }
+integer_number:
+             TOK_INT            { $$ = $1; }
+           | '-' TOK_INT        { $$ = -$2; }
+;
+
+real_number:
+             TOK_REAL           { $$ = $1; }
+           | '-' TOK_REAL       { $$ = -$2; }
+;
+
+number:      integer_number     { $$ = $1; }
+           | real_number        { $$ = $1; }
 ;
 
 string:      STR                { $$ = $1; }
@@ -344,12 +358,12 @@ sel_expr:    pos_mod KEYWORD_GROUP
                  $$ = _gmx_sel_init_keyword($2, NULL, $1, scanner);
                  if ($$ == NULL) YYERROR;
              }
-           | pos_mod KEYWORD_STR value_list_nonempty
+           | pos_mod KEYWORD_STR basic_value_list
              {
                  $$ = _gmx_sel_init_keyword($2, process_value_list($3, NULL), $1, scanner);
                  if ($$ == NULL) YYERROR;
              }
-           | pos_mod KEYWORD_NUMERIC value_list_nonempty
+           | pos_mod KEYWORD_NUMERIC basic_value_list
              {
                  $$ = _gmx_sel_init_keyword($2, process_value_list($3, NULL), $1, scanner);
                  if ($$ == NULL) YYERROR;
@@ -386,7 +400,7 @@ num_expr:    TOK_INT
 ;
 
 /* Numeric selection methods */
-num_expr:    pos_mod KEYWORD_NUMERIC
+num_expr:    pos_mod KEYWORD_NUMERIC    %prec NUM_REDUCT
              {
                  $$ = _gmx_sel_init_keyword($2, NULL, $1, scanner);
                  if ($$ == NULL) YYERROR;
@@ -437,7 +451,7 @@ str_expr:    string
  ********************************************************************/
 
 /* Constant position expressions */
-pos_expr:    '(' number ',' number ',' number ')'
+pos_expr:    '[' number ',' number ',' number ']'
              { $$ = _gmx_sel_init_const_position($2, $4, $6); }
 ;
 
@@ -502,15 +516,29 @@ method_param:
              }
 ;
 
-value_list:  /* empty */         { $$ = NULL; }
-           | value_list_nonempty { $$ = $1;   }
+value_list:  /* empty */                         { $$ = NULL; }
+           | value_list_contents                 { $$ = $1;   }
+           | '{' value_list_contents '}'         { $$ = $2;   }
 ;
 
-value_list_nonempty:
+value_list_contents:
              value_item          { $$ = $1; }
-           | value_list_nonempty value_item
+           | value_list_contents value_item
                                  { $2->next = $1; $$ = $2; }
-           | value_list_nonempty ',' value_item
+           | value_list_contents ',' value_item
+                                 { $3->next = $1; $$ = $3; }
+;
+
+/* Non-empty list of basic value items, optionally enclosed in braces.
+ * Both alternatives yield the same list; the braces only delimit it. */
+basic_value_list:
+             basic_value_list_contents           { $$ = $1; }
+           | '{' basic_value_list_contents '}'   { $$ = $2; }
+;
+
+basic_value_list_contents:
+             basic_value_item    { $$ = $1; }
+           | basic_value_list_contents basic_value_item
+                                 { $2->next = $1; $$ = $2; }
+           | basic_value_list_contents ',' basic_value_item
                                  { $3->next = $1; $$ = $3; }
 ;
 
@@ -522,17 +550,40 @@ value_item:  sel_expr            %prec PARAM_REDUCT
              { $$ = _gmx_selexpr_create_value_expr($1); }
            | str_expr            %prec PARAM_REDUCT
              { $$ = _gmx_selexpr_create_value_expr($1); }
-           | TOK_INT TO TOK_INT
+           | value_item_range    { $$ = $1; }
+;
+
+/* A single constant value (integer, real or string) or a numeric range.
+ * Single numbers are stored as a degenerate range whose two ends are equal. */
+basic_value_item:
+             integer_number      %prec PARAM_REDUCT
+             {
+                 /* Single integer: both ends of the range get the same value */
+                 $$ = _gmx_selexpr_create_value(INT_VALUE);
+                 $$->u.i.i1 = $$->u.i.i2 = $1;
+             }
+           | real_number         %prec PARAM_REDUCT
+             {
+                 /* Single real: both ends of the range get the same value */
+                 $$ = _gmx_selexpr_create_value(REAL_VALUE);
+                 $$->u.r.r1 = $$->u.r.r2 = $1;
+             }
+           | string              %prec PARAM_REDUCT
+             {
+                 /* String value: the value stores the string from $1 */
+                 $$ = _gmx_selexpr_create_value(STR_VALUE);
+                 $$->u.s = $1;
+             }
+           /* Explicit "A to B" range, built by value_item_range */
+           | value_item_range    { $$ = $1; }
+;
+
+value_item_range:
+             integer_number TO integer_number
              {
                  $$ = _gmx_selexpr_create_value(INT_VALUE);
                  $$->u.i.i1 = $1; $$->u.i.i2 = $3;
              }
-           | TOK_INT TO TOK_REAL
+           | integer_number TO real_number
              {
                  $$ = _gmx_selexpr_create_value(REAL_VALUE);
                  $$->u.r.r1 = $1; $$->u.r.r2 = $3;
              }
-           | TOK_REAL TO number
+           | real_number TO number
              {
                  $$ = _gmx_selexpr_create_value(REAL_VALUE);
                  $$->u.r.r1 = $1; $$->u.r.r2 = $3;
index 7236ecc2778150fded09c8dea48d1f110a7e3699..de74a3d8a80c6e266a9e3ce43b7960e1988612b6 100644 (file)
@@ -365,19 +365,17 @@ struct yy_trans_info
        flex_int32_t yy_verify;
        flex_int32_t yy_nxt;
        };
-static yyconst flex_int16_t yy_accept[93] =
+static yyconst flex_int16_t yy_accept[89] =
     {   0,
         0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
-       27,   25,   23,    6,   20,   25,    1,   25,   25,   25,
-        2,    6,   21,   25,   22,   25,   24,   22,   22,   22,
-       22,   22,   22,   25,   22,   22,   22,   22,   22,   11,
-        8,   10,   10,    9,   23,   21,    0,    4,    0,    1,
-       17,    0,    0,    3,    3,    2,   24,   24,   22,    5,
-       22,   22,   22,   18,   15,   22,   18,   16,   13,   22,
-       12,   22,   22,    8,    9,    0,    0,    0,    3,   17,
-       22,   20,   19,   13,   22,    3,    0,    3,   22,    7,
-       14,    0
-
+       27,   25,   23,    6,   20,   25,    1,   25,   25,    2,
+        6,   21,   25,   22,   25,   24,   22,   22,   22,   22,
+       22,   22,   25,   22,   22,   22,   22,   22,   11,    8,
+       10,   10,    9,   23,   21,    0,    4,    0,    1,   17,
+        3,    3,    2,   24,   24,   22,    5,   22,   22,   22,
+       18,   15,   22,   18,   16,   13,   22,   12,   22,   22,
+        8,    9,    0,    0,    3,   17,   22,   20,   19,   13,
+       22,    0,    3,    3,   22,    7,   14,    0
     } ;
 
 static yyconst flex_int32_t yy_ec[256] =
@@ -386,16 +384,16 @@ static yyconst flex_int32_t yy_ec[256] =
         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
         1,    2,    4,    5,    6,    1,    1,    7,    1,    1,
-        1,    1,    8,    1,    9,   10,    1,   11,   11,   11,
-       11,   11,   11,   11,   11,   11,   11,    1,   12,   13,
-       14,   13,    1,    1,   15,   15,   15,   15,   16,   15,
-       15,   15,   15,   15,   15,   15,   15,   15,   15,   15,
-       15,   15,   15,   15,   15,   15,   15,   15,   15,   15,
-        1,   17,    1,    1,   18,    1,   19,   15,   15,   20,
-
-       21,   22,   23,   24,   15,   15,   15,   25,   15,   26,
-       27,   28,   15,   29,   30,   31,   32,   15,   15,   33,
-       34,   15,    1,   35,    1,    1,    1,    1,    1,    1,
+        1,    1,    8,    1,    8,    9,    1,   10,   10,   10,
+       10,   10,   10,   10,   10,   10,   10,    1,   11,   12,
+       13,   12,    1,    1,   14,   14,   14,   14,   15,   14,
+       14,   14,   14,   14,   14,   14,   14,   14,   14,   14,
+       14,   14,   14,   14,   14,   14,   14,   14,   14,   14,
+        1,   16,    1,    1,   17,    1,   18,   14,   14,   19,
+
+       20,   21,   22,   23,   14,   14,   14,   24,   14,   25,
+       26,   27,   14,   28,   29,   30,   31,   14,   14,   32,
+       33,   14,    1,   34,    1,    1,    1,    1,    1,    1,
         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
@@ -412,96 +410,94 @@ static yyconst flex_int32_t yy_ec[256] =
         1,    1,    1,    1,    1
     } ;
 
-static yyconst flex_int32_t yy_meta[36] =
+static yyconst flex_int32_t yy_meta[35] =
     {   0,
-        1,    1,    2,    1,    1,    1,    1,    1,    1,    3,
-        4,    1,    1,    1,    4,    4,    1,    4,    4,    4,
+        1,    1,    2,    1,    1,    1,    1,    1,    3,    4,
+        1,    1,    1,    4,    4,    1,    4,    4,    4,    4,
         4,    4,    4,    4,    4,    4,    4,    4,    4,    4,
-        4,    4,    4,    4,    1
+        4,    4,    4,    1
     } ;
 
-static yyconst flex_int16_t yy_base[98] =
+static yyconst flex_int16_t yy_base[94] =
     {   0,
-        0,    0,  144,  143,   10,   12,  145,  144,   46,    0,
-      167,  172,  164,  172,  151,   77,    0,  157,   30,  152,
-       74,  172,  148,  147,    0,  157,  149,  132,  128,  129,
-      126,  127,  126,  117,   64,  124,   70,  129,  128,  172,
-      146,  172,  172,    0,  145,  172,   83,  172,  141,    0,
-      172,  134,   87,   90,   91,   94,  134,  108,    0,  172,
-      123,  115,  110,    0,    0,  111,  172,    0,  108,  116,
-        0,  107,  111,  133,    0,   34,  112,  123,  103,    0,
-       97,    0,    0,    0,   97,  111,   98,   80,   61,    0,
-        0,  172,  123,  127,  129,   83,   79
+        0,    0,  131,  130,   10,   12,  132,  131,   45,    0,
+      153,  158,  150,  158,  138,   75,    0,  143,  139,   72,
+      158,  135,  134,    0,  143,  136,  119,  115,  116,  113,
+      114,  113,  104,   62,  111,   68,  116,  115,  158,  132,
+      158,  158,    0,  131,  158,   79,  158,  127,    0,  158,
+       84,   87,   91,  122,   31,    0,  158,  111,  103,   98,
+        0,    0,   99,  158,    0,   96,  104,    0,   95,   99,
+      120,    0,   34,  107,   76,    0,   82,    0,    0,    0,
+       83,   99,   98,   95,   76,    0,    0,  158,  111,  115,
+      117,   94,   84
 
     } ;
 
-static yyconst flex_int16_t yy_def[98] =
+static yyconst flex_int16_t yy_def[94] =
     {   0,
-       92,    1,    1,    1,    1,    1,    1,    1,   92,    9,
-       92,   92,   92,   92,   92,   93,   94,   92,   92,   92,
-       95,   92,   92,   92,   96,   92,   95,   96,   96,   96,
-       96,   96,   96,   92,   96,   96,   96,   96,   96,   92,
-       92,   92,   92,   97,   92,   92,   93,   92,   92,   94,
-       92,   92,   92,   92,   92,   95,   95,   95,   96,   92,
-       96,   96,   96,   96,   96,   96,   92,   96,   96,   96,
-       96,   96,   96,   92,   97,   92,   92,   92,   95,   96,
-       96,   96,   96,   96,   96,   92,   92,   92,   96,   96,
-       96,    0,   92,   92,   92,   92,   92
+       88,    1,    1,    1,    1,    1,    1,    1,   88,    9,
+       88,   88,   88,   88,   88,   89,   90,   88,   88,   91,
+       88,   88,   88,   92,   88,   91,   92,   92,   92,   92,
+       92,   92,   88,   92,   92,   92,   92,   92,   88,   88,
+       88,   88,   93,   88,   88,   89,   88,   88,   90,   88,
+       88,   88,   91,   91,   91,   92,   88,   92,   92,   92,
+       92,   92,   92,   88,   92,   92,   92,   92,   92,   92,
+       88,   93,   88,   88,   91,   92,   92,   92,   92,   92,
+       92,   88,   88,   88,   92,   92,   92,    0,   88,   88,
+       88,   88,   88
 
     } ;
 
-static yyconst flex_int16_t yy_nxt[208] =
+static yyconst flex_int16_t yy_nxt[193] =
     {   0,
        12,   13,   14,   15,   16,   17,   18,   12,   19,   20,
-       21,   22,   23,   24,   25,   25,   26,   27,   28,   25,
-       25,   25,   29,   25,   25,   30,   31,   25,   25,   25,
-       32,   25,   33,   25,   34,   36,   37,   36,   37,   52,
-       53,   78,   78,   38,   86,   38,   40,   41,   42,   40,
-       40,   40,   40,   40,   40,   40,   40,   43,   40,   40,
-       44,   44,   40,   40,   44,   44,   44,   44,   44,   44,
-       44,   44,   44,   44,   44,   44,   44,   44,   44,   44,
-       40,   48,   75,   55,   56,   68,   59,   48,   91,   58,
-       88,   70,   64,   49,   58,   71,   55,   53,   64,   49,
-
-       54,   54,   76,   55,   56,   77,   77,   76,   88,   58,
-       77,   77,   92,   79,   58,   78,   78,   92,   79,   87,
-       87,   86,   88,   47,   90,   47,   47,   50,   89,   50,
-       50,   57,   57,   86,   74,   85,   71,   84,   82,   83,
-       82,   81,   80,   92,   54,   47,   45,   74,   73,   72,
-       69,   67,   66,   65,   64,   63,   62,   61,   92,   60,
-       46,   46,   54,   51,   46,   45,   92,   39,   39,   35,
-       35,   11,   92,   92,   92,   92,   92,   92,   92,   92,
-       92,   92,   92,   92,   92,   92,   92,   92,   92,   92,
-       92,   92,   92,   92,   92,   92,   92,   92,   92,   92,
-
-       92,   92,   92,   92,   92,   92,   92
+       21,   22,   23,   24,   24,   25,   26,   27,   24,   24,
+       24,   28,   24,   24,   29,   30,   24,   24,   24,   31,
+       24,   32,   24,   33,   35,   36,   35,   36,   74,   88,
+       75,   82,   37,   83,   37,   39,   40,   41,   39,   39,
+       39,   39,   39,   39,   39,   42,   39,   39,   43,   43,
+       39,   39,   43,   43,   43,   43,   43,   43,   43,   43,
+       43,   43,   43,   43,   43,   43,   43,   43,   39,   47,
+       52,   53,   65,   47,   88,   75,   55,   72,   67,   61,
+       48,   55,   68,   51,   48,   61,   51,   56,   73,   52,
+
+       53,   73,   87,   73,   84,   55,   73,   83,   83,   86,
+       55,   46,   85,   46,   46,   49,   84,   49,   49,   54,
+       54,   71,   81,   68,   80,   78,   79,   78,   77,   76,
+       88,   46,   44,   71,   70,   69,   66,   64,   63,   62,
+       61,   60,   59,   58,   88,   57,   45,   45,   51,   50,
+       45,   44,   88,   38,   38,   34,   34,   11,   88,   88,
+       88,   88,   88,   88,   88,   88,   88,   88,   88,   88,
+       88,   88,   88,   88,   88,   88,   88,   88,   88,   88,
+       88,   88,   88,   88,   88,   88,   88,   88,   88,   88,
+       88,   88
+
     } ;
 
-static yyconst flex_int16_t yy_chk[208] =
+static yyconst flex_int16_t yy_chk[193] =
     {   0,
         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-        1,    1,    1,    1,    1,    5,    5,    6,    6,   19,
-       19,   76,   76,    5,   76,    6,    9,    9,    9,    9,
+        1,    1,    1,    1,    5,    5,    6,    6,   55,   55,
+       55,   73,    5,   73,    6,    9,    9,    9,    9,    9,
         9,    9,    9,    9,    9,    9,    9,    9,    9,    9,
         9,    9,    9,    9,    9,    9,    9,    9,    9,    9,
-        9,    9,    9,    9,    9,    9,    9,    9,    9,    9,
-        9,   16,   97,   21,   21,   35,   96,   47,   89,   21,
-       88,   37,   35,   16,   21,   37,   53,   53,   37,   47,
-
-       54,   55,   53,   56,   56,   54,   55,   53,   87,   56,
-       54,   55,   79,   79,   56,   58,   58,   58,   58,   77,
-       77,   86,   77,   93,   85,   93,   93,   94,   81,   94,
-       94,   95,   95,   78,   74,   73,   72,   70,   69,   66,
-       63,   62,   61,   57,   52,   49,   45,   41,   39,   38,
-       36,   34,   33,   32,   31,   30,   29,   28,   27,   26,
-       24,   23,   20,   18,   15,   13,   11,    8,    7,    4,
-        3,   92,   92,   92,   92,   92,   92,   92,   92,   92,
-       92,   92,   92,   92,   92,   92,   92,   92,   92,   92,
-       92,   92,   92,   92,   92,   92,   92,   92,   92,   92,
-
-       92,   92,   92,   92,   92,   92,   92
+        9,    9,    9,    9,    9,    9,    9,    9,    9,   16,
+       20,   20,   34,   46,   75,   75,   20,   93,   36,   34,
+       16,   20,   36,   51,   46,   36,   52,   92,   51,   53,
+
+       53,   52,   85,   51,   84,   53,   52,   83,   82,   81,
+       53,   89,   77,   89,   89,   90,   74,   90,   90,   91,
+       91,   71,   70,   69,   67,   66,   63,   60,   59,   58,
+       54,   48,   44,   40,   38,   37,   35,   33,   32,   31,
+       30,   29,   28,   27,   26,   25,   23,   22,   19,   18,
+       15,   13,   11,    8,    7,    4,    3,   88,   88,   88,
+       88,   88,   88,   88,   88,   88,   88,   88,   88,   88,
+       88,   88,   88,   88,   88,   88,   88,   88,   88,   88,
+       88,   88,   88,   88,   88,   88,   88,   88,   88,   88,
+       88,   88
+
     } ;
 
 /* The intent behind this definition is that it'll catch
@@ -571,7 +567,7 @@ static yyconst flex_int16_t yy_chk[208] =
 
 
 
-#line 575 "scanner.c"
+#line 571 "scanner.c"
 
 #define INITIAL 0
 #define matchof 1
@@ -834,7 +830,7 @@ YY_DECL
     }
 
 
-#line 838 "scanner.c"
+#line 834 "scanner.c"
 
        if ( !yyg->yy_init )
                {
@@ -887,13 +883,13 @@ yy_match:
                        while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
                                {
                                yy_current_state = (int) yy_def[yy_current_state];
-                               if ( yy_current_state >= 93 )
+                               if ( yy_current_state >= 89 )
                                        yy_c = yy_meta[(unsigned int) yy_c];
                                }
                        yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
                        ++yy_cp;
                        }
-               while ( yy_current_state != 92 );
+               while ( yy_current_state != 88 );
                yy_cp = yyg->yy_last_accepting_cpos;
                yy_current_state = yyg->yy_last_accepting_state;
 
@@ -1060,7 +1056,7 @@ YY_RULE_SETUP
 #line 156 "scanner.l"
 YY_FATAL_ERROR( "flex scanner jammed" );
        YY_BREAK
-#line 1064 "scanner.c"
+#line 1060 "scanner.c"
 case YY_STATE_EOF(INITIAL):
 case YY_STATE_EOF(matchof):
 case YY_STATE_EOF(matchbool):
@@ -1359,7 +1355,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
                while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
                        {
                        yy_current_state = (int) yy_def[yy_current_state];
-                       if ( yy_current_state >= 93 )
+                       if ( yy_current_state >= 89 )
                                yy_c = yy_meta[(unsigned int) yy_c];
                        }
                yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
@@ -1388,11 +1384,11 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
        while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
                {
                yy_current_state = (int) yy_def[yy_current_state];
-               if ( yy_current_state >= 93 )
+               if ( yy_current_state >= 89 )
                        yy_c = yy_meta[(unsigned int) yy_c];
                }
        yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
-       yy_is_jam = (yy_current_state == 92);
+       yy_is_jam = (yy_current_state == 88);
 
        return yy_is_jam ? 0 : yy_current_state;
 }
index f3404ca9487e9187603acb54b0f88d822c09211d..b23b2b3389b6369004f845b9c3fec8270d3ff40e 100644 (file)
@@ -58,7 +58,7 @@ INTEGER    [[:digit:]]+
 DSEQ       ([[:digit:]]+)
 FRAC       (([[:digit:]]*"."{DSEQ})|{DSEQ}".")
 EXP        ([eE][+-]?{DSEQ})
-REAL       ("-"?(({FRAC}{EXP}?)|({DSEQ}{EXP})))
+REAL       (({FRAC}{EXP}?)|({DSEQ}{EXP}))
 STRING     (\"([^\"\\\n]|(\\\"))*\")
 IDENTIFIER ([[:alpha:]][_[:alnum:]]*)
 CMPOP      (([<>]=?)|([!=]=))
index 7fa44b88c6edbe538f410ab936e603d581d97072..0bbe59fabac7654136541e33396261d4779a8d32 100644 (file)
@@ -113,7 +113,7 @@ static const char *help_eval[] = {
     "remaining expressions are not evaluated at all.",
     "This can be used to optimize the selections: you should write the",
     "most restrictive and/or the most inexpensive expressions first in",
-    "gmx_boolean expressions.",
+    "boolean expressions.",
     "The relative ordering between dynamic and static expressions does not",
     "matter: all static expressions are evaluated only once, before the first",
     "frame, and the result becomes the leftmost expression.[PAR]",
@@ -186,6 +186,14 @@ static const char *help_limits[] = {
     "of groups of three or four atoms).",
     "For such programs, it is up to the user to provide a proper selection",
     "expression that always returns such positions.",
+    "[PAR]",
+
+    "Due to technical reasons, having a negative value as the first value in",
+    "expressions like[BR]",
+    "[TT]charge -1 to -0.7[tt][BR]",
+    "result in a syntax error. A workaround is to write[BR]",
+    "[TT]charge {-1 to -0.7}[tt][BR]",
+    "instead.",
 };
 
 static const char *help_positions[] = {
@@ -193,7 +201,7 @@ static const char *help_positions[] = {
 
     "Possible ways of specifying positions in selections are:[PAR]",
 
-    "1. A constant position can be defined as [TT](XX, YY, ZZ)[tt], where",
+    "1. A constant position can be defined as [TT][XX, YY, ZZ][tt], where",
     "[TT]XX[tt], [TT]YY[tt] and [TT]ZZ[tt] are real numbers.[PAR]",
 
     "2. [TT]com of ATOM_EXPR [pbc][tt] or [TT]cog of ATOM_EXPR [pbc][tt]",
index 57b7ce1238bf4580f26445e7697a33502b6867ac..15a868e9967c7a8b07d2ba4b5fb2630f4f6e83d3 100644 (file)
@@ -211,12 +211,19 @@ gmx_bool read_next_vmd_frame(int status,t_trxframe *fr)
 #endif
 
     fr->bX = 1;
+    fr->bBox = 1;
     vec[0] = .1*ts.A; vec[1] = .1*ts.B; vec[2] = .1*ts.C;
     angle[0] = ts.alpha; angle[1] = ts.beta; angle[2] = ts.gamma; 
     matrix_convert(fr->box,vec,angle);
-    fr->bTime = 1;
-    fr->time = ts.physical_time;
-
+    if (fr->vmdplugin.api->abiversion>10)
+    {
+        fr->bTime = TRUE;
+        fr->time = ts.physical_time;
+    }
+    else
+    {
+        fr->bTime = FALSE;
+    }
 
 
     return 1;
@@ -334,16 +341,22 @@ int read_first_vmd_frame(int *status,const char *fn,t_trxframe *fr,int flags)
         return 0;
     }
 
-    if (fr->natoms < 1) {
-        fprintf(stderr, "\nNo atoms found by VMD plugin in %s.\n"
-            "Or format does not record number of atoms.\n", fn );
+    if (fr->natoms == MOLFILE_NUMATOMS_UNKNOWN) {
+        fprintf(stderr, "\nFormat of file %s does not record number of atoms.\n", fn);
+        return 0;
+    } else if (fr->natoms == MOLFILE_NUMATOMS_NONE) {
+        fprintf(stderr, "\nNo atoms found by VMD plugin in file %s.\n", fn );
+        return 0;
+    } else if (fr->natoms < 1) { /*should not be reached*/
+        fprintf(stderr, "\nUnknown number of atoms %d for VMD plugin opening file %s.\n",
+                fr->natoms, fn );
         return 0;
     }
     
     snew(fr->x,fr->natoms);
 
     fr->vmdplugin.bV = 0;
-    if (fr->vmdplugin.api->read_timestep_metadata) 
+    if (fr->vmdplugin.api->abiversion > 10 && fr->vmdplugin.api->read_timestep_metadata)
     {
         fr->vmdplugin.api->read_timestep_metadata(fr->vmdplugin.handle, metadata);
         fr->vmdplugin.bV = metadata->has_velocities; 
index 8c6e4a0f2d0b0f6a16d17cb426287857e4ec7c13..f35457ce843a0ba98a2a15426ae7b1291b85ad20 100644 (file)
@@ -41,26 +41,19 @@ set(MDRUN_SOURCES
 add_library(gmxpreprocess ${GMXPREPROCESS_SOURCES})
 target_link_libraries(gmxpreprocess md)
 set_target_properties(gmxpreprocess PROPERTIES OUTPUT_NAME "gmxpreprocess${GMX_LIBS_SUFFIX}" SOVERSION ${SOVERSION} INSTALL_NAME_DIR "${LIB_INSTALL_DIR}")
-set_target_properties(md PROPERTIES OUTPUT_NAME "md${GMX_LIBS_SUFFIX}" SOVERSION ${SOVERSION} INSTALL_NAME_DIR "${LIB_INSTALL_DIR}")
 
 
 if(GMX_OPENMM) 
     add_subdirectory(gmx_gpu_utils)
     include_directories(./gmx_gpu_utils ${OpenMM_INCLUDE_DIR})
     link_directories(${OpenMM_LIBRARY_DIR}) 
-    # only define if this is a local build not a release 
-    # we assume that the auto-generated version is not used && 
-    # version string does not contain "-dev" => it's a release build
-    if(NOT USE_VERSION_H AND NOT PROJECT_VERSION MATCHES ".*-dev.*")  
-        add_definitions( -DOPENMM_PLUGIN_DIR="${OpenMM_PLUGIN_DIR}" ) 
-    else()
-        add_definitions( -DOPENMM_PLUGIN_DIR="" )
-    endif()
-    add_library(openmm_api_wrapper openmm_wrapper.cpp)
-    target_link_libraries(openmm_api_wrapper gmx gmx_gpu_utils ${OpenMM_LIBRARIES})
-#    remove_definitions( -DOPENMM_PLUGIN_DIR="${OpenMM_PLUGIN_DIR}" )  # TODO where should this go?!
+    # with this define, the OPENMM_PLUGIN_DIR environment variable is not needed
+    # if the same OpenMM installation is used for running and building 
+    add_definitions( -DOPENMM_PLUGIN_DIR="${OpenMM_PLUGIN_DIR}" ) 
+    file(TO_CMAKE_PATH ${OpenMM_PLUGIN_DIR} _path)
+    add_library(openmm_api_wrapper STATIC openmm_wrapper.cpp)
+    target_link_libraries(openmm_api_wrapper gmx_gpu_utils ${OpenMM_LIBRARIES})
     set(GMX_OPENMM_LIBRARIES openmm_api_wrapper gmx_gpu_utils ${OpenMM_LIBRARIES})   
-       install(TARGETS openmm_api_wrapper DESTINATION ${LIB_INSTALL_DIR})   
 endif(GMX_OPENMM)
 
 if(GMX_FAHCORE)
@@ -103,13 +96,7 @@ set_target_properties(gmxcheck PROPERTIES OUTPUT_NAME "gmxcheck${GMX_BINARY_SUFF
 
 add_executable(mdrun ${MDRUN_SOURCES})
 target_link_libraries(mdrun ${GMX_EXTRA_LIBRARIES} ${GMX_OPENMM_LIBRARIES})
-# set binary name to mdrun-gpu
-if(GMX_OPENMM)
-    set(_mdrun_exec_name "mdrun-gpu")    
-else()
-    set(_mdrun_exec_name "mdrun${GMX_BINARY_SUFFIX}")
-endif()
-set_target_properties(mdrun PROPERTIES OUTPUT_NAME "${_mdrun_exec_name}")
+set_target_properties(mdrun PROPERTIES OUTPUT_NAME "mdrun${GMX_BINARY_SUFFIX}")
 
 # this is to circumvent the following MSVC error: 
 # warning LNK4098: defaultlib 'LIBCMT' conflicts with use of other libs
@@ -119,6 +106,8 @@ if(GMX_OPENMM AND MSVC)
 endif()
 
 
+install(TARGETS gmxpreprocess DESTINATION ${LIB_INSTALL_DIR} COMPONENT libraries)
+install(TARGETS mdrun DESTINATION ${BIN_INSTALL_DIR} COMPONENT mdrun)
 install(TARGETS 
         grompp
         tpbconv
@@ -128,20 +117,31 @@ install(TARGETS
         gmxdump
         g_x2top
         gmxcheck
-        mdrun
-       gmxpreprocess DESTINATION ${LIB_INSTALL_DIR}
+        COMPONENT runtime
         RUNTIME DESTINATION ${BIN_INSTALL_DIR})
 
-
-get_target_property(MDRUN_PATH mdrun LOCATION)
-add_custom_target(install-mdrun
-        COMMAND ${CMAKE_COMMAND} -E copy "${MDRUN_PATH}" 
-            "${CMAKE_INSTALL_PREFIX}/bin/${_mdrun_exec_name}"
-        DEPENDS mdrun
+# Create the custom install-mdrun target
+if (BUILD_SHARED_LIBS)
+    # If shared libraries are used, we need to install the libraries in
+    # addition to the mdrun binary.
+    add_custom_target(install-mdrun
+        COMMAND ${CMAKE_COMMAND} -DCOMPONENT=libraries
+                -P ${CMAKE_BINARY_DIR}/cmake_install.cmake
+        COMMAND ${CMAKE_COMMAND} -DCOMPONENT=mdrun
+                -P ${CMAKE_BINARY_DIR}/cmake_install.cmake
+        COMMENT "Installing mdrun")
+else (BUILD_SHARED_LIBS)
+    add_custom_target(install-mdrun
+        COMMAND ${CMAKE_COMMAND} -DCOMPONENT=mdrun
+                -P ${CMAKE_BINARY_DIR}/cmake_install.cmake
         COMMENT "Installing mdrun")
+endif (BUILD_SHARED_LIBS)
+add_dependencies(install-mdrun mdrun)
 
 endif(GMX_FAHCORE)
 
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/libgmxpreprocess.pc.cmakein ${CMAKE_CURRENT_BINARY_DIR}/libgmxpreprocess.pc @ONLY)
-install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libgmxpreprocess.pc DESTINATION ${LIB_INSTALL_DIR}/pkgconfig 
-  RENAME "libgmxpreprocess${GMX_LIBS_SUFFIX}.pc")
+install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libgmxpreprocess.pc
+        DESTINATION ${LIB_INSTALL_DIR}/pkgconfig
+        RENAME "libgmxpreprocess${GMX_LIBS_SUFFIX}.pc"
+        COMPONENT development)
index 25f4aa12dbea6eed2420b726abc6d3593a41f232..f06debc0b4973db321c18b03054102649c5f1298 100644 (file)
@@ -18,21 +18,13 @@ endif()
 CUDA_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
 set(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE OFF)        
 if(CMAKE_BUILD_TYPE STREQUAL "DEBUG")
-    CUDA_ADD_LIBRARY(gmx_gpu_utils  
-        gmx_gpu_utils.cu 
-        memtestG80_core.cu 
+    CUDA_ADD_LIBRARY(gmx_gpu_utils STATIC
+        gmx_gpu_utils.cu memtestG80_core.cu 
         OPTIONS ${_os_def}
-        DEBUG -g -D_DEBUG_=1 
-        )
+        DEBUG -g -D_DEBUG_=1 )
 else()
-    CUDA_ADD_LIBRARY(gmx_gpu_utils 
-        gmx_gpu_utils.cu 
-        memtestG80_core.cu 
-        OPTIONS ${_os_def}
-        )
+    CUDA_ADD_LIBRARY(gmx_gpu_utils STATIC
+        gmx_gpu_utils.cu memtestG80_core.cu 
+        OPTIONS ${_os_def} )
 endif()
 CUDA_BUILD_CLEAN_TARGET()
-
-target_link_libraries(gmx_gpu_utils gmx)
-install(TARGETS gmx_gpu_utils DESTINATION ${LIB_INSTALL_DIR})   
-
index 0f9e7794d9dbaed2de4d7411c40a8ca44fb56484..ee14674d5c8dcd66988efadd7bf35c851cd5a1c4 100644 (file)
@@ -169,8 +169,12 @@ static void check_cg_sizes(const char *topfn,t_block *cgs,warninp_t wi)
     {
         maxsize = max(maxsize,cgs->index[cg+1]-cgs->index[cg]);
     }
-    if (maxsize > 10)
+    
+    if (maxsize > MAX_CHARGEGROUP_SIZE)
+    {
+        gmx_fatal(FARGS,"The largest charge group contains %d atoms. The maximum is %d.",maxsize,MAX_CHARGEGROUP_SIZE);
+    }
+    else if (maxsize > 10)
     {
         set_warning_line(wi,topfn,-1);
         sprintf(warn_buf,
@@ -183,6 +187,148 @@ static void check_cg_sizes(const char *topfn,t_block *cgs,warninp_t wi)
     }
 }
 
+/* Checks the time step against the fastest unconstrained bond vibration.
+ *
+ * For every molecule type in mtop, all F_BONDS, F_G96BONDS and F_HARMONIC
+ * interactions are visited and the oscillation period of each bond is
+ * estimated from its force constant and the masses of the two atoms.
+ * Bonds that are matched by an F_CONSTR entry or fall within an F_SETTLE
+ * entry of the same molecule type are ignored. For the remaining bond with
+ * the shortest period a note (period < min_steps_note*dt) or a warning
+ * (period < min_steps_warn*dt) is issued through wi.
+ *
+ * mtop  the topology to check
+ * dt    the integration time step (reported in ps in the message)
+ * wi    warning handle that receives the note or warning
+ */
+static void check_bonds_timestep(gmx_mtop_t *mtop,double dt,warninp_t wi)
+{
+    /* This check is not intended to ensure accurate integration,
+     * rather it is to signal mistakes in the mdp settings.
+     * A common mistake is to forget to turn on constraints
+     * for MD after energy minimization with flexible bonds.
+     * This check can also detect too large time steps for flexible water
+     * models, but such errors will often be masked by the constraints
+     * mdp options, which turns flexible water into water with bond constraints,
+     * but without an angle constraint. Unfortunately such incorrect use
+     * of water models can not easily be detected without checking
+     * for specific model names.
+     *
+     * The stability limit of leap-frog or velocity verlet is 4.44 steps
+     * per oscillational period.
+     * But accurate bonds distributions are lost far before that limit.
+     * To allow relatively common schemes (although not common with Gromacs)
+     * of dt=1 fs without constraints and dt=2 fs with only H-bond constraints
+     * we set the note limit to 10.
+     */
+    int       min_steps_warn=5;
+    int       min_steps_note=10;
+    t_iparams *ip;
+    int       molt;
+    gmx_moltype_t *moltype,*w_moltype;
+    t_atom    *atom;
+    t_ilist   *ilist,*ilb,*ilc,*ils;
+    int       ftype;
+    int       i,a1,a2,w_a1,w_a2,j;
+    real      twopi2,limit2,fc,re,m1,m2,period2,w_period2;
+    gmx_bool  bFound,bWater,bWarn;
+    char      warn_buf[STRLEN];
+
+    ip = mtop->ffparams.iparams;
+
+    twopi2 = sqr(2*M_PI);
+
+    /* All comparisons are done on squared periods to avoid sqrt calls */
+    limit2 = sqr(min_steps_note*dt);
+
+    /* w_* track the worst (shortest period) unconstrained bond found so far */
+    w_a1 = w_a2 = -1;
+    w_period2 = -1.0;
+    
+    w_moltype = NULL;
+    for(molt=0; molt<mtop->nmoltype; molt++)
+    {
+        moltype = &mtop->moltype[molt];
+        atom  = moltype->atoms.atom;
+        ilist = moltype->ilist;
+        ilc = &ilist[F_CONSTR];
+        ils = &ilist[F_SETTLE];
+        for(ftype=0; ftype<F_NRE; ftype++)
+        {
+            if (!(ftype == F_BONDS || ftype == F_G96BONDS || ftype == F_HARMONIC))
+            {
+                continue;
+            }
+            
+            ilb = &ilist[ftype];
+            /* Each entry is: parameter index, atom 1, atom 2 */
+            for(i=0; i<ilb->nr; i+=3)
+            {
+                fc = ip[ilb->iatoms[i]].harmonic.krA;
+                re = ip[ilb->iatoms[i]].harmonic.rA;
+                if (ftype == F_G96BONDS)
+                {
+                    /* Convert the fourth-power (G96) fc to a harmonic fc:
+                     * V = 1/4 k (r^2 - b^2)^2 has curvature 2 k b^2 at r = b.
+                     */
+                    fc = 2*fc*re*re;
+                }
+                a1 = ilb->iatoms[i+1];
+                a2 = ilb->iatoms[i+2];
+                m1 = atom[a1].m;
+                m2 = atom[a2].m;
+                if (fc > 0 && m1 > 0 && m2 > 0)
+                {
+                    /* T^2 = (2 pi)^2 mu/k with reduced mass mu = m1 m2/(m1 + m2) */
+                    period2 = twopi2*m1*m2/((m1 + m2)*fc);
+                }
+                else
+                {
+                    /* No restoring force or a massless particle: never flag */
+                    period2 = GMX_FLOAT_MAX;
+                }
+                if (debug)
+                {
+                    fprintf(debug,"fc %g m1 %g m2 %g period %g\n",
+                            fc,m1,m2,sqrt(period2));
+                }
+                if (period2 < limit2)
+                {
+                    /* Is this bond replaced by a constraint? */
+                    bFound = FALSE;
+                    for(j=0; j<ilc->nr && !bFound; j+=3)
+                    {
+                        if ((ilc->iatoms[j+1] == a1 && ilc->iatoms[j+2] == a2) ||
+                            (ilc->iatoms[j+1] == a2 && ilc->iatoms[j+2] == a1))
+                        {
+                            bFound = TRUE;
+                        }
+                    }
+                    /* Or do both atoms lie in the 3-atom range starting at
+                     * the atom stored in a SETTLE entry?
+                     */
+                    for(j=0; j<ils->nr && !bFound; j+=2)
+                    {
+                        if ((a1 >= ils->iatoms[j+1] && a1 < ils->iatoms[j+1]+3) &&
+                            (a2 >= ils->iatoms[j+1] && a2 < ils->iatoms[j+1]+3))
+                        {
+                            bFound = TRUE;
+                        }
+                    }
+                    if (!bFound &&
+                        (w_moltype == NULL || period2 < w_period2))
+                    {
+                        w_moltype = moltype;
+                        w_a1      = a1;
+                        w_a2      = a2;
+                        w_period2 = period2;
+                    }
+                }
+            }
+        }
+    }
+    
+    if (w_moltype != NULL)
+    {
+        bWarn = (w_period2 < sqr(min_steps_warn*dt));
+        /* A check that would recognize most water models */
+        bWater = ((*w_moltype->atoms.atomname[0])[0] == 'O' &&
+                  w_moltype->atoms.nr <= 5);
+        sprintf(warn_buf,"The bond in molecule-type %s between atoms %d %s and %d %s has an estimated oscillational period of %.1e ps, which is less than %d times the time step of %.1e ps.\n"
+                "%s",
+                *w_moltype->name,
+                w_a1+1,*w_moltype->atoms.atomname[w_a1],
+                w_a2+1,*w_moltype->atoms.atomname[w_a2],
+                sqrt(w_period2),bWarn ? min_steps_warn : min_steps_note,dt,
+                bWater ?
+                "Maybe you asked for flexible water." :
+                "Maybe you forgot to change the constraints mdp option.");
+        if (bWarn)
+        {
+            warning(wi,warn_buf);
+        }
+        else
+        {
+            warning_note(wi,warn_buf);
+        }
+    }
+}
+
 static void check_vel(gmx_mtop_t *mtop,rvec v[])
 {
   gmx_mtop_atomloop_all_t aloop;
@@ -912,6 +1058,42 @@ static int count_constraints(gmx_mtop_t *mtop,t_molinfo *mi,warninp_t wi)
   return count;
 }
 
+static void check_gbsa_params_charged(gmx_mtop_t *sys, gpp_atomtype_t atype)
+{
+    int i,nmiss,natoms,mt;
+    real q;
+    const t_atoms *atoms;
+    /* Purpose: abort via gmx_fatal if any charged atom uses an atom type with a zero GB parameter. */
+    nmiss = 0;
+    for(mt=0;mt<sys->nmoltype;mt++) /* visit every molecule type in the topology */
+    {
+        atoms  = &sys->moltype[mt].atoms;
+        natoms = atoms->nr;
+        /* Inspect each atom of this molecule type. */
+        for(i=0;i<natoms;i++)
+        {
+            q = atoms->atom[i].q;
+            if ((get_atomtype_radius(atoms->atom[i].type,atype)    == 0  ||
+                 get_atomtype_vol(atoms->atom[i].type,atype)       == 0  ||
+                 get_atomtype_surftens(atoms->atom[i].type,atype)  == 0  ||
+                 get_atomtype_gb_radius(atoms->atom[i].type,atype) == 0  ||
+                 get_atomtype_S_hct(atoms->atom[i].type,atype)     == 0) &&
+                q != 0) /* uncharged atoms are allowed to have zero parameters */
+            {
+                fprintf(stderr,"\nGB parameter(s) zero for atom type '%s' while charge is %g\n",
+                        get_atomtype_name(atoms->atom[i].type,atype),q);
+                nmiss++; /* NOTE(review): incremented per offending atom, not per distinct atom type */
+            }
+        }
+    }
+    /* Any hit is fatal; the count printed below is the per-atom tally accumulated above. */
+    if (nmiss > 0)
+    {
+        gmx_fatal(FARGS,"Can't do GB electrostatics; the implicit_genborn_params section of the forcefield has parameters with value zero for %d atomtypes that occur as charged atoms.",nmiss);
+    }
+}
+
+
 static void check_gbsa_params(t_inputrec *ir,gpp_atomtype_t atype)
 {
     int  nmiss,i;
@@ -930,7 +1112,7 @@ static void check_gbsa_params(t_inputrec *ir,gpp_atomtype_t atype)
             get_atomtype_gb_radius(i,atype) < 0 ||
             get_atomtype_S_hct(i,atype)     < 0)
         {
-            fprintf(stderr,"GB parameter(s) missing or negative for atom type '%s'\n",
+            fprintf(stderr,"\nGB parameter(s) missing or negative for atom type '%s'\n",
                     get_atomtype_name(i,atype));
             nmiss++;
         }
@@ -938,8 +1120,7 @@ static void check_gbsa_params(t_inputrec *ir,gpp_atomtype_t atype)
     
     if (nmiss > 0)
     {
-        gmx_fatal(FARGS,"Can't do GB electrostatics; the forcefield is missing %d values for\n"
-                  "atomtype radii, or they might be negative\n.",nmiss);
+        gmx_fatal(FARGS,"Can't do GB electrostatics; the implicit_genborn_params section of the forcefield is missing parameters for %d atomtypes or they might be negative.",nmiss);
     }
   
 }
@@ -1056,7 +1237,7 @@ int main (int argc, char *argv[])
   char         warn_buf[STRLEN];
 
   t_filenm fnm[] = {
-    { efMDP, NULL,  NULL,        ffOPTRD },
+    { efMDP, NULL,  NULL,        ffREAD  },
     { efMDP, "-po", "mdout",     ffWRITE },
     { efSTX, "-c",  NULL,        ffREAD  },
     { efSTX, "-r",  NULL,        ffOPTRD },
@@ -1239,6 +1420,11 @@ int main (int argc, char *argv[])
     {
         /* Now we have renumbered the atom types, we can check the GBSA params */
         check_gbsa_params(ir,atype);
+      
+      /* Check that all atoms that have charge and/or LJ-parameters also have 
+       * sensible GB-parameters
+       */
+      check_gbsa_params_charged(sys,atype);
     }
 
        /* PELA: Copy the atomtype data to the topology atomtype list */
@@ -1275,6 +1461,11 @@ int main (int argc, char *argv[])
       check_cg_sizes(ftp2fn(efTOP,NFILE,fnm),&sys->moltype[i].cgs,wi);
   }
 
+  if (EI_DYNAMICS(ir->eI) && ir->eI != eiBD)
+  {
+      check_bonds_timestep(sys,ir->delta_t,wi);
+  }
+
   check_warning_error(wi,FARGS);
        
   if (bVerbose) 
index 225a4bbda2461614a14a40c6a667f338dd0fa12e..5a73798199d6a15e65ecb890565d4bba71106db5 100644 (file)
@@ -2705,25 +2705,17 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
         bStartingFromCpt = FALSE;
 
         /* #######  SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
-        /* Complicated conditional when bGStatEveryStep=FALSE.
-         * We can not just use bGStat, since then the simulation results
-         * would depend on nstenergy and nstlog or step_nscheck.
+        /* With all integrators, except VV, we need to retain the pressure
+         * at the current step for coupling at the next step.
          */
-        if (((state->flags & (1<<estPRES_PREV)) || 
-             (state->flags & (1<<estSVIR_PREV)) ||
-             (state->flags & (1<<estFVIR_PREV))) &&
+        if ((state->flags & (1<<estPRES_PREV)) &&
             (bGStatEveryStep ||
-             (ir->nstlist > 0 && step % ir->nstlist == 0) ||
-             (ir->nstlist < 0 && nlh.nabnsb > 0) ||
-             (ir->nstlist == 0 && bGStat))) 
+             (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
         {
             /* Store the pressure in t_state for pressure coupling
              * at the next MD step.
              */
-            if (state->flags & (1<<estPRES_PREV))
-            {
-                copy_mat(pres,state->pres_prev);
-            }
+            copy_mat(pres,state->pres_prev);
         }
         
         /* #######  END SET VARIABLES FOR NEXT ITERATION ###### */
index 9305dcb460208b402a642bad6618c9c5576123a8..908fd03ff061c93aed2683e9052dc611a5746450 100644 (file)
@@ -254,6 +254,13 @@ double do_md_openmm(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
     const char *ommOptions = NULL;
     void   *openmmData;
 
+#ifdef GMX_DOUBLE
+    /* Checks in cmake should prevent the compilation in double precision
+     * with OpenMM, but just to be sure we check here.
+     */
+    gmx_fatal(FARGS,"Compilation was performed in double precision, but OpenMM only supports single precision. If you want to use to OpenMM, compile in single precision.");
+#endif
+
     bAppend  = (Flags & MD_APPENDFILES);
     check_ir_old_tpx_versions(cr,fplog,ir,top_global);
 
@@ -526,7 +533,7 @@ double do_md_openmm(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
 
             openmm_copy_state(openmmData, state, &t, f, enerd, bX, bV, bF, do_ene);
 
-            upd_mdebin(mdebin, FALSE,TRUE,
+            upd_mdebin(mdebin,FALSE,TRUE,
                        t,mdatoms->tmass,enerd,state,lastbox,
                        shake_vir,force_vir,total_vir,pres,
                        ekind,mu_tot,constr);
index acc829cb22936f6489a4b202d76c7187b00f6b5a..239f4c7eb78b03845dbc6b56186e46cec7f5d736 100644 (file)
@@ -796,7 +796,7 @@ void* openmm_init(FILE *fplog, const char *platformOptStr,
             }
 
             /* macro set at build time  */
-#ifdef OpenMM_PLUGIN_DIR
+#ifdef OPENMM_PLUGIN_DIR
             if (!hasLoadedPlugins)
             {
                 loadedPlugins = Platform::loadPluginsFromDirectory(OPENMM_PLUGIN_DIR);
index ccf7b68009a96ce69f3bd74cb5a703f54ea480ca..38229c61569d3463187bb972a92f56a906681d18 100644 (file)
@@ -1106,6 +1106,7 @@ int main(int argc, char *argv[])
   int           nid_used;
   int           this_chainstart;
   int           prev_chainstart;
+  gmx_bool      bMerged;
     
   gmx_atomprop_t aps;
   
@@ -1332,6 +1333,7 @@ int main(int argc, char *argv[])
   this_chainstart     = 0;
     
   pdb_ch=NULL;
+  bMerged = FALSE;
   for (i=0; (i<natom); i++) 
   {
       ri = &pdba_all.resinfo[pdba_all.atom[i].resind];
@@ -1342,7 +1344,10 @@ int main(int argc, char *argv[])
       prev_resnum        = this_resnum;
       prev_chainid       = this_chainid;
       prev_chainnumber   = this_chainnumber;
-      prev_chainstart    = this_chainstart;
+      if (!bMerged)
+      {
+          prev_chainstart    = this_chainstart;
+      }
       
       this_atomname      = *pdba_all.atomname[i];
       this_atomnum       = (pdba_all.pdbinfo != NULL) ? pdba_all.pdbinfo[i].atomnr : i+1;
@@ -1372,10 +1377,11 @@ int main(int argc, char *argv[])
               select[0] = 'n';
           }
           
-          if (select[0] == 'y') 
+          bMerged = (select[0] == 'y');
+          if (bMerged) 
           {
               pdb_ch[nch-1].chainstart[pdb_ch[nch-1].nterpairs] = 
-              pdba_all.atom[i].resind - prev_chainstart;
+                  pdba_all.atom[i].resind - prev_chainstart;
               pdb_ch[nch-1].nterpairs++;
               srenew(pdb_ch[nch-1].chainstart,pdb_ch[nch-1].nterpairs+1);
           }
@@ -1560,23 +1566,25 @@ int main(int argc, char *argv[])
                  bHisMan,bArgMan,bGlnMan,angle,distance,&symtab,
                  nrtprename,rtprename);
       
-    for(i=0; i<cc->nterpairs; i++) {
-        
-      cc->chainstart[cc->nterpairs] = pdba->nres;
-                
-      find_nc_ter(pdba,cc->chainstart[i],cc->chainstart[i+1],
-                 &(cc->r_start[i]),&(cc->r_end[i]),rt);    
+        cc->chainstart[cc->nterpairs] = pdba->nres;
+        j = 0;
+        for(i=0; i<cc->nterpairs; i++)
+        {
+            find_nc_ter(pdba,cc->chainstart[i],cc->chainstart[i+1],
+                        &(cc->r_start[j]),&(cc->r_end[j]),rt);    
       
-        
-      if ( (cc->r_start[i]<0) || (cc->r_end[i]<0) ) {
-       printf("Problem with chain definition, or missing terminal residues.\n"
-              "This chain does not appear to contain a recognized chain molecule.\n"
-           "If this is incorrect, you can edit residuetypes.dat to modify the behavior.\n");
-           
-       cc->nterpairs = i;
-       break;
-      }
-    }
+            if (cc->r_start[j] >= 0 && cc->r_end[j] >= 0)
+            {
+                j++;
+            }
+        }
+        cc->nterpairs = j;
+        if (cc->nterpairs == 0)
+        {
+            printf("Problem with chain definition, or missing terminal residues.\n"
+                   "This chain does not appear to contain a recognized chain molecule.\n"
+                   "If this is incorrect, you can edit residuetypes.dat to modify the behavior.\n");
+        }
 
     /* Check for disulfides and other special bonds */
     nssbonds = mk_specbonds(pdba,x,bCysMan,&ssbonds,bVerbose);
@@ -1621,14 +1629,20 @@ int main(int argc, char *argv[])
             {
                 if(bTerMan && ntdblist>1)
                 {
-                    cc->ntdb[i] = choose_ter(ntdblist,tdblist,"Select start terminus type");
+                    sprintf(select,"Select start terminus type for %s-%d",
+                            *pdba->resinfo[cc->r_start[i]].name,
+                            pdba->resinfo[cc->r_start[i]].nr);
+                    cc->ntdb[i] = choose_ter(ntdblist,tdblist,select);
                 }
                 else
                 {
                     cc->ntdb[i] = tdblist[0];
                 }
                 
-                printf("Start terminus: %s\n",(cc->ntdb[i])->name);
+                printf("Start terminus %s-%d: %s\n",
+                       *pdba->resinfo[cc->r_start[i]].name,
+                       pdba->resinfo[cc->r_start[i]].nr,
+                       (cc->ntdb[i])->name);
                 sfree(tdblist);
             }
         }
@@ -1654,13 +1668,19 @@ int main(int argc, char *argv[])
             {
                 if(bTerMan && ntdblist>1)
                 {
-                    cc->ctdb[i] = choose_ter(ntdblist,tdblist,"Select end terminus type");
+                    sprintf(select,"Select end terminus type for %s-%d",
+                            *pdba->resinfo[cc->r_end[i]].name,
+                            pdba->resinfo[cc->r_end[i]].nr);
+                    cc->ctdb[i] = choose_ter(ntdblist,tdblist,select);
                 }
                 else
                 {
                     cc->ctdb[i] = tdblist[0];
                 }
-                printf("End terminus: %s\n",(cc->ctdb[i])->name);
+                printf("End terminus %s-%d: %s\n",
+                       *pdba->resinfo[cc->r_end[i]].name,
+                       pdba->resinfo[cc->r_end[i]].nr,
+                       (cc->ctdb[i])->name);
                 sfree(tdblist);
             }
         }
@@ -1819,7 +1839,7 @@ int main(int argc, char *argv[])
     pdb2top(top_file2,posre_fn,molname,pdba,&x,atype,&symtab,
            nrtp,restp,
            restp_chain,hb_chain,
-           cc->nterpairs,cc->ntdb,cc->ctdb,cc->r_start,cc->r_end,bAllowMissing,
+           cc->nterpairs,cc->ntdb,cc->ctdb,bAllowMissing,
            bVsites,bVsiteAromatics,forcefield,ffdir,
            mHmult,nssbonds,ssbonds,
            long_bond_dist,short_bond_dist,bDeuterate,bChargeGroups,bCmap,
index 95e601c9663679f7952e8ad8d65c6bdbfd0ca3fc..bc03decca477ca7a0f35808d68dbb096ce756a08 100644 (file)
@@ -1398,7 +1398,7 @@ void pdb2top(FILE *top_file, char *posre_fn, char *molname,
              int nrtp, t_restp rtp[],
              t_restp *restp, t_hackblock *hb,
              int nterpairs,t_hackblock **ntdb, t_hackblock **ctdb,
-             int *rn, int *rc, gmx_bool bAllowMissing,
+             gmx_bool bAllowMissing,
              gmx_bool bVsites, gmx_bool bVsiteAromatics,
              const char *ff, const char *ffdir,
              real mHmult,
index 6771c75a845d5e55548331628eb98e3e2e5472e6..6445b6a102970c4ca9013ce707414e818c1fe867 100644 (file)
@@ -2532,11 +2532,10 @@ void check_chargegroup_radii(const gmx_mtop_t *mtop,const t_inputrec *ir,
              * not be zero at the cut-off.
              */
             if (EVDW_IS_ZERO_AT_CUTOFF(ir->vdwtype) &&
-                rvdw1 + rvdw2 > ir->rlistlong - ir->rvdw)
+                rvdw1 + rvdw2 > ir->rlist - ir->rvdw)
             {
-                sprintf(warn_buf,"The sum of the two largest charge group radii (%f) is larger than %s (%f) - rvdw (%f)\n",
+                sprintf(warn_buf,"The sum of the two largest charge group radii (%f) is larger than rlist (%f) - rvdw (%f)\n",
                         rvdw1+rvdw2,
-                        ir->rlistlong > ir->rlist ? "rlistlong" : "rlist",
                         ir->rlist,ir->rvdw);
                 if (ir_NVE(ir))
                 {
index b9317a8db9b0df3ffccb637afed85fa3c6fb2051..fd0953c4229f3ae3be79df7c1b250a38ee9d48d0 100644 (file)
@@ -593,8 +593,12 @@ int mdrunner(int nthreads_requested, FILE *fplog,t_commrec *cr,int nfile,
         /* PME, if used, is done on all nodes with 1D decomposition */
         cr->npmenodes = 0;
         cr->duty = (DUTY_PP | DUTY_PME);
-        npme_major = cr->nnodes;
+        npme_major = 1;
         npme_minor = 1;
+        if (!EI_TPI(inputrec->eI))
+        {
+            npme_major = cr->nnodes;
+        }
         
         if (inputrec->ePBC == epbcSCREW)
         {
index 967632833e437f0d539cd49ed20f0f85343ebf53..18f3a6e3370eb2f86c5ffd797bd2497990b59765 100644 (file)
@@ -8,9 +8,12 @@ list(REMOVE_ITEM MDLIB_SOURCES ${NOT_MDLIB_SOURCES})
 
 add_library(md ${MDLIB_SOURCES})
 target_link_libraries(md gmx ${GMX_EXTRA_LIBRARIES} ${FFT_LIBRARIES} ${XML_LIBRARIES})
-set_target_properties(md PROPERTIES OUTPUT_NAME "md${GMX_LIBS_SUFFIX}" SOVERSION ${SOVERSION})
+set_target_properties(md PROPERTIES OUTPUT_NAME "md${GMX_LIBS_SUFFIX}" SOVERSION ${SOVERSION} INSTALL_NAME_DIR "${LIB_INSTALL_DIR}")
 
-install(TARGETS md DESTINATION ${LIB_INSTALL_DIR})
+install(TARGETS md DESTINATION ${LIB_INSTALL_DIR} COMPONENT libraries)
 
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/libmd.pc.cmakein ${CMAKE_CURRENT_BINARY_DIR}/libmd.pc @ONLY)
-install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libmd.pc DESTINATION ${LIB_INSTALL_DIR}/pkgconfig RENAME "libmd${GMX_LIBS_SUFFIX}.pc")
+install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libmd.pc
+        DESTINATION ${LIB_INSTALL_DIR}/pkgconfig
+        RENAME "libmd${GMX_LIBS_SUFFIX}.pc"
+        COMPONENT development)
index 67afee52d556f138f9e1652964ebf730d2a20e1e..2bb95ba20add64878949e01c99605405883e0016 100644 (file)
@@ -1291,7 +1291,6 @@ gmx_bool constrain_lincs(FILE *fplog,gmx_bool bLog,gmx_bool bEner,
         {
             cconerr(cr->dd,lincsd->nc,lincsd->bla,lincsd->bllen,xprime,pbc_null,
                     &ncons_loc,&p_ssd,&p_max,&p_imax);
-            lincsd->rmsd_data[0] = ncons_loc;
             /* Check if we are doing the second part of SD */
             if (ir->eI == eiSD2 && v == NULL)
             {
@@ -1304,6 +1303,12 @@ gmx_bool constrain_lincs(FILE *fplog,gmx_bool bLog,gmx_bool bEner,
             lincsd->rmsd_data[0] = ncons_loc;
             lincsd->rmsd_data[i] = p_ssd;
         }
+        else
+        {
+            lincsd->rmsd_data[0] = 0;
+            lincsd->rmsd_data[1] = 0;
+            lincsd->rmsd_data[2] = 0;
+        }
         if (bLog && fplog && lincsd->nc > 0)
         {
             fprintf(fplog,
index 37ca7fc4d05fd5d3151de3e9784df0fe0841978d..fb384823a24dedb5be4aaf0c1aecc364f2adfc2e 100644 (file)
@@ -1110,32 +1110,50 @@ real NPT_energy(t_inputrec *ir, t_state *state, t_extmass *MassQ)
 static real vrescale_gamdev(int ia, gmx_rng_t rng)
 /* Gamma distribution, adapted from numerical recipes */
 {
-  int j;
-  real am,e,s,v1,v2,x,y;
-  
-  if (ia < 6) {
-    x = 1.0;
-    for(j=1; j<=ia; j++) {
-      x *= gmx_rng_uniform_real(rng);
+    int j;
+    real am,e,s,v1,v2,x,y;
+    /* ia < 6: return -log of the product of ia uniform deviates; otherwise draw candidates until one is accepted. */
+    if (ia < 6)
+    {
+        do
+        {
+            x = 1.0;
+            for(j=1; j<=ia; j++)
+            {
+                x *= gmx_rng_uniform_real(rng);
+            }
+        }
+        while (x == 0); /* redraw so that log(x) below is never evaluated at zero */
+        x = -log(x);
+    }
+    else
+    {
+        do
+        {
+            do
+            {
+                do
+                {
+                    v1 = gmx_rng_uniform_real(rng);
+                    v2 = 2.0*gmx_rng_uniform_real(rng)-1.0;
+                }
+                while (v1*v1 + v2*v2 > 1.0 || /* redraw while (v1,v2) is outside the unit circle ... */
+                       v1*v1*GMX_REAL_MAX < 3.0*ia); /* ... or v1 is too small to divide by safely */
+                /* The last check above ensures that both x (3.0 > 2.0 in s)
+                 * and the pre-factor for e do not go out of range.
+                 */
+                y = v2/v1;
+                am = ia - 1;
+                s = sqrt(2.0*am + 1.0);
+                x = s*y + am;
+            }
+            while (x <= 0.0); /* discard non-positive candidates; log(x/am) below needs x > 0 */
+            e = (1.0 + y*y)*exp(am*log(x/am) - s*y);
+        }
+        while (gmx_rng_uniform_real(rng) > e); /* keep x once a fresh uniform deviate is <= e */
     }
-    x = -log(x);
-  } else {
-    do {
-      do {
-        do {
-          v1 = gmx_rng_uniform_real(rng);
-          v2 = 2.0*gmx_rng_uniform_real(rng)-1.0;
-        } while (v1*v1 + v2*v2 > 1.0);
-        y = v2/v1;
-        am = ia - 1;
-        s = sqrt(2.0*am + 1.0);
-        x = s*y + am;
-      } while (x <= 0.0);
-      e = (1.0 + y*y)*exp(am*log(x/am) - s*y);
-    } while (gmx_rng_uniform_real(rng) > e);
-  }
 
-  return x;
+    return x;
 }
 
 static real vrescale_sumnoises(int nn,gmx_rng_t rng)
index 1f225c3411d048c0368b9d8b9277f0aab78d7b4d..89740bcb51634ddc4e0966b6e09eaaf0f4044384 100644 (file)
@@ -202,7 +202,7 @@ void pr_ebin(FILE *fp,t_ebin *eb,int index,int nener,int nperline,
     char buf[30];
 
     rc = 0;
-       
+
     if (index < 0)
     {
         gmx_fatal(FARGS,"Invalid index in pr_ebin: %d",index);
@@ -215,11 +215,12 @@ void pr_ebin(FILE *fp,t_ebin *eb,int index,int nener,int nperline,
     {
         nener = index + nener;
     }
-       for(i=index; (i<nener) && rc>=0; ) {
-               if (bPrHead)
+    for(i=index; (i<nener) && rc>=0; ) 
+    {
+        if (bPrHead)
         {
-                       i0=i;
-                       for(j=0; (j<nperline) && (i<nener) && rc>=0; j++,i++)
+            i0=i;
+            for(j=0; (j<nperline) && (i<nener) && rc>=0; j++,i++)
             {
                 if (strncmp(eb->enm[i].name,"Pres",4) == 0)
                 {
@@ -232,33 +233,34 @@ void pr_ebin(FILE *fp,t_ebin *eb,int index,int nener,int nperline,
                     rc = fprintf(fp,"%15s",eb->enm[i].name);
                 }
             }
-                       
-                       if (rc >= 0)
+
+            if (rc >= 0)
             {
-                               rc = fprintf(fp,"\n");
+                rc = fprintf(fp,"\n");
             }
-            
-                       i=i0;
-               }
-               for(j=0; (j<nperline) && (i<nener) && rc>=0; j++,i++)
+
+            i=i0;
+        }
+        for(j=0; (j<nperline) && (i<nener) && rc>=0; j++,i++)
         {
             switch (prmode) {
-            case eprNORMAL: ee = eb->e[i].e; break;
-            case eprAVER:   ee = eb->e_sim[i].esum/eb->nsum_sim; break;
-            default: gmx_fatal(FARGS,"Invalid print mode %d in pr_ebin",prmode);
+                case eprNORMAL: ee = eb->e[i].e; break;
+                case eprAVER:   ee = eb->e_sim[i].esum/eb->nsum_sim; break;
+                default: gmx_fatal(FARGS,"Invalid print mode %d in pr_ebin",
+                                   prmode);
             }
-                       
-                       rc = fprintf(fp,"   %12.5e",ee);
-               }
-               if (rc >= 0)
+
+            rc = fprintf(fp,"   %12.5e",ee);
+        }
+        if (rc >= 0)
         {
-                       rc = fprintf(fp,"\n");
+            rc = fprintf(fp,"\n");
         }
-       }
-       if (rc < 0)
-       
-               gmx_fatal(FARGS,"Cannot write to logfile; maybe you are out of quota?");
-       }
+    }
+    if (rc < 0)
+    { 
+        gmx_fatal(FARGS,"Cannot write to logfile; maybe you are out of quota?");
+    }
 }
 
 #ifdef DEBUGEBIN
index 719938af7d6e4e669f849fad30ea235dfd6fd5d3..6d0917912c27648b8deb1868f6e2da31601b05e7 100644 (file)
@@ -454,6 +454,11 @@ void do_force_lowlevel(FILE       *fplog,   gmx_large_int_t step,
                     {
                         pme_flags |= GMX_PME_CALC_ENER_VIR;
                     }
+                    if (fr->n_tpi > 0)
+                    {
+                        /* We don't calculate f, but we do want the potential */
+                        pme_flags |= GMX_PME_CALC_POT;
+                    }
                     wallcycle_start(wcycle,ewcPMEMESH);
                     status = gmx_pme_do(fr->pmedata,
                                         md->start,md->homenr - fr->n_tpi,
index 03316c731e0c2d0641fb869c9e645b3b6120ee0d..7c01576d6ad289b270cee04684ddc2f6e4f7a6df 100644 (file)
@@ -772,7 +772,7 @@ void update_forcerec(FILE *log,t_forcerec *fr,matrix box)
 
 void set_avcsixtwelve(FILE *fplog,t_forcerec *fr,const gmx_mtop_t *mtop)
 {
-    const t_atoms *atoms;
+    const t_atoms *atoms,*atoms_tpi;
     const t_blocka *excl;
     int    mb,nmol,nmolc,i,j,tpi,tpj,j1,j2,k,n,nexcl,q;
 #if (defined SIZEOF_LONG_LONG_INT) && (SIZEOF_LONG_LONG_INT >= 8)    
@@ -882,15 +882,9 @@ void set_avcsixtwelve(FILE *fplog,t_forcerec *fr,const gmx_mtop_t *mtop)
             /* Only correct for the interaction of the test particle
              * with the rest of the system.
              */
-            atoms = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
-            if (q == 0)
-            {
-                tpi = atoms->atom[atoms->nr-1].type;
-            }
-            else
-            {
-                tpi = atoms->atom[atoms->nr-1].typeB;
-            }
+            atoms_tpi =
+                &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
+
             npair = 0;
             for(mb=0; mb<mtop->nmolblock; mb++) {
                 nmol  = mtop->molblock[mb].nmol;
@@ -900,9 +894,14 @@ void set_avcsixtwelve(FILE *fplog,t_forcerec *fr,const gmx_mtop_t *mtop)
                     /* Remove the interaction of the test charge group
                      * with itself.
                      */
-                    if (mb == mtop->nmolblock-1 && j >= atoms->nr - fr->n_tpi)
+                    if (mb == mtop->nmolblock-1)
                     {
                         nmolc--;
+                        
+                        if (mb == 0 && nmol == 1)
+                        {
+                            gmx_fatal(FARGS,"Old format tpr with TPI, please generate a new tpr file");
+                        }
                     }
                     if (q == 0)
                     {
@@ -912,16 +911,27 @@ void set_avcsixtwelve(FILE *fplog,t_forcerec *fr,const gmx_mtop_t *mtop)
                     {
                         tpj = atoms->atom[j].typeB;
                     }
-                    if (bBHAM)
-                    {
-                        csix    += nmolc*BHAMC(nbfp,ntp,tpi,tpj);
-                    }
-                    else
+                    for(i=0; i<fr->n_tpi; i++)
                     {
-                        csix    += nmolc*C6 (nbfp,ntp,tpi,tpj);
-                        ctwelve += nmolc*C12(nbfp,ntp,tpi,tpj);
+                        if (q == 0)
+                        {
+                            tpi = atoms_tpi->atom[i].type;
+                        }
+                        else
+                        {
+                            tpi = atoms_tpi->atom[i].typeB;
+                        }
+                        if (bBHAM)
+                        {
+                            csix    += nmolc*BHAMC(nbfp,ntp,tpi,tpj);
+                        }
+                        else
+                        {
+                            csix    += nmolc*C6 (nbfp,ntp,tpi,tpj);
+                            ctwelve += nmolc*C12(nbfp,ntp,tpi,tpj);
+                        }
+                        npair += nmolc;
                     }
-                    npair += nmolc;
                 }
             }
         }
index 325ab67156555c26a32fea5673271169e9bd526d..6fa2d7b6b048d625d086c9fc3a216e630e9047d8 100644 (file)
@@ -154,43 +154,6 @@ int print_nblist(int natoms, t_nblist *nl)
     return 0;    
 }
 
-typedef union {
-    real numlog;
-    int exp;
-} u_table;
-
-void fill_log_table(const int n, real *table)
-{
-    u_table log_table;
-    real logfactor;
-    int i;
-    
-    int incr = 1 << (23-n);
-    int p=pow(2,n);
-
-    logfactor = 1.0/log(2.0);
-    
-    log_table.exp = 0x3F800000;
-    
-    for(i=0;i<p;++i)
-    {
-        /* log2(numlog)=log(numlog)/log(2.0) */
-        table[i]=log(log_table.numlog)*logfactor; 
-        log_table.exp+=incr;
-    }
-}
-
-
-real table_log(real val, const real *table, const int n)
-{
-    int *const exp_ptr = ((int*)&val);
-    int x              = *exp_ptr;
-    const int log_2    = ((x>>23) & 255) - 127;
-    x &= 0x7FFFFF;
-    x = x >> (23-n);
-    val = table[x];
-    return ((val+log_2)*0.69314718);  
-}
 
 void gb_pd_send(t_commrec *cr, real *send_data, int nr)
 {
@@ -418,11 +381,6 @@ int init_gb_still(const t_commrec *cr, t_forcerec  *fr,
     return 0;
 }
 
-
-
-#define LOG_TABLE_ACCURACY 15 /* Accuracy of the table logarithm */
-
-
 /* Initialize all GB datastructs and compute polarization energies */
 int init_gb(gmx_genborn_t **p_born,
             const t_commrec *cr, t_forcerec *fr, const t_inputrec *ir,
@@ -443,7 +401,6 @@ int init_gb(gmx_genborn_t **p_born,
     snew(born,1);
     *p_born = born;
 
-       born->nr = fr->natoms_force;
     born->nr  = natoms;
     
     snew(born->drobc, natoms);
@@ -537,12 +494,6 @@ int init_gb(gmx_genborn_t **p_born,
         }
     }
         
-    /* Init the logarithm table */
-    p=pow(2,LOG_TABLE_ACCURACY);
-    snew(born->log_table, p);
-    
-    fill_log_table(LOG_TABLE_ACCURACY, born->log_table);
-    
     /* Allocate memory for work arrays for temporary use */
     snew(born->work,natoms+4);
     snew(born->count,natoms);
@@ -722,8 +673,8 @@ calc_gb_rad_hct(t_commrec *cr,t_forcerec *fr,int natoms, gmx_localtop_t *top,
     {
         ai     = nl->iinr[i];
             
-        nj0    = nl->jindex[ai];            
-        nj1    = nl->jindex[ai+1];
+        nj0    = nl->jindex[i];            
+        nj1    = nl->jindex[i+1];
         
         /* Load shifts for this list */
         shift   = nl->shift[i];
@@ -789,8 +740,6 @@ calc_gb_rad_hct(t_commrec *cr,t_forcerec *fr,int natoms, gmx_localtop_t *top,
                 sk2_rinv = sk2*rinv;
                 prod     = 0.25*sk2_rinv;
                 
-                /* log_term = table_log(uij*lij_inv,born->log_table,
-                   LOG_TABLE_ACCURACY); */
                 log_term = log(uij*lij_inv);
                 
                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term +
@@ -1013,7 +962,6 @@ calc_gb_rad_obc(t_commrec *cr, t_forcerec *fr, int natoms, gmx_localtop_t *top,
                 
                 log_term = log(uij*lij_inv);
                 
-                /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
                 
                 if(rai < sk-dr)
@@ -1604,35 +1552,31 @@ real calc_gb_chainrule(int natoms, t_nblist *nl, real *dadx, real *dvda, rvec x[
     n  = 0;    
     rb = born->work;
         
-    
-    n0 = md->start;
-    n1 = md->start+md->homenr+1+natoms/2;
-    
+  n0 = 0;
+  n1 = natoms;
+  
     if(gb_algorithm==egbSTILL) 
     {
         for(i=n0;i<n1;i++)
         {
-            k = i % natoms;
-            rbi   = born->bRad[k];
-            rb[k] = (2 * rbi * rbi * dvda[k])/ONE_4PI_EPS0;
+          rbi   = born->bRad[i];
+          rb[i] = (2 * rbi * rbi * dvda[i])/ONE_4PI_EPS0;
         }
     }
     else if(gb_algorithm==egbHCT) 
     {
         for(i=n0;i<n1;i++)
         {
-            k = i % natoms;
-            rbi   = born->bRad[k];
-            rb[k] = rbi * rbi * dvda[k];
+          rbi   = born->bRad[i];
+          rb[i] = rbi * rbi * dvda[i];
         }
     }
     else if(gb_algorithm==egbOBC) 
     {
         for(i=n0;i<n1;i++)
         {
-            k = i % natoms;
-            rbi   = born->bRad[k];
-            rb[k] = rbi * rbi * born->drobc[k] * dvda[k];
+          rbi   = born->bRad[i];
+          rb[i] = rbi * rbi * born->drobc[i] * dvda[i];
         }
     }
     
@@ -1784,17 +1728,17 @@ calc_gb_forces(t_commrec *cr, t_mdatoms *md, gmx_genborn_t *born, gmx_localtop_t
 #if ( defined(GMX_IA32_SSE2) || defined(GMX_X86_64_SSE2) || (defined(GMX_DOUBLE) && defined(GMX_SSE2)) )
     if(fr->UseOptimizedKernels)
     {
-        calc_gb_chainrule_sse2_double(born->nr, &(fr->gblist), fr->dadx, fr->dvda, 
+        calc_gb_chainrule_sse2_double(fr->natoms_force, &(fr->gblist), fr->dadx, fr->dvda, 
                                       x[0], f[0], fr->fshift[0],  fr->shift_vec[0],
                                       gb_algorithm, born, md); 
     }
     else
     {
-        calc_gb_chainrule(born->nr, &(fr->gblist), fr->dadx, fr->dvda, 
+        calc_gb_chainrule(fr->natoms_force, &(fr->gblist), fr->dadx, fr->dvda, 
                           x, f, fr->fshift, fr->shift_vec, gb_algorithm, born, md); 
     }
 #else
-    calc_gb_chainrule(born->nr, &(fr->gblist), fr->dadx, fr->dvda, 
+    calc_gb_chainrule(fr->natoms_force, &(fr->gblist), fr->dadx, fr->dvda, 
                       x, f, fr->fshift, fr->shift_vec, gb_algorithm, born, md);
 #endif
     
@@ -1804,19 +1748,19 @@ calc_gb_forces(t_commrec *cr, t_mdatoms *md, gmx_genborn_t *born, gmx_localtop_t
     /* x86 or x86-64 with GCC inline assembly and/or SSE intrinsics */
     if(fr->UseOptimizedKernels)
     {
-        calc_gb_chainrule_sse2_single(born->nr, &(fr->gblist), fr->dadx, fr->dvda, 
+        calc_gb_chainrule_sse2_single(fr->natoms_force, &(fr->gblist), fr->dadx, fr->dvda, 
                                       x[0], f[0], fr->fshift[0], fr->shift_vec[0], 
                                       gb_algorithm, born, md);
     }
     else
     {
-        calc_gb_chainrule(born->nr, &(fr->gblist), fr->dadx, fr->dvda, 
+        calc_gb_chainrule(fr->natoms_force, &(fr->gblist), fr->dadx, fr->dvda, 
                           x, f, fr->fshift, fr->shift_vec, gb_algorithm, born, md);    
     }
     
 #else
     /* Calculate the forces due to chain rule terms with non sse code */
-    calc_gb_chainrule(born->nr, &(fr->gblist), fr->dadx, fr->dvda, 
+    calc_gb_chainrule(fr->natoms_force, &(fr->gblist), fr->dadx, fr->dvda, 
                       x, f, fr->fshift, fr->shift_vec, gb_algorithm, born, md);    
 #endif    
 #endif
index 43ce789971492c29ae3c81ec828315c3a7d0d8de..36111bef22132bf74ac91a79085b45a803eecc1c 100644 (file)
@@ -366,7 +366,7 @@ genborn_allvsall_calc_still_radii(t_forcerec *           fr,
     
     natoms              = mdatoms->nr;
        ni0                 = mdatoms->start;
-       ni1                 = mdatoms->homenr;
+       ni1                 = mdatoms->start+mdatoms->homenr;
     factor  = 0.5*ONE_4PI_EPS0;
     n = 0;
     
@@ -583,7 +583,7 @@ genborn_allvsall_calc_hct_obc_radii(t_forcerec *           fr,
     
     natoms              = mdatoms->nr;
        ni0                 = mdatoms->start;
-       ni1                 = mdatoms->homenr;
+       ni1                 = mdatoms->start+mdatoms->homenr;
 
     n = 0;
     prod = 0;
@@ -972,7 +972,7 @@ genborn_allvsall_calc_chainrule(t_forcerec *           fr,
     
     natoms              = mdatoms->nr;
        ni0                 = mdatoms->start;
-       ni1                 = mdatoms->homenr;
+       ni1                 = mdatoms->start+mdatoms->homenr;
     dadx                = fr->dadx;
     
     aadata = (gmx_allvsallgb2_data_t *)work;
index c2673c972ca797048016a7aedf499738201e3c9f..fb207b4f5f5e07a951489407425188afec23d96e 100644 (file)
@@ -792,145 +792,142 @@ calc_gb_chainrule_sse2_double(int natoms, t_nblist *nl, double *dadx, double *dv
     
        rb     = born->work; 
     
-    jjnr   = nl->jjnr;
-    
+  jjnr   = nl->jjnr;
+  
        /* Loop to get the proper form for the Born radius term, sse style */   
-    n0 = md->start;
-    n1 = md->start+md->homenr+1+natoms/2;
+  n0 = 0;
+  n1 = natoms;
     
        if(gb_algorithm==egbSTILL) 
        {
                for(i=n0;i<n1;i++)
                {
-            k = i % natoms;
-                       rbi   = born->bRad[k];
-                       rb[k] = (2 * rbi * rbi * dvda[k])/ONE_4PI_EPS0;
+      rbi   = born->bRad[i];
+                       rb[i] = (2 * rbi * rbi * dvda[i])/ONE_4PI_EPS0;
                }
        }
        else if(gb_algorithm==egbHCT) 
        {
                for(i=n0;i<n1;i++)
                {
-            k = i % natoms;
-                       rbi   = born->bRad[k];
-                       rb[k] = rbi * rbi * dvda[k];
+      rbi   = born->bRad[i];
+                       rb[i] = rbi * rbi * dvda[i];
                }
        }
        else if(gb_algorithm==egbOBC) 
        {
                for(i=n0;i<n1;i++)
                {
-            k = i % natoms;
-                       rbi   = born->bRad[k];
-                       rb[k] = rbi * rbi * born->drobc[k] * dvda[k];
+      rbi   = born->bRad[i]; /* index with i: k is no longer set in this loop */
+                       rb[i] = rbi * rbi * born->drobc[i] * dvda[i];
                }
        }
     
-    jz = _mm_setzero_pd();
-    
-    n = j3A = j3B = 0;
-    
+  jz = _mm_setzero_pd();
+  
+  n = j3A = j3B = 0;
+  
        for(i=0;i<nl->nri;i++)
        {
-        ii     = nl->iinr[i];
+    ii     = nl->iinr[i];
                ii3        = ii*3;
-        is3    = 3*nl->shift[i];     
-        shX    = shiftvec[is3];  
-        shY    = shiftvec[is3+1];
-        shZ    = shiftvec[is3+2];
-        nj0    = nl->jindex[i];      
-        nj1    = nl->jindex[i+1];    
-        
-        ix     = _mm_set1_pd(shX+x[ii3+0]);
+    is3    = 3*nl->shift[i];     
+    shX    = shiftvec[is3];  
+    shY    = shiftvec[is3+1];
+    shZ    = shiftvec[is3+2];
+    nj0    = nl->jindex[i];      
+    nj1    = nl->jindex[i+1];    
+    
+    ix     = _mm_set1_pd(shX+x[ii3+0]);
                iy     = _mm_set1_pd(shY+x[ii3+1]);
                iz     = _mm_set1_pd(shZ+x[ii3+2]);
-                               
+    
                rbai   = _mm_load1_pd(rb+ii);                   
                fix    = _mm_setzero_pd();
                fiy    = _mm_setzero_pd();
                fiz    = _mm_setzero_pd();      
+    
         
-        
-        for(k=nj0;k<nj1-1;k+=2)
+    for(k=nj0;k<nj1-1;k+=2)
                {
                        jnrA        = jjnr[k];   
                        jnrB        = jjnr[k+1];
-            
-            j3A         = 3*jnrA;  
+      
+      j3A         = 3*jnrA;  
                        j3B         = 3*jnrB;
             
-            GMX_MM_LOAD_1RVEC_2POINTERS_PD(x+j3A,x+j3B,jx,jy,jz);
-            
+      GMX_MM_LOAD_1RVEC_2POINTERS_PD(x+j3A,x+j3B,jx,jy,jz);
+      
                        dx          = _mm_sub_pd(ix,jx);
                        dy          = _mm_sub_pd(iy,jy);
                        dz          = _mm_sub_pd(iz,jz);
-            
-            GMX_MM_LOAD_2VALUES_PD(rb+jnrA,rb+jnrB,rbaj);
-            
+      
+      GMX_MM_LOAD_2VALUES_PD(rb+jnrA,rb+jnrB,rbaj);
+      
                        /* load chain rule terms for j1-4 */
                        f_gb        = _mm_load_pd(dadx);
                        dadx += 2;
                        f_gb_ai     = _mm_load_pd(dadx);
                        dadx += 2;
                        
-            /* calculate scalar force */
-            f_gb    = _mm_mul_pd(f_gb,rbai); 
-            f_gb_ai = _mm_mul_pd(f_gb_ai,rbaj);
-            f_gb    = _mm_add_pd(f_gb,f_gb_ai);
-            
-            tx     = _mm_mul_pd(f_gb,dx);
-            ty     = _mm_mul_pd(f_gb,dy);
-            tz     = _mm_mul_pd(f_gb,dz);
-            
-            fix    = _mm_add_pd(fix,tx);
-            fiy    = _mm_add_pd(fiy,ty);
-            fiz    = _mm_add_pd(fiz,tz);
-            
-            GMX_MM_DECREMENT_1RVEC_2POINTERS_PD(f+j3A,f+j3B,tx,ty,tz);
+      /* calculate scalar force */
+      f_gb    = _mm_mul_pd(f_gb,rbai); 
+      f_gb_ai = _mm_mul_pd(f_gb_ai,rbaj);
+      f_gb    = _mm_add_pd(f_gb,f_gb_ai);
+      
+      tx     = _mm_mul_pd(f_gb,dx);
+      ty     = _mm_mul_pd(f_gb,dy);
+      tz     = _mm_mul_pd(f_gb,dz);
+      
+      fix    = _mm_add_pd(fix,tx);
+      fiy    = _mm_add_pd(fiy,ty);
+      fiz    = _mm_add_pd(fiz,tz);
+      
+      GMX_MM_DECREMENT_1RVEC_2POINTERS_PD(f+j3A,f+j3B,tx,ty,tz);
                }
-        
+    
                /*deal with odd elements */
                if(k<nj1) 
         {
-                       jnrA        = jjnr[k];   
-            j3A         = 3*jnrA;  
-            
-            GMX_MM_LOAD_1RVEC_1POINTER_PD(x+j3A,jx,jy,jz);
-            
-                       dx          = _mm_sub_sd(ix,jx);
-                       dy          = _mm_sub_sd(iy,jy);
-                       dz          = _mm_sub_sd(iz,jz);
-            
-            GMX_MM_LOAD_1VALUE_PD(rb+jnrA,rbaj);
-            
-                       /* load chain rule terms */
-                       f_gb        = _mm_load_pd(dadx);
-                       dadx += 2;
-                       f_gb_ai     = _mm_load_pd(dadx);
-                       dadx += 2;
-                       
-            /* calculate scalar force */
-            f_gb    = _mm_mul_sd(f_gb,rbai); 
-            f_gb_ai = _mm_mul_sd(f_gb_ai,rbaj);
-            f_gb    = _mm_add_sd(f_gb,f_gb_ai);
-            
-            tx     = _mm_mul_sd(f_gb,dx);
-            ty     = _mm_mul_sd(f_gb,dy);
-            tz     = _mm_mul_sd(f_gb,dz);
-            
-            fix    = _mm_add_sd(fix,tx);
-            fiy    = _mm_add_sd(fiy,ty);
-            fiz    = _mm_add_sd(fiz,tz);
-            
-            GMX_MM_DECREMENT_1RVEC_1POINTER_PD(f+j3A,tx,ty,tz);
+          jnrA        = jjnr[k];   
+          j3A         = 3*jnrA;  
+          
+          GMX_MM_LOAD_1RVEC_1POINTER_PD(x+j3A,jx,jy,jz);
+          
+          dx          = _mm_sub_sd(ix,jx);
+          dy          = _mm_sub_sd(iy,jy);
+          dz          = _mm_sub_sd(iz,jz);
+          
+          GMX_MM_LOAD_1VALUE_PD(rb+jnrA,rbaj);
+          
+          /* load chain rule terms */
+          f_gb        = _mm_load_pd(dadx);
+          dadx += 2;
+          f_gb_ai     = _mm_load_pd(dadx);
+          dadx += 2;
+          
+          /* calculate scalar force */
+          f_gb    = _mm_mul_sd(f_gb,rbai); 
+          f_gb_ai = _mm_mul_sd(f_gb_ai,rbaj);
+          f_gb    = _mm_add_sd(f_gb,f_gb_ai);
+          
+          tx     = _mm_mul_sd(f_gb,dx);
+          ty     = _mm_mul_sd(f_gb,dy);
+          tz     = _mm_mul_sd(f_gb,dz);
+          
+          fix    = _mm_add_sd(fix,tx);
+          fiy    = _mm_add_sd(fiy,ty);
+          fiz    = _mm_add_sd(fiz,tz);
+          
+          GMX_MM_DECREMENT_1RVEC_1POINTER_PD(f+j3A,tx,ty,tz);
         } 
-        
+    
                /* fix/fiy/fiz now contain four partial force terms, that all should be
-         * added to the i particle forces and shift forces. 
-         */
+     * added to the i particle forces and shift forces. 
+     */
                gmx_mm_update_iforce_1atom_pd(&fix,&fiy,&fiz,f+ii3,fshift+is3);
        }       
-    
+  
        return 0;       
 }
 
index 99400b955ddd1b4c2c69b0d5996cc62bb08e23f7..9ff8bfc362f42f58d6110ad9134281fbb059754d 100644 (file)
@@ -1344,34 +1344,31 @@ float calc_gb_chainrule_sse2_single(int natoms, t_nblist *nl, float *dadx, float
        /* Loop to get the proper form for the Born radius term, sse style */
        offset=natoms%4;
        
-    n0 = md->start;
-    n1 = md->start+md->homenr+1+natoms/2;
-    
+  n0 = 0;
+  n1 = natoms;
+  
        if(gb_algorithm==egbSTILL) 
        {
                for(i=n0;i<n1;i++)
                {
-            k = i % natoms;
-                       rbi   = born->bRad[k];
-                       rb[k] = (2 * rbi * rbi * dvda[k])/ONE_4PI_EPS0;
+      rbi   = born->bRad[i];
+                       rb[i] = (2 * rbi * rbi * dvda[i])/ONE_4PI_EPS0;
                }
        }
        else if(gb_algorithm==egbHCT) 
        {
                for(i=n0;i<n1;i++)
                {
-            k = i % natoms;
-                       rbi   = born->bRad[k];
-                       rb[k] = rbi * rbi * dvda[k];
+      rbi   = born->bRad[i];
+                       rb[i] = rbi * rbi * dvda[i];
                }
        }
        else if(gb_algorithm==egbOBC) 
        {
                for(i=n0;i<n1;i++)
                {
-            k = i % natoms;
-                       rbi   = born->bRad[k];
-                       rb[k] = rbi * rbi * born->drobc[k] * dvda[k];
+      rbi   = born->bRad[i];
+                       rb[i] = rbi * rbi * born->drobc[i] * dvda[i];
                }
        }
     
index 467f16c9fa9452702b63f5cebbf9a6e56b99dd3d..342bba6ceac386795fc58187dfcd12c57b137bc6 100644 (file)
@@ -49,6 +49,13 @@ static gmx_bool gmx_fft_threads_initialized=FALSE;
 #define FFTW_UNLOCK 
 #endif /* GMX_THREADS */
 
+/* We assume here that aligned memory starts at a multiple of 16 bytes and unaligned memory starts at a multiple of 8 bytes. The latter is guaranteed for all malloc implementations.
+   Consequences:
+   - It is not allowed to use these FFT plans with memory whose starting address is not a multiple of 8 bytes.
+     This is OK as long as the memory comes directly from malloc and is not some subarray within allocated memory.
+   - This has to be fixed if any future architecture requires memory to be aligned to multiples of 32 bytes.
+*/
+
 struct gmx_fft
 {
     /* Three alternatives (unaligned/aligned, out-of-place/in-place, forward/backward)
@@ -743,7 +750,7 @@ gmx_fft_1d               (gmx_fft_t                  fft,
                           void *                     in_data,
                           void *                     out_data)
 {
-    int           aligned   = (((size_t)in_data & (size_t)out_data & 0xf)==0);
+    int           aligned   = ((((size_t)in_data | (size_t)out_data) & 0xf)==0);
     int           inplace   = (in_data == out_data);
     int           isforward = (dir == GMX_FFT_FORWARD);
     
@@ -777,7 +784,7 @@ gmx_fft_1d_real          (gmx_fft_t                  fft,
                           void *                     in_data,
                           void *                     out_data)
 {
-    int           aligned   = (((size_t)in_data & (size_t)out_data & 0xf)==0);
+    int           aligned   = ((((size_t)in_data | (size_t)out_data) & 0xf)==0);
     int           inplace   = (in_data == out_data);
     int           isforward = (dir == GMX_FFT_REAL_TO_COMPLEX);
     
@@ -818,7 +825,7 @@ gmx_fft_2d               (gmx_fft_t                  fft,
                           void *                     in_data,
                           void *                     out_data)
 {
-    int           aligned   = (((size_t)in_data & (size_t)out_data & 0xf)==0);
+    int           aligned   = ((((size_t)in_data | (size_t)out_data) & 0xf)==0);
     int           inplace   = (in_data == out_data);
     int           isforward = (dir == GMX_FFT_FORWARD);
     
@@ -844,7 +851,7 @@ gmx_fft_2d_real          (gmx_fft_t                  fft,
                           void *                     in_data,
                           void *                     out_data)
 {
-    int           aligned   = (((size_t)in_data & (size_t)out_data & 0xf)==0);
+    int           aligned   = ((((size_t)in_data | (size_t)out_data) & 0xf)==0);
     int           inplace   = (in_data == out_data);
     int           isforward = (dir == GMX_FFT_REAL_TO_COMPLEX);
     
@@ -880,7 +887,7 @@ gmx_fft_3d               (gmx_fft_t                  fft,
                           void *                     in_data,
                           void *                     out_data)
 {
-    int           aligned   = (((size_t)in_data & (size_t)out_data & 0xf)==0);
+    int           aligned   = ((((size_t)in_data | (size_t)out_data) & 0xf)==0);
     int           inplace   = (in_data == out_data);
     int           isforward = (dir == GMX_FFT_FORWARD);
     
@@ -906,7 +913,7 @@ gmx_fft_3d_real          (gmx_fft_t                  fft,
                           void *                     in_data,
                           void *                     out_data)
 {
-    int           aligned   = (((size_t)in_data & (size_t)out_data & 0xf)==0);
+    int           aligned   = ((((size_t)in_data | (size_t)out_data) & 0xf)==0);
     int           inplace   = (in_data == out_data);
     int           isforward = (dir == GMX_FFT_REAL_TO_COMPLEX);
     
@@ -974,5 +981,5 @@ gmx_many_fft_destroy(gmx_fft_t    fft)
 
 #else
 int
-gmx_fft_fftw2_empty;
-#endif /* GMX_FFT_FFTW2 */
+gmx_fft_fftw3_empty;
+#endif /* GMX_FFT_FFTW3 */
index 851153811bbd80b533e87ca86612c179eab3af43..47fbcdc4802993e1a1c880f016622e62c0c0aac5 100644 (file)
@@ -330,18 +330,23 @@ void wallcycle_sum(t_commrec *cr, gmx_wallcycle_t wc,double cycles[])
 }
 
 static void print_cycles(FILE *fplog, double c2t, const char *name, int nnodes,
-                        int n, gmx_cycles_t c, gmx_cycles_t tot)
+                         int n, double c, double tot) /* Writes one formatted row for 'name' to fplog; writes nothing when c <= 0 */
 {
-  char num[11];
+    char num[11]; /* room for the width-10 count field written below plus the NUL (assumes n has at most 10 digits) */
   
-  if (c > 0) {
-    if (n > 0)
-      sprintf(num,"%10d",n);
-    else
-      sprintf(num,"          ");
-    fprintf(fplog," %-19s %4d %10s %12.3f %10.1f   %5.1f\n",
-           name,nnodes,num,c*1e-9,c*c2t,100*(double)c/(double)tot);
-  }
+    if (c > 0) /* rows with a non-positive c are skipped entirely */
+    {
+        if (n > 0)
+        {
+            sprintf(num,"%10d",n);
+        }
+        else
+        {
+            sprintf(num,"          "); /* no count to show: leave the column blank */
+        }
+        fprintf(fplog," %-19s %4d %10s %12.3f %10.1f   %5.1f\n",
+                name,nnodes,num,c*1e-9,c*c2t,100*c/tot); /* columns: name, nnodes, count, c*1e-9, c*c2t, c as a percentage of tot */
+    }
 }
 
 static gmx_bool subdivision(int ewc)
@@ -350,7 +355,7 @@ static gmx_bool subdivision(int ewc)
 }
 
 void wallcycle_print(FILE *fplog, int nnodes, int npme, double realtime,
-                    gmx_wallcycle_t wc, double cycles[])
+                     gmx_wallcycle_t wc, double cycles[])
 {
     double c2t,tot,sum;
     int    i,j,npp;
index b351bb847fb573d76ce457eec5b28696d84a5a19..394ca899232c84da7056ea7b194111faabeed95c 100644 (file)
@@ -63,8 +63,10 @@ static const char *conrmsd_nm[] = { "Constr. rmsd", "Constr.2 rmsd" };
 
 static const char *boxs_nm[] = { "Box-X", "Box-Y", "Box-Z" };
 
-static const char *tricl_boxs_nm[] = { "Box-XX", "Box-YX", "Box-YY",
-    "Box-ZX", "Box-ZY", "Box-ZZ" };
+static const char *tricl_boxs_nm[] = { 
+    "Box-XX", "Box-YY", "Box-ZZ",
+    "Box-YX", "Box-ZX", "Box-ZY" 
+};
 
 static const char *vol_nm[] = { "Volume" };
 
@@ -179,7 +181,9 @@ t_mdebin *init_mdebin(ener_file_t fp_ene,
         md->bEInd[i]=FALSE;
     }
 
-    for(i=0; i<F_NRE; i++) {
+#ifndef GMX_OPENMM
+    for(i=0; i<F_NRE; i++)
+    {
         md->bEner[i] = FALSE;
         if (i == F_LJ)
             md->bEner[i] = !bBHAM;
@@ -238,6 +242,13 @@ t_mdebin *init_mdebin(ener_file_t fp_ene,
         else
             md->bEner[i] = (gmx_mtop_ftype_count(mtop,i) > 0);
     }
+#else
+    /* OpenMM always produces only the following 4 energy terms */
+    md->bEner[F_EPOT] = TRUE;
+    md->bEner[F_EKIN] = TRUE;
+    md->bEner[F_ETOT] = TRUE;
+    md->bEner[F_TEMP] = TRUE;
+#endif
 
     md->f_nre=0;
     for(i=0; i<F_NRE; i++)
@@ -279,8 +290,9 @@ t_mdebin *init_mdebin(ener_file_t fp_ene,
     }
     if (md->bDynBox)
     {
-        md->ib    = get_ebin_space(md->ebin, md->bTricl ? NTRICLBOXS :
-                                   NBOXS, md->bTricl ? tricl_boxs_nm : boxs_nm,
+        md->ib    = get_ebin_space(md->ebin, 
+                                   md->bTricl ? NTRICLBOXS : NBOXS, 
+                                   md->bTricl ? tricl_boxs_nm : boxs_nm,
                                    unit_length);
         md->ivol  = get_ebin_space(md->ebin, 1, vol_nm,  unit_volume);
         md->idens = get_ebin_space(md->ebin, 1, dens_nm, unit_density_SI);
@@ -668,20 +680,23 @@ void upd_mdebin(t_mdebin *md, gmx_bool write_dhdl,
     }
     if (md->bDynBox)
     {
+        int nboxs;
         if(md->bTricl)
         {
             bs[0] = box[XX][XX];
-            bs[1] = box[YY][XX];
-            bs[2] = box[YY][YY];
-            bs[3] = box[ZZ][XX];
-            bs[4] = box[ZZ][YY];
-            bs[5] = box[ZZ][ZZ];
+            bs[1] = box[YY][YY];
+            bs[2] = box[ZZ][ZZ];
+            bs[3] = box[YY][XX];
+            bs[4] = box[ZZ][XX];
+            bs[5] = box[ZZ][YY];
+            nboxs=NTRICLBOXS;
         }
         else
         {
             bs[0] = box[XX][XX];
             bs[1] = box[YY][YY];
             bs[2] = box[ZZ][ZZ];
+            nboxs=NBOXS;
         }
         vol  = box[XX][XX]*box[YY][YY]*box[ZZ][ZZ];
         dens = (tmass*AMU)/(vol*NANO*NANO*NANO);
@@ -703,7 +718,8 @@ void upd_mdebin(t_mdebin *md, gmx_bool write_dhdl,
                 }
             }
         }
-        add_ebin(md->ebin,md->ib   ,NBOXS,bs   ,bSum);
+
+        add_ebin(md->ebin,md->ib   ,nboxs,bs   ,bSum);
         add_ebin(md->ebin,md->ivol ,1    ,&vol ,bSum);
         add_ebin(md->ebin,md->idens,1    ,&dens,bSum);
         add_ebin(md->ebin,md->ipv  ,1    ,&pv  ,bSum);
index 8d1182ed59d03f25af16e9cef8c777636ffffe8f..c30a47540a1bccb43c958d8888df921372abe09e 100644 (file)
@@ -1549,9 +1549,6 @@ static real gather_energy_bsplines(gmx_pme_t pme,real *grid,
     
     
     order = pme->pme_order;
-    thx   = atc->theta[XX];
-    thy   = atc->theta[YY];
-    thz   = atc->theta[ZZ];
     
     energy = 0;
     for(n=0; (n<atc->n); n++) {
@@ -2056,12 +2053,16 @@ int gmx_pme_init(gmx_pme_t *         pmedata,
     pme->nnodes_minor        = nnodes_minor;
 
 #ifdef GMX_MPI
-    if (PAR(cr)) 
+    if (nnodes_major*nnodes_minor > 1 && PAR(cr)) 
     {
         pme->mpi_comm        = cr->mpi_comm_mygroup;
         
         MPI_Comm_rank(pme->mpi_comm,&pme->nodeid);
         MPI_Comm_size(pme->mpi_comm,&pme->nnodes);
+        if (pme->nnodes != nnodes_major*nnodes_minor)
+        {
+            gmx_incons("PME node count mismatch");
+        }
     }
 #endif
 
@@ -2348,7 +2349,7 @@ void gmx_pme_calc_energy(gmx_pme_t pme,int n,rvec *x,real *q,real *V)
     /* We only use the A-charges grid */
     grid = pme->pmegridA;
 
-    spread_on_grid(pme,atc,grid,TRUE,FALSE);
+    spread_on_grid(pme,atc,NULL,TRUE,FALSE);
 
     *V = gather_energy_bsplines(pme,grid,atc);
 }
@@ -2479,6 +2480,11 @@ int gmx_pme_do(gmx_pme_t pme,
         }
         atc->maxshift = (atc->dimind==0 ? maxshift_x : maxshift_y);
     }
+    else
+    {
+        /* This could be necessary for TPI */
+        pme->atc[0].n = homenr;
+    }
     
     for(q=0; q<(pme->bFEP ? 2 : 1); q++) {
         if (q == 0) {
@@ -2619,7 +2625,8 @@ int gmx_pme_do(gmx_pme_t pme,
             inc_nrnb(nrnb,eNR_SOLVEPME,loop_count);
         }
 
-        if (flags & GMX_PME_CALC_F)
+        if ((flags & GMX_PME_CALC_F) ||
+            (flags & GMX_PME_CALC_POT))
         {
             
             /* do 3d-invfft */
@@ -2654,7 +2661,10 @@ int gmx_pme_do(gmx_pme_t pme,
             where();
 
             unwrap_periodic_pmegrid(pme,grid);
-            
+        }
+
+        if (flags & GMX_PME_CALC_F)
+        {
             /* interpolate forces for our local atoms */
             GMX_BARRIER(cr->mpi_comm_mygroup);
             GMX_MPE_LOG(ev_gather_f_bsplines_start);
index 753f4436cfd436164bf42e69ee76390130ebc4e5..1721941b6afddc751243f0f8dd2044b2fb21a6ce 100644 (file)
@@ -24,6 +24,7 @@ foreach(PROG ${NGMX_PROGRAMS})
 endforeach(PROG) 
 
 install(TARGETS ${NGMX_PROGRAMS}
+        COMPONENT ngmx
         RUNTIME DESTINATION ${BIN_INSTALL_DIR})
 
 endif(X11_FOUND)
index 0b05ab3670097d835c9bdba9826a62c5fd03cb22..0308d69d5ce0f7743e8d4dd42eb3912f02af1fc8 100644 (file)
@@ -47,7 +47,7 @@ set(GMX_TOOLS_PROGRAMS
     g_helixorient g_principal g_dipoles g_disre g_dist
     g_dyndom g_enemat g_energy g_lie g_filter g_gyrate
     g_h2order g_hbond g_helix g_mindist g_msd g_morph g_nmeig
-    g_nmens g_order g_polystat g_potential g_rama g_rdf g_rms
+    g_nmens g_order g_kinetics g_polystat g_potential g_rama g_rdf g_rms
     g_rmsf g_rotacf g_saltbr g_sas g_select g_sgangle g_sham g_sorient
     g_spol g_spatial g_tcaf g_traj g_tune_pme g_vanhove
     g_velacc g_clustsize g_mdmat g_wham g_sigeps g_bar
@@ -62,9 +62,13 @@ foreach(TOOL ${GMX_TOOLS_PROGRAMS})
 endforeach(TOOL ${GMX_TOOLS_PROGRAMS}) 
 
 
+install(TARGETS gmxana DESTINATION ${LIB_INSTALL_DIR} COMPONENT runtime)
 install(TARGETS ${GMX_TOOLS_PROGRAMS}
-       gmxana DESTINATION ${LIB_INSTALL_DIR}   
-       RUNTIME DESTINATION ${BIN_INSTALL_DIR})
+        DESTINATION ${BIN_INSTALL_DIR}
+        COMPONENT runtime)
 
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/libgmxana.pc.cmakein ${CMAKE_CURRENT_BINARY_DIR}/libgmxana.pc @ONLY)
-install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libgmxana.pc DESTINATION ${LIB_INSTALL_DIR}/pkgconfig RENAME "libgmxana${GMX_LIBS_SUFFIX}.pc")
+install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libgmxana.pc
+        DESTINATION ${LIB_INSTALL_DIR}/pkgconfig
+        RENAME "libgmxana${GMX_LIBS_SUFFIX}.pc"
+        COMPONENT development)
index 7aca3607601a4bcd426a27dce20ff849d775caae..6c92fb869053c8067844764aa0039a32094cb862 100644 (file)
@@ -64,7 +64,7 @@ bin_PROGRAMS = \
        g_enemat        g_energy        g_lie           g_filter        \
        g_gyrate        g_h2order       g_hbond         g_helix         \
        g_mindist       g_msd           g_morph         g_nmeig         \
-       g_nmens         g_order         \
+       g_nmens         g_order         g_kinetics      \
        g_polystat      g_potential     g_rama          \
        g_rdf           g_rms           g_rmsdist       g_rmsf          \
        g_rotacf        g_rotmat        g_saltbr        g_sas           \
index ce9a61aea8f6ebb07c0ee2353d7b80e4674da759..7ff0660a54f44f0e1bd30a1375271ed9685f73d1 100644 (file)
 #include "smalloc.h"
 #include "gstat.h"
 #include "gmx_fatal.h"
+#include "index.h"
        
 t_dlist *mk_dlist(FILE *log, 
                  t_atoms *atoms, int *nlist,
                  gmx_bool bPhi, gmx_bool bPsi, gmx_bool bChi, gmx_bool bHChi,
-                 int maxchi,int r0,int naa,char **aa)
+                 int maxchi, int r0, gmx_residuetype_t rt)
 {
   int     ires,i,j,k,ii;
   t_dihatms atm,prev;
@@ -167,11 +168,7 @@ t_dlist *mk_dlist(FILE *log,
       }
       if ((atm.minC != -1) && (atm.minO != -1))
        nc[6]++;
-      for(k=0; (k<naa); k++) {
-       if (gmx_strcasecmp(aa[k],thisres) == 0)
-         break;
-      }
-      dl[nl].index=k;
+      dl[nl].index=gmx_residuetype_get_index(rt,thisres);
       
       sprintf(dl[nl].name,"%s%d",thisres,ires+r0);
       nl++;
index 6e8c00109802c5cbfcc8718bd544211c1175680a..5c899daafc73b2f0caf0781eb347dc0139d32574 100644 (file)
@@ -2096,7 +2096,9 @@ static void read_edr_rawdh_block(samples_t **smp, int *ndu, t_enxblock *blk,
          (blk->sub[0].nr < 1) ||
          (blk->sub[1].nr < 1) )
     {
-        gmx_fatal(FARGS, "Unexpected block data in file %s", filename);
+        gmx_fatal(FARGS, 
+                  "Unexpected/corrupted block data in file %s around time %g.", 
+                  filename, start_time);
     }
    
     derivative = blk->sub[0].ival[0]; 
@@ -2120,11 +2122,11 @@ static void read_edr_rawdh_block(samples_t **smp, int *ndu, t_enxblock *blk,
          (  (derivative!=0) != (s->derivative!=0) ) )
     {
         fprintf(stderr, "Got foreign lambda=%g, expected: %g\n", 
-                s->foreign_lambda, foreign_lambda);
-        fprintf(stderr, "Got derivative=%d, derivative: %d\n", 
+                foreign_lambda, s->foreign_lambda);
+        fprintf(stderr, "Got derivative=%d, expected: %d\n", 
                 derivative, s->derivative);
-        gmx_fatal(FARGS, "Inconsistent data in file %s around t=%g", filename,
-                  start_time);
+        gmx_fatal(FARGS, "Corrupted data in file %s around t=%g.", 
+                  filename, start_time);
     }
 
     /* make room for the data */
@@ -2177,7 +2179,9 @@ static samples_t *read_edr_hist_block(int *nsamples, t_enxblock *blk,
          (blk->sub[0].nr < 2)  ||
          (blk->sub[1].nr < 2) )
     {
-        gmx_fatal(FARGS, "Unexpected block data in file %s", filename);
+        gmx_fatal(FARGS, 
+                  "Unexpected/corrupted block data in file %s around time %g", 
+                  filename, start_time);
     }
 
     nhist=blk->nsub-2;
@@ -2187,7 +2191,9 @@ static samples_t *read_edr_hist_block(int *nsamples, t_enxblock *blk,
     }
     if (nhist > 2)
     {
-        gmx_fatal(FARGS, "Unexpected block data in file %s", filename);
+        gmx_fatal(FARGS, 
+                  "Unexpected/corrupted block data in file %s around time %g", 
+                  filename, start_time);
     }
 
     snew(s, 1);
index e97133fe0711374aaabe901cbc95eb6735216a32..bf18bdba3f31fba695b2ec3e0e06394c9042175f 100644 (file)
@@ -385,7 +385,7 @@ static int reset_em_all(int nlist,t_dlist dlist[],int nf,
   return j ; 
 }
 
-static void histogramming(FILE *log,int nbin, int naa,char **aa,
+static void histogramming(FILE *log,int nbin,gmx_residuetype_t rt,
                          int nf,int maxchi,real **dih,
                          int nlist,t_dlist dlist[],
                          atom_id index[],
@@ -426,7 +426,10 @@ static void histogramming(FILE *log,int nbin, int naa,char **aa,
   gmx_bool    bBfac,bOccup;
   char    hisfile[256],hhisfile[256],sshisfile[256],title[256],*ss_str=NULL;
   char **leg; 
-  
+  const char *residue_name;
+  int     rt_size;
+
+  rt_size = gmx_residuetype_get_size(rt);
   if (bSSHisto) {
     fp = ffopen(ssdump,"r");
     if(1 != fscanf(fp,"%d",&nres))
@@ -444,8 +447,8 @@ static void histogramming(FILE *log,int nbin, int naa,char **aa,
     /* Four dimensional array... Very cool */
     snew(his_aa_ss,3);
     for(i=0; (i<3); i++) {
-      snew(his_aa_ss[i],naa+1);
-      for(j=0; (j<=naa); j++) {
+      snew(his_aa_ss[i],rt_size+1);
+      for(j=0; (j<=rt_size); j++) {
        snew(his_aa_ss[i][j],edMax);
        for(Dih=0; (Dih<edMax); Dih++)
          snew(his_aa_ss[i][j][Dih],nbin+1);
@@ -454,8 +457,8 @@ static void histogramming(FILE *log,int nbin, int naa,char **aa,
   }
   snew(his_aa,edMax);
   for(Dih=0; (Dih<edMax); Dih++) {
-    snew(his_aa[Dih],naa+1);
-    for(i=0; (i<=naa); i++) {
+    snew(his_aa[Dih],rt_size+1);
+    for(i=0; (i<=rt_size); i++) {
       snew(his_aa[Dih][i],nbin+1);
     }
   }
@@ -611,7 +614,7 @@ static void histogramming(FILE *log,int nbin, int naa,char **aa,
   /* finished -jc stuff */ 
 
   snew(normhisto,nbin);
-  for(i=0; (i<naa); i++) {
+  for(i=0; (i<rt_size); i++) {
     for(Dih=0; (Dih<edMax); Dih++){
       /* First check whether something is in there */
       for(j=0; (j<nbin); j++)
@@ -625,23 +628,24 @@ static void histogramming(FILE *log,int nbin, int naa,char **aa,
        if (bNormalize)
          normalize_histo(nbin,his_aa[Dih][i],(360.0/nbin),normhisto);
        
+       residue_name = gmx_residuetype_get_name(rt,i);
        switch (Dih) {
        case edPhi:
-         sprintf(hisfile,"histo-phi%s",aa[i]);
-         sprintf(title,"\\xf\\f{} Distribution for %s",aa[i]);
+         sprintf(hisfile,"histo-phi%s",residue_name);
+         sprintf(title,"\\xf\\f{} Distribution for %s",residue_name);
          break;
        case edPsi:
-         sprintf(hisfile,"histo-psi%s",aa[i]);
-         sprintf(title,"\\xy\\f{} Distribution for %s",aa[i]);
+         sprintf(hisfile,"histo-psi%s",residue_name);
+         sprintf(title,"\\xy\\f{} Distribution for %s",residue_name);
          break;
        case edOmega:
-         sprintf(hisfile,"histo-omega%s",aa[i]);
-         sprintf(title,"\\xw\\f{} Distribution for %s",aa[i]);
+         sprintf(hisfile,"histo-omega%s",residue_name);
+         sprintf(title,"\\xw\\f{} Distribution for %s",residue_name);
          break;
        default:
-         sprintf(hisfile,"histo-chi%d%s",Dih-NONCHI+1,aa[i]);
+         sprintf(hisfile,"histo-chi%d%s",Dih-NONCHI+1,residue_name);
          sprintf(title,"\\xc\\f{}\\s%d\\N Distribution for %s",
-                 Dih-NONCHI+1,aa[i]);
+                 Dih-NONCHI+1,residue_name);
        }
        strcpy(hhisfile,hisfile);
        strcat(hhisfile,".xvg");
@@ -689,7 +693,7 @@ static void histogramming(FILE *log,int nbin, int naa,char **aa,
   if (bSSHisto) {
     /* Four dimensional array... Very cool */
     for(i=0; (i<3); i++) {
-      for(j=0; (j<=naa); j++) {
+      for(j=0; (j<=rt_size); j++) {
        for(Dih=0; (Dih<edMax); Dih++)
          sfree(his_aa_ss[i][j][Dih]);
        sfree(his_aa_ss[i][j]);
@@ -1113,18 +1117,18 @@ int gmx_chi(int argc,char *argv[])
   };
 
   FILE       *log;
-  int        natoms,nlist,naa,idum,nbin; 
+  int        natoms,nlist,idum,nbin;
   t_atoms    atoms;
   rvec       *x;
   int        ePBC;
   matrix     box;
   char       title[256],grpname[256]; 
   t_dlist    *dlist;
-  char       **aa;
   gmx_bool       bChi,bCorr,bSSHisto;
   gmx_bool       bDo_rt, bDo_oh, bDo_ot, bDo_jc ; 
   real       dt=0, traj_t_ns;
   output_env_t oenv;
+  gmx_residuetype_t rt;
   
   atom_id    isize,*index;
   int        ndih,nactdih,nf;
@@ -1203,8 +1207,8 @@ int gmx_chi(int argc,char *argv[])
   read_stx_conf(ftp2fn(efSTX,NFILE,fnm),title,&atoms,x,NULL,&ePBC,box);
   fprintf(log,"Title: %s\n",title);
   
-  naa=get_strings("aminoacids.dat",&aa);
-  dlist=mk_dlist(log,&atoms,&nlist,bPhi,bPsi,bChi,bHChi,maxchi,r0,naa,aa);
+  gmx_residuetype_init(&rt);
+  dlist=mk_dlist(log,&atoms,&nlist,bPhi,bPsi,bChi,bHChi,maxchi,r0,rt);
   fprintf(stderr,"%d residues with dihedrals found\n", nlist);
   
   if (nlist == 0) 
@@ -1239,7 +1243,7 @@ int gmx_chi(int argc,char *argv[])
     dump_em_all(nlist,dlist,nf,time,dih,maxchi,bPhi,bPsi,bChi,bOmega,bRAD,oenv);
   
   /* Histogramming & J coupling constants & calc of S2 order params */
-  histogramming(log,nbin,naa,aa,nf,maxchi,dih,nlist,dlist,index,
+  histogramming(log,nbin,rt,nf,maxchi,dih,nlist,dlist,index,
                bPhi,bPsi,bOmega,bChi,
                bNormHisto,bSSHisto,ftp2fn(efDAT,NFILE,fnm),bfac_max,&atoms,
                bDo_jc,opt2fn("-jc",NFILE,fnm),oenv);
@@ -1318,6 +1322,8 @@ int gmx_chi(int argc,char *argv[])
   if (bCorr)
     do_view(oenv,opt2fn("-corr",NFILE,fnm),"-nxy");
     
+  gmx_residuetype_destroy(rt);
+
   thanx(stderr);
     
   return 0;
index 0d0acae7f0d00e82e2740ebea95c739e741f8dd3..545dcb154bba99b8b2dfa377514d39edfbf87ec2 100644 (file)
@@ -365,8 +365,10 @@ void plot_density(real *slDensity[], const char *afile, int nslices,
 int gmx_density(int argc,char *argv[])
 {
   const char *desc[] = {
-    "Compute partial densities across the box, using an index file. Densities",
-    "in kg/m^3, number densities or electron densities can be",
+    "Compute partial densities across the box, using an index file.[PAR]",
+    "For the total density of NPT simulations, use [TT]g_energy[tt] instead.",
+    "[PAR]",
+    "Densities in kg/m^3, number densities or electron densities can be",
     "calculated. For electron densities, a file describing the number of",
     "electrons for each type of atom should be provided using [TT]-ei[tt].",
     "It should look like:[BR]",
index 1558c241b80f941bb5c0b72f3158c2c91c56bf8e..33d98b90b14082f6a268ce9fb971c0ed6a092428 100644 (file)
@@ -118,9 +118,8 @@ typedef struct {
 
 void sort_molecule(t_atoms **atoms_solvt,rvec *x,rvec *v,real *r)
 {
-  int atnr,i,j,moltp=0,nrmoltypes,resnr;
+  int atnr,i,j,moltp=0,nrmoltypes,resi_o,resi_n,resnr;
   t_moltypes *moltypes;
-  int *tps;
   t_atoms *atoms,*newatoms;
   rvec *newx, *newv=NULL;
   real *newr;
@@ -130,7 +129,6 @@ void sort_molecule(t_atoms **atoms_solvt,rvec *x,rvec *v,real *r)
   atoms = *atoms_solvt;
 
   /* copy each residue from *atoms to a molecule in *molecule */
-  snew(tps,atoms->nr);
   moltypes=NULL;
   nrmoltypes=0;
   atnr=0;
@@ -156,7 +154,6 @@ void sort_molecule(t_atoms **atoms_solvt,rvec *x,rvec *v,real *r)
       }
       moltypes[moltp].nmol++;
     }
-    tps[i]=moltp;
   }
   
   fprintf(stderr,"Found %d%s molecule type%s:\n",
@@ -189,18 +186,40 @@ void sort_molecule(t_atoms **atoms_solvt,rvec *x,rvec *v,real *r)
     if (v) snew(newv,atoms->nr);
     snew(newr,atoms->nr);
     
-    for (i=0; i<atoms->nr; i++) {
-      resnr = moltypes[tps[i]].res0 +
-       (moltypes[tps[i]].i-moltypes[tps[i]].i0) / moltypes[tps[i]].natoms;
-      newatoms->atom[moltypes[tps[i]].i].resind = resnr;
-      newatoms->resinfo[resnr] = atoms->resinfo[atoms->atom[i].resind];
-      newatoms->resinfo[resnr].nr = resnr + 1;
-      newatoms->atomname[moltypes[tps[i]].i] = atoms->atomname[i];
-      newatoms->atom[moltypes[tps[i]].i] = atoms->atom[i];
-      copy_rvec(x[i],newx[moltypes[tps[i]].i]);
-      if (v) copy_rvec(v[i],newv[moltypes[tps[i]].i]);
-      newr[moltypes[tps[i]].i] = r[i];
-      moltypes[tps[i]].i++;
+    resi_n = 0;
+    resnr = 1;
+    j = 0;
+    for(moltp=0; moltp<nrmoltypes; moltp++) {
+      i = 0;
+      while (i < atoms->nr) {
+       resi_o = atoms->atom[i].resind;
+       if (strcmp(*atoms->resinfo[resi_o].name,moltypes[moltp].name) == 0) {
+         /* Copy the residue info */
+         newatoms->resinfo[resi_n]    = atoms->resinfo[resi_o];
+         newatoms->resinfo[resi_n].nr = resnr;
+         /* Copy the atom info */
+         do {
+           newatoms->atom[j]        = atoms->atom[i];
+           newatoms->atomname[j]    = atoms->atomname[i];
+           newatoms->atom[j].resind = resi_n;
+           copy_rvec(x[i],newx[j]);
+           if (v != NULL) {
+             copy_rvec(v[i],newv[j]);
+           }
+           newr[j] = r[i];
+           i++;
+           j++;
+         } while (i < atoms->nr && atoms->atom[i].resind == resi_o);
+         /* Increase the new residue counters */
+         resi_n++;
+         resnr++;
+       } else {
+         /* Skip this residue */
+         do {
+           i++;
+         } while (i < atoms->nr && atoms->atom[i].resind == resi_o);
+       }
+      }
     }
     
     /* put them back into the original arrays and throw away temporary arrays */
index d4769e5d73f9ef1a1c0ed944fe0edbd145445329..12e16ed21e141234d10f06363ed9bbdea27a95cb 100644 (file)
@@ -1649,7 +1649,10 @@ static void do_merge(t_hbdata *hb,int ntmp,
         srenew(hb0->h[0],4+nnframes/hb->wordlen);
         srenew(hb0->g[0],4+nnframes/hb->wordlen);  
     }
-    clearPshift(&(hb->per->pHist[a1][a2]));
+    if (NULL != hb->per->pHist)
+    {
+        clearPshift(&(hb->per->pHist[a1][a2]));
+    }
 
     /* Copy temp array to target array */
     for(m=0; (m<=nnframes); m++) {
@@ -3847,7 +3850,7 @@ int gmx_hbond(int argc,char *argv[])
                 trrStatus = (read_next_x(oenv,status,&t,natoms,x,box));
                 nframes++;      /*    +   */
             }      /*                 +   */
-#ifdef HAVE_OPENMP /* ++++++++++++++++   */
+#ifdef HAVE_OPENMP /* +++++++++++++++++   */
 #pragma omp barrier
 #endif
         } while (trrStatus);
index d597c15fc6eb57551b2b0a4b98f29b0b31444c2b..3923a97611d5df32bb2ac11f6a571518a99b5686 100644 (file)
@@ -3464,25 +3464,17 @@ double do_md_membed(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
         bStartingFromCpt = FALSE;
 
         /* #######  SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
-        /* Complicated conditional when bGStatEveryStep=FALSE.
-         * We can not just use bGStat, since then the simulation results
-         * would depend on nstenergy and nstlog or step_nscheck.
+       /* With all integrators, except VV, we need to retain the pressure
+         * at the current step for coupling at the next step.
          */
-        if (((state->flags & (1<<estPRES_PREV)) ||
-             (state->flags & (1<<estSVIR_PREV)) ||
-             (state->flags & (1<<estFVIR_PREV))) &&
+        if ((state->flags & (1<<estPRES_PREV)) &&
             (bGStatEveryStep ||
-             (ir->nstlist > 0 && step % ir->nstlist == 0) ||
-             (ir->nstlist < 0 && nlh.nabnsb > 0) ||
-             (ir->nstlist == 0 && bGStat)))
+             (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
         {
             /* Store the pressure in t_state for pressure coupling
              * at the next MD step.
              */
-            if (state->flags & (1<<estPRES_PREV))
-            {
-                copy_mat(pres,state->pres_prev);
-            }
+            copy_mat(pres,state->pres_prev);
         }
 
         /* #######  END SET VARIABLES FOR NEXT ITERATION ###### */
index 9abef117ac84e00568a7034104b80bd0ce9b791c..9d1b8094ce6e10150412ac2e4a872a5372fff79d 100644 (file)
@@ -169,6 +169,13 @@ void calc_potential(const char *fn, atom_id **index, int gnx[],
          
     for (n = 0; n < nr_grps; n++)
     {      
+        /* Check whether we actually have all positions of the requested index
+         * group in the trajectory file */
+        if (gnx[n] > natoms)
+        {
+            gmx_fatal(FARGS, "You selected a group with %d atoms, but only %d atoms\n"
+                             "were found in the trajectory.\n", gnx[n], natoms);
+        }
       for (i = 0; i < gnx[n]; i++)   /* loop over all atoms in index file */
       {
        if (bSpherical)
@@ -364,7 +371,7 @@ void plot_potential(double *potential[], double *charge[], double *field[],
     for (n = 0; n < nr_grps; n++)
     {
       fprintf(pot,"   %20.16g", potential[n][slice]);
-      fprintf(fie,"   %20.16g", field[n][slice]);
+      fprintf(fie,"   %20.16g", field[n][slice]/1e9);  /* convert to V/nm */
       fprintf(cha,"   %20.16g", charge[n][slice]);
     }
     fprintf(pot,"\n");
index da974a4f792f67b86708dfe7b200d61e5bd53949..ed6e9bb407b59468b18ada313746108f29b50ab7 100644 (file)
 #include <config.h>
 #endif
 
+
+#include <time.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+
+
+
 #include "statutil.h"
 #include "typedefs.h"
 #include "smalloc.h"
index d656889836ac0192e31a2a965407611695bf9c62..8ea9ce9fd38583256cc2bc9cbe644797001d5a0d 100644 (file)
@@ -162,8 +162,9 @@ int gmx_velacc(int argc,char *argv[])
   parse_common_args(&argc,argv,PCA_CAN_VIEW | PCA_CAN_TIME | PCA_BE_NICE,
                    NFILE,fnm,npargs,ppa,asize(desc),desc,0,NULL,&oenv);
 
-  if (bMol)
-    bTPS = bM || ftp2bSet(efTPS,NFILE,fnm) || !ftp2bSet(efNDX,NFILE,fnm);
+  if (bMol || bM) {
+    bTPS = ftp2bSet(efTPS,NFILE,fnm) || !ftp2bSet(efNDX,NFILE,fnm);
+  }
 
   if (bTPS) {
     bTop=read_tps_conf(ftp2fn(efTPS,NFILE,fnm),title,&top,&ePBC,NULL,NULL,box,
index ff7c7a39b098f2fd9053bbea8f4f5a6adfcf526a..fa0e58a8b121f070658c9ff44477cc3eba441c1d 100644 (file)
@@ -1,11 +1,12 @@
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*- */ 
 /*
- *
+ * 
  *                This source code is part of
- *
+ * 
  *                 G   R   O   M   A   C   S
- *
+ * 
  *          GROningen MAchine for Chemical Simulations
- *
+ * 
  *                        VERSION 3.2.0
  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version 2
  * of the License, or (at your option) any later version.
- *
+ * 
  * If you want to redistribute modifications, please consider that
  * scientific software is very special. Version control is crucial -
  * bugs must be traceable. We will be happy to consider code for
  * inclusion in the official distribution, but derived work must not
  * be called official GROMACS. Details are found in the README & COPYING
  * files - if they are missing, get the official version at www.gromacs.org.
- *
+ * 
  * To help us fund GROMACS development, we humbly ask that you cite
  * the papers on the package - you can find them in the top README file.
- *
+ * 
  * For more info, check our website at http://www.gromacs.org
- *
+ * 
  * And Hey:
  * Green Red Orange Magenta Azure Cyan Skyblue
  */
@@ -36,8 +37,6 @@
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
-
-
 #include <stdio.h>
 
 #include "statutil.h"
@@ -48,9 +47,9 @@
 #include "statutil.h"
 #include "tpxio.h"
 #include "names.h"
+#include "gmx_random.h"
 #include "gmx_ana.h"
 
-
 #ifndef HAVE_STRDUP
 #define HAVE_STRDUP
 #endif
 #include "xvgr.h"
 
 
+#define WHAM_MAXFILELEN 2048
+
 /* enum for energy units */
 enum { enSel, en_kJ, en_kCal, en_kT, enNr };
 /* enum for type of input files (pdos, tpr, or pullf) */
 enum { whamin_unknown, whamin_tpr, whamin_pullxf, whamin_pdo };
-/* enum for methods to make profile cyclic/periodic */
-enum { enCycl, enCycl_no, enCycl_yes, enCycl_weighted, enCycl_nr};
+/* enum for bootstrapping method (
+   - bootstrap complete histograms with continuous weights (Bayesian bootstrap)
+   - bootstrap complete histograms
+   - bootstrap trajectories from given umbrella histograms
+   - bootstrap trajectories from Gaussian with mu/sigma computed from 
+   the respective histogram
+   
+   ********************************************************************
+   FOR MORE DETAILS ON THE BOOTSTRAP METHODS (INCLUDING EXAMPLES), SEE
+   JS Hub, BL de Groot, D van der Spoel
+   g_wham - A free weighted histogram analysis implementation including 
+   robust error and autocorrelation estimates, 
+   J Chem Theory Comput, accepted (2010)
+   ********************************************************************
+ */
+enum { bsMethod_unknown, bsMethod_BayesianHist, bsMethod_hist, 
+       bsMethod_traj, bsMethod_trajGauss };
+
 
-typedef struct
+typedef struct 
 {
-    /* umbrella with pull code of gromacs 4 */
+    /* umbrella with pull code of gromacs 4.x */
     int npullgrps;      /* nr of pull groups in tpr file         */
     int pull_geometry;  /* such as distance, position            */
     ivec pull_dim;      /* pull dimension with geometry distance */
     int  pull_ndim;     /* nr of pull_dim != 0                   */
     real *k;            /* force constants in tpr file           */
     rvec *init_dist;    /* reference displacements               */
-    real *umbInitDist;  /* referebce displacement in umbrella direction */
-
+    real *umbInitDist;  /* reference displacement in umbrella direction */
+    
     /* From here, old pdo stuff */
-    int nSkip;
+    int nSkip;             
     char Reference[256];
     int nPull;
     int nDim;
@@ -85,59 +102,185 @@ typedef struct
     char PullName[4][256];
     double UmbPos[4][3];
     double UmbCons[4][3];
-    gmx_bool Flipped[4];
 } t_UmbrellaHeader;
 
-typedef struct
+typedef struct 
 {
-    int nPull;
-    int nBin;
-    double **Histo,**cum;
-    double *k;
-    double *pos;
-    double *z;
-    double * N, *Ntot;
-    gmx_bool * Flipped;
-    double dt;
-    gmx_bool **bContrib;
+    int nPull;            /* nr of pull groups in this pdo or pullf/x file */
+    double **Histo,**cum; /* nPull histograms and nPull cumulative distr. funct */
+    int nBin;             /* nr of bins. identical to opt->bins */
+    double *k;            /* force constants for the nPull groups */
+    double *pos;          /* umbrella positions for the nPull groups */
+    double *z;            /* z=(-Fi/kT) for the nPull groups. These values are
+                             iteratively computed during wham */
+    double *N, *Ntot;     /* nr of data points in nPull histograms. N and Ntot
+                             only differ if bHistEq==TRUE */
+    
+    double *g,*tau,*tausmooth;  /* g = 1 + 2*tau[int]/dt where tau is the integrated
+                                   autocorrelation time. Compare, e.g. 
+                                   Ferrenberg/Swendsen, PRL 63:1195 (1989)
+                                   Kumar et al, J Comp Chem 13, 1011-1021 (1992), eq. 28 */
+
+    double dt;                 /* timestep in the input data. Can be adapted with 
+                                  g_wham option -dt */
+    gmx_bool **bContrib;       /* TRUE, if any data point of the histogram is within min 
+                                  and max, otherwise FALSE. */
+    real **ztime;              /* input data z(t) as a function of time. Required to 
+                                  compute ACTs */
+    real *forceAv;             /* average force estimated from average displacement, fAv=dzAv*k
+                                  Used for integration to guess the potential. */
+    real *aver,*sigma;         /* average and stddev of histograms */
+    double *bsWeight;          /* for bootstrapping complete histograms with continuous weights */ 
 } t_UmbrellaWindow;
 
-typedef struct
+
+typedef struct 
 {
-    const char *fnTpr,*fnPullf,*fnPdo,*fnPullx;
-    gmx_bool bTpr,bPullf,bPdo,bPullx;
-    int bins,cycl;
-    gmx_bool verbose,bShift,bAuto,bBoundsOnly;
-    gmx_bool bFlipProf;
-    real tmin, tmax, dt;
-    real Temperature,Tolerance;
-    int nBootStrap,histBootStrapBlockLength;
-    real dtBootStrap,zProfZero,alpha;
-    int bsSeed,stepchange;
-    gmx_bool bHistBootStrap,bWeightedCycl,bHistOutOnly;
-    gmx_bool bAutobounds,bNoprof;
+    /* INPUT STUFF */
+    const char *fnTpr,*fnPullf;
+    const char *fnPdo,*fnPullx;      /* file names of input */
+    gmx_bool bTpr,bPullf,bPdo,bPullx;/* input file types given? */
+    real tmin, tmax, dt;             /* only read input within tmin and tmax with dt */
+    
+    gmx_bool bInitPotByIntegration;  /* before WHAM, guess potential by force integration. Yields
+                                        1.5 to 2 times faster convergence */
+    int stepUpdateContrib;           /* update contribution table every ... iterations. Accelerates
+                                        WHAM. */
+    
+    /* BASIC WHAM OPTIONS */
+    int bins;                        /* nr of bins, min, max, and dz of profile */
     real min,max,dz;
-    gmx_bool bLog;
-    int unit;
-    real zProf0;
-    gmx_bool bProf0Set,bs_verbose;
-    gmx_bool bHistEq, bTab;
+    real Temperature,Tolerance;      /* temperature, converged when probability changes less
+                                        than Tolerance */
+    gmx_bool bCycl;                  /* generate cyclic (periodic) PMF */
+    
+    /* OUTPUT CONTROL */
+    gmx_bool bLog;                   /* energy output (instead of probability) for profile */
+    int unit;                        /* unit for PMF output kJ/mol or kT or kCal/mol */
+    gmx_bool bSym;                   /* symmetrize PMF around z=0 after WHAM, useful for 
+                                        membranes etc. */
+    real zProf0;                     /* after wham, set prof to zero at this z-position 
+                                        When bootstrapping, set zProf0 to a "stable" reference
+                                        position. */
+    gmx_bool bProf0Set;              /* setting profile to 0 at zProf0? */
+    
+    gmx_bool bBoundsOnly,bHistOnly;  /* determine min and max, or write histograms and exit */
+    gmx_bool bAuto;                  /* determine min and max automatically but do not exit */
+    
+    gmx_bool verbose;               /* more noisy wham mode */
+    int stepchange;                 /* print maximum change in prof after how many iterations */
+    output_env_t oenv;              /* xvgr options */
+    
+    /* AUTOCORRELATION STUFF */
+    gmx_bool bTauIntGiven,bCalcTauInt;/* IACT given or should be calculated? */
+    real sigSmoothIact;              /* sigma of Gaussian to smooth ACTs */
+    gmx_bool bAllowReduceIact;           /* Allow to reduce ACTs during smoothing. Otherwise
+                                            ACT are only increased during smoothing */
+    real acTrestart;                 /* when computing ACT, time between restarting points */
+    gmx_bool bHistEq;                /* Enforce the same weight for each umbrella window, that is
+                                        calculate with the same number of data points for
+                                        each window. That can be reasonable, if the histograms
+                                        have different length, but due to autocorrelation, 
+                                        a longer simulation should not have larger weight in wham. */
+    
+    /* BOOTSTRAPPING STUFF */
+    int nBootStrap;                /* nr of bootstraps (50 is usually enough) */
+    int bsMethod;                  /* if == bsMethod_hist, consider complete histograms as independent 
+                                      data points and, hence, only mix complete histograms.
+                                      if == bsMethod_BayesianHist, consider complete histograms
+                                      as independent data points, but assign random weights
+                                      to the histograms during the bootstrapping ("Bayesian bootstrap")
+                                      In case of long correlations (e.g., inside a channel), these
+                                      will yield a more realistic error.
+                                      if == bsMethod_traj(Gauss), generate synthetic histograms 
+                                      for each given
+                                      histogram by generating an autocorrelated random sequence
+                                      that is distributed according to the respective given
+                                      histogram. With bsMethod_trajGauss, bootstrap from a Gaussian
+                                      (instead of from the umbrella histogram) to generate a new
+                                      histogram 
+                                   */
+    real tauBootStrap;             /* autocorrelation time (ACT) used to generate synthetic
+                                      histograms. If ==0, use calculated ACF */
+    int histBootStrapBlockLength;  /* when mixing histograms, mix only histograms within blocks
+                                      along the reaction coordinate xi. Avoids gaps along xi. */
+    int bsSeed;                    /* random seed for bootstrapping */
+    gmx_bool bs_verbose;               /* Write cumulative distribution functions (CDFs) of histograms
+                                          and write the generated histograms for each bootstrap */
+    
+    /* tabulated umbrella potential stuff */
+    gmx_bool bTab;                      
     double *tabX,*tabY,tabMin,tabMax,tabDz;
     int tabNbins;
+    
+    gmx_rng_t rng;                  /* gromacs random number generator */
 } t_UmbrellaOptions;
 
 
+t_UmbrellaWindow * initUmbrellaWindows(int nwin)
+{
+    t_UmbrellaWindow *win;
+    int i;
+    snew(win,nwin);
+    for (i=0; i<nwin; i++)
+    {
+        win[i].Histo = win[i].cum  = 0;
+        win[i].k     = win[i].pos  = win[i].z =0;
+        win[i].N     = win[i].Ntot = 0;
+        win[i].g     = win[i].tau  = win[i].tausmooth = 0;
+        win[i].bContrib=0;
+        win[i].ztime=0;
+        win[i].forceAv=0;
+        win[i].aver = win[i].sigma = 0;
+        win[i].bsWeight = 0;
+    }
+    return win;
+}
+
+void freeUmbrellaWindows(t_UmbrellaWindow *win, int nwin)
+{
+    int i,j;
+    for (i=0; i<nwin; i++)
+    {
+        if (win[i].Histo)
+            for (j=0;j<win[i].nPull;j++)
+                sfree(win[i].Histo[j]);
+        if (win[i].cum)
+            for (j=0;j<win[i].nPull;j++)
+                sfree(win[i].cum[j]);
+        if (win[i].bContrib)
+            for (j=0;j<win[i].nPull;j++)
+                sfree(win[i].bContrib[j]);
+        sfree(win[i].Histo);
+        sfree(win[i].cum);
+        sfree(win[i].k);
+        sfree(win[i].pos);
+        sfree(win[i].z);
+        sfree(win[i].N);
+        sfree(win[i].Ntot);
+        sfree(win[i].g);
+        sfree(win[i].tau);
+        sfree(win[i].tausmooth);
+        sfree(win[i].bContrib);
+        sfree(win[i].ztime);
+        sfree(win[i].forceAv);
+        sfree(win[i].aver);
+        sfree(win[i].sigma);
+        sfree(win[i].bsWeight);
+    }
+    sfree(win);
+}
+
 /* Return j such that xx[j] <= x < xx[j+1] */
 void searchOrderedTable(double xx[], int n, double x, int *j)
 {
     int ju,jm,jl;
     int ascending;
-
-
+    
     jl=-1;
     ju=n;
     ascending=(xx[n-1] > xx[0]);
-    while (ju-jl > 1)
+    while (ju-jl > 1) 
     {
         jm=(ju+jl) >> 1;
         if ((x >= xx[jm]) == ascending)
@@ -150,14 +293,12 @@ void searchOrderedTable(double xx[], int n, double x, int *j)
     else *j=jl;
 }
 
-
 /* Read and setup tabulated umbrella potential */
 void setup_tab(const char *fn,t_UmbrellaOptions *opt)
 {
     int i,ny,nl;
     double **y;
-
-
+    
     printf("Setting up tabulated potential from file %s\n",fn);
     nl=read_xvg(fn,&y,&ny);
     opt->tabNbins=nl;
@@ -168,113 +309,83 @@ void setup_tab(const char *fn,t_UmbrellaOptions *opt)
     opt->tabDz=(opt->tabMax-opt->tabMin)/(nl-1);
     if (opt->tabDz<=0)
         gmx_fatal(FARGS,"The tabulated potential in %s must be provided in \n"
-                "ascending z-direction",fn);
+                  "ascending z-direction",fn);
     for (i=0;i<nl-1;i++)
         if  (fabs(y[0][i+1]-y[0][i]-opt->tabDz) > opt->tabDz/1e6)
             gmx_fatal(FARGS,"z-values in %s are not equally spaced.\n",ny,fn);
     snew(opt->tabY,nl);
     snew(opt->tabX,nl);
-    for (i=0;i<nl;i++){
+    for (i=0;i<nl;i++)
+    {
         opt->tabX[i]=y[0][i];
         opt->tabY[i]=y[1][i];
     }
     printf("Found equally spaced tabulated potential from %g to %g, spacing %g\n",
-            opt->tabMin,opt->tabMax,opt->tabDz);
+           opt->tabMin,opt->tabMax,opt->tabDz);
 }
 
-
-void read_pdo_header(FILE * file,t_UmbrellaHeader * header, 
-                     t_UmbrellaOptions *opt)
+void read_pdo_header(FILE * file,t_UmbrellaHeader * header, t_UmbrellaOptions *opt)
 {
-    char Buffer0[256];
-    char Buffer1[256];
-    char Buffer2[256];
-    char Buffer3[256];
-    char Buffer4[256];
-    int i,j;
-
-
+    char line[2048];
+    char Buffer0[256], Buffer1[256], Buffer2[256], Buffer3[256], Buffer4[256];
+    int i;
+    
     /*  line 1 */
-    if(3 != fscanf(file,"%s%s%s",Buffer0,Buffer1,Buffer2))
-    {
-       gmx_fatal(FARGS,"Error reading header from pdo file");
-    }
-    if(strcmp(Buffer1,"UMBRELLA"))
-        gmx_fatal(FARGS,"This does not appear to be a valid pdo file. Found %s, expected %s",
-                Buffer1, "UMBRELLA");
+    if (fgets(line,2048,file) == NULL)
+        gmx_fatal(FARGS,"Error reading header from pdo file\n");
+    sscanf(line,"%s%s%s",Buffer0,Buffer1,Buffer2);
+    if(strcmp(Buffer1,"UMBRELLA")) 
+        gmx_fatal(FARGS,"This does not appear to be a valid pdo file. Found %s, expected %s\n"
+                  "(Found in first line: `%s')\n",
+                  Buffer1, "UMBRELLA",line);
     if(strcmp(Buffer2,"3.0"))
         gmx_fatal(FARGS,"This does not appear to be a version 3.0 pdo file");
-
+    
     /*  line 2 */
-    if(6 != fscanf(file,"%s%s%s%d%d%d",Buffer0,Buffer1,Buffer2,
-                  &(header->Dims[0]),&(header->Dims[1]),&(header->Dims[2])))
-    { 
-       gmx_fatal(FARGS,"Error reading dimensions in header from pdo file");
-    }
-
+    if (fgets(line,2048,file) == NULL)
+        gmx_fatal(FARGS,"Error reading header from pdo file\n");
+    sscanf(line,"%s%s%s%d%d%d",Buffer0,Buffer1,Buffer2,
+           &(header->Dims[0]),&(header->Dims[1]),&(header->Dims[2]));
+    
     /* printf("%d %d %d\n", header->Dims[0],header->Dims[1],header->Dims[2]); */
-
+    
     header->nDim = header->Dims[0] + header->Dims[1] + header->Dims[2];
     if(header->nDim!=1)
         gmx_fatal(FARGS,"Currently only supports one dimension");
-
+    
     /* line3 */
-    if(3 != fscanf(file,"%s%s%d",Buffer0,Buffer1,&(header->nSkip)))
-    { 
-       gmx_fatal(FARGS,"Error reading header from pdo file");
-    }
-
-    /* line 4 */
-    if(4 != fscanf(file,"%s%s%s%s",Buffer0,Buffer1,Buffer2,header->Reference))
-    { 
-       gmx_fatal(FARGS,"Error reading header from pdo file");
-    }
-
+    if (fgets(line,2048,file) == NULL)
+        gmx_fatal(FARGS,"Error reading header from pdo file\n");
+    sscanf(line,"%s%s%d",Buffer0,Buffer1,&(header->nSkip));
+    
+    /* line 4 */ 
+    if (fgets(line,2048,file) == NULL)
+        gmx_fatal(FARGS,"Error reading header from pdo file\n");
+    sscanf(line,"%s%s%s%s",Buffer0,Buffer1,Buffer2,header->Reference);
+    
     /* line 5 */
-    if(6 != fscanf(file,"%s%s%s%s%s%d",Buffer0,Buffer1,Buffer2,Buffer3,Buffer4,&(header->nPull)))
-    { 
-       gmx_fatal(FARGS,"Error reading header from pdo file");
-    }
-
+    if (fgets(line,2048,file) == NULL)
+        gmx_fatal(FARGS,"Error reading header from pdo file\n");
+    sscanf(line,"%s%s%s%s%s%d",Buffer0,Buffer1,Buffer2,Buffer3,Buffer4,&(header->nPull));
+       
     if (opt->verbose)
-        printf("Found nPull=%d , nSkip=%d, ref=%s\n",header->nPull,header->nSkip,
-                header->Reference);
-
-    for(i=0;i<header->nPull;++i)
+        printf("\tFound nPull=%d , nSkip=%d, ref=%s\n",header->nPull,header->nSkip,
+               header->Reference);
+    
+    for(i=0;i<header->nPull;++i) 
     {
-      if(4 != fscanf(file,"%s%s%s%s",Buffer0,Buffer1,Buffer2,header->PullName[i]))
-      { 
-         gmx_fatal(FARGS,"Error reading header from pdo file");
-      }
+        if (fgets(line,2048,file) == NULL)
+            gmx_fatal(FARGS,"Error reading header from pdo file\n");
+        sscanf(line,"%*s%*s%*s%s%*s%*s%lf%*s%*s%lf",header->PullName[i]
+               ,&(header->UmbPos[i][0]),&(header->UmbCons[i][0]));
         if (opt->verbose)
-            printf("pullgroup %d, pullname = %s\n",i,header->PullName[i]);
-        for(j=0;j<header->nDim;++j)
-        {
-         if(6 != fscanf(file,"%s%s%lf%s%s%lf",Buffer0,Buffer1,&(header->UmbPos[i][j]),
-                        Buffer2,Buffer3,&(header->UmbCons[i][j])))
-          { 
-             gmx_fatal(FARGS,"Error reading header from pdo file");
-         }
-
-            if (opt->bFlipProf)
-            {
-                /* We want to combine both halves of a profile into one */
-                if(header->UmbPos[i][j]<0)
-                {
-                    header->UmbPos[i][j]= -header->UmbPos[i][j];
-                    header->Flipped[i]=TRUE;
-                }
-            }
-            else header->Flipped[i]=FALSE;
-            /*printf("%f\t%f\n",header->UmbPos[i][j],header->UmbCons[i][j]);*/
-        }
-    }
-
-    if(1 != fscanf(file,"%s",Buffer3))
-    { 
-       gmx_fatal(FARGS,"Error reading header from pdo file");
+            printf("\tpullgroup %d, pullname = %s, UmbPos = %g, UmbConst = %g\n",
+                   i,header->PullName[i],header->UmbPos[i][0],header->UmbCons[i][0]);
     }
-
+    
+    if (fgets(line,2048,file) == NULL)
+        gmx_fatal(FARGS,"Cannot read from file\n");
+    sscanf(line,"%s",Buffer3);
     if (strcmp(Buffer3,"#####") != 0)
         gmx_fatal(FARGS,"Expected '#####', found %s. Hick.\n",Buffer3);
 }
@@ -284,12 +395,11 @@ static char *fgets3(FILE *fp,char ptr[],int *len)
 {
     char *p;
     int  slen;
-
-
+    
     if (fgets(ptr,*len-1,fp) == NULL)
         return NULL;
     p = ptr;
-    while ((strchr(ptr,'\n') == NULL) && (!feof(fp)))
+    while ((strchr(ptr,'\n') == NULL) && (!feof(fp))) 
     {
         /* This line is longer than len characters, let's increase len! */
         *len += STRLEN;
@@ -301,57 +411,68 @@ static char *fgets3(FILE *fp,char ptr[],int *len)
     slen = strlen(ptr);
     if (ptr[slen-1] == '\n')
         ptr[slen-1] = '\0';
-
+    
     return ptr;
 }
 
-
 void read_pdo_data(FILE * file, t_UmbrellaHeader * header,
-        int fileno, t_UmbrellaWindow * win,
-        t_UmbrellaOptions *opt,
-        gmx_bool bGetMinMax,real *mintmp,real *maxtmp)
+                   int fileno, t_UmbrellaWindow * win,
+                   t_UmbrellaOptions *opt,
+                   gmx_bool bGetMinMax,real *mintmp,real *maxtmp)
 {
-    int i,inttemp,bins,count;
-    real min,max,minfound,maxfound;
+    int i,inttemp,bins,count,ntot;
+    real min,max,minfound=1e20,maxfound=-1e20;
     double temp,time,time0=0,dt;
-    char *ptr;
+    char *ptr=0;
     t_UmbrellaWindow * window=0;
     gmx_bool timeok,dt_ok=1;
-    char  *tmpbuf,fmt[256],fmtign[256];
-    int    len=STRLEN,dstep=1;
-
-       minfound=1e20;
-       maxfound=-1e20;
-       
+    char  *tmpbuf=0,fmt[256],fmtign[256];
+    int    len=STRLEN,dstep=1;  
+    const int blocklen=4096;
+    int *lennow=0;
+    
     if (!bGetMinMax)
     {
         bins=opt->bins;
         min=opt->min;
         max=opt->max;
-
+        
         window=win+fileno;
         /* Need to alocate memory and set up structure */
         window->nPull=header->nPull;
         window->nBin=bins;
-
+        
         snew(window->Histo,window->nPull);
         snew(window->z,window->nPull);
         snew(window->k,window->nPull);
         snew(window->pos,window->nPull);
-        snew(window->Flipped,window->nPull);
         snew(window->N, window->nPull);
         snew(window->Ntot, window->nPull);
-
-        for(i=0;i<window->nPull;++i)
+        snew(window->g, window->nPull);
+        snew(window->bsWeight, window->nPull);
+        
+        window->bContrib=0;
+        
+        if (opt->bCalcTauInt)
+            snew(window->ztime, window->nPull);
+        else
+            window->ztime=0;
+        snew(lennow,window->nPull);
+        
+        for(i=0;i<window->nPull;++i) 
         {
             window->z[i]=1;
+            window->bsWeight[i]=1.;
             snew(window->Histo[i],bins);
             window->k[i]=header->UmbCons[i][0];
             window->pos[i]=header->UmbPos[i][0];
-            window->Flipped[i]=header->Flipped[i];
             window->N[i]=0;
             window->Ntot[i]=0;
+            window->g[i]=1.;
+            if (opt->bCalcTauInt)
+                window->ztime[i]=0;      
         }
+        
         /* Done with setup */
     }
     else
@@ -360,24 +481,24 @@ void read_pdo_data(FILE * file, t_UmbrellaHeader * header,
         maxfound=-1e20;
         min=max=bins=0; /* Get rid of warnings */
     }
-
+    
     count=0;
     snew(tmpbuf,len);
     while ( (ptr=fgets3(file,tmpbuf,&len)) != NULL)
     {
         trim(ptr);
-
+        
         if (ptr[0] == '#' || strlen(ptr)<2)
             continue;
-
+        
         /* Initiate format string */
         fmtign[0] = '\0';
         strcat(fmtign,"%*s");
-
+        
         sscanf(ptr,"%lf",&time); /* printf("Time %f\n",time); */
         /* Round time to fs */
         time=1.0/1000*( (int) (time*1000+0.5) );
-
+        
         /* get time step of pdo file */
         if (count==0)
             time0=time;
@@ -386,7 +507,7 @@ void read_pdo_data(FILE * file, t_UmbrellaHeader * header,
             dt=time-time0;
             if (opt->dt>0.0)
             {
-             dstep=(int)(opt->dt/dt+0.5);
+                dstep=(int)(opt->dt/dt+0.5);
                 if (dstep==0)
                     dstep=1;
             }
@@ -394,58 +515,65 @@ void read_pdo_data(FILE * file, t_UmbrellaHeader * header,
                 window->dt=dt*dstep;
         }
         count++;
-
+        
         dt_ok=((count-1)%dstep == 0);
         timeok=(dt_ok && time >= opt->tmin && time <= opt->tmax);
         /* if (opt->verbose)
-      printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
-      time,opt->tmin, opt->tmax, dt_ok,timeok); */
-
+           printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n", 
+           time,opt->tmin, opt->tmax, dt_ok,timeok); */
+        
         if (timeok)
         {
-            for(i=0;i<header->nPull;++i)
+            for(i=0;i<header->nPull;++i) 
             {
                 strcpy(fmt,fmtign);
                 strcat(fmt,"%lf");      /* Creating a format stings such as "%*s...%*s%lf" */
                 strcat(fmtign,"%*s");   /* ignoring one more entry in the next loop */
-                if(sscanf(ptr,fmt,&temp))
+                if(sscanf(ptr,fmt,&temp)) 
                 {
-                    if(opt->bFlipProf)
-                    {
-                        if(header->Flipped[i]) temp=-temp;
-                    }
-
                     temp+=header->UmbPos[i][0];
-                    if (bGetMinMax){
+                    if (bGetMinMax)
+                    {
                         if (temp<minfound)
                             minfound=temp;
                         if (temp>maxfound)
                             maxfound=temp;
                     }
-                    else
-                    {
+                    else{
+                        if (opt->bCalcTauInt)
+                        {
+                            /* save time series for autocorrelation analysis */
+                            ntot=window->Ntot[i];
+                            if (ntot>=lennow[i])
+                            {
+                                lennow[i]+=blocklen;
+                                srenew(window->ztime[i],lennow[i]);
+                            }
+                            window->ztime[i][ntot]=temp;
+                        }
+                        
                         temp-=min;
                         temp/=(max-min);
                         temp*=bins;
                         temp=floor(temp);
-
+                        
                         inttemp = (int)temp;
-                        if (opt->cycl==enCycl_yes)
+                        if (opt->bCycl)
                         {
                             if (inttemp < 0)
                                 inttemp+=bins;
                             else if (inttemp >= bins)
                                 inttemp-=bins;
                         }
-
-                        if(inttemp >= 0 && inttemp < bins)
+                        
+                        if(inttemp >= 0 && inttemp < bins) 
                         {
-                            window->Histo[i][inttemp]+=1;
+                            window->Histo[i][inttemp]+=1.;
                             window->N[i]++;
                         }
                         window->Ntot[i]++;
                     }
-                }
+                }        
             }
         }
         if (time>opt->tmax)
@@ -455,49 +583,54 @@ void read_pdo_data(FILE * file, t_UmbrellaHeader * header,
             break;
         }
     }
-
+    
     if (bGetMinMax)
     {
         *mintmp=minfound;
         *maxtmp=maxfound;
     }
+    
+    sfree(lennow);
+    sfree(tmpbuf);
 }
 
-
 void enforceEqualWeights(t_UmbrellaWindow * window,int nWindows)
 {
     int i,k,j,NEnforced;
     double ratio;
-
-
+    
     NEnforced=window[0].Ntot[0];
     printf("\nFound -hist-eq. Enforcing equal weights for all histograms, \ni.e. doing a "
-            "non-weighted histogram analysis method. Ndata = %d\n",NEnforced);
+           "non-weighted histogram analysis method. Ndata = %d\n",NEnforced);
     /* enforce all histograms to have the same weight as the very first histogram */
-
+    
     for(j=0;j<nWindows;++j)
-        for(k=0;k<window[j].nPull;++k)
+    {
+        for(k=0;k<window[j].nPull;++k) 
         {
             ratio=1.0*NEnforced/window[j].Ntot[k];
             for(i=0;i<window[0].nBin;++i)
+            {
                 window[j].Histo[k][i]*=ratio;
-            window[j].N[k]=(int)(ratio*window[j].N[k]+0.5);
+            }
+            window[j].N[k]=(int)(ratio*window[j].N[k] + 0.5);
         }
+    }
 }
 
-
 /* Simple linear interpolation between two given tabulated points */
 double tabulated_pot(double dist, t_UmbrellaOptions *opt)
 {
     int jl,ju;
     double pl,pu,dz,dp;
 
-
     jl=floor((dist-opt->tabMin)/opt->tabDz);
     ju=jl+1;
     if (jl<0 || ju>=opt->tabNbins)
+    {
         gmx_fatal(FARGS,"Distance %f out of bounds of tabulated potential (jl=%d, ju=%d).\n"
-                "Provide an extended table.",dist,jl,ju);
+                  "Provide an extended table.",dist,jl,ju);
+    }
     pl=opt->tabY[jl];
     pu=opt->tabY[ju];
     dz=dist-opt->tabX[jl];
@@ -507,95 +640,127 @@ double tabulated_pot(double dist, t_UmbrellaOptions *opt)
 
 
 /* Don't worry, that routine does not mean we compute the PMF in limited precision.
-   After rapid convergence (using only substiantal contributions), we always switch to
+   After rapid convergence (using only substantial contributions), we always switch to
    full precision. */
-#define WHAM_CONTRIB_LIM 1e-10
-void setup_acc_wham(t_UmbrellaWindow * window,int nWindows, t_UmbrellaOptions *opt)
+void setup_acc_wham(double *profile,t_UmbrellaWindow * window,int nWindows, 
+                    t_UmbrellaOptions *opt)
 {
-    int i,j,k;
-    double U,min=opt->min,dz=opt->dz,temp,ztot_half,distance,ztot,contrib;
+    int i,j,k,nGrptot=0,nContrib=0,nTot=0;
+    double U,min=opt->min,dz=opt->dz,temp,ztot_half,distance,ztot,contrib1,contrib2;
     gmx_bool bAnyContrib;
-
-
+    static int bFirst=1;
+    static double wham_contrib_lim;
+    
+    if (bFirst)
+    {
+        for(i=0;i<nWindows;++i)
+        {
+            nGrptot+=window[i].nPull;
+        }
+        wham_contrib_lim=opt->Tolerance/nGrptot;
+    }
+    
     ztot=opt->max-opt->min;
     ztot_half=ztot/2;
-
-    for(i=0;i<nWindows;++i)
+       
+    for(i=0;i<nWindows;++i) 
     {
-        snew(window[i].bContrib,window[i].nPull);
-        for(j=0;j<window[i].nPull;++j)
+        if ( ! window[i].bContrib)
+        {
+            snew(window[i].bContrib,window[i].nPull);
+        }
+        for(j=0;j<window[i].nPull;++j) 
         {
-            snew(window[i].bContrib[j],opt->bins);
+            if ( ! window[i].bContrib[j])
+                snew(window[i].bContrib[j],opt->bins);
             bAnyContrib=FALSE;
-            for(k=0;k<opt->bins;++k)
+            for(k=0;k<opt->bins;++k) 
             {
                 temp=(1.0*k+0.5)*dz+min;
                 distance = temp - window[i].pos[j];   /* distance to umbrella center */
-                if (opt->cycl==enCycl_yes)
+                if (opt->bCycl)
                 {                                     /* in cyclic wham:             */
                     if (distance > ztot_half)           /*    |distance| < ztot_half   */
                         distance-=ztot;
                     else if (distance < -ztot_half)
                         distance+=ztot;
                 }
+                /* Note: there are two contributions to bin k in the wham equations:
+                   i)  N[j]*exp(- U/(8.314e-3*opt->Temperature) + window[i].z[j])
+                   ii) profile[k]*exp(- U/(8.314e-3*opt->Temperature))
+                   where U is the umbrella potential.
+                   If either of these numbers is larger than wham_contrib_lim,
+                   bContrib[j][k] is set to TRUE. */
+                
                 if (!opt->bTab)
                     U=0.5*window[i].k[j]*sqr(distance);       /* harmonic potential assumed. */
                 else
                     U=tabulated_pot(distance,opt);            /* Use tabulated potential     */
-
-                contrib=exp(- U/(8.314e-3*opt->Temperature));
-                window[i].bContrib[j][k] = (contrib > WHAM_CONTRIB_LIM);
+                
+                contrib1=profile[k]*exp(- U/(8.314e-3*opt->Temperature));
+                contrib2=window[i].N[j]*exp(- U/(8.314e-3*opt->Temperature) + window[i].z[j]);
+                window[i].bContrib[j][k] = (contrib1 > wham_contrib_lim || contrib2 > wham_contrib_lim);
                 bAnyContrib = (bAnyContrib | window[i].bContrib[j][k]);
+                if (window[i].bContrib[j][k])
+                    nContrib++;
+                nTot++;
             }
             /* If this histo is far outside min and max all bContrib may be FALSE,
-             * causing a floating point exception later on. To avoid that, switch
-             * them all to true.*/
+               causing a floating point exception later on. To avoid that, switch 
+               them all to true.*/
             if (!bAnyContrib)
                 for(k=0;k<opt->bins;++k)
                     window[i].bContrib[j][k]=TRUE;
         }
     }
-    printf("Initialized rapid wham stuff.\n");
+    if (bFirst)
+        printf("Initialized rapid wham stuff (contrib tolerance %g)\n"
+               "Evaluating only %d of %d expressions.\n\n",wham_contrib_lim,nContrib, nTot);
+    
+    if (opt->verbose)
+        printf("Updated rapid wham stuff. (evaluating only %d of %d contributions)\n",
+               nContrib,nTot);
+    bFirst=0;    
 }
 
 
-void calc_profile(double *profile,t_UmbrellaWindow * window, int nWindows, t_UmbrellaOptions *opt,
-        gmx_bool bExact)
+void calc_profile(double *profile,t_UmbrellaWindow * window, int nWindows, 
+                  t_UmbrellaOptions *opt, gmx_bool bExact)
 {
     int i,k,j;
     double num,ztot_half,ztot,distance,min=opt->min,dz=opt->dz;
-    double denom,U=0,temp=0;
-
-
+    double denom,U=0,temp=0,invg;
+    
     ztot=opt->max-opt->min;
-    ztot_half=ztot/2;
-
-
-    for(i=0;i<opt->bins;++i)
+    ztot_half=ztot/2;   
+    
+    for(i=0;i<opt->bins;++i) 
     {
-        num=denom=0;
-        for(j=0;j<nWindows;++j)
+        num=denom=0.;
+        for(j=0;j<nWindows;++j) 
         {
-            for(k=0;k<window[j].nPull;++k)
+            for(k=0;k<window[j].nPull;++k) 
             {
-                temp=(1.0*i+0.5)*dz+min;
-                num+=window[j].Histo[k][i];
-
+                invg = 1.0/window[j].g[k] * window[j].bsWeight[k];
+                temp = (1.0*i+0.5)*dz+min;
+                num += invg*window[j].Histo[k][i];
+                
                 if (! (bExact || window[j].bContrib[k][i]))
                     continue;
                 distance = temp - window[j].pos[k];   /* distance to umbrella center */
-                if (opt->cycl==enCycl_yes){           /* in cyclic wham:             */
+                if (opt->bCycl)
+                {                                     /* in cyclic wham:             */
                     if (distance > ztot_half)           /*    |distance| < ztot_half   */
                         distance-=ztot;
                     else if (distance < -ztot_half)
                         distance+=ztot;
                 }
-
+                
                 if (!opt->bTab)
                     U=0.5*window[j].k[k]*sqr(distance);       /* harmonic potential assumed. */
                 else
                     U=tabulated_pot(distance,opt);            /* Use tabulated potential     */
-                denom+=window[j].N[k]*exp(- U/(8.314e-3*opt->Temperature) + window[j].z[k]);
+                denom+=invg*window[j].N[k]*exp(- U/(8.314e-3*opt->Temperature) + window[j].z[k]);
             }
         }
         profile[i]=num/denom;
@@ -603,138 +768,108 @@ void calc_profile(double *profile,t_UmbrellaWindow * window, int nWindows, t_Umb
 }
 
 
-double calc_z(double * profile,t_UmbrellaWindow * window, int nWindows, t_UmbrellaOptions *opt,
-        gmx_bool bExact)
+double calc_z(double * profile,t_UmbrellaWindow * window, int nWindows, 
+              t_UmbrellaOptions *opt, gmx_bool bExact)
 {
-    int i,j,k;
-    double U=0,min=opt->min,dz=opt->dz,temp,ztot_half,distance,ztot;
+    int i,j,k,binMax=-1;
+    double U=0,min=opt->min,dz=opt->dz,temp,ztot_half,distance,ztot,totalMax;
     double MAX=-1e20, total=0;
-
-
+    
     ztot=opt->max-opt->min;
     ztot_half=ztot/2;
-
-    for(i=0;i<nWindows;++i)
+       
+    for(i=0;i<nWindows;++i) 
     {
-        for(j=0;j<window[i].nPull;++j)
+        for(j=0;j<window[i].nPull;++j) 
         {
             total=0;
-            for(k=0;k<window[i].nBin;++k)
+            for(k=0;k<window[i].nBin;++k) 
             {
                 if (! (bExact || window[i].bContrib[j][k]))
                     continue;
                 temp=(1.0*k+0.5)*dz+min;
                 distance = temp - window[i].pos[j];   /* distance to umbrella center */
-                if (opt->cycl==enCycl_yes)
+                if (opt->bCycl)
                 {                                     /* in cyclic wham:             */
                     if (distance > ztot_half)           /*    |distance| < ztot_half   */
                         distance-=ztot;
                     else if (distance < -ztot_half)
                         distance+=ztot;
                 }
-
+                
                 if (!opt->bTab)
                     U=0.5*window[i].k[j]*sqr(distance);       /* harmonic potential assumed. */
                 else
                     U=tabulated_pot(distance,opt);            /* Use tabulated potential     */
-
+                
                 total+=profile[k]*exp(-U/(8.314e-3*opt->Temperature));
             }
-            if (total > 0.0)
+            /* Avoid floating point exception if window is far outside min and max */
+            if (total != 0.0)
                 total = -log(total);
             else
                 total = 1000.0;
             temp = fabs(total - window[i].z[j]);
-            if(temp > MAX) MAX=temp;
+            if(temp > MAX){
+                MAX=temp;
+                binMax=k;
+                totalMax=total;
+            }
             window[i].z[j] = total;
         }
     }
     return MAX;
 }
 
-
-void cyclicProfByWeightedCorr(double *profile,t_UmbrellaWindow *window,
-                              int nWindows, t_UmbrellaOptions * opt,
-                              gmx_bool bAppendCorr2File, const char *fn, 
-                              const output_env_t oenv)
+void symmetrizeProfile(double* profile,t_UmbrellaOptions *opt)
 {
-    int i,j,k,bins=opt->bins;
-    static int first=1;
-    double *weight,sum=0.,diff,*histsum,*corr,sumCorr=0.,dCorr;
-    FILE *fp;
-    char buf[256];
-
-    if (first)
-    {
-        printf("\nEnforcing a periodic profile by a sampling wheighted correction.");
-        please_cite(stdout,"Hub2008");
-    }
-
-    snew(weight,bins-1);
-    snew(histsum,bins);
-    snew(corr,bins-1);
-
-    /* generate weights proportional to 1/(n(i)*n(i+1))^alpha
-     where n(i) is the total nur of data points in bin i from all histograms */
-    for(i=0;i<nWindows;++i)
-        for(j=0;j<window[i].nPull;++j)
-            for(k=0;k<bins;++k)
-                histsum[k]+=window[i].Histo[j][k];
-
-    for(k=0,sum=0.;k<bins-1;++k)
-    {
-        weight[k]=1./pow(histsum[k]*histsum[k+1],opt->alpha);
-        sum+=weight[k];
-    }
-    for(k=0;k<bins-1;++k)
-        weight[k]/=sum;
-
-    /* difference between last and first bin */
-    diff=profile[bins-1]-profile[0];
-    printf("Distributing %f between adjacent bins to enforce a cyclic profile\n",diff);
-
-    for (i=0;i<bins-1;i++)
-    {
-        dCorr=weight[i]*diff;
-        sumCorr+=dCorr;
-        corr[i]=sumCorr;
-    }
-
-    for (i=0;i<bins-1;i++)
-        profile[i+1]-=corr[i];
-
-    if (bAppendCorr2File)
+    int i,j;
+    double *prof2,bins=opt->bins,min=opt->min,max=opt->max,dz=opt->dz,zsym,deltaz,profsym;
+    double z,z1;
+    
+    if (min>0. || max<0.)
+        gmx_fatal(FARGS,"Cannot symmetrize profile around z=0 with min=%f and max=%f\n",
+                  opt->min,opt->max);
+    
+    snew(prof2,bins);
+    
+    for (i=0;i<bins;i++)
     {
-        fp=xvgropen(fn,"Corrections to enforce periodicity","z",
-                    "\\f{12}D\\f{}G(z)",oenv);
-        sprintf(buf,"corrections propotional to 1/(n\\si\\Nn\\si+1\\N)\\S%.2f",
-                opt->alpha);
-        xvgr_subtitle(fp,buf,oenv);
-        for (i=0;i<bins-1;i++)
-            fprintf(fp,"%g %g\n",opt->min+opt->dz*(i+1),-corr[i]);
-        ffclose(fp);
+        z=min+(i+0.5)*dz;
+        zsym=-z;
+        /* bin left of zsym */
+        j=floor((zsym-min)/dz-0.5);
+        if (j>=0 && (j+1)<bins)
+        {
+            /* interpolate profile linearly between bins j and j+1 */
+            z1=min+(j+0.5)*dz;
+            deltaz=zsym-z1;
+            profsym=profile[j] + (profile[j+1]-profile[j])/dz*deltaz;
+            /* average between left and right */
+            prof2[i]=0.5*(profsym+profile[i]);
+        }
+        else
+        {
+            prof2[i]=profile[i];
+        }
     }
-
-    sfree(histsum);
-    sfree(corr);
-    sfree(weight);
-    first=0;
+    
+    memcpy(profile,prof2,bins*sizeof(double));
+    sfree(prof2);
 }
 
-
 void prof_normalization_and_unit(double * profile, t_UmbrellaOptions *opt)
 {
     int i,bins,imin;
     double unit_factor=1., R_MolarGasConst, diff;
-
-
+    
     R_MolarGasConst=8.314472e-3; /* in kJ/(mol*K) */
     bins=opt->bins;
-
-    /* No log? Nothing to do! */
+    
+    /* Not log? Nothing to do! */
     if (!opt->bLog)
         return;
-
+    
     /* Get profile in units of kT, kJ/mol, or kCal/mol */
     if (opt->unit == en_kT)
         unit_factor=1.0;
@@ -743,59 +878,61 @@ void prof_normalization_and_unit(double * profile, t_UmbrellaOptions *opt)
     else if (opt->unit == en_kCal)
         unit_factor=R_MolarGasConst*opt->Temperature/4.1868;
     else
-        gmx_fatal(FARGS,"Sorry I don't know this energy unit.");
-
+        gmx_fatal(FARGS,"Sorry, I don't know this energy unit.");
+    
     for (i=0;i<bins;i++)
         if (profile[i]>0.0)
             profile[i]=-log(profile[i])*unit_factor;
-
+    
     /* shift to zero at z=opt->zProf0 */
     if (!opt->bProf0Set)
+    {
         diff=profile[0];
-    else{
-        /* Get bin with shortest distance to opt->zProf0 */
-      imin=(int)((opt->zProf0-opt->min)/opt->dz);
+    }
+    else
+    {
+        /* Get bin with shortest distance to opt->zProf0 
+           (-0.5 from bin position and +0.5 from rounding cancel) */
+        imin=(int)((opt->zProf0-opt->min)/opt->dz);
         if (imin<0)
             imin=0;
         else if (imin>=bins)
             imin=bins-1;
         diff=profile[imin];
     }
-
+  
     /* Shift to zero */
     for (i=0;i<bins;i++)
         profile[i]-=diff;
 }
 
-
-void getRandomIntArray(int nPull,int blockLength,int* randomArray)
+void getRandomIntArray(int nPull,int blockLength,int* randomArray,gmx_rng_t rng)
 {
     int ipull,blockBase,nr,ipullRandom;
-
-
+    
     if (blockLength==0)
         blockLength=nPull;
-
+    
     for (ipull=0; ipull<nPull; ipull++)
     {
         blockBase=(ipull/blockLength)*blockLength;
-        do{      /* make sure nothing bad happens in the last block */
-            nr=(int)((1.0*rand()/RAND_MAX)*blockLength);
+        do
+        {      /* make sure nothing bad happens in the last block */
+            nr=(int)(gmx_rng_uniform_real(rng)*blockLength);
             ipullRandom = blockBase + nr;
         } while (ipullRandom >= nPull);
         if (ipullRandom<0 || ipullRandom>=nPull)
             gmx_fatal(FARGS,"Ups, random iWin = %d, nPull = %d, nr = %d, "
-                    "blockLength = %d, blockBase = %d\n",
-                    ipullRandom,nPull,nr,blockLength,blockBase);
+                      "blockLength = %d, blockBase = %d\n",
+                      ipullRandom,nPull,nr,blockLength,blockBase);
         randomArray[ipull]=ipullRandom;
     }
     /*for (ipull=0; ipull<nPull; ipull++)
-    printf("%d ",randomArray[ipull]); printf("\n"); */
+      printf("%d ",randomArray[ipull]); printf("\n"); */
 }
 
-
 void copy_pullgrp_to_synthwindow(t_UmbrellaWindow *synthWindow,
-        t_UmbrellaWindow *thisWindow,int pullid)
+                                 t_UmbrellaWindow *thisWindow,int pullid)
 {
     synthWindow->N       [0]=thisWindow->N        [pullid];
     synthWindow->Histo   [0]=thisWindow->Histo    [pullid];
@@ -803,31 +940,30 @@ void copy_pullgrp_to_synthwindow(t_UmbrellaWindow *synthWindow,
     synthWindow->z       [0]=thisWindow->z        [pullid];
     synthWindow->k       [0]=thisWindow->k        [pullid];
     synthWindow->bContrib[0]=thisWindow->bContrib [pullid];
+    synthWindow->g       [0]=thisWindow->g        [pullid];
+    synthWindow->bsWeight[0]=thisWindow->bsWeight [pullid];
 }
 
-
-/* Calculate cummulative of all histograms. They allow to create random numbers
+/* Calculate cumulative distribution functions of all histograms. They
+   allow creating random number sequences
    which are distributed according to the histograms. Required to generate
    the "synthetic" histograms for the Bootstrap method */
-void calc_cummulants(t_UmbrellaWindow *window,int nWindows,
-                     t_UmbrellaOptions *opt,const char *fnhist, 
-                     const output_env_t oenv)
+void calc_cumulatives(t_UmbrellaWindow *window,int nWindows,
+                      t_UmbrellaOptions *opt,const char *fnhist)
 {
     int i,j,k,nbin;
     double last;
     char *fn=0,*buf=0;
     FILE *fp=0;
-
-
+    
     if (opt->bs_verbose)
     {
         snew(fn,strlen(fnhist)+10);
         snew(buf,strlen(fnhist)+10);
         sprintf(fn,"%s_cumul.xvg",strncpy(buf,fnhist,strlen(fnhist)-4));
-        fp=xvgropen(fn,"Cummulants of umbrella histograms","z","cummulant",
-                    oenv);
+        fp=xvgropen(fn,"CDFs of umbrella windows","z","CDF",opt->oenv);
     }
-
+    
     nbin=opt->bins;
     for (i=0; i<nWindows; i++)
     {
@@ -838,15 +974,15 @@ void calc_cummulants(t_UmbrellaWindow *window,int nWindows,
             window[i].cum[j][0]=0.;
             for (k=1; k<=nbin; k++)
                 window[i].cum[j][k] = window[i].cum[j][k-1]+window[i].Histo[j][k-1];
-
-            /* normalize cummulant. Ensure cum[nbin]==1 */
+            
+            /* normalize CDFs. Ensure cum[nbin]==1 */
             last = window[i].cum[j][nbin];
             for (k=0; k<=nbin; k++)
                 window[i].cum[j][k] /= last;
         }
     }
-
-    printf("Cumulants of all histograms created.\n");
+    
+    printf("Cumulative distriubtion functions of all histograms created.\n");
     if (opt->bs_verbose)
     {
         for (k=0; k<=nbin; k++)
@@ -857,7 +993,7 @@ void calc_cummulants(t_UmbrellaWindow *window,int nWindows,
                     fprintf(fp,"%g\t",window[i].cum[j][k]);
             fprintf(fp,"\n");
         }
-        printf("Wrote cumulants to %s\n",fn);
+        printf("Wrote cumulative distribution functions to %s\n",fn);
         ffclose(fp);
         sfree(fn);
         sfree(buf);
@@ -866,14 +1002,13 @@ void calc_cummulants(t_UmbrellaWindow *window,int nWindows,
 
 
 /* Return j such that xx[j] <= x < xx[j+1] */
-void searchCummulant(double xx[], int n, double x, int *j)
+void searchCumulative(double xx[], int n, double x, int *j)
 {
     int ju,jm,jl;
-
-
+    
     jl=-1;
     ju=n;
-    while (ju-jl > 1)
+    while (ju-jl > 1) 
     {
         jm=(ju+jl) >> 1;
         if (x >= xx[jm])
@@ -889,64 +1024,140 @@ void searchCummulant(double xx[], int n, double x, int *j)
         *j=jl;
 }
 
-
-void create_synthetic_histo(t_UmbrellaWindow *synthWindow, 
-                            t_UmbrellaWindow *thisWindow,
+void create_synthetic_histo(t_UmbrellaWindow *synthWindow, t_UmbrellaWindow *thisWindow,
                             int pullid,t_UmbrellaOptions *opt)
 {
-    int nsynth,N,i,nbins,r_index;
-    double r;
-    static gmx_bool bWarnout=0;
-
-
+    int N,i,nbins,r_index,ibin;
+    double r,tausteps=0.0,a,ap,dt,x,invsqrt2,g,y,sig=0.,z,mu=0.;
+    char errstr[1024];
+    
     N=thisWindow->N[pullid];
+    dt=thisWindow->dt;
     nbins=thisWindow->nBin;
-
-    /* nsynth = nr of data points in synthetic histo */
-    if (opt->dtBootStrap==0.0)
-        nsynth=N;
-    else
+    
+    /* tau = autocorrelation time */
+    if (opt->tauBootStrap>0.0)
+        tausteps=opt->tauBootStrap/dt;
+    else if (opt->bTauIntGiven || opt->bCalcTauInt)
     {
-      nsynth=(int)(thisWindow->N[pullid]*thisWindow->dt/opt->dtBootStrap+0.5);
-        if (nsynth>N)
-            nsynth=N;
+        /* calc tausteps from g=1+2tausteps */
+        g=thisWindow->g[pullid];
+        tausteps=(g-1)/2;
     }
-
-    if (!bWarnout && nsynth<10)
+    else
     {
-        printf("\n++++ WARNING ++++\n\tOnly %d data points per synthetic histogram!\n"
-                "\tYou may want to consider option -bs-dt.\n\n",nsynth);
-        bWarnout=1;
+        sprintf(errstr,
+                "When generating hypothetical trajctories from given umbrella histograms,\n"
+                "autocorrelation times (ACTs) are required. Otherwise the statistical error\n"
+                "cannot be predicted. You have 3 options:\n"
+                "1) Make g_wham estimate the ACTs (options -ac and -acsig).\n"
+                "2) Calculate the ACTs by yourself (e.g. with g_analyze) and provide them\n");
+        strcat(errstr,
+               "   with option -iiact for all umbrella windows.\n"
+               "3) If all ACTs are identical and know, you can define them with -bs-tau.\n"
+               "   Use option (3) only if you are sure what you're doing, you may severely\n"
+               "   underestimate the error if a too small ACT is given.\n");
+        gmx_fatal(FARGS,errstr);
     }
 
-    synthWindow->N       [0]=nsynth;
+    synthWindow->N       [0]=N;
     synthWindow->pos     [0]=thisWindow->pos[pullid];
     synthWindow->z       [0]=thisWindow->z[pullid];
     synthWindow->k       [0]=thisWindow->k[pullid];
     synthWindow->bContrib[0]=thisWindow->bContrib[pullid];
-
+    synthWindow->g       [0]=thisWindow->g       [pullid];
+    synthWindow->bsWeight[0]=thisWindow->bsWeight[pullid];
+    
     for (i=0;i<nbins;i++)
         synthWindow->Histo[0][i]=0.;
-
-    for (i=0;i<nsynth;i++)
+    
+    if (opt->bsMethod==bsMethod_trajGauss)
+    {
+        sig = thisWindow->sigma [pullid];
+        mu  = thisWindow->aver  [pullid];
+    }
+    
+    /* Generate autocorrelated Gaussian random variable with autocorrelation time tau.
+       Use the following:
+       If x and y are random numbers from N(0,1) (Gaussian with average 0 and sigma=1),
+       then
+       z = a*x + sqrt(1-a^2)*y
+       is also from N(0,1), and cov(z,x) = a. Thus, by generating a sequence
+       x' = a*x + sqrt(1-a^2)*y, the sequence x(t) is from N(0,1) and has an autocorrelation
+       function
+       C(t) = exp(-t/tau) with tau=-1/ln(a)
+
+       Then, use error function to turn the Gaussian random variable into a uniformly
+       distributed one in [0,1]. Eventually, use cumulative distribution function of
+       histogram to get random variables distributed according to histogram.
+       Note: The ACT of the flat distribution and of the generated histogram is not
+       100% exactly tau, but near tau (my test was 3.8 instead of 4).
+    */
+    a=exp(-1.0/tausteps);
+    ap=sqrt(1-a*a);
+    invsqrt2=1./sqrt(2.0);
+    
+    /* init random sequence */
+    x=gmx_rng_gaussian_table(opt->rng); 
+    
+    if (opt->bsMethod==bsMethod_traj)
+    {
+        /* bootstrap points from the umbrella histograms */
+        for (i=0;i<N;i++)
+        {
+            y=gmx_rng_gaussian_table(opt->rng);
+            x=a*x+ap*y;
+            /* get flat distribution in [0,1] using cumulative distribution function of Gaussian
+               Note: CDF(Gaussian) = 0.5*{1+erf[x/sqrt(2)]}
+            */           
+            r=0.5*(1+gmx_erf(x*invsqrt2));
+            searchCumulative(thisWindow->cum[pullid],nbins+1 ,r,&r_index);
+            synthWindow->Histo[0][r_index]+=1.;    
+        }
+    }
+    else if (opt->bsMethod==bsMethod_trajGauss)
+    {
+        /* bootstrap points from a Gaussian with the same average and sigma
+           as the respective umbrella histogram. The idea was that, given
+           limited sampling, the bootstrapped histograms would otherwise be
+           biased by the limited sampling of the US histos. However, bootstrapping
+           from the Gaussian seems to yield a similar estimate. */
+        i=0;
+        while (i<N)
+        {
+            y=gmx_rng_gaussian_table(opt->rng);
+            x=a*x+ap*y;
+            z = x*sig+mu;
+            ibin=floor((z-opt->min)/opt->dz);
+            if (opt->bCycl)
+            {
+                if (ibin<0)
+                    while ( (ibin+=nbins) < 0);
+                else if (ibin>=nbins)
+                    while ( (ibin-=nbins) >= nbins);
+            }
+            
+            if (ibin>=0 && ibin<nbins)
+            {
+                synthWindow->Histo[0][ibin]+=1.;
+                i++;
+            }
+        }
+    }
+    else
     {
-        r=1.0*rand()/RAND_MAX;
-        searchCummulant(thisWindow->cum[pullid],nbins+1 ,r,&r_index);
-        synthWindow->Histo[0][r_index]+=1.;
+        gmx_fatal(FARGS,"Unknown bsMethod (id %d). That should not happen.\n",opt->bsMethod);
     }
 }
 
 
-void print_histograms(const char *fnhist, t_UmbrellaWindow * window, 
-                      int nWindows, int bs_index,t_UmbrellaOptions *opt, 
-                      const output_env_t oenv)
+void print_histograms(const char *fnhist, t_UmbrellaWindow * window, int nWindows,
+                      int bs_index,t_UmbrellaOptions *opt)
 {
-    char *fn;
-    char *buf=0,title[256];
+    char *fn=0,*buf=0,title[256];
     FILE *fp;
     int bins,l,i,j;
-
-
+    
     if (bs_index<0)
     {
         fn=strdup(fnhist);
@@ -954,29 +1165,29 @@ void print_histograms(const char *fnhist, t_UmbrellaWindow * window,
     }
     else
     {
-        snew(fn,strlen(fnhist)+6);
+        snew(fn,strlen(fnhist)+10);
         snew(buf,strlen(fnhist)+1);
         sprintf(fn,"%s_bs%d.xvg",strncpy(buf,fnhist,strlen(fnhist)-4),bs_index);
         sprintf(title,"Umbrella histograms. Bootstrap #%d",bs_index);
     }
-
-    fp=xvgropen(fn,title,"z","count",oenv);
+    
+    fp=xvgropen(fn,title,"z","count",opt->oenv);
     bins=opt->bins;
-
+    
     /* Write histograms */
-    for(l=0;l<bins;++l)
+    for(l=0;l<bins;++l) 
     {
         fprintf(fp,"%e\t",(double)(l+0.5)*opt->dz+opt->min);
-        for(i=0;i<nWindows;++i)
+        for(i=0;i<nWindows;++i) 
         {
-            for(j=0;j<window[i].nPull;++j)
+            for(j=0;j<window[i].nPull;++j) 
             {
                 fprintf(fp,"%e\t",window[i].Histo[j][l]);
             }
         }
         fprintf(fp,"\n");
     }
-
+    
     ffclose(fp);
     printf("Wrote %s\n",fn);
     if (buf)
@@ -986,11 +1197,52 @@ void print_histograms(const char *fnhist, t_UmbrellaWindow * window,
     }
 }
 
+int func_wham_is_larger(const void *a, const void *b)
+{
+    double *aa,*bb;
+    aa=(double*)a;
+    bb=(double*)b;
+    if (*aa < *bb)
+        return -1;
+    else if (*aa > *bb)
+        return 1;
+    else
+        return 0;
+}
+
+
+void setRandomBsWeights(t_UmbrellaWindow *synthwin,int nAllPull, t_UmbrellaOptions *opt)
+{
+    int i;
+    double *r;
+    
+    snew(r,nAllPull);
+    
+    /* generate ordered random numbers between 0 and nAllPull  */
+    for (i=0; i<nAllPull-1; i++)
+    {
+        r[i] = gmx_rng_uniform_real(opt->rng) * nAllPull;
+    }
+    qsort((void *)r,nAllPull-1, sizeof(double), &func_wham_is_larger);
+    r[nAllPull-1]=1.0*nAllPull;
+    
+    synthwin[0].bsWeight[0]=r[0];
+    for (i=1; i<nAllPull; i++)
+    {
+        synthwin[i].bsWeight[0]=r[i]-r[i-1];
+    }
+    
+    /* avoid zero weights by clamping each weight to a tiny minimum value */
+    for (i=0; i<nAllPull; i++)
+        if (synthwin[i].bsWeight[0] < 1e-5)   
+            synthwin[i].bsWeight[0] = 1e-5;
+
+    sfree(r);
+}
 
-void do_bootstrapping(const char *fnres, const char* fnprof, 
-                      const char *fnhist, char* ylabel, double *profile,
-                      t_UmbrellaWindow * window, int nWindows, 
-                      t_UmbrellaOptions *opt, const output_env_t oenv)
+void do_bootstrapping(const char *fnres, const char* fnprof, const char *fnhist,
+                      char* ylabel, double *profile,
+                      t_UmbrellaWindow * window, int nWindows, t_UmbrellaOptions *opt)
 {
     t_UmbrellaWindow * synthWindow;
     double *bsProfile,*bsProfiles_av, *bsProfiles_av2,maxchange=1e20,tmp,stddev;
@@ -998,21 +1250,20 @@ void do_bootstrapping(const char *fnres, const char* fnprof,
     int iAllPull,nAllPull,*allPull_winId,*allPull_pullId;
     FILE *fp;
     gmx_bool bExact=FALSE;
-
-
-    /* init random */
+    
+    /* init random generator */
     if (opt->bsSeed==-1)
-        srand(time(NULL));
+        opt->rng=gmx_rng_init(gmx_rng_make_seed());
     else
-        srand(opt->bsSeed);
-
+        opt->rng=gmx_rng_init(opt->bsSeed);
+    
     snew(bsProfile,     opt->bins);
     snew(bsProfiles_av, opt->bins);
     snew(bsProfiles_av2,opt->bins);
-
+    
     /* Create array of all pull groups. Note that different windows
-     may have different nr of pull groups
-     First: Get total nr of pull groups */
+       may have different nr of pull groups
+       First: Get total nr of pull groups */
     nAllPull=0;
     for (i=0;i<nWindows;i++)
         nAllPull+=window[i].nPull;
@@ -1021,13 +1272,15 @@ void do_bootstrapping(const char *fnres, const char* fnprof,
     iAllPull=0;
     /* Setup one array of all pull groups */
     for (i=0;i<nWindows;i++)
+    {
         for (j=0;j<window[i].nPull;j++)
         {
             allPull_winId[iAllPull]=i;
             allPull_pullId[iAllPull]=j;
             iAllPull++;
         }
-
+    }
+    
     /* setup stuff for synthetic windows */
     snew(synthWindow,nAllPull);
     for (i=0;i<nAllPull;i++)
@@ -1035,66 +1288,90 @@ void do_bootstrapping(const char *fnres, const char* fnprof,
         synthWindow[i].nPull=1;
         synthWindow[i].nBin=opt->bins;
         snew(synthWindow[i].Histo,1);
-        if (!opt->bHistBootStrap)
+        if (opt->bsMethod == bsMethod_traj || opt->bsMethod == bsMethod_trajGauss)
             snew(synthWindow[i].Histo[0],opt->bins);
         snew(synthWindow[i].N,1);
         snew(synthWindow[i].pos,1);
         snew(synthWindow[i].z,1);
         snew(synthWindow[i].k,1);
         snew(synthWindow[i].bContrib,1);
+        snew(synthWindow[i].g,1);
+        snew(synthWindow[i].bsWeight,1);
     }
-
-    if (opt->bHistBootStrap)
+    
+    switch(opt->bsMethod)
     {
+    case bsMethod_hist:
         snew(randomArray,nAllPull);
         printf("\n\nWhen computing statistical errors by bootstrapping entire histograms:\n");
         please_cite(stdout,"Hub2006");
+        break;
+    case bsMethod_BayesianHist :
+        /* just copy all histograms into synthWindow array */
+        for (i=0;i<nAllPull;i++)
+        {
+            winid =allPull_winId [i];
+            pullid=allPull_pullId[i];
+            copy_pullgrp_to_synthwindow(synthWindow+i,window+winid,pullid);
+        }
+        break;
+    case bsMethod_traj:
+    case bsMethod_trajGauss:   
+        calc_cumulatives(window,nWindows,opt,fnhist);
+        break;
+    default:
+        gmx_fatal(FARGS,"Unknown bootstrap method. That should not have happened.\n");
     }
-    else
-    {
-        calc_cummulants(window,nWindows,opt,fnhist,oenv);
-    }
-
+  
     /* do bootstrapping */
-    fp=xvgropen(fnprof,"Boot strap profiles","z",ylabel,oenv);
+    fp=xvgropen(fnprof,"Boot strap profiles","z",ylabel,opt->oenv);
     for (ib=0;ib<opt->nBootStrap;ib++)
     {
         printf("  *******************************************\n"
-                "  ******** Start bootstrap nr %d ************\n"
-                "  *******************************************\n",ib+1);
-
-        if (opt->bHistBootStrap)
+               "  ******** Start bootstrap nr %d ************\n"
+               "  *******************************************\n",ib+1);
+        
+        switch(opt->bsMethod)
         {
-            /* only mix given histos */
-            getRandomIntArray(nAllPull,opt->histBootStrapBlockLength,randomArray);
-            for (i=0;i<nAllPull;i++)
-            {
+        case bsMethod_hist:  
+            /* bootstrap complete histograms from given histograms */
+            getRandomIntArray(nAllPull,opt->histBootStrapBlockLength,randomArray,opt->rng);
+            for (i=0;i<nAllPull;i++){
                 winid =allPull_winId [randomArray[i]];
                 pullid=allPull_pullId[randomArray[i]];
                 copy_pullgrp_to_synthwindow(synthWindow+i,window+winid,pullid);
             }
-        }
-        else
-        {
-            /* create new histos from given histos */
+            break;
+        case bsMethod_BayesianHist:  
+            /* keep histos, but assign random weights ("Bayesian bootstrap") */
+            setRandomBsWeights(synthWindow,nAllPull,opt);
+            break;
+        case bsMethod_traj:
+        case bsMethod_trajGauss:       
+            /* create new histos from given histos, that is generate new hypothetical
+               trajectories */
             for (i=0;i<nAllPull;i++)
             {
                 winid=allPull_winId[i];
-                pullid=allPull_pullId[i];
+                pullid=allPull_pullId[i];        
                 create_synthetic_histo(synthWindow+i,window+winid,pullid,opt);
             }
+            break;
         }
-
-        /* print histos in case of verbose output */
+        
+        /* write histos in case of verbose output */
         if (opt->bs_verbose)
-            print_histograms(fnhist,synthWindow,nAllPull,ib,opt,oenv);
-
+            print_histograms(fnhist,synthWindow,nAllPull,ib,opt);
+        
         /* do wham */
         i=0;
         bExact=FALSE;
         maxchange=1e20;
         memcpy(bsProfile,profile,opt->bins*sizeof(double)); /* use profile as guess */
-        do {
+        do 
+        {
+            if ( (i%opt->stepUpdateContrib) == 0)
+                setup_acc_wham(bsProfile,synthWindow,nAllPull,opt);
             if (maxchange<opt->Tolerance)
                 bExact=TRUE;
             if (((i%opt->stepchange) == 0 || i==1) && !i==0)
@@ -1103,14 +1380,14 @@ void do_bootstrapping(const char *fnres, const char* fnprof,
             i++;
         } while( (maxchange=calc_z(bsProfile, synthWindow, nAllPull, opt,bExact)) > opt->Tolerance || !bExact);
         printf("\tConverged in %d iterations. Final maximum change %g\n",i,maxchange);
-
+        
         if (opt->bLog)
             prof_normalization_and_unit(bsProfile,opt);
-        /* Force cyclic profile by wheighted correction */
-        if (opt->cycl==enCycl_weighted)
-            cyclicProfByWeightedCorr(bsProfile,synthWindow,nAllPull,opt, 
-                                     FALSE, 0,oenv);
-
+        
+        /* symmetrize profile around z=0 */
+        if (opt->bSym)
+            symmetrizeProfile(bsProfile,opt);
+        
         /* save stuff to get average and stddev */
         for (i=0;i<opt->bins;i++)
         {
@@ -1122,12 +1399,9 @@ void do_bootstrapping(const char *fnres, const char* fnprof,
         fprintf(fp,"&\n");
     }
     ffclose(fp);
-
+  
     /* write average and stddev */
-    fp=ffopen(fnres,"w");
-    fprintf(fp,"@    title \"%s\"\n","Average and stddev from bootstrapping");
-    fprintf(fp,"@    xaxis  label \"%s\"\n","z");
-    fprintf(fp,"@    yaxis  label \"%s\"\n",ylabel);
+    fp=xvgropen(fnres,"Average and stddev from bootstrapping","z",ylabel,opt->oenv);
     fprintf(fp,"@TYPE xydy\n");
     for (i=0;i<opt->bins;i++)
     {
@@ -1141,8 +1415,7 @@ void do_bootstrapping(const char *fnres, const char* fnprof,
     printf("Wrote boot strap result to %s\n",fnres);
 }
 
-
-int whaminFileType(const char *fn)
+int whaminFileType(char *fn)
 {
     int len;
     len=strlen(fn);
@@ -1157,85 +1430,122 @@ int whaminFileType(const char *fn)
     return whamin_unknown;
 }
 
-
 void read_wham_in(const char *fn,char ***filenamesRet, int *nfilesRet,
                   t_UmbrellaOptions *opt)
 {
-    char **filename,tmp[STRLEN];
-    int nread,sizenow,i,block=10;
+    char **filename=0,tmp[STRLEN];
+    int nread,sizenow,i,block=1;
     FILE *fp;
-#define MAXFILELEN 512
-
-
+    
     fp=ffopen(fn,"r");
-    sizenow=block;
-    snew(filename,sizenow);
-    for (i=0;i<sizenow;i++)
-        snew(filename[i],MAXFILELEN);
     nread=0;
+    sizenow=0;
     while (fscanf(fp,"%s",tmp) != EOF)
     {
-        if (strlen(tmp)>=MAXFILELEN)
-            gmx_fatal(FARGS,"Filename too long. Only %d characters allowed\n",MAXFILELEN);
-        strcpy(filename[nread],tmp);
-        if (opt->verbose)
-            printf("Found file %s in %s\n",filename[nread],fn);
-        nread++;
+        if (strlen(tmp)>=WHAM_MAXFILELEN)
+            gmx_fatal(FARGS,"Filename too long. Only %d characters allowed\n",WHAM_MAXFILELEN);
         if (nread>=sizenow)
         {
             sizenow+=block;
             srenew(filename,sizenow);
             for (i=sizenow-block;i<sizenow;i++)
-                snew(filename[i],MAXFILELEN);
+                snew(filename[i],WHAM_MAXFILELEN);
         }
+        strcpy(filename[nread],tmp);
+        if (opt->verbose)
+            printf("Found file %s in %s\n",filename[nread],fn);
+        nread++;
     }
     *filenamesRet=filename;
     *nfilesRet=nread;
 }
 
 
-FILE *pdo_open_file(const char *fn)
+FILE *open_pdo_pipe(const char *fn, t_UmbrellaOptions *opt,gmx_bool *bPipeOpen)
 {
     char Buffer[1024],gunzip[1024],*Path=0;
-    FILE *fp;
-
-    if (!gmx_fexist(fn))
-       {
-        gmx_fatal(FARGS,"File %s does not exist.\n",fn);
-       }
-       
+    FILE *pipe=0;
+    static gmx_bool bFirst=1;  
+    
     /* gzipped pdo file? */
-    if (strcmp(fn+strlen(fn)-3,".gz")==0)
+    if ((strcmp(fn+strlen(fn)-3,".gz")==0))
     {
-#ifdef HAVE_PIPES
+        /* search gunzip executable */
         if(!(Path=getenv("GMX_PATH_GZIP")))
-            sprintf(gunzip,"%s","/bin/gunzip");
+        {
+            if (gmx_fexist("/bin/gunzip"))
+                sprintf(gunzip,"%s","/bin/gunzip");
+            else if (gmx_fexist("/usr/bin/gunzip"))
+                sprintf(gunzip,"%s","/usr/bin/gunzip");
+            else
+                gmx_fatal(FARGS,"Cannot find executable gunzip in /bin or /usr/bin.\n"
+                          "You may want to define the path to gunzip "
+                          "with the environment variable GMX_PATH_GZIP.",gunzip);
+        }
         else
+        {
             sprintf(gunzip,"%s/gunzip",Path);
-        if (!gmx_fexist(gunzip))
-            gmx_fatal(FARGS,"Cannot find executable %s. You may want to define the path to gunzip "
-                    "with the environment variable GMX_PATH_GZIP.",gunzip);
-        sprintf(Buffer,"%s -c < %s",gunzip,fn);
-               if((fp=popen(Buffer,"r"))==NULL)
-               {
-                       gmx_fatal(FARGS,"Unable to open pipe to `%s'\n",Buffer);
-               }
+            if (!gmx_fexist(gunzip))
+                gmx_fatal(FARGS,"Cannot find executable %s. Please define the path to gunzip"
+                          " in the environmental varialbe GMX_PATH_GZIP.",gunzip);
+        }    
+        if (bFirst)
+        {
+            printf("Using gunzig executable %s\n",gunzip);
+            bFirst=0;
+        }
+        if (!gmx_fexist(fn))
+        {
+            gmx_fatal(FARGS,"File %s does not exist.\n",fn);
+        }
+        sprintf(Buffer,"%s -c < %s",gunzip,fn);    
+        if (opt->verbose)
+            printf("Executing command '%s'\n",Buffer);
+#ifdef HAVE_PIPES
+        if((pipe=popen(Buffer,"r"))==NULL)
+        {
+            gmx_fatal(FARGS,"Unable to open pipe to `%s'\n",Buffer);
+        }
 #else
-               gmx_fatal(FARGS,"Cannot open a compressed file on platform without pipe support");
+        gmx_fatal(FARGS,"Cannot open a compressed file on platform without pipe support");
 #endif
+        *bPipeOpen=TRUE;
+    }
+    else{
+        pipe=ffopen(fn,"r");
+        *bPipeOpen=FALSE;
+    }
+    
+    return pipe;
+}
+
+
+FILE *open_pdo_pipe_gmx(const char *fn)
+{
+    char *fnNoGz=0;
+    FILE *pipe;
+
+    /* gzipped pdo file? */
+    if (strcmp(fn+strlen(fn)-3,".gz")==0)
+    {
+        snew(fnNoGz,strlen(fn));
+        strncpy(fnNoGz,fn,strlen(fn)-3);
+        fnNoGz[strlen(fn)-3]='\0';
+        if (gmx_fexist(fnNoGz) && gmx_fexist(fn))
+            gmx_fatal(FARGS,"Found file %s and %s. That confuses me. Please remove one of them\n",
+                      fnNoGz,fn);
+        pipe=ffopen(fnNoGz,"r");  
+        sfree(fnNoGz);
     }
     else
-       {
-               if((fp=ffopen(fn,"r"))==NULL)
-               {
-                       gmx_fatal(FARGS,"Unable to open file %s\n",fn);
-               }               
-       }
-       return fp;
+    {
+        pipe=ffopen(fn,"r");
+    }
+  
+    return pipe;
 }
 
-void
-pdo_close_file(FILE *fp)
+void pdo_close_file(FILE *fp)
 {
 #ifdef HAVE_PIPES
        pclose(fp);
@@ -1244,28 +1554,33 @@ pdo_close_file(FILE *fp)
 #endif
 }
 
+
 /* Reading pdo files */
 void read_pdo_files(char **fn, int nfiles, t_UmbrellaHeader* header,
-        t_UmbrellaWindow **window, t_UmbrellaOptions *opt)
+                    t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
 {
-    FILE  * file;
-    real mintmp,maxtmp;
+    FILE  *file;
+    real mintmp,maxtmp,done=0.;
     int i;
-
-
+    gmx_bool bPipeOpen;
+    /* char Buffer0[1000]; */
+    
     if(nfiles<1)
         gmx_fatal(FARGS,"No files found. Hick.");
-
+    
     /* if min and max are not given, get min and max from the input files */
     if (opt->bAuto)
     {
         printf("Automatic determination of boundaries from %d pdo files...\n",nfiles);
         opt->min=1e20;
         opt->max=-1e20;
-        for(i=0;i<nfiles;++i)
+        for(i=0;i<nfiles;++i) 
         {
-            file=pdo_open_file(fn[i]);
-            printf("\rOpening %s ...",fn[i]); fflush(stdout);
+            file=open_pdo_pipe(fn[i],opt,&bPipeOpen);
+            /*fgets(Buffer0,999,file);
+              fprintf(stderr,"First line '%s'\n",Buffer0); */
+            done=100.0*(i+1)/nfiles;
+            printf("\rOpening %s ... [%2.0f%%]",fn[i],done); fflush(stdout);
             if (opt->verbose)
                 printf("\n");
             read_pdo_header(file,header,opt);
@@ -1275,7 +1590,10 @@ void read_pdo_files(char **fn, int nfiles, t_UmbrellaHeader* header,
                 opt->max=maxtmp;
             if (mintmp<opt->min)
                 opt->min=mintmp;
-            pdo_close_file(file);
+            if (bPipeOpen)
+                pdo_close_file(file);
+            else
+                ffclose(file);
         }
         printf("\n");
         printf("\nDetermined boundaries to %f and %f\n\n",opt->min,opt->max);
@@ -1287,73 +1605,87 @@ void read_pdo_files(char **fn, int nfiles, t_UmbrellaHeader* header,
     }
     /* store stepsize in profile */
     opt->dz=(opt->max-opt->min)/opt->bins;
-
-    snew(*window,nfiles);
-
+    
     /* Having min and max, we read in all files */
     /* Loop over all files */
-    for(i=0;i<nfiles;++i)
+    for(i=0;i<nfiles;++i) 
     {
-        printf("\rOpening %s ...",fn[i]); fflush(stdout);
+        done=100.0*(i+1)/nfiles;
+        printf("\rOpening %s ... [%2.0f%%]",fn[i],done); fflush(stdout);
         if (opt->verbose)
             printf("\n");
-        file=pdo_open_file(fn[i]);
-        /* read in the headers */
+        file=open_pdo_pipe(fn[i],opt,&bPipeOpen);
         read_pdo_header(file,header,opt);
         /* load data into window */
-        read_pdo_data(file,header,i,*window,opt,FALSE,NULL,NULL);
-        pdo_close_file(file);
+        read_pdo_data(file,header,i,window,opt,FALSE,NULL,NULL);
+        if ((window+i)->Ntot[0] == 0.0)
+            fprintf(stderr,"\nWARNING, no data points read from file %s (check -b option)\n", fn[i]);
+        if (bPipeOpen)
+            pdo_close_file(file);
+        else
+            ffclose(file);
     }
     printf("\n");
+    for(i=0;i<nfiles;++i)
+        sfree(fn[i]);
+    sfree(fn);
 }
 
-
 #define int2YN(a) (((a)==0)?("N"):("Y"))
 
-void read_tpr_header(const char *fn,t_UmbrellaHeader* header,
-                     t_UmbrellaOptions *opt)
+void read_tpr_header(const char *fn,t_UmbrellaHeader* header,t_UmbrellaOptions *opt)
 {
     t_inputrec  ir;
-    int         i,ngrp,d;
+    int i,ngrp,d;
     t_state     state;
     static int first=1;
-
-
+    
     /* printf("Reading %s \n",fn); */
     read_tpx_state(fn,&ir,&state,NULL,NULL);
-
+    
     if (ir.ePull != epullUMBRELLA)
-        gmx_fatal(FARGS,"This is not a tpr of an umbrella simulation. Found ir.ePull = %s\n",
-                epull_names[ir.ePull]);
-
+        gmx_fatal(FARGS,"This is not a tpr of an umbrella simulation. Found pull type \"%s\" "
+                  " (ir.ePull = %d)\n", epull_names[ir.ePull],ir.ePull);
+    
     /* nr of pull groups */
     ngrp=ir.pull->ngrp;
     if (ngrp < 1)
         gmx_fatal(FARGS,"This is not a tpr of umbrella simulation. Found only %d pull groups\n",ngrp);
-
+    
     header->npullgrps=ir.pull->ngrp;
     header->pull_geometry=ir.pull->eGeom;
     copy_ivec(ir.pull->dim,header->pull_dim);
     header->pull_ndim=header->pull_dim[0]+header->pull_dim[1]+header->pull_dim[2];
     if (header->pull_geometry==epullgPOS && header->pull_ndim>1)
+    {
         gmx_fatal(FARGS,"Found pull geometry 'position' and more than 1 pull dimension (%d).\n"
-                "Hence, the pull potential does not correspond to a one-dimensional umbrella potential.\n"
-                "If you have some special umbrella setup you may want to write your own pdo files\n"
-                "and feed them into g_wham. Check g_wham -h !\n",header->pull_ndim);
+                  "Hence, the pull potential does not correspond to a one-dimensional umbrella potential.\n"
+                  "If you have some special umbrella setup you may want to write your own pdo files\n"
+                  "and feed them into g_wham. Check g_wham -h !\n",header->pull_ndim);
+    }
     snew(header->k,ngrp);
     snew(header->init_dist,ngrp);
     snew(header->umbInitDist,ngrp);
+    
+    /* only z-direction with epullgCYL? */
+    if (header->pull_geometry == epullgCYL)
+    {
+        if (header->pull_dim[XX] || header->pull_dim[YY] || (!header->pull_dim[ZZ]))
+            gmx_fatal(FARGS,"With pull geometry 'cylinder', expected pulling in Z direction only.\n"
+                      "However, found dimensions [%s %s %s]\n",
+                      int2YN(header->pull_dim[XX]),int2YN(header->pull_dim[YY]),
+                      int2YN(header->pull_dim[ZZ]));
+    }
 
     for (i=0;i<ngrp;i++)
     {
         header->k[i]=ir.pull->grp[i+1].k;
         if (header->k[i]==0.0)
             gmx_fatal(FARGS,"Pull group %d has force constant of of 0.0 in %s.\n"
-                    "That doesn't seem to be an Umbrella tpr.\n",
-                    i,fn);
+                      "That doesn't seem to be an Umbrella tpr.\n",
+                      i,fn);
         copy_rvec(ir.pull->grp[i+1].init,header->init_dist[i]);
-        header->Flipped[i]=opt->bFlipProf;
-
+        
         /* initial distance to reference */
         switch(header->pull_geometry)
         {
@@ -1362,23 +1694,30 @@ void read_tpr_header(const char *fn,t_UmbrellaHeader* header,
                 if (header->pull_dim[d])
                     header->umbInitDist[i]=header->init_dist[i][d];
             break;
+        case epullgCYL:
+            /* umbrella distance stored in init_dist[i][0] for geometry cylinder (not in ...[i][ZZ]) */
         case epullgDIST:
+        case epullgDIR:
+        case epullgDIRPBC:
             header->umbInitDist[i]=header->init_dist[i][0];
             break;
         default:
             gmx_fatal(FARGS,"Pull geometry %s not supported\n",epullg_names[header->pull_geometry]);
         }
     }
-
+    
     if (opt->verbose || first)
     {
         printf("File %s, %d groups, geometry \"%s\", dimensions [%s %s %s], (%d dimensions)\n",
-                fn,header->npullgrps,epullg_names[header->pull_geometry],
-                int2YN(header->pull_dim[0]),int2YN(header->pull_dim[1]),int2YN(header->pull_dim[2]),
-                header->pull_ndim);
+               fn,header->npullgrps,epullg_names[header->pull_geometry],
+               int2YN(header->pull_dim[0]),int2YN(header->pull_dim[1]),int2YN(header->pull_dim[2]),
+               header->pull_ndim);
         for (i=0;i<ngrp;i++)
-            printf("\tgrp %d) k = %.3f  inittial distance = %g\n",i,header->k[i],header->umbInitDist[i]);
+            printf("\tgrp %d) k = %-5g  position = %g\n",i,header->k[i],header->umbInitDist[i]);    
     }
+    if (!opt->verbose && first)
+        printf("\tUse option -v to see this output for all input tpr files\n");
+    
     first=0;
 }
 
@@ -1387,103 +1726,154 @@ double dist_ndim(double **dx,int ndim,int line)
 {
     int i;
     double r2=0.;
-
-
     for (i=0;i<ndim;i++)
         r2+=sqr(dx[i][line]);
-
     return sqrt(r2);
 }
 
-
-void read_pull_xf(const char *fn, const char *fntpr, 
-                  t_UmbrellaHeader * header, t_UmbrellaWindow * window,
-                  t_UmbrellaOptions *opt, gmx_bool bGetMinMax,real *mintmp,
-                  real *maxtmp)
+void read_pull_xf(const char *fn, const char *fntpr, t_UmbrellaHeader * header,
+                  t_UmbrellaWindow * window,
+                  t_UmbrellaOptions *opt,
+                  gmx_bool bGetMinMax,real *mintmp,real *maxtmp)
 {
-    double **y,pos=0.,t,force,time0=0.,dt;
-    int ny,nt,bins,ibin,i,g,dstep=1,nColPerGrp,nColRef,nColExpect;
-    real min,max,minfound,maxfound;
+    double **y=0,pos=0.,t,force,time0=0.,dt;
+    int ny,nt,bins,ibin,i,g,dstep=1,nColPerGrp,nColRefOnce,nColRefEachGrp,nColExpect,ntot;
+    real min,max,minfound=1e20,maxfound=-1e20;
     gmx_bool dt_ok,timeok,bHaveForce;
     const char *quantity;
+    const int blocklen=4096;
+    int *lennow=0;
+    
+    /* 
+       in force    output pullf.xvg: 
+       No   reference, one  column  per pull group
+       in position output pullx.xvg (not cylinder)
+       ndim reference, ndim columns per pull group
+       in position output pullx.xvg (in geometry cylinder): 
+       ndim*2 columns per pull group (ndim for ref, ndim for group)
+    */
 
-       minfound=1e20;
-       maxfound=-1e20;
-
-    /*
-     in force    output pullf.xvg: No   reference, one  column  per pull group
-     in position output pullx.xvg: ndim reference, ndim columns per pull group
-     */
     nColPerGrp = opt->bPullx ? header->pull_ndim : 1;
-    nColRef    = opt->bPullx ? header->pull_ndim : 0;
     quantity   = opt->bPullx ? "position" : "force";
-    nColExpect = 1 + nColRef + header->npullgrps*nColPerGrp;
+    
+    if (opt->bPullx)
+    {
+        if (header->pull_geometry == epullgCYL)
+        {
+            /* Geometry cylinder -> reference group before each pull group */
+            nColRefEachGrp=header->pull_ndim;
+            nColRefOnce=0;
+        }
+        else
+        {
+            /* Geometry NOT cylinder -> reference group only once after time column */
+            nColRefEachGrp=0;
+            nColRefOnce=header->pull_ndim;
+        }
+    }
+    else /* read forces, no reference groups */
+    {
+       nColRefEachGrp=0;
+        nColRefOnce=0;
+    }
+    nColExpect = 1 + nColRefOnce + header->npullgrps*(nColRefEachGrp+nColPerGrp);
     bHaveForce = opt->bPullf;
-
+    
+    /* With geometry "direction" or "direction_periodic", only force reading is supported so far.
+       That avoids the somewhat tedious extraction of the right columns from the pullx files
+       to compute the projection of the distance onto the vector. Sorry for the laziness. */
+    if  ( (header->pull_geometry==epullgDIR || header->pull_geometry==epullgDIRPBC) 
+          && opt->bPullx)
+    {
+        gmx_fatal(FARGS,"With pull geometries \"direction\" and \"direction_periodic\", only pull force "
+                  "reading \n(option -if) is supported at present, "
+                  "not pull position reading (options -ix).\nMake sure mdrun writes the pull "
+                  "forces (pullf.xvg files)\nand provide them to g_wham with option -if.", 
+                  epullg_names[header->pull_geometry]);
+    }
+    
     nt=read_xvg(fn,&y,&ny);
 
     /* Check consistency */
     if (nt<1)
+    {
         gmx_fatal(FARGS,"Empty pull %s file %s\n",quantity,fn);
+    }
     if (ny != nColExpect)
+    {
         gmx_fatal(FARGS,"Found %d pull groups in %s,\n but %d data columns in %s (expected %d)\n",
-                header->npullgrps,fntpr,ny-1,fn,nColExpect-1);
-
+                  header->npullgrps,fntpr,ny-1,fn,nColExpect-1);
+    }
+    
     if (opt->verbose)
         printf("Found %d times and %d %s sets %s\n",nt,(ny-1)/nColPerGrp,quantity,fn);
-
+  
     if (!bGetMinMax)
     {
         bins=opt->bins;
         min=opt->min;
         max=opt->max;
         if (nt>1)
+        {
             window->dt=y[0][1]-y[0][0];
-        else if (opt->nBootStrap && opt->dtBootStrap!=0.0)
+        }
+        else if (opt->nBootStrap && opt->tauBootStrap!=0.0)
+        {
             fprintf(stderr,"\n *** WARNING, Could not determine time step in %s\n",fn);
-
+        }
+        
         /* Need to alocate memory and set up structure */
         window->nPull=header->npullgrps;
         window->nBin=bins;
-
+        
         snew(window->Histo,window->nPull);
         snew(window->z,window->nPull);
         snew(window->k,window->nPull);
         snew(window->pos,window->nPull);
-        snew(window->Flipped,window->nPull);
         snew(window->N, window->nPull);
         snew(window->Ntot, window->nPull);
+        snew(window->g, window->nPull);
+        snew(window->bsWeight, window->nPull);    
+        window->bContrib=0;
 
-        for(g=0;g<window->nPull;++g)
+        if (opt->bCalcTauInt)
+            snew(window->ztime,window->nPull);
+        else
+            window->ztime=NULL;
+        snew(lennow,window->nPull);
+
+        for(g=0;g<window->nPull;++g) 
         {
             window->z[g]=1;
+            window->bsWeight[g]=1.;
             snew(window->Histo[g],bins);
             window->k[g]=header->k[g];
-            window->Flipped[g]=header->Flipped[g];
             window->N[g]=0;
             window->Ntot[g]=0;
+            window->g[g]=1.;
             window->pos[g]=header->umbInitDist[g];
+            if (opt->bCalcTauInt)
+                window->ztime[g]=NULL;
         }
+
     }
     else
-    {
-        /* only determine min and max */
+    { /* only determine min and max */
         minfound=1e20;
         maxfound=-1e20;
         min=max=bins=0; /* Get rid of warnings */
     }
 
-    if(header->Flipped[0])
-        gmx_fatal(FARGS,"Sorry, flipping not supported for gmx4 output\n");
-
     for (i=0;i<nt;i++)
     {
         /* Do you want that time frame? */
-      t=1.0/1000*((int)(0.5+y[0][i]*1000)); /* round time to fs */
-
+        t=1.0/1000*( (int) ((y[0][i]*1000) + 0.5)); /* round time to fs */
+        
         /* get time step of pdo file and get dstep from opt->dt */
         if (i==0)
+        {
             time0=t;
+        }
         else if (i==1)
         {
             dt=t-time0;
@@ -1496,16 +1886,16 @@ void read_pull_xf(const char *fn, const char *fntpr,
             if (!bGetMinMax)
                 window->dt=dt*dstep;
         }
-
+        
         dt_ok=(i%dstep == 0);
         timeok=(dt_ok && t >= opt->tmin && t <= opt->tmax);
         /*if (opt->verbose)
-      printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
-      t,opt->tmin, opt->tmax, dt_ok,timeok); */
-
+          printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n", 
+          t,opt->tmin, opt->tmax, dt_ok,timeok); */
+        
         if (timeok)
         {
-            for(g=0;g<header->npullgrps;++g)
+            for(g=0;g<header->npullgrps;++g) 
             {
                 if (bHaveForce)
                 {
@@ -1518,20 +1908,25 @@ void read_pull_xf(const char *fn, const char *fntpr,
                     switch (header->pull_geometry)
                     {
                     case epullgDIST:
-                        /* y has 1 time column y[0] and nColPerGrps columns per pull group;
-                              Distance to reference: */
-                        pos=dist_ndim(y+1+nColRef+g*nColPerGrp,header->pull_ndim,i);
+                        /* y has 1 time column y[0] and nColPerGrps columns per pull group; 
+                           Distance to reference:                                           */
+                        /* pos=dist_ndim(y+1+nColRef+g*nColPerGrp,header->pull_ndim,i); gmx 4.0 */
+                        pos=dist_ndim(y + 1 + nColRefOnce + g*nColPerGrp,header->pull_ndim,i);
                         break;
                     case epullgPOS:
-                        /* with geometry==position, we have always one column per group;
-                              Distance to reference: */
-                        pos=y[1+nColRef+g][i];
+                    case epullgCYL:
+                        /* with geometry==position, we have the reference once (nColRefOnce==ndim), but
+                           no extra reference group columns before each group (nColRefEachGrp==0)
+                           with geometry==cylinder, we have no initial ref group column (nColRefOnce==0), 
+                           but ndim ref group columns before every group (nColRefEachGrp==ndim)
+                           Distance to reference: */
+                        pos=y[1 + nColRefOnce + g*(nColRefEachGrp+nColPerGrp)][i];
                         break;
                     default:
                         gmx_fatal(FARGS,"Bad error, this error should have been catched before. Ups.\n");
                     }
                 }
-
+                
                 /* printf("grp %d dpos %f poseq %f pos %f \n",g,dpos,poseq,pos); */
                 if (bGetMinMax)
                 {
@@ -1542,15 +1937,28 @@ void read_pull_xf(const char *fn, const char *fntpr,
                 }
                 else
                 {
+                    if (opt->bCalcTauInt && !bGetMinMax)
+                    {
+                        /* save time series for autocorrelation analysis */
+                        ntot=window->Ntot[g];
+                        /* printf("i %d, ntot %d, lennow[g] = %d\n",i,ntot,lennow[g]); */
+                        if (ntot>=lennow[g])
+                        {
+                            lennow[g]+=blocklen;
+                            srenew(window->ztime[g],lennow[g]);
+                        }
+                        window->ztime[g][ntot]=pos;
+                    }
+                    
                     ibin=(int) floor((pos-min)/(max-min)*bins);
-                    if (opt->cycl==enCycl_yes)
+                    if (opt->bCycl)
                     {
                         if (ibin<0)
                             while ( (ibin+=bins) < 0);
                         else if (ibin>=bins)
                             while ( (ibin-=bins) >= bins);
-                    }
-                    if(ibin >= 0 && ibin < bins)
+                    }    
+                    if(ibin >= 0 && ibin < bins) 
                     {
                         window->Histo[g][ibin]+=1.;
                         window->N[g]++;
@@ -1566,25 +1974,26 @@ void read_pull_xf(const char *fn, const char *fntpr,
             break;
         }
     }
-
+  
     if (bGetMinMax)
     {
         *mintmp=minfound;
         *maxtmp=maxfound;
     }
+    sfree(lennow);
+    for (i=0;i<ny;i++)
+        sfree(y[i]);
 }
 
-
-void read_tpr_pullxf_files(char **fnTprs,char **fnPull,
-                           int nfiles, t_UmbrellaHeader* header,
-                           t_UmbrellaWindow **window, t_UmbrellaOptions *opt)
+void read_tpr_pullxf_files(char **fnTprs,char **fnPull,int nfiles,
+                           t_UmbrellaHeader* header, 
+                           t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
 {
     int i;
     real mintmp,maxtmp;
-
-
-    printf("Reading %d tpr and pullf files\n",nfiles);
-
+    
+    printf("Reading %d tpr and pullf files\n",nfiles/2);
+    
     /* min and max not given? */
     if (opt->bAuto)
     {
@@ -1598,14 +2007,15 @@ void read_tpr_pullxf_files(char **fnTprs,char **fnPull,
             read_tpr_header(fnTprs[i],header,opt);
             if (whaminFileType(fnPull[i]) != whamin_pullxf)
                 gmx_fatal(FARGS,"Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n",i);
-            read_pull_xf(fnPull[i],fnTprs[i],header,NULL,opt,TRUE,&mintmp,&maxtmp);
+            read_pull_xf(fnPull[i],fnTprs[i],header,NULL,opt,TRUE,&mintmp,&maxtmp);      
             if (maxtmp>opt->max)
                 opt->max=maxtmp;
             if (mintmp<opt->min)
                 opt->min=mintmp;
         }
         printf("\nDetermined boundaries to %f and %f\n\n",opt->min,opt->max);
-        if (opt->bBoundsOnly){
+        if (opt->bBoundsOnly)
+        {
             printf("Found option -boundsonly, now exiting.\n");
             exit (0);
         }
@@ -1613,7 +2023,6 @@ void read_tpr_pullxf_files(char **fnTprs,char **fnPull,
     /* store stepsize in profile */
     opt->dz=(opt->max-opt->min)/opt->bins;
 
-    snew(*window,nfiles);
     for (i=0;i<nfiles; i++)
     {
         if (whaminFileType(fnTprs[i]) != whamin_tpr)
@@ -1621,176 +2030,655 @@ void read_tpr_pullxf_files(char **fnTprs,char **fnPull,
         read_tpr_header(fnTprs[i],header,opt);
         if (whaminFileType(fnPull[i]) != whamin_pullxf)
             gmx_fatal(FARGS,"Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n",i);
-        read_pull_xf(fnPull[i],fnTprs[i],header,*window+i,opt,FALSE,NULL,NULL);
+        read_pull_xf(fnPull[i],fnTprs[i],header,window+i,opt,FALSE,NULL,NULL);
+        if (window[i].Ntot[0] == 0.0)
+            fprintf(stderr,"\nWARNING, no data points read from file %s (check -b option)\n", fnPull[i]);
+    }
+
+    for (i=0;i<nfiles; i++)
+    {
+        sfree(fnTprs[i]);
+        sfree(fnPull[i]);
+    }
+    sfree(fnTprs);
+    sfree(fnPull);
+}
+
+/* Read integrated autocorrelation times (IACTs) from fn: row i = window i, column ig = pull group ig.
+   Sets window[i].g[ig] = 1 + 2*IACT/dt (IACT := int_0^inf ACF(t)); g subsequently enters the uncertainty.
+   Aborts if the row count != nwins or the column count != nPull of a window; opt is not used here. */
+void readIntegratedAutocorrelationTimes(t_UmbrellaWindow *window,int nwins,t_UmbrellaOptions *opt,
+                                        const char* fn)
+{
+    int nlines,ny,i,ig;
+    double **iact;
+
+    printf("Readging Integrated autocorrelation times from %s ...\n",fn);
+    nlines=read_xvg(fn,&iact,&ny); /* indexed below as iact[column][row]; NOTE(review): iact is not freed in this function */
+    if (nlines!=nwins)
+        gmx_fatal(FARGS,"Found %d lines with integrated autocorrelation times in %s.\nExpected %d",
+                  nlines,fn,nwins);
+    for (i=0;i<nlines;i++){
+        if (window[i].nPull != ny)
+            gmx_fatal(FARGS,"You are providing autocorrelation times with option -iiact and the\n"
+                      "number of pull groups is different in different simulations. That is not\n"
+                      "supported yet. Sorry.\n");
+        for (ig=0;ig<window[i].nPull;ig++){
+            /* compare Kumar et al, J Comp Chem 13, 1011-1021 (1992) */
+            window[i].g[ig]=1+2*iact[ig][i]/window[i].dt;  
+      
+            if (iact[ig][i] <= 0.0) /* only warns; g has already been set from this value above */
+                fprintf(stderr,"\nWARNING, IACT = %f (window %d, group %d)\n", iact[ig][i],i,ig);
+        }
     }
 }
 
 
+/* Smooth the autocorrelation times (ACTs) along the reaction coordinate with a
+   Gaussian of width opt->sigSmoothIact, truncated at +-3 sigma. This is useful
+   if the ACTs are uncertain (and possibly severely underestimated) due to limited
+   sampling. Results go to window[i].tausmooth[ig]; g=1+2*tau/dt is overwritten.
+   If opt->bAllowReduceIact==FALSE, the ACTs (and hence g) are never reduced,
+   only increased by the smoothing.
+*/
+void smoothIact(t_UmbrellaWindow *window,int nwins,t_UmbrellaOptions *opt)
+{
+    int i,ig,j,jg;
+    double pos,dpos2,siglim,siglim2,gaufact,invtwosig2,w,weight,tausm;
+  
+    /* only evaluate within +- 3sigma of the Gaussian */
+    siglim=3.0*opt->sigSmoothIact;
+    siglim2=dsqr(siglim);
+    /* pre-factor of Gaussian */
+    gaufact=1.0/(sqrt(2*M_PI)*opt->sigSmoothIact);
+    invtwosig2=0.5/dsqr(opt->sigSmoothIact);
+  
+    for (i=0;i<nwins;i++)
+    {
+        snew(window[i].tausmooth,window[i].nPull);
+        for (ig=0;ig<window[i].nPull;ig++)
+        {
+            tausm=0.;
+            weight=0;
+            pos=window[i].pos[ig];
+            for (j=0;j<nwins;j++)
+            {
+                for (jg=0;jg<window[j].nPull;jg++)
+                {
+                    dpos2=dsqr(window[j].pos[jg]-pos);   
+                    if (dpos2<siglim2){
+                        w=gaufact*exp(-dpos2*invtwosig2);
+                        weight+=w;
+                        tausm+=w*window[j].tau[jg];
+                        /*printf("Weight %g dpos2=%g pos=%g gaufact=%g invtwosig2=%g\n",
+                          w,dpos2,pos,gaufact,invtwosig2); */
+                    }
+                }
+            }
+            tausm/=weight; /* weight>0 for sigSmoothIact!=0: the group itself (dpos2==0) always contributes */
+            if (opt->bAllowReduceIact || tausm>window[i].tau[ig])
+                window[i].tausmooth[ig]=tausm;
+            else
+                window[i].tausmooth[ig]=window[i].tau[ig];
+            window[i].g[ig] = 1+2*window[i].tausmooth[ig]/window[i].dt; /* use the clamped value so bAllowReduceIact is honored */
+        }
+    }
+}
+
+/* Estimate the integrated autocorrelation time (IACT) of each umbrella window: fills window[i].tau and window[i].g, writes the IACTs to fn */
+#define WHAM_AC_ZERO_LIMIT 0.05 /* normalized ACF values at or below this limit are treated as noise */
+void calcIntegratedAutocorrelationTimes(t_UmbrellaWindow *window,int nwins,
+                                        t_UmbrellaOptions *opt,        const char *fn)
+{
+    int i,ig,ncorr,ntot,j,k,*count,restart;
+    real *corr,c0,dt,timemax,tmp;
+    real *ztime,av,tausteps;
+    FILE *fp,*fpcorr=0;
+  
+    if (opt->verbose)
+        fpcorr=xvgropen("hist_autocorr.xvg","Autocorrelation functions of umbrella windows",
+                        "time [ps]","autocorrelation function",opt->oenv);
+  
+    printf("\n");
+    for (i=0;i<nwins;i++)
+    {
+        printf("\rEstimating integrated autocorreltion times ... [%2.0f%%] ...",100.*(i+1)/nwins);
+        fflush(stdout);
+        ntot=window[i].Ntot[0];
+
+        /* using half the maximum time as length of autocorrelation function */
+        ncorr=ntot/2;
+        if (ntot<10)
+            gmx_fatal(FARGS,"Tryig to estimtate autocorrelation time from only %d"
+                      " points. Provide more pull data!",ntot);
+        snew(corr,ncorr);
+        /* snew(corrSq,ncorr); */
+        snew(count,ncorr);
+        dt=window[i].dt;
+        timemax=dt*ncorr; /* NOTE(review): timemax is not used anywhere below */
+        snew(window[i].tau,window[i].nPull);
+        restart=(int)(opt->acTrestart/dt+0.5); /* stride (in frames) between time origins, at least 1 */
+        if (restart==0)
+            restart=1;
+
+        for (ig=0;ig<window[i].nPull;ig++)
+        {            
+            if (ntot != window[i].Ntot[ig])
+                gmx_fatal(FARGS,"Encountered different nr of frames in different pull groups.\n"
+                          "That should not happen. (%d and %d)\n", ntot,window[i].Ntot[ig]);
+            ztime=window[i].ztime[ig];
+            
+            /* calc autocorrelation function C(t) = < [z(tau)-<z>]*[z(tau+t)-<z>]> */
+            for(j=0, av=0; (j<ntot); j++)
+                av+=ztime[j];
+            av/=ntot;
+            for(k=0; (k<ncorr); k++)
+            {
+                corr[k]=0.;
+                count[k]=0;
+            }
+            for(j=0; (j<ntot); j+=restart) 
+                for(k=0; (k<ncorr) && (j+k < ntot); k++)
+                {
+                    tmp=(ztime[j]-av)*(ztime[j+k]-av); 
+                    corr  [k] += tmp;
+                    /* corrSq[k] += tmp*tmp; */
+                    count[k]++;
+                }
+            /* divide by nr of frames for each time displacement */
+            for(k=0; (k<ncorr); k++) 
+            {
+                /* count probably = (ncorr-k+(restart-1))/restart; */
+                corr[k] = corr[k]/count[k];
+                /* variance of autocorrelation function */
+                /* corrSq[k]=corrSq[k]/count[k]; */
+            }
+            /* normalize such that corr[0] == 1 */
+            c0=1./corr[0]; /* NOTE(review): corr[0] is 0 for a constant time series -> division by zero; confirm this cannot occur */
+            for(k=0; (k<ncorr); k++)
+            {
+                corr[k]*=c0;
+                /* corrSq[k]*=c0*c0; */
+            }
+
+            /* write ACFs in verbose mode */
+            if (fpcorr)
+            {
+                for(k=0; (k<ncorr); k++)
+                    fprintf(fpcorr,"%g  %g\n",k*dt,corr[k]);
+                fprintf(fpcorr,"&\n");
+            }
+      
+            /* estimate integrated correlation time (in units of frames), fitting is too unstable */
+            tausteps = 0.5*corr[0];
+            /* consider corr below WHAM_AC_ZERO_LIMIT as noise: stop summing at the first such value */
+            for(j=1; (j<ncorr) && (corr[j]>WHAM_AC_ZERO_LIMIT); j++)
+                tausteps += corr[j];
+         
+            /* g = 1+2*tau, see. Ferrenberg/Swendsen, PRL 63:1195 (1989) or
+               Kumar et al, eq. 28 ff. */
+            window[i].tau[ig] = tausteps*dt;
+            window[i].g[ig] = 1+2*tausteps;
+            /* printf("win %d, group %d, estimated correlation time = %g ps\n",i,ig,window[i].tau[ig]); */
+        } /* ig loop */
+        sfree(corr);
+        sfree(count);
+    }
+    printf(" done\n");
+    if (fpcorr)
+        ffclose(fpcorr);
+  
+    /* plot IACT along reaction coordinate */
+    fp=xvgropen(fn,"Integrated autocorrelation times","z","IACT [ps]",opt->oenv);
+    fprintf(fp,"@    s0 symbol 1\n@    s0 symbol size 0.5\n@    s0 line linestyle 0\n");
+    fprintf(fp,"#  WIN   tau(gr1)  tau(gr2) ...\n");
+    for (i=0;i<nwins;i++)
+    {
+        fprintf(fp,"# %3d   ",i);
+        for (ig=0;ig<window[i].nPull;ig++)
+            fprintf(fp," %11g",window[i].tau[ig]);
+        fprintf(fp,"\n");
+    }
+    for (i=0;i<nwins;i++)
+        for (ig=0;ig<window[i].nPull;ig++)
+            fprintf(fp,"%8g %8g\n",window[i].pos[ig],window[i].tau[ig]);
+    if (opt->sigSmoothIact > 0.0)
+    {
+        printf("Smoothing autocorrelation times along reaction coordinate with Gaussian of sig = %g\n",
+               opt->sigSmoothIact);
+        /* smooth IACT along reaction coordinate and overwrite g=1+2tau */
+        smoothIact(window,nwins,opt);
+        fprintf(fp,"&\n@    s1 symbol 1\n@    s1 symbol size 0.5\n@    s1 line linestyle 0\n");
+        fprintf(fp,"@    s1 symbol color 2\n");
+        for (i=0;i<nwins;i++)
+            for (ig=0;ig<window[i].nPull;ig++)
+                fprintf(fp,"%8g %8g\n",window[i].pos[ig],window[i].tausmooth[ig]);
+    }   
+    ffclose(fp);
+    printf("Wrote %s\n",fn);
+}
+
+/* compute average and sigma of the stored positions (ztime) of each umbrella window; fills aver[] and sigma[]; opt is not used */
+void averageSigma(t_UmbrellaWindow *window,int nwins,t_UmbrellaOptions *opt)
+{
+    int i,ig,ntot,k;
+    real av,sum2,sig,diff,*ztime,nSamplesIndep;
+
+    for (i=0;i<nwins;i++)
+    {
+        snew(window[i].aver, window[i].nPull);
+        snew(window[i].sigma,window[i].nPull);
+
+        ntot=window[i].Ntot[0]; /* frame count of group 0 is used for all groups of this window */
+        for (ig=0;ig<window[i].nPull;ig++)
+        {
+            ztime=window[i].ztime[ig];
+            for (k=0, av=0.; k<ntot; k++)
+                av+=ztime[k];
+            av/=ntot;
+            for (k=0, sum2=0.; k<ntot; k++)
+            {
+                diff=ztime[k]-av;
+                sum2+=diff*diff;
+            }
+            sig=sqrt(sum2/ntot);
+            window[i].aver[ig]=av;
+
+            /* Note: This estimate for sigma is biased from the limited sampling.
+               It is scaled by n/(n-1), where n = N/(tau/dt) is taken as the number of
+               independent samples. Only possible if tau (the IACT) is available.
+               NOTE(review): Bessel's correction would scale sigma by sqrt(n/(n-1)) - confirm intent. */
+            if (window[i].tau)
+            {
+                nSamplesIndep=window[i].N[ig]/(window[i].tau[ig]/window[i].dt);
+                window[i].sigma[ig]=sig * nSamplesIndep/(nSamplesIndep-1);
+            }
+            else
+                window[i].sigma[ig]=sig;
+            printf("win %d, aver = %f  sig = %f\n",i,av,window[i].sigma[ig]);
+        }
+    }      
+}
+
+
+/* Use histograms to compute the average force on each pull group (stored in window[j].forceAv):
+   forceAv = k * <displacement from the umbrella center>, with histogram counts weighted by 1/g.
+   The mean square displacement is accumulated as well, but it is currently not stored. */
+void computeAverageForce(t_UmbrellaWindow *window,int nWindows,t_UmbrellaOptions *opt)
+{
+    int i,j,bins=opt->bins,k;
+    double dz,min=opt->min,max=opt->max,displAv,displAv2,temp,distance,ztot,ztot_half,w,weight;
+    double posmirrored;
+
+    dz=(max-min)/bins;
+    ztot=opt->max-min;
+    ztot_half=ztot/2;
+
+    /* Compute average displacement from histograms */
+    for(j=0;j<nWindows;++j) 
+    {
+        snew(window[j].forceAv,window[j].nPull);
+        for(k=0;k<window[j].nPull;++k) 
+        {
+            displAv = 0.0;
+            displAv2 = 0.0;
+            weight  = 0.0;
+            for(i=0;i<opt->bins;++i) 
+            {    
+                temp=(1.0*i+0.5)*dz+min; /* center of bin i */
+                distance = temp - window[j].pos[k];
+                if (opt->bCycl)
+                {                                       /* in cyclic wham:             */
+                    if (distance > ztot_half)           /*    |distance| < ztot_half   */
+                        distance-=ztot;
+                    else if (distance < -ztot_half)
+                        distance+=ztot;
+                }
+                w=window[j].Histo[k][i]/window[j].g[k];
+                displAv  += w*distance;
+                displAv2 += w*sqr(distance);
+                weight+=w;
+                /* Are we near min or max? We are getting wrong forces from the histograms since
+                   the histograms are zero outside [min,max). Therefore, assume that the position 
+                   on the other side of the histogram center is equally likely. */
+                if (!opt->bCycl)
+                {
+                    posmirrored=window[j].pos[k]-distance;
+                    if (posmirrored>=max || posmirrored<min)
+                    {
+                        displAv  += -w*distance;
+                        displAv2 += w*sqr(-distance);      
+                        weight+=w;
+                    }
+                }
+            }
+            displAv  /= weight; /* NOTE(review): weight is 0 if the histogram of this group is empty - confirm this cannot occur */
+            displAv2 /= weight;
+
+            /* average force from average displacement */
+            window[j].forceAv[k] = displAv*window[j].k[k];
+            /* sigma from average square displacement */
+            /* window[j].sigma  [k] = sqrt(displAv2); */
+            /* printf("Win %d, sigma = %f\n",j,sqrt(displAv2));  */
+        }
+    }
+}
+
+/* Check if the complete reaction coordinate is covered by the histograms: warn (stderr) about empty bins and, away from the boundaries, poorly sampled bins */
+void  checkReactionCoordinateCovered(t_UmbrellaWindow *window,int nwins,
+                                     t_UmbrellaOptions *opt)
+{
+    int i,ig,j,bins=opt->bins,bBoundary;
+    real avcount=0,z,relcount,*count;
+    snew(count,opt->bins);
+
+    for(j=0;j<opt->bins;++j) 
+    {        
+        for (i=0;i<nwins;i++){
+            for (ig=0;ig<window[i].nPull;ig++)
+                count[j]+=window[i].Histo[ig][j];
+        }
+        avcount+=1.0*count[j];
+    }
+    avcount/=bins;
+    for(j=0;j<bins;++j) 
+    {
+        relcount=count[j]/avcount; /* count in bin j relative to the average over all bins */
+        z=(j+0.5)*opt->dz+opt->min;
+        bBoundary=( j<bins/20 || (bins-j)<=bins/20 ); /* outermost 5% of the bins on either side */
+        /* check for bins with no data */
+        if (count[j] == 0)
+            fprintf(stderr, "\nWARNING, no data point in bin %d (z=%g) !\n"
+                    "You may not get a reasonable profile. Check your histograms!\n",j,z);
+        /* and check for poor sampling */
+        else if (relcount<0.005 && !bBoundary)
+            fprintf(stderr, "Warning, poor sampling bin %d (z=%g). Check your histograms!\n",j,z);
+    }
+    sfree(count);
+}
+
+/* Guess an initial PMF by integrating the average window forces along z and pass it (as probabilities) to calc_z(); fn is not used */
+void guessPotByIntegration(t_UmbrellaWindow *window,int nWindows,t_UmbrellaOptions *opt,
+                           char *fn)
+{
+    int i,j,ig,bins=opt->bins,nHist,winmin,groupmin;
+    double dz,min=opt->min,*pot,pos,hispos,dist,diff,fAv,distmin,*f;
+    FILE *fp;
+
+    dz=(opt->max-min)/bins;
+
+    printf("Getting initial potential by integration.\n");
+
+    /* Compute average displacement from histograms */
+    computeAverageForce(window,nWindows,opt);
+
+    /* Get force for each bin from all histograms in this bin, or, alternatively,
+       if no histograms are inside this bin, from the closest histogram */
+    snew(pot,bins);
+    snew(f,bins);
+    for(j=0;j<opt->bins;++j) 
+    {
+        pos=(1.0*j+0.5)*dz+min; /* center of bin j */
+        nHist=0;
+        fAv=0.;
+        distmin=1e20;
+        groupmin=winmin=0;
+        for (i=0;i<nWindows;i++)
+        {
+            for (ig=0;ig<window[i].nPull;ig++)
+            {
+                hispos=window[i].pos[ig];
+                dist=fabs(hispos-pos);
+                /* average force within bin */
+                if (dist < dz/2)
+                {
+                    nHist++;
+                    fAv+=window[i].forceAv[ig];
+                }
+                /* at the same time, remember closest histogram */
+                if (dist<distmin){
+                    winmin=i;
+                    groupmin=ig;
+                    distmin=dist;
+                }
+            }
+        }
+        /* if no histogram found in this bin, use closest histogram */
+        if (nHist>0)
+            fAv=fAv/nHist;
+        else{
+            fAv=window[winmin].forceAv[groupmin];
+        }
+        f[j]=fAv;
+    }
+    for(j=1;j<opt->bins;++j)
+        pot[j] = pot[j-1] - 0.5*dz*(f[j-1]+f[j]); /* trapezoidal rule; pot[0] is left as allocated (presumably zeroed by snew - confirm) */
+
+    /* cyclic wham: linearly correct possible offset */
+    if (opt->bCycl)
+    {
+        diff=(pot[bins-1]-pot[0])/(bins-1);
+        for(j=1;j<opt->bins;++j)
+            pot[j]-=j*diff;
+    }
+    if (opt->verbose)
+    {
+        fp=xvgropen("pmfintegrated.xvg","PMF from force integration","z","PMF [kJ/mol]",opt->oenv);
+        for(j=0;j<opt->bins;++j)
+            fprintf(fp,"%g  %g\n",(j+0.5)*dz+opt->min,pot[j]);
+        ffclose(fp);
+        printf("verbose mode: wrote %s with PMF from interated forces\n","pmfintegrated.xvg");
+    }
+
+    /* get initial z=exp(-F[i]/kT) from integrated potential, where F[i] denote the free
+       energy offsets which are usually determined by wham 
+       First: turn pot into probabilities:
+    */
+    for(j=0;j<opt->bins;++j)
+        pot[j]=exp(-pot[j]/(8.314e-3*opt->Temperature)); /* 8.314e-3: presumably the gas constant in kJ/(mol K) - confirm */
+    calc_z(pot,window,nWindows,opt,TRUE);
+    
+    sfree(pot);
+    sfree(f);
+}
+
+
 int gmx_wham(int argc,char *argv[])
 {
     const char *desc[] = {
-            "This is an analysis program that implements the Weighted",
-            "Histogram Analysis Method (WHAM). It is intended to analyze",
-            "output files generated by umbrella sampling simulations to ",
-            "compute a potential of mean force (PMF). [PAR]",
-            "At present, three input modes are supported:[BR]",
-            "[TT]*[tt] With option [TT]-it[tt], the user provides a file which contains the",
-            "  filenames of the umbrella simulation run-input files (tpr files),",
-            "  AND, with option -ix, a file which contains filenames of",
-            "  the pullx mdrun output files. The tpr and pullx files must",
-            "  be in corresponding order, i.e. the first tpr created the",
-            "  first pullx, etc.[BR]",
-            "[TT]*[tt] Same as the previous input mode, except that the the user",
-            "  provides the pull force ouput file names (pullf.xvg) with option -if.",
-            "  From the pull force the position in the ubrella potential is",
-            "  computed. This does not work with tabulated umbrella potentials.",
-            "[TT]*[tt] With option [TT]-ip[tt], the user provides filenames of (gzipped) pdo files, i.e.",
-            "  the gromacs 3.3 umbrella output files. If you have some unusual",
-            "  reaction coordinate you may also generate your own pdo files and",
-            "  feed them with the -ip option into to g_wham. The pdo file header",
-            "  must be similar to the folowing:[BR]",
-            "[TT]# UMBRELLA      3.0[BR]",
-            "# Component selection: 0 0 1[BR]",
-            "# nSkip 1[BR]",
-            "# Ref. Group 'TestAtom'[BR]",
-            "# Nr. of pull groups 2[BR]",
-            "# Group 1 'GR1'  Umb. Pos. 5.0 Umb. Cons. 1000.0[BR]",
-            "# Group 2 'GR2'  Umb. Pos. 2.0 Umb. Cons. 500.0[BR]",
-            "#####[tt][BR]",
-            "  Nr of pull groups, umbrella positions, force constants, and names",
-            "  may (of course) differ. Following the header, a time column and",
-            "  a data columns for each pull group follow (i.e. the displacement",
-            "  with respect to the umbrella center). Up to four pull groups are possible",
-            "  at present.[PAR]",
-            "By default, the output files are[BR]",
-            "  [TT]-o[tt]      PMF output file[BR]",
-            "  [TT]-hist[tt]   histograms output file[PAR]",
-            "The umbrella potential is assumed to be harmonic and the force constants are ",
-            "read from the tpr or pdo files. If a non-harmonic umbrella force was applied ",
-            "a tabulated potential can be provied with -tab.[PAR]",
-            "WHAM OPTIONS[PAR]",
-            "  [TT]-bins[tt]   Nr of bins used in analysis[BR]",
-            "  [TT]-temp[tt]   Temperature in the simulations[BR]",
-            "  [TT]-tol[tt]    Stop iteration if profile (probability) changed less than tolerance[BR]",
-            "  [TT]-auto[tt]   Automatic determination of boudndaries[BR]",
-            "  [TT]-min,-max[tt]   Boundaries of the profile [BR]",
-            "The data points which are used ",
-            "to compute the profile can be restricted with options -b, -e, and -dt. ",
-            "Play particularly with -b to ensure sufficient equilibration in each ",
-            "umbrella window![PAR]",
-            "With -log (default) the profile is written in energy units, otherwise (-nolog) as ",
-            "probability. The unit can be specified with -unit. With energy output, ",
-            "the energy in the first bin is defined to be zero. If you want the free energy at a different ",
-            "position to be zero, choose with -zprof0 (useful with bootstrapping, see below).[PAR]",
-            "For cyclic (or periodic) reaction coordinates (dihedral angle, channel PMF",
-            "without osmotic gradient), -cycl is useful.[BR]",
-            "[TT]-cycl yes[tt]        min and max are assumed to",
-            "be neighboring points and histogram points outside min and max are mapped into ",
-            "the interval [min,max] (compare histogram output). [BR]",
-            "[TT]-cycl weighted[tt]   First, a non-cyclic profile is computed. Subsequently, ",
-            "periodicity is enforced by adding corrections dG(i) between neighboring bins",
-            "i and i+1. The correction is chosen proportional to 1/[n(i)*n(i+1)]^alpha, where",
-            "n(i) denotes the total nr of data points in bin i as collected from all histograms.",
-            "alpha is defined with -alpha. The corrections are written to the file defined by -wcorr.",
-            " (Compare Hub and de Groot, PNAS 105:1198 (2008))[PAR]",
-            "ERROR ANALYSIS[BR]",
-            "Statistical errors may be estimated with bootstrap analysis. Use it with care, ",
-            "otherwise the statistical error may be substantially undererstimated !![BR]",
-            "[TT]-nBootstrap[tt] defines the nr of bootstraps. Two bootstrapping modes are supported.[BR]",
-            "[TT]-histbs[tt]    Complete histograms are considered as independent data points (default). For each",
-            "bootstrap, N histograms are randomly chosen from the N given histograms (allowing duplication).",
-            "To avoid gaps without data along the reaction coordinate blocks of histograms (-histbs-block)",
-            "may be defined. In that case, the given histograms are divided into blocks and ",
-            "only histograms within each block are mixed. Note that the histograms",
-            "within each block must be representative for all possible histograms, otherwise the",
-            "statistical error is undererstimated![BR]",
-            "[TT]-nohistbs[tt]  The given histograms are used to generate new random histograms,",
-            "such that the generated data points are distributed according the given histograms. The number",
-            "of points generated for each bootstrap histogram can be controlled with -bs-dt.",
-            "Note that one data point should be generated for each *independent* point in the given",
-            "histograms. With the long autocorrelations in MD simulations, this procedure may ",
-            "easily understimate the error![BR]",
-            "Bootstrapping output:[BR]",
-            "[TT]-bsres[tt]   Average profile and standard deviations[BR]",
-            "[TT]-bsprof[tt]  All bootstrapping profiles[BR]",
-            "With [TT]-vbs[tt] (verbose bootstrapping), the histograms of each bootstrap are written, and, ",
-            "with [TT]-nohistBS[tt], the cummulants of the histogram.",
+        "This is an analysis program that implements the Weighted",
+        "Histogram Analysis Method (WHAM). It is intended to analyze",
+        "output files generated by umbrella sampling simulations to ",
+        "compute a potential of mean force (PMF). [PAR] ",
+        "At present, three input modes are supported.[BR]",
+        "[TT]*[tt] With option [TT]-it[tt], the user provides a file which contains the[BR]",
+        "  file names of the umbrella simulation run-input files (tpr files),[BR]",
+        "  AND, with option [TT]-ix[tt], a file which contains file names of [BR]",
+        "  the pullx mdrun output files. The tpr and pullx files must [BR]",
+        "  be in corresponding order, i.e. the first tpr created the [BR]",
+        "  first pullx, etc.[BR]",
+        "[TT]*[tt] Same as the previous input mode, except that the the user [BR]",
+        "  provides the pull force output file names (pullf.xvg) with option [TT]-if[tt].[BR]",
+        "  From the pull force the position in the umbrella potential is [BR]",
+        "  computed. This does not work with tabulated umbrella potentials.[BR]"
+        "[TT]*[tt] With option [TT]-ip[tt], the user provides file names of (gzipped) pdo files, i.e.[BR]",
+        "  the gromacs 3.3 umbrella output files. If you have some unusual[BR]"
+        "  reaction coordinate you may also generate your own pdo files and [BR]",
+        "  feed them with the [TT]-ip[tt] option into to g_wham. The pdo file header [BR]",
+        "  must be similar to the following:[BR]",
+        "[TT]# UMBRELLA      3.0[BR]",
+        "# Component selection: 0 0 1[BR]",
+        "# nSkip 1[BR]",
+        "# Ref. Group 'TestAtom'[BR]",
+        "# Nr. of pull groups 2[BR]",
+        "# Group 1 'GR1'  Umb. Pos. 5.0 Umb. Cons. 1000.0[BR]",
+        "# Group 2 'GR2'  Umb. Pos. 2.0 Umb. Cons. 500.0[BR]",
+        "#####[tt][BR]",
+        "Nr of pull groups, umbrella positions, force constants, and names ",
+        "may (of course) differ. Following the header, a time column and ",
+        "a data columns for each pull group follows (i.e. the displacement",
+        "with respect to the umbrella center). Up to four pull groups are possible ",
+        "per pdo file at present.[PAR]",
+        "By default, the output files are[BR]",
+        "  [TT]-o[tt]      PMF output file[BR]",
+        "  [TT]-hist[tt]   histograms output file[BR]",
+        "Always check whether the histograms sufficiently overlap![PAR]",
+        "The umbrella potential is assumed to be harmonic and the force constants are ",
+        "read from the tpr or pdo files. If a non-harmonic umbrella force was applied ",
+        "a tabulated potential can be provided with [TT]-tab[tt].[PAR]",
+        "WHAM OPTIONS[BR]------------[BR]",
+        "  [TT]-bins[tt]   Nr of bins used in analysis[BR]",
+        "  [TT]-temp[tt]   Temperature in the simulations[BR]",
+        "  [TT]-tol[tt]    Stop iteration if profile (probability) changed less than tolerance[BR]",
+        "  [TT]-auto[tt]   Automatic determination of boundaries[BR]",
+        "  [TT]-min,-max[tt]   Boundaries of the profile [BR]",
+        "The data points which are used ",
+        "to compute the profile can be restricted with options [TT]-b[tt], [TT]-e[tt], and [TT]-dt[tt]. ",
+        "Play particularly with [TT]-b[tt] to ensure sufficient equilibration in each ",
+        "umbrella window![PAR]",
+        "With [TT]-log[tt] (default) the profile is written in energy units, otherwise ([TT]-nolog[tt]) as ",
+        "probability. The unit can be specified with [TT]-unit[tt]. With energy output, ",
+        "the energy in the first bin is defined to be zero. If you want the free energy at a different ",
+        "position to be zero, choose with [TT]-zprof0[tt] (useful with bootstrapping, see below).[PAR]",
+        "For cyclic (or periodic) reaction coordinates (dihedral angle, channel PMF",
+        "without osmotic gradient), the option [TT]-cycl[tt] is useful. g_wham will make use of the ",
+        "periodicity of the system and generate a periodic PMF. The first and the last bin of the",
+        "reaction coordinate will assumed be be neighbors[PAR]",
+        "Option [TT]-sym[tt] symmetrizes the profile around z=0 before output (useful for membrane etc.)[PAR]",
+        "AUTOCORRELATIONS[BR]----------------[BR]",
+        "With [TT]-ac[tt], g_wham estimates the integrated autocorrelation time (IACT) tau for each ",
+        "umbrella window and weights the respective window with 1/[1+2*tau/dt]. The IACTs are written ",
+        "to the file defined with [TT]-oiact[tt]. In verbose mode, all autocorrelation functions (ACFs) are",
+        "written to hist_autocorr.xvg. Because the IACTs can be severely underestimated in case of ",
+        "limited sampling, option [TT]-acsig[tt] allows to smooth the IACTs along the reaction coordinate ",
+        "with a Gaussian (sigma provided with [TT]-acsig[tt], see output in iact.xvg). Note that the ",
+        "IACTs are estimated by simple integration of the ACFs while the ACFs are larger than 0.05.",
+        "If you prefer to compute the IACTs by a more sophisticated (but possibly less robust) method ",
+        "such as fitting to a double exponential, you can compute the IACTs with g_analyze and provide",
+        "them to g_wham with the file iact-in.dat (option [TT]-iiact[tt]). iact-in.dat should contain ",
+        "one line per input file (pdo or pullx/f file) and one column per pull group in the respective file.[PAR]"
+        "ERROR ANALYSIS[BR]--------------[BR]",
+        "Statistical errors may be estimated with bootstrap analysis. Use it with care, ",
+        "otherwise the statistical error may be substantially underestimated !![BR]",
+        "More background and examples for the bootstrap technique can be found in ",
+        "Hub, de Groot and Van der Spoel, JCTC (2010)[BR]",
+        "-nBootstrap defines the nr of bootstraps (use, e.g., 100). Four bootstrapping methods are supported and ",
+        "selected with [TT]-bs-method[tt].[BR]",
+        "  (1) [TT]b-hist[tt]   Default: complete histograms are considered as independent data points, and ",
+        " the bootstrap is carried out by assigning random weights to the histograms (\"Bayesian bootstrap\").",
+        " Note that each point along the reaction coordinate",
+        "must be covered by multiple independent histograms (e.g. 10 histograms), otherwise the ",
+        "statistical error is underestimated![BR]",
+        "  (2) [TT]hist[tt]    Complete histograms are considered as independent data points. For each",
+        "bootstrap, N histograms are randomly chosen from the N given histograms (allowing duplication, i.e. ",
+        "sampling with replacement).",
+        "To avoid gaps without data along the reaction coordinate blocks of histograms ([TT]-histbs-block[tt])",
+        "may be defined. In that case, the given histograms are divided into blocks and ",
+        "only histograms within each block are mixed. Note that the histograms",
+        "within each block must be representative for all possible histograms, otherwise the",
+        "statistical error is underestimated![BR]",
+        "  (3) [TT]traj[tt]  The given histograms are used to generate new random trajectories,",
+        "such that the generated data points are distributed according to the given histograms ",
+        "and properly autocorrelated. The ",
+        "autocorrelation time (ACT) for each window must be known, so use [TT]-ac[tt] or provide the ACT",
+        "with [TT]-iiact[tt]. If the ACTs of all windows are identical (and known), you can also ",
+        "provide them with [TT]-bs-tau[tt]. Note that this method may severely underestimate the error ",
+        "in case of limited sampling, that is if individual histograms do not represent the complete",
+        "phase space at the respective positions.[BR]",
+        "  (4) [TT]traj-gauss[tt]  The same as Method [TT]traj[tt], but the trajectories are not bootstrapped ",
+        "from the umbrella histograms but from Gaussians with the average and width of the umbrella ",
+        "histograms. That method yields similar error estimates like method [TT]traj[tt].[BR]"
+        "Bootstrapping output:[BR]",
+        "  [TT]-bsres[tt]   Average profile and standard deviations[BR]",
+        "  [TT]-bsprof[tt]  All bootstrapping profiles[BR]",
+        "With [TT]-vbs[tt] (verbose bootstrapping), the histograms of each bootstrap are written, and, ",
+        "with bootstrap method [TT]traj[tt], the cumulative distribution functions of the histograms.",
     };
 
-    static t_UmbrellaOptions opt;
-    static gmx_bool bHistOnly=FALSE;
-
     const char *en_unit[]={NULL,"kJ","kCal","kT",NULL};
-    const char *en_unit_label[]={"","E (kJ mol\\S-1\\N)","E (kcal mol\\S-1\\N)","E (kT)",};
-    const char *en_cycl[]={NULL,"no","yes","weighted",NULL};
-
+    const char *en_unit_label[]={"","E (kJ mol\\S-1\\N)","E (kcal mol\\S-1\\N)","E (kT)",NULL};
+    const char *en_bsMethod[]={ NULL,"b-hist", "hist", "traj", "traj-gauss", NULL };
+  
+    static t_UmbrellaOptions opt;
+  
     t_pargs pa[] = {
-            { "-min", FALSE, etREAL, {&opt.min},
-              "Minimum coordinate in profile"},
-            { "-max", FALSE, etREAL, {&opt.max},
-              "Maximum coordinate in profile"},
-            { "-auto", FALSE, etBOOL, {&opt.bAuto},
-              "determine min and max automatically"},
-            { "-bins",FALSE, etINT, {&opt.bins},
-              "Number of bins in profile"},
-            { "-temp", FALSE, etREAL, {&opt.Temperature},
-              "Temperature"},
-            { "-tol", FALSE, etREAL, {&opt.Tolerance},
-              "Tolerance"},
-            { "-v", FALSE, etBOOL, {&opt.verbose},
-              "verbose mode"},
-            { "-b", FALSE, etREAL, {&opt.tmin},
-              "first time to analyse (ps)"},
-            { "-e", FALSE, etREAL, {&opt.tmax},
-              "last time to analyse (ps)"},
-            { "-dt", FALSE, etREAL, {&opt.dt},
-              "Analyse only every dt ps"},
-            { "-histonly", FALSE, etBOOL, {&bHistOnly},
-              "Write histograms and exit"},
-            { "-boundsonly", FALSE, etBOOL, {&opt.bBoundsOnly},
-              "Determine min and max and exit (with -auto)"},
-            { "-log", FALSE, etBOOL, {&opt.bLog},
-              "Calculate the log of the profile before printing"},
-            { "-unit", FALSE,  etENUM, {en_unit},
-              "energy unit in case of log output" },
-            { "-zprof0", FALSE, etREAL, {&opt.zProf0},
-              "Define profile to 0.0 at this position (with -log)"},
-            { "-cycl", FALSE, etENUM, {en_cycl},
-              "Create cyclic/periodic profile. Assumes min and max are the same point."},
-            { "-alpha", FALSE, etREAL, {&opt.alpha},
-              "for '-cycl weighted', set parameter alpha"},
-            { "-flip", FALSE, etBOOL, {&opt.bFlipProf},
-              "Combine halves of profile (not supported)"},
-            { "-hist-eq", FALSE, etBOOL, {&opt.bHistEq},
-              "Enforce equal weight for all histograms. (Non-Weighed-HAM)"},
-            { "-nBootstrap", FALSE,  etINT, {&opt.nBootStrap},
-              "nr of bootstraps to estimate statistical uncertainty" },
-            { "-bs-dt", FALSE, etREAL, {&opt.dtBootStrap},
-              "timestep for synthetic bootstrap histograms (ps). Ensure independent data points!"},
-            { "-bs-seed", FALSE, etINT, {&opt.bsSeed},
-              "seed for bootstrapping. (-1 = use time)"},
-            { "-histbs", FALSE, etBOOL, {&opt.bHistBootStrap},
-              "In bootstrapping, consider complete histograms as one data point. "
-              "Accounts better for long autocorrelations."},
-            { "-histbs-block", FALSE, etINT, {&opt.histBootStrapBlockLength},
-              "when mixin histograms only mix within blocks of -histBS_block."},
-            { "-vbs", FALSE, etBOOL, {&opt.bs_verbose},
-                "verbose bootstrapping. Print the cummulants and a histogram file for each bootstrap."},
+        { "-min", FALSE, etREAL, {&opt.min},
+          "Minimum coordinate in profile"},
+        { "-max", FALSE, etREAL, {&opt.max},
+          "Maximum coordinate in profile"},
+        { "-auto", FALSE, etBOOL, {&opt.bAuto},
+          "determine min and max automatically"},
+        { "-bins",FALSE, etINT, {&opt.bins},
+          "Number of bins in profile"},
+        { "-temp", FALSE, etREAL, {&opt.Temperature},
+          "Temperature"},
+        { "-tol", FALSE, etREAL, {&opt.Tolerance},
+          "Tolerance"},
+        { "-v", FALSE, etBOOL, {&opt.verbose},
+          "verbose mode"},
+        { "-b", FALSE, etREAL, {&opt.tmin}, 
+          "first time to analyse (ps)"},
+        { "-e", FALSE, etREAL, {&opt.tmax}, 
+          "last time to analyse (ps)"},
+        { "-dt", FALSE, etREAL, {&opt.dt},
+          "Analyse only every dt ps"},
+        { "-histonly", FALSE, etBOOL, {&opt.bHistOnly},
+          "Write histograms and exit"},
+        { "-boundsonly", FALSE, etBOOL, {&opt.bBoundsOnly},
+          "Determine min and max and exit (with -auto)"},
+        { "-log", FALSE, etBOOL, {&opt.bLog},
+          "Calculate the log of the profile before printing"},
+        { "-unit", FALSE,  etENUM, {en_unit},
+          "energy unit in case of log output" },
+        { "-zprof0", FALSE, etREAL, {&opt.zProf0},
+          "Define profile to 0.0 at this position (with -log)"},
+        { "-cycl", FALSE, etBOOL, {&opt.bCycl},
+          "Create cyclic/periodic profile. Assumes min and max are the same point."},
+        { "-sym", FALSE, etBOOL, {&opt.bSym},
+          "symmetrize profile around z=0"},   
+        { "-hist-eq", FALSE, etBOOL, {&opt.bHistEq},
+          "HIDDENEnforce equal weight for all histograms. (Non-Weighed-HAM)"},
+        { "-ac", FALSE, etBOOL, {&opt.bCalcTauInt},
+          "calculate integrated autocorrelation times and use in wham"},
+        { "-acsig", FALSE, etREAL, {&opt.sigSmoothIact},
+          "Smooth autocorrelation times along reaction coordinate with Gaussian of this sigma"},
+        { "-ac-trestart", FALSE, etREAL, {&opt.acTrestart},
+          "When computing autocorrelation functions, restart computing every .. (ps)"},
+        { "-acred", FALSE, etBOOL, {&opt.bAllowReduceIact},
+          "HIDDENWhen smoothing the ACTs, allow to reduce ACTs. Otherwise, only increase ACTs "
+          "during smoothing"},
+        { "-nBootstrap", FALSE,  etINT, {&opt.nBootStrap},
+          "nr of bootstraps to estimate statistical uncertainty (e.g., 200)" },
+        { "-bs-method", FALSE,  etENUM, {en_bsMethod},
+          "bootstrap method" },
+        { "-bs-tau", FALSE, etREAL, {&opt.tauBootStrap},
+          "Autocorrelation time (ACT) assumed for all histograms. Use option -ac if ACT is unknown."},
+        { "-bs-seed", FALSE, etINT, {&opt.bsSeed},
+          "seed for bootstrapping. (-1 = use time)"},
+        { "-histbs-block", FALSE, etINT, {&opt.histBootStrapBlockLength},
+          "when mixing histograms only mix within blocks of -histbs-block."},
+        { "-vbs", FALSE, etBOOL, {&opt.bs_verbose},
+          "verbose bootstrapping. Print the CDFs and a histogram file for each bootstrap."},
+        { "-stepout", FALSE, etINT, {&opt.stepchange},
+          "HIDDENWrite maximum change every ... (set to 1 with -v)"},
+        { "-updateContr", FALSE, etINT, {&opt.stepUpdateContrib},
+          "HIDDENUpdate table with significant contributions to WHAM every ... iterations"},
     };
-
+  
     t_filenm fnm[] = {
-            { efDAT, "-ix","pullx-files",ffOPTRD},     /* wham input: pullf.xvg's and tprs    */
-            { efDAT, "-if","pullf-files",ffOPTRD},     /* wham input: pullf.xvg's and tprs    */
-            { efDAT, "-it","tpr-files",ffOPTRD},       /* wham input: tprs                    */
-            { efDAT, "-ip","pdo-files",ffOPTRD},       /* wham input: pdo files (gmx3 style)  */
-            { efXVG, "-o", "profile", ffWRITE },       /* output file for profile */
-            { efXVG, "-hist","histo", ffWRITE},               /* output file for histograms */
-            { efXVG, "-bsres","bsResult", ffOPTWR},    /* average and errors of bootstrap analysis */
-            { efXVG, "-bsprof","bsProfs", ffOPTWR},    /* output file for bootstrap profiles       */
-            { efDAT, "-tab","umb-pot",ffOPTRD},        /* Tabulated umbrella potential (if not harmonic) */
-            { efXVG, "-wcorr","cycl-corr",ffOPTRD},    /* Corrections to profile in case -cycl weighted */
+        { efDAT, "-ix","pullx-files",ffOPTRD},  /* wham input: pullx.xvg's and tprs           */
+        { efDAT, "-if","pullf-files",ffOPTRD},  /* wham input: pullf.xvg's and tprs           */
+        { efDAT, "-it","tpr-files",ffOPTRD},    /* wham input: tprs                           */
+        { efDAT, "-ip","pdo-files",ffOPTRD},    /* wham input: pdo files (gmx3 style)         */
+        { efXVG, "-o", "profile", ffWRITE },    /* output file for profile                     */
+        { efXVG, "-hist","histo", ffWRITE},        /* output file for histograms                  */
+        { efXVG, "-oiact","iact",ffOPTWR},      /* writing integrated autocorrelation times    */
+        { efDAT, "-iiact","iact-in",ffOPTRD},   /* reading integrated autocorrelation times   */    
+        { efXVG, "-bsres","bsResult", ffOPTWR}, /* average and errors of bootstrap analysis    */
+        { efXVG, "-bsprof","bsProfs", ffOPTWR}, /* output file for bootstrap profiles          */
+        { efDAT, "-tab","umb-pot",ffOPTRD},     /* Tabulated umbrella potential (if not harmonic) */    
     };
-
+  
     int i,j,l,nfiles,nwins,nfiles2;
     t_UmbrellaHeader header;
     t_UmbrellaWindow * window=NULL;
@@ -1799,69 +2687,71 @@ int gmx_wham(int argc,char *argv[])
     char **fninTpr,**fninPull,**fninPdo;
     const char *fnPull;
     FILE *histout,*profout;
-    char ylabel[256],title[256];
-    output_env_t oenv;
+    char ylabel[256],title[256];  
+
+#define NFILE asize(fnm)
+
+    CopyRight(stderr,argv[0]);
 
     opt.bins=200;
     opt.verbose=FALSE;
-    opt.cycl=enCycl_no;
+    opt.bHistOnly=FALSE;
+    opt.bCycl=FALSE;
     opt.tmin=50;
     opt.tmax=1e20;
     opt.dt=0.0;
-    opt.bShift=TRUE;
-    opt.nBootStrap=0;
-    opt.dtBootStrap=0.0;
-    opt.bsSeed=-1;
-    opt.bHistBootStrap=TRUE;
-    opt.histBootStrapBlockLength=12;
-    opt.zProfZero=0.0;
-    opt.bWeightedCycl=FALSE;
-    opt.alpha=2;
-    opt.bHistOutOnly=FALSE;
     opt.min=0;
     opt.max=0;
-    opt.bLog=TRUE;
-    opt.unit=en_kJ;
-    opt.zProf0=0.0;
+    opt.bAuto=TRUE;
+
+    /* bootstrapping stuff */
     opt.nBootStrap=0;
+    opt.bsMethod=bsMethod_hist;
+    opt.tauBootStrap=0.0;
     opt.bsSeed=-1;
-    opt.bHistBootStrap=TRUE;
     opt.histBootStrapBlockLength=8;
     opt.bs_verbose=FALSE;
+
+    opt.bLog=TRUE;
+    opt.unit=en_kJ;
+    opt.zProf0=0.;
     opt.Temperature=298;
-    opt.bFlipProf=FALSE;
     opt.Tolerance=1e-6;
-    opt.bAuto=TRUE;
     opt.bBoundsOnly=FALSE;
+    opt.bSym=FALSE;
+    opt.bCalcTauInt=FALSE;
+    opt.sigSmoothIact=0.0;
+    opt.bAllowReduceIact=TRUE;
+    opt.bInitPotByIntegration=TRUE;
+    opt.acTrestart=1.0;
+    opt.stepchange=100;
+    opt.stepUpdateContrib=100;
 
-
-#define NFILE asize(fnm)
-
-    CopyRight(stderr,argv[0]);
     parse_common_args(&argc,argv,PCA_BE_NICE,
-            NFILE,fnm,asize(pa),pa,asize(desc),desc,0,NULL,&oenv);
-
+                      NFILE,fnm,asize(pa),pa,asize(desc),desc,0,NULL,&opt.oenv);
+  
     opt.unit=nenum(en_unit);
-    opt.cycl=nenum(en_cycl);
+    opt.bsMethod=nenum(en_bsMethod);
 
     opt.bProf0Set=opt2parg_bSet("-zprof0",  asize(pa), pa);
-
+  
     opt.bTab=opt2bSet("-tab",NFILE,fnm);
     opt.bPdo=opt2bSet("-ip",NFILE,fnm);
     opt.bTpr=opt2bSet("-it",NFILE,fnm);
     opt.bPullx=opt2bSet("-ix",NFILE,fnm);
     opt.bPullf=opt2bSet("-if",NFILE,fnm);
+    opt.bTauIntGiven=opt2bSet("-iiact",NFILE,fnm);
     if  (opt.bTab && opt.bPullf)
         gmx_fatal(FARGS,"Force input does not work with tabulated potentials. "
-                "Provide pullx.xvg or pdo files!");
+                  "Provide pullx.xvg or pdo files!");
 
-#define BOOLXOR(a,b) ( ((!(a))&&(b)) || ((a)&&(!(b))))
-    if (!opt.bPdo && !BOOLXOR(opt.bPullx,opt.bPullf))
+#define WHAMBOOLXOR(a,b) ( ((!(a))&&(b)) || ((a)&&(!(b))))
+    if (!opt.bPdo && !WHAMBOOLXOR(opt.bPullx,opt.bPullf))
         gmx_fatal(FARGS,"Give either pullx (-ix) OR pullf (-if) data. Not both.");
     if ( !opt.bPdo && !(opt.bTpr || opt.bPullf || opt.bPullx))
         gmx_fatal(FARGS,"g_wham supports three input modes, pullx, pullf, or pdo file input."
-                "\n\n Check g_wham -h !");
-
+                  "\n\n Check g_wham -h !");
+  
     opt.fnPdo=opt2fn("-ip",NFILE,fnm);
     opt.fnTpr=opt2fn("-it",NFILE,fnm);
     opt.fnPullf=opt2fn("-if",NFILE,fnm);
@@ -1882,28 +2772,41 @@ int gmx_wham(int argc,char *argv[])
         opt.bAuto=FALSE;
     }
 
+    if (opt.bTauIntGiven && opt.bCalcTauInt)
+        gmx_fatal(FARGS,"Either read (option -iiact) or calculate (option -ac) the\n"
+                  "autocorrelation times. Not both.");
+
+    if (opt.tauBootStrap>0.0 && opt2parg_bSet("-ac",asize(pa), pa))
+        gmx_fatal(FARGS,"Either compute autocorrelation times (ACTs) (option -ac) or "
+                  "provide it with -bs-tau for bootstrapping. Not Both.\n");
+    if (opt.tauBootStrap>0.0 && opt2bSet("-iiact",NFILE,fnm))
+        gmx_fatal(FARGS,"Either provide autocorrelation times (ACTs) with file iact-in.dat "
+                  "(option -iiact) or define all ACTs with -bs-tau for bootstrapping. Not Both.\n");
+  
+
     /* Reading gmx4 pull output and tpr files */
     if (opt.bTpr || opt.bPullf || opt.bPullx)
     {
         read_wham_in(opt.fnTpr,&fninTpr,&nfiles,&opt);
-
+    
         fnPull=opt.bPullf ? opt.fnPullf : opt.fnPullx;
         read_wham_in(fnPull,&fninPull,&nfiles2,&opt);
         printf("Found %d tpr and %d pull %s files in %s and %s, respectively\n",
-                nfiles,nfiles2,opt.bPullf ? "force" : "position",opt.fnTpr,fnPull);
+               nfiles,nfiles2,opt.bPullf ? "force" : "position",opt.fnTpr,fnPull);
         if (nfiles!=nfiles2)
             gmx_fatal(FARGS,"Found %d file names in %s, but %d in %s\n",nfiles,
-                    opt.fnTpr,nfiles2,fnPull);
-        read_tpr_pullxf_files(fninTpr,fninPull,nfiles, &header, &window, &opt);
+                      opt.fnTpr,nfiles2,fnPull);
+        window=initUmbrellaWindows(nfiles);
+        read_tpr_pullxf_files(fninTpr,fninPull,nfiles, &header, window, &opt);
     }
     else
-    {
-        /* reading pdo files */
+    { /* reading pdo files */
         read_wham_in(opt.fnPdo,&fninPdo,&nfiles,&opt);
         printf("Found %d pdo files in %s\n",nfiles,opt.fnPdo);
-        read_pdo_files(fninPdo,nfiles, &header, &window, &opt);
+        window=initUmbrellaWindows(nfiles);
+        read_pdo_files(fninPdo,nfiles, &header, window, &opt);
     }
-    nwins=nfiles;
+    nwins=nfiles;  
 
     /* enforce equal weight for all histograms? */
     if (opt.bHistEq)
@@ -1911,13 +2814,13 @@ int gmx_wham(int argc,char *argv[])
 
     /* write histograms */
     histout=xvgropen(opt2fn("-hist",NFILE,fnm),"Umbrella histograms",
-            "z","count",oenv);
-    for(l=0;l<opt.bins;++l)
+                     "z","count",opt.oenv);
+    for(l=0;l<opt.bins;++l) 
     {
         fprintf(histout,"%e\t",(double)(l+0.5)/opt.bins*(opt.max-opt.min)+opt.min);
-        for(i=0;i<nwins;++i)
-        {
-            for(j=0;j<window[i].nPull;++j)
+        for(i=0;i<nwins;++i) 
+        {      
+            for(j=0;j<window[i].nPull;++j) 
             {
                 fprintf(histout,"%e\t",window[i].Histo[j][l]);
             }
@@ -1925,7 +2828,8 @@ int gmx_wham(int argc,char *argv[])
         fprintf(histout,"\n");
     }
     ffclose(histout);
-    if (bHistOnly)
+    printf("Wrote %s\n",opt2fn("-hist",NFILE,fnm));
+    if (opt.bHistOnly)
     {
         printf("Wrote histograms to %s, now exiting.\n",opt2fn("-hist",NFILE,fnm));
         return 0;
@@ -1934,14 +2838,36 @@ int gmx_wham(int argc,char *argv[])
     /* Using tabulated umbrella potential */
     if (opt.bTab)
         setup_tab(opt2fn("-tab",NFILE,fnm),&opt);
-
-    setup_acc_wham(window,nwins,&opt);
+  
+    /* Integrated autocorrelation times provided ? */
+    if (opt.bTauIntGiven)
+        readIntegratedAutocorrelationTimes(window,nwins,&opt,opt2fn("-iiact",NFILE,fnm));
+
+    /* Compute integrated autocorrelation times */
+    if (opt.bCalcTauInt)
+        calcIntegratedAutocorrelationTimes(window,nwins,&opt,opt2fn("-oiact",NFILE,fnm));
+
+    /* calc average and sigma for each histogram
+       (only done when bootstrapping with bsMethod_trajGauss) */
+    if (opt.nBootStrap && opt.bsMethod==bsMethod_trajGauss)
+        averageSigma(window,nwins,&opt);
+  
+    /* Get initial potential by simple integration */
+    if (opt.bInitPotByIntegration)
+        guessPotByIntegration(window,nwins,&opt,0);  
+
+    /* Check if complete reaction coordinate is covered */
+    checkReactionCoordinateCovered(window,nwins,&opt);
 
     /* Calculate profile */
-    snew(profile,opt.bins);
-    opt.stepchange=(opt.verbose)? 1 : 100;
+    snew(profile,opt.bins);  
+    if (opt.verbose)
+        opt.stepchange=1;
     i=0;
-    do {
+    do 
+    {
+        if ( (i%opt.stepUpdateContrib) == 0)
+            setup_acc_wham(profile,window,nwins,&opt);
         if (maxchange<opt.Tolerance)
         {
             bExact=TRUE;
@@ -1952,9 +2878,14 @@ int gmx_wham(int argc,char *argv[])
         if (((i%opt.stepchange) == 0 || i==1) && !i==0)
             printf("\t%4d) Maximum change %e\n",i,maxchange);
         i++;
-    } while( (maxchange=calc_z(profile, window, nwins, &opt,bExact)) > opt.Tolerance || !bExact);
+    } while ( (maxchange=calc_z(profile, window, nwins, &opt,bExact)) > opt.Tolerance || !bExact);
     printf("Converged in %d iterations. Final maximum change %g\n",i,maxchange);
 
+    /* calc error from Kumar's formula */
+    /* Unclear how the error propagates along reaction coordinate, therefore
+       commented out  */
+    /* calc_error_kumar(profile,window, nwins,&opt); */
+
     /* Write profile in energy units? */
     if (opt.bLog)
     {
@@ -1967,23 +2898,29 @@ int gmx_wham(int argc,char *argv[])
         strcpy(ylabel,"Density of states");
         strcpy(title,"Density of states");
     }
-    /* Force cyclic profile by wheighted correction? */
-    if (opt.cycl==enCycl_weighted)
-        cyclicProfByWeightedCorr(profile,window,nwins,&opt,TRUE,
-                                 opt2fn("-wcorr",NFILE,fnm),oenv);
+    
+    /* symmetrize profile around z=0? */
+    if (opt.bSym)
+        symmetrizeProfile(profile,&opt);
 
-    profout=xvgropen(opt2fn("-o",NFILE,fnm),title,"z",ylabel,oenv);
+    /* write profile or density of states */
+    profout=xvgropen(opt2fn("-o",NFILE,fnm),title,"z",ylabel,opt.oenv);
     for(i=0;i<opt.bins;++i)
-        fprintf(profout,"%e\t%e\n",
-                (double)(i+0.5)/opt.bins*(opt.max-opt.min)+opt.min,profile[i]);
+        fprintf(profout,"%e\t%e\n",(double)(i+0.5)/opt.bins*(opt.max-opt.min)+opt.min,profile[i]);
     ffclose(profout);
     printf("Wrote %s\n",opt2fn("-o",NFILE,fnm));
-
+  
     /* Bootstrap Method */
     if (opt.nBootStrap)
-        do_bootstrapping(opt2fn("-bsres",NFILE,fnm),opt2fn("-bsprof",NFILE,fnm),
-                opt2fn("-hist",NFILE,fnm),
-                ylabel, profile, window, nwins, &opt,oenv);
+        do_bootstrapping(opt2fn("-bsres",NFILE,fnm),opt2fn("-bsprof",NFILE,fnm), 
+                         opt2fn("-hist",NFILE,fnm),
+                         ylabel, profile, window, nwins, &opt);
+
+    sfree(profile);
+    freeUmbrellaWindows(window,nfiles);
+
+    printf("\nIn case you use results from g_wham for a publication, please cite:\n");
+    please_cite(stdout,"Hub2010");
 
     thanx(stderr);
     return 0;
index 15b1bd82a75931f0598eca8bcabcd34f98b4316a..ee4311d6c98b2a9549dc049b6818a254ec787f75 100644 (file)
@@ -730,7 +730,7 @@ static int split_chain(t_atoms *atoms,rvec *x,
 static gmx_bool check_have_atoms(t_atoms *atoms, char *string)
 {
   if ( atoms==NULL ) {
-    printf("Can not process '%s' without atoms info\n", string);
+    printf("Can not process '%s' without atom info, use option -f\n", string);
     return FALSE;
   } else
     return TRUE;
index d0b832a525355089732803aef3993a4522d46c7b..5736ab85916bf9876bde0656d75bb92f70c1afe0 100644 (file)
@@ -73,7 +73,7 @@ extern gmx_bool has_dihedral(int Dih,t_dlist *dl);
 extern t_dlist *mk_dlist(FILE *log, 
                         t_atoms *atoms, int *nlist,
                         gmx_bool bPhi, gmx_bool bPsi, gmx_bool bChi, int maxchi,
-                        int r0,int naa,char **aa);
+                        int r0, gmx_residuetype_t rt);
                         
 extern void pr_dlist(FILE *fp,int nl,t_dlist dl[],real dt,  int printtype,
 gmx_bool bPhi, gmx_bool bPsi,gmx_bool bChi,gmx_bool bOmega, int maxchi);