Merge branch 'master' into pygromacs
authorAlexey Shvetsov <alexxy@omrb.pnpi.spb.ru>
Fri, 10 Jul 2015 16:38:05 +0000 (19:38 +0300)
committerAlexey Shvetsov <alexxy@omrb.pnpi.spb.ru>
Fri, 10 Jul 2015 16:38:05 +0000 (19:38 +0300)
172 files changed:
cmake/FindPythonModule.cmake
cmake/FindSphinx.cmake
cmake/gmxVersionInfo.cmake
docs/CMakeLists.txt
docs/doxygen/gmxtree.py
docs/user-guide/mdp-options.rst
share/top/gurgle.dat
src/config.h.cmakein
src/contrib/anaf.c
src/gromacs/analysisdata/tests/refdata/common-referencedata.xsl
src/gromacs/commandline/cmdlinehelpcontext.cpp
src/gromacs/commandline/cmdlinehelpcontext.h
src/gromacs/commandline/cmdlinehelpmodule.cpp
src/gromacs/commandline/cmdlinehelpwriter.cpp
src/gromacs/commandline/cmdlineprogramcontext.cpp
src/gromacs/commandline/shellcompletions.cpp
src/gromacs/commandline/shellcompletions.h
src/gromacs/commandline/tests/cmdlinehelpmodule.cpp
src/gromacs/commandline/tests/cmdlinehelpwriter.cpp
src/gromacs/commandline/tests/cmdlinemodulemanagertest.cpp
src/gromacs/commandline/tests/cmdlinemodulemanagertest.h
src/gromacs/commandline/tests/pargs.cpp
src/gromacs/commandline/tests/refdata/CommandLineHelpModuleTest_ExportsHelp.xml
src/gromacs/domdec/domdec.cpp
src/gromacs/domdec/domdec_specatomcomm.cpp
src/gromacs/ewald/pme-load-balancing.cpp
src/gromacs/fft/fft.cpp
src/gromacs/fft/fft5d.cpp
src/gromacs/fft/fft5d.h
src/gromacs/fft/fft_fftw3.cpp
src/gromacs/fileio/gmx_system_xdr.c
src/gromacs/fileio/gmxfio-impl.h
src/gromacs/fileio/gmxfio-xdr.cpp
src/gromacs/fileio/gmxfio-xdr.h
src/gromacs/fileio/gmxfio.cpp
src/gromacs/fileio/gmxfio.h
src/gromacs/fileio/md5.c
src/gromacs/fileio/tpxio.c
src/gromacs/fileio/xdrf.h
src/gromacs/gmxana/gmx_dos.c
src/gromacs/gmxlib/gmx_detect_hardware.cpp
src/gromacs/gmxlib/gpu_utils/ocl_compiler.cpp
src/gromacs/gmxpreprocess/fflibutil.cpp
src/gromacs/gmxpreprocess/gmxcpp.h
src/gromacs/gmxpreprocess/pdb2top.cpp
src/gromacs/gmxpreprocess/readpull.c
src/gromacs/legacyheaders/gmx_detect_hardware.h
src/gromacs/legacyheaders/types/commrec.h
src/gromacs/legacyheaders/types/forcerec.h
src/gromacs/listed-forces/listed-forces.cpp
src/gromacs/listed-forces/listed-internal.h
src/gromacs/listed-forces/manage-threading.cpp
src/gromacs/listed-forces/manage-threading.h
src/gromacs/math/utilities.c
src/gromacs/mdlib/force.cpp
src/gromacs/mdlib/forcerec-threading.h
src/gromacs/mdlib/forcerec.cpp
src/gromacs/mdlib/genborn.cpp [moved from src/gromacs/mdlib/genborn.c with 86% similarity]
src/gromacs/mdlib/genborn_allvsall.cpp [moved from src/gromacs/mdlib/genborn_allvsall.c with 97% similarity]
src/gromacs/mdlib/genborn_allvsall_sse2_double.c [deleted file]
src/gromacs/mdlib/genborn_allvsall_sse2_double.h [deleted file]
src/gromacs/mdlib/genborn_allvsall_sse2_single.c [deleted file]
src/gromacs/mdlib/genborn_allvsall_sse2_single.h [deleted file]
src/gromacs/mdlib/genborn_sse2_double.c [deleted file]
src/gromacs/mdlib/genborn_sse2_single.c [deleted file]
src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_types.h
src/gromacs/mdlib/nbnxn_search.c
src/gromacs/mdlib/ns.cpp [moved from src/gromacs/mdlib/ns.c with 97% similarity]
src/gromacs/mdlib/nsgrid.cpp [moved from src/gromacs/mdlib/nsgrid.c with 96% similarity]
src/gromacs/mdlib/qm_gamess.cpp [moved from src/gromacs/mdlib/qm_gamess.c with 95% similarity]
src/gromacs/mdlib/qm_gaussian.cpp [moved from src/gromacs/mdlib/qm_gaussian.c with 97% similarity]
src/gromacs/mdlib/qm_mopac.cpp [moved from src/gromacs/mdlib/qm_mopac.c with 91% similarity]
src/gromacs/mdlib/qm_orca.cpp [moved from src/gromacs/mdlib/qm_orca.c with 98% similarity]
src/gromacs/mdlib/qmmm.cpp [moved from src/gromacs/mdlib/qmmm.c with 95% similarity]
src/gromacs/onlinehelp/helpformat.h
src/gromacs/onlinehelp/helptopic.cpp
src/gromacs/onlinehelp/helpwritercontext.cpp
src/gromacs/onlinehelp/helpwritercontext.h
src/gromacs/onlinehelp/tests/helpmanager.cpp
src/gromacs/selection/indexutil.cpp
src/gromacs/selection/indexutil.h
src/gromacs/selection/nbsearch.cpp
src/gromacs/selection/parsetree.cpp
src/gromacs/selection/scanner.cpp
src/gromacs/selection/scanner.h
src/gromacs/selection/scanner.l
src/gromacs/selection/scanner_internal.cpp
src/gromacs/selection/scanner_internal.h
src/gromacs/selection/selection.cpp
src/gromacs/selection/selectioncollection.cpp
src/gromacs/selection/selectioncollection.h
src/gromacs/selection/selhelp.cpp
src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesBasicInput.xml [new file with mode: 0644]
src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesContinuation.xml [new file with mode: 0644]
src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesEmptySelections.xml [new file with mode: 0644]
src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesMultiSelectionInputStatus.xml [new file with mode: 0644]
src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesMultipleSelectionsOnLine.xml [new file with mode: 0644]
src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesNoFinalNewline.xml [new file with mode: 0644]
src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesNoninteractiveInput.xml [new file with mode: 0644]
src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesSingleSelectionInput.xml [new file with mode: 0644]
src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesSingleSelectionInputNoninteractively.xml [new file with mode: 0644]
src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesSingleSelectionInputStatus.xml [new file with mode: 0644]
src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesStatusWithExistingSelections.xml [new file with mode: 0644]
src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesStatusWithGroups.xml [new file with mode: 0644]
src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesTwoSelectionInput.xml [new file with mode: 0644]
src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesTwoSelectionInputStatus.xml [new file with mode: 0644]
src/gromacs/selection/tests/refdata/common-referencedata.xsl
src/gromacs/selection/tests/refdata/referencedata.xsl
src/gromacs/selection/tests/selectioncollection.cpp
src/gromacs/tools/CMakeLists.txt
src/gromacs/tools/check.cpp [moved from src/gromacs/tools/check.c with 97% similarity]
src/gromacs/tools/check.h
src/gromacs/tools/compare.cpp [moved from src/gromacs/tools/compare.c with 96% similarity]
src/gromacs/tools/convert_tpr.cpp [moved from src/gromacs/tools/convert_tpr.c with 98% similarity]
src/gromacs/tools/convert_tpr.h
src/gromacs/tools/dump.cpp [moved from src/gromacs/tools/dump.c with 98% similarity]
src/gromacs/tools/dump.h
src/gromacs/trajectoryanalysis/cmdlinerunner.cpp
src/gromacs/trajectoryanalysis/modules/distance.cpp
src/gromacs/trajectoryanalysis/tests/refdata/common-referencedata.xsl
src/gromacs/utility.h
src/gromacs/utility/CMakeLists.txt
src/gromacs/utility/datafilefinder.cpp
src/gromacs/utility/errorcodes.cpp
src/gromacs/utility/exceptions.cpp
src/gromacs/utility/exceptions.h
src/gromacs/utility/file.cpp [deleted file]
src/gromacs/utility/file.h [deleted file]
src/gromacs/utility/fileredirector.cpp
src/gromacs/utility/fileredirector.h
src/gromacs/utility/filestream.cpp [new file with mode: 0644]
src/gromacs/utility/filestream.h [new file with mode: 0644]
src/gromacs/utility/mutex.h [moved from src/gromacs/mdlib/genborn_sse2_single.h with 63% similarity]
src/gromacs/utility/nodelete.h [new file with mode: 0644]
src/gromacs/utility/path.cpp
src/gromacs/utility/path.h
src/gromacs/utility/stringstream.cpp [moved from src/gromacs/mdlib/genborn_sse2_double.h with 62% similarity]
src/gromacs/utility/stringstream.h [new file with mode: 0644]
src/gromacs/utility/stringutil.cpp
src/gromacs/utility/stringutil.h
src/gromacs/utility/tests/CMakeLists.txt
src/gromacs/utility/tests/refdata/TextLineWrapperTest_WrapsCorrectlyWithExtraWhitespace.xml
src/gromacs/utility/tests/refdata/TextWriterTest_WritesLines.xml [new file with mode: 0644]
src/gromacs/utility/tests/refdata/TextWriterTest_WritesLinesInParts.xml [new file with mode: 0644]
src/gromacs/utility/tests/refdata/TextWriterTest_WritesLinesInPartsWithWrapper.xml [new file with mode: 0644]
src/gromacs/utility/tests/refdata/TextWriterTest_WritesWrappedLines.xml [new file with mode: 0644]
src/gromacs/utility/tests/stringutil.cpp
src/gromacs/utility/tests/textwriter.cpp [new file with mode: 0644]
src/gromacs/utility/textreader.cpp [new file with mode: 0644]
src/gromacs/utility/textreader.h [new file with mode: 0644]
src/gromacs/utility/textstream.h [new file with mode: 0644]
src/gromacs/utility/textwriter.cpp [new file with mode: 0644]
src/gromacs/utility/textwriter.h [new file with mode: 0644]
src/programs/mdrun/resource-division.cpp
src/programs/mdrun/resource-division.h
src/programs/mdrun/runner.cpp
src/programs/mdrun/tests/moduletest.cpp
src/testutils/CMakeLists.txt
src/testutils/cmdlinetest.cpp
src/testutils/common-referencedata.xsl
src/testutils/integrationtests.cpp
src/testutils/interactivetest.cpp [new file with mode: 0644]
src/testutils/interactivetest.h [new file with mode: 0644]
src/testutils/stringtest.cpp
src/testutils/stringtest.h
src/testutils/testfileredirector.cpp
src/testutils/testfileredirector.h
src/testutils/testinit.cpp
src/testutils/testoptions.cpp
src/testutils/tests/CMakeLists.txt
src/testutils/tests/interactivetest.cpp [new file with mode: 0644]
src/testutils/testutils-doc.h

index b8e965d1e69ee6955f6faa7d900b6214c8e68758..02773c643f44ee705b0b1ee974cf8f56ad08157b 100644 (file)
 # To help us fund GROMACS development, we humbly ask that you cite
 # the research papers on the package. Check out http://www.gromacs.org.
 
-# Adapted from code posted on cmake-users by Mark Moll
+# Adapted from code posted on cmake-users by Mark Moll (the execute_process()
+# call remains, but other things have been rewritten for nicer behavior).
 find_package(PythonInterp)
-function(find_python_module module)
-    string(TOUPPER ${module} module_upper)
-    if(NOT PYTHONMODULE_${module_upper})
-        if(ARGC GREATER 1 AND ARGV1 STREQUAL "REQUIRED")
-           set(${module}_FIND_REQUIRED TRUE)
-       endif()
-        if (NOT PYTHON_EXECUTABLE)
-            message(STATUS "Cannot find python module ${module} because no python executable is known")
-        else()
-           # A module's location is usually a directory, but for binary modules
-           # it's a .so file.
-           execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
-               "import re, ${module}; print re.compile('/__init__.py.*').sub('',${module}.__file__)"
-               RESULT_VARIABLE _${module}_status 
-               OUTPUT_VARIABLE _${module}_location
-               ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+function (find_python_module module)
+    string(TOUPPER ${module} _module_upper)
+    set(_find_package_module ${module})
+    set(_out_var PYTHONMODULE_${_module_upper})
+
+    include(CMakeParseArguments)
+    set(_options QUIET REQUIRED)
+    cmake_parse_arguments(ARG "${_options}" "" "" ${ARGN})
+    if (ARG_UNPARSED_ARGUMENTS)
+        message(FATAL_ERROR "Unknown arguments: ${ARG_UNPARSED_ARGUMENTS}")
+    endif()
+    if (ARG_REQUIRED)
+        set(${_find_package_module}_FIND_REQUIRED TRUE)
+    endif()
+    if (ARG_QUIET)
+        set(${_find_package_module}_FIND_QUIETLY TRUE)
+    endif()
+
+    if (NOT ${_out_var})
+        set(_status 1)
+        if (PYTHON_EXECUTABLE)
+            # A module's location is usually a directory, but for binary modules
+            # it's a .so file.
+            execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
+                "import re, ${module}; print re.compile('/__init__.py.*').sub('',${module}.__file__)"
+                RESULT_VARIABLE _status
+                OUTPUT_VARIABLE _location
+                ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
+        endif()
+        if(_status)
+            set(_location ${_find_package_module}-NOTFOUND)
         endif()
-       if(NOT _${module}_status)
-           set(PYTHONMODULE_${module_upper} ${_${module}_location} CACHE STRING 
-               "Location of Python module ${module}")
-       endif()
+        set(${_out_var} ${_location} CACHE STRING
+            "Location of Python module ${module}" FORCE)
+        mark_as_advanced(${_out_var})
     endif()
-    find_package_handle_standard_args(PYTHONMODULE_${module} DEFAULT_MSG PYTHONMODULE_${module_upper})
+    include(FindPackageHandleStandardArgs)
+    find_package_handle_standard_args(
+        ${_find_package_module} DEFAULT_MSG
+        ${_out_var} PYTHON_EXECUTABLE)
 endfunction()
index 46bf447abd082c53b535767578c6650ededb3eee..6eae5130251ce42f8c45fa5af582348e39986d9c 100644 (file)
@@ -41,8 +41,7 @@ find_program(SPHINX_EXECUTABLE NAMES sphinx-build
 mark_as_advanced(SPHINX_EXECUTABLE)
 
 # Detect Sphinx version
-
-if(SPHINX_FOUND AND NOT DEFINED SPHINX_EXECUTABLE_VERSION)
+if (SPHINX_EXECUTABLE AND NOT DEFINED SPHINX_EXECUTABLE_VERSION)
     execute_process(
         COMMAND ${SPHINX_EXECUTABLE} --version
         OUTPUT_VARIABLE SPHINX_VERSION_OUTPUT_VARIABLE
@@ -50,14 +49,17 @@ if(SPHINX_FOUND AND NOT DEFINED SPHINX_EXECUTABLE_VERSION)
         ERROR_QUIET
         OUTPUT_STRIP_TRAILING_WHITESPACE
         )
-    string(REGEX REPLACE "Sphinx \\(${SPHINX_EXECUTABLE}\\) ([^ ]+)" "\\1" SPHINX_EXECUTABLE_VERSION ${SPHINX_VERSION_OUTPUT_VARIABLE})
+    string(REGEX REPLACE "Sphinx \\([^)]*\\) ([^ ]+)" "\\1" SPHINX_EXECUTABLE_VERSION ${SPHINX_VERSION_OUTPUT_VARIABLE})
     set(SPHINX_EXECUTABLE_VERSION "${SPHINX_EXECUTABLE_VERSION}" CACHE INTERNAL "Version of ${SPHINX_EXECUTABLE}")
 endif()
 
+set(_find_deps_options)
+if (Sphinx_FIND_QUIETLY)
+    set(_find_deps_options QUIET)
+endif()
 include(FindPythonModule)
-find_python_module(pygments)
-
-if(PYTHONMODULE_PYGMENTS)
+find_python_module(pygments ${_find_deps_options})
+if (PYTHONMODULE_PYGMENTS)
     set(Sphinx_pygments_FOUND 1)
 endif()
 
index 61e595aa312ae19484182b91815228dd905d00cd..f7a412b281051f9bc5352f926b4885c2f8873f10 100644 (file)
@@ -211,13 +211,13 @@ endif()
 # The GROMACS convention is that these are the version number of the next
 # release that is going to be made from this branch.
 set(GMX_VERSION_MAJOR 5)
-set(GMX_VERSION_MINOR 1)
+set(GMX_VERSION_MINOR 2)
 set(GMX_VERSION_PATCH 0)
 # The suffix, on the other hand, is used mainly for betas and release
 # candidates, where it signifies the most recent such release from
 # this branch; it will be empty before the first such release, as well
 # as after the final release is out.
-set(GMX_VERSION_SUFFIX "-beta1")
+set(GMX_VERSION_SUFFIX "")
 
 # Conventionally with libtool, any ABI change must change the major
 # version number, the minor version number should change if it's just
@@ -228,7 +228,7 @@ set(GMX_VERSION_SUFFIX "-beta1")
 # here. The important thing is to minimize the chance of third-party
 # code being able to dynamically link with a version of libgromacs
 # that might not work.
-set(LIBRARY_SOVERSION_MAJOR 1)
+set(LIBRARY_SOVERSION_MAJOR 2)
 set(LIBRARY_SOVERSION_MINOR 0)
 set(LIBRARY_VERSION ${LIBRARY_SOVERSION_MAJOR}.${LIBRARY_SOVERSION_MINOR}.0)
 
@@ -253,7 +253,7 @@ set(REGRESSIONTEST_BRANCH "refs/heads/master")
 # each release. It's hard to test because it is only used for
 # REGRESSIONTEST_DOWNLOAD, which doesn't work until that tarball has
 # been placed on the server.
-set(REGRESSIONTEST_MD5SUM "6f8531a6e3c2a8912327b9cd450d8745" CACHE INTERNAL "MD5 sum of the regressiontests tarball")
+set(REGRESSIONTEST_MD5SUM "bb67f145095249e9d4a93227fc4c352e" CACHE INTERNAL "MD5 sum of the regressiontests tarball")
 
 math(EXPR GMX_VERSION_NUMERIC
      "${GMX_VERSION_MAJOR}*10000 + ${GMX_VERSION_MINOR}*100 + ${GMX_VERSION_PATCH}")
index d3f190a6358ced923edfa66d449cd76e5acfb4d6..17a7681c8bb774afdaa4c57e95b6b5c4dcf7553c 100644 (file)
@@ -59,7 +59,7 @@ mark_as_advanced(SOURCE_MD5SUM)
 set(EXPECTED_DOXYGEN_VERSION 1.8.5)
 
 find_package(PythonInterp)
-find_package(Sphinx 1.2.3 COMPONENTS pygments)
+find_package(Sphinx 1.2.3 QUIET COMPONENTS pygments)
 
 # Even if we aren't going to make the full webpage, set up to put all
 # the documentation output in the same place, for convenience
@@ -250,6 +250,10 @@ else()
         COMMAND ${CMAKE_COMMAND} -E echo
             "HTML pages cannot be built because Sphinx is not available"
         VERBATIM)
+    add_custom_target(install-guide
+        COMMAND ${CMAKE_COMMAND} -E echo
+            "INSTALL cannot be built because Sphinx is not available"
+        VERBATIM)
     add_custom_target(man
         COMMAND ${CMAKE_COMMAND} -E echo
             "man pages cannot be built because Sphinx is not available"
index a693fe921a081c1e7372367f6549c1e4e9f74b53..c4126d4bcf0516a9feeb18784b736d0815e3ba5f 100644 (file)
@@ -215,7 +215,7 @@ class File(object):
         """Scan the file contents and initialize information based on it."""
         # TODO: Consider a more robust regex.
         include_re = r'^\s*#\s*include\s+(?P<quote>["<])(?P<path>[^">]*)[">]'
-        define_re = r'^\s*#.*define\s+(\w*)'
+        define_re = r'^\s*#.*define(?:01)?\s+(\w*)'
         current_block = None
         with open(self._abspath, 'r') as scanfile:
             contents = scanfile.read()
index 5767bd4f9966c9405716e61103c595bdffdbce84..4fb9c0b204d0e8ed344bc5d32ac9bace94a23294 100644 (file)
@@ -1695,16 +1695,6 @@ applicable pulling coordinate.
    system, *e.g.* a water slab (see Engin et al. J. Chem. Phys. B
    2010).
 
-.. mdp:: pull-coord1-groups
-
-   The two groups indices should be given on which this pull
-   coordinate will operate. The first index can be 0, in which case an
-   absolute reference of :mdp:`pull-coord1-origin` is used. With an
-   absolute reference the system is no longer translation invariant
-   and one should think about what to do with the center of mass
-   motion. Note that (only) for :mdp:`pull-coord1-geometry` =
-   :mdp-value:`direction-relative` four groups are required.
-
 .. mdp:: pull-coord1-type:
 
    .. mdp-value:: umbrella
@@ -1780,6 +1770,16 @@ applicable pulling coordinate.
       component. This geometry is not supported with constraint
       pulling.
 
+.. mdp:: pull-coord1-groups
+
+   The indices of the two groups on which this pull coordinate will
+   operate should be given. The first index can be 0, in which case an
+   absolute reference of :mdp:`pull-coord1-origin` is used. With an
+   absolute reference the system is no longer translation invariant
+   and one should think about what to do with the center of mass
+   motion. Note that (only) for :mdp:`pull-coord1-geometry` =
+   :mdp-value:`direction-relative` four groups are required.
+
 .. mdp:: pull-coord1-dim
 
    (Y Y Y)
index c10a49811a7fc692fea8de572aedc84adcbe0567..d14c7cf03afcd6587b7b9d382fc9d545cef49415 100644 (file)
@@ -1,4 +1,4 @@
-586
+592
 If You Want Something Done You Have to Do It Yourself_(Highlander II)
 I Live the Life They Wish They Did_(Tricky)
 Jesus Built My Hotrod_(Ministry)
@@ -585,3 +585,9 @@ Weaseling out of things is important to learn. It's what separates us from the a
 In science, truth always wins._(Max Perutz)
 Creativity in science, as in art, cannot be organized. It arises spontaneously from individual talent. Well-run laboratories can foster it, but hierarchical organizations, inflexible bureaucratic rules, and mountains of futile paperwork can kill it._(Max Perutz)
 Every electron is sacred._(Greg McMullan, on Cryo-EM detectors)
+Science adjusts its views based on what's observed. Faith is the denial of observation so that belief can be preserved._(Tim Minchin)
+Isn’t this enough? Just this world? Just this beautiful, complex wonderfully unfathomable world? How does it so fail to hold our attention that we have to diminish it with the invention of cheap, man-made myths and monsters?_(Tim Minchin)
+If you open your mind too much, your brains will fall out._(Tim Minchin)
+"Everything organic and natural is good" - ignoring the fact that organic natural substances include arsenic and poo and crocodiles. And everything chemical is bad, ignoring the fact that... everything is chemicals._(Tim Minchin)
+A program that has not been tested does not work._(Bjarne Stroustrup)
+You could give Aristotle a tutorial. And you could thrill him to the core of his being. Such is the privilege of living after Newton, Darwin, Einstein, Planck, Watson, Crick and their colleagues._(Richard Dawkins)
index 59993ea72a1a97a9515c8108d06474831b3adc69..51f3fb0ec646243814a9a51a566639ec0a7012bd 100644 (file)
 /* IEEE754 floating-point format. Memory layout is defined by macros
  * GMX_IEEE754_BIG_ENDIAN_BYTE_ORDER and GMX_IEEE754_BIG_ENDIAN_WORD_ORDER. 
  */
-#cmakedefine GMX_FLOAT_FORMAT_IEEE754
+#cmakedefine01 GMX_FLOAT_FORMAT_IEEE754
 
 /* Work around broken calloc() */
 #cmakedefine GMX_BROKEN_CALLOC
 
 /* Do not optimize FFTW setups (not needed with SSE FFT kernels) */
-#cmakedefine GMX_DISABLE_FFTW_MEASURE
-
-/* Use Built-in FFTPACK FFT library */
-#cmakedefine GMX_FFT_FFTPACK
+#cmakedefine01 GMX_DISABLE_FFTW_MEASURE
 
 /* Use FFTW3 FFT library */
-#cmakedefine GMX_FFT_FFTW3
-
-/* Use Intel MKL FFT library */
-#cmakedefine GMX_FFT_MKL
+#cmakedefine01 GMX_FFT_FFTW3
 
 /* Target platform is x86 or x86_64 */
 #cmakedefine GMX_TARGET_X86
 #define GMX_SIMD_ACCURACY_BITS_DOUBLE @GMX_SIMD_ACCURACY_BITS_DOUBLE@
 
 /* Integer byte order is big endian. */
-#cmakedefine GMX_INTEGER_BIG_ENDIAN
+#cmakedefine01 GMX_INTEGER_BIG_ENDIAN
 
 /* Use our own instead of system XDR libraries */
-#cmakedefine GMX_INTERNAL_XDR
+#cmakedefine01 GMX_INTERNAL_XDR
 
 /* Compile to use TNG library */
 #cmakedefine GMX_USE_TNG
 
 /* Bytes in IEEE fp word are in big-endian order if set, little-endian if not.
    Only relevant when FLOAT_FORMAT_IEEE754 is defined. */
-#cmakedefine GMX_IEEE754_BIG_ENDIAN_BYTE_ORDER
+#cmakedefine01 GMX_IEEE754_BIG_ENDIAN_BYTE_ORDER
 
 /* The two words in a double precision variable are in big-endian order if
    set, little-endian if not. Do NOT assume this is the same as the byte
    order! Only relevant when FLOAT_FORMAT_IEEE754 is defined. */
-#cmakedefine GMX_IEEE754_BIG_ENDIAN_WORD_ORDER
+#cmakedefine01 GMX_IEEE754_BIG_ENDIAN_WORD_ORDER
 
 /* Define if SIGUSR1 is present */
 #cmakedefine HAVE_SIGUSR1
index 89971946ac027ee5228a5ba559b1ac2c575d25e3..044d987048438025bd5420b110705ca1396d8f03 100644 (file)
@@ -70,7 +70,6 @@ static void list_trn(char *fn)
   printf("Going to open %s\n",fn);
   fpread  = open_trn(fn,"r"); 
   fpwrite = open_tpx(NULL,"w");
-  gmx_fio_setdebug(fpwrite,TRUE);
   
   mmm=mass[0]+2*mass[1];
   for(i=0; (i<5); i++) 
index 1ae38d84525b8dfc62b0f5178ad9305cb3948268..b6e9bcfdb67f16b17e6b79715d4980024cf82b08 100644 (file)
@@ -68,4 +68,31 @@ and use the copy_xsl.sh script to copy it to relevant locations.
     <xsl:value-of select="."/>
 </xsl:template>
 
+<xsl:template match="InteractiveSession">
+    <pre>
+        <xsl:for-each select="*">
+            <xsl:choose>
+                <xsl:when test="starts-with(@Name, 'Output')">
+                    <xsl:value-of select="substring(.,2)"/>
+                </xsl:when>
+                <xsl:when test="string-length(.)=1">
+                    <xsl:text>&#x25ba;</xsl:text>
+                    <xsl:text>&#xb6;</xsl:text>
+                </xsl:when>
+                <xsl:when test="contains(substring(.,2), '&#10;')">
+                    <xsl:text>&#x25ba;</xsl:text>
+                    <xsl:value-of select="translate(substring(.,2), '&#10;', '&#x23ce;')"/>
+                    <xsl:text>&#10;</xsl:text>
+                </xsl:when>
+                <xsl:otherwise>
+                    <xsl:text>&#x25ba;</xsl:text>
+                    <xsl:value-of select="substring(.,2)"/>
+                    <xsl:text>&#xb6;</xsl:text>
+                </xsl:otherwise>
+            </xsl:choose>
+        </xsl:for-each>
+        <xsl:text>[EOF]</xsl:text>
+    </pre>
+</xsl:template>
+
 </xsl:stylesheet>
index cfe32fbf38905227976a8f86633fd6fdb9ace6d6..d1af037ef5898431e6d590d52f98676823d32245 100644 (file)
@@ -74,8 +74,9 @@ class CommandLineHelpContext::Impl
 {
     public:
         //! Creates the implementation class and the low-level context.
-        Impl(File *file, HelpOutputFormat format, const HelpLinks *links)
-            : writerContext_(file, format, links), moduleDisplayName_("gmx"),
+        Impl(TextOutputStream *stream, HelpOutputFormat format,
+             const HelpLinks *links)
+            : writerContext_(stream, format, links), moduleDisplayName_("gmx"),
               completionWriter_(NULL), bHidden_(false)
         {
         }
@@ -97,9 +98,9 @@ class CommandLineHelpContext::Impl
 };
 
 CommandLineHelpContext::CommandLineHelpContext(
-        File *file, HelpOutputFormat format, const HelpLinks *links,
-        const std::string &programName)
-    : impl_(new Impl(file, format, links))
+        TextOutputStream *stream, HelpOutputFormat format,
+        const HelpLinks *links, const std::string &programName)
+    : impl_(new Impl(stream, format, links))
 {
     impl_->writerContext_.setReplacement("[PROGRAM]", programName);
 }
@@ -112,7 +113,7 @@ CommandLineHelpContext::CommandLineHelpContext(
 
 CommandLineHelpContext::CommandLineHelpContext(
         ShellCompletionWriter *writer)
-    : impl_(new Impl(writer->outputFile(), eHelpOutputFormat_Other, NULL))
+    : impl_(new Impl(&writer->outputStream(), eHelpOutputFormat_Other, NULL))
 {
     impl_->completionWriter_ = writer;
 }
index 330f20aa6f486572d71bbf11c57abd27698c4876..be59e70cbb9d7a444108fff3e4503f7442a53a61 100644 (file)
@@ -76,8 +76,8 @@ class CommandLineHelpContext
          *
          * Wraps the constructor of HelpWriterContext.
          */
-        CommandLineHelpContext(File *file, HelpOutputFormat format,
-                               const HelpLinks *links,
+        CommandLineHelpContext(TextOutputStream *stream,
+                               HelpOutputFormat format, const HelpLinks *links,
                                const std::string &programName);
         //! Creates a context for a particular HelpWriterContext.
         explicit CommandLineHelpContext(const HelpWriterContext &writerContext);
index 2a2959cbf86e19b22c25d284f47e7a0d88b339b0..3d0020d3cdfc6671c5d88569b712408055cc8d5c 100644 (file)
 #include "gromacs/utility/arrayref.h"
 #include "gromacs/utility/baseversion.h"
 #include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/file.h"
 #include "gromacs/utility/fileredirector.h"
 #include "gromacs/utility/gmxassert.h"
+#include "gromacs/utility/path.h"
 #include "gromacs/utility/programcontext.h"
+#include "gromacs/utility/stringstream.h"
 #include "gromacs/utility/stringutil.h"
+#include "gromacs/utility/textreader.h"
+#include "gromacs/utility/textstream.h"
+#include "gromacs/utility/textwriter.h"
 
 #include "shellcompletions.h"
 
@@ -376,7 +380,7 @@ void CommandsHelpTopic::writeHelp(const HelpWriterContext &context) const
     context.writeTextBlock(
             "Usage: [PROGRAM] [<options>] <command> [<args>][PAR]"
             "Available commands:");
-    File              &file = context.outputFile();
+    TextWriter        &file = context.outputFile();
     TextTableFormatter formatter;
     formatter.addColumn(NULL, maxNameLength + 1, false);
     formatter.addColumn(NULL, 72 - maxNameLength, true);
@@ -490,8 +494,9 @@ class HelpExportReStructuredText : public HelpExportInterface
 {
     public:
         //! Initializes reST exporter.
-        explicit HelpExportReStructuredText(
-            const CommandLineHelpModuleImpl &helpModule);
+        HelpExportReStructuredText(
+            const CommandLineHelpModuleImpl &helpModule,
+            FileOutputRedirectorInterface   *outputRedirector);
 
         virtual void startModuleExport();
         virtual void exportModuleHelp(
@@ -511,19 +516,20 @@ class HelpExportReStructuredText : public HelpExportInterface
         FileOutputRedirectorInterface  *outputRedirector_;
         const std::string              &binaryName_;
         HelpLinks                       links_;
-        boost::scoped_ptr<File>         indexFile_;
-        boost::scoped_ptr<File>         manPagesFile_;
+        boost::scoped_ptr<TextWriter>   indexFile_;
+        boost::scoped_ptr<TextWriter>   manPagesFile_;
 };
 
 HelpExportReStructuredText::HelpExportReStructuredText(
-        const CommandLineHelpModuleImpl &helpModule)
-    : outputRedirector_(helpModule.outputRedirector_),
+        const CommandLineHelpModuleImpl &helpModule,
+        FileOutputRedirectorInterface   *outputRedirector)
+    : outputRedirector_(outputRedirector),
       binaryName_(helpModule.binaryName_),
       links_(eHelpOutputFormat_Rst)
 {
-    File             linksFile("links.dat", "r");
-    std::string      line;
-    while (linksFile.readLine(&line))
+    TextReader   linksFile("links.dat");
+    std::string  line;
+    while (linksFile.readLineTrimmed(&line))
     {
         links_.addLink("[REF]." + line + "[ref]",
                        formatString(":ref:`.%s <%s>`", line.c_str(), line.c_str()),
@@ -537,12 +543,14 @@ HelpExportReStructuredText::HelpExportReStructuredText(
 void HelpExportReStructuredText::startModuleExport()
 {
     indexFile_.reset(
-            new File(outputRedirector_->openFileForWriting("fragments/byname.rst")));
+            new TextWriter(
+                    outputRedirector_->openTextOutputFile("fragments/byname.rst")));
     indexFile_->writeLine(formatString("* :doc:`%s </onlinehelp/%s>` - %s",
                                        binaryName_.c_str(), binaryName_.c_str(),
                                        RootHelpText::title));
     manPagesFile_.reset(
-            new File(outputRedirector_->openFileForWriting("conf-man.py")));
+            new TextWriter(
+                    outputRedirector_->openTextOutputFile("conf-man.py")));
     manPagesFile_->writeLine("man_pages = [");
 }
 
@@ -551,33 +559,33 @@ void HelpExportReStructuredText::exportModuleHelp(
         const std::string                &tag,
         const std::string                &displayName)
 {
-    // TODO: Ideally, the file would only be touched if it really changes.
-    // This would make Sphinx reruns much faster.
-    File file(outputRedirector_->openFileForWriting("onlinehelp/" + tag + ".rst"));
-    file.writeLine(formatString(".. _%s:", displayName.c_str()));
+    TextOutputStreamPointer file
+        = outputRedirector_->openTextOutputFile("onlinehelp/" + tag + ".rst");
+    TextWriter              writer(file);
+    writer.writeLine(formatString(".. _%s:", displayName.c_str()));
     if (0 == displayName.compare(binaryName_ + " mdrun"))
     {
         // Make an extra link target for the convenience of
         // MPI-specific documentation
-        file.writeLine(".. _mdrun_mpi:");
+        writer.writeLine(".. _mdrun_mpi:");
     }
-    file.writeLine();
+    writer.writeLine();
 
-    CommandLineHelpContext context(&file, eHelpOutputFormat_Rst, &links_, binaryName_);
+    CommandLineHelpContext context(file.get(), eHelpOutputFormat_Rst, &links_, binaryName_);
     context.enterSubSection(displayName);
     context.setModuleDisplayName(displayName);
     module.writeHelp(context);
 
-    file.writeLine();
-    file.writeLine(".. only:: man");
-    file.writeLine();
-    file.writeLine("   See also");
-    file.writeLine("   --------");
-    file.writeLine();
-    file.writeLine(formatString("   :manpage:`%s(1)`", binaryName_.c_str()));
-    file.writeLine();
-    file.writeLine("   More information about |Gromacs| is available at <http://www.gromacs.org/>.");
-    file.close();
+    writer.writeLine();
+    writer.writeLine(".. only:: man");
+    writer.writeLine();
+    writer.writeLine("   See also");
+    writer.writeLine("   --------");
+    writer.writeLine();
+    writer.writeLine(formatString("   :manpage:`%s(1)`", binaryName_.c_str()));
+    writer.writeLine();
+    writer.writeLine("   More information about |Gromacs| is available at <http://www.gromacs.org/>.");
+    file->close();
 
     indexFile_->writeLine(formatString("* :doc:`%s </onlinehelp/%s>` - %s",
                                        displayName.c_str(), tag.c_str(),
@@ -604,9 +612,11 @@ void HelpExportReStructuredText::finishModuleExport()
 void HelpExportReStructuredText::startModuleGroupExport()
 {
     indexFile_.reset(
-            new File(outputRedirector_->openFileForWriting("fragments/bytopic.rst")));
+            new TextWriter(
+                    outputRedirector_->openTextOutputFile("fragments/bytopic.rst")));
     manPagesFile_.reset(
-            new File(outputRedirector_->openFileForWriting("fragments/bytopic-man.rst")));
+            new TextWriter(
+                    outputRedirector_->openTextOutputFile("fragments/bytopic-man.rst")));
 }
 
 void HelpExportReStructuredText::exportModuleGroup(
@@ -650,12 +660,13 @@ void HelpExportReStructuredText::finishModuleGroupExport()
 
 void HelpExportReStructuredText::exportTopic(const HelpTopicInterface &topic)
 {
-    const std::string      path("onlinehelp/" + std::string(topic.name()) + ".rst");
-    File                   file(outputRedirector_->openFileForWriting(path));
-    CommandLineHelpContext context(&file, eHelpOutputFormat_Rst, &links_,
-                                   binaryName_);
-    HelpManager            manager(topic, context.writerContext());
+    const std::string       path("onlinehelp/" + std::string(topic.name()) + ".rst");
+    TextOutputStreamPointer file(outputRedirector_->openTextOutputFile(path));
+    CommandLineHelpContext  context(file.get(), eHelpOutputFormat_Rst, &links_,
+                                    binaryName_);
+    HelpManager             manager(topic, context.writerContext());
     manager.writeCurrentTopic();
+    file->close();
 }
 
 /********************************************************************
@@ -775,6 +786,76 @@ void CommandLineHelpModuleImpl::exportHelp(HelpExportInterface *exporter)
     rootTopic_->exportHelp(exporter);
 }
 
+namespace
+{
+
+/********************************************************************
+ * ModificationCheckingFileOutputStream
+ */
+
+class ModificationCheckingFileOutputStream : public TextOutputStream
+{
+    public:
+        ModificationCheckingFileOutputStream(
+            const char                    *path,
+            FileOutputRedirectorInterface *redirector)
+            : path_(path), redirector_(redirector)
+        {
+        }
+
+        virtual void write(const char *str) { contents_.write(str); }
+        virtual void close()
+        {
+            const std::string &newContents = contents_.toString();
+            // TODO: Redirect these for unit tests.
+            if (File::exists(path_))
+            {
+                const std::string originalContents_
+                    = TextReader::readFileToString(path_);
+                if (originalContents_ == newContents)
+                {
+                    return;
+                }
+            }
+            TextWriter writer(redirector_->openTextOutputFile(path_));
+            writer.writeString(newContents);
+        }
+
+    private:
+        std::string                     path_;
+        StringOutputStream              contents_;
+        FileOutputRedirectorInterface  *redirector_;
+};
+
+/********************************************************************
+ * ModificationCheckingFileOutputRedirector
+ */
+
+class ModificationCheckingFileOutputRedirector : public FileOutputRedirectorInterface
+{
+    public:
+        explicit ModificationCheckingFileOutputRedirector(
+            FileOutputRedirectorInterface *redirector)
+            : redirector_(redirector)
+        {
+        }
+
+        virtual TextOutputStream &standardOutput()
+        {
+            return redirector_->standardOutput();
+        }
+        virtual TextOutputStreamPointer openTextOutputFile(const char *filename)
+        {
+            return TextOutputStreamPointer(
+                    new ModificationCheckingFileOutputStream(filename, redirector_));
+        }
+
+    private:
+        FileOutputRedirectorInterface  *redirector_;
+};
+
+}   // namespace
+
 /********************************************************************
  * CommandLineHelpModule
  */
@@ -833,10 +914,11 @@ int CommandLineHelpModule::run(int argc, char *argv[])
     CommandLineParser(&options).parse(&argc, argv);
     if (!exportFormat.empty())
     {
-        boost::scoped_ptr<HelpExportInterface> exporter;
+        ModificationCheckingFileOutputRedirector redirector(impl_->outputRedirector_);
+        boost::scoped_ptr<HelpExportInterface>   exporter;
         if (exportFormat == "rst")
         {
-            exporter.reset(new HelpExportReStructuredText(*impl_));
+            exporter.reset(new HelpExportReStructuredText(*impl_, &redirector));
         }
         else if (exportFormat == "completion")
         {
@@ -850,7 +932,7 @@ int CommandLineHelpModule::run(int argc, char *argv[])
         return 0;
     }
 
-    File                  &outputFile = impl_->outputRedirector_->standardOutput();
+    TextOutputStream      &outputFile = impl_->outputRedirector_->standardOutput();
     HelpLinks              links(eHelpOutputFormat_Console);
     initProgramLinks(&links, *impl_);
     CommandLineHelpContext context(&outputFile, eHelpOutputFormat_Console, &links,
index addc8c8361c0e01a1c100a0d35990088cbd7cd4b..1241c667550539d71c5ea9c3302be89d1aebe8dd 100644 (file)
@@ -59,8 +59,8 @@
 #include "gromacs/options/timeunitmanager.h"
 #include "gromacs/utility/arrayref.h"
 #include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/file.h"
 #include "gromacs/utility/stringutil.h"
+#include "gromacs/utility/textwriter.h"
 
 #include "shellcompletions.h"
 
@@ -413,7 +413,7 @@ void SynopsisFormatter::start(const char *name)
 {
     currentLength_ = std::strlen(name) + 1;
     indent_        = std::min(currentLength_, 13);
-    File &file = context_.outputFile();
+    TextWriter &file = context_.outputFile();
     switch (context_.outputFormat())
     {
         case eHelpOutputFormat_Console:
@@ -436,7 +436,7 @@ void SynopsisFormatter::start(const char *name)
 
 void SynopsisFormatter::finish()
 {
-    File &file = context_.outputFile();
+    TextWriter &file = context_.outputFile();
     file.writeLine();
     file.writeLine();
 }
@@ -456,7 +456,7 @@ void SynopsisFormatter::formatOption(const OptionInfo &option)
     }
     fullOptionText.append(bFormatted_ ? "`]" : "]");
 
-    File       &file = context_.outputFile();
+    TextWriter &file = context_.outputFile();
     currentLength_ += totalLength;
     if (currentLength_ >= lineLength_)
     {
index 5d98a4ae5166157bc29e6c54046e06ac6405d461..64308a1a4cf64137efd204d0461f5df06d52b3da 100644 (file)
 
 #include <boost/scoped_ptr.hpp>
 
-#include "thread_mpi/mutex.h"
-
 #include "buildinfo.h"
 #include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/file.h"
 #include "gromacs/utility/gmxassert.h"
+#include "gromacs/utility/mutex.h"
 #include "gromacs/utility/path.h"
 #include "gromacs/utility/stringutil.h"
 
@@ -322,7 +320,7 @@ class CommandLineProgramContext::Impl
         mutable std::string           fullBinaryPath_;
         mutable std::string           installationPrefix_;
         mutable bool                  bSourceLayout_;
-        mutable tMPI::mutex           binaryPathMutex_;
+        mutable Mutex                 binaryPathMutex_;
 };
 
 CommandLineProgramContext::Impl::Impl()
@@ -415,14 +413,14 @@ const char *CommandLineProgramContext::commandLine() const
 
 const char *CommandLineProgramContext::fullBinaryPath() const
 {
-    tMPI::lock_guard<tMPI::mutex> lock(impl_->binaryPathMutex_);
+    lock_guard<Mutex> lock(impl_->binaryPathMutex_);
     impl_->findBinaryPath();
     return impl_->fullBinaryPath_.c_str();
 }
 
 InstallationPrefixInfo CommandLineProgramContext::installationPrefix() const
 {
-    tMPI::lock_guard<tMPI::mutex> lock(impl_->binaryPathMutex_);
+    lock_guard<Mutex> lock(impl_->binaryPathMutex_);
     if (impl_->installationPrefix_.empty())
     {
         impl_->findBinaryPath();
index 1eefeda1cf6fd282965b63511aab391030368ca4..a0ef63e4c607e8917d6adb61ca97bb34d0ec1164 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -59,9 +59,9 @@
 #include "gromacs/options/optionsvisitor.h"
 #include "gromacs/utility/arrayref.h"
 #include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/file.h"
 #include "gromacs/utility/gmxassert.h"
 #include "gromacs/utility/stringutil.h"
+#include "gromacs/utility/textwriter.h"
 
 namespace gmx
 {
@@ -107,7 +107,7 @@ class OptionsListWriter : public OptionsVisitor
 class OptionCompletionWriter : public OptionsVisitor
 {
     public:
-        explicit OptionCompletionWriter(File *out) : out_(*out) {}
+        explicit OptionCompletionWriter(TextWriter *out) : out_(*out) {}
 
         virtual void visitSubSection(const Options &section)
         {
@@ -121,7 +121,7 @@ class OptionCompletionWriter : public OptionsVisitor
         void writeOptionCompletion(const OptionInfo  &option,
                                    const std::string &completion);
 
-        File &out_;
+        TextWriter &out_;
 };
 
 void OptionCompletionWriter::visitOption(const OptionInfo &option)
@@ -196,8 +196,8 @@ class ShellCompletionWriter::Impl
             return formatString("_%s_%s_compl", binaryName_.c_str(), moduleName);
         }
 
-        std::string             binaryName_;
-        boost::scoped_ptr<File> file_;
+        std::string                   binaryName_;
+        boost::scoped_ptr<TextWriter> file_;
 };
 
 ShellCompletionWriter::ShellCompletionWriter(const std::string     &binaryName,
@@ -210,14 +210,14 @@ ShellCompletionWriter::~ShellCompletionWriter()
 {
 }
 
-File *ShellCompletionWriter::outputFile()
+TextOutputStream &ShellCompletionWriter::outputStream()
 {
-    return impl_->file_.get();
+    return impl_->file_->stream();
 }
 
 void ShellCompletionWriter::startCompletions()
 {
-    impl_->file_.reset(new File(impl_->binaryName_ + "-completion.bash", "w"));
+    impl_->file_.reset(new TextWriter(impl_->binaryName_ + "-completion.bash"));
     impl_->file_->writeLine("shopt -s extglob");
 }
 
@@ -225,7 +225,7 @@ void ShellCompletionWriter::writeModuleCompletions(
         const char    *moduleName,
         const Options &options)
 {
-    File &out = *impl_->file_;
+    TextWriter &out = *impl_->file_;
     out.writeLine(formatString("%s() {", impl_->completionFunctionName(moduleName).c_str()));
     out.writeLine("local IFS=$'\\n'");
     out.writeLine("local c=${COMP_WORDS[COMP_CWORD]}");
index 88435475436e8a4951eaf6e893ebbfccb4f3b15b..f8f11906b6c09f3690baaa3eaf5ebd52c81397b1 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -53,8 +53,8 @@ namespace gmx
 {
 
 class CommandLineHelpContext;
-class File;
 class Options;
+class TextOutputStream;
 
 //! \cond internal
 //! \addtogroup module_commandline
@@ -78,7 +78,7 @@ class ShellCompletionWriter
                               ShellCompletionFormat  format);
         ~ShellCompletionWriter();
 
-        File *outputFile();
+        TextOutputStream &outputStream();
 
         void startCompletions();
         void writeModuleCompletions(const char    *moduleName,
index b8cb8eae28e2bc4e658bebe3925ffdc61571a8e6..5fada52c35de53dd3fec1d0fd73e1095e89bb9fe 100644 (file)
@@ -48,7 +48,7 @@
 #include "gromacs/commandline/cmdlinemodulemanager.h"
 #include "gromacs/options/basicoptions.h"
 #include "gromacs/options/options.h"
-#include "gromacs/utility/file.h"
+#include "gromacs/utility/textwriter.h"
 
 #include "gromacs/onlinehelp/tests/mock_helptopic.h"
 #include "testutils/cmdlinetest.h"
@@ -74,14 +74,13 @@ TEST_F(CommandLineHelpModuleTest, PrintsGeneralHelp)
     };
     CommandLine       args(cmdline);
     initManager(args, "test");
-    redirectManagerOutput();
     addModule("module", "First module");
     addModule("other", "Second module");
     addHelpTopic("topic", "Test topic");
     int rc = 0;
     ASSERT_NO_THROW_GMX(rc = manager().run(args.argc(), args.argv()));
     ASSERT_EQ(0, rc);
-    checkRedirectedOutputFiles();
+    checkRedirectedOutput();
 }
 
 TEST_F(CommandLineHelpModuleTest, PrintsHelpOnTopic)
@@ -91,7 +90,6 @@ TEST_F(CommandLineHelpModuleTest, PrintsHelpOnTopic)
     };
     CommandLine       args(cmdline);
     initManager(args, "test");
-    redirectManagerOutput();
     addModule("module", "First module");
     MockHelpTopic &topic = addHelpTopic("topic", "Test topic");
     topic.addSubTopic("sub1", "Subtopic 1", "");
@@ -101,7 +99,7 @@ TEST_F(CommandLineHelpModuleTest, PrintsHelpOnTopic)
     int rc = 0;
     ASSERT_NO_THROW_GMX(rc = manager().run(args.argc(), args.argv()));
     ASSERT_EQ(0, rc);
-    checkRedirectedOutputFiles();
+    checkRedirectedOutput();
 }
 
 /*! \brief
@@ -125,10 +123,9 @@ TEST_F(CommandLineHelpModuleTest, ExportsHelp)
         "test", "help", "-export", "rst"
     };
     // TODO: Find a more elegant solution, or get rid of the links.dat altogether.
-    gmx::File::writeFileFromString("links.dat", "");
-    CommandLine       args(cmdline);
+    gmx::TextWriter::writeFileFromString("links.dat", "");
+    CommandLine        args(cmdline);
     initManager(args, "test");
-    redirectManagerOutput();
     MockOptionsModule &mod1 = addOptionsModule("module", "First module");
     MockOptionsModule &mod2 = addOptionsModule("other", "Second module");
     {
@@ -156,7 +153,7 @@ TEST_F(CommandLineHelpModuleTest, ExportsHelp)
     int rc = 0;
     ASSERT_NO_THROW_GMX(rc = manager().run(args.argc(), args.argv()));
     ASSERT_EQ(0, rc);
-    checkRedirectedOutputFiles();
+    checkRedirectedOutput();
     std::remove("links.dat");
 }
 
index 317cc4c9ab8c5dec62e564fb9c3278bd85c7a0a8..7d3a282e2e4ec6650a61bb8a52f69082caedd8be 100644 (file)
 #include "gromacs/options/basicoptions.h"
 #include "gromacs/options/filenameoption.h"
 #include "gromacs/options/options.h"
-#include "gromacs/utility/file.h"
+#include "gromacs/utility/stringstream.h"
 
 #include "testutils/stringtest.h"
-#include "testutils/testfilemanager.h"
 
 namespace
 {
@@ -71,21 +70,19 @@ class CommandLineHelpWriterTest : public ::gmx::test::StringTestBase
 
         void checkHelp(gmx::CommandLineHelpWriter *writer);
 
-        gmx::test::TestFileManager tempFiles_;
         bool                       bHidden_;
 };
 
 void CommandLineHelpWriterTest::checkHelp(gmx::CommandLineHelpWriter *writer)
 {
-    std::string                 filename = tempFiles_.getTemporaryFilePath("helptext.txt");
-    gmx::File                   file(filename, "w");
-    gmx::CommandLineHelpContext context(&file, gmx::eHelpOutputFormat_Console,
+    gmx::StringOutputStream     stream;
+    gmx::CommandLineHelpContext context(&stream, gmx::eHelpOutputFormat_Console,
                                         NULL, "test");
     context.setShowHidden(bHidden_);
     writer->writeHelp(context);
-    file.close();
+    stream.close();
 
-    checkFileContents(filename, "HelpText");
+    checkText(stream.toString(), "HelpText");
 }
 
 
index f85cabe057ec0cabe853a08aa1c26f97c32a4843..5c33490f3a9e789997c673add39e030737d224b9 100644 (file)
@@ -56,7 +56,7 @@
 
 #include "gromacs/onlinehelp/tests/mock_helptopic.h"
 #include "testutils/cmdlinetest.h"
-#include "testutils/testfilemanager.h"
+#include "testutils/testfileredirector.h"
 
 namespace gmx
 {
@@ -131,9 +131,9 @@ MockOptionsModule::~MockOptionsModule()
 class CommandLineModuleManagerTestBase::Impl
 {
     public:
+        TestFileOutputRedirector                     redirector_;
         boost::scoped_ptr<CommandLineProgramContext> programContext_;
         boost::scoped_ptr<CommandLineModuleManager>  manager_;
-        TestFileManager                              fileManager_;
 };
 
 CommandLineModuleManagerTestBase::CommandLineModuleManagerTestBase()
@@ -154,6 +154,7 @@ void CommandLineModuleManagerTestBase::initManager(
     impl_->manager_.reset(new gmx::CommandLineModuleManager(
                                   realBinaryName, impl_->programContext_.get()));
     impl_->manager_->setQuiet(true);
+    impl_->manager_->setOutputRedirector(&impl_->redirector_);
 }
 
 MockModule &
@@ -186,9 +187,9 @@ CommandLineModuleManager &CommandLineModuleManagerTestBase::manager()
     return *impl_->manager_;
 }
 
-void CommandLineModuleManagerTestBase::redirectManagerOutput()
+void CommandLineModuleManagerTestBase::checkRedirectedOutput()
 {
-    impl_->manager_->setOutputRedirector(&initOutputRedirector(&impl_->fileManager_));
+    impl_->redirector_.checkRedirectedFiles(&checker());
 }
 
 } // namespace test
index c156561606b01a59fecd095c6c88743e23042bdc..ecb77a4f66e67c6ac8495646b8e8cabf23249649 100644 (file)
@@ -60,6 +60,7 @@ namespace test
 
 class CommandLine;
 class MockHelpTopic;
+class TestFileOutputRedirector;
 
 /*! \internal \brief
  * Mock implementation of gmx::CommandLineModuleInterface.
@@ -140,16 +141,14 @@ class CommandLineModuleManagerTestBase : public gmx::test::StringTestBase
         CommandLineModuleManager &manager();
 
         /*! \brief
-         * Redirects all manager output to files.
+         * Checks all output from the manager using reference data.
          *
-         * Can be used to silence tests that would otherwise print out
-         * something, and/or checkRedirectedFileContents() from the base class
-         * can be used to check the output.
+         * Output to both `stdout` and files is checked.
          *
          * The manager is put into quiet mode by default, so the manager will
          * only print out information if, e.g., help is explicitly requested.
          */
-        void redirectManagerOutput();
+        void checkRedirectedOutput();
 
     private:
         class Impl;
index d74f12f9877b437e37aa97e6dc8a13f98449ba0d..53ea895be10a4677c24204753d5ce2837f869932 100644 (file)
@@ -51,9 +51,9 @@
 #include <gtest/gtest.h>
 
 #include "gromacs/utility/arrayref.h"
-#include "gromacs/utility/file.h"
 #include "gromacs/utility/path.h"
 #include "gromacs/utility/stringutil.h"
+#include "gromacs/utility/textwriter.h"
 
 #include "testutils/cmdlinetest.h"
 #include "testutils/testasserts.h"
@@ -108,7 +108,7 @@ class ParseCommonArgsTest : public ::testing::Test
                                FileArgumentType type)
         {
             std::string filename(tempFiles_.getTemporaryFilePath(extension));
-            gmx::File::writeFileFromString(filename, "Dummy file");
+            gmx::TextWriter::writeFileFromString(filename, "Dummy file");
             if (name != NULL)
             {
                 args_.append(name);
index 40c8fb826d058692cc38007f8acca1a4a5decf11..df3572fa4125dcb88d65cbd597b8251af100708e 100644 (file)
@@ -1,20 +1,6 @@
 <?xml version="1.0"?>
 <?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
 <ReferenceData>
-  <String Name="fragments/byname.rst"><![CDATA[
-* :doc:`test </onlinehelp/test>` - molecular dynamics simulation suite
-* :doc:`test help </onlinehelp/test-help>` - Print help information
-* :doc:`test module </onlinehelp/test-module>` - First module
-* :doc:`test other </onlinehelp/test-other>` - Second module
-]]></String>
-  <String Name="conf-man.py"><![CDATA[
-man_pages = [
-    ('onlinehelp/test-help', 'test-help', "Print help information", '', 1),
-    ('onlinehelp/test-module', 'test-module', "First module", '', 1),
-    ('onlinehelp/test-other', 'test-other', "Second module", '', 1),
-    ('onlinehelp/test', 'test', 'molecular dynamics simulation suite', '', 1)
-]
-]]></String>
   <String Name="onlinehelp/test-help.rst"><![CDATA[
 .. _test help:
 
@@ -83,6 +69,20 @@ Synopsis
    :manpage:`test(1)`
 
    More information about |Gromacs| is available at <http://www.gromacs.org/>.
+]]></String>
+  <String Name="fragments/byname.rst"><![CDATA[
+* :doc:`test </onlinehelp/test>` - molecular dynamics simulation suite
+* :doc:`test help </onlinehelp/test-help>` - Print help information
+* :doc:`test module </onlinehelp/test-module>` - First module
+* :doc:`test other </onlinehelp/test-other>` - Second module
+]]></String>
+  <String Name="conf-man.py"><![CDATA[
+man_pages = [
+    ('onlinehelp/test-help', 'test-help', "Print help information", '', 1),
+    ('onlinehelp/test-module', 'test-module', "First module", '', 1),
+    ('onlinehelp/test-other', 'test-other', "Second module", '', 1),
+    ('onlinehelp/test', 'test', 'molecular dynamics simulation suite', '', 1)
+]
 ]]></String>
   <String Name="fragments/bytopic.rst"><![CDATA[
 Group 1
index e8de945b94ed92b198368896fec394f7ba5f441e..3986ed0562d64a22bccafd585c90c8dcc6926125 100644 (file)
@@ -216,9 +216,20 @@ enum {
 };
 
 enum {
-    edlbAUTO, edlbNO, edlbYES, edlbNR
+    edlbsOffForever,           /* DLB is off and will never be turned on */
+    edlbsOffCanTurnOn,         /* DLB is off and will turn on with imbalance */
+    edlbsOffTemporarilyLocked, /* DLB is off and temporarily can not turn on */
+    edlbsOn,                   /* DLB is on and will stay on forever */
+    edlbsNR
 };
-const char *edlb_names[edlbNR] = { "auto", "no", "yes" };
+/* Allowed DLB state transitions:
+ *   edlbsOffCanTurnOn         -> edlbsOn
+ *   edlbsOffCanTurnOn         -> edlbsOffForever
+ *   edlbsOffCanTurnOn         -> edlbsOffTemporarilyLocked
+ *   edlbsOffTemporarilyLocked -> edlbsOffCanTurnOn
+ */
+
+const char *edlbs_names[edlbsNR] = { "off", "auto", "locked", "on" };
 
 typedef struct
 {
@@ -297,14 +308,10 @@ typedef struct gmx_domdec_comm
     t_blocka *cglink;
     char     *bLocalCG;
 
-    /* The DLB option */
-    int      eDLB;
-    /* Is eDLB=edlbAUTO locked such that we currently can't turn it on? */
-    gmx_bool bDLB_locked;
-    /* With eDLB=edlbAUTO, should we check if to DLB on at the next DD? */
+    /* The DLB state, possible values are defined above */
+    int      dlbState;
+    /* With dlbState=edlbsOffCanTurnOn, should we check whether to turn DLB on at the next DD? */
     gmx_bool bCheckWhetherToTurnDlbOn;
-    /* Are we actually using DLB? */
-    gmx_bool bDynLoadBal;
 
     /* Cell sizes for static load balancing, first index cartesian */
     real **slb_frac;
@@ -587,6 +594,11 @@ t_block *dd_charge_groups_global(gmx_domdec_t *dd)
     return &dd->comm->cgs_gl;
 }
 
+static bool dlbIsOn(const gmx_domdec_comm_t *comm)
+{
+    return (comm->dlbState == edlbsOn);
+}
+
 static void vec_rvec_init(vec_rvec_t *v)
 {
     v->nalloc = 0;
@@ -665,7 +677,7 @@ void dd_get_ns_ranges(gmx_domdec_t *dd, int icg,
         dim         = dd->dim[d];
         shift0[dim] = zones->izone[izone].shift0[dim];
         shift1[dim] = zones->izone[izone].shift1[dim];
-        if (dd->comm->tric_dir[dim] || (dd->bGridJump && d > 0))
+        if (dd->comm->tric_dir[dim] || (dlbIsOn(dd->comm) && d > 0))
         {
             /* A conservative approach, this can be optimized */
             shift0[dim] -= 1;
@@ -3219,7 +3231,7 @@ static void set_dd_cell_sizes_slb(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
         }
     }
 
-    if (!comm->bDynLoadBal)
+    if (!dlbIsOn(comm))
     {
         copy_rvec(cellsize_min, comm->cellsize_min);
     }
@@ -3815,7 +3827,7 @@ static void set_dd_cell_sizes(gmx_domdec_t *dd,
     copy_rvec(comm->cell_x0, comm->old_cell_x0);
     copy_rvec(comm->cell_x1, comm->old_cell_x1);
 
-    if (comm->bDynLoadBal)
+    if (dlbIsOn(comm))
     {
         if (DDMASTER(dd))
         {
@@ -3856,7 +3868,7 @@ static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
         /* Without PBC we don't have restrictions on the outer cells */
         if (!(dim >= ddbox->npbcdim &&
               (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
-            comm->bDynLoadBal &&
+            dlbIsOn(comm) &&
             (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
             comm->cellsize_min[dim])
         {
@@ -3870,11 +3882,11 @@ static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
         }
     }
 
-    if ((dd->bGridJump && dd->ndim > 1) || ddbox->nboundeddim < DIM)
+    if ((dlbIsOn(dd->comm) && dd->ndim > 1) || ddbox->nboundeddim < DIM)
     {
         /* Communicate the boundaries and update cell_ns_x0/1 */
         dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
-        if (dd->bGridJump && dd->ndim > 1)
+        if (dlbIsOn(dd->comm) && dd->ndim > 1)
         {
             check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
         }
@@ -5000,7 +5012,7 @@ static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
                 /* Check which direction this cg should go */
                 for (d2 = d+1; (d2 < dd->ndim && mc == -1); d2++)
                 {
-                    if (dd->bGridJump)
+                    if (dlbIsOn(dd->comm))
                     {
                         /* The cell boundaries for dimension d2 are not equal
                          * for each cell row of the lower dimension(s),
@@ -5299,7 +5311,7 @@ static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
             (dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
         {
             load = &comm->load[d];
-            if (dd->bGridJump)
+            if (dlbIsOn(dd->comm))
             {
                 cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
             }
@@ -5308,7 +5320,7 @@ static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
             {
                 sbuf[pos++] = dd_force_load(comm);
                 sbuf[pos++] = sbuf[0];
-                if (dd->bGridJump)
+                if (dlbIsOn(dd->comm))
                 {
                     sbuf[pos++] = sbuf[0];
                     sbuf[pos++] = cell_frac;
@@ -5328,7 +5340,7 @@ static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
             {
                 sbuf[pos++] = comm->load[d+1].sum;
                 sbuf[pos++] = comm->load[d+1].max;
-                if (dd->bGridJump)
+                if (dlbIsOn(dd->comm))
                 {
                     sbuf[pos++] = comm->load[d+1].sum_m;
                     sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
@@ -5357,7 +5369,7 @@ static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
             if (dd->ci[dim] == dd->master_ci[dim])
             {
                 /* We are the root, process this row */
-                if (comm->bDynLoadBal)
+                if (dlbIsOn(comm))
                 {
                     root = comm->root[d];
                 }
@@ -5374,7 +5386,7 @@ static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
                     load->sum += load->load[pos++];
                     load->max  = std::max(load->max, load->load[pos]);
                     pos++;
-                    if (dd->bGridJump)
+                    if (dlbIsOn(dd->comm))
                     {
                         if (root->bLimited)
                         {
@@ -5408,7 +5420,7 @@ static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
                         pos++;
                     }
                 }
-                if (comm->bDynLoadBal && root->bLimited)
+                if (dlbIsOn(comm) && root->bLimited)
                 {
                     load->sum_m *= dd->nc[dim];
                     load->flags |= (1<<d);
@@ -5423,7 +5435,7 @@ static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
         comm->load_step  += comm->cycl[ddCyclStep];
         comm->load_sum   += comm->load[0].sum;
         comm->load_max   += comm->load[0].max;
-        if (comm->bDynLoadBal)
+        if (dlbIsOn(comm))
         {
             for (d = 0; d < dd->ndim; d++)
             {
@@ -5492,7 +5504,7 @@ static void print_dd_load_av(FILE *fplog, gmx_domdec_t *dd)
             fprintf(stderr, "%s", buf);
         }
         bLim = FALSE;
-        if (comm->bDynLoadBal)
+        if (dlbIsOn(comm))
         {
             sprintf(buf, " Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
             for (d = 0; d < dd->ndim; d++)
@@ -5535,7 +5547,7 @@ static void print_dd_load_av(FILE *fplog, gmx_domdec_t *dd)
             sprintf(buf,
                     "NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
                     "      in the domain decomposition.\n", lossf*100);
-            if (!comm->bDynLoadBal)
+            if (!dlbIsOn(comm))
             {
                 sprintf(buf+strlen(buf), "      You might want to use dynamic load balancing (option -dlb.)\n");
             }
@@ -5580,6 +5592,9 @@ static float dd_f_imbal(gmx_domdec_t *dd)
 
 float dd_pme_f_ratio(gmx_domdec_t *dd)
 {
+    /* Should only be called on the DD master rank */
+    assert(DDMASTER(dd));
+
     if (dd->comm->load[0].mdf > 0 && dd->comm->cycl_n[ddCyclPME] > 0)
     {
         return dd->comm->load[0].pme/dd->comm->load[0].mdf;
@@ -5610,7 +5625,7 @@ static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, gmx_int64_t step)
         fprintf(fplog, "\n");
     }
     fprintf(fplog, "DD  step %s", gmx_step_str(step, buf));
-    if (dd->comm->bDynLoadBal)
+    if (dlbIsOn(dd->comm))
     {
         fprintf(fplog, "  vol min/aver %5.3f%c",
                 dd_vol_min(dd), flags ? '!' : ' ');
@@ -5628,7 +5643,7 @@ static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, gmx_int64_t step)
 
 static void dd_print_load_verbose(gmx_domdec_t *dd)
 {
-    if (dd->comm->bDynLoadBal)
+    if (dlbIsOn(dd->comm))
     {
         fprintf(stderr, "vol %4.2f%c ",
                 dd_vol_min(dd), dd_load_flags(dd) ? '!' : ' ');
@@ -5669,7 +5684,7 @@ static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
     if (bPartOfGroup)
     {
         dd->comm->mpi_comm_load[dim_ind] = c_row;
-        if (dd->comm->eDLB != edlbNO)
+        if (dd->comm->dlbState != edlbsOffForever)
         {
             if (dd->ci[dim] == dd->master_ci[dim])
             {
@@ -5963,7 +5978,7 @@ void setup_dd_grid(FILE *fplog, gmx_domdec_t *dd)
         }
     }
 
-    if (dd->comm->eDLB != edlbNO)
+    if (dd->comm->dlbState != edlbsOffForever)
     {
         snew(dd->comm->root, dd->ndim);
     }
@@ -6540,60 +6555,60 @@ static int check_dlb_support(FILE *fplog, t_commrec *cr,
                              const char *dlb_opt, gmx_bool bRecordLoad,
                              unsigned long Flags, t_inputrec *ir)
 {
-    int           eDLB = -1;
+    int           dlbState = -1;
     char          buf[STRLEN];
 
     switch (dlb_opt[0])
     {
-        case 'a': eDLB = edlbAUTO; break;
-        case 'n': eDLB = edlbNO;   break;
-        case 'y': eDLB = edlbYES;  break;
+        case 'a': dlbState = edlbsOffCanTurnOn; break;
+        case 'n': dlbState = edlbsOffForever;   break;
+        case 'y': dlbState = edlbsOn;           break;
         default: gmx_incons("Unknown dlb_opt");
     }
 
     if (Flags & MD_RERUN)
     {
-        return edlbNO;
+        return edlbsOffForever;
     }
 
     if (!EI_DYNAMICS(ir->eI))
     {
-        if (eDLB == edlbYES)
+        if (dlbState == edlbsOn)
         {
             sprintf(buf, "NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n", EI(ir->eI));
             dd_warning(cr, fplog, buf);
         }
 
-        return edlbNO;
+        return edlbsOffForever;
     }
 
     if (!bRecordLoad)
     {
         dd_warning(cr, fplog, "NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
 
-        return edlbNO;
+        return edlbsOffForever;
     }
 
     if (Flags & MD_REPRODUCIBLE)
     {
-        switch (eDLB)
+        switch (dlbState)
         {
-            case edlbNO:
+            case edlbsOffForever:
                 break;
-            case edlbAUTO:
+            case edlbsOffCanTurnOn:
                 dd_warning(cr, fplog, "NOTE: reproducibility requested, will not use dynamic load balancing\n");
-                eDLB = edlbNO;
+                dlbState = edlbsOffForever;
                 break;
-            case edlbYES:
+            case edlbsOn:
                 dd_warning(cr, fplog, "WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
                 break;
             default:
-                gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", eDLB);
+                gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", dlbState);
                 break;
         }
     }
 
-    return eDLB;
+    return dlbState;
 }
 
 static void set_dd_dim(FILE *fplog, gmx_domdec_t *dd)
@@ -6739,16 +6754,14 @@ gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
     /* Initialize to GPU share count to 0, might change later */
     comm->nrank_gpu_shared = 0;
 
-    comm->eDLB                     = check_dlb_support(fplog, cr, dlb_opt, comm->bRecordLoad, Flags, ir);
-    comm->bDLB_locked              = FALSE;
+    comm->dlbState                 = check_dlb_support(fplog, cr, dlb_opt, comm->bRecordLoad, Flags, ir);
     comm->bCheckWhetherToTurnDlbOn = TRUE;
 
-    comm->bDynLoadBal = (comm->eDLB == edlbYES);
     if (fplog)
     {
-        fprintf(fplog, "Dynamic load balancing: %s\n", edlb_names[comm->eDLB]);
+        fprintf(fplog, "Dynamic load balancing: %s\n",
+                edlbs_names[comm->dlbState]);
     }
-    dd->bGridJump              = comm->bDynLoadBal;
     comm->bPMELoadBalDLBLimits = FALSE;
 
     if (comm->nstSortCG)
@@ -6941,7 +6954,7 @@ gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
 
         /* We need to choose the optimal DD grid and possibly PME nodes */
         limit = dd_choose_grid(fplog, cr, dd, ir, mtop, box, ddbox,
-                               comm->eDLB != edlbNO, dlb_scale,
+                               comm->dlbState != edlbsOffForever, dlb_scale,
                                comm->cellsize_limit, comm->cutoff,
                                comm->bInterCGBondeds);
 
@@ -6950,7 +6963,7 @@ gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
             bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
             sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
                     !bC ? "-rdd" : "-rcon",
-                    comm->eDLB != edlbNO ? " or -dds" : "",
+                    comm->dlbState != edlbsOffForever ? " or -dds" : "",
                     bC ? " or your LINCS settings" : "");
 
             gmx_fatal_collective(FARGS, cr, NULL,
@@ -7045,7 +7058,7 @@ gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
     *npme_y = comm->npmenodes_y;
 
     snew(comm->slb_frac, DIM);
-    if (comm->eDLB == edlbNO)
+    if (comm->dlbState == edlbsOffForever)
     {
         comm->slb_frac[XX] = get_slb_frac(fplog, "x", dd->nc[XX], sizex);
         comm->slb_frac[YY] = get_slb_frac(fplog, "y", dd->nc[YY], sizey);
@@ -7054,7 +7067,7 @@ gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
 
     if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
     {
-        if (comm->bBondComm || comm->eDLB != edlbNO)
+        if (comm->bBondComm || comm->dlbState != edlbsOffForever)
         {
             /* Set the bonded communication distance to halfway
              * the minimum and the maximum,
@@ -7062,7 +7075,7 @@ gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
              */
             acs                = average_cellsize_min(dd, ddbox);
             comm->cutoff_mbody = 0.5*(r_bonded + acs);
-            if (comm->eDLB != edlbNO)
+            if (comm->dlbState != edlbsOffForever)
             {
                 /* Check if this does not limit the scaling */
                 comm->cutoff_mbody = std::min(comm->cutoff_mbody, dlb_scale*acs);
@@ -7153,14 +7166,13 @@ static void turn_on_dlb(FILE *fplog, t_commrec *cr, gmx_int64_t step)
         dd_warning(cr, fplog, "NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
 
         /* Change DLB from "auto" to "no". */
-        comm->eDLB = edlbNO;
+        comm->dlbState = edlbsOffForever;
 
         return;
     }
 
     dd_warning(cr, fplog, "NOTE: Turning on dynamic load balancing\n");
-    comm->bDynLoadBal = TRUE;
-    dd->bGridJump     = TRUE;
+    comm->dlbState = edlbsOn;
 
     set_dlb_limits(dd);
 
@@ -7331,7 +7343,7 @@ static void print_dd_settings(FILE *fplog, gmx_domdec_t *dd,
                     std::max(comm->cutoff, comm->cutoff_mbody));
             fprintf(fplog, "%40s  %-7s %6.3f nm\n",
                     "multi-body bonded interactions", "(-rdd)",
-                    (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : std::min(comm->cutoff, limit));
+                    (comm->bBondComm || dlbIsOn(dd->comm)) ? comm->cutoff_mbody : std::min(comm->cutoff, limit));
         }
         if (dd->vsite_comm)
         {
@@ -7450,7 +7462,7 @@ static void set_cell_limits_dlb(gmx_domdec_t      *dd,
     {
         comm->cutoff_mbody = std::min(comm->cutoff, comm->cellsize_limit);
     }
-    if (comm->bDynLoadBal)
+    if (dlbIsOn(comm))
     {
         set_dlb_limits(dd);
     }
@@ -7509,13 +7521,13 @@ void set_dd_parameters(FILE *fplog, gmx_domdec_t *dd, real dlb_scale,
     {
         fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
     }
-    if (comm->eDLB != edlbNO)
+    if (comm->dlbState != edlbsOffForever)
     {
         set_cell_limits_dlb(dd, dlb_scale, ir, ddbox);
     }
 
-    print_dd_settings(fplog, dd, ir, comm->bDynLoadBal, dlb_scale, ddbox);
-    if (comm->eDLB == edlbAUTO)
+    print_dd_settings(fplog, dd, ir, dlbIsOn(comm), dlb_scale, ddbox);
+    if (comm->dlbState == edlbsOffCanTurnOn)
     {
         if (fplog)
         {
@@ -7571,7 +7583,7 @@ static gmx_bool test_dd_cutoff(t_commrec *cr,
 
         np = 1 + (int)(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
 
-        if (dd->comm->eDLB != edlbNO && dim < ddbox.npbcdim &&
+        if (dd->comm->dlbState != edlbsOffForever && dim < ddbox.npbcdim &&
             dd->comm->cd[d].np_dlb > 0)
         {
             if (np > dd->comm->cd[d].np_dlb)
@@ -7590,12 +7602,12 @@ static gmx_bool test_dd_cutoff(t_commrec *cr,
         }
     }
 
-    if (dd->comm->eDLB != edlbNO)
+    if (dd->comm->dlbState != edlbsOffForever)
     {
         /* If DLB is not active yet, we don't need to check the grid jumps.
          * Actually we shouldn't, because then the grid jump data is not set.
          */
-        if (dd->comm->bDynLoadBal &&
+        if (dlbIsOn(dd->comm) &&
             check_grid_jump(0, dd, cutoff_req, &ddbox, FALSE))
         {
             LocallyLimited = 1;
@@ -7655,7 +7667,7 @@ void set_dd_dlb_max_cutoff(t_commrec *cr, real cutoff)
  */
 static void dd_dlb_set_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd, gmx_bool bValue)
 {
-    if (dd->comm->eDLB == edlbAUTO && !dd_dlb_is_locked(dd))
+    if (dd->comm->dlbState == edlbsOffCanTurnOn)
     {
         dd->comm->bCheckWhetherToTurnDlbOn = bValue;
     }
@@ -7668,7 +7680,7 @@ static gmx_bool dd_dlb_get_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd)
 {
     const int nddp_chk_dlb = 100;
 
-    if (dd->comm->eDLB != edlbAUTO)
+    if (dd->comm->dlbState != edlbsOffCanTurnOn)
     {
         return FALSE;
     }
@@ -7695,30 +7707,30 @@ static gmx_bool dd_dlb_get_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd)
 
 gmx_bool dd_dlb_is_on(const gmx_domdec_t *dd)
 {
-    return dd->comm->bDynLoadBal;
+    return (dd->comm->dlbState == edlbsOn); /* every other dlbState value counts as "not on" */
 }
 
 gmx_bool dd_dlb_is_locked(const gmx_domdec_t *dd)
 {
-    return dd->comm->bDLB_locked;
+    return (dd->comm->dlbState == edlbsOffTemporarilyLocked); /* the state entered by dd_dlb_lock() */
 }
 
 void dd_dlb_lock(gmx_domdec_t *dd)
 {
     /* We can only lock the DLB when it is set to auto, otherwise don't do anything */
-    if (dd->comm->eDLB == edlbAUTO)
+    if (dd->comm->dlbState == edlbsOffCanTurnOn) /* i.e. DLB is off but still allowed to turn on */
     {
-        dd->comm->bDLB_locked = TRUE;
+        dd->comm->dlbState = edlbsOffTemporarilyLocked;
     }
 }
 
 void dd_dlb_unlock(gmx_domdec_t *dd)
 {
     /* We can only unlock a DLB that is set to auto and was locked by dd_dlb_lock(), otherwise don't do anything */
-    if (dd->comm->eDLB == edlbAUTO)
+    if (dd->comm->dlbState == edlbsOffTemporarilyLocked)
     {
-        dd->comm->bDLB_locked = FALSE;
-        dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, !dd->comm->bDynLoadBal);
+        dd->comm->dlbState = edlbsOffCanTurnOn;
+        dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
     }
 }
 
@@ -7883,7 +7895,7 @@ set_dd_corners(const gmx_domdec_t *dd,
         c->c[1][0] = comm->cell_x0[dim1];
         /* All rows can see this row */
         c->c[1][1] = comm->cell_x0[dim1];
-        if (dd->bGridJump)
+        if (dlbIsOn(dd->comm))
         {
             c->c[1][1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
             if (bDistMB)
@@ -7902,7 +7914,7 @@ set_dd_corners(const gmx_domdec_t *dd,
             {
                 c->c[2][j] = comm->cell_x0[dim2];
             }
-            if (dd->bGridJump)
+            if (dlbIsOn(dd->comm))
             {
                 /* Use the maximum of the i-cells that see a j-cell */
                 for (i = 0; i < zones->nizone; i++)
@@ -7937,7 +7949,7 @@ set_dd_corners(const gmx_domdec_t *dd,
              */
             c->cr1[0] = comm->cell_x1[dim1];
             c->cr1[3] = comm->cell_x1[dim1];
-            if (dd->bGridJump)
+            if (dlbIsOn(dd->comm))
             {
                 c->cr1[0] = std::max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
                 if (bDistMB)
@@ -8276,7 +8288,7 @@ static void setup_dd_communication(gmx_domdec_t *dd,
     bBondComm = comm->bBondComm;
 
     /* Do we need to determine extra distances for multi-body bondeds? */
-    bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
+    bDistMB = (comm->bInterCGMultiBody && dlbIsOn(dd->comm) && dd->ndim > 1);
 
     /* Do we need to determine extra distances for only two-body bondeds? */
     bDist2B = (bBondComm && !bDistMB);
@@ -8708,7 +8720,7 @@ static void set_zones_size(gmx_domdec_t *dd,
     zones = &comm->zones;
 
     /* Do we need to determine extra distances for multi-body bondeds? */
-    bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
+    bDistMB = (comm->bInterCGMultiBody && dlbIsOn(dd->comm) && dd->ndim > 1);
 
     for (z = zone_start; z < zone_end; z++)
     {
@@ -8728,7 +8740,7 @@ static void set_zones_size(gmx_domdec_t *dd,
             /* With a staggered grid we have different sizes
              * for non-shifted dimensions.
              */
-            if (dd->bGridJump && zones->shift[z][dim] == 0)
+            if (dlbIsOn(dd->comm) && zones->shift[z][dim] == 0)
             {
                 if (d == 1)
                 {
@@ -8757,7 +8769,7 @@ static void set_zones_size(gmx_domdec_t *dd,
             if (zones->shift[z][dim] > 0)
             {
                 dim = dd->dim[d];
-                if (!dd->bGridJump || d == 0)
+                if (!dlbIsOn(dd->comm) || d == 0)
                 {
                     zones->size[z].x0[dim] = comm->cell_x1[dim];
                     zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
@@ -9469,7 +9481,7 @@ void dd_partition_system(FILE                *fplog,
 
     bNStGlobalComm = (step % nstglobalcomm == 0);
 
-    if (!comm->bDynLoadBal)
+    if (!dlbIsOn(comm))
     {
         bDoDLB = FALSE;
     }
@@ -9630,7 +9642,7 @@ void dd_partition_system(FILE                *fplog,
         set_ddbox(dd, bMasterState, cr, ir, state_local->box,
                   TRUE, &top_local->cgs, state_local->x, &ddbox);
 
-        bRedist = comm->bDynLoadBal;
+        bRedist = dlbIsOn(comm);
     }
     else
     {
index abc52ba315ff1505a201ba75ba33dca2e0f03fdf..75d6a2e3d1d761362878195e719ae99763d44af6 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2006,2007,2008,2009,2010,2012,2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2006,2007,2008,2009,2010,2012,2013,2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -603,7 +603,7 @@ int setup_specat_communication(gmx_domdec_t             *dd,
                   dd->ci[XX], dd->ci[YY], dd->ci[ZZ],
                   nrecv_local, ireq->n, specat_type,
                   specat_type, add_err,
-                  dd->bGridJump ? " or use the -rcon option of mdrun" : "");
+                  dd_dlb_is_on(dd) ? " or use the -rcon option of mdrun" : "");
     }
 
     spac->at_start = at_start;
index f8facc562183a87e048b29f33b1a87766a935aa9..089361143ab59700537fdd283d5bb1c129a5bf02 100644 (file)
@@ -934,11 +934,17 @@ void pme_loadbal_do(pme_load_balancing_t *pme_lb,
         {
             pme_lb->bBalance = dd_dlb_is_on(cr->dd);
         }
-        else
+        /* We should ignore the first timing to avoid timing allocation
+         * overhead. And since the PME load balancing is called just
+         * before DD repartitioning, the ratio returned by dd_pme_f_ratio
+         * is not over the last nstlist steps, but the nstlist steps before
+         * that. So the first useful ratio is available at step_rel=3*nstlist.
+         */
+        else if (step_rel >= 3*ir->nstlist)
         {
             if (DDMASTER(cr->dd))
             {
-                /* PME node load is too high, start tuning */
+                /* If PME rank load is too high, start tuning */
                 pme_lb->bBalance =
                     (dd_pme_f_ratio(cr->dd) >= loadBalanceTriggerFactor);
             }
@@ -1019,7 +1025,7 @@ void pme_loadbal_do(pme_load_balancing_t *pme_lb,
     }
 
     if (!pme_lb->bBalance &&
-        (!pme_lb->bSepPMERanks || (step_rel <= pme_lb->step_rel_stop)))
+        (!pme_lb->bSepPMERanks || step_rel > pme_lb->step_rel_stop))
     {
         /* We have just deactivated the balancing and we're not measuring PP/PME
          * imbalance during the first steps of the run: deactivate the tuning.
index 2d8a81e17b58b2df9c82c4429eb7e1bbc966842f..9ead1601ff498473d61d36b4d0f1c8f097db2b95 100644 (file)
@@ -2,7 +2,7 @@
  * This file is part of the GROMACS molecular simulation package.
  *
  * Copyright (c) 1991-2003 Erik Lindahl, David van der Spoel, University of Groningen.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -53,7 +53,7 @@
  * files like fft_fftw3.c or fft_mkl.c for that.
  */
 
-#ifndef GMX_FFT_FFTW3
+#if !GMX_FFT_FFTW3
 
 struct gmx_many_fft {
     int       howmany;
index 5cd3dfae97cc6cdab74c4c113b4f5e62e636a22a..4982dfbf59656ca99206740cbd802e1255bb187b 100644 (file)
 FILE* debug = 0;
 #endif
 
-#ifdef GMX_FFT_FFTW3
-#include "thread_mpi/mutex.h"
+#if GMX_FFT_FFTW3
 
 #include "gromacs/utility/exceptions.h"
+#include "gromacs/utility/mutex.h"
 /* none of the fftw3 calls, except execute(), are thread-safe, so
    we need to serialize them with this mutex. */
-static tMPI::mutex big_fftw_mutex;
+static gmx::Mutex big_fftw_mutex;
 #define FFTW_LOCK try { big_fftw_mutex.lock(); } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
 #define FFTW_UNLOCK try { big_fftw_mutex.unlock(); } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
 #endif /* GMX_FFT_FFTW3 */
@@ -452,7 +452,7 @@ fft5d_plan fft5d_plan_3d(int NG, int MG, int KG, MPI_Comm comm[2], int flags, t_
         fprintf(debug, "Running on %d threads\n", nthreads);
     }
 
-#ifdef GMX_FFT_FFTW3
+#if GMX_FFT_FFTW3
     /* Don't add more stuff here! We have already had at least one bug because we are reimplementing
      * the low-level FFT interface instead of using the Gromacs FFT module. If we need more
      * generic functionality it is far better to extend the interface so we can use it for
@@ -614,7 +614,7 @@ fft5d_plan fft5d_plan_3d(int NG, int MG, int KG, MPI_Comm comm[2], int flags, t_
         }
     }
 
-#ifdef GMX_FFT_FFTW3
+#if GMX_FFT_FFTW3
 }
 #endif
     if ((flags&FFT5D_ORDER_YZ))   /*plan->cart is in the order of transposes */
@@ -985,7 +985,7 @@ void fft5d_execute(fft5d_plan plan, int thread, fft5d_time times)
     int    s = 0, tstart, tend, bParallelDim;
 
 
-#ifdef GMX_FFT_FFTW3
+#if GMX_FFT_FFTW3
     if (plan->p3d)
     {
         if (thread == 0)
@@ -1277,7 +1277,7 @@ void fft5d_destroy(fft5d_plan plan)
             plan->oNout[s] = 0;
         }
     }
-#ifdef GMX_FFT_FFTW3
+#if GMX_FFT_FFTW3
     FFTW_LOCK;
 #ifdef FFT5D_MPI_TRANSPOS
     for (s = 0; s < 2; s++)
index ba6753d473e453058e79caa75be20ce7f8fd537c..ceb87ddc89cea61c3b76ef960162eede02218e3d 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2009,2010,2012,2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2009,2010,2012,2013,2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -53,7 +53,7 @@ double MPI_Wtime();
 #endif
 
 /*currently only special optimization for FFTE*/
-#ifdef GMX_FFT_FFTW3
+#if GMX_FFT_FFTW3
 #include <fftw3.h>
 #endif
 
@@ -93,7 +93,7 @@ struct fft5d_plan_t {
     t_complex *lin;
     t_complex *lout, *lout2, *lout3;
     gmx_fft_t* p1d[3]; /*1D plans*/
-#ifdef GMX_FFT_FFTW3
+#if GMX_FFT_FFTW3
     FFTW(plan) p2d;    /*2D plan: used for 1D decomposition if FFT supports transposed output*/
     FFTW(plan) p3d;    /*3D plan: used for 0D decomposition if FFT supports transposed output*/
     FFTW(plan) mpip[2];
index dee2ff284b473c41aa250438cbde4f824cf9dd53..891a7ee2d9b4bb0e093dfa061d3326512a7d8c27 100644 (file)
@@ -2,7 +2,7 @@
  * This file is part of the GROMACS molecular simulation package.
  *
  * Copyright (c) 1991-2003 David van der Spoel, Erik Lindahl, University of Groningen.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
 
 #include <fftw3.h>
 
-#include "thread_mpi/mutex.h"
-
 #include "gromacs/fft/fft.h"
 #include "gromacs/utility/exceptions.h"
 #include "gromacs/utility/fatalerror.h"
+#include "gromacs/utility/mutex.h"
 
 #ifdef GMX_DOUBLE
 #define FFTWPREFIX(name) fftw_ ## name
@@ -56,7 +55,7 @@
 
 /* none of the fftw3 calls, except execute(), are thread-safe, so
    we need to serialize them with this mutex. */
-static tMPI::mutex big_fftw_mutex;
+static gmx::Mutex big_fftw_mutex;
 #define FFTW_LOCK try { big_fftw_mutex.lock(); } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
 #define FFTW_UNLOCK try { big_fftw_mutex.unlock(); } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
 
@@ -115,7 +114,7 @@ gmx_fft_init_many_1d(gmx_fft_t *        pfft,
     int                    i, j, k;
     int                    fftw_flags;
 
-#ifdef GMX_DISABLE_FFTW_MEASURE
+#if GMX_DISABLE_FFTW_MEASURE
     flags |= GMX_FFT_FLAG_CONSERVATIVE;
 #endif
 
@@ -232,7 +231,7 @@ gmx_fft_init_many_1d_real(gmx_fft_t *        pfft,
     int                    i, j, k;
     int                    fftw_flags;
 
-#ifdef GMX_DISABLE_FFTW_MEASURE
+#if GMX_DISABLE_FFTW_MEASURE
     flags |= GMX_FFT_FLAG_CONSERVATIVE;
 #endif
 
@@ -343,7 +342,7 @@ gmx_fft_init_2d_real(gmx_fft_t *        pfft,
     int                    i, j, k;
     int                    fftw_flags;
 
-#ifdef GMX_DISABLE_FFTW_MEASURE
+#if GMX_DISABLE_FFTW_MEASURE
     flags |= GMX_FFT_FLAG_CONSERVATIVE;
 #endif
 
index 0a86fa9c23f8622eb771487e7d9d8850f35740b6..3a01286733ad3609e9e2ce4d592a627b34ac51b3 100644 (file)
@@ -38,7 +38,7 @@
 
 #include "config.h"
 
-#ifdef GMX_INTERNAL_XDR
+#if GMX_INTERNAL_XDR
 
 
 #include "gmx_system_xdr.h"
index f42facdc52b2c9a29e685acefe7bfd92dce9b444..0b455979dd6fb23c18c77202b81a6fe2e97349c6 100644 (file)
 struct t_fileio
 {
     FILE           *fp;                /* the file pointer */
-    gmx_bool        bOpen,             /* the file is open */
-                    bRead,             /* the file is open for reading */
+    gmx_bool        bRead,             /* the file is open for reading */
                     bDouble,           /* write doubles instead of floats */
-                    bDebug,            /* the file ops should come with debug info */
                     bReadWrite;        /* the file is open for reading and writing */
     char        *fn;                   /* the file name */
     XDR         *xdr;                  /* the xdr data pointer */
index 2c6f98fef3acc8945cfb6854ebdbc31a77315a77..b2876d0566d8e9cc738334e57e39bbbfdb9cf387 100644 (file)
@@ -62,6 +62,13 @@ static const char *eioNames[eioNR] =
     "IVEC", "STRING"
 };
 
+void gmx_fio_setprecision(t_fileio *fio, gmx_bool bDouble)
+{
+    gmx_fio_lock(fio);
+    fio->bDouble = bDouble; /* TRUE selects doubles instead of floats for this file */
+    gmx_fio_unlock(fio);
+}
+
 XDR *gmx_fio_getxdr(t_fileio *fio)
 {
     XDR *ret = NULL;
@@ -336,11 +343,6 @@ static gmx_bool do_xdr(t_fileio *fio, void *item, int nitem, int eio,
         default:
             gmx_fio_fe(fio, eio, desc, srcfile, line);
     }
-    if ((res == 0) && (fio->bDebug))
-    {
-        fprintf(stderr, "Error in xdr I/O %s %s to file %s (source %s, line %d)\n",
-                eioNames[eio], desc, fio->fn, srcfile, line);
-    }
 
     return (res != 0);
 }
index 17c3fbc60b822bd5b3d5c53935e293180c981e6d..982724d64fe4deeffafd0cc561531b5d832d81c2 100644 (file)
@@ -48,6 +48,9 @@ extern "C" {
 
 struct t_fileio;
 
+void gmx_fio_setprecision(struct t_fileio *fio, gmx_bool bDouble);
+/* Select the floating point precision for reading and writing files */
+
 XDR *gmx_fio_getxdr(struct t_fileio *fio);
 /* Return the file pointer itself */
 
index 67ab811570f8f3f70d05c6b937ed07c54d9bc691..a094435c73495cb42ddd81f823155c74d2c6a155 100644 (file)
@@ -95,10 +95,6 @@ static int gmx_fio_int_flush(t_fileio* fio)
     {
         rc = fflush(fio->fp);
     }
-    else if (fio->xdr)
-    {
-        rc = fflush((FILE *) fio->xdr->x_private);
-    }
 
     return rc;
 }
@@ -309,11 +305,7 @@ t_fileio *gmx_fio_open(const char *fn, const char *mode)
     /* Check if it should be opened as a binary file */
     if (!ftp_is_text(fn2ftp(fn)))
     {
-        /* Not ascii, add b to file mode */
-        if ((strchr(newmode, 'b') == NULL) && (strchr(newmode, 'B') == NULL))
-        {
-            strcat(newmode, "b");
-        }
+        strcat(newmode, "b");
     }
 
     snew(fio, 1);
@@ -331,30 +323,10 @@ t_fileio *gmx_fio_open(const char *fn, const char *mode)
         fio->iFTP   = fn2ftp(fn);
         fio->fn     = gmx_strdup(fn);
 
+        fio->fp = gmx_ffopen(fn, newmode);
         /* If this file type is in the list of XDR files, open it like that */
         if (ftp_is_xdr(fio->iFTP))
         {
-            /* First check whether we have to make a backup,
-             * only for writing, not for read or append.
-             */
-            if (newmode[0] == 'w')
-            {
-#ifndef GMX_FAHCORE
-                /* only make backups for normal gromacs */
-                make_backup(fn);
-#endif
-            }
-            else
-            {
-                /* Check whether file exists */
-                if (!gmx_fexist(fn))
-                {
-                    gmx_open(fn);
-                }
-            }
-            /* Open the file */
-            fio->fp = gmx_ffopen(fn, newmode);
-
             /* determine the XDR direction */
             if (newmode[0] == 'w' || newmode[0] == 'a')
             {
@@ -364,15 +336,9 @@ t_fileio *gmx_fio_open(const char *fn, const char *mode)
             {
                 fio->xdrmode = XDR_DECODE;
             }
-
             snew(fio->xdr, 1);
             xdrstdio_create(fio->xdr, fio->fp, fio->xdrmode);
         }
-        else
-        {
-            /* If it is not, open it as a regular file */
-            fio->fp = gmx_ffopen(fn, newmode);
-        }
 
         /* for appending seek to end of file to make sure ftell gives correct position
          * important for checkpointing */
@@ -384,8 +350,6 @@ t_fileio *gmx_fio_open(const char *fn, const char *mode)
     fio->bRead             = bRead;
     fio->bReadWrite        = bReadWrite;
     fio->bDouble           = (sizeof(real) == sizeof(double));
-    fio->bDebug            = FALSE;
-    fio->bOpen             = TRUE;
 
     /* and now insert this file into the list of open files. */
     gmx_fio_insert(fio);
@@ -396,12 +360,7 @@ static int gmx_fio_close_locked(t_fileio *fio)
 {
     int rc = 0;
 
-    if (!fio->bOpen)
-    {
-        gmx_fatal(FARGS, "File %s closed twice!\n", fio->fn);
-    }
-
-    if (ftp_is_xdr(fio->iFTP))
+    if (fio->xdr != NULL)
     {
         xdr_destroy(fio->xdr);
         sfree(fio->xdr);
@@ -412,7 +371,6 @@ static int gmx_fio_close_locked(t_fileio *fio)
         rc = gmx_ffclose(fio->fp); /* fclose returns 0 if happy */
 
     }
-    fio->bOpen = FALSE;
 
     return rc;
 }
@@ -425,10 +383,6 @@ int gmx_fio_close(t_fileio *fio)
     /* We don't want two processes operating on the list at the same time */
     tMPI_Thread_mutex_lock(&open_file_mutex);
 
-    if (fio->iFTP == efTNG)
-    {
-        gmx_incons("gmx_fio_close should not be called on a TNG file");
-    }
     gmx_fio_lock(fio);
     /* first remove it from the list */
     gmx_fio_remove(fio);
@@ -448,7 +402,7 @@ int gmx_fio_fp_close(t_fileio *fio)
 {
     int rc = 0;
     gmx_fio_lock(fio);
-    if (!ftp_is_xdr(fio->iFTP))
+    if (fio->xdr == NULL)
     {
         rc      = gmx_ffclose(fio->fp); /* fclose returns 0 if happy */
         fio->fp = NULL;
@@ -643,7 +597,7 @@ int gmx_fio_get_output_file_positions(gmx_file_position_t **p_outputfiles,
     {
         /* Skip the checkpoint files themselves, since they could be open when
            we call this routine... */
-        if (cur->bOpen && !cur->bRead && cur->iFTP != efCPT)
+        if (!cur->bRead && cur->iFTP != efCPT)
         {
             /* This is an output file currently open for writing, add it */
             if (nfiles == nalloc)
@@ -674,20 +628,6 @@ int gmx_fio_get_output_file_positions(gmx_file_position_t **p_outputfiles,
 }
 
 
-void gmx_fio_setprecision(t_fileio *fio, gmx_bool bDouble)
-{
-    gmx_fio_lock(fio);
-    fio->bDouble = bDouble;
-    gmx_fio_unlock(fio);
-}
-
-void gmx_fio_setdebug(t_fileio *fio, gmx_bool bDebug)
-{
-    gmx_fio_lock(fio);
-    fio->bDebug = bDebug;
-    gmx_fio_unlock(fio);
-}
-
 char *gmx_fio_getname(t_fileio *fio)
 {
     char *ret;
@@ -748,11 +688,6 @@ static int gmx_fio_int_fsync(t_fileio *fio)
     {
         rc = gmx_fsync(fio->fp);
     }
-    else if (fio->xdr) /* this should normally not happen */
-    {
-        rc = gmx_fsync((FILE*) fio->xdr->x_private);
-        /* ^ is this actually OK? */
-    }
 
     return rc;
 }
@@ -779,7 +714,7 @@ t_fileio *gmx_fio_all_output_fsync(void)
     cur = gmx_fio_get_first();
     while (cur)
     {
-        if (cur->bOpen && !cur->bRead)
+        if (!cur->bRead)
         {
             /* if any of them fails, return failure code */
             int rc = gmx_fio_int_fsync(cur);
index dabf39c7eb9878ecb256bc38f9b9b8153bcf2a3f..d08249e4a15ac045784afb177c6f1d46d60cc510 100644 (file)
@@ -97,9 +97,6 @@ int gmx_fio_fclose(FILE *fp);
  * Change properties of the open file
  ********************************************************/
 
-void gmx_fio_setprecision(t_fileio *fio, gmx_bool bDouble);
-/* Select the floating point precision for reading and writing files */
-
 char *gmx_fio_getname(t_fileio *fio);
 /* Return the filename corresponding to the fio index */
 
@@ -109,9 +106,6 @@ int gmx_fio_getftp(t_fileio *fio);
     was opened as a specific file type and changing that midway is most
     likely an evil hack. */
 
-void gmx_fio_setdebug(t_fileio *fio, gmx_bool bDebug);
-/* Set the debug mode */
-
 gmx_bool gmx_fio_getread(t_fileio *fio);
 /* Return  whether read mode is on in fio  */
 
index ff74cc68d073b5a4b4c5b73926a7c8424b48e48c..521f37c0906f2f21f7b9c55ea847dfa45d45e7ce 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2009,2010,2011,2012,2014, by the GROMACS development team, led by
+ * Copyright (c) 2009,2010,2011,2012,2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -40,7 +40,7 @@
 
 #include "config.h"
 
-#ifdef GMX_INTEGER_BIG_ENDIAN
+#if GMX_INTEGER_BIG_ENDIAN
 #define ARCH_IS_BIG_ENDIAN 1
 #else
 #define ARCH_IS_BIG_ENDIAN 0
index 63f76d6f1c7a6aaf67a6bf6aa989b23f683f7cb7..509aa4a15d75b25879d458db6b048e87510ade5f 100644 (file)
@@ -3222,8 +3222,6 @@ static void do_tpxheader(t_fileio *fio, gmx_bool bRead, t_tpxheader *tpx,
     int       idum = 0;
     real      rdum = 0;
 
-    gmx_fio_setdebug(fio, bDebugMode());
-
     /* XDR binary topology file */
     precision = sizeof(real);
     if (bRead)
index f58ffb9f5556d5aa6c7ad747639b798c7b52538e..258a19b71c9c1f0b4fa3b655bf9e73946c513c75 100644 (file)
@@ -48,7 +48,7 @@
 
 #include "config.h"
 
-#ifdef GMX_INTERNAL_XDR
+#if GMX_INTERNAL_XDR
 #include "gromacs/fileio/gmx_system_xdr.h"
 #else
 #include <rpc/rpc.h>
index 7592091e9b5253518470e14026979b8945646846..d4423adc47ca1e50d943d761b0b704f1f4d2d1c6 100644 (file)
@@ -56,6 +56,7 @@
 #include "gromacs/math/units.h"
 #include "gromacs/math/utilities.h"
 #include "gromacs/math/vec.h"
+#include "gromacs/topology/index.h"
 #include "gromacs/utility/fatalerror.h"
 #include "gromacs/utility/futil.h"
 #include "gromacs/utility/smalloc.h"
@@ -65,6 +66,40 @@ enum {
     VACF, MVACF, DOS, DOS_SOLID, DOS_DIFF, DOS_CP, DOS_S, DOS_A, DOS_E, DOS_NR
 };
 
+/* Count the molecules in index[0..nindex); fatal error unless the group consists of whole molecules. */
+static int calcMoleculesInIndexGroup(t_block *mols, int natoms, atom_id *index, int nindex)
+{
+    int   i = 0, mol = 0, nMol = 0, j;
+
+    while (i < nindex)
+    {
+        /* Advance to the first molecule that starts at or after index[i] */
+        while (index[i] > mols->index[mol])
+        {
+            mol++;
+            if (mol >= mols->nr)
+            {
+                gmx_fatal(FARGS, "Atom index out of range: %d", index[i]+1);
+            }
+        }
+        for (j = mols->index[mol]; j < mols->index[mol+1]; j++)
+        {
+            /* Do not read past the end of the group when it stops mid-molecule */
+            if (i >= nindex || index[i] != j)
+            {
+                gmx_fatal(FARGS, "The index group does not consist of whole molecules");
+            }
+            i++;
+            if (i > natoms) /* i == natoms is legal: the group may contain every atom */
+            {
+                gmx_fatal(FARGS, "Index contains atom numbers larger than the topology");
+            }
+        }
+        nMol++;
+    }
+    return nMol;
+}
+
 static double FD(double Delta, double f)
 {
     return (2*pow(Delta, -4.5)*pow(f, 7.5) -
@@ -214,47 +249,6 @@ static real wEsolid(real nu, real beta)
     }
 }
 
-static void dump_fy(output_env_t oenv, real toler)
-{
-    FILE       *fp;
-    double      Delta, f, y, DD;
-    const char *leg[] = { "f", "fy", "y" };
-
-    DD = pow(10.0, 0.125);
-    fp = xvgropen("fy.xvg", "Fig. 2, Lin2003a", "Delta", "y or fy", oenv);
-    xvgr_legend(fp, asize(leg), leg, oenv);
-    if (output_env_get_print_xvgr_codes(oenv))
-    {
-        fprintf(fp, "@    world 1e-05, 0, 1000, 1\n");
-        fprintf(fp, "@    xaxes scale Logarithmic\n");
-    }
-    for (Delta = 1e-5; (Delta <= 1000); Delta *= DD)
-    {
-        f = calc_fluidicity(Delta, toler);
-        y = calc_y(f, Delta, toler);
-        fprintf(fp, "%10g  %10g  %10g  %10g\n", Delta, f, f*y, y);
-    }
-    xvgrclose(fp);
-}
-
-static void dump_w(output_env_t oenv, real beta)
-{
-    FILE       *fp;
-    double      nu;
-    const char *leg[] = { "wCv", "wS", "wA", "wE" };
-
-    fp = xvgropen("w.xvg", "Fig. 1, Berens1983a", "\\f{12}b\\f{4}h\\f{12}n",
-                  "w", oenv);
-    xvgr_legend(fp, asize(leg), leg, oenv);
-    for (nu = 1; (nu < 100); nu += 0.05)
-    {
-        fprintf(fp, "%10g  %10g  %10g  %10g  %10g\n", beta*PLANCK*nu,
-                wCsolid(nu, beta), wSsolid(nu, beta),
-                wAsolid(nu, beta), wEsolid(nu, beta));
-    }
-    xvgrclose(fp);
-}
-
 int gmx_dos(int argc, char *argv[])
 {
     const char         *desc[] = {
@@ -264,6 +258,11 @@ int gmx_dos(int argc, char *argv[])
         "all vibrations. For flexible systems that would be around a few fs",
         "between saving. Properties based on the DoS are printed on the",
-        "standard output."
+        "standard output.",
+        "Note that the density of states is calculated from the mass-weighted",
+        "autocorrelation, and by default only from the square of the real",
+        "component rather than absolute value. This means the shape can differ",
+        "substantially from the plain vibrational power spectrum you can",
+        "calculate with gmx velacc."
     };
     const char         *bugs[] = {
         "This program needs a lot of memory: total usage equals the number of atoms times 3 times number of frames times 4 (or 8 when run in double precision)."
@@ -284,10 +283,16 @@ int gmx_dos(int argc, char *argv[])
     gmx_fft_t           fft;
     double              cP, S, A, E, DiffCoeff, Delta, f, y, z, sigHS, Shs, Sig, DoS0, recip_fac;
     double              wCdiff, wSdiff, wAdiff, wEdiff;
-
-    static     gmx_bool bVerbose = TRUE, bAbsolute = FALSE, bNormalize = FALSE;
-    static     gmx_bool bRecip   = FALSE, bDump = FALSE;
+    int                 grpNatoms;
+    atom_id            *index;
+    char               *grpname;
+    double              invNormalize;
+    gmx_bool            normalizeAutocorrelation;
+
+    static     gmx_bool bVerbose = TRUE, bAbsolute = FALSE, bNormalizeDos = FALSE;
+    static     gmx_bool bRecip   = FALSE;
     static     real     Temp     = 298.15, toler = 1e-6;
+
     t_pargs             pa[]     = {
         { "-v", FALSE, etBOOL, {&bVerbose},
           "Be loud and noisy." },
@@ -295,14 +300,12 @@ int gmx_dos(int argc, char *argv[])
           "Use cm^-1 on X-axis instead of 1/ps for DoS plots." },
         { "-abs", FALSE, etBOOL, {&bAbsolute},
           "Use the absolute value of the Fourier transform of the VACF as the Density of States. Default is to use the real component only" },
-        { "-normdos", FALSE, etBOOL, {&bNormalize},
-          "Normalize the DoS such that it adds up to 3N. This is a hack that should not be necessary." },
+        { "-normdos", FALSE, etBOOL, {&bNormalizeDos},
+          "Normalize the DoS such that it adds up to 3N. This should usually not be necessary." },
         { "-T", FALSE, etREAL, {&Temp},
           "Temperature in the simulation" },
         { "-toler", FALSE, etREAL, {&toler},
-          "[HIDDEN]Tolerance when computing the fluidicity using bisection algorithm" },
-        { "-dump", FALSE, etBOOL, {&bDump},
-          "[HIDDEN]Dump the y/fy plot corresponding to Fig. 2 inLin2003a and the and the weighting functions corresponding to Fig. 1 in Berens1983a." }
+          "[HIDDEN]Tolerance when computing the fluidicity using bisection algorithm" }
     };
 
     t_filenm            fnm[] = {
@@ -331,30 +334,26 @@ int gmx_dos(int argc, char *argv[])
     }
 
     beta = 1/(Temp*BOLTZ);
-    if (bDump)
-    {
-        printf("Dumping reference figures. Thanks for your patience.\n");
-        dump_fy(oenv, toler);
-        dump_w(oenv, beta);
-        exit(0);
-    }
 
     fplog = gmx_fio_fopen(ftp2fn(efLOG, NFILE, fnm), "w");
     fprintf(fplog, "Doing density of states analysis based on trajectory.\n");
     please_cite(fplog, "Pascal2011a");
     please_cite(fplog, "Caleman2011b");
 
-    read_tps_conf(ftp2fn(efTPR, NFILE, fnm), title, &top, &ePBC, NULL, NULL, box,
-                  TRUE);
+    read_tps_conf(ftp2fn(efTPR, NFILE, fnm), title, &top, &ePBC, NULL, NULL, box, TRUE);
+
+    /* Handle index groups */
+    get_index(&top.atoms, ftp2fn_null(efNDX, NFILE, fnm), 1, &grpNatoms, &index, &grpname);
+
     V     = det(box);
     tmass = 0;
-    for (i = 0; (i < top.atoms.nr); i++)
+    for (i = 0; i < grpNatoms; i++)
     {
-        tmass += top.atoms.atom[i].m;
+        tmass += top.atoms.atom[index[i]].m;
     }
 
-    Natom = top.atoms.nr;
-    Nmol  = top.mols.nr;
+    Natom = grpNatoms;
+    Nmol  = calcMoleculesInIndexGroup(&top.mols, top.atoms.nr, index, grpNatoms);
     gnx   = Natom*DIM;
 
     /* Correlation stuff */
@@ -390,9 +389,9 @@ int gmx_dos(int argc, char *argv[])
         }
         for (i = 0; i < gnx; i += DIM)
         {
-            c1[i+XX][nframes] = fr.v[i/DIM][XX];
-            c1[i+YY][nframes] = fr.v[i/DIM][YY];
-            c1[i+ZZ][nframes] = fr.v[i/DIM][ZZ];
+            c1[i+XX][nframes] = fr.v[index[i/DIM]][XX];
+            c1[i+YY][nframes] = fr.v[index[i/DIM]][YY];
+            c1[i+ZZ][nframes] = fr.v[index[i/DIM]][ZZ];
         }
 
         t1 = fr.time;
@@ -413,6 +412,21 @@ int gmx_dos(int argc, char *argv[])
         printf("Going to do %d fourier transforms of length %d. Hang on.\n",
                gnx, nframes);
     }
+    /* Unfortunately the -normalize program option for the autocorrelation
+     * function calculation is added as a hack with a static variable in the
+     * autocorrelation.c source. That would work if we called the normal
+     * do_autocorr(), but this routine overrides that by directly calling
+     * the low-level functionality. That unfortunately leads to ignoring the
+     * default value for the option (which is to normalize).
+     * Since the absolute value seems to be important for the subsequent
+     * analysis below, we detect the value directly from the option, calculate
+     * the autocorrelation without normalization, and then apply the
+     * normalization just to the autocorrelation output
+     * (or not, if the user asked for a non-normalized autocorrelation).
+     */
+    normalizeAutocorrelation = opt2parg_bool("-normalize", npargs, ppa);
+
+    /* Note that we always disable normalization here, regardless of user settings */
     low_do_autocorr(NULL, oenv, NULL, nframes, gnx, nframes, c1, dt, eacNormal, 0, FALSE,
                     FALSE, FALSE, -1, -1, 0);
     snew(dos, DOS_NR);
@@ -427,7 +441,7 @@ int gmx_dos(int argc, char *argv[])
     }
     for (i = 0; (i < gnx); i += DIM)
     {
-        mi = top.atoms.atom[i/DIM].m;
+        mi = top.atoms.atom[index[i/DIM]].m;
         for (j = 0; (j < nframes/2); j++)
         {
             c1j            = (c1[i+XX][j] + c1[i+YY][j] + c1[i+ZZ][j]);
@@ -435,20 +449,28 @@ int gmx_dos(int argc, char *argv[])
             dos[MVACF][j] += mi*c1j;
         }
     }
-    fp = xvgropen(opt2fn("-vacf", NFILE, fnm), "Velocity ACF",
+
+    fp = xvgropen(opt2fn("-vacf", NFILE, fnm), "Velocity autocorrelation function",
                   "Time (ps)", "C(t)", oenv);
     snew(tt, nframes/2);
+
+    invNormalize = normalizeAutocorrelation ? 1.0/dos[VACF][0] : 1.0;
+
     for (j = 0; (j < nframes/2); j++)
     {
         tt[j] = j*dt;
-        fprintf(fp, "%10g  %10g\n", tt[j], dos[VACF][j]);
+        fprintf(fp, "%10g  %10g\n", tt[j], dos[VACF][j] * invNormalize);
     }
     xvgrclose(fp);
-    fp = xvgropen(opt2fn("-mvacf", NFILE, fnm), "Mass-weighted velocity ACF",
+
+    fp = xvgropen(opt2fn("-mvacf", NFILE, fnm), "Mass-weighted velocity autocorrelation function",
                   "Time (ps)", "C(t)", oenv);
+    /* Normalize by the zero-lag value of the mass-weighted series, so that C(0) = 1 */
+    invNormalize = normalizeAutocorrelation ? 1.0/dos[MVACF][0] : 1.0;
+
     for (j = 0; (j < nframes/2); j++)
     {
-        fprintf(fp, "%10g  %10g\n", tt[j], dos[MVACF][j]);
+        fprintf(fp, "%10g  %10g\n", tt[j], dos[MVACF][j] * invNormalize);
     }
     xvgrclose(fp);
 
@@ -483,7 +505,7 @@ int gmx_dos(int argc, char *argv[])
     }
     /* Normalize it */
     dostot = evaluate_integral(nframes/4, nu, dos[DOS], NULL, nframes/4, &stddev);
-    if (bNormalize)
+    if (bNormalizeDos)
     {
         for (j = 0; (j < nframes/4); j++)
         {
@@ -571,18 +593,6 @@ int gmx_dos(int argc, char *argv[])
     cP = BOLTZ * evaluate_integral(nframes/4, nu, dos[DOS_CP], NULL,
                                    nframes/4, &stddev);
     fprintf(fplog, "Heat capacity %g J/mol K\n", 1000*cP/Nmol);
-
-    /*
-       S  = BOLTZ * evaluate_integral(nframes/4,nu,dos[DOS_S],NULL,
-                                   nframes/4,&stddev);
-       fprintf(fplog,"Entropy %g J/mol K\n",1000*S/Nmol);
-       A  = BOLTZ * evaluate_integral(nframes/4,nu,dos[DOS_A],NULL,
-                                   nframes/4,&stddev);
-       fprintf(fplog,"Helmholtz energy %g kJ/mol\n",A/Nmol);
-       E  = BOLTZ * evaluate_integral(nframes/4,nu,dos[DOS_E],NULL,
-                                   nframes/4,&stddev);
-       fprintf(fplog,"Internal energy %g kJ/mol\n",E/Nmol);
-     */
     fprintf(fplog, "\nArrivederci!\n");
     gmx_fio_fclose(fplog);
 
index 5e4733df1d9ed932ba935cc4c13ddec26156c892..e93fe43d10971cb0dda10d1aa38f38be56fce2d6 100644 (file)
 
 
 #ifdef GMX_GPU
-const gmx_bool bGPUBinary = TRUE;
+
+static const bool  bGPUBinary = true;
+
 #  ifdef GMX_USE_OPENCL
-const char    *gpu_implementation        = "OpenCL";
+
+static const char *gpu_implementation       = "OpenCL";
 /* Our current OpenCL implementation only supports using exactly one
  * GPU per PP rank, so sharing is impossible */
-const gmx_bool bGpuSharingSupported      = FALSE;
+static const bool bGpuSharingSupported      = false;
 /* Our current OpenCL implementation is not known to handle
  * concurrency correctly (at context creation, JIT compilation, or JIT
  * cache-management stages). OpenCL runtimes need not support it
  * either; library MPI segfaults when creating OpenCL contexts;
  * thread-MPI seems to work but is not yet known to be safe. */
-const gmx_bool bMultiGpuPerNodeSupported = FALSE;
-#  else
-const char    *gpu_implementation        = "CUDA";
-const gmx_bool bGpuSharingSupported      = TRUE;
-const gmx_bool bMultiGpuPerNodeSupported = TRUE;
-#  endif
-#else
-const gmx_bool bGPUBinary                = FALSE;
-const char    *gpu_implementation        = "non-GPU";
-const gmx_bool bGpuSharingSupported      = FALSE;
-const gmx_bool bMultiGpuPerNodeSupported = FALSE;
-#endif
+static const bool bMultiGpuPerNodeSupported = false;
+
+#  else /* GMX_USE_OPENCL */
+
+// Our CUDA implementation supports everything
+static const char *gpu_implementation        = "CUDA";
+static const bool  bGpuSharingSupported      = true;
+static const bool  bMultiGpuPerNodeSupported = true;
+
+#  endif /* GMX_USE_OPENCL */
+
+#else    /* GMX_GPU */
+
+// Not compiled with GPU support
+static const bool  bGPUBinary                = false;
+static const char *gpu_implementation        = "non-GPU";
+static const bool  bGpuSharingSupported      = false;
+static const bool  bMultiGpuPerNodeSupported = false;
+
+#endif /* GMX_GPU */
 
 /* Names of the GPU detection/check results (see e_gpu_detect_res_t in hw_info.h). */
 const char * const gpu_detect_res_str[egpuNR] =
@@ -124,6 +135,16 @@ static void set_gpu_ids(gmx_gpu_opt_t *gpu_opt, int nrank, int rank);
 static int gmx_count_gpu_dev_unique(const gmx_gpu_info_t *gpu_info,
                                     const gmx_gpu_opt_t  *gpu_opt);
 
+gmx_bool gmx_multiple_gpu_per_node_supported()
+{
+    return bMultiGpuPerNodeSupported; /* build-time constant: true for CUDA, false for OpenCL and non-GPU builds */
+}
+
+gmx_bool gmx_gpu_sharing_supported()
+{
+    return bGpuSharingSupported; /* build-time constant: true for CUDA, false for OpenCL and non-GPU builds */
+}
+
 static void sprint_gpus(char *sbuf, const gmx_gpu_info_t *gpu_info)
 {
     int      i, ndev;
@@ -440,7 +461,8 @@ void gmx_check_hw_runconf_consistency(FILE                *fplog,
         }
         else
         {
-            if (ngpu_comp > npppn)
+            /* TODO Should we have a gpu_opt->n_dev_supported field? */
+            if (ngpu_comp > npppn && gmx_multiple_gpu_per_node_supported())
             {
                 md_print_warn(cr, fplog,
                               "NOTE: potentially sub-optimal launch configuration, %s started with less\n"
@@ -460,13 +482,26 @@ void gmx_check_hw_runconf_consistency(FILE                *fplog,
                  */
                 if (cr->rank_pp_intranode == 0)
                 {
+                    std::string reasonForLimit;
+                    if (ngpu_comp > 1 &&
+                        ngpu_use == 1 &&
+                        !gmx_multiple_gpu_per_node_supported())
+                    {
+                        reasonForLimit  = "can be used by ";
+                        reasonForLimit += gpu_implementation;
+                        reasonForLimit += " in GROMACS";
+                    }
+                    else
+                    {
+                        reasonForLimit = "was detected";
+                    }
                     gmx_fatal(FARGS,
                               "Incorrect launch configuration: mismatching number of PP %s%s and GPUs%s.\n"
-                              "%s was started with %d PP %s%s%s, but only %d GPU%s were detected.",
+                              "%s was started with %d PP %s%s%s, but only %d GPU%s %s.",
                               th_or_proc, btMPI ? "s" : "es", pernode,
                               ShortProgram(), npppn, th_or_proc,
                               th_or_proc_plural, pernode,
-                              ngpu_use, gpu_use_plural);
+                              ngpu_use, gpu_use_plural, reasonForLimit.c_str());
                 }
             }
         }
@@ -552,7 +587,7 @@ static int gmx_count_gpu_dev_unique(const gmx_gpu_info_t *gpu_info,
     {
         int device_id;
 
-        device_id           = bGpuSharingSupported ? get_gpu_device_id(gpu_info, gpu_opt, i) : i;
+        device_id           = gmx_gpu_sharing_supported() ? get_gpu_device_id(gpu_info, gpu_opt, i) : i;
         uniq_ids[device_id] = 1;
     }
     /* Count the devices used. */
@@ -1121,11 +1156,11 @@ void gmx_parse_gpu_ids(gmx_gpu_opt_t *gpu_opt)
         parse_digits_from_plain_string(env,
                                        &gpu_opt->n_dev_use,
                                        &gpu_opt->dev_use);
-        if (!bMultiGpuPerNodeSupported && 1 < gpu_opt->n_dev_use)
+        if (!gmx_multiple_gpu_per_node_supported() && 1 < gpu_opt->n_dev_use)
         {
             gmx_fatal(FARGS, "The %s implementation only supports using exactly one PP rank per node", gpu_implementation);
         }
-        if (!bGpuSharingSupported && anyGpuIdIsRepeated(gpu_opt))
+        if (!gmx_gpu_sharing_supported() && anyGpuIdIsRepeated(gpu_opt))
         {
             gmx_fatal(FARGS, "The %s implementation only supports using exactly one PP rank per GPU", gpu_implementation);
         }
@@ -1231,7 +1266,7 @@ static void set_gpu_ids(gmx_gpu_opt_t *gpu_opt, int nrank, int rank)
     {
         if (nrank % gpu_opt->n_dev_compatible == 0)
         {
-            nshare = bGpuSharingSupported ? nrank/gpu_opt->n_dev_compatible : 1;
+            nshare = gmx_gpu_sharing_supported() ? nrank/gpu_opt->n_dev_compatible : 1;
         }
         else
         {
@@ -1252,7 +1287,7 @@ static void set_gpu_ids(gmx_gpu_opt_t *gpu_opt, int nrank, int rank)
 
     /* Here we will waste GPUs when nrank < gpu_opt->n_dev_compatible */
     gpu_opt->n_dev_use = std::min(gpu_opt->n_dev_compatible*nshare, nrank);
-    if (!bMultiGpuPerNodeSupported)
+    if (!gmx_multiple_gpu_per_node_supported())
     {
         gpu_opt->n_dev_use = std::min(gpu_opt->n_dev_use, 1);
     }
index 783016de1a7766766a1568d8020df9a2c996b025..4e43fd0e1c634829241bb7bb1edc454953260c3b 100644 (file)
@@ -750,7 +750,7 @@ ocl_get_build_options_string(cl_context           context,
 
         for (std::string::size_type i = 0; i < unescaped_ocl_root_path.length(); i++)
         {
-            if (inputStr[i] == ' ')
+            if (unescaped_ocl_root_path[i] == ' ')
             {
                 ocl_root_path.push_back('\\');
             }
index f986f5e545afac46530ad7042743a2ed528bbdbc..11e1f5ae8479677c603445a69a469efc61a77555 100644 (file)
@@ -47,7 +47,6 @@
 #include "gromacs/utility/directoryenumerator.h"
 #include "gromacs/utility/exceptions.h"
 #include "gromacs/utility/fatalerror.h"
-#include "gromacs/utility/file.h"
 #include "gromacs/utility/futil.h"
 #include "gromacs/utility/path.h"
 #include "gromacs/utility/smalloc.h"
index 5488a220cf2c13cf285715fa173e4ac3b0ae800c..035fdc56e9ac943c6a86c05ee1f156090ef01207 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2008, The GROMACS development team.
- * Copyright (c) 2012,2014, by the GROMACS development team, led by
+ * Copyright (c) 2012,2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
 
 #ifndef GMX_GMXPREPROCESS_GMXCPP_H
 #define GMX_GMXPREPROCESS_GMXCPP_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 typedef struct gmx_cpp *gmx_cpp_t;
 
 /* The possible return codes for these functions */
@@ -93,4 +98,9 @@ void cpp_done();
    NOT THREAD SAFE
  */
 char *cpp_error(gmx_cpp_t *handlep, int status);
+
+#ifdef __cplusplus
+}
+#endif
+
 #endif
index 920c3a38f9db69948ce9a2ff4d7dc53ef8800c4a..49b7316358b22428befbfc097570f5610bdb3508 100644 (file)
@@ -70,7 +70,6 @@
 #include "gromacs/utility/dir_separator.h"
 #include "gromacs/utility/exceptions.h"
 #include "gromacs/utility/fatalerror.h"
-#include "gromacs/utility/file.h"
 #include "gromacs/utility/futil.h"
 #include "gromacs/utility/path.h"
 #include "gromacs/utility/programcontext.h"
index 49793283ccf7f5a9bcc232805e68cae1cd5d74cc..f2fb2d220309a6e6e44b9fa8d471b588f773cf1b 100644 (file)
@@ -262,12 +262,12 @@ char **read_pullparams(int *ninp_p, t_inpfile **inp_p,
         int ngroup;
 
         pcrd = &pull->coord[i-1];
-        sprintf(buf, "pull-coord%d-groups", i);
-        STYPE(buf,              groups, "");
         sprintf(buf, "pull-coord%d-type", i);
         EETYPE(buf,             pcrd->eType, epull_names);
         sprintf(buf, "pull-coord%d-geometry", i);
         EETYPE(buf,             pcrd->eGeom, epullg_names);
+        sprintf(buf, "pull-coord%d-groups", i);
+        STYPE(buf,              groups, "");
 
         nscan  = sscanf(groups, "%d %d %d %d %d", &pcrd->group[0], &pcrd->group[1],  &pcrd->group[2], &pcrd->group[3], &idum);
         ngroup = (pcrd->eGeom == epullgDIRRELATIVE) ? 4 : 2;
index 56b0384fe6fee1d84f97e1503a3a13a0e57a8702..338ad8dd81a15684b3c262332514043d6f16c817 100644 (file)
@@ -45,6 +45,18 @@ extern "C" {
 } /* fixes auto-indentation problems */
 #endif
 
+/*! \brief Return whether mdrun can use more than one GPU per node
+ *
+ * The OpenCL implementation cannot use more than one GPU per node,
+ * for example. */
+gmx_bool gmx_multiple_gpu_per_node_supported();
+
+/*! \brief Return whether PP ranks can share a GPU
+ *
+ * The OpenCL implementation cannot share a GPU between ranks, for
+ * example. */
+gmx_bool gmx_gpu_sharing_supported();
+
 /* the init and consistency functions depend on commrec that may not be
    consistent in cuda because MPI types don't exist there.  */
 #ifndef __CUDACC__
index cfc7669a2f1876d0bbd9b55985d4fe15c1ad97ea..64521b85aeee77be7712adc7ada3a7d6dc756142 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -161,7 +161,6 @@ struct gmx_domdec_t {
     ivec     nc;
     int      ndim;
     ivec     dim; /* indexed by 0 to ndim */
-    gmx_bool bGridJump;
 
     /* PBC from dim 0 to npbcdim */
     int npbcdim;
index 33c44a6442059f0aaf6e522bae9b8bdb2ee3a032..c0d8aa99192648668186357c03be75126fb60a33 100644 (file)
@@ -60,6 +60,7 @@ extern "C" {
 /* Abstract type for PME that is defined only in the routine that use them. */
 struct gmx_pme_t;
 struct nonbonded_verlet_t;
+struct bonded_threading_t;
 
 /* Structure describing the data in a single table */
 typedef struct
@@ -185,7 +186,7 @@ typedef struct {
 /* Forward declaration of type for managing Ewald tables */
 struct gmx_ewald_tab_t;
 
-typedef struct f_thread_t f_thread_t;
+typedef struct ewald_corr_thread_t ewald_corr_thread_t;
 
 typedef struct {
     interaction_const_t *ic;
@@ -468,18 +469,14 @@ typedef struct {
     real userreal3;
     real userreal4;
 
-    /* Thread local force and energy data */
-    /* FIXME move to bonded_thread_data_t */
-    int         nthreads;
-    int         red_ashift;
-    int         red_nblock;
-    f_thread_t *f_t;
+    /* Pointer to struct for managing threading of bonded force calculation */
+    struct bonded_threading_t *bonded_threading;
 
-    /* Maximum thread count for uniform distribution of bondeds over threads */
-    int   bonded_max_nthread_uniform;
-
-    /* Exclusion load distribution over the threads */
-    int  *excl_load;
+    /* Ewald correction thread local virial and energy data */
+    int                  nthread_ewc;
+    ewald_corr_thread_t *ewc_t;
+    /* Ewald charge correction load distribution over the threads */
+    int                 *excl_load;
 } t_forcerec;
 
 /* Important: Starting with Gromacs-4.6, the values of c6 and c12 in the nbfp array have
index 3f68624adddcea65c36c0ae5bb72e319f9182611..1fa6afccd2635c5f5a678dc3d9779f975f4b2b8b 100644 (file)
 #include "gromacs/listed-forces/bonded.h"
 #include "gromacs/listed-forces/position-restraints.h"
 #include "gromacs/math/vec.h"
-#include "gromacs/mdlib/forcerec-threading.h"
 #include "gromacs/pbcutil/ishift.h"
 #include "gromacs/pbcutil/pbc.h"
 #include "gromacs/simd/simd.h"
 #include "gromacs/timing/wallcycle.h"
 #include "gromacs/utility/smalloc.h"
 
+#include "listed-internal.h"
 #include "pairs.h"
 
 namespace
@@ -405,14 +405,18 @@ void calc_listed(const gmx_multisim_t *ms,
                  t_fcdata *fcd, int *global_atom_index,
                  int force_flags)
 {
-    gmx_bool      bCalcEnerVir;
-    int           i;
-    real          dvdl[efptNR]; /* The dummy array is to have a place to store the dhdl at other values
-                                                        of lambda, which will be thrown away in the end*/
-    const  t_pbc *pbc_null;
-    int           thread;
+    struct bonded_threading_t *bt;
+    gmx_bool                   bCalcEnerVir;
+    int                        i;
+    /* The dummy array is to have a place to store the dhdl at other values
+       of lambda, which will be thrown away in the end */
+    real                       dvdl[efptNR];
+    const  t_pbc              *pbc_null;
+    int                        thread;
+
+    bt = fr->bonded_threading;
 
-    assert(fr->nthreads == idef->nthreads);
+    assert(bt->nthreads == idef->nthreads);
 
     bCalcEnerVir = (force_flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY));
 
@@ -485,8 +489,8 @@ void calc_listed(const gmx_multisim_t *ms,
     }
 
     wallcycle_sub_start(wcycle, ewcsLISTED);
-#pragma omp parallel for num_threads(fr->nthreads) schedule(static)
-    for (thread = 0; thread < fr->nthreads; thread++)
+#pragma omp parallel for num_threads(bt->nthreads) schedule(static)
+    for (thread = 0; thread < bt->nthreads; thread++)
     {
         int                ftype;
         real              *epot, v;
@@ -505,14 +509,14 @@ void calc_listed(const gmx_multisim_t *ms,
         }
         else
         {
-            zero_thread_forces(&fr->f_t[thread], fr->natoms_force,
-                               fr->red_nblock, 1<<fr->red_ashift);
-
-            ft     = fr->f_t[thread].f;
-            fshift = fr->f_t[thread].fshift;
-            epot   = fr->f_t[thread].ener;
-            grpp   = &fr->f_t[thread].grpp;
-            dvdlt  = fr->f_t[thread].dvdl;
+            zero_thread_forces(&bt->f_t[thread], fr->natoms_force,
+                               bt->red_nblock, 1<<bt->red_ashift);
+
+            ft     = bt->f_t[thread].f;
+            fshift = bt->f_t[thread].fshift;
+            epot   = bt->f_t[thread].ener;
+            grpp   = &bt->f_t[thread].grpp;
+            dvdlt  = bt->f_t[thread].dvdl;
         }
         /* Loop over all bonded force types to calculate the bonded forces */
         for (ftype = 0; (ftype < F_NRE); ftype++)
@@ -530,13 +534,13 @@ void calc_listed(const gmx_multisim_t *ms,
     }
     wallcycle_sub_stop(wcycle, ewcsLISTED);
 
-    if (fr->nthreads > 1)
+    if (bt->nthreads > 1)
     {
         wallcycle_sub_start(wcycle, ewcsLISTED_BUF_OPS);
         reduce_thread_forces(fr->natoms_force, f, fr->fshift,
                              enerd->term, &enerd->grpp, dvdl,
-                             fr->nthreads, fr->f_t,
-                             fr->red_nblock, 1<<fr->red_ashift,
+                             bt->nthreads, bt->f_t,
+                             bt->red_nblock, 1<<bt->red_ashift,
                              bCalcEnerVir,
                              force_flags & GMX_FORCE_DHDL);
         wallcycle_sub_stop(wcycle, ewcsLISTED_BUF_OPS);
index 7e2982bc1dae7c000e412464b26c0f7bd80eee13..a6fa29c174f8f0e80160af86c960f6448d27c30a 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2014, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
 #ifndef GMX_LISTED_FORCES_LISTED_INTERNAL_H
 #define GMX_LISTED_FORCES_LISTED_INTERNAL_H
 
+#include "gromacs/legacyheaders/types/forcerec.h"
+#include "gromacs/math/vectypes.h"
+#include "gromacs/topology/idef.h"
+#include "gromacs/utility/bitmask.h"
+
+/*! \internal \brief Output buffers of the bonded force calculation, one instance per thread */
+typedef struct
+{
+    rvec             *f;            /**< Force array */
+    int               f_nalloc;     /**< Allocation size of f */
+    gmx_bitmask_t     red_mask;     /**< Mask for marking which parts of f are filled */
+    rvec             *fshift;       /**< Shift force array, size SHIFTS */
+    real              ener[F_NRE];  /**< Energy array */
+    gmx_grppairener_t grpp;         /**< Group pair energy data for pairs */
+    real              dvdl[efptNR]; /**< Free-energy dV/dl output */
+}
+f_thread_t;
+
+/*! \internal \brief struct containing all data for bonded force threading */
+struct bonded_threading_t
+{
+    /* Thread local force and energy data */
+    int         nthreads;   /**< Number of threads to be used for bondeds */
+    int         red_ashift; /**< Size of force reduction blocks in bits */
+    int         red_nblock; /**< The number of force blocks to reduce */
+    f_thread_t *f_t;        /**< Force/energy data per thread, size nthreads */
+
+    /* There are two different ways to distribute the bonded force calculation
+     * over the threads. We decide which to use based on the number of threads.
+     */
+    int bonded_max_nthread_uniform; /**< Maximum thread count for uniform distribution of bondeds over threads */
+};
+
+
 /*! \brief Returns the global topology atom number belonging to local
  * atom index i.
  *
index b703d3d8ce0ff5162ac8f7c75d7991ff4cda3aee..183b819befd4c2db6552cc8b9389e8cfcd19a283 100644 (file)
 
 #include "gromacs/legacyheaders/gmx_omp_nthreads.h"
 #include "gromacs/listed-forces/listed-forces.h"
-#include "gromacs/mdlib/forcerec-threading.h"
 #include "gromacs/pbcutil/ishift.h"
 #include "gromacs/utility/fatalerror.h"
 #include "gromacs/utility/smalloc.h"
 #include "gromacs/utility/stringutil.h"
 
+#include "listed-internal.h"
+
 /*! \brief struct for passing all data required for a function type */
 typedef struct {
     int      ftype; /**< the function type index */
@@ -343,65 +344,68 @@ const int maxBlockBits = BITMASK_SIZE;
 
 void setup_bonded_threading(t_forcerec *fr, t_idef *idef)
 {
-    int t;
-    int ctot, c, b;
+    bonded_threading_t *bt;
+    int                 t;
+    int                 ctot, c, b;
+
+    bt = fr->bonded_threading;
 
-    assert(fr->nthreads >= 1);
+    assert(bt->nthreads >= 1);
 
     /* Divide the bonded interaction over the threads */
     divide_bondeds_over_threads(idef,
-                                fr->nthreads,
-                                fr->bonded_max_nthread_uniform);
+                                bt->nthreads,
+                                bt->bonded_max_nthread_uniform);
 
-    if (fr->nthreads == 1)
+    if (bt->nthreads == 1)
     {
-        fr->red_nblock = 0;
+        bt->red_nblock = 0;
 
         return;
     }
 
-    fr->red_ashift = 6;
-    while (fr->natoms_force > (int)(maxBlockBits*(1U<<fr->red_ashift)))
+    bt->red_ashift = 6;
+    while (fr->natoms_force > (int)(maxBlockBits*(1U<<bt->red_ashift)))
     {
-        fr->red_ashift++;
+        bt->red_ashift++;
     }
     if (debug)
     {
         fprintf(debug, "bonded force buffer block atom shift %d bits\n",
-                fr->red_ashift);
+                bt->red_ashift);
     }
 
     /* Determine to which blocks each thread's bonded force calculation
      * contributes. Store this is a mask for each thread.
      */
-#pragma omp parallel for num_threads(fr->nthreads) schedule(static)
-    for (t = 1; t < fr->nthreads; t++)
+#pragma omp parallel for num_threads(bt->nthreads) schedule(static)
+    for (t = 1; t < bt->nthreads; t++)
     {
-        calc_bonded_reduction_mask(&fr->f_t[t].red_mask,
-                                   idef, fr->red_ashift, t, fr->nthreads);
+        calc_bonded_reduction_mask(&bt->f_t[t].red_mask,
+                                   idef, bt->red_ashift, t, bt->nthreads);
     }
 
     /* Determine the maximum number of blocks we need to reduce over */
-    fr->red_nblock = 0;
+    bt->red_nblock = 0;
     ctot           = 0;
-    for (t = 0; t < fr->nthreads; t++)
+    for (t = 0; t < bt->nthreads; t++)
     {
         c = 0;
         for (b = 0; b < maxBlockBits; b++)
         {
-            if (bitmask_is_set(fr->f_t[t].red_mask, b))
+            if (bitmask_is_set(bt->f_t[t].red_mask, b))
             {
-                fr->red_nblock = std::max(fr->red_nblock, b+1);
+                bt->red_nblock = std::max(bt->red_nblock, b+1);
                 c++;
             }
         }
         if (debug)
         {
 #if BITMASK_SIZE <= 64 //move into bitmask when it is C++
-            std::string flags = gmx::formatString("%x", fr->f_t[t].red_mask);
+            std::string flags = gmx::formatString("%x", bt->f_t[t].red_mask);
 #else
-            std::string flags = gmx::formatAndJoin(fr->f_t[t].red_mask,
-                                                   fr->f_t[t].red_mask+BITMASK_ALEN,
+            std::string flags = gmx::formatAndJoin(bt->f_t[t].red_mask,
+                                                   bt->f_t[t].red_mask+BITMASK_ALEN,
                                                    "", gmx::StringFormatter("%x"));
 #endif
             fprintf(debug, "thread %d flags %s count %d\n",
@@ -412,38 +416,43 @@ void setup_bonded_threading(t_forcerec *fr, t_idef *idef)
     if (debug)
     {
         fprintf(debug, "Number of blocks to reduce: %d of size %d\n",
-                fr->red_nblock, 1<<fr->red_ashift);
+                bt->red_nblock, 1<<bt->red_ashift);
         fprintf(debug, "Reduction density %.2f density/#thread %.2f\n",
-                ctot*(1<<fr->red_ashift)/(double)fr->natoms_force,
-                ctot*(1<<fr->red_ashift)/(double)(fr->natoms_force*fr->nthreads));
+                ctot*(1<<bt->red_ashift)/(double)fr->natoms_force,
+                ctot*(1<<bt->red_ashift)/(double)(fr->natoms_force*bt->nthreads));
     }
 }
 
-void init_bonded_threading(FILE *fplog, t_forcerec *fr, int nenergrp)
+void init_bonded_threading(FILE *fplog, int nenergrp,
+                           struct bonded_threading_t **bt_ptr)
 {
+    bonded_threading_t *bt;
+
+    snew(bt, 1);
+
     /* These thread local data structures are used for bondeds only */
-    fr->nthreads = gmx_omp_nthreads_get(emntBonded);
+    bt->nthreads = gmx_omp_nthreads_get(emntBonded);
 
-    if (fr->nthreads > 1)
+    if (bt->nthreads > 1)
     {
         int t;
 
-        snew(fr->f_t, fr->nthreads);
-#pragma omp parallel for num_threads(fr->nthreads) schedule(static)
-        for (t = 0; t < fr->nthreads; t++)
+        snew(bt->f_t, bt->nthreads);
+#pragma omp parallel for num_threads(bt->nthreads) schedule(static)
+        for (t = 0; t < bt->nthreads; t++)
         {
             /* Thread 0 uses the global force and energy arrays */
             if (t > 0)
             {
                 int i;
 
-                fr->f_t[t].f        = NULL;
-                fr->f_t[t].f_nalloc = 0;
-                snew(fr->f_t[t].fshift, SHIFTS);
-                fr->f_t[t].grpp.nener = nenergrp*nenergrp;
+                bt->f_t[t].f        = NULL;
+                bt->f_t[t].f_nalloc = 0;
+                snew(bt->f_t[t].fshift, SHIFTS);
+                bt->f_t[t].grpp.nener = nenergrp*nenergrp;
                 for (i = 0; i < egNR; i++)
                 {
-                    snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
+                    snew(bt->f_t[t].grpp.ener[i], bt->f_t[t].grpp.nener);
                 }
             }
         }
@@ -457,16 +466,18 @@ void init_bonded_threading(FILE *fplog, t_forcerec *fr, int nenergrp)
 
         if ((ptr = getenv("GMX_BONDED_NTHREAD_UNIFORM")) != NULL)
         {
-            sscanf(ptr, "%d", &fr->bonded_max_nthread_uniform);
+            sscanf(ptr, "%d", &bt->bonded_max_nthread_uniform);
             if (fplog != NULL)
             {
                 fprintf(fplog, "\nMax threads for uniform bonded distribution set to %d by env.var.\n",
-                        fr->bonded_max_nthread_uniform);
+                        bt->bonded_max_nthread_uniform);
             }
         }
         else
         {
-            fr->bonded_max_nthread_uniform = max_nthread_uniform;
+            bt->bonded_max_nthread_uniform = max_nthread_uniform;
         }
     }
+
+    *bt_ptr = bt;
 }
index 2096ec63ae1b719f7ebebb5011675cd079d5a28a..c8718da6eb5d71266a3a7e108fa16559ace548b6 100644 (file)
@@ -60,8 +60,13 @@ extern "C" {
  */
 void setup_bonded_threading(t_forcerec *fr, t_idef *idef);
 
-/*! \brief Initialize the bonded threading data structures */
-void init_bonded_threading(FILE *fplog, t_forcerec *fr, int nenergrp);
+/*! \brief Initialize the bonded threading data structures
+ *
+ * Allocates and initializes a bonded threading data structure.
+ * A pointer to this struct is returned as \p *bt_ptr.
+ */
+void init_bonded_threading(FILE *fplog, int nenergrp,
+                           struct bonded_threading_t **bt_ptr);
 
 #ifdef __cplusplus
 }
index 25f24b5beb913c33f738adad63857cdd8f30ff3b..d50bae92932b6f22ad09c790532ded731eb64477 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -177,7 +177,7 @@ static const double
 
 double gmx_erfd(double x)
 {
-#ifdef GMX_FLOAT_FORMAT_IEEE754
+#if GMX_FLOAT_FORMAT_IEEE754
     gmx_int32_t hx, ix, i;
     double      R, S, P, Q, s, y, z, r;
 
@@ -190,7 +190,7 @@ double gmx_erfd(double x)
 
     conv.d = x;
 
-#ifdef GMX_IEEE754_BIG_ENDIAN_WORD_ORDER
+#if GMX_IEEE754_BIG_ENDIAN_WORD_ORDER
     hx = conv.i[0];
 #else
     hx = conv.i[1];
@@ -266,7 +266,7 @@ double gmx_erfd(double x)
 
     conv.d = x;
 
-#ifdef GMX_IEEE754_BIG_ENDIAN_WORD_ORDER
+#if GMX_IEEE754_BIG_ENDIAN_WORD_ORDER
     conv.i[1] = 0;
 #else
     conv.i[0] = 0;
@@ -292,7 +292,7 @@ double gmx_erfd(double x)
 
 double gmx_erfcd(double x)
 {
-#ifdef GMX_FLOAT_FORMAT_IEEE754
+#if GMX_FLOAT_FORMAT_IEEE754
     gmx_int32_t hx, ix;
     double      R, S, P, Q, s, y, z, r;
 
@@ -305,7 +305,7 @@ double gmx_erfcd(double x)
 
     conv.d = x;
 
-#ifdef GMX_IEEE754_BIG_ENDIAN_WORD_ORDER
+#if GMX_IEEE754_BIG_ENDIAN_WORD_ORDER
     hx = conv.i[0];
 #else
     hx = conv.i[1];
@@ -383,7 +383,7 @@ double gmx_erfcd(double x)
 
         conv.d = x;
 
-#ifdef GMX_IEEE754_BIG_ENDIAN_WORD_ORDER
+#if GMX_IEEE754_BIG_ENDIAN_WORD_ORDER
         conv.i[1] = 0;
 #else
         conv.i[0] = 0;
index 20ecfe36bdc27f6cb3586aed03692ddd9a9e0bcc..e27e347587d05bdebe58eb1f7063116d067e9c8c 100644 (file)
@@ -113,34 +113,22 @@ void ns(FILE              *fp,
     }
 }
 
-static void reduce_thread_forces(int n, rvec *f,
-                                 tensor vir_q, tensor vir_lj,
-                                 real *Vcorr_q, real *Vcorr_lj,
-                                 real *dvdl_q, real *dvdl_lj,
-                                 int nthreads, f_thread_t *f_t)
+static void reduce_thread_energies(tensor vir_q, tensor vir_lj,
+                                   real *Vcorr_q, real *Vcorr_lj,
+                                   real *dvdl_q, real *dvdl_lj,
+                                   int nthreads,
+                                   ewald_corr_thread_t *ewc_t)
 {
-    int t, i;
-    int nthreads_loop gmx_unused;
+    int t;
 
-    // cppcheck-suppress unreadVariable
-    nthreads_loop = gmx_omp_nthreads_get(emntBonded);
-    /* This reduction can run over any number of threads */
-#pragma omp parallel for num_threads(nthreads_loop) private(t) schedule(static)
-    for (i = 0; i < n; i++)
-    {
-        for (t = 1; t < nthreads; t++)
-        {
-            rvec_inc(f[i], f_t[t].f[i]);
-        }
-    }
     for (t = 1; t < nthreads; t++)
     {
-        *Vcorr_q  += f_t[t].Vcorr_q;
-        *Vcorr_lj += f_t[t].Vcorr_lj;
-        *dvdl_q   += f_t[t].dvdl[efptCOUL];
-        *dvdl_lj  += f_t[t].dvdl[efptVDW];
-        m_add(vir_q, f_t[t].vir_q, vir_q);
-        m_add(vir_lj, f_t[t].vir_lj, vir_lj);
+        *Vcorr_q  += ewc_t[t].Vcorr_q;
+        *Vcorr_lj += ewc_t[t].Vcorr_lj;
+        *dvdl_q   += ewc_t[t].dvdl[efptCOUL];
+        *dvdl_lj  += ewc_t[t].dvdl[efptVDW];
+        m_add(vir_q, ewc_t[t].vir_q, vir_q);
+        m_add(vir_lj, ewc_t[t].vir_lj, vir_lj);
     }
 }
 
@@ -440,17 +428,14 @@ void do_force_lowlevel(t_forcerec *fr,      t_inputrec *ir,
                     gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions");
                 }
 
-                nthreads = gmx_omp_nthreads_get(emntBonded);
+                nthreads = fr->nthread_ewc;
 #pragma omp parallel for num_threads(nthreads) schedule(static)
                 for (t = 0; t < nthreads; t++)
                 {
-                    int     i;
-                    rvec   *fnv;
                     tensor *vir_q, *vir_lj;
                     real   *Vcorrt_q, *Vcorrt_lj, *dvdlt_q, *dvdlt_lj;
                     if (t == 0)
                     {
-                        fnv       = fr->f_novirsum;
                         vir_q     = &fr->vir_el_recip;
                         vir_lj    = &fr->vir_lj_recip;
                         Vcorrt_q  = &Vcorr_q;
@@ -460,23 +445,23 @@ void do_force_lowlevel(t_forcerec *fr,      t_inputrec *ir,
                     }
                     else
                     {
-                        fnv       = fr->f_t[t].f;
-                        vir_q     = &fr->f_t[t].vir_q;
-                        vir_lj    = &fr->f_t[t].vir_lj;
-                        Vcorrt_q  = &fr->f_t[t].Vcorr_q;
-                        Vcorrt_lj = &fr->f_t[t].Vcorr_lj;
-                        dvdlt_q   = &fr->f_t[t].dvdl[efptCOUL];
-                        dvdlt_lj  = &fr->f_t[t].dvdl[efptVDW];
-                        for (i = 0; i < fr->natoms_force; i++)
-                        {
-                            clear_rvec(fnv[i]);
-                        }
+                        vir_q     = &fr->ewc_t[t].vir_q;
+                        vir_lj    = &fr->ewc_t[t].vir_lj;
+                        Vcorrt_q  = &fr->ewc_t[t].Vcorr_q;
+                        Vcorrt_lj = &fr->ewc_t[t].Vcorr_lj;
+                        dvdlt_q   = &fr->ewc_t[t].dvdl[efptCOUL];
+                        dvdlt_lj  = &fr->ewc_t[t].dvdl[efptVDW];
                         clear_mat(*vir_q);
                         clear_mat(*vir_lj);
                     }
                     *dvdlt_q  = 0;
                     *dvdlt_lj = 0;
 
+                    /* Threading is only supported with the Verlet cut-off
+                     * scheme and then only single particle forces (no
+                     * exclusion forces) are calculated, so we can store
+                     * the forces in the normal, single fr->f_novirsum array.
+                     */
                     ewald_LRcorrection(fr->excl_load[t], fr->excl_load[t+1],
                                        cr, t, fr,
                                        md->chargeA, md->chargeB,
@@ -488,19 +473,18 @@ void do_force_lowlevel(t_forcerec *fr,      t_inputrec *ir,
                                        excl, x, bSB ? boxs : box, mu_tot,
                                        ir->ewald_geometry,
                                        ir->epsilon_surface,
-                                       fnv, *vir_q, *vir_lj,
+                                       fr->f_novirsum, *vir_q, *vir_lj,
                                        Vcorrt_q, Vcorrt_lj,
                                        lambda[efptCOUL], lambda[efptVDW],
                                        dvdlt_q, dvdlt_lj);
                 }
                 if (nthreads > 1)
                 {
-                    reduce_thread_forces(fr->natoms_force, fr->f_novirsum,
-                                         fr->vir_el_recip, fr->vir_lj_recip,
-                                         &Vcorr_q, &Vcorr_lj,
-                                         &dvdl_long_range_correction_q,
-                                         &dvdl_long_range_correction_lj,
-                                         nthreads, fr->f_t);
+                    reduce_thread_energies(fr->vir_el_recip, fr->vir_lj_recip,
+                                           &Vcorr_q, &Vcorr_lj,
+                                           &dvdl_long_range_correction_q,
+                                           &dvdl_long_range_correction_lj,
+                                           nthreads, fr->ewc_t);
                 }
                 wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
             }
index b40a18ba32ff37607fd32f8fd21b1363286337cf..070b476ec56d5861ad2dd60e8368c9f9c4167884 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2014, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
 #ifndef GMX_MDLIB_FORCEREC_THREADING_H
 #define GMX_MDLIB_FORCEREC_THREADING_H
 
-#include "gromacs/utility/bitmask.h"
-
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-struct f_thread_t {
-    rvec             *f;
-    int               f_nalloc;
-    gmx_bitmask_t     red_mask; /* Mask for marking which parts of f are filled */
-    rvec             *fshift;
-    real              ener[F_NRE];
-    gmx_grppairener_t grpp;
+struct ewald_corr_thread_t {
     real              Vcorr_q;
     real              Vcorr_lj;
     real              dvdl[efptNR];
index 7de43327b5d2a1e5456f216aef92012ba9f20f47..b04b09a16baed48e9a6ec1ebe9ffc45ea7ebb0e5 100644 (file)
@@ -3212,9 +3212,12 @@ void init_forcerec(FILE              *fp,
     }
 
     /* Initialize the thread working data for bonded interactions */
-    init_bonded_threading(fp, fr, mtop->groups.grps[egcENER].nr);
+    init_bonded_threading(fp, mtop->groups.grps[egcENER].nr,
+                          &fr->bonded_threading);
 
-    snew(fr->excl_load, fr->nthreads+1);
+    fr->nthread_ewc = gmx_omp_nthreads_get(emntBonded);
+    snew(fr->ewc_t, fr->nthread_ewc);
+    snew(fr->excl_load, fr->nthread_ewc + 1);
 
     /* fr->ic is used both by verlet and group kernels (to some extent) now */
     init_interaction_const(fp, &fr->ic, fr);
@@ -3285,9 +3288,9 @@ void forcerec_set_excl_load(t_forcerec           *fr,
     fr->excl_load[0] = 0;
     n                = 0;
     i                = 0;
-    for (t = 1; t <= fr->nthreads; t++)
+    for (t = 1; t <= fr->nthread_ewc; t++)
     {
-        ntarget = (ntot*t)/fr->nthreads;
+        ntarget = (ntot*t)/fr->nthread_ewc;
         while (i < top->excls.nr && n < ntarget)
         {
             for (j = ind[i]; j < ind[i+1]; j++)
similarity index 86%
rename from src/gromacs/mdlib/genborn.c
rename to src/gromacs/mdlib/genborn.cpp
index a4f34b8ad597615babffb9cc025b5d36dd3ed812..ebcd9b327d81b9e5256a28c4ad7adb3dff944833 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2008, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
 
 #include "gromacs/legacyheaders/genborn.h"
 
-#include <math.h>
 #include <string.h>
 
+#include <cmath>
+
+#include <algorithm>
+
 #include "gromacs/domdec/domdec.h"
 #include "gromacs/fileio/pdbio.h"
 #include "gromacs/legacyheaders/names.h"
@@ -51,6 +54,7 @@
 #include "gromacs/legacyheaders/types/commrec.h"
 #include "gromacs/math/units.h"
 #include "gromacs/math/vec.h"
+#include "gromacs/mdlib/genborn_allvsall.h"
 #include "gromacs/pbcutil/ishift.h"
 #include "gromacs/pbcutil/mshift.h"
 #include "gromacs/pbcutil/pbc.h"
 #include "gromacs/utility/gmxmpi.h"
 #include "gromacs/utility/smalloc.h"
 
-#ifdef GMX_SIMD_X86_SSE2_OR_HIGHER
-#  ifdef GMX_DOUBLE
-#    include "gromacs/mdlib/genborn_allvsall_sse2_double.h"
-#    include "gromacs/mdlib/genborn_sse2_double.h"
-#  else
-#    include "gromacs/mdlib/genborn_allvsall_sse2_single.h"
-#    include "gromacs/mdlib/genborn_sse2_single.h"
-#  endif /* GMX_DOUBLE */
-#endif   /* SSE or AVX present */
-
-#include "gromacs/mdlib/genborn_allvsall.h"
-
-/*#define DISABLE_SSE*/
 
 typedef struct {
     int  shift;
@@ -131,12 +122,8 @@ static int init_gb_still(const t_atomtypes *atype, t_idef *idef, t_atoms *atoms,
                          gmx_genborn_t *born, int natoms)
 {
 
-    int   i, j, i1, i2, k, m, nbond, nang, ia, ib, ic, id, nb, idx, idx2, at;
-    int   iam, ibm;
-    int   at0, at1;
-    real  length, angle;
-    real  r, ri, rj, ri2, ri3, rj2, r2, r3, r4, rk, ratio, term, h, doffset;
-    real  p1, p2, p3, factor, cosine, rab, rbc;
+    int   i, j, m, ia, ib;
+    real  r, ri, rj, ri2, rj2, r3, r4, ratio, term, h, doffset;
 
     real *vsol;
     real *gp;
@@ -145,9 +132,6 @@ static int init_gb_still(const t_atomtypes *atype, t_idef *idef, t_atoms *atoms,
     snew(gp, natoms);
     snew(born->gpol_still_work, natoms+3);
 
-    at0 = 0;
-    at1 = natoms;
-
     doffset = born->gb_doffset;
 
     for (i = 0; i < natoms; i++)
@@ -177,7 +161,6 @@ static int init_gb_still(const t_atomtypes *atype, t_idef *idef, t_atoms *atoms,
         rj   = atype->gb_radius[atoms->atom[ib].type];
 
         ri2  = ri*ri;
-        ri3  = ri2*ri;
         rj2  = rj*rj;
 
         ratio  = (rj2-ri2-r*r)/(2*ri*r);
@@ -249,8 +232,8 @@ int init_gb(gmx_genborn_t **p_born,
             t_forcerec *fr, const t_inputrec *ir,
             const gmx_mtop_t *mtop, int gb_algorithm)
 {
-    int             i, j, m, ai, aj, jj, natoms, nalloc;
-    real            rai, sk, p, doffset;
+    int             i, jj, natoms;
+    real            rai, sk, doffset;
 
     t_atoms         atoms;
     gmx_genborn_t  *born;
@@ -375,10 +358,10 @@ calc_gb_rad_still(t_commrec *cr, t_forcerec *fr, gmx_localtop_t *top,
                   rvec x[], t_nblist *nl,
                   gmx_genborn_t *born, t_mdatoms *md)
 {
-    int  i, k, n, nj0, nj1, ai, aj, type;
+    int  i, k, n, nj0, nj1, ai, aj;
     int  shift;
     real shX, shY, shZ;
-    real gpi, dr, dr2, dr4, idr4, rvdw, ratio, ccf, theta, term, rai, raj;
+    real gpi, dr2, idr4, rvdw, ratio, ccf, theta, term, rai, raj;
     real ix1, iy1, iz1, jx1, jy1, jz1, dx11, dy11, dz11;
     real rinv, idr2, idr6, vaj, dccf, cosq, sinq, prod, gpi2;
     real factor;
@@ -503,14 +486,14 @@ calc_gb_rad_hct(t_commrec *cr, t_forcerec *fr, gmx_localtop_t *top,
                 rvec x[], t_nblist *nl,
                 gmx_genborn_t *born, t_mdatoms *md)
 {
-    int   i, k, n, ai, aj, nj0, nj1, at0, at1;
+    int   i, k, n, ai, aj, nj0, nj1;
     int   shift;
     real  shX, shY, shZ;
-    real  rai, raj, gpi, dr2, dr, sk, sk_ai, sk2, sk2_ai, lij, uij, diff2, tmp, sum_ai;
+    real  rai, raj, dr2, dr, sk, sk_ai, sk2, sk2_ai, lij, uij, diff2, tmp, sum_ai;
     real  rad, min_rad, rinv, rai_inv;
     real  ix1, iy1, iz1, jx1, jy1, jz1, dx11, dy11, dz11;
     real  lij2, uij2, lij3, uij3, t1, t2, t3;
-    real  lij_inv, dlij, duij, sk2_rinv, prod, log_term;
+    real  lij_inv, dlij, sk2_rinv, prod, log_term;
     real  doffset, raj_inv, dadx_val;
     real *gb_radius;
 
@@ -524,7 +507,6 @@ calc_gb_rad_hct(t_commrec *cr, t_forcerec *fr, gmx_localtop_t *top,
 
     /* Keep the compiler happy */
     n    = 0;
-    prod = 0;
 
     for (i = 0; i < nl->nri; i++)
     {
@@ -597,7 +579,7 @@ calc_gb_rad_hct(t_commrec *cr, t_forcerec *fr, gmx_localtop_t *top,
                 sk2_rinv = sk2*rinv;
                 prod     = 0.25*sk2_rinv;
 
-                log_term = log(uij*lij_inv);
+                log_term = std::log(uij*lij_inv);
 
                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term +
                     prod*(-diff2);
@@ -653,7 +635,7 @@ calc_gb_rad_hct(t_commrec *cr, t_forcerec *fr, gmx_localtop_t *top,
 
                 /* log_term = table_log(uij*lij_inv,born->log_table,
                    LOG_TABLE_ACCURACY); */
-                log_term = log(uij*lij_inv);
+                log_term = std::log(uij*lij_inv);
 
                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term +
                     prod*(-diff2);
@@ -698,7 +680,7 @@ calc_gb_rad_hct(t_commrec *cr, t_forcerec *fr, gmx_localtop_t *top,
             min_rad = rai + doffset;
             rad     = 1.0/sum_ai;
 
-            born->bRad[i]   = rad > min_rad ? rad : min_rad;
+            born->bRad[i]   = std::max(rad, min_rad);
             fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
         }
     }
@@ -718,21 +700,19 @@ static int
 calc_gb_rad_obc(t_commrec *cr, t_forcerec *fr, gmx_localtop_t *top,
                 rvec x[], t_nblist *nl, gmx_genborn_t *born, t_mdatoms *md)
 {
-    int   i, k, ai, aj, nj0, nj1, n, at0, at1;
+    int   i, k, ai, aj, nj0, nj1, n;
     int   shift;
     real  shX, shY, shZ;
-    real  rai, raj, gpi, dr2, dr, sk, sk2, lij, uij, diff2, tmp, sum_ai;
-    real  rad, min_rad, sum_ai2, sum_ai3, tsum, tchain, rinv, rai_inv, lij_inv, rai_inv2;
+    real  rai, raj, dr2, dr, sk, sk2, lij, uij, diff2, tmp, sum_ai;
+    real  sum_ai2, sum_ai3, tsum, tchain, rinv, rai_inv, lij_inv, rai_inv2;
     real  log_term, prod, sk2_rinv, sk_ai, sk2_ai;
     real  ix1, iy1, iz1, jx1, jy1, jz1, dx11, dy11, dz11;
-    real  lij2, uij2, lij3, uij3, dlij, duij, t1, t2, t3;
+    real  lij2, uij2, lij3, uij3, dlij, t1, t2, t3;
     real  doffset, raj_inv, dadx_val;
     real *gb_radius;
 
     /* Keep the compiler happy */
     n    = 0;
-    prod = 0;
-    raj  = 0;
 
     doffset   = born->gb_doffset;
     gb_radius = born->gb_radius;
@@ -813,7 +793,7 @@ calc_gb_rad_obc(t_commrec *cr, t_forcerec *fr, gmx_localtop_t *top,
                 sk2_rinv = sk2*rinv;
                 prod     = 0.25*sk2_rinv;
 
-                log_term = log(uij*lij_inv);
+                log_term = std::log(uij*lij_inv);
 
                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
 
@@ -865,7 +845,7 @@ calc_gb_rad_obc(t_commrec *cr, t_forcerec *fr, gmx_localtop_t *top,
                 prod     = 0.25 * sk2_rinv;
 
                 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
-                log_term = log(uij*lij_inv);
+                log_term = std::log(uij*lij_inv);
 
                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
 
@@ -940,7 +920,6 @@ calc_gb_rad_obc(t_commrec *cr, t_forcerec *fr, gmx_localtop_t *top,
 int calc_gb_rad(t_commrec *cr, t_forcerec *fr, t_inputrec *ir, gmx_localtop_t *top,
                 rvec x[], t_nblist *nl, gmx_genborn_t *born, t_mdatoms *md, t_nrnb     *nrnb)
 {
-    real *p;
     int   cnt;
     int   ndadx;
 
@@ -978,43 +957,13 @@ int calc_gb_rad(t_commrec *cr, t_forcerec *fr, t_inputrec *ir, gmx_localtop_t *t
 
         if (ir->gb_algorithm == egbSTILL)
         {
-#if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER)
-            if (fr->use_simd_kernels)
-            {
-#  ifdef GMX_DOUBLE
-                genborn_allvsall_calc_still_radii_sse2_double(fr, md, born, top, x[0], cr, &fr->AllvsAll_workgb);
-#  else
-                genborn_allvsall_calc_still_radii_sse2_single(fr, md, born, top, x[0], cr, &fr->AllvsAll_workgb);
-#  endif
-            }
-            else
-            {
-                genborn_allvsall_calc_still_radii(fr, md, born, top, x[0], cr, &fr->AllvsAll_workgb);
-            }
-#else
             genborn_allvsall_calc_still_radii(fr, md, born, top, x[0], &fr->AllvsAll_workgb);
-#endif
             /* 13 flops in outer loop, 47 flops in inner loop */
             inc_nrnb(nrnb, eNR_BORN_AVA_RADII_STILL, md->homenr*13+cnt*47);
         }
         else if (ir->gb_algorithm == egbHCT || ir->gb_algorithm == egbOBC)
         {
-#if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER)
-            if (fr->use_simd_kernels)
-            {
-#  ifdef GMX_DOUBLE
-                genborn_allvsall_calc_hct_obc_radii_sse2_double(fr, md, born, ir->gb_algorithm, top, x[0], cr, &fr->AllvsAll_workgb);
-#  else
-                genborn_allvsall_calc_hct_obc_radii_sse2_single(fr, md, born, ir->gb_algorithm, top, x[0], cr, &fr->AllvsAll_workgb);
-#  endif
-            }
-            else
-            {
-                genborn_allvsall_calc_hct_obc_radii(fr, md, born, ir->gb_algorithm, top, x[0], cr, &fr->AllvsAll_workgb);
-            }
-#else
             genborn_allvsall_calc_hct_obc_radii(fr, md, born, ir->gb_algorithm, top, x[0], &fr->AllvsAll_workgb);
-#endif
             /* 24 flops in outer loop, 183 in inner */
             inc_nrnb(nrnb, eNR_BORN_AVA_RADII_HCT_OBC, md->homenr*24+cnt*183);
         }
@@ -1028,45 +977,6 @@ int calc_gb_rad(t_commrec *cr, t_forcerec *fr, t_inputrec *ir, gmx_localtop_t *t
     /* Switch for determining which algorithm to use for Born radii calculation */
 #ifdef GMX_DOUBLE
 
-#if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER)
-    /* x86 or x86-64 with GCC inline assembly and/or SSE intrinsics */
-    switch (ir->gb_algorithm)
-    {
-        case egbSTILL:
-            if (fr->use_simd_kernels)
-            {
-                calc_gb_rad_still_sse2_double(cr, fr, born->nr, top, atype, x[0], nl, born);
-            }
-            else
-            {
-                calc_gb_rad_still(cr, fr, top, x, nl, born, md);
-            }
-            break;
-        case egbHCT:
-            if (fr->use_simd_kernels)
-            {
-                calc_gb_rad_hct_obc_sse2_double(cr, fr, born->nr, top, atype, x[0], nl, born, md, ir->gb_algorithm);
-            }
-            else
-            {
-                calc_gb_rad_hct(cr, fr, top, x, nl, born, md);
-            }
-            break;
-        case egbOBC:
-            if (fr->use_simd_kernels)
-            {
-                calc_gb_rad_hct_obc_sse2_double(cr, fr, born->nr, top, atype, x[0], nl, born, md, ir->gb_algorithm);
-            }
-            else
-            {
-                calc_gb_rad_obc(cr, fr, born->nr, top, x, nl, born, md);
-            }
-            break;
-
-        default:
-            gmx_fatal(FARGS, "Unknown double precision sse-enabled algorithm for Born radii calculation: %d", ir->gb_algorithm);
-    }
-#else
     switch (ir->gb_algorithm)
     {
         case egbSTILL:
@@ -1083,51 +993,8 @@ int calc_gb_rad(t_commrec *cr, t_forcerec *fr, t_inputrec *ir, gmx_localtop_t *t
             gmx_fatal(FARGS, "Unknown double precision algorithm for Born radii calculation: %d", ir->gb_algorithm);
     }
 
-#endif
-
 #else
 
-#if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER)
-    /* x86 or x86-64 with GCC inline assembly and/or SSE intrinsics */
-    switch (ir->gb_algorithm)
-    {
-        case egbSTILL:
-            if (fr->use_simd_kernels)
-            {
-                calc_gb_rad_still_sse2_single(cr, fr, born->nr, top, x[0], nl, born);
-            }
-            else
-            {
-                calc_gb_rad_still(cr, fr, top, x, nl, born, md);
-            }
-            break;
-        case egbHCT:
-            if (fr->use_simd_kernels)
-            {
-                calc_gb_rad_hct_obc_sse2_single(cr, fr, born->nr, top, x[0], nl, born, md, ir->gb_algorithm);
-            }
-            else
-            {
-                calc_gb_rad_hct(cr, fr, top, x, nl, born, md);
-            }
-            break;
-
-        case egbOBC:
-            if (fr->use_simd_kernels)
-            {
-                calc_gb_rad_hct_obc_sse2_single(cr, fr, born->nr, top, x[0], nl, born, md, ir->gb_algorithm);
-            }
-            else
-            {
-                calc_gb_rad_obc(cr, fr, born->nr, top, x, nl, born, md);
-            }
-            break;
-
-        default:
-            gmx_fatal(FARGS, "Unknown sse-enabled algorithm for Born radii calculation: %d", ir->gb_algorithm);
-    }
-
-#else
     switch (ir->gb_algorithm)
     {
         case egbSTILL:
@@ -1144,8 +1011,6 @@ int calc_gb_rad(t_commrec *cr, t_forcerec *fr, t_inputrec *ir, gmx_localtop_t *t
             gmx_fatal(FARGS, "Unknown algorithm for Born radii calculation: %d", ir->gb_algorithm);
     }
 
-#endif /* Single precision sse */
-
 #endif /* Double or single precision */
 
     if (fr->bAllvsAll == FALSE)
@@ -1176,14 +1041,14 @@ real gb_bonds_tab(rvec x[], rvec f[], rvec fshift[], real *charge, real *p_gbtab
                   real *invsqrta, real *dvda, real *GBtab, t_idef *idef, real epsilon_r,
                   real gb_epsilon_solvent, real facel, const t_pbc *pbc, const t_graph *graph)
 {
-    int      i, j, n0, m, nnn, type, ai, aj;
+    int      i, j, n0, m, nnn, ai, aj;
     int      ki;
 
     real     isai, isaj;
     real     r, rsq11;
     real     rinv11, iq;
     real     isaprod, qq, gbscale, gbtabscale, Y, F, Geps, Heps2, Fp, VV, FF, rt, eps, eps2;
-    real     vgb, fgb, vcoul, fijC, dvdatmp, fscal, dvdaj;
+    real     vgb, fgb, fijC, dvdatmp, fscal;
     real     vctot;
 
     rvec     dx;
@@ -1223,7 +1088,7 @@ real gb_bonds_tab(rvec x[], rvec f[], rvec fshift[], real *charge, real *p_gbtab
             gbscale       = isaprod*gbtabscale;
             r             = rsq11*rinv11;
             rt            = r*gbscale;
-            n0            = rt;
+            n0            = static_cast<int>(rt);
             eps           = rt-n0;
             eps2          = eps*eps;
             nnn           = 4*n0;
@@ -1312,12 +1177,9 @@ real calc_gb_nonpolar(t_commrec *cr, t_forcerec *fr, int natoms, gmx_genborn_t *
                       real *dvda, t_mdatoms *md)
 {
     int  ai, i, at0, at1;
-    real e, es, rai, rbi, term, probe, tmp, factor;
+    real e, es, rai, term, probe, tmp, factor;
     real rbi_inv, rbi_inv2;
 
-    /* To keep the compiler happy */
-    factor = 0;
-
     if (DOMAINDECOMP(cr))
     {
         at0 = 0;
@@ -1331,19 +1193,6 @@ real calc_gb_nonpolar(t_commrec *cr, t_forcerec *fr, int natoms, gmx_genborn_t *
 
     /* factor is the surface tension */
     factor = born->sa_surface_tension;
-    /*
-
-       // The surface tension factor is 0.0049 for Still model, 0.0054 for HCT/OBC
-       if(gb_algorithm==egbSTILL)
-       {
-          factor=0.0049*100*CAL2JOULE;
-       }
-       else
-       {
-          factor=0.0054*100*CAL2JOULE;
-       }
-     */
-    /* if(gb_algorithm==egbHCT || gb_algorithm==egbOBC) */
 
     es    = 0;
     probe = 0.14;
@@ -1377,11 +1226,10 @@ real calc_gb_chainrule(int natoms, t_nblist *nl, real *dadx, real *dvda, rvec x[
     int          i, k, n, ai, aj, nj0, nj1, n0, n1;
     int          shift;
     real         shX, shY, shZ;
-    real         fgb, fij, rb2, rbi, fix1, fiy1, fiz1;
-    real         ix1, iy1, iz1, jx1, jy1, jz1, dx11, dy11, dz11, rsq11;
-    real         rinv11, tx, ty, tz, rbai, rbaj, fgb_ai;
+    real         fgb, rbi, fix1, fiy1, fiz1;
+    real         ix1, iy1, iz1, jx1, jy1, jz1, dx11, dy11, dz11;
+    real         tx, ty, tz, rbai, rbaj, fgb_ai;
     real        *rb;
-    volatile int idx;
 
     n  = 0;
     rb = born->work;
@@ -1492,9 +1340,7 @@ calc_gb_forces(t_commrec *cr, t_mdatoms *md, gmx_genborn_t *born, gmx_localtop_t
                rvec x[], rvec f[], t_forcerec *fr, t_idef *idef, int gb_algorithm, int sa_algorithm, t_nrnb *nrnb,
                const t_pbc *pbc, const t_graph *graph, gmx_enerdata_t *enerd)
 {
-    real v = 0;
     int  cnt;
-    int  i;
 
     /* PBC or not? */
     const t_pbc *pbc_null;
@@ -1530,48 +1376,15 @@ calc_gb_forces(t_commrec *cr, t_mdatoms *md, gmx_genborn_t *born, gmx_localtop_t
 
     if (fr->bAllvsAll)
     {
-#if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER)
-        if (fr->use_simd_kernels)
-        {
-#  ifdef GMX_DOUBLE
-            genborn_allvsall_calc_chainrule_sse2_double(fr, md, born, x[0], f[0], gb_algorithm, fr->AllvsAll_workgb);
-#  else
-            genborn_allvsall_calc_chainrule_sse2_single(fr, md, born, x[0], f[0], gb_algorithm, fr->AllvsAll_workgb);
-#  endif
-        }
-        else
-        {
-            genborn_allvsall_calc_chainrule(fr, md, born, x[0], f[0], gb_algorithm, fr->AllvsAll_workgb);
-        }
-#else
         genborn_allvsall_calc_chainrule(fr, md, born, x[0], f[0], gb_algorithm, fr->AllvsAll_workgb);
-#endif
         cnt = md->homenr*(md->nr/2+1);
         /* 9 flops for outer loop, 15 for inner */
         inc_nrnb(nrnb, eNR_BORN_AVA_CHAINRULE, md->homenr*9+cnt*15);
         return;
     }
 
-#if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER)
-    if (fr->use_simd_kernels)
-    {
-#  ifdef GMX_DOUBLE
-        calc_gb_chainrule_sse2_double(fr->natoms_force, &(fr->gblist), fr->dadx, fr->dvda, x[0],
-                                      f[0], fr->fshift[0], fr->shift_vec[0], gb_algorithm, born, md);
-#  else
-        calc_gb_chainrule_sse2_single(fr->natoms_force, &(fr->gblist), fr->dadx, fr->dvda, x[0],
-                                      f[0], fr->fshift[0], fr->shift_vec[0], gb_algorithm, born, md);
-#  endif
-    }
-    else
-    {
-        calc_gb_chainrule(fr->natoms_force, &(fr->gblist), fr->dadx, fr->dvda,
-                          x, f, fr->fshift, fr->shift_vec, gb_algorithm, born, md);
-    }
-#else
     calc_gb_chainrule(fr->natoms_force, &(fr->gblist), fr->dadx, fr->dvda,
                       x, f, fr->fshift, fr->shift_vec, gb_algorithm, born);
-#endif
 
     if (!fr->bAllvsAll)
     {
@@ -1627,18 +1440,17 @@ static void add_bondeds_to_gblist(t_ilist *il,
                                   gmx_bool bMolPBC, t_pbc *pbc, t_graph *g, rvec *x,
                                   struct gbtmpnbls *nls)
 {
-    int         ind, j, ai, aj, shift, found;
+    int         ind, j, ai, aj, found;
     rvec        dx;
     ivec        dt;
     gbtmpnbl_t *list;
 
-    shift = CENTRAL;
     for (ind = 0; ind < il->nr; ind += 3)
     {
         ai = il->iatoms[ind+1];
         aj = il->iatoms[ind+2];
 
-        shift = CENTRAL;
+        int shift = CENTRAL;
         if (g != NULL)
         {
             rvec_sub(x[ai], x[aj], dx);
@@ -1690,8 +1502,7 @@ int make_gb_nblist(t_commrec *cr, int gb_algorithm,
                    rvec x[], matrix box,
                    t_forcerec *fr, t_idef *idef, t_graph *graph, gmx_genborn_t *born)
 {
-    int               i, l, ii, j, k, n, nj0, nj1, ai, aj, at0, at1, found, shift, s;
-    int               apa;
+    int               i, j, k, n, nj0, nj1, ai, shift, s;
     t_nblist         *nblist;
     t_pbc             pbc;
 
similarity index 97%
rename from src/gromacs/mdlib/genborn_allvsall.c
rename to src/gromacs/mdlib/genborn_allvsall.cpp
index a18f2e1ac5cbc267a47a4ec97a3bd0b1f1ff1a11..47b14467edf8cce4b91c4fbd27e2802d6c8ae229 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2009, The GROMACS Development Team.
- * Copyright (c) 2010,2014, by the GROMACS development team, led by
+ * Copyright (c) 2010,2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -38,7 +38,9 @@
 
 #include "genborn_allvsall.h"
 
-#include <math.h>
+#include <cmath>
+
+#include <algorithm>
 
 #include "gromacs/legacyheaders/genborn.h"
 #include "gromacs/legacyheaders/network.h"
@@ -115,12 +117,10 @@ setup_gb_exclusions_and_indices(gmx_allvsallgb2_data_t     *   aadata,
                                 gmx_bool                       bInclude13,
                                 gmx_bool                       bInclude14)
 {
-    int i, j, k, tp;
+    int i, j, k;
     int a1, a2;
-    int nj0, nj1;
     int max_offset;
     int max_excl_offset;
-    int nj;
 
     /* This routine can appear to be a bit complex, but it is mostly book-keeping.
      * To enable the fast all-vs-all kernel we need to be able to stream through all coordinates
@@ -168,7 +168,7 @@ setup_gb_exclusions_and_indices(gmx_allvsallgb2_data_t     *   aadata,
                 }
                 if (k > 0 && k <= max_offset)
                 {
-                    max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
+                    max_excl_offset = std::max(k, max_excl_offset);
                 }
             }
         }
@@ -194,7 +194,7 @@ setup_gb_exclusions_and_indices(gmx_allvsallgb2_data_t     *   aadata,
                 }
                 if (k > 0 && k <= max_offset)
                 {
-                    max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
+                    max_excl_offset = std::max(k, max_excl_offset);
                 }
             }
         }
@@ -220,11 +220,11 @@ setup_gb_exclusions_and_indices(gmx_allvsallgb2_data_t     *   aadata,
                 }
                 if (k > 0 && k <= max_offset)
                 {
-                    max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
+                    max_excl_offset = std::max(k, max_excl_offset);
                 }
             }
         }
-        max_excl_offset = (max_offset < max_excl_offset) ? max_offset : max_excl_offset;
+        max_excl_offset = std::min(max_offset, max_excl_offset);
 
         aadata->jindex_gb[3*i+1] = i+1+max_excl_offset;
 
@@ -329,9 +329,7 @@ genborn_allvsall_setup(gmx_allvsallgb2_data_t     **  p_aadata,
                        gmx_bool                       bInclude13,
                        gmx_bool                       bInclude14)
 {
-    int                     i, j, idx;
     gmx_allvsallgb2_data_t *aadata;
-    real                   *p;
 
     snew(aadata, 1);
     *p_aadata = aadata;
@@ -584,8 +582,6 @@ genborn_allvsall_calc_hct_obc_radii(t_forcerec *           fr,
     ni1                 = mdatoms->homenr;
 
     n       = 0;
-    prod    = 0;
-    raj     = 0;
     doffset = born->gb_doffset;
 
     aadata = *((gmx_allvsallgb2_data_t **)work);
@@ -679,7 +675,7 @@ genborn_allvsall_calc_hct_obc_radii(t_forcerec *           fr,
                     sk2_rinv = sk2*rinv;
                     prod     = 0.25*sk2_rinv;
 
-                    log_term = log(uij*lij_inv);
+                    log_term = std::log(uij*lij_inv);
                     /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
                     tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
 
@@ -730,7 +726,7 @@ genborn_allvsall_calc_hct_obc_radii(t_forcerec *           fr,
                     prod     = 0.25 * sk2_rinv;
 
                     /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
-                    log_term = log(uij*lij_inv);
+                    log_term = std::log(uij*lij_inv);
 
                     tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
 
@@ -806,7 +802,7 @@ genborn_allvsall_calc_hct_obc_radii(t_forcerec *           fr,
                 sk2_rinv = sk2*rinv;
                 prod     = 0.25*sk2_rinv;
 
-                log_term = log(uij*lij_inv);
+                log_term = std::log(uij*lij_inv);
                 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
 
@@ -857,7 +853,7 @@ genborn_allvsall_calc_hct_obc_radii(t_forcerec *           fr,
                 prod     = 0.25 * sk2_rinv;
 
                 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
-                log_term = log(uij*lij_inv);
+                log_term = std::log(uij*lij_inv);
 
                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
 
@@ -898,7 +894,7 @@ genborn_allvsall_calc_hct_obc_radii(t_forcerec *           fr,
                 min_rad = rai + born->gb_doffset;
                 rad     = 1.0/sum_ai;
 
-                born->bRad[i]   = rad > min_rad ? rad : min_rad;
+                born->bRad[i]   = std::max(rad, min_rad);
                 fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
             }
         }
diff --git a/src/gromacs/mdlib/genborn_allvsall_sse2_double.c b/src/gromacs/mdlib/genborn_allvsall_sse2_double.c
deleted file mode 100644 (file)
index 5847525..0000000
+++ /dev/null
@@ -1,2506 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team.
- * Copyright (c) 2012,2014, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#include "gmxpre.h"
-
-#include <math.h>
-
-#include "gromacs/legacyheaders/genborn.h"
-#include "gromacs/legacyheaders/network.h"
-#include "gromacs/legacyheaders/types/simple.h"
-#include "gromacs/math/units.h"
-#include "gromacs/math/vec.h"
-#include "gromacs/mdlib/genborn_allvsall.h"
-#include "gromacs/utility/smalloc.h"
-
-
-#if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER)
-
-#include <gmx_sse2_double.h>
-
-
-#define SIMD_WIDTH 2
-#define UNROLLI    2
-#define UNROLLJ    2
-
-
-
-
-
-
-
-
-
-typedef struct
-{
-    int   *      jindex_gb;
-    int   **     prologue_mask_gb;
-    int   **     epilogue_mask;
-    int   *      imask;
-    double *     gb_radius;
-    double *     workparam;
-    double *     work;
-    double *     x_align;
-    double *     y_align;
-    double *     z_align;
-    double *     fx_align;
-    double *     fy_align;
-    double *     fz_align;
-}
-gmx_allvsallgb2_data_t;
-
-
-static int
-calc_maxoffset(int i, int natoms)
-{
-    int maxoffset;
-
-    if ((natoms % 2) == 1)
-    {
-        /* Odd number of atoms, easy */
-        maxoffset = natoms/2;
-    }
-    else if ((natoms % 4) == 0)
-    {
-        /* Multiple of four is hard */
-        if (i < natoms/2)
-        {
-            if ((i % 2) == 0)
-            {
-                maxoffset = natoms/2;
-            }
-            else
-            {
-                maxoffset = natoms/2-1;
-            }
-        }
-        else
-        {
-            if ((i % 2) == 1)
-            {
-                maxoffset = natoms/2;
-            }
-            else
-            {
-                maxoffset = natoms/2-1;
-            }
-        }
-    }
-    else
-    {
-        /* natoms/2 = odd */
-        if ((i % 2) == 0)
-        {
-            maxoffset = natoms/2;
-        }
-        else
-        {
-            maxoffset = natoms/2-1;
-        }
-    }
-
-    return maxoffset;
-}
-
-static void
-setup_gb_exclusions_and_indices(gmx_allvsallgb2_data_t     *   aadata,
-                                t_ilist     *                  ilist,
-                                int                            start,
-                                int                            end,
-                                int                            natoms,
-                                gmx_bool                       bInclude12,
-                                gmx_bool                       bInclude13,
-                                gmx_bool                       bInclude14)
-{
-    int   i, j, k, tp;
-    int   a1, a2;
-    int   ni0, ni1, nj0, nj1, nj;
-    int   imin, imax, iexcl;
-    int   max_offset;
-    int   max_excl_offset;
-    int   firstinteraction;
-    int   ibase;
-    int  *pi;
-
-    /* This routine can appear to be a bit complex, but it is mostly book-keeping.
-     * To enable the fast all-vs-all kernel we need to be able to stream through all coordinates
-     * whether they should interact or not.
-     *
-     * To avoid looping over the exclusions, we create a simple mask that is 1 if the interaction
-     * should be present, otherwise 0. Since exclusions typically only occur when i & j are close,
-     * we create a jindex array with three elements per i atom: the starting point, the point to
-     * which we need to check exclusions, and the end point.
-     * This way we only have to allocate a short exclusion mask per i atom.
-     */
-
-    ni0 = (start/UNROLLI)*UNROLLI;
-    ni1 = ((end+UNROLLI-1)/UNROLLI)*UNROLLI;
-
-    /* Set the interaction mask to only enable the i atoms we want to include */
-    snew(pi, 2*(natoms+UNROLLI+2*SIMD_WIDTH));
-    aadata->imask = (int *) (((size_t) pi + 16) & (~((size_t) 15)));
-    for (i = 0; i < natoms+UNROLLI; i++)
-    {
-        aadata->imask[2*i]   = (i >= start && i < end) ? 0xFFFFFFFF : 0;
-        aadata->imask[2*i+1] = (i >= start && i < end) ? 0xFFFFFFFF : 0;
-    }
-
-    /* Allocate memory for our modified jindex array */
-    snew(aadata->jindex_gb, 4*(natoms+UNROLLI));
-    for (i = 0; i < 4*(natoms+UNROLLI); i++)
-    {
-        aadata->jindex_gb[i] = 0;
-    }
-
-    /* Create the exclusion masks for the prologue part */
-    snew(aadata->prologue_mask_gb, natoms+UNROLLI); /* list of pointers */
-
-    /* First zero everything to avoid uninitialized data */
-    for (i = 0; i < natoms+UNROLLI; i++)
-    {
-        aadata->prologue_mask_gb[i] = NULL;
-    }
-
-    /* Calculate the largest exclusion range we need for each UNROLLI-tuplet of i atoms. */
-    for (ibase = ni0; ibase < ni1; ibase += UNROLLI)
-    {
-        max_excl_offset = -1;
-
-        /* First find maxoffset for the next 4 atoms (or fewer if we are close to end) */
-        imax = ((ibase+UNROLLI) < end) ? (ibase+UNROLLI) : end;
-
-        /* Which atom is the first we (might) interact with? */
-        imin = natoms; /* Guaranteed to be overwritten by one of 'firstinteraction' */
-        for (i = ibase; i < imax; i++)
-        {
-            /* Before exclusions, which atom is the first we (might) interact with? */
-            firstinteraction = i+1;
-            max_offset       = calc_maxoffset(i, natoms);
-
-            if (!bInclude12)
-            {
-                for (j = 0; j < ilist[F_GB12].nr; j += 3)
-                {
-                    a1 = ilist[F_GB12].iatoms[j+1];
-                    a2 = ilist[F_GB12].iatoms[j+2];
-
-                    if (a1 == i)
-                    {
-                        k = a2;
-                    }
-                    else if (a2 == i)
-                    {
-                        k = a1;
-                    }
-                    else
-                    {
-                        continue;
-                    }
-
-                    if (k == firstinteraction)
-                    {
-                        firstinteraction++;
-                    }
-                }
-            }
-            if (!bInclude13)
-            {
-                for (j = 0; j < ilist[F_GB13].nr; j += 3)
-                {
-                    a1 = ilist[F_GB13].iatoms[j+1];
-                    a2 = ilist[F_GB13].iatoms[j+2];
-
-                    if (a1 == i)
-                    {
-                        k = a2;
-                    }
-                    else if (a2 == i)
-                    {
-                        k = a1;
-                    }
-                    else
-                    {
-                        continue;
-                    }
-
-                    if (k == firstinteraction)
-                    {
-                        firstinteraction++;
-                    }
-                }
-            }
-            if (!bInclude14)
-            {
-                for (j = 0; j < ilist[F_GB14].nr; j += 3)
-                {
-                    a1 = ilist[F_GB14].iatoms[j+1];
-                    a2 = ilist[F_GB14].iatoms[j+2];
-                    if (a1 == i)
-                    {
-                        k = a2;
-                    }
-                    else if (a2 == i)
-                    {
-                        k = a1;
-                    }
-                    else
-                    {
-                        continue;
-                    }
-
-                    if (k == firstinteraction)
-                    {
-                        firstinteraction++;
-                    }
-                }
-            }
-            imin = (firstinteraction < imin) ? firstinteraction : imin;
-        }
-        /* round down to j unrolling factor */
-        imin = (imin/UNROLLJ)*UNROLLJ;
-
-        for (i = ibase; i < imax; i++)
-        {
-            max_offset = calc_maxoffset(i, natoms);
-
-            if (!bInclude12)
-            {
-                for (j = 0; j < ilist[F_GB12].nr; j += 3)
-                {
-                    a1 = ilist[F_GB12].iatoms[j+1];
-                    a2 = ilist[F_GB12].iatoms[j+2];
-
-                    if (a1 == i)
-                    {
-                        k = a2;
-                    }
-                    else if (a2 == i)
-                    {
-                        k = a1;
-                    }
-                    else
-                    {
-                        continue;
-                    }
-
-                    if (k < imin)
-                    {
-                        k += natoms;
-                    }
-
-                    if (k > i+max_offset)
-                    {
-                        continue;
-                    }
-
-                    k = k - imin;
-
-                    if (k+natoms <= max_offset)
-                    {
-                        k += natoms;
-                    }
-                    max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
-                }
-            }
-            if (!bInclude13)
-            {
-                for (j = 0; j < ilist[F_GB13].nr; j += 3)
-                {
-                    a1 = ilist[F_GB13].iatoms[j+1];
-                    a2 = ilist[F_GB13].iatoms[j+2];
-
-                    if (a1 == i)
-                    {
-                        k = a2;
-                    }
-                    else if (a2 == i)
-                    {
-                        k = a1;
-                    }
-                    else
-                    {
-                        continue;
-                    }
-
-                    if (k < imin)
-                    {
-                        k += natoms;
-                    }
-
-                    if (k > i+max_offset)
-                    {
-                        continue;
-                    }
-
-                    k = k - imin;
-
-                    if (k+natoms <= max_offset)
-                    {
-                        k += natoms;
-                    }
-                    max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
-                }
-            }
-            if (!bInclude14)
-            {
-                for (j = 0; j < ilist[F_GB14].nr; j += 3)
-                {
-                    a1 = ilist[F_GB14].iatoms[j+1];
-                    a2 = ilist[F_GB14].iatoms[j+2];
-
-                    if (a1 == i)
-                    {
-                        k = a2;
-                    }
-                    else if (a2 == i)
-                    {
-                        k = a1;
-                    }
-                    else
-                    {
-                        continue;
-                    }
-
-                    if (k < imin)
-                    {
-                        k += natoms;
-                    }
-
-                    if (k > i+max_offset)
-                    {
-                        continue;
-                    }
-
-                    k = k - imin;
-
-                    if (k+natoms <= max_offset)
-                    {
-                        k += natoms;
-                    }
-                    max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
-                }
-            }
-        }
-
-        /* The offset specifies the last atom to be excluded, so add one unit to get an upper loop limit */
-        max_excl_offset++;
-        /* round up to j unrolling factor */
-        max_excl_offset = (max_excl_offset/UNROLLJ+1)*UNROLLJ;
-
-        /* Set all the prologue masks length to this value (even for i>end) */
-        for (i = ibase; i < ibase+UNROLLI; i++)
-        {
-            aadata->jindex_gb[4*i]   = imin;
-            aadata->jindex_gb[4*i+1] = imin+max_excl_offset;
-        }
-    }
-
-    /* Now the hard part, loop over it all again to calculate the actual contents of the prologue masks */
-    for (ibase = ni0; ibase < ni1; ibase += UNROLLI)
-    {
-        for (i = ibase; i < ibase+UNROLLI; i++)
-        {
-            nj   = aadata->jindex_gb[4*i+1] - aadata->jindex_gb[4*i];
-            imin = aadata->jindex_gb[4*i];
-
-            /* Allocate aligned memory */
-            snew(pi, 2*(nj+2*SIMD_WIDTH));
-            aadata->prologue_mask_gb[i] = (int *) (((size_t) pi + 16) & (~((size_t) 15)));
-
-            max_offset = calc_maxoffset(i, natoms);
-
-            /* Include interactions i+1 <= j < i+maxoffset */
-            for (k = 0; k < nj; k++)
-            {
-                j = imin + k;
-
-                if ( (j > i) && (j <= i+max_offset) )
-                {
-                    aadata->prologue_mask_gb[i][2*k]   = 0xFFFFFFFF;
-                    aadata->prologue_mask_gb[i][2*k+1] = 0xFFFFFFFF;
-                }
-                else
-                {
-                    aadata->prologue_mask_gb[i][2*k]   = 0;
-                    aadata->prologue_mask_gb[i][2*k+1] = 0;
-                }
-            }
-
-            /* Clear out the explicit exclusions */
-            if (i < end)
-            {
-                if (!bInclude12)
-                {
-                    for (j = 0; j < ilist[F_GB12].nr; j += 3)
-                    {
-                        a1 = ilist[F_GB12].iatoms[j+1];
-                        a2 = ilist[F_GB12].iatoms[j+2];
-
-                        if (a1 == i)
-                        {
-                            k = a2;
-                        }
-                        else if (a2 == i)
-                        {
-                            k = a1;
-                        }
-                        else
-                        {
-                            continue;
-                        }
-
-                        if (k > i+max_offset)
-                        {
-                            continue;
-                        }
-                        k = k-i;
-
-                        if (k+natoms <= max_offset)
-                        {
-                            k += natoms;
-                        }
-
-                        k = k+i-imin;
-                        if (k >= 0)
-                        {
-                            aadata->prologue_mask_gb[i][2*k]   = 0;
-                            aadata->prologue_mask_gb[i][2*k+1] = 0;
-                        }
-                    }
-                }
-                if (!bInclude13)
-                {
-                    for (j = 0; j < ilist[F_GB13].nr; j += 3)
-                    {
-                        a1 = ilist[F_GB13].iatoms[j+1];
-                        a2 = ilist[F_GB13].iatoms[j+2];
-
-                        if (a1 == i)
-                        {
-                            k = a2;
-                        }
-                        else if (a2 == i)
-                        {
-                            k = a1;
-                        }
-                        else
-                        {
-                            continue;
-                        }
-
-                        if (k > i+max_offset)
-                        {
-                            continue;
-                        }
-                        k = k-i;
-
-                        if (k+natoms <= max_offset)
-                        {
-                            k += natoms;
-                        }
-
-                        k = k+i-imin;
-                        if (k >= 0)
-                        {
-                            aadata->prologue_mask_gb[i][2*k]   = 0;
-                            aadata->prologue_mask_gb[i][2*k+1] = 0;
-                        }
-                    }
-                }
-                if (!bInclude14)
-                {
-                    for (j = 0; j < ilist[F_GB14].nr; j += 3)
-                    {
-                        a1 = ilist[F_GB14].iatoms[j+1];
-                        a2 = ilist[F_GB14].iatoms[j+2];
-
-                        if (a1 == i)
-                        {
-                            k = a2;
-                        }
-                        else if (a2 == i)
-                        {
-                            k = a1;
-                        }
-                        else
-                        {
-                            continue;
-                        }
-
-                        if (k > i+max_offset)
-                        {
-                            continue;
-                        }
-                        k = k-i;
-
-                        if (k+natoms <= max_offset)
-                        {
-                            k += natoms;
-                        }
-
-                        k = k+i-imin;
-                        if (k >= 0)
-                        {
-                            aadata->prologue_mask_gb[i][2*k]   = 0;
-                            aadata->prologue_mask_gb[i][2*k+1] = 0;
-                        }
-                    }
-                }
-            }
-        }
-    }
-
-    /* Construct the epilogue mask - this just contains the check for maxoffset */
-    snew(aadata->epilogue_mask, natoms+UNROLLI);
-
-    /* First zero everything to avoid uninitialized data */
-    for (i = 0; i < natoms+UNROLLI; i++)
-    {
-        aadata->jindex_gb[4*i+2]    = aadata->jindex_gb[4*i+1];
-        aadata->jindex_gb[4*i+3]    = aadata->jindex_gb[4*i+1];
-        aadata->epilogue_mask[i]    = NULL;
-    }
-
-    for (ibase = ni0; ibase < ni1; ibase += UNROLLI)
-    {
-        /* Find the lowest index for which we need to use the epilogue */
-        imin       = ibase;
-        max_offset = calc_maxoffset(imin, natoms);
-
-        imin = imin + 1 + max_offset;
-
-        /* Find largest index for which we need to use the epilogue */
-        imax = ibase + UNROLLI-1;
-        imax = (imax < end) ? imax : end;
-
-        max_offset = calc_maxoffset(imax, natoms);
-        imax       = imax + 1 + max_offset + UNROLLJ - 1;
-
-        for (i = ibase; i < ibase+UNROLLI; i++)
-        {
-            /* Start of epilogue - round down to j tile limit */
-            aadata->jindex_gb[4*i+2] = (imin/UNROLLJ)*UNROLLJ;
-            /* Make sure we dont overlap - for small systems everything is done in the prologue */
-            aadata->jindex_gb[4*i+2] = (aadata->jindex_gb[4*i+1] > aadata->jindex_gb[4*i+2]) ? aadata->jindex_gb[4*i+1] : aadata->jindex_gb[4*i+2];
-            /* Round upwards to j tile limit */
-            aadata->jindex_gb[4*i+3] = (imax/UNROLLJ)*UNROLLJ;
-            /* Make sure we dont have a negative range for the epilogue */
-            aadata->jindex_gb[4*i+3] = (aadata->jindex_gb[4*i+2] > aadata->jindex_gb[4*i+3]) ? aadata->jindex_gb[4*i+2] : aadata->jindex_gb[4*i+3];
-        }
-    }
-
-    /* And fill it with data... */
-
-    for (ibase = ni0; ibase < ni1; ibase += UNROLLI)
-    {
-        for (i = ibase; i < ibase+UNROLLI; i++)
-        {
-
-            nj = aadata->jindex_gb[4*i+3] - aadata->jindex_gb[4*i+2];
-
-            /* Allocate aligned memory */
-            snew(pi, 2*(nj+2*SIMD_WIDTH));
-            aadata->epilogue_mask[i] = (int *) (((size_t) pi + 16) & (~((size_t) 15)));
-
-            max_offset = calc_maxoffset(i, natoms);
-
-            for (k = 0; k < nj; k++)
-            {
-                j = aadata->jindex_gb[4*i+2] + k;
-                aadata->epilogue_mask[i][2*k]   = (j <= i+max_offset) ? 0xFFFFFFFF : 0;
-                aadata->epilogue_mask[i][2*k+1] = (j <= i+max_offset) ? 0xFFFFFFFF : 0;
-            }
-        }
-    }
-}
-
-
-static void
-genborn_allvsall_setup(gmx_allvsallgb2_data_t     **  p_aadata,
-                       gmx_localtop_t     *           top,
-                       gmx_genborn_t     *            born,
-                       t_mdatoms     *                mdatoms,
-                       double                         radius_offset,
-                       int                            gb_algorithm,
-                       gmx_bool                       bInclude12,
-                       gmx_bool                       bInclude13,
-                       gmx_bool                       bInclude14)
-{
-    int                     i, j, idx;
-    int                     natoms;
-    gmx_allvsallgb2_data_t *aadata;
-    double                 *p;
-
-    natoms = mdatoms->nr;
-
-    snew(aadata, 1);
-    *p_aadata = aadata;
-
-    snew(p, 2*natoms+2*SIMD_WIDTH);
-    aadata->x_align = (double *) (((size_t) p + 16) & (~((size_t) 15)));
-    snew(p, 2*natoms+2*SIMD_WIDTH);
-    aadata->y_align = (double *) (((size_t) p + 16) & (~((size_t) 15)));
-    snew(p, 2*natoms+2*SIMD_WIDTH);
-    aadata->z_align = (double *) (((size_t) p + 16) & (~((size_t) 15)));
-    snew(p, 2*natoms+2*SIMD_WIDTH);
-    aadata->fx_align = (double *) (((size_t) p + 16) & (~((size_t) 15)));
-    snew(p, 2*natoms+2*SIMD_WIDTH);
-    aadata->fy_align = (double *) (((size_t) p + 16) & (~((size_t) 15)));
-    snew(p, 2*natoms+2*SIMD_WIDTH);
-    aadata->fz_align = (double *) (((size_t) p + 16) & (~((size_t) 15)));
-
-    snew(p, 2*natoms+UNROLLJ+SIMD_WIDTH);
-    aadata->gb_radius = (double *) (((size_t) p + 16) & (~((size_t) 15)));
-
-    snew(p, 2*natoms+UNROLLJ+SIMD_WIDTH);
-    aadata->workparam = (double *) (((size_t) p + 16) & (~((size_t) 15)));
-
-    snew(p, 2*natoms+UNROLLJ+SIMD_WIDTH);
-    aadata->work = (double *) (((size_t) p + 16) & (~((size_t) 15)));
-
-    for (i = 0; i < mdatoms->nr; i++)
-    {
-        aadata->gb_radius[i] = top->atomtypes.gb_radius[mdatoms->typeA[i]] - radius_offset;
-        if (gb_algorithm == egbSTILL)
-        {
-            aadata->workparam[i] = born->vsolv[i];
-        }
-        else if (gb_algorithm == egbOBC)
-        {
-            aadata->workparam[i] = born->param[i];
-        }
-        aadata->work[i]      = 0.0;
-    }
-    for (i = 0; i < mdatoms->nr; i++)
-    {
-        aadata->gb_radius[natoms+i] = aadata->gb_radius[i];
-        aadata->workparam[natoms+i] = aadata->workparam[i];
-        aadata->work[natoms+i]      = aadata->work[i];
-    }
-
-    for (i = 0; i < 2*natoms+SIMD_WIDTH; i++)
-    {
-        aadata->x_align[i]  = 0.0;
-        aadata->y_align[i]  = 0.0;
-        aadata->z_align[i]  = 0.0;
-        aadata->fx_align[i] = 0.0;
-        aadata->fy_align[i] = 0.0;
-        aadata->fz_align[i] = 0.0;
-    }
-
-    setup_gb_exclusions_and_indices(aadata, top->idef.il, 0, mdatoms->homenr, mdatoms->nr,
-                                    bInclude12, bInclude13, bInclude14);
-}
-
-
-/*
- * This routine apparently hits a compiler bug visual studio has had 'forever'.
- * It is present both in VS2005 and VS2008, and the only way around it is to
- * decrease optimization. We do that with at pragma, and only for MSVC, so it
- * will not hurt any of the well-behaving and supported compilers out there.
- * MS: Fix your compiler, it sucks like a black hole!
- */
-#ifdef _MSC_VER
-#pragma optimize("t",off)
-#endif
-
-int /* Still-model Born radii, all-vs-all SSE2 double-precision kernel; always returns 0 */
-genborn_allvsall_calc_still_radii_sse2_double(t_forcerec   *           fr, /* fr->dadx is written, fr->invsqrta[] is updated */
-                                              t_mdatoms   *            mdatoms, /* supplies nr (natoms) and homenr (end of the i loop) */
-                                              gmx_genborn_t   *        born, /* use[] and gpol[] are read, bRad[] is written */
-                                              gmx_localtop_t   *       top, /* only forwarded to genborn_allvsall_setup() */
-                                              double *                 x, /* coordinates, read as x[3*k], x[3*k+1], x[3*k+2] */
-                                              t_commrec   *            cr, /* not referenced in this routine */
-                                              void   *                 paadata) /* treated as gmx_allvsallgb2_data_t **; filled in on first call */
-{
-    gmx_allvsallgb2_data_t *aadata;
-    int                     natoms;
-    int                     ni0, ni1;
-    int                     nj0, nj1, nj2, nj3;
-    int                     i, j, k, n;
-    int              *      mask; /* not referenced below */
-    int              *      pmask0;
-    int              *      pmask1;
-    int              *      emask0;
-    int              *      emask1;
-    double                  ix, iy, iz;
-    double                  jx, jy, jz;
-    double                  dx, dy, dz;
-    double                  rsq, rinv;
-    double                  gpi, rai, vai;
-    double                  prod_ai;
-    double                  irsq, idr4, idr6;
-    double                  raj, rvdw, ratio;
-    double                  vaj, ccf, dccf, theta, cosq;
-    double                  term, prod, icf4, icf6, gpi2, factor, sinq;
-    double            *     gb_radius;
-    double            *     vsolv;
-    double            *     work;
-    double                  tmpsum[2]; /* not referenced below */
-    double            *     x_align;
-    double            *     y_align;
-    double            *     z_align;
-    int              *      jindex;
-    double            *     dadx;
-
-    __m128d                 ix_SSE0, iy_SSE0, iz_SSE0;
-    __m128d                 ix_SSE1, iy_SSE1, iz_SSE1;
-    __m128d                 gpi_SSE0, rai_SSE0, prod_ai_SSE0;
-    __m128d                 gpi_SSE1, rai_SSE1, prod_ai_SSE1;
-    __m128d                 imask_SSE0, jmask_SSE0;
-    __m128d                 imask_SSE1, jmask_SSE1;
-    __m128d                 jx_SSE, jy_SSE, jz_SSE;
-    __m128d                 dx_SSE0, dy_SSE0, dz_SSE0;
-    __m128d                 dx_SSE1, dy_SSE1, dz_SSE1;
-    __m128d                 rsq_SSE0, rinv_SSE0, irsq_SSE0, idr4_SSE0, idr6_SSE0;
-    __m128d                 rsq_SSE1, rinv_SSE1, irsq_SSE1, idr4_SSE1, idr6_SSE1;
-    __m128d                 raj_SSE, vaj_SSE, prod_SSE;
-    __m128d                 rvdw_SSE0, ratio_SSE0;
-    __m128d                 rvdw_SSE1, ratio_SSE1;
-    __m128d                 theta_SSE0, sinq_SSE0, cosq_SSE0, term_SSE0;
-    __m128d                 theta_SSE1, sinq_SSE1, cosq_SSE1, term_SSE1;
-    __m128d                 ccf_SSE0, dccf_SSE0;
-    __m128d                 ccf_SSE1, dccf_SSE1;
-    __m128d                 icf4_SSE0, icf6_SSE0;
-    __m128d                 icf4_SSE1, icf6_SSE1;
-    __m128d                 half_SSE, one_SSE, two_SSE, four_SSE;
-    __m128d                 still_p4_SSE, still_p5inv_SSE, still_pip5_SSE;
-
-    natoms              = mdatoms->nr;
-    ni0                 = 0;
-    ni1                 = mdatoms->homenr;
-
-    n = 0; /* n is not used after this assignment */
-
-    aadata = *((gmx_allvsallgb2_data_t **)paadata);
-
-
-    if (aadata == NULL) /* first call: build the all-vs-all data and hand it back through paadata */
-    {
-        genborn_allvsall_setup(&aadata, top, born, mdatoms, 0.0,
-                               egbSTILL, FALSE, FALSE, TRUE);
-        *((gmx_allvsallgb2_data_t **)paadata) = aadata;
-    }
-
-    x_align = aadata->x_align;
-    y_align = aadata->y_align;
-    z_align = aadata->z_align;
-
-    gb_radius = aadata->gb_radius;
-    vsolv     = aadata->workparam; /* presumably born->vsolv[] when set up with egbSTILL - confirm in the setup routine */
-    work      = aadata->work;
-    jindex    = aadata->jindex_gb;
-    dadx      = fr->dadx;
-
-    still_p4_SSE    = _mm_set1_pd(STILL_P4);
-    still_p5inv_SSE = _mm_set1_pd(STILL_P5INV);
-    still_pip5_SSE  = _mm_set1_pd(STILL_PIP5);
-    half_SSE        = _mm_set1_pd(0.5);
-    one_SSE         = _mm_set1_pd(1.0);
-    two_SSE         = _mm_set1_pd(2.0);
-    four_SSE        = _mm_set1_pd(4.0);
-
-    /* This will be summed, so it has to extend to natoms + buffer */
-    for (i = 0; i < natoms+1+natoms/2; i++)
-    {
-        work[i] = 0;
-    }
-
-    for (i = ni0; i < ni1+1+natoms/2; i++)
-    {
-        k           = i%natoms; /* wrap, so entries at or past natoms repeat the leading atoms */
-        x_align[i]  = x[3*k];
-        y_align[i]  = x[3*k+1];
-        z_align[i]  = x[3*k+2];
-        work[i]     = 0;
-    }
-
-    for (i = ni0; i < ni1; i += UNROLLI) /* each pass handles atoms i and i+1 */
-    {
-        /* We assume shifts are NOT used for all-vs-all interactions */
-        /* Load i atom data */
-        ix_SSE0          = _mm_load1_pd(x_align+i);
-        iy_SSE0          = _mm_load1_pd(y_align+i);
-        iz_SSE0          = _mm_load1_pd(z_align+i);
-        ix_SSE1          = _mm_load1_pd(x_align+i+1);
-        iy_SSE1          = _mm_load1_pd(y_align+i+1);
-        iz_SSE1          = _mm_load1_pd(z_align+i+1);
-
-        gpi_SSE0         = _mm_setzero_pd();
-        gpi_SSE1         = _mm_setzero_pd();
-
-        rai_SSE0         = _mm_load1_pd(gb_radius+i);
-        rai_SSE1         = _mm_load1_pd(gb_radius+i+1);
-
-        prod_ai_SSE0     = _mm_set1_pd(STILL_P4*vsolv[i]);
-        prod_ai_SSE1     = _mm_set1_pd(STILL_P4*vsolv[i+1]);
-
-        /* Load j-loop limits: [nj0,nj1) prologue, [nj1,nj2) main part, [nj2,nj3) epilogue */
-        nj0              = jindex[4*i];
-        nj1              = jindex[4*i+1];
-        nj2              = jindex[4*i+2];
-        nj3              = jindex[4*i+3];
-
-        pmask0           = aadata->prologue_mask_gb[i];
-        pmask1           = aadata->prologue_mask_gb[i+1];
-        emask0           = aadata->epilogue_mask[i];
-        emask1           = aadata->epilogue_mask[i+1];
-
-        imask_SSE0        = _mm_load1_pd((double *)(aadata->imask+2*i)); /* NOTE(review): imask looks like one double-wide bit mask per atom (stride 2) - confirm */
-        imask_SSE1        = _mm_load1_pd((double *)(aadata->imask+2*i+2));
-
-        /* Prologue part, including exclusion mask */
-        for (j = nj0; j < nj1; j += UNROLLJ)
-        {
-            jmask_SSE0 = _mm_load_pd((double *)pmask0);
-            jmask_SSE1 = _mm_load_pd((double *)pmask1);
-            pmask0    += 2*UNROLLJ;
-            pmask1    += 2*UNROLLJ;
-
-            /* load j atom coordinates */
-            jx_SSE            = _mm_load_pd(x_align+j);
-            jy_SSE            = _mm_load_pd(y_align+j);
-            jz_SSE            = _mm_load_pd(z_align+j);
-
-            /* Calculate distance */
-            dx_SSE0            = _mm_sub_pd(ix_SSE0, jx_SSE);
-            dy_SSE0            = _mm_sub_pd(iy_SSE0, jy_SSE);
-            dz_SSE0            = _mm_sub_pd(iz_SSE0, jz_SSE);
-            dx_SSE1            = _mm_sub_pd(ix_SSE1, jx_SSE);
-            dy_SSE1            = _mm_sub_pd(iy_SSE1, jy_SSE);
-            dz_SSE1            = _mm_sub_pd(iz_SSE1, jz_SSE);
-
-            /* rsq = dx*dx+dy*dy+dz*dz */
-            rsq_SSE0           = gmx_mm_calc_rsq_pd(dx_SSE0, dy_SSE0, dz_SSE0);
-            rsq_SSE1           = gmx_mm_calc_rsq_pd(dx_SSE1, dy_SSE1, dz_SSE1);
-
-            /* Combine masks */
-            jmask_SSE0         = _mm_and_pd(jmask_SSE0, imask_SSE0);
-            jmask_SSE1         = _mm_and_pd(jmask_SSE1, imask_SSE1);
-
-            /* Calculate 1/r (1/r2 follows once the mask has been applied) */
-            rinv_SSE0          = gmx_mm_invsqrt_pd(rsq_SSE0);
-            rinv_SSE1          = gmx_mm_invsqrt_pd(rsq_SSE1);
-
-            /* Apply mask: the AND clears 1/r where the mask bits are zero, which zeroes idr4/idr6 below */
-            rinv_SSE0          = _mm_and_pd(rinv_SSE0, jmask_SSE0);
-            rinv_SSE1          = _mm_and_pd(rinv_SSE1, jmask_SSE1);
-
-            irsq_SSE0          = _mm_mul_pd(rinv_SSE0, rinv_SSE0);
-            irsq_SSE1          = _mm_mul_pd(rinv_SSE1, rinv_SSE1);
-            idr4_SSE0          = _mm_mul_pd(irsq_SSE0, irsq_SSE0);
-            idr4_SSE1          = _mm_mul_pd(irsq_SSE1, irsq_SSE1);
-            idr6_SSE0          = _mm_mul_pd(idr4_SSE0, irsq_SSE0);
-            idr6_SSE1          = _mm_mul_pd(idr4_SSE1, irsq_SSE1);
-
-            raj_SSE            = _mm_load_pd(gb_radius+j);
-            vaj_SSE            = _mm_load_pd(vsolv+j);
-
-            rvdw_SSE0          = _mm_add_pd(rai_SSE0, raj_SSE);
-            rvdw_SSE1          = _mm_add_pd(rai_SSE1, raj_SSE);
-
-            ratio_SSE0         = _mm_mul_pd(rsq_SSE0, gmx_mm_inv_pd( _mm_mul_pd(rvdw_SSE0, rvdw_SSE0)));
-            ratio_SSE1         = _mm_mul_pd(rsq_SSE1, gmx_mm_inv_pd( _mm_mul_pd(rvdw_SSE1, rvdw_SSE1)));
-
-            ratio_SSE0         = _mm_min_pd(ratio_SSE0, still_p5inv_SSE);
-            ratio_SSE1         = _mm_min_pd(ratio_SSE1, still_p5inv_SSE);
-            theta_SSE0         = _mm_mul_pd(ratio_SSE0, still_pip5_SSE);
-            theta_SSE1         = _mm_mul_pd(ratio_SSE1, still_pip5_SSE);
-            gmx_mm_sincos_pd(theta_SSE0, &sinq_SSE0, &cosq_SSE0);
-            gmx_mm_sincos_pd(theta_SSE1, &sinq_SSE1, &cosq_SSE1);
-            term_SSE0          = _mm_mul_pd(half_SSE, _mm_sub_pd(one_SSE, cosq_SSE0));
-            term_SSE1          = _mm_mul_pd(half_SSE, _mm_sub_pd(one_SSE, cosq_SSE1));
-            ccf_SSE0           = _mm_mul_pd(term_SSE0, term_SSE0);
-            ccf_SSE1           = _mm_mul_pd(term_SSE1, term_SSE1);
-            dccf_SSE0          = _mm_mul_pd(_mm_mul_pd(two_SSE, term_SSE0),
-                                            _mm_mul_pd(sinq_SSE0, theta_SSE0));
-            dccf_SSE1          = _mm_mul_pd(_mm_mul_pd(two_SSE, term_SSE1),
-                                            _mm_mul_pd(sinq_SSE1, theta_SSE1));
-
-            prod_SSE           = _mm_mul_pd(still_p4_SSE, vaj_SSE);
-            icf4_SSE0          = _mm_mul_pd(ccf_SSE0, idr4_SSE0);
-            icf4_SSE1          = _mm_mul_pd(ccf_SSE1, idr4_SSE1);
-            icf6_SSE0          = _mm_mul_pd( _mm_sub_pd( _mm_mul_pd(four_SSE, ccf_SSE0), dccf_SSE0), idr6_SSE0);
-            icf6_SSE1          = _mm_mul_pd( _mm_sub_pd( _mm_mul_pd(four_SSE, ccf_SSE1), dccf_SSE1), idr6_SSE1);
-
-            _mm_store_pd(work+j, _mm_add_pd(_mm_load_pd(work+j),
-                                            _mm_add_pd(_mm_mul_pd(prod_ai_SSE0, icf4_SSE0),
-                                                       _mm_mul_pd(prod_ai_SSE1, icf4_SSE1))));
-
-
-            gpi_SSE0           = _mm_add_pd(gpi_SSE0, _mm_mul_pd(prod_SSE, icf4_SSE0));
-            gpi_SSE1           = _mm_add_pd(gpi_SSE1, _mm_mul_pd(prod_SSE, icf4_SSE1));
-
-            /* Save ai->aj and aj->ai chain rule terms: 8 doubles appended to dadx per j iteration */
-            _mm_store_pd(dadx, _mm_mul_pd(prod_SSE, icf6_SSE0));
-            dadx += 2;
-            _mm_store_pd(dadx, _mm_mul_pd(prod_SSE, icf6_SSE1));
-            dadx += 2;
-
-            _mm_store_pd(dadx, _mm_mul_pd(prod_ai_SSE0, icf6_SSE0));
-            dadx += 2;
-            _mm_store_pd(dadx, _mm_mul_pd(prod_ai_SSE1, icf6_SSE1));
-            dadx += 2;
-        }
-
-        /* Main part, no exclusions */
-        for (j = nj1; j < nj2; j += UNROLLJ)
-        {
-
-            /* load j atom coordinates */
-            jx_SSE            = _mm_load_pd(x_align+j);
-            jy_SSE            = _mm_load_pd(y_align+j);
-            jz_SSE            = _mm_load_pd(z_align+j);
-
-            /* Calculate distance */
-            dx_SSE0            = _mm_sub_pd(ix_SSE0, jx_SSE);
-            dy_SSE0            = _mm_sub_pd(iy_SSE0, jy_SSE);
-            dz_SSE0            = _mm_sub_pd(iz_SSE0, jz_SSE);
-            dx_SSE1            = _mm_sub_pd(ix_SSE1, jx_SSE);
-            dy_SSE1            = _mm_sub_pd(iy_SSE1, jy_SSE);
-            dz_SSE1            = _mm_sub_pd(iz_SSE1, jz_SSE);
-
-            /* rsq = dx*dx+dy*dy+dz*dz */
-            rsq_SSE0           = gmx_mm_calc_rsq_pd(dx_SSE0, dy_SSE0, dz_SSE0);
-            rsq_SSE1           = gmx_mm_calc_rsq_pd(dx_SSE1, dy_SSE1, dz_SSE1);
-
-            /* Calculate 1/r (1/r2 follows once the mask has been applied) */
-            rinv_SSE0          = gmx_mm_invsqrt_pd(rsq_SSE0);
-            rinv_SSE1          = gmx_mm_invsqrt_pd(rsq_SSE1);
-
-            /* Apply the i-atom mask only; no per-j mask is loaded in the main part */
-            rinv_SSE0          = _mm_and_pd(rinv_SSE0, imask_SSE0);
-            rinv_SSE1          = _mm_and_pd(rinv_SSE1, imask_SSE1);
-
-            irsq_SSE0          = _mm_mul_pd(rinv_SSE0, rinv_SSE0);
-            irsq_SSE1          = _mm_mul_pd(rinv_SSE1, rinv_SSE1);
-            idr4_SSE0          = _mm_mul_pd(irsq_SSE0, irsq_SSE0);
-            idr4_SSE1          = _mm_mul_pd(irsq_SSE1, irsq_SSE1);
-            idr6_SSE0          = _mm_mul_pd(idr4_SSE0, irsq_SSE0);
-            idr6_SSE1          = _mm_mul_pd(idr4_SSE1, irsq_SSE1);
-
-            raj_SSE            = _mm_load_pd(gb_radius+j);
-
-            rvdw_SSE0          = _mm_add_pd(rai_SSE0, raj_SSE);
-            rvdw_SSE1          = _mm_add_pd(rai_SSE1, raj_SSE);
-            vaj_SSE            = _mm_load_pd(vsolv+j);
-
-            ratio_SSE0         = _mm_mul_pd(rsq_SSE0, gmx_mm_inv_pd( _mm_mul_pd(rvdw_SSE0, rvdw_SSE0)));
-            ratio_SSE1         = _mm_mul_pd(rsq_SSE1, gmx_mm_inv_pd( _mm_mul_pd(rvdw_SSE1, rvdw_SSE1)));
-
-            ratio_SSE0         = _mm_min_pd(ratio_SSE0, still_p5inv_SSE);
-            ratio_SSE1         = _mm_min_pd(ratio_SSE1, still_p5inv_SSE);
-            theta_SSE0         = _mm_mul_pd(ratio_SSE0, still_pip5_SSE);
-            theta_SSE1         = _mm_mul_pd(ratio_SSE1, still_pip5_SSE);
-            gmx_mm_sincos_pd(theta_SSE0, &sinq_SSE0, &cosq_SSE0);
-            gmx_mm_sincos_pd(theta_SSE1, &sinq_SSE1, &cosq_SSE1);
-            term_SSE0          = _mm_mul_pd(half_SSE, _mm_sub_pd(one_SSE, cosq_SSE0));
-            term_SSE1          = _mm_mul_pd(half_SSE, _mm_sub_pd(one_SSE, cosq_SSE1));
-            ccf_SSE0           = _mm_mul_pd(term_SSE0, term_SSE0);
-            ccf_SSE1           = _mm_mul_pd(term_SSE1, term_SSE1);
-            dccf_SSE0          = _mm_mul_pd(_mm_mul_pd(two_SSE, term_SSE0),
-                                            _mm_mul_pd(sinq_SSE0, theta_SSE0));
-            dccf_SSE1          = _mm_mul_pd(_mm_mul_pd(two_SSE, term_SSE1),
-                                            _mm_mul_pd(sinq_SSE1, theta_SSE1));
-
-            prod_SSE           = _mm_mul_pd(still_p4_SSE, vaj_SSE );
-            icf4_SSE0          = _mm_mul_pd(ccf_SSE0, idr4_SSE0);
-            icf4_SSE1          = _mm_mul_pd(ccf_SSE1, idr4_SSE1);
-            icf6_SSE0          = _mm_mul_pd( _mm_sub_pd( _mm_mul_pd(four_SSE, ccf_SSE0), dccf_SSE0), idr6_SSE0);
-            icf6_SSE1          = _mm_mul_pd( _mm_sub_pd( _mm_mul_pd(four_SSE, ccf_SSE1), dccf_SSE1), idr6_SSE1);
-
-            _mm_store_pd(work+j, _mm_add_pd(_mm_load_pd(work+j),
-                                            _mm_add_pd(_mm_mul_pd(prod_ai_SSE0, icf4_SSE0),
-                                                       _mm_mul_pd(prod_ai_SSE1, icf4_SSE1))));
-
-            gpi_SSE0           = _mm_add_pd(gpi_SSE0, _mm_mul_pd(prod_SSE, icf4_SSE0));
-            gpi_SSE1           = _mm_add_pd(gpi_SSE1, _mm_mul_pd(prod_SSE, icf4_SSE1));
-
-            /* Save ai->aj and aj->ai chain rule terms: 8 doubles appended to dadx per j iteration */
-            _mm_store_pd(dadx, _mm_mul_pd(prod_SSE, icf6_SSE0));
-            dadx += 2;
-            _mm_store_pd(dadx, _mm_mul_pd(prod_SSE, icf6_SSE1));
-            dadx += 2;
-
-            _mm_store_pd(dadx, _mm_mul_pd(prod_ai_SSE0, icf6_SSE0));
-            dadx += 2;
-            _mm_store_pd(dadx, _mm_mul_pd(prod_ai_SSE1, icf6_SSE1));
-            dadx += 2;
-        }
-        /* Epilogue part, including exclusion mask */
-        for (j = nj2; j < nj3; j += UNROLLJ)
-        {
-            jmask_SSE0 = _mm_load_pd((double *)emask0);
-            jmask_SSE1 = _mm_load_pd((double *)emask1);
-            emask0    += 2*UNROLLJ;
-            emask1    += 2*UNROLLJ;
-
-            /* load j atom coordinates */
-            jx_SSE            = _mm_load_pd(x_align+j);
-            jy_SSE            = _mm_load_pd(y_align+j);
-            jz_SSE            = _mm_load_pd(z_align+j);
-
-            /* Calculate distance */
-            dx_SSE0            = _mm_sub_pd(ix_SSE0, jx_SSE);
-            dy_SSE0            = _mm_sub_pd(iy_SSE0, jy_SSE);
-            dz_SSE0            = _mm_sub_pd(iz_SSE0, jz_SSE);
-            dx_SSE1            = _mm_sub_pd(ix_SSE1, jx_SSE);
-            dy_SSE1            = _mm_sub_pd(iy_SSE1, jy_SSE);
-            dz_SSE1            = _mm_sub_pd(iz_SSE1, jz_SSE);
-
-            /* rsq = dx*dx+dy*dy+dz*dz */
-            rsq_SSE0           = gmx_mm_calc_rsq_pd(dx_SSE0, dy_SSE0, dz_SSE0);
-            rsq_SSE1           = gmx_mm_calc_rsq_pd(dx_SSE1, dy_SSE1, dz_SSE1);
-
-            /* Combine masks */
-            jmask_SSE0         = _mm_and_pd(jmask_SSE0, imask_SSE0);
-            jmask_SSE1         = _mm_and_pd(jmask_SSE1, imask_SSE1);
-
-            /* Calculate 1/r (1/r2 follows once the mask has been applied) */
-            rinv_SSE0          = gmx_mm_invsqrt_pd(rsq_SSE0);
-            rinv_SSE1          = gmx_mm_invsqrt_pd(rsq_SSE1);
-
-            /* Apply mask: the AND clears 1/r where the mask bits are zero, which zeroes idr4/idr6 below */
-            rinv_SSE0          = _mm_and_pd(rinv_SSE0, jmask_SSE0);
-            rinv_SSE1          = _mm_and_pd(rinv_SSE1, jmask_SSE1);
-
-            irsq_SSE0          = _mm_mul_pd(rinv_SSE0, rinv_SSE0);
-            irsq_SSE1          = _mm_mul_pd(rinv_SSE1, rinv_SSE1);
-            idr4_SSE0          = _mm_mul_pd(irsq_SSE0, irsq_SSE0);
-            idr4_SSE1          = _mm_mul_pd(irsq_SSE1, irsq_SSE1);
-            idr6_SSE0          = _mm_mul_pd(idr4_SSE0, irsq_SSE0);
-            idr6_SSE1          = _mm_mul_pd(idr4_SSE1, irsq_SSE1);
-
-            raj_SSE            = _mm_load_pd(gb_radius+j);
-            vaj_SSE            = _mm_load_pd(vsolv+j);
-
-            rvdw_SSE0          = _mm_add_pd(rai_SSE0, raj_SSE);
-            rvdw_SSE1          = _mm_add_pd(rai_SSE1, raj_SSE);
-
-            ratio_SSE0         = _mm_mul_pd(rsq_SSE0, gmx_mm_inv_pd( _mm_mul_pd(rvdw_SSE0, rvdw_SSE0)));
-            ratio_SSE1         = _mm_mul_pd(rsq_SSE1, gmx_mm_inv_pd( _mm_mul_pd(rvdw_SSE1, rvdw_SSE1)));
-
-            ratio_SSE0         = _mm_min_pd(ratio_SSE0, still_p5inv_SSE);
-            ratio_SSE1         = _mm_min_pd(ratio_SSE1, still_p5inv_SSE);
-            theta_SSE0         = _mm_mul_pd(ratio_SSE0, still_pip5_SSE);
-            theta_SSE1         = _mm_mul_pd(ratio_SSE1, still_pip5_SSE);
-            gmx_mm_sincos_pd(theta_SSE0, &sinq_SSE0, &cosq_SSE0);
-            gmx_mm_sincos_pd(theta_SSE1, &sinq_SSE1, &cosq_SSE1);
-            term_SSE0          = _mm_mul_pd(half_SSE, _mm_sub_pd(one_SSE, cosq_SSE0));
-            term_SSE1          = _mm_mul_pd(half_SSE, _mm_sub_pd(one_SSE, cosq_SSE1));
-            ccf_SSE0           = _mm_mul_pd(term_SSE0, term_SSE0);
-            ccf_SSE1           = _mm_mul_pd(term_SSE1, term_SSE1);
-            dccf_SSE0          = _mm_mul_pd(_mm_mul_pd(two_SSE, term_SSE0),
-                                            _mm_mul_pd(sinq_SSE0, theta_SSE0));
-            dccf_SSE1          = _mm_mul_pd(_mm_mul_pd(two_SSE, term_SSE1),
-                                            _mm_mul_pd(sinq_SSE1, theta_SSE1));
-
-            prod_SSE           = _mm_mul_pd(still_p4_SSE, vaj_SSE);
-            icf4_SSE0          = _mm_mul_pd(ccf_SSE0, idr4_SSE0);
-            icf4_SSE1          = _mm_mul_pd(ccf_SSE1, idr4_SSE1);
-            icf6_SSE0          = _mm_mul_pd( _mm_sub_pd( _mm_mul_pd(four_SSE, ccf_SSE0), dccf_SSE0), idr6_SSE0);
-            icf6_SSE1          = _mm_mul_pd( _mm_sub_pd( _mm_mul_pd(four_SSE, ccf_SSE1), dccf_SSE1), idr6_SSE1);
-
-            _mm_store_pd(work+j, _mm_add_pd(_mm_load_pd(work+j),
-                                            _mm_add_pd(_mm_mul_pd(prod_ai_SSE0, icf4_SSE0),
-                                                       _mm_mul_pd(prod_ai_SSE1, icf4_SSE1))));
-
-            gpi_SSE0           = _mm_add_pd(gpi_SSE0, _mm_mul_pd(prod_SSE, icf4_SSE0));
-            gpi_SSE1           = _mm_add_pd(gpi_SSE1, _mm_mul_pd(prod_SSE, icf4_SSE1));
-
-            /* Save ai->aj and aj->ai chain rule terms: 8 doubles appended to dadx per j iteration */
-            _mm_store_pd(dadx, _mm_mul_pd(prod_SSE, icf6_SSE0));
-            dadx += 2;
-            _mm_store_pd(dadx, _mm_mul_pd(prod_SSE, icf6_SSE1));
-            dadx += 2;
-
-            _mm_store_pd(dadx, _mm_mul_pd(prod_ai_SSE0, icf6_SSE0));
-            dadx += 2;
-            _mm_store_pd(dadx, _mm_mul_pd(prod_ai_SSE1, icf6_SSE1));
-            dadx += 2;
-        }
-        GMX_MM_TRANSPOSE2_PD(gpi_SSE0, gpi_SSE1);
-        gpi_SSE0 = _mm_add_pd(gpi_SSE0, gpi_SSE1); /* presumably leaves the per-atom sums for i and i+1 in the two lanes - confirm against the macro */
-        _mm_store_pd(work+i, _mm_add_pd(gpi_SSE0, _mm_load_pd(work+i)));
-    }
-
-    /* In case we have written anything beyond natoms, move it back.
-     * Never mind that we leave stuff above natoms; that will not
-     * be accessed later in the routine.
-     * In principle this should be a move rather than sum, but this
-     * way we don't have to worry about even/odd offsets...
-     */
-    for (i = natoms; i < ni1+1+natoms/2; i++)
-    {
-        work[i-natoms] += work[i];
-    }
-
-    /* Parallel summations would go here if ever implemented with DD */
-
-    factor  = 0.5 * ONE_4PI_EPS0;
-    /* Calculate the radii - should we do all atoms, or just our local ones? */
-    for (i = 0; i < natoms; i++)
-    {
-        if (born->use[i] != 0)
-        {
-            gpi             = born->gpol[i]+work[i];
-            gpi2            = gpi * gpi;
-            born->bRad[i]   = factor*gmx_invsqrt(gpi2); /* i.e. factor/|gpi|, assuming gmx_invsqrt(x) is 1/sqrt(x) */
-            fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
-        }
-    }
-
-    return 0;
-}
-/* Reinstate MSVC optimization */
-#ifdef _MSC_VER
-#pragma optimize("",on)
-#endif
-
-
-int
-genborn_allvsall_calc_hct_obc_radii_sse2_double(t_forcerec   *           fr,
-                                                t_mdatoms   *            mdatoms,
-                                                gmx_genborn_t   *        born,
-                                                int                      gb_algorithm,
-                                                gmx_localtop_t   *       top,
-                                                double *                 x,
-                                                t_commrec   *            cr,
-                                                void   *                 paadata)
-{
-    gmx_allvsallgb2_data_t *aadata;
-    int                     natoms;
-    int                     ni0, ni1;
-    int                     nj0, nj1, nj2, nj3;
-    int                     i, j, k, n;
-    int              *      mask;
-    int              *      pmask0;
-    int              *      pmask1;
-    int              *      emask0;
-    int              *      emask1;
-    double            *     gb_radius;
-    double            *     vsolv;
-    double            *     work;
-    double                  tmpsum[2];
-    double            *     x_align;
-    double            *     y_align;
-    double            *     z_align;
-    int              *      jindex;
-    double            *     dadx;
-    double            *     obc_param;
-    double                  rad, min_rad;
-    double                  rai, rai_inv, rai_inv2, sum_ai, sum_ai2, sum_ai3, tsum, tchain;
-
-    __m128d                 ix_SSE0, iy_SSE0, iz_SSE0;
-    __m128d                 ix_SSE1, iy_SSE1, iz_SSE1;
-    __m128d                 gpi_SSE0, rai_SSE0, prod_ai_SSE0;
-    __m128d                 gpi_SSE1, rai_SSE1, prod_ai_SSE1;
-    __m128d                 imask_SSE0, jmask_SSE0;
-    __m128d                 imask_SSE1, jmask_SSE1;
-    __m128d                 jx_SSE, jy_SSE, jz_SSE;
-    __m128d                 dx_SSE0, dy_SSE0, dz_SSE0;
-    __m128d                 dx_SSE1, dy_SSE1, dz_SSE1;
-    __m128d                 rsq_SSE0, rinv_SSE0, irsq_SSE0, idr4_SSE0, idr6_SSE0;
-    __m128d                 rsq_SSE1, rinv_SSE1, irsq_SSE1, idr4_SSE1, idr6_SSE1;
-    __m128d                 raj_SSE, raj_inv_SSE, sk_aj_SSE, sk2_aj_SSE;
-    __m128d                 ccf_SSE0, dccf_SSE0, prod_SSE0;
-    __m128d                 ccf_SSE1, dccf_SSE1, prod_SSE1;
-    __m128d                 icf4_SSE0, icf6_SSE0;
-    __m128d                 icf4_SSE1, icf6_SSE1;
-    __m128d                 oneeighth_SSE, onefourth_SSE, half_SSE, one_SSE, two_SSE, four_SSE;
-    __m128d                 still_p4_SSE, still_p5inv_SSE, still_pip5_SSE;
-    __m128d                 rai_inv_SSE0;
-    __m128d                 rai_inv_SSE1;
-    __m128d                 sk_ai_SSE0, sk2_ai_SSE0, sum_ai_SSE0;
-    __m128d                 sk_ai_SSE1, sk2_ai_SSE1, sum_ai_SSE1;
-    __m128d                 lij_inv_SSE0, sk2_rinv_SSE0;
-    __m128d                 lij_inv_SSE1, sk2_rinv_SSE1;
-    __m128d                 dr_SSE0;
-    __m128d                 dr_SSE1;
-    __m128d                 t1_SSE0, t2_SSE0, t3_SSE0, t4_SSE0;
-    __m128d                 t1_SSE1, t2_SSE1, t3_SSE1, t4_SSE1;
-    __m128d                 obc_mask1_SSE0, obc_mask2_SSE0, obc_mask3_SSE0;
-    __m128d                 obc_mask1_SSE1, obc_mask2_SSE1, obc_mask3_SSE1;
-    __m128d                 uij_SSE0, uij2_SSE0, uij3_SSE0;
-    __m128d                 uij_SSE1, uij2_SSE1, uij3_SSE1;
-    __m128d                 lij_SSE0, lij2_SSE0, lij3_SSE0;
-    __m128d                 lij_SSE1, lij2_SSE1, lij3_SSE1;
-    __m128d                 dlij_SSE0, diff2_SSE0, logterm_SSE0;
-    __m128d                 dlij_SSE1, diff2_SSE1, logterm_SSE1;
-    __m128d                 doffset_SSE, tmpSSE;
-
-    natoms              = mdatoms->nr;
-    ni0                 = 0;
-    ni1                 = mdatoms->homenr;
-
-    n = 0;
-
-    aadata = *((gmx_allvsallgb2_data_t **)paadata);
-
-
-    if (aadata == NULL)
-    {
-        genborn_allvsall_setup(&aadata, top, born, mdatoms, born->gb_doffset,
-                               egbOBC, TRUE, TRUE, TRUE);
-        *((gmx_allvsallgb2_data_t **)paadata) = aadata;
-    }
-
-    x_align = aadata->x_align;
-    y_align = aadata->y_align;
-    z_align = aadata->z_align;
-
-    gb_radius = aadata->gb_radius;
-    work      = aadata->work;
-    jindex    = aadata->jindex_gb;
-    dadx      = fr->dadx;
-    obc_param = aadata->workparam;
-
-    oneeighth_SSE   = _mm_set1_pd(0.125);
-    onefourth_SSE   = _mm_set1_pd(0.25);
-    half_SSE        = _mm_set1_pd(0.5);
-    one_SSE         = _mm_set1_pd(1.0);
-    two_SSE         = _mm_set1_pd(2.0);
-    four_SSE        = _mm_set1_pd(4.0);
-    doffset_SSE     = _mm_set1_pd(born->gb_doffset);
-
-    for (i = 0; i < natoms; i++)
-    {
-        x_align[i]  = x[3*i];
-        y_align[i]  = x[3*i+1];
-        z_align[i]  = x[3*i+2];
-    }
-
-    /* Copy again */
-    for (i = 0; i < natoms/2+1; i++)
-    {
-        x_align[natoms+i]  = x_align[i];
-        y_align[natoms+i]  = y_align[i];
-        z_align[natoms+i]  = z_align[i];
-    }
-
-    for (i = 0; i < natoms+natoms/2+1; i++)
-    {
-        work[i] = 0;
-    }
-
-    for (i = ni0; i < ni1; i += UNROLLI)
-    {
-        /* We assume shifts are NOT used for all-vs-all interactions */
-
-        /* Load i atom data */
-        ix_SSE0          = _mm_load1_pd(x_align+i);
-        iy_SSE0          = _mm_load1_pd(y_align+i);
-        iz_SSE0          = _mm_load1_pd(z_align+i);
-        ix_SSE1          = _mm_load1_pd(x_align+i+1);
-        iy_SSE1          = _mm_load1_pd(y_align+i+1);
-        iz_SSE1          = _mm_load1_pd(z_align+i+1);
-
-        rai_SSE0         = _mm_load1_pd(gb_radius+i);
-        rai_SSE1         = _mm_load1_pd(gb_radius+i+1);
-        rai_inv_SSE0     = gmx_mm_inv_pd(rai_SSE0);
-        rai_inv_SSE1     = gmx_mm_inv_pd(rai_SSE1);
-
-        sk_ai_SSE0       = _mm_load1_pd(obc_param+i);
-        sk_ai_SSE1       = _mm_load1_pd(obc_param+i+1);
-        sk2_ai_SSE0      = _mm_mul_pd(sk_ai_SSE0, sk_ai_SSE0);
-        sk2_ai_SSE1      = _mm_mul_pd(sk_ai_SSE1, sk_ai_SSE1);
-
-        sum_ai_SSE0      = _mm_setzero_pd();
-        sum_ai_SSE1      = _mm_setzero_pd();
-
-        /* Load limits for loop over neighbors */
-        nj0              = jindex[4*i];
-        nj1              = jindex[4*i+1];
-        nj2              = jindex[4*i+2];
-        nj3              = jindex[4*i+3];
-
-        pmask0           = aadata->prologue_mask_gb[i];
-        pmask1           = aadata->prologue_mask_gb[i+1];
-        emask0           = aadata->epilogue_mask[i];
-        emask1           = aadata->epilogue_mask[i+1];
-
-        imask_SSE0        = _mm_load1_pd((double *)(aadata->imask+2*i));
-        imask_SSE1        = _mm_load1_pd((double *)(aadata->imask+2*i+2));
-
-        /* Prologue part, including exclusion mask */
-        for (j = nj0; j < nj1; j += UNROLLJ)
-        {
-            jmask_SSE0 = _mm_load_pd((double *)pmask0);
-            jmask_SSE1 = _mm_load_pd((double *)pmask1);
-            pmask0    += 2*UNROLLJ;
-            pmask1    += 2*UNROLLJ;
-
-            /* load j atom coordinates */
-            jx_SSE            = _mm_load_pd(x_align+j);
-            jy_SSE            = _mm_load_pd(y_align+j);
-            jz_SSE            = _mm_load_pd(z_align+j);
-
-            /* Calculate distance */
-            dx_SSE0            = _mm_sub_pd(ix_SSE0, jx_SSE);
-            dy_SSE0            = _mm_sub_pd(iy_SSE0, jy_SSE);
-            dz_SSE0            = _mm_sub_pd(iz_SSE0, jz_SSE);
-            dx_SSE1            = _mm_sub_pd(ix_SSE1, jx_SSE);
-            dy_SSE1            = _mm_sub_pd(iy_SSE1, jy_SSE);
-            dz_SSE1            = _mm_sub_pd(iz_SSE1, jz_SSE);
-
-            /* rsq = dx*dx+dy*dy+dz*dz */
-            rsq_SSE0           = gmx_mm_calc_rsq_pd(dx_SSE0, dy_SSE0, dz_SSE0);
-            rsq_SSE1           = gmx_mm_calc_rsq_pd(dx_SSE1, dy_SSE1, dz_SSE1);
-
-            /* Combine masks */
-            jmask_SSE0         = _mm_and_pd(jmask_SSE0, imask_SSE0);
-            jmask_SSE1         = _mm_and_pd(jmask_SSE1, imask_SSE1);
-
-            /* Calculate 1/r and 1/r2 */
-            rinv_SSE0          = gmx_mm_invsqrt_pd(rsq_SSE0);
-            rinv_SSE1          = gmx_mm_invsqrt_pd(rsq_SSE1);
-
-            /* Apply mask */
-            rinv_SSE0          = _mm_and_pd(rinv_SSE0, jmask_SSE0);
-            rinv_SSE1          = _mm_and_pd(rinv_SSE1, jmask_SSE1);
-
-            dr_SSE0            = _mm_mul_pd(rsq_SSE0, rinv_SSE0);
-            dr_SSE1            = _mm_mul_pd(rsq_SSE1, rinv_SSE1);
-
-            sk_aj_SSE          = _mm_load_pd(obc_param+j);
-            raj_SSE            = _mm_load_pd(gb_radius+j);
-            raj_inv_SSE        = gmx_mm_inv_pd(raj_SSE);
-
-            /* Evaluate influence of atom aj -> ai */
-            t1_SSE0            = _mm_add_pd(dr_SSE0, sk_aj_SSE);
-            t1_SSE1            = _mm_add_pd(dr_SSE1, sk_aj_SSE);
-            t2_SSE0            = _mm_sub_pd(dr_SSE0, sk_aj_SSE);
-            t2_SSE1            = _mm_sub_pd(dr_SSE1, sk_aj_SSE);
-            t3_SSE0            = _mm_sub_pd(sk_aj_SSE, dr_SSE0);
-            t3_SSE1            = _mm_sub_pd(sk_aj_SSE, dr_SSE1);
-
-            obc_mask1_SSE0     = _mm_cmplt_pd(rai_SSE0, t1_SSE0);
-            obc_mask1_SSE1     = _mm_cmplt_pd(rai_SSE1, t1_SSE1);
-            obc_mask2_SSE0     = _mm_cmplt_pd(rai_SSE0, t2_SSE0);
-            obc_mask2_SSE1     = _mm_cmplt_pd(rai_SSE1, t2_SSE1);
-            obc_mask3_SSE0     = _mm_cmplt_pd(rai_SSE0, t3_SSE0);
-            obc_mask3_SSE1     = _mm_cmplt_pd(rai_SSE1, t3_SSE1);
-            obc_mask1_SSE0     = _mm_and_pd(obc_mask1_SSE0, jmask_SSE0);
-            obc_mask1_SSE1     = _mm_and_pd(obc_mask1_SSE1, jmask_SSE1);
-
-            uij_SSE0           = gmx_mm_inv_pd(t1_SSE0);
-            uij_SSE1           = gmx_mm_inv_pd(t1_SSE1);
-            lij_SSE0           = _mm_or_pd(   _mm_and_pd(obc_mask2_SSE0, gmx_mm_inv_pd(t2_SSE0)),
-                                              _mm_andnot_pd(obc_mask2_SSE0, rai_inv_SSE0));
-            lij_SSE1           = _mm_or_pd(   _mm_and_pd(obc_mask2_SSE1, gmx_mm_inv_pd(t2_SSE1)),
-                                              _mm_andnot_pd(obc_mask2_SSE1, rai_inv_SSE1));
-            dlij_SSE0          = _mm_and_pd(one_SSE, obc_mask2_SSE0);
-            dlij_SSE1          = _mm_and_pd(one_SSE, obc_mask2_SSE1);
-
-            uij2_SSE0          = _mm_mul_pd(uij_SSE0, uij_SSE0);
-            uij2_SSE1          = _mm_mul_pd(uij_SSE1, uij_SSE1);
-            uij3_SSE0          = _mm_mul_pd(uij2_SSE0, uij_SSE0);
-            uij3_SSE1          = _mm_mul_pd(uij2_SSE1, uij_SSE1);
-            lij2_SSE0          = _mm_mul_pd(lij_SSE0, lij_SSE0);
-            lij2_SSE1          = _mm_mul_pd(lij_SSE1, lij_SSE1);
-            lij3_SSE0          = _mm_mul_pd(lij2_SSE0, lij_SSE0);
-            lij3_SSE1          = _mm_mul_pd(lij2_SSE1, lij_SSE1);
-
-            diff2_SSE0         = _mm_sub_pd(uij2_SSE0, lij2_SSE0);
-            diff2_SSE1         = _mm_sub_pd(uij2_SSE1, lij2_SSE1);
-            lij_inv_SSE0       = gmx_mm_invsqrt_pd(lij2_SSE0);
-            lij_inv_SSE1       = gmx_mm_invsqrt_pd(lij2_SSE1);
-            sk2_aj_SSE         = _mm_mul_pd(sk_aj_SSE, sk_aj_SSE);
-            sk2_rinv_SSE0      = _mm_mul_pd(sk2_aj_SSE, rinv_SSE0);
-            sk2_rinv_SSE1      = _mm_mul_pd(sk2_aj_SSE, rinv_SSE1);
-            prod_SSE0          = _mm_mul_pd(onefourth_SSE, sk2_rinv_SSE0);
-            prod_SSE1          = _mm_mul_pd(onefourth_SSE, sk2_rinv_SSE1);
-
-            logterm_SSE0       = gmx_mm_log_pd(_mm_mul_pd(uij_SSE0, lij_inv_SSE0));
-            logterm_SSE1       = gmx_mm_log_pd(_mm_mul_pd(uij_SSE1, lij_inv_SSE1));
-
-            t1_SSE0            = _mm_sub_pd(lij_SSE0, uij_SSE0);
-            t1_SSE1            = _mm_sub_pd(lij_SSE1, uij_SSE1);
-            t2_SSE0            = _mm_mul_pd(diff2_SSE0,
-                                            _mm_sub_pd(_mm_mul_pd(onefourth_SSE, dr_SSE0),
-                                                       prod_SSE0));
-            t2_SSE1            = _mm_mul_pd(diff2_SSE1,
-                                            _mm_sub_pd(_mm_mul_pd(onefourth_SSE, dr_SSE1),
-                                                       prod_SSE1));
-
-            t3_SSE0            = _mm_mul_pd(half_SSE, _mm_mul_pd(rinv_SSE0, logterm_SSE0));
-            t3_SSE1            = _mm_mul_pd(half_SSE, _mm_mul_pd(rinv_SSE1, logterm_SSE1));
-            t1_SSE0            = _mm_add_pd(t1_SSE0, _mm_add_pd(t2_SSE0, t3_SSE0));
-            t1_SSE1            = _mm_add_pd(t1_SSE1, _mm_add_pd(t2_SSE1, t3_SSE1));
-            t4_SSE0            = _mm_mul_pd(two_SSE, _mm_sub_pd(rai_inv_SSE0, lij_SSE0));
-            t4_SSE1            = _mm_mul_pd(two_SSE, _mm_sub_pd(rai_inv_SSE1, lij_SSE1));
-            t4_SSE0            = _mm_and_pd(t4_SSE0, obc_mask3_SSE0);
-            t4_SSE1            = _mm_and_pd(t4_SSE1, obc_mask3_SSE1);
-            t1_SSE0            = _mm_mul_pd(half_SSE, _mm_add_pd(t1_SSE0, t4_SSE0));
-            t1_SSE1            = _mm_mul_pd(half_SSE, _mm_add_pd(t1_SSE1, t4_SSE1));
-
-            sum_ai_SSE0        = _mm_add_pd(sum_ai_SSE0, _mm_and_pd(t1_SSE0, obc_mask1_SSE0));
-            sum_ai_SSE1        = _mm_add_pd(sum_ai_SSE1, _mm_and_pd(t1_SSE1, obc_mask1_SSE1));
-
-            t1_SSE0            = _mm_add_pd(_mm_mul_pd(half_SSE, lij2_SSE0),
-                                            _mm_mul_pd(prod_SSE0, lij3_SSE0));
-            t1_SSE1            = _mm_add_pd(_mm_mul_pd(half_SSE, lij2_SSE1),
-                                            _mm_mul_pd(prod_SSE1, lij3_SSE1));
-            t1_SSE0            = _mm_sub_pd(t1_SSE0,
-                                            _mm_mul_pd(onefourth_SSE,
-                                                       _mm_add_pd(_mm_mul_pd(lij_SSE0, rinv_SSE0),
-                                                                  _mm_mul_pd(lij3_SSE0, dr_SSE0))));
-            t1_SSE1            = _mm_sub_pd(t1_SSE1,
-                                            _mm_mul_pd(onefourth_SSE,
-                                                       _mm_add_pd(_mm_mul_pd(lij_SSE1, rinv_SSE1),
-                                                                  _mm_mul_pd(lij3_SSE1, dr_SSE1))));
-
-            t2_SSE0            = _mm_mul_pd(onefourth_SSE,
-                                            _mm_add_pd(_mm_mul_pd(uij_SSE0, rinv_SSE0),
-                                                       _mm_mul_pd(uij3_SSE0, dr_SSE0)));
-            t2_SSE1            = _mm_mul_pd(onefourth_SSE,
-                                            _mm_add_pd(_mm_mul_pd(uij_SSE1, rinv_SSE1),
-                                                       _mm_mul_pd(uij3_SSE1, dr_SSE1)));
-            t2_SSE0            = _mm_sub_pd(t2_SSE0,
-                                            _mm_add_pd(_mm_mul_pd(half_SSE, uij2_SSE0),
-                                                       _mm_mul_pd(prod_SSE0, uij3_SSE0)));
-            t2_SSE1            = _mm_sub_pd(t2_SSE1,
-                                            _mm_add_pd(_mm_mul_pd(half_SSE, uij2_SSE1),
-                                                       _mm_mul_pd(prod_SSE1, uij3_SSE1)));
-            t3_SSE0            = _mm_mul_pd(_mm_mul_pd(onefourth_SSE, logterm_SSE0),
-                                            _mm_mul_pd(rinv_SSE0, rinv_SSE0));
-            t3_SSE1            = _mm_mul_pd(_mm_mul_pd(onefourth_SSE, logterm_SSE1),
-                                            _mm_mul_pd(rinv_SSE1, rinv_SSE1));
-            t3_SSE0            = _mm_sub_pd(t3_SSE0,
-                                            _mm_mul_pd(_mm_mul_pd(diff2_SSE0, oneeighth_SSE),
-                                                       _mm_add_pd(one_SSE,
-                                                                  _mm_mul_pd(sk2_rinv_SSE0, rinv_SSE0))));
-            t3_SSE1            = _mm_sub_pd(t3_SSE1,
-                                            _mm_mul_pd(_mm_mul_pd(diff2_SSE1, oneeighth_SSE),
-                                                       _mm_add_pd(one_SSE,
-                                                                  _mm_mul_pd(sk2_rinv_SSE1, rinv_SSE1))));
-
-            t1_SSE0            = _mm_mul_pd(rinv_SSE0,
-                                            _mm_add_pd(_mm_mul_pd(dlij_SSE0, t1_SSE0),
-                                                       _mm_add_pd(t2_SSE0, t3_SSE0)));
-            t1_SSE1            = _mm_mul_pd(rinv_SSE1,
-                                            _mm_add_pd(_mm_mul_pd(dlij_SSE1, t1_SSE1),
-                                                       _mm_add_pd(t2_SSE1, t3_SSE1)));
-
-            _mm_store_pd(dadx, _mm_and_pd(t1_SSE0, obc_mask1_SSE0));
-            dadx += 2;
-            _mm_store_pd(dadx, _mm_and_pd(t1_SSE1, obc_mask1_SSE1));
-            dadx += 2;
-
-            /* Evaluate influence of atom ai -> aj */
-            t1_SSE0            = _mm_add_pd(dr_SSE0, sk_ai_SSE0);
-            t1_SSE1            = _mm_add_pd(dr_SSE1, sk_ai_SSE1);
-            t2_SSE0            = _mm_sub_pd(dr_SSE0, sk_ai_SSE0);
-            t2_SSE1            = _mm_sub_pd(dr_SSE1, sk_ai_SSE1);
-            t3_SSE0            = _mm_sub_pd(sk_ai_SSE0, dr_SSE0);
-            t3_SSE1            = _mm_sub_pd(sk_ai_SSE1, dr_SSE1);
-
-            obc_mask1_SSE0     = _mm_cmplt_pd(raj_SSE, t1_SSE0);
-            obc_mask1_SSE1     = _mm_cmplt_pd(raj_SSE, t1_SSE1);
-            obc_mask2_SSE0     = _mm_cmplt_pd(raj_SSE, t2_SSE0);
-            obc_mask2_SSE1     = _mm_cmplt_pd(raj_SSE, t2_SSE1);
-            obc_mask3_SSE0     = _mm_cmplt_pd(raj_SSE, t3_SSE0);
-            obc_mask3_SSE1     = _mm_cmplt_pd(raj_SSE, t3_SSE1);
-            obc_mask1_SSE0     = _mm_and_pd(obc_mask1_SSE0, jmask_SSE0);
-            obc_mask1_SSE1     = _mm_and_pd(obc_mask1_SSE1, jmask_SSE1);
-
-            uij_SSE0           = gmx_mm_inv_pd(t1_SSE0);
-            uij_SSE1           = gmx_mm_inv_pd(t1_SSE1);
-            lij_SSE0           = _mm_or_pd(   _mm_and_pd(obc_mask2_SSE0, gmx_mm_inv_pd(t2_SSE0)),
-                                              _mm_andnot_pd(obc_mask2_SSE0, raj_inv_SSE));
-            lij_SSE1           = _mm_or_pd(   _mm_and_pd(obc_mask2_SSE1, gmx_mm_inv_pd(t2_SSE1)),
-                                              _mm_andnot_pd(obc_mask2_SSE1, raj_inv_SSE));
-            dlij_SSE0          = _mm_and_pd(one_SSE, obc_mask2_SSE0);
-            dlij_SSE1          = _mm_and_pd(one_SSE, obc_mask2_SSE1);
-
-            uij2_SSE0          = _mm_mul_pd(uij_SSE0, uij_SSE0);
-            uij2_SSE1          = _mm_mul_pd(uij_SSE1, uij_SSE1);
-            uij3_SSE0          = _mm_mul_pd(uij2_SSE0, uij_SSE0);
-            uij3_SSE1          = _mm_mul_pd(uij2_SSE1, uij_SSE1);
-            lij2_SSE0          = _mm_mul_pd(lij_SSE0, lij_SSE0);
-            lij2_SSE1          = _mm_mul_pd(lij_SSE1, lij_SSE1);
-            lij3_SSE0          = _mm_mul_pd(lij2_SSE0, lij_SSE0);
-            lij3_SSE1          = _mm_mul_pd(lij2_SSE1, lij_SSE1);
-
-            diff2_SSE0         = _mm_sub_pd(uij2_SSE0, lij2_SSE0);
-            diff2_SSE1         = _mm_sub_pd(uij2_SSE1, lij2_SSE1);
-            lij_inv_SSE0       = gmx_mm_invsqrt_pd(lij2_SSE0);
-            lij_inv_SSE1       = gmx_mm_invsqrt_pd(lij2_SSE1);
-            sk2_rinv_SSE0      = _mm_mul_pd(sk2_ai_SSE0, rinv_SSE0);
-            sk2_rinv_SSE1      = _mm_mul_pd(sk2_ai_SSE1, rinv_SSE1);
-            prod_SSE0          = _mm_mul_pd(onefourth_SSE, sk2_rinv_SSE0);
-            prod_SSE1          = _mm_mul_pd(onefourth_SSE, sk2_rinv_SSE1);
-
-            logterm_SSE0       = gmx_mm_log_pd(_mm_mul_pd(uij_SSE0, lij_inv_SSE0));
-            logterm_SSE1       = gmx_mm_log_pd(_mm_mul_pd(uij_SSE1, lij_inv_SSE1));
-            t1_SSE0            = _mm_sub_pd(lij_SSE0, uij_SSE0);
-            t1_SSE1            = _mm_sub_pd(lij_SSE1, uij_SSE1);
-            t2_SSE0            = _mm_mul_pd(diff2_SSE0,
-                                            _mm_sub_pd(_mm_mul_pd(onefourth_SSE, dr_SSE0),
-                                                       prod_SSE0));
-            t2_SSE1            = _mm_mul_pd(diff2_SSE1,
-                                            _mm_sub_pd(_mm_mul_pd(onefourth_SSE, dr_SSE1),
-                                                       prod_SSE1));
-            t3_SSE0            = _mm_mul_pd(half_SSE, _mm_mul_pd(rinv_SSE0, logterm_SSE0));
-            t3_SSE1            = _mm_mul_pd(half_SSE, _mm_mul_pd(rinv_SSE1, logterm_SSE1));
-            t1_SSE0            = _mm_add_pd(t1_SSE0, _mm_add_pd(t2_SSE0, t3_SSE0));
-            t1_SSE1            = _mm_add_pd(t1_SSE1, _mm_add_pd(t2_SSE1, t3_SSE1));
-            t4_SSE0            = _mm_mul_pd(two_SSE, _mm_sub_pd(raj_inv_SSE, lij_SSE0));
-            t4_SSE1            = _mm_mul_pd(two_SSE, _mm_sub_pd(raj_inv_SSE, lij_SSE1));
-            t4_SSE0            = _mm_and_pd(t4_SSE0, obc_mask3_SSE0);
-            t4_SSE1            = _mm_and_pd(t4_SSE1, obc_mask3_SSE1);
-            t1_SSE0            = _mm_mul_pd(half_SSE, _mm_add_pd(t1_SSE0, t4_SSE0));
-            t1_SSE1            = _mm_mul_pd(half_SSE, _mm_add_pd(t1_SSE1, t4_SSE1));
-
-            _mm_store_pd(work+j, _mm_add_pd(_mm_load_pd(work+j),
-                                            _mm_add_pd(_mm_and_pd(t1_SSE0, obc_mask1_SSE0),
-                                                       _mm_and_pd(t1_SSE1, obc_mask1_SSE1))));
-
-            t1_SSE0            = _mm_add_pd(_mm_mul_pd(half_SSE, lij2_SSE0),
-                                            _mm_mul_pd(prod_SSE0, lij3_SSE0));
-            t1_SSE1            = _mm_add_pd(_mm_mul_pd(half_SSE, lij2_SSE1),
-                                            _mm_mul_pd(prod_SSE1, lij3_SSE1));
-            t1_SSE0            = _mm_sub_pd(t1_SSE0,
-                                            _mm_mul_pd(onefourth_SSE,
-                                                       _mm_add_pd(_mm_mul_pd(lij_SSE0, rinv_SSE0),
-                                                                  _mm_mul_pd(lij3_SSE0, dr_SSE0))));
-            t1_SSE1            = _mm_sub_pd(t1_SSE1,
-                                            _mm_mul_pd(onefourth_SSE,
-                                                       _mm_add_pd(_mm_mul_pd(lij_SSE1, rinv_SSE1),
-                                                                  _mm_mul_pd(lij3_SSE1, dr_SSE1))));
-            t2_SSE0            = _mm_mul_pd(onefourth_SSE,
-                                            _mm_add_pd(_mm_mul_pd(uij_SSE0, rinv_SSE0),
-                                                       _mm_mul_pd(uij3_SSE0, dr_SSE0)));
-            t2_SSE1            = _mm_mul_pd(onefourth_SSE,
-                                            _mm_add_pd(_mm_mul_pd(uij_SSE1, rinv_SSE1),
-                                                       _mm_mul_pd(uij3_SSE1, dr_SSE1)));
-            t2_SSE0            = _mm_sub_pd(t2_SSE0,
-                                            _mm_add_pd(_mm_mul_pd(half_SSE, uij2_SSE0),
-                                                       _mm_mul_pd(prod_SSE0, uij3_SSE0)));
-            t2_SSE1            = _mm_sub_pd(t2_SSE1,
-                                            _mm_add_pd(_mm_mul_pd(half_SSE, uij2_SSE1),
-                                                       _mm_mul_pd(prod_SSE1, uij3_SSE1)));
-
-            t3_SSE0            = _mm_mul_pd(_mm_mul_pd(onefourth_SSE, logterm_SSE0),
-                                            _mm_mul_pd(rinv_SSE0, rinv_SSE0));
-            t3_SSE1            = _mm_mul_pd(_mm_mul_pd(onefourth_SSE, logterm_SSE1),
-                                            _mm_mul_pd(rinv_SSE1, rinv_SSE1));
-
-            t3_SSE0            = _mm_sub_pd(t3_SSE0,
-                                            _mm_mul_pd(_mm_mul_pd(diff2_SSE0, oneeighth_SSE),
-                                                       _mm_add_pd(one_SSE,
-                                                                  _mm_mul_pd(sk2_rinv_SSE0, rinv_SSE0))));
-            t3_SSE1            = _mm_sub_pd(t3_SSE1,
-                                            _mm_mul_pd(_mm_mul_pd(diff2_SSE1, oneeighth_SSE),
-                                                       _mm_add_pd(one_SSE,
-                                                                  _mm_mul_pd(sk2_rinv_SSE1, rinv_SSE1))));
-
-
-            t1_SSE0            = _mm_mul_pd(rinv_SSE0,
-                                            _mm_add_pd(_mm_mul_pd(dlij_SSE0, t1_SSE0),
-                                                       _mm_add_pd(t2_SSE0, t3_SSE0)));
-            t1_SSE1            = _mm_mul_pd(rinv_SSE1,
-                                            _mm_add_pd(_mm_mul_pd(dlij_SSE1, t1_SSE1),
-                                                       _mm_add_pd(t2_SSE1, t3_SSE1)));
-
-            _mm_store_pd(dadx, _mm_and_pd(t1_SSE0, obc_mask1_SSE0));
-            dadx += 2;
-            _mm_store_pd(dadx, _mm_and_pd(t1_SSE1, obc_mask1_SSE1));
-            dadx += 2;
-        }
-
-        /* Main part, no exclusions */
-        for (j = nj1; j < nj2; j += UNROLLJ)
-        {
-            /* load j atom coordinates */
-            jx_SSE            = _mm_load_pd(x_align+j);
-            jy_SSE            = _mm_load_pd(y_align+j);
-            jz_SSE            = _mm_load_pd(z_align+j);
-
-            /* Calculate distance */
-            dx_SSE0            = _mm_sub_pd(ix_SSE0, jx_SSE);
-            dy_SSE0            = _mm_sub_pd(iy_SSE0, jy_SSE);
-            dz_SSE0            = _mm_sub_pd(iz_SSE0, jz_SSE);
-            dx_SSE1            = _mm_sub_pd(ix_SSE1, jx_SSE);
-            dy_SSE1            = _mm_sub_pd(iy_SSE1, jy_SSE);
-            dz_SSE1            = _mm_sub_pd(iz_SSE1, jz_SSE);
-
-            /* rsq = dx*dx+dy*dy+dz*dz */
-            rsq_SSE0           = gmx_mm_calc_rsq_pd(dx_SSE0, dy_SSE0, dz_SSE0);
-            rsq_SSE1           = gmx_mm_calc_rsq_pd(dx_SSE1, dy_SSE1, dz_SSE1);
-
-            /* Calculate 1/r and 1/r2 */
-            rinv_SSE0          = gmx_mm_invsqrt_pd(rsq_SSE0);
-            rinv_SSE1          = gmx_mm_invsqrt_pd(rsq_SSE1);
-
-            /* Apply mask */
-            rinv_SSE0          = _mm_and_pd(rinv_SSE0, imask_SSE0);
-            rinv_SSE1          = _mm_and_pd(rinv_SSE1, imask_SSE1);
-
-            dr_SSE0            = _mm_mul_pd(rsq_SSE0, rinv_SSE0);
-            dr_SSE1            = _mm_mul_pd(rsq_SSE1, rinv_SSE1);
-
-            sk_aj_SSE          = _mm_load_pd(obc_param+j);
-            raj_SSE            = _mm_load_pd(gb_radius+j);
-
-            raj_inv_SSE        = gmx_mm_inv_pd(raj_SSE);
-
-            /* Evaluate influence of atom aj -> ai */
-            t1_SSE0            = _mm_add_pd(dr_SSE0, sk_aj_SSE);
-            t1_SSE1            = _mm_add_pd(dr_SSE1, sk_aj_SSE);
-            t2_SSE0            = _mm_sub_pd(dr_SSE0, sk_aj_SSE);
-            t2_SSE1            = _mm_sub_pd(dr_SSE1, sk_aj_SSE);
-            t3_SSE0            = _mm_sub_pd(sk_aj_SSE, dr_SSE0);
-            t3_SSE1            = _mm_sub_pd(sk_aj_SSE, dr_SSE1);
-
-            obc_mask1_SSE0     = _mm_cmplt_pd(rai_SSE0, t1_SSE0);
-            obc_mask1_SSE1     = _mm_cmplt_pd(rai_SSE1, t1_SSE1);
-            obc_mask2_SSE0     = _mm_cmplt_pd(rai_SSE0, t2_SSE0);
-            obc_mask2_SSE1     = _mm_cmplt_pd(rai_SSE1, t2_SSE1);
-            obc_mask3_SSE0     = _mm_cmplt_pd(rai_SSE0, t3_SSE0);
-            obc_mask3_SSE1     = _mm_cmplt_pd(rai_SSE1, t3_SSE1);
-            obc_mask1_SSE0     = _mm_and_pd(obc_mask1_SSE0, imask_SSE0);
-            obc_mask1_SSE1     = _mm_and_pd(obc_mask1_SSE1, imask_SSE1);
-
-            uij_SSE0           = gmx_mm_inv_pd(t1_SSE0);
-            uij_SSE1           = gmx_mm_inv_pd(t1_SSE1);
-            lij_SSE0           = _mm_or_pd(   _mm_and_pd(obc_mask2_SSE0, gmx_mm_inv_pd(t2_SSE0)),
-                                              _mm_andnot_pd(obc_mask2_SSE0, rai_inv_SSE0));
-            lij_SSE1           = _mm_or_pd(   _mm_and_pd(obc_mask2_SSE1, gmx_mm_inv_pd(t2_SSE1)),
-                                              _mm_andnot_pd(obc_mask2_SSE1, rai_inv_SSE1));
-            dlij_SSE0          = _mm_and_pd(one_SSE, obc_mask2_SSE0);
-            dlij_SSE1          = _mm_and_pd(one_SSE, obc_mask2_SSE1);
-
-            uij2_SSE0          = _mm_mul_pd(uij_SSE0, uij_SSE0);
-            uij2_SSE1          = _mm_mul_pd(uij_SSE1, uij_SSE1);
-            uij3_SSE0          = _mm_mul_pd(uij2_SSE0, uij_SSE0);
-            uij3_SSE1          = _mm_mul_pd(uij2_SSE1, uij_SSE1);
-            lij2_SSE0          = _mm_mul_pd(lij_SSE0, lij_SSE0);
-            lij2_SSE1          = _mm_mul_pd(lij_SSE1, lij_SSE1);
-            lij3_SSE0          = _mm_mul_pd(lij2_SSE0, lij_SSE0);
-            lij3_SSE1          = _mm_mul_pd(lij2_SSE1, lij_SSE1);
-
-            diff2_SSE0         = _mm_sub_pd(uij2_SSE0, lij2_SSE0);
-            diff2_SSE1         = _mm_sub_pd(uij2_SSE1, lij2_SSE1);
-            lij_inv_SSE0       = gmx_mm_invsqrt_pd(lij2_SSE0);
-            lij_inv_SSE1       = gmx_mm_invsqrt_pd(lij2_SSE1);
-            sk2_aj_SSE         = _mm_mul_pd(sk_aj_SSE, sk_aj_SSE);
-            sk2_rinv_SSE0      = _mm_mul_pd(sk2_aj_SSE, rinv_SSE0);
-            sk2_rinv_SSE1      = _mm_mul_pd(sk2_aj_SSE, rinv_SSE1);
-            prod_SSE0          = _mm_mul_pd(onefourth_SSE, sk2_rinv_SSE0);
-            prod_SSE1          = _mm_mul_pd(onefourth_SSE, sk2_rinv_SSE1);
-
-            logterm_SSE0       = gmx_mm_log_pd(_mm_mul_pd(uij_SSE0, lij_inv_SSE0));
-            logterm_SSE1       = gmx_mm_log_pd(_mm_mul_pd(uij_SSE1, lij_inv_SSE1));
-
-            t1_SSE0            = _mm_sub_pd(lij_SSE0, uij_SSE0);
-            t1_SSE1            = _mm_sub_pd(lij_SSE1, uij_SSE1);
-            t2_SSE0            = _mm_mul_pd(diff2_SSE0,
-                                            _mm_sub_pd(_mm_mul_pd(onefourth_SSE, dr_SSE0),
-                                                       prod_SSE0));
-            t2_SSE1            = _mm_mul_pd(diff2_SSE1,
-                                            _mm_sub_pd(_mm_mul_pd(onefourth_SSE, dr_SSE1),
-                                                       prod_SSE1));
-
-            t3_SSE0            = _mm_mul_pd(half_SSE, _mm_mul_pd(rinv_SSE0, logterm_SSE0));
-            t3_SSE1            = _mm_mul_pd(half_SSE, _mm_mul_pd(rinv_SSE1, logterm_SSE1));
-            t1_SSE0            = _mm_add_pd(t1_SSE0, _mm_add_pd(t2_SSE0, t3_SSE0));
-            t1_SSE1            = _mm_add_pd(t1_SSE1, _mm_add_pd(t2_SSE1, t3_SSE1));
-            t4_SSE0            = _mm_mul_pd(two_SSE, _mm_sub_pd(rai_inv_SSE0, lij_SSE0));
-            t4_SSE1            = _mm_mul_pd(two_SSE, _mm_sub_pd(rai_inv_SSE1, lij_SSE1));
-            t4_SSE0            = _mm_and_pd(t4_SSE0, obc_mask3_SSE0);
-            t4_SSE1            = _mm_and_pd(t4_SSE1, obc_mask3_SSE1);
-            t1_SSE0            = _mm_mul_pd(half_SSE, _mm_add_pd(t1_SSE0, t4_SSE0));
-            t1_SSE1            = _mm_mul_pd(half_SSE, _mm_add_pd(t1_SSE1, t4_SSE1));
-
-            sum_ai_SSE0        = _mm_add_pd(sum_ai_SSE0, _mm_and_pd(t1_SSE0, obc_mask1_SSE0));
-            sum_ai_SSE1        = _mm_add_pd(sum_ai_SSE1, _mm_and_pd(t1_SSE1, obc_mask1_SSE1));
-
-            t1_SSE0            = _mm_add_pd(_mm_mul_pd(half_SSE, lij2_SSE0),
-                                            _mm_mul_pd(prod_SSE0, lij3_SSE0));
-            t1_SSE1            = _mm_add_pd(_mm_mul_pd(half_SSE, lij2_SSE1),
-                                            _mm_mul_pd(prod_SSE1, lij3_SSE1));
-
-            t1_SSE0            = _mm_sub_pd(t1_SSE0,
-                                            _mm_mul_pd(onefourth_SSE,
-                                                       _mm_add_pd(_mm_mul_pd(lij_SSE0, rinv_SSE0),
-                                                                  _mm_mul_pd(lij3_SSE0, dr_SSE0))));
-            t1_SSE1            = _mm_sub_pd(t1_SSE1,
-                                            _mm_mul_pd(onefourth_SSE,
-                                                       _mm_add_pd(_mm_mul_pd(lij_SSE1, rinv_SSE1),
-                                                                  _mm_mul_pd(lij3_SSE1, dr_SSE1))));
-
-            t2_SSE0            = _mm_mul_pd(onefourth_SSE,
-                                            _mm_add_pd(_mm_mul_pd(uij_SSE0, rinv_SSE0),
-                                                       _mm_mul_pd(uij3_SSE0, dr_SSE0)));
-            t2_SSE1            = _mm_mul_pd(onefourth_SSE,
-                                            _mm_add_pd(_mm_mul_pd(uij_SSE1, rinv_SSE1),
-                                                       _mm_mul_pd(uij3_SSE1, dr_SSE1)));
-            t2_SSE0            = _mm_sub_pd(t2_SSE0,
-                                            _mm_add_pd(_mm_mul_pd(half_SSE, uij2_SSE0),
-                                                       _mm_mul_pd(prod_SSE0, uij3_SSE0)));
-            t2_SSE1            = _mm_sub_pd(t2_SSE1,
-                                            _mm_add_pd(_mm_mul_pd(half_SSE, uij2_SSE1),
-                                                       _mm_mul_pd(prod_SSE1, uij3_SSE1)));
-            t3_SSE0            = _mm_mul_pd(_mm_mul_pd(onefourth_SSE, logterm_SSE0),
-                                            _mm_mul_pd(rinv_SSE0, rinv_SSE0));
-            t3_SSE1            = _mm_mul_pd(_mm_mul_pd(onefourth_SSE, logterm_SSE1),
-                                            _mm_mul_pd(rinv_SSE1, rinv_SSE1));
-            t3_SSE0            = _mm_sub_pd(t3_SSE0,
-                                            _mm_mul_pd(_mm_mul_pd(diff2_SSE0, oneeighth_SSE),
-                                                       _mm_add_pd(one_SSE,
-                                                                  _mm_mul_pd(sk2_rinv_SSE0, rinv_SSE0))));
-            t3_SSE1            = _mm_sub_pd(t3_SSE1,
-                                            _mm_mul_pd(_mm_mul_pd(diff2_SSE1, oneeighth_SSE),
-                                                       _mm_add_pd(one_SSE,
-                                                                  _mm_mul_pd(sk2_rinv_SSE1, rinv_SSE1))));
-
-            t1_SSE0            = _mm_mul_pd(rinv_SSE0,
-                                            _mm_add_pd(_mm_mul_pd(dlij_SSE0, t1_SSE0),
-                                                       _mm_add_pd(t2_SSE0, t3_SSE0)));
-            t1_SSE1            = _mm_mul_pd(rinv_SSE1,
-                                            _mm_add_pd(_mm_mul_pd(dlij_SSE1, t1_SSE1),
-                                                       _mm_add_pd(t2_SSE1, t3_SSE1)));
-
-            _mm_store_pd(dadx, _mm_and_pd(t1_SSE0, obc_mask1_SSE0));
-            dadx += 2;
-            _mm_store_pd(dadx, _mm_and_pd(t1_SSE1, obc_mask1_SSE1));
-            dadx += 2;
-
-            /* Evaluate influence of atom ai -> aj */
-            t1_SSE0            = _mm_add_pd(dr_SSE0, sk_ai_SSE0);
-            t1_SSE1            = _mm_add_pd(dr_SSE1, sk_ai_SSE1);
-            t2_SSE0            = _mm_sub_pd(dr_SSE0, sk_ai_SSE0);
-            t2_SSE1            = _mm_sub_pd(dr_SSE1, sk_ai_SSE1);
-            t3_SSE0            = _mm_sub_pd(sk_ai_SSE0, dr_SSE0);
-            t3_SSE1            = _mm_sub_pd(sk_ai_SSE1, dr_SSE1);
-
-            obc_mask1_SSE0     = _mm_cmplt_pd(raj_SSE, t1_SSE0);
-            obc_mask1_SSE1     = _mm_cmplt_pd(raj_SSE, t1_SSE1);
-            obc_mask2_SSE0     = _mm_cmplt_pd(raj_SSE, t2_SSE0);
-            obc_mask2_SSE1     = _mm_cmplt_pd(raj_SSE, t2_SSE1);
-            obc_mask3_SSE0     = _mm_cmplt_pd(raj_SSE, t3_SSE0);
-            obc_mask3_SSE1     = _mm_cmplt_pd(raj_SSE, t3_SSE1);
-            obc_mask1_SSE0     = _mm_and_pd(obc_mask1_SSE0, imask_SSE0);
-            obc_mask1_SSE1     = _mm_and_pd(obc_mask1_SSE1, imask_SSE1);
-
-            uij_SSE0           = gmx_mm_inv_pd(t1_SSE0);
-            uij_SSE1           = gmx_mm_inv_pd(t1_SSE1);
-            lij_SSE0           = _mm_or_pd(   _mm_and_pd(obc_mask2_SSE0, gmx_mm_inv_pd(t2_SSE0)),
-                                              _mm_andnot_pd(obc_mask2_SSE0, raj_inv_SSE));
-            lij_SSE1           = _mm_or_pd(   _mm_and_pd(obc_mask2_SSE1, gmx_mm_inv_pd(t2_SSE1)),
-                                              _mm_andnot_pd(obc_mask2_SSE1, raj_inv_SSE));
-            dlij_SSE0          = _mm_and_pd(one_SSE, obc_mask2_SSE0);
-            dlij_SSE1          = _mm_and_pd(one_SSE, obc_mask2_SSE1);
-
-            uij2_SSE0          = _mm_mul_pd(uij_SSE0, uij_SSE0);
-            uij2_SSE1          = _mm_mul_pd(uij_SSE1, uij_SSE1);
-            uij3_SSE0          = _mm_mul_pd(uij2_SSE0, uij_SSE0);
-            uij3_SSE1          = _mm_mul_pd(uij2_SSE1, uij_SSE1);
-            lij2_SSE0          = _mm_mul_pd(lij_SSE0, lij_SSE0);
-            lij2_SSE1          = _mm_mul_pd(lij_SSE1, lij_SSE1);
-            lij3_SSE0          = _mm_mul_pd(lij2_SSE0, lij_SSE0);
-            lij3_SSE1          = _mm_mul_pd(lij2_SSE1, lij_SSE1);
-
-            diff2_SSE0         = _mm_sub_pd(uij2_SSE0, lij2_SSE0);
-            diff2_SSE1         = _mm_sub_pd(uij2_SSE1, lij2_SSE1);
-            lij_inv_SSE0       = gmx_mm_invsqrt_pd(lij2_SSE0);
-            lij_inv_SSE1       = gmx_mm_invsqrt_pd(lij2_SSE1);
-            sk2_rinv_SSE0      = _mm_mul_pd(sk2_ai_SSE0, rinv_SSE0);
-            sk2_rinv_SSE1      = _mm_mul_pd(sk2_ai_SSE1, rinv_SSE1);
-            prod_SSE0          = _mm_mul_pd(onefourth_SSE, sk2_rinv_SSE0);
-            prod_SSE1          = _mm_mul_pd(onefourth_SSE, sk2_rinv_SSE1);
-
-            logterm_SSE0       = gmx_mm_log_pd(_mm_mul_pd(uij_SSE0, lij_inv_SSE0));
-            logterm_SSE1       = gmx_mm_log_pd(_mm_mul_pd(uij_SSE1, lij_inv_SSE1));
-            t1_SSE0            = _mm_sub_pd(lij_SSE0, uij_SSE0);
-            t1_SSE1            = _mm_sub_pd(lij_SSE1, uij_SSE1);
-            t2_SSE0            = _mm_mul_pd(diff2_SSE0,
-                                            _mm_sub_pd(_mm_mul_pd(onefourth_SSE, dr_SSE0),
-                                                       prod_SSE0));
-            t2_SSE1            = _mm_mul_pd(diff2_SSE1,
-                                            _mm_sub_pd(_mm_mul_pd(onefourth_SSE, dr_SSE1),
-                                                       prod_SSE1));
-            t3_SSE0            = _mm_mul_pd(half_SSE, _mm_mul_pd(rinv_SSE0, logterm_SSE0));
-            t3_SSE1            = _mm_mul_pd(half_SSE, _mm_mul_pd(rinv_SSE1, logterm_SSE1));
-            t1_SSE0            = _mm_add_pd(t1_SSE0, _mm_add_pd(t2_SSE0, t3_SSE0));
-            t1_SSE1            = _mm_add_pd(t1_SSE1, _mm_add_pd(t2_SSE1, t3_SSE1));
-            t4_SSE0            = _mm_mul_pd(two_SSE, _mm_sub_pd(raj_inv_SSE, lij_SSE0));
-            t4_SSE1            = _mm_mul_pd(two_SSE, _mm_sub_pd(raj_inv_SSE, lij_SSE1));
-            t4_SSE0            = _mm_and_pd(t4_SSE0, obc_mask3_SSE0);
-            t4_SSE1            = _mm_and_pd(t4_SSE1, obc_mask3_SSE1);
-            t1_SSE0            = _mm_mul_pd(half_SSE, _mm_add_pd(t1_SSE0, t4_SSE0));
-            t1_SSE1            = _mm_mul_pd(half_SSE, _mm_add_pd(t1_SSE1, t4_SSE1));
-
-            _mm_store_pd(work+j, _mm_add_pd(_mm_load_pd(work+j),
-                                            _mm_add_pd(_mm_and_pd(t1_SSE0, obc_mask1_SSE0),
-                                                       _mm_and_pd(t1_SSE1, obc_mask1_SSE1))));
-
-            t1_SSE0            = _mm_add_pd(_mm_mul_pd(half_SSE, lij2_SSE0),
-                                            _mm_mul_pd(prod_SSE0, lij3_SSE0));
-            t1_SSE1            = _mm_add_pd(_mm_mul_pd(half_SSE, lij2_SSE1),
-                                            _mm_mul_pd(prod_SSE1, lij3_SSE1));
-            t1_SSE0            = _mm_sub_pd(t1_SSE0,
-                                            _mm_mul_pd(onefourth_SSE,
-                                                       _mm_add_pd(_mm_mul_pd(lij_SSE0, rinv_SSE0),
-                                                                  _mm_mul_pd(lij3_SSE0, dr_SSE0))));
-            t1_SSE1            = _mm_sub_pd(t1_SSE1,
-                                            _mm_mul_pd(onefourth_SSE,
-                                                       _mm_add_pd(_mm_mul_pd(lij_SSE1, rinv_SSE1),
-                                                                  _mm_mul_pd(lij3_SSE1, dr_SSE1))));
-            t2_SSE0            = _mm_mul_pd(onefourth_SSE,
-                                            _mm_add_pd(_mm_mul_pd(uij_SSE0, rinv_SSE0),
-                                                       _mm_mul_pd(uij3_SSE0, dr_SSE0)));
-            t2_SSE1            = _mm_mul_pd(onefourth_SSE,
-                                            _mm_add_pd(_mm_mul_pd(uij_SSE1, rinv_SSE1),
-                                                       _mm_mul_pd(uij3_SSE1, dr_SSE1)));
-            t2_SSE0            = _mm_sub_pd(t2_SSE0,
-                                            _mm_add_pd(_mm_mul_pd(half_SSE, uij2_SSE0),
-                                                       _mm_mul_pd(prod_SSE0, uij3_SSE0)));
-            t2_SSE1            = _mm_sub_pd(t2_SSE1,
-                                            _mm_add_pd(_mm_mul_pd(half_SSE, uij2_SSE1),
-                                                       _mm_mul_pd(prod_SSE1, uij3_SSE1)));
-
-            t3_SSE0            = _mm_mul_pd(_mm_mul_pd(onefourth_SSE, logterm_SSE0),
-                                            _mm_mul_pd(rinv_SSE0, rinv_SSE0));
-            t3_SSE1            = _mm_mul_pd(_mm_mul_pd(onefourth_SSE, logterm_SSE1),
-                                            _mm_mul_pd(rinv_SSE1, rinv_SSE1));
-
-            t3_SSE0            = _mm_sub_pd(t3_SSE0,
-                                            _mm_mul_pd(_mm_mul_pd(diff2_SSE0, oneeighth_SSE),
-                                                       _mm_add_pd(one_SSE,
-                                                                  _mm_mul_pd(sk2_rinv_SSE0, rinv_SSE0))));
-            t3_SSE1            = _mm_sub_pd(t3_SSE1,
-                                            _mm_mul_pd(_mm_mul_pd(diff2_SSE1, oneeighth_SSE),
-                                                       _mm_add_pd(one_SSE,
-                                                                  _mm_mul_pd(sk2_rinv_SSE1, rinv_SSE1))));
-
-            t1_SSE0            = _mm_mul_pd(rinv_SSE0,
-                                            _mm_add_pd(_mm_mul_pd(dlij_SSE0, t1_SSE0),
-                                                       _mm_add_pd(t2_SSE0, t3_SSE0)));
-            t1_SSE1            = _mm_mul_pd(rinv_SSE1,
-                                            _mm_add_pd(_mm_mul_pd(dlij_SSE1, t1_SSE1),
-                                                       _mm_add_pd(t2_SSE1, t3_SSE1)));
-
-            _mm_store_pd(dadx, _mm_and_pd(t1_SSE0, obc_mask1_SSE0));
-            dadx += 2;
-            _mm_store_pd(dadx, _mm_and_pd(t1_SSE1, obc_mask1_SSE1));
-            dadx += 2;
-        }
-
-        /* Epilogue part, including exclusion mask */
-        for (j = nj2; j < nj3; j += UNROLLJ)
-        {
-            jmask_SSE0 = _mm_load_pd((double *)emask0);
-            jmask_SSE1 = _mm_load_pd((double *)emask1);
-            emask0    += 2*UNROLLJ;
-            emask1    += 2*UNROLLJ;
-
-            /* load j atom coordinates */
-            jx_SSE            = _mm_load_pd(x_align+j);
-            jy_SSE            = _mm_load_pd(y_align+j);
-            jz_SSE            = _mm_load_pd(z_align+j);
-
-            /* Calculate distance */
-            dx_SSE0            = _mm_sub_pd(ix_SSE0, jx_SSE);
-            dy_SSE0            = _mm_sub_pd(iy_SSE0, jy_SSE);
-            dz_SSE0            = _mm_sub_pd(iz_SSE0, jz_SSE);
-            dx_SSE1            = _mm_sub_pd(ix_SSE1, jx_SSE);
-            dy_SSE1            = _mm_sub_pd(iy_SSE1, jy_SSE);
-            dz_SSE1            = _mm_sub_pd(iz_SSE1, jz_SSE);
-
-            /* rsq = dx*dx+dy*dy+dz*dz */
-            rsq_SSE0           = gmx_mm_calc_rsq_pd(dx_SSE0, dy_SSE0, dz_SSE0);
-            rsq_SSE1           = gmx_mm_calc_rsq_pd(dx_SSE1, dy_SSE1, dz_SSE1);
-
-            /* Combine masks */
-            jmask_SSE0         = _mm_and_pd(jmask_SSE0, imask_SSE0);
-            jmask_SSE1         = _mm_and_pd(jmask_SSE1, imask_SSE1);
-
-            /* Calculate 1/r and 1/r2 */
-            rinv_SSE0          = gmx_mm_invsqrt_pd(rsq_SSE0);
-            rinv_SSE1          = gmx_mm_invsqrt_pd(rsq_SSE1);
-
-            /* Apply mask */
-            rinv_SSE0          = _mm_and_pd(rinv_SSE0, jmask_SSE0);
-            rinv_SSE1          = _mm_and_pd(rinv_SSE1, jmask_SSE1);
-
-            dr_SSE0            = _mm_mul_pd(rsq_SSE0, rinv_SSE0);
-            dr_SSE1            = _mm_mul_pd(rsq_SSE1, rinv_SSE1);
-
-            sk_aj_SSE          = _mm_load_pd(obc_param+j);
-            raj_SSE            = _mm_load_pd(gb_radius+j);
-
-            raj_inv_SSE        = gmx_mm_inv_pd(raj_SSE);
-
-            /* Evaluate influence of atom aj -> ai */
-            t1_SSE0            = _mm_add_pd(dr_SSE0, sk_aj_SSE);
-            t1_SSE1            = _mm_add_pd(dr_SSE1, sk_aj_SSE);
-            t2_SSE0            = _mm_sub_pd(dr_SSE0, sk_aj_SSE);
-            t2_SSE1            = _mm_sub_pd(dr_SSE1, sk_aj_SSE);
-            t3_SSE0            = _mm_sub_pd(sk_aj_SSE, dr_SSE0);
-            t3_SSE1            = _mm_sub_pd(sk_aj_SSE, dr_SSE1);
-
-            obc_mask1_SSE0     = _mm_cmplt_pd(rai_SSE0, t1_SSE0);
-            obc_mask1_SSE1     = _mm_cmplt_pd(rai_SSE1, t1_SSE1);
-            obc_mask2_SSE0     = _mm_cmplt_pd(rai_SSE0, t2_SSE0);
-            obc_mask2_SSE1     = _mm_cmplt_pd(rai_SSE1, t2_SSE1);
-            obc_mask3_SSE0     = _mm_cmplt_pd(rai_SSE0, t3_SSE0);
-            obc_mask3_SSE1     = _mm_cmplt_pd(rai_SSE1, t3_SSE1);
-            obc_mask1_SSE0     = _mm_and_pd(obc_mask1_SSE0, jmask_SSE0);
-            obc_mask1_SSE1     = _mm_and_pd(obc_mask1_SSE1, jmask_SSE1);
-
-            uij_SSE0           = gmx_mm_inv_pd(t1_SSE0);
-            uij_SSE1           = gmx_mm_inv_pd(t1_SSE1);
-            lij_SSE0           = _mm_or_pd(   _mm_and_pd(obc_mask2_SSE0, gmx_mm_inv_pd(t2_SSE0)),
-                                              _mm_andnot_pd(obc_mask2_SSE0, rai_inv_SSE0));
-            lij_SSE1           = _mm_or_pd(   _mm_and_pd(obc_mask2_SSE1, gmx_mm_inv_pd(t2_SSE1)),
-                                              _mm_andnot_pd(obc_mask2_SSE1, rai_inv_SSE1));
-
-            dlij_SSE0          = _mm_and_pd(one_SSE, obc_mask2_SSE0);
-            dlij_SSE1          = _mm_and_pd(one_SSE, obc_mask2_SSE1);
-
-            uij2_SSE0          = _mm_mul_pd(uij_SSE0, uij_SSE0);
-            uij2_SSE1          = _mm_mul_pd(uij_SSE1, uij_SSE1);
-            uij3_SSE0          = _mm_mul_pd(uij2_SSE0, uij_SSE0);
-            uij3_SSE1          = _mm_mul_pd(uij2_SSE1, uij_SSE1);
-            lij2_SSE0          = _mm_mul_pd(lij_SSE0, lij_SSE0);
-            lij2_SSE1          = _mm_mul_pd(lij_SSE1, lij_SSE1);
-            lij3_SSE0          = _mm_mul_pd(lij2_SSE0, lij_SSE0);
-            lij3_SSE1          = _mm_mul_pd(lij2_SSE1, lij_SSE1);
-
-            diff2_SSE0         = _mm_sub_pd(uij2_SSE0, lij2_SSE0);
-            diff2_SSE1         = _mm_sub_pd(uij2_SSE1, lij2_SSE1);
-            lij_inv_SSE0       = gmx_mm_invsqrt_pd(lij2_SSE0);
-            lij_inv_SSE1       = gmx_mm_invsqrt_pd(lij2_SSE1);
-            sk2_aj_SSE         = _mm_mul_pd(sk_aj_SSE, sk_aj_SSE);
-            sk2_rinv_SSE0      = _mm_mul_pd(sk2_aj_SSE, rinv_SSE0);
-            sk2_rinv_SSE1      = _mm_mul_pd(sk2_aj_SSE, rinv_SSE1);
-            prod_SSE0          = _mm_mul_pd(onefourth_SSE, sk2_rinv_SSE0);
-            prod_SSE1          = _mm_mul_pd(onefourth_SSE, sk2_rinv_SSE1);
-
-            logterm_SSE0       = gmx_mm_log_pd(_mm_mul_pd(uij_SSE0, lij_inv_SSE0));
-            logterm_SSE1       = gmx_mm_log_pd(_mm_mul_pd(uij_SSE1, lij_inv_SSE1));
-
-            t1_SSE0            = _mm_sub_pd(lij_SSE0, uij_SSE0);
-            t1_SSE1            = _mm_sub_pd(lij_SSE1, uij_SSE1);
-            t2_SSE0            = _mm_mul_pd(diff2_SSE0,
-                                            _mm_sub_pd(_mm_mul_pd(onefourth_SSE, dr_SSE0),
-                                                       prod_SSE0));
-            t2_SSE1            = _mm_mul_pd(diff2_SSE1,
-                                            _mm_sub_pd(_mm_mul_pd(onefourth_SSE, dr_SSE1),
-                                                       prod_SSE1));
-
-            t3_SSE0            = _mm_mul_pd(half_SSE, _mm_mul_pd(rinv_SSE0, logterm_SSE0));
-            t3_SSE1            = _mm_mul_pd(half_SSE, _mm_mul_pd(rinv_SSE1, logterm_SSE1));
-            t1_SSE0            = _mm_add_pd(t1_SSE0, _mm_add_pd(t2_SSE0, t3_SSE0));
-            t1_SSE1            = _mm_add_pd(t1_SSE1, _mm_add_pd(t2_SSE1, t3_SSE1));
-            t4_SSE0            = _mm_mul_pd(two_SSE, _mm_sub_pd(rai_inv_SSE0, lij_SSE0));
-            t4_SSE1            = _mm_mul_pd(two_SSE, _mm_sub_pd(rai_inv_SSE1, lij_SSE1));
-            t4_SSE0            = _mm_and_pd(t4_SSE0, obc_mask3_SSE0);
-            t4_SSE1            = _mm_and_pd(t4_SSE1, obc_mask3_SSE1);
-            t1_SSE0            = _mm_mul_pd(half_SSE, _mm_add_pd(t1_SSE0, t4_SSE0));
-            t1_SSE1            = _mm_mul_pd(half_SSE, _mm_add_pd(t1_SSE1, t4_SSE1));
-
-            sum_ai_SSE0        = _mm_add_pd(sum_ai_SSE0, _mm_and_pd(t1_SSE0, obc_mask1_SSE0));
-            sum_ai_SSE1        = _mm_add_pd(sum_ai_SSE1, _mm_and_pd(t1_SSE1, obc_mask1_SSE1));
-
-            t1_SSE0            = _mm_add_pd(_mm_mul_pd(half_SSE, lij2_SSE0),
-                                            _mm_mul_pd(prod_SSE0, lij3_SSE0));
-            t1_SSE1            = _mm_add_pd(_mm_mul_pd(half_SSE, lij2_SSE1),
-                                            _mm_mul_pd(prod_SSE1, lij3_SSE1));
-            t1_SSE0            = _mm_sub_pd(t1_SSE0,
-                                            _mm_mul_pd(onefourth_SSE,
-                                                       _mm_add_pd(_mm_mul_pd(lij_SSE0, rinv_SSE0),
-                                                                  _mm_mul_pd(lij3_SSE0, dr_SSE0))));
-            t1_SSE1            = _mm_sub_pd(t1_SSE1,
-                                            _mm_mul_pd(onefourth_SSE,
-                                                       _mm_add_pd(_mm_mul_pd(lij_SSE1, rinv_SSE1),
-                                                                  _mm_mul_pd(lij3_SSE1, dr_SSE1))));
-
-            t2_SSE0            = _mm_mul_pd(onefourth_SSE,
-                                            _mm_add_pd(_mm_mul_pd(uij_SSE0, rinv_SSE0),
-                                                       _mm_mul_pd(uij3_SSE0, dr_SSE0)));
-            t2_SSE1            = _mm_mul_pd(onefourth_SSE,
-                                            _mm_add_pd(_mm_mul_pd(uij_SSE1, rinv_SSE1),
-                                                       _mm_mul_pd(uij3_SSE1, dr_SSE1)));
-            t2_SSE0            = _mm_sub_pd(t2_SSE0,
-                                            _mm_add_pd(_mm_mul_pd(half_SSE, uij2_SSE0),
-                                                       _mm_mul_pd(prod_SSE0, uij3_SSE0)));
-            t2_SSE1            = _mm_sub_pd(t2_SSE1,
-                                            _mm_add_pd(_mm_mul_pd(half_SSE, uij2_SSE1),
-                                                       _mm_mul_pd(prod_SSE1, uij3_SSE1)));
-            t3_SSE0            = _mm_mul_pd(_mm_mul_pd(onefourth_SSE, logterm_SSE0),
-                                            _mm_mul_pd(rinv_SSE0, rinv_SSE0));
-            t3_SSE1            = _mm_mul_pd(_mm_mul_pd(onefourth_SSE, logterm_SSE1),
-                                            _mm_mul_pd(rinv_SSE1, rinv_SSE1));
-            t3_SSE0            = _mm_sub_pd(t3_SSE0,
-                                            _mm_mul_pd(_mm_mul_pd(diff2_SSE0, oneeighth_SSE),
-                                                       _mm_add_pd(one_SSE,
-                                                                  _mm_mul_pd(sk2_rinv_SSE0, rinv_SSE0))));
-            t3_SSE1            = _mm_sub_pd(t3_SSE1,
-                                            _mm_mul_pd(_mm_mul_pd(diff2_SSE1, oneeighth_SSE),
-                                                       _mm_add_pd(one_SSE,
-                                                                  _mm_mul_pd(sk2_rinv_SSE1, rinv_SSE1))));
-
-            t1_SSE0            = _mm_mul_pd(rinv_SSE0,
-                                            _mm_add_pd(_mm_mul_pd(dlij_SSE0, t1_SSE0),
-                                                       _mm_add_pd(t2_SSE0, t3_SSE0)));
-            t1_SSE1            = _mm_mul_pd(rinv_SSE1,
-                                            _mm_add_pd(_mm_mul_pd(dlij_SSE1, t1_SSE1),
-                                                       _mm_add_pd(t2_SSE1, t3_SSE1)));
-
-            _mm_store_pd(dadx, _mm_and_pd(t1_SSE0, obc_mask1_SSE0));
-            dadx += 2;
-            _mm_store_pd(dadx, _mm_and_pd(t1_SSE1, obc_mask1_SSE1));
-            dadx += 2;
-
-            /* Evaluate influence of atom ai -> aj */
-            t1_SSE0            = _mm_add_pd(dr_SSE0, sk_ai_SSE0);
-            t1_SSE1            = _mm_add_pd(dr_SSE1, sk_ai_SSE1);
-            t2_SSE0            = _mm_sub_pd(dr_SSE0, sk_ai_SSE0);
-            t2_SSE1            = _mm_sub_pd(dr_SSE1, sk_ai_SSE1);
-            t3_SSE0            = _mm_sub_pd(sk_ai_SSE0, dr_SSE0);
-            t3_SSE1            = _mm_sub_pd(sk_ai_SSE1, dr_SSE1);
-
-            obc_mask1_SSE0     = _mm_cmplt_pd(raj_SSE, t1_SSE0);
-            obc_mask1_SSE1     = _mm_cmplt_pd(raj_SSE, t1_SSE1);
-            obc_mask2_SSE0     = _mm_cmplt_pd(raj_SSE, t2_SSE0);
-            obc_mask2_SSE1     = _mm_cmplt_pd(raj_SSE, t2_SSE1);
-            obc_mask3_SSE0     = _mm_cmplt_pd(raj_SSE, t3_SSE0);
-            obc_mask3_SSE1     = _mm_cmplt_pd(raj_SSE, t3_SSE1);
-            obc_mask1_SSE0     = _mm_and_pd(obc_mask1_SSE0, jmask_SSE0);
-            obc_mask1_SSE1     = _mm_and_pd(obc_mask1_SSE1, jmask_SSE1);
-
-            uij_SSE0           = gmx_mm_inv_pd(t1_SSE0);
-            uij_SSE1           = gmx_mm_inv_pd(t1_SSE1);
-            lij_SSE0           = _mm_or_pd(   _mm_and_pd(obc_mask2_SSE0, gmx_mm_inv_pd(t2_SSE0)),
-                                              _mm_andnot_pd(obc_mask2_SSE0, raj_inv_SSE));
-            lij_SSE1           = _mm_or_pd(   _mm_and_pd(obc_mask2_SSE1, gmx_mm_inv_pd(t2_SSE1)),
-                                              _mm_andnot_pd(obc_mask2_SSE1, raj_inv_SSE));
-
-            dlij_SSE0          = _mm_and_pd(one_SSE, obc_mask2_SSE0);
-            dlij_SSE1          = _mm_and_pd(one_SSE, obc_mask2_SSE1);
-
-            uij2_SSE0          = _mm_mul_pd(uij_SSE0, uij_SSE0);
-            uij2_SSE1          = _mm_mul_pd(uij_SSE1, uij_SSE1);
-            uij3_SSE0          = _mm_mul_pd(uij2_SSE0, uij_SSE0);
-            uij3_SSE1          = _mm_mul_pd(uij2_SSE1, uij_SSE1);
-            lij2_SSE0          = _mm_mul_pd(lij_SSE0, lij_SSE0);
-            lij2_SSE1          = _mm_mul_pd(lij_SSE1, lij_SSE1);
-            lij3_SSE0          = _mm_mul_pd(lij2_SSE0, lij_SSE0);
-            lij3_SSE1          = _mm_mul_pd(lij2_SSE1, lij_SSE1);
-
-            diff2_SSE0         = _mm_sub_pd(uij2_SSE0, lij2_SSE0);
-            diff2_SSE1         = _mm_sub_pd(uij2_SSE1, lij2_SSE1);
-            lij_inv_SSE0       = gmx_mm_invsqrt_pd(lij2_SSE0);
-            lij_inv_SSE1       = gmx_mm_invsqrt_pd(lij2_SSE1);
-            sk2_rinv_SSE0      = _mm_mul_pd(sk2_ai_SSE0, rinv_SSE0);
-            sk2_rinv_SSE1      = _mm_mul_pd(sk2_ai_SSE1, rinv_SSE1);
-            prod_SSE0          = _mm_mul_pd(onefourth_SSE, sk2_rinv_SSE0);
-            prod_SSE1          = _mm_mul_pd(onefourth_SSE, sk2_rinv_SSE1);
-
-            logterm_SSE0       = gmx_mm_log_pd(_mm_mul_pd(uij_SSE0, lij_inv_SSE0));
-            logterm_SSE1       = gmx_mm_log_pd(_mm_mul_pd(uij_SSE1, lij_inv_SSE1));
-            t1_SSE0            = _mm_sub_pd(lij_SSE0, uij_SSE0);
-            t1_SSE1            = _mm_sub_pd(lij_SSE1, uij_SSE1);
-            t2_SSE0            = _mm_mul_pd(diff2_SSE0,
-                                            _mm_sub_pd(_mm_mul_pd(onefourth_SSE, dr_SSE0),
-                                                       prod_SSE0));
-            t2_SSE1            = _mm_mul_pd(diff2_SSE1,
-                                            _mm_sub_pd(_mm_mul_pd(onefourth_SSE, dr_SSE1),
-                                                       prod_SSE1));
-            t3_SSE0            = _mm_mul_pd(half_SSE, _mm_mul_pd(rinv_SSE0, logterm_SSE0));
-            t3_SSE1            = _mm_mul_pd(half_SSE, _mm_mul_pd(rinv_SSE1, logterm_SSE1));
-            t1_SSE0            = _mm_add_pd(t1_SSE0, _mm_add_pd(t2_SSE0, t3_SSE0));
-            t1_SSE1            = _mm_add_pd(t1_SSE1, _mm_add_pd(t2_SSE1, t3_SSE1));
-            t4_SSE0            = _mm_mul_pd(two_SSE, _mm_sub_pd(raj_inv_SSE, lij_SSE0));
-            t4_SSE1            = _mm_mul_pd(two_SSE, _mm_sub_pd(raj_inv_SSE, lij_SSE1));
-            t4_SSE0            = _mm_and_pd(t4_SSE0, obc_mask3_SSE0);
-            t4_SSE1            = _mm_and_pd(t4_SSE1, obc_mask3_SSE1);
-            t1_SSE0            = _mm_mul_pd(half_SSE, _mm_add_pd(t1_SSE0, t4_SSE0));
-            t1_SSE1            = _mm_mul_pd(half_SSE, _mm_add_pd(t1_SSE1, t4_SSE1));
-
-            _mm_store_pd(work+j, _mm_add_pd(_mm_load_pd(work+j),
-                                            _mm_add_pd(_mm_and_pd(t1_SSE0, obc_mask1_SSE0),
-                                                       _mm_and_pd(t1_SSE1, obc_mask1_SSE1))));
-
-            t1_SSE0            = _mm_add_pd(_mm_mul_pd(half_SSE, lij2_SSE0),
-                                            _mm_mul_pd(prod_SSE0, lij3_SSE0));
-            t1_SSE1            = _mm_add_pd(_mm_mul_pd(half_SSE, lij2_SSE1),
-                                            _mm_mul_pd(prod_SSE1, lij3_SSE1));
-
-            t1_SSE0            = _mm_sub_pd(t1_SSE0,
-                                            _mm_mul_pd(onefourth_SSE,
-                                                       _mm_add_pd(_mm_mul_pd(lij_SSE0, rinv_SSE0),
-                                                                  _mm_mul_pd(lij3_SSE0, dr_SSE0))));
-            t1_SSE1            = _mm_sub_pd(t1_SSE1,
-                                            _mm_mul_pd(onefourth_SSE,
-                                                       _mm_add_pd(_mm_mul_pd(lij_SSE1, rinv_SSE1),
-                                                                  _mm_mul_pd(lij3_SSE1, dr_SSE1))));
-            t2_SSE0            = _mm_mul_pd(onefourth_SSE,
-                                            _mm_add_pd(_mm_mul_pd(uij_SSE0, rinv_SSE0),
-                                                       _mm_mul_pd(uij3_SSE0, dr_SSE0)));
-            t2_SSE1            = _mm_mul_pd(onefourth_SSE,
-                                            _mm_add_pd(_mm_mul_pd(uij_SSE1, rinv_SSE1),
-                                                       _mm_mul_pd(uij3_SSE1, dr_SSE1)));
-            t2_SSE0            = _mm_sub_pd(t2_SSE0,
-                                            _mm_add_pd(_mm_mul_pd(half_SSE, uij2_SSE0),
-                                                       _mm_mul_pd(prod_SSE0, uij3_SSE0)));
-            t2_SSE1            = _mm_sub_pd(t2_SSE1,
-                                            _mm_add_pd(_mm_mul_pd(half_SSE, uij2_SSE1),
-                                                       _mm_mul_pd(prod_SSE1, uij3_SSE1)));
-
-            t3_SSE0            = _mm_mul_pd(_mm_mul_pd(onefourth_SSE, logterm_SSE0),
-                                            _mm_mul_pd(rinv_SSE0, rinv_SSE0));
-            t3_SSE1            = _mm_mul_pd(_mm_mul_pd(onefourth_SSE, logterm_SSE1),
-                                            _mm_mul_pd(rinv_SSE1, rinv_SSE1));
-
-            t3_SSE0            = _mm_sub_pd(t3_SSE0,
-                                            _mm_mul_pd(_mm_mul_pd(diff2_SSE0, oneeighth_SSE),
-                                                       _mm_add_pd(one_SSE,
-                                                                  _mm_mul_pd(sk2_rinv_SSE0, rinv_SSE0))));
-            t3_SSE1            = _mm_sub_pd(t3_SSE1,
-                                            _mm_mul_pd(_mm_mul_pd(diff2_SSE1, oneeighth_SSE),
-                                                       _mm_add_pd(one_SSE,
-                                                                  _mm_mul_pd(sk2_rinv_SSE1, rinv_SSE1))));
-
-            t1_SSE0            = _mm_mul_pd(rinv_SSE0,
-                                            _mm_add_pd(_mm_mul_pd(dlij_SSE0, t1_SSE0),
-                                                       _mm_add_pd(t2_SSE0, t3_SSE0)));
-            t1_SSE1            = _mm_mul_pd(rinv_SSE1,
-                                            _mm_add_pd(_mm_mul_pd(dlij_SSE1, t1_SSE1),
-                                                       _mm_add_pd(t2_SSE1, t3_SSE1)));
-
-            _mm_store_pd(dadx, _mm_and_pd(t1_SSE0, obc_mask1_SSE0));
-            dadx += 2;
-            _mm_store_pd(dadx, _mm_and_pd(t1_SSE1, obc_mask1_SSE1));
-            dadx += 2;
-        }
-        GMX_MM_TRANSPOSE2_PD(sum_ai_SSE0, sum_ai_SSE1);
-        sum_ai_SSE0 = _mm_add_pd(sum_ai_SSE0, sum_ai_SSE1);
-        _mm_store_pd(work+i, _mm_add_pd(sum_ai_SSE0, _mm_load_pd(work+i)));
-    }
-
-
-    for (i = 0; i < natoms/2+1; i++)
-    {
-        work[i] += work[natoms+i];
-    }
-
-    /* Parallel summations would go here if ever implemented in DD */
-
-    if (gb_algorithm == egbHCT)
-    {
-        /* HCT */
-        for (i = 0; i < natoms; i++)
-        {
-            if (born->use[i] != 0)
-            {
-                rai     = top->atomtypes.gb_radius[mdatoms->typeA[i]]-born->gb_doffset;
-                sum_ai  = 1.0/rai - work[i];
-                min_rad = rai + born->gb_doffset;
-                rad     = 1.0/sum_ai;
-
-                born->bRad[i]   = rad > min_rad ? rad : min_rad;
-                fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
-            }
-        }
-
-    }
-    else
-    {
-        /* OBC */
-
-        /* Calculate the radii */
-        for (i = 0; i < natoms; i++)
-        {
-
-            if (born->use[i] != 0)
-            {
-                rai        = top->atomtypes.gb_radius[mdatoms->typeA[i]];
-                rai_inv2   = 1.0/rai;
-                rai        = rai-born->gb_doffset;
-                rai_inv    = 1.0/rai;
-                sum_ai     = rai * work[i];
-                sum_ai2    = sum_ai  * sum_ai;
-                sum_ai3    = sum_ai2 * sum_ai;
-
-                tsum          = tanh(born->obc_alpha*sum_ai-born->obc_beta*sum_ai2+born->obc_gamma*sum_ai3);
-                born->bRad[i] = rai_inv - tsum*rai_inv2;
-                born->bRad[i] = 1.0 / born->bRad[i];
-
-                fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
-
-                tchain         = rai * (born->obc_alpha-2*born->obc_beta*sum_ai+3*born->obc_gamma*sum_ai2);
-                born->drobc[i] = (1.0-tsum*tsum)*tchain*rai_inv2;
-            }
-        }
-    }
-
-    return 0;
-}
-
-
-
-
-
-
-
-
-int
-genborn_allvsall_calc_chainrule_sse2_double(t_forcerec   *           fr,
-                                            t_mdatoms   *            mdatoms,
-                                            gmx_genborn_t   *        born,
-                                            double *                 x,
-                                            double *                 f,
-                                            int                      gb_algorithm,
-                                            void   *                 paadata)
-{
-    gmx_allvsallgb2_data_t *aadata;
-    int                     natoms;
-    int                     ni0, ni1;
-    int                     nj0, nj1, nj2, nj3;
-    int                     i, j, k, n;
-    int                     idx;
-    int              *      mask;
-    int              *      pmask0;
-    int              *      emask0;
-    int              *      jindex;
-
-    double                  ix, iy, iz;
-    double                  fix, fiy, fiz;
-    double                  jx, jy, jz;
-    double                  dx, dy, dz;
-    double                  tx, ty, tz;
-    double                  rbai, rbaj, fgb, fgb_ai, rbi;
-    double            *     rb;
-    double            *     dadx;
-    double            *     x_align;
-    double            *     y_align;
-    double            *     z_align;
-    double            *     fx_align;
-    double            *     fy_align;
-    double            *     fz_align;
-    double                  tmpsum[2];
-
-    __m128d                 jmask_SSE0, jmask_SSE1;
-    __m128d                 ix_SSE0, iy_SSE0, iz_SSE0;
-    __m128d                 ix_SSE1, iy_SSE1, iz_SSE1;
-    __m128d                 fix_SSE0, fiy_SSE0, fiz_SSE0;
-    __m128d                 fix_SSE1, fiy_SSE1, fiz_SSE1;
-    __m128d                 rbai_SSE0, rbai_SSE1;
-    __m128d                 imask_SSE0, imask_SSE1;
-    __m128d                 jx_SSE, jy_SSE, jz_SSE, rbaj_SSE;
-    __m128d                 dx_SSE0, dy_SSE0, dz_SSE0;
-    __m128d                 dx_SSE1, dy_SSE1, dz_SSE1;
-    __m128d                 fgb_SSE0, fgb_ai_SSE0;
-    __m128d                 fgb_SSE1, fgb_ai_SSE1;
-    __m128d                 tx_SSE0, ty_SSE0, tz_SSE0;
-    __m128d                 tx_SSE1, ty_SSE1, tz_SSE1;
-    __m128d                 t1, t2, tmpSSE;
-
-    natoms              = mdatoms->nr;
-    ni0                 = 0;
-    ni1                 = mdatoms->homenr;
-
-    aadata = (gmx_allvsallgb2_data_t *)paadata;
-
-    x_align  = aadata->x_align;
-    y_align  = aadata->y_align;
-    z_align  = aadata->z_align;
-    fx_align = aadata->fx_align;
-    fy_align = aadata->fy_align;
-    fz_align = aadata->fz_align;
-
-    jindex    = aadata->jindex_gb;
-    dadx      = fr->dadx;
-
-    n  = 0;
-    rb = aadata->work;
-
-    /* Loop to get the proper form for the Born radius term */
-    if (gb_algorithm == egbSTILL)
-    {
-        for (i = 0; i < natoms; i++)
-        {
-            rbi   = born->bRad[i];
-            rb[i] = (2 * rbi * rbi * fr->dvda[i])/ONE_4PI_EPS0;
-        }
-    }
-    else if (gb_algorithm == egbHCT)
-    {
-        for (i = 0; i < natoms; i++)
-        {
-            rbi   = born->bRad[i];
-            rb[i] = rbi * rbi * fr->dvda[i];
-        }
-    }
-    else if (gb_algorithm == egbOBC)
-    {
-        for (idx = 0; idx < natoms; idx++)
-        {
-            rbi     = born->bRad[idx];
-            rb[idx] = rbi * rbi * born->drobc[idx] * fr->dvda[idx];
-        }
-    }
-
-    for (i = 0; i < 2*natoms; i++)
-    {
-        fx_align[i]       = 0;
-        fy_align[i]       = 0;
-        fz_align[i]       = 0;
-    }
-
-
-    for (i = 0; i < natoms; i++)
-    {
-        rb[i+natoms] = rb[i];
-    }
-
-    for (i = ni0; i < ni1; i += UNROLLI)
-    {
-        /* We assume shifts are NOT used for all-vs-all interactions */
-
-        /* Load i atom data */
-        ix_SSE0          = _mm_load1_pd(x_align+i);
-        iy_SSE0          = _mm_load1_pd(y_align+i);
-        iz_SSE0          = _mm_load1_pd(z_align+i);
-        ix_SSE1          = _mm_load1_pd(x_align+i+1);
-        iy_SSE1          = _mm_load1_pd(y_align+i+1);
-        iz_SSE1          = _mm_load1_pd(z_align+i+1);
-
-        fix_SSE0         = _mm_setzero_pd();
-        fiy_SSE0         = _mm_setzero_pd();
-        fiz_SSE0         = _mm_setzero_pd();
-        fix_SSE1         = _mm_setzero_pd();
-        fiy_SSE1         = _mm_setzero_pd();
-        fiz_SSE1         = _mm_setzero_pd();
-
-        rbai_SSE0        = _mm_load1_pd(rb+i);
-        rbai_SSE1        = _mm_load1_pd(rb+i+1);
-
-        /* Load limits for loop over neighbors */
-        nj0              = jindex[4*i];
-        nj3              = jindex[4*i+3];
-
-        /* No masks necessary, since the stored chain rule derivatives will be zero in those cases! */
-        for (j = nj0; j < nj3; j += UNROLLJ)
-        {
-            /* load j atom coordinates */
-            jx_SSE           = _mm_load_pd(x_align+j);
-            jy_SSE           = _mm_load_pd(y_align+j);
-            jz_SSE           = _mm_load_pd(z_align+j);
-
-            /* Calculate distance */
-            dx_SSE0          = _mm_sub_pd(ix_SSE0, jx_SSE);
-            dy_SSE0          = _mm_sub_pd(iy_SSE0, jy_SSE);
-            dz_SSE0          = _mm_sub_pd(iz_SSE0, jz_SSE);
-            dx_SSE1          = _mm_sub_pd(ix_SSE1, jx_SSE);
-            dy_SSE1          = _mm_sub_pd(iy_SSE1, jy_SSE);
-            dz_SSE1          = _mm_sub_pd(iz_SSE1, jz_SSE);
-
-            rbaj_SSE         = _mm_load_pd(rb+j);
-
-            fgb_SSE0         = _mm_mul_pd(rbai_SSE0, _mm_load_pd(dadx));
-            dadx            += 2;
-            fgb_SSE1         = _mm_mul_pd(rbai_SSE1, _mm_load_pd(dadx));
-            dadx            += 2;
-
-            fgb_ai_SSE0      = _mm_mul_pd(rbaj_SSE, _mm_load_pd(dadx));
-            dadx            += 2;
-            fgb_ai_SSE1      = _mm_mul_pd(rbaj_SSE, _mm_load_pd(dadx));
-            dadx            += 2;
-
-            /* Total force between ai and aj is the sum of ai->aj and aj->ai */
-            fgb_SSE0         = _mm_add_pd(fgb_SSE0, fgb_ai_SSE0);
-            fgb_SSE1         = _mm_add_pd(fgb_SSE1, fgb_ai_SSE1);
-
-            /* Calculate temporary vectorial force */
-            tx_SSE0            = _mm_mul_pd(fgb_SSE0, dx_SSE0);
-            ty_SSE0            = _mm_mul_pd(fgb_SSE0, dy_SSE0);
-            tz_SSE0            = _mm_mul_pd(fgb_SSE0, dz_SSE0);
-            tx_SSE1            = _mm_mul_pd(fgb_SSE1, dx_SSE1);
-            ty_SSE1            = _mm_mul_pd(fgb_SSE1, dy_SSE1);
-            tz_SSE1            = _mm_mul_pd(fgb_SSE1, dz_SSE1);
-
-            /* Increment i atom force */
-            fix_SSE0          = _mm_add_pd(fix_SSE0, tx_SSE0);
-            fiy_SSE0          = _mm_add_pd(fiy_SSE0, ty_SSE0);
-            fiz_SSE0          = _mm_add_pd(fiz_SSE0, tz_SSE0);
-            fix_SSE1          = _mm_add_pd(fix_SSE1, tx_SSE1);
-            fiy_SSE1          = _mm_add_pd(fiy_SSE1, ty_SSE1);
-            fiz_SSE1          = _mm_add_pd(fiz_SSE1, tz_SSE1);
-
-            /* Decrement j atom force */
-            _mm_store_pd(fx_align+j,
-                         _mm_sub_pd( _mm_load_pd(fx_align+j), _mm_add_pd(tx_SSE0, tx_SSE1) ));
-            _mm_store_pd(fy_align+j,
-                         _mm_sub_pd( _mm_load_pd(fy_align+j), _mm_add_pd(ty_SSE0, ty_SSE1) ));
-            _mm_store_pd(fz_align+j,
-                         _mm_sub_pd( _mm_load_pd(fz_align+j), _mm_add_pd(tz_SSE0, tz_SSE1) ));
-        }
-
-        /* Add i forces to mem */
-        GMX_MM_TRANSPOSE2_PD(fix_SSE0, fix_SSE1);
-        fix_SSE0 = _mm_add_pd(fix_SSE0, fix_SSE1);
-        _mm_store_pd(fx_align+i, _mm_add_pd(fix_SSE0, _mm_load_pd(fx_align+i)));
-
-        GMX_MM_TRANSPOSE2_PD(fiy_SSE0, fiy_SSE1);
-        fiy_SSE0 = _mm_add_pd(fiy_SSE0, fiy_SSE1);
-        _mm_store_pd(fy_align+i, _mm_add_pd(fiy_SSE0, _mm_load_pd(fy_align+i)));
-
-        GMX_MM_TRANSPOSE2_PD(fiz_SSE0, fiz_SSE1);
-        fiz_SSE0 = _mm_add_pd(fiz_SSE0, fiz_SSE1);
-        _mm_store_pd(fz_align+i, _mm_add_pd(fiz_SSE0, _mm_load_pd(fz_align+i)));
-    }
-
-    for (i = 0; i < natoms; i++)
-    {
-        f[3*i]       += fx_align[i] + fx_align[natoms+i];
-        f[3*i+1]     += fy_align[i] + fy_align[natoms+i];
-        f[3*i+2]     += fz_align[i] + fz_align[natoms+i];
-    }
-
-    return 0;
-}
-
-#else
-/* dummy variable when not using SSE */
-int genborn_allvsall_sse2_double_dummy;
-
-
-#endif
diff --git a/src/gromacs/mdlib/genborn_allvsall_sse2_double.h b/src/gromacs/mdlib/genborn_allvsall_sse2_double.h
deleted file mode 100644 (file)
index 3629475..0000000
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team.
- * Copyright (c) 2010,2014, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#ifndef _GENBORN_ALLVSALL_SSE2_DOUBLE_H
-#define _GENBORN_ALLVSALL_SSE2_DOUBLE_H
-
-#include "gromacs/legacyheaders/typedefs.h"
-#include "gromacs/legacyheaders/types/simple.h"
-
-int
-genborn_allvsall_calc_still_radii_sse2_double(t_forcerec *           fr,
-                                              t_mdatoms *            mdatoms,
-                                              gmx_genborn_t *        born,
-                                              gmx_localtop_t *       top,
-                                              double *               x,
-                                              t_commrec *            cr,
-                                              void *                 work);
-
-int
-genborn_allvsall_calc_hct_obc_radii_sse2_double(t_forcerec *           fr,
-                                                t_mdatoms *            mdatoms,
-                                                gmx_genborn_t *        born,
-                                                int                    gb_algorithm,
-                                                gmx_localtop_t *       top,
-                                                double *               x,
-                                                t_commrec *            cr,
-                                                void *                 work);
-
-int
-genborn_allvsall_calc_chainrule_sse2_double(t_forcerec *           fr,
-                                            t_mdatoms *            mdatoms,
-                                            gmx_genborn_t *        born,
-                                            double *               x,
-                                            double *               f,
-                                            int                    gb_algorithm,
-                                            void *                 work);
-
-#endif
diff --git a/src/gromacs/mdlib/genborn_allvsall_sse2_single.c b/src/gromacs/mdlib/genborn_allvsall_sse2_single.c
deleted file mode 100644 (file)
index 8c3ce47..0000000
+++ /dev/null
@@ -1,3500 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team.
- * Copyright (c) 2012,2014, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#include "gmxpre.h"
-
-#include <math.h>
-
-#include "gromacs/legacyheaders/genborn.h"
-#include "gromacs/legacyheaders/network.h"
-#include "gromacs/legacyheaders/types/simple.h"
-#include "gromacs/math/units.h"
-#include "gromacs/math/vec.h"
-#include "gromacs/mdlib/genborn_allvsall.h"
-#include "gromacs/utility/smalloc.h"
-
-#if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER)
-
-#include <gmx_sse2_single.h>
-
-
-#define SIMD_WIDTH 4
-#define UNROLLI    4
-#define UNROLLJ    4
-
-
-
-
-
-
-
-
-
-typedef struct
-{
-    int *      jindex_gb;
-    int **     prologue_mask_gb;
-    int **     epilogue_mask;
-    int *      imask;
-    real *     gb_radius;
-    real *     workparam;
-    real *     work;
-    real *     x_align;
-    real *     y_align;
-    real *     z_align;
-    real *     fx_align;
-    real *     fy_align;
-    real *     fz_align;
-}
-gmx_allvsallgb2_data_t;
-
-
-static int
-calc_maxoffset(int i, int natoms)
-{
-    int maxoffset;
-
-    if ((natoms % 2) == 1)
-    {
-        /* Odd number of atoms, easy */
-        maxoffset = natoms/2;
-    }
-    else if ((natoms % 4) == 0)
-    {
-        /* Multiple of four is hard */
-        if (i < natoms/2)
-        {
-            if ((i % 2) == 0)
-            {
-                maxoffset = natoms/2;
-            }
-            else
-            {
-                maxoffset = natoms/2-1;
-            }
-        }
-        else
-        {
-            if ((i % 2) == 1)
-            {
-                maxoffset = natoms/2;
-            }
-            else
-            {
-                maxoffset = natoms/2-1;
-            }
-        }
-    }
-    else
-    {
-        /* natoms/2 = odd */
-        if ((i % 2) == 0)
-        {
-            maxoffset = natoms/2;
-        }
-        else
-        {
-            maxoffset = natoms/2-1;
-        }
-    }
-
-    return maxoffset;
-}
-
-static void
-setup_gb_exclusions_and_indices(gmx_allvsallgb2_data_t     *   aadata,
-                                t_ilist     *                  ilist,
-                                int                            start,
-                                int                            end,
-                                int                            natoms,
-                                gmx_bool                       bInclude12,
-                                gmx_bool                       bInclude13,
-                                gmx_bool                       bInclude14)
-{
-    int   i, j, k, tp;
-    int   a1, a2;
-    int   ni0, ni1, nj0, nj1, nj;
-    int   imin, imax, iexcl;
-    int   max_offset;
-    int   max_excl_offset;
-    int   firstinteraction;
-    int   ibase;
-    int  *pi;
-
-    /* This routine can appear to be a bit complex, but it is mostly book-keeping.
-     * To enable the fast all-vs-all kernel we need to be able to stream through all coordinates
-     * whether they should interact or not.
-     *
-     * To avoid looping over the exclusions, we create a simple mask that is 1 if the interaction
-     * should be present, otherwise 0. Since exclusions typically only occur when i & j are close,
-     * we create a jindex array with three elements per i atom: the starting point, the point to
-     * which we need to check exclusions, and the end point.
-     * This way we only have to allocate a short exclusion mask per i atom.
-     */
-
-    ni0 = (start/UNROLLI)*UNROLLI;
-    ni1 = ((end+UNROLLI-1)/UNROLLI)*UNROLLI;
-
-    /* Set the interaction mask to only enable the i atoms we want to include */
-    snew(pi, natoms+UNROLLI+2*SIMD_WIDTH);
-    aadata->imask = (int *) (((size_t) pi + 16) & (~((size_t) 15)));
-    for (i = 0; i < natoms+UNROLLI; i++)
-    {
-        aadata->imask[i] = (i >= start && i < end) ? 0xFFFFFFFF : 0;
-    }
-
-    /* Allocate memory for our modified jindex array */
-    snew(aadata->jindex_gb, 4*(natoms+UNROLLI));
-    for (i = 0; i < 4*(natoms+UNROLLI); i++)
-    {
-        aadata->jindex_gb[i] = 0;
-    }
-
-    /* Create the exclusion masks for the prologue part */
-    snew(aadata->prologue_mask_gb, natoms+UNROLLI); /* list of pointers */
-
-    /* First zero everything to avoid uninitialized data */
-    for (i = 0; i < natoms+UNROLLI; i++)
-    {
-        aadata->prologue_mask_gb[i] = NULL;
-    }
-
-    /* Calculate the largest exclusion range we need for each UNROLLI-tuplet of i atoms. */
-    for (ibase = ni0; ibase < ni1; ibase += UNROLLI)
-    {
-        max_excl_offset = -1;
-
-        /* First find maxoffset for the next 4 atoms (or fewer if we are close to end) */
-        imax = ((ibase+UNROLLI) < end) ? (ibase+UNROLLI) : end;
-
-        /* Which atom is the first we (might) interact with? */
-        imin = natoms; /* Guaranteed to be overwritten by one of 'firstinteraction' */
-        for (i = ibase; i < imax; i++)
-        {
-            /* Before exclusions, which atom is the first we (might) interact with? */
-            firstinteraction = i+1;
-            max_offset       = calc_maxoffset(i, natoms);
-
-            if (!bInclude12)
-            {
-                for (j = 0; j < ilist[F_GB12].nr; j += 3)
-                {
-                    a1 = ilist[F_GB12].iatoms[j+1];
-                    a2 = ilist[F_GB12].iatoms[j+2];
-
-                    if (a1 == i)
-                    {
-                        k = a2;
-                    }
-                    else if (a2 == i)
-                    {
-                        k = a1;
-                    }
-                    else
-                    {
-                        continue;
-                    }
-
-                    if (k == firstinteraction)
-                    {
-                        firstinteraction++;
-                    }
-                }
-            }
-            if (!bInclude13)
-            {
-                for (j = 0; j < ilist[F_GB13].nr; j += 3)
-                {
-                    a1 = ilist[F_GB13].iatoms[j+1];
-                    a2 = ilist[F_GB13].iatoms[j+2];
-
-                    if (a1 == i)
-                    {
-                        k = a2;
-                    }
-                    else if (a2 == i)
-                    {
-                        k = a1;
-                    }
-                    else
-                    {
-                        continue;
-                    }
-
-                    if (k == firstinteraction)
-                    {
-                        firstinteraction++;
-                    }
-                }
-            }
-            if (!bInclude14)
-            {
-                for (j = 0; j < ilist[F_GB14].nr; j += 3)
-                {
-                    a1 = ilist[F_GB14].iatoms[j+1];
-                    a2 = ilist[F_GB14].iatoms[j+2];
-                    if (a1 == i)
-                    {
-                        k = a2;
-                    }
-                    else if (a2 == i)
-                    {
-                        k = a1;
-                    }
-                    else
-                    {
-                        continue;
-                    }
-
-                    if (k == firstinteraction)
-                    {
-                        firstinteraction++;
-                    }
-                }
-            }
-            imin = (firstinteraction < imin) ? firstinteraction : imin;
-        }
-        /* round down to j unrolling factor */
-        imin = (imin/UNROLLJ)*UNROLLJ;
-
-        for (i = ibase; i < imax; i++)
-        {
-            max_offset = calc_maxoffset(i, natoms);
-
-            if (!bInclude12)
-            {
-                for (j = 0; j < ilist[F_GB12].nr; j += 3)
-                {
-                    a1 = ilist[F_GB12].iatoms[j+1];
-                    a2 = ilist[F_GB12].iatoms[j+2];
-
-                    if (a1 == i)
-                    {
-                        k = a2;
-                    }
-                    else if (a2 == i)
-                    {
-                        k = a1;
-                    }
-                    else
-                    {
-                        continue;
-                    }
-
-                    if (k < imin)
-                    {
-                        k += natoms;
-                    }
-
-                    if (k > i+max_offset)
-                    {
-                        continue;
-                    }
-
-                    k = k - imin;
-
-                    if (k+natoms <= max_offset)
-                    {
-                        k += natoms;
-                    }
-                    max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
-                }
-            }
-            if (!bInclude13)
-            {
-                for (j = 0; j < ilist[F_GB13].nr; j += 3)
-                {
-                    a1 = ilist[F_GB13].iatoms[j+1];
-                    a2 = ilist[F_GB13].iatoms[j+2];
-
-                    if (a1 == i)
-                    {
-                        k = a2;
-                    }
-                    else if (a2 == i)
-                    {
-                        k = a1;
-                    }
-                    else
-                    {
-                        continue;
-                    }
-
-                    if (k < imin)
-                    {
-                        k += natoms;
-                    }
-
-                    if (k > i+max_offset)
-                    {
-                        continue;
-                    }
-
-                    k = k - imin;
-
-                    if (k+natoms <= max_offset)
-                    {
-                        k += natoms;
-                    }
-                    max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
-                }
-            }
-            if (!bInclude14)
-            {
-                for (j = 0; j < ilist[F_GB14].nr; j += 3)
-                {
-                    a1 = ilist[F_GB14].iatoms[j+1];
-                    a2 = ilist[F_GB14].iatoms[j+2];
-
-                    if (a1 == i)
-                    {
-                        k = a2;
-                    }
-                    else if (a2 == i)
-                    {
-                        k = a1;
-                    }
-                    else
-                    {
-                        continue;
-                    }
-
-                    if (k < imin)
-                    {
-                        k += natoms;
-                    }
-
-                    if (k > i+max_offset)
-                    {
-                        continue;
-                    }
-
-                    k = k - imin;
-
-                    if (k+natoms <= max_offset)
-                    {
-                        k += natoms;
-                    }
-                    max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
-                }
-            }
-        }
-
-        /* The offset specifies the last atom to be excluded, so add one unit to get an upper loop limit */
-        max_excl_offset++;
-        /* round up to j unrolling factor */
-        max_excl_offset = (max_excl_offset/UNROLLJ+1)*UNROLLJ;
-
-        /* Set all the prologue masks length to this value (even for i>end) */
-        for (i = ibase; i < ibase+UNROLLI; i++)
-        {
-            aadata->jindex_gb[4*i]   = imin;
-            aadata->jindex_gb[4*i+1] = imin+max_excl_offset;
-        }
-    }
-
-    /* Now the hard part, loop over it all again to calculate the actual contents of the prologue masks */
-    for (ibase = ni0; ibase < ni1; ibase += UNROLLI)
-    {
-        for (i = ibase; i < ibase+UNROLLI; i++)
-        {
-            nj   = aadata->jindex_gb[4*i+1] - aadata->jindex_gb[4*i];
-            imin = aadata->jindex_gb[4*i];
-
-            /* Allocate aligned memory */
-            snew(pi, nj+2*SIMD_WIDTH);
-            aadata->prologue_mask_gb[i] = (int *) (((size_t) pi + 16) & (~((size_t) 15)));
-
-            max_offset = calc_maxoffset(i, natoms);
-
-            /* Include interactions i+1 <= j < i+maxoffset */
-            for (k = 0; k < nj; k++)
-            {
-                j = imin + k;
-
-                if ( (j > i) && (j <= i+max_offset) )
-                {
-                    aadata->prologue_mask_gb[i][k] = 0xFFFFFFFF;
-                }
-                else
-                {
-                    aadata->prologue_mask_gb[i][k] = 0;
-                }
-            }
-
-            /* Clear out the explicit exclusions */
-            if (i < end)
-            {
-                if (!bInclude12)
-                {
-                    for (j = 0; j < ilist[F_GB12].nr; j += 3)
-                    {
-                        a1 = ilist[F_GB12].iatoms[j+1];
-                        a2 = ilist[F_GB12].iatoms[j+2];
-
-                        if (a1 == i)
-                        {
-                            k = a2;
-                        }
-                        else if (a2 == i)
-                        {
-                            k = a1;
-                        }
-                        else
-                        {
-                            continue;
-                        }
-
-                        if (k > i+max_offset)
-                        {
-                            continue;
-                        }
-                        k = k-i;
-
-                        if (k+natoms <= max_offset)
-                        {
-                            k += natoms;
-                        }
-
-                        k = k+i-imin;
-                        if (k >= 0)
-                        {
-                            aadata->prologue_mask_gb[i][k] = 0;
-                        }
-                    }
-                }
-                if (!bInclude13)
-                {
-                    for (j = 0; j < ilist[F_GB13].nr; j += 3)
-                    {
-                        a1 = ilist[F_GB13].iatoms[j+1];
-                        a2 = ilist[F_GB13].iatoms[j+2];
-
-                        if (a1 == i)
-                        {
-                            k = a2;
-                        }
-                        else if (a2 == i)
-                        {
-                            k = a1;
-                        }
-                        else
-                        {
-                            continue;
-                        }
-
-                        if (k > i+max_offset)
-                        {
-                            continue;
-                        }
-                        k = k-i;
-
-                        if (k+natoms <= max_offset)
-                        {
-                            k += natoms;
-                        }
-
-                        k = k+i-imin;
-                        if (k >= 0)
-                        {
-                            aadata->prologue_mask_gb[i][k] = 0;
-                        }
-                    }
-                }
-                if (!bInclude14)
-                {
-                    for (j = 0; j < ilist[F_GB14].nr; j += 3)
-                    {
-                        a1 = ilist[F_GB14].iatoms[j+1];
-                        a2 = ilist[F_GB14].iatoms[j+2];
-
-                        if (a1 == i)
-                        {
-                            k = a2;
-                        }
-                        else if (a2 == i)
-                        {
-                            k = a1;
-                        }
-                        else
-                        {
-                            continue;
-                        }
-
-                        if (k > i+max_offset)
-                        {
-                            continue;
-                        }
-                        k = k-i;
-
-                        if (k+natoms <= max_offset)
-                        {
-                            k += natoms;
-                        }
-
-                        k = k+i-imin;
-                        if (k >= 0)
-                        {
-                            aadata->prologue_mask_gb[i][k] = 0;
-                        }
-                    }
-                }
-            }
-        }
-    }
-
-    /* Construct the epilogue mask - this just contains the check for maxoffset */
-    snew(aadata->epilogue_mask, natoms+UNROLLI);
-
-    /* First zero everything to avoid uninitialized data */
-    for (i = 0; i < natoms+UNROLLI; i++)
-    {
-        aadata->jindex_gb[4*i+2]    = aadata->jindex_gb[4*i+1];
-        aadata->jindex_gb[4*i+3]    = aadata->jindex_gb[4*i+1];
-        aadata->epilogue_mask[i]    = NULL;
-    }
-
-    for (ibase = ni0; ibase < ni1; ibase += UNROLLI)
-    {
-        /* Find the lowest index for which we need to use the epilogue */
-        imin       = ibase;
-        max_offset = calc_maxoffset(imin, natoms);
-
-        imin = imin + 1 + max_offset;
-
-        /* Find largest index for which we need to use the epilogue */
-        imax = ibase + UNROLLI-1;
-        imax = (imax < end) ? imax : end;
-
-        max_offset = calc_maxoffset(imax, natoms);
-        imax       = imax + 1 + max_offset + UNROLLJ - 1;
-
-        for (i = ibase; i < ibase+UNROLLI; i++)
-        {
-            /* Start of epilogue - round down to j tile limit */
-            aadata->jindex_gb[4*i+2] = (imin/UNROLLJ)*UNROLLJ;
-            /* Make sure we dont overlap - for small systems everything is done in the prologue */
-            aadata->jindex_gb[4*i+2] = (aadata->jindex_gb[4*i+1] > aadata->jindex_gb[4*i+2]) ? aadata->jindex_gb[4*i+1] : aadata->jindex_gb[4*i+2];
-            /* Round upwards to j tile limit */
-            aadata->jindex_gb[4*i+3] = (imax/UNROLLJ)*UNROLLJ;
-            /* Make sure we dont have a negative range for the epilogue */
-            aadata->jindex_gb[4*i+3] = (aadata->jindex_gb[4*i+2] > aadata->jindex_gb[4*i+3]) ? aadata->jindex_gb[4*i+2] : aadata->jindex_gb[4*i+3];
-        }
-    }
-
-    /* And fill it with data... */
-
-    for (ibase = ni0; ibase < ni1; ibase += UNROLLI)
-    {
-        for (i = ibase; i < ibase+UNROLLI; i++)
-        {
-
-            nj = aadata->jindex_gb[4*i+3] - aadata->jindex_gb[4*i+2];
-
-            /* Allocate aligned memory */
-            snew(pi, nj+2*SIMD_WIDTH);
-            aadata->epilogue_mask[i] = (int *) (((size_t) pi + 16) & (~((size_t) 15)));
-
-            max_offset = calc_maxoffset(i, natoms);
-
-            for (k = 0; k < nj; k++)
-            {
-                j = aadata->jindex_gb[4*i+2] + k;
-                aadata->epilogue_mask[i][k] = (j <= i+max_offset) ? 0xFFFFFFFF : 0;
-            }
-        }
-    }
-}
-
-
-static void
-genborn_allvsall_setup(gmx_allvsallgb2_data_t     **  p_aadata,
-                       gmx_localtop_t     *           top,
-                       gmx_genborn_t     *            born,
-                       t_mdatoms     *                mdatoms,
-                       real                           radius_offset,
-                       int                            gb_algorithm,
-                       gmx_bool                       bInclude12,
-                       gmx_bool                       bInclude13,
-                       gmx_bool                       bInclude14)
-{
-    int                     i, j, idx;
-    int                     natoms;
-    gmx_allvsallgb2_data_t *aadata;
-    real                   *p;
-
-    natoms = mdatoms->nr;
-
-    snew(aadata, 1);
-    *p_aadata = aadata;
-
-    snew(p, 2*natoms+2*SIMD_WIDTH);
-    aadata->x_align = (real *) (((size_t) p + 16) & (~((size_t) 15)));
-    snew(p, 2*natoms+2*SIMD_WIDTH);
-    aadata->y_align = (real *) (((size_t) p + 16) & (~((size_t) 15)));
-    snew(p, 2*natoms+2*SIMD_WIDTH);
-    aadata->z_align = (real *) (((size_t) p + 16) & (~((size_t) 15)));
-    snew(p, 2*natoms+2*SIMD_WIDTH);
-    aadata->fx_align = (real *) (((size_t) p + 16) & (~((size_t) 15)));
-    snew(p, 2*natoms+2*SIMD_WIDTH);
-    aadata->fy_align = (real *) (((size_t) p + 16) & (~((size_t) 15)));
-    snew(p, 2*natoms+2*SIMD_WIDTH);
-    aadata->fz_align = (real *) (((size_t) p + 16) & (~((size_t) 15)));
-
-    snew(p, 2*natoms+UNROLLJ+SIMD_WIDTH);
-    aadata->gb_radius = (real *) (((size_t) p + 16) & (~((size_t) 15)));
-
-    snew(p, 2*natoms+UNROLLJ+SIMD_WIDTH);
-    aadata->workparam = (real *) (((size_t) p + 16) & (~((size_t) 15)));
-
-    snew(p, 2*natoms+UNROLLJ+SIMD_WIDTH);
-    aadata->work = (real *) (((size_t) p + 16) & (~((size_t) 15)));
-
-    for (i = 0; i < mdatoms->nr; i++)
-    {
-        aadata->gb_radius[i] = top->atomtypes.gb_radius[mdatoms->typeA[i]] - radius_offset;
-        if (gb_algorithm == egbSTILL)
-        {
-            aadata->workparam[i] = born->vsolv[i];
-        }
-        else if (gb_algorithm == egbOBC)
-        {
-            aadata->workparam[i] = born->param[i];
-        }
-        aadata->work[i]      = 0.0;
-    }
-    for (i = 0; i < mdatoms->nr; i++)
-    {
-        aadata->gb_radius[natoms+i] = aadata->gb_radius[i];
-        aadata->workparam[natoms+i] = aadata->workparam[i];
-        aadata->work[natoms+i]      = aadata->work[i];
-    }
-
-    for (i = 0; i < 2*natoms+SIMD_WIDTH; i++)
-    {
-        aadata->x_align[i]  = 0.0;
-        aadata->y_align[i]  = 0.0;
-        aadata->z_align[i]  = 0.0;
-        aadata->fx_align[i] = 0.0;
-        aadata->fy_align[i] = 0.0;
-        aadata->fz_align[i] = 0.0;
-    }
-
-    setup_gb_exclusions_and_indices(aadata, top->idef.il, 0, mdatoms->homenr, mdatoms->nr,
-                                    bInclude12, bInclude13, bInclude14);
-}
-
-
-int
-genborn_allvsall_calc_still_radii_sse2_single(t_forcerec *           fr,
-                                              t_mdatoms *            mdatoms,
-                                              gmx_genborn_t *        born,
-                                              gmx_localtop_t *       top,
-                                              real *                 x,
-                                              t_commrec *            cr,
-                                              void *                 paadata)
-{
-    gmx_allvsallgb2_data_t *aadata;
-    int                     natoms;
-    int                     ni0, ni1;
-    int                     nj0, nj1, nj2, nj3;
-    int                     i, j, k, n;
-    int              *      mask;
-    int              *      pmask0;
-    int              *      pmask1;
-    int              *      pmask2;
-    int              *      pmask3;
-    int              *      emask0;
-    int              *      emask1;
-    int              *      emask2;
-    int              *      emask3;
-    real                    ix, iy, iz;
-    real                    jx, jy, jz;
-    real                    dx, dy, dz;
-    real                    rsq, rinv;
-    real                    gpi, rai, vai;
-    real                    prod_ai;
-    real                    irsq, idr4, idr6;
-    real                    raj, rvdw, ratio;
-    real                    vaj, ccf, dccf, theta, cosq;
-    real                    term, prod, icf4, icf6, gpi2, factor, sinq;
-    real              *     gb_radius;
-    real              *     vsolv;
-    real              *     work;
-    real                    tmpsum[4];
-    real              *     x_align;
-    real              *     y_align;
-    real              *     z_align;
-    int              *      jindex;
-    real              *     dadx;
-
-    __m128                  ix_SSE0, iy_SSE0, iz_SSE0;
-    __m128                  ix_SSE1, iy_SSE1, iz_SSE1;
-    __m128                  ix_SSE2, iy_SSE2, iz_SSE2;
-    __m128                  ix_SSE3, iy_SSE3, iz_SSE3;
-    __m128                  gpi_SSE0, rai_SSE0, prod_ai_SSE0;
-    __m128                  gpi_SSE1, rai_SSE1, prod_ai_SSE1;
-    __m128                  gpi_SSE2, rai_SSE2, prod_ai_SSE2;
-    __m128                  gpi_SSE3, rai_SSE3, prod_ai_SSE3;
-    __m128                  imask_SSE0, jmask_SSE0;
-    __m128                  imask_SSE1, jmask_SSE1;
-    __m128                  imask_SSE2, jmask_SSE2;
-    __m128                  imask_SSE3, jmask_SSE3;
-    __m128                  jx_SSE, jy_SSE, jz_SSE;
-    __m128                  dx_SSE0, dy_SSE0, dz_SSE0;
-    __m128                  dx_SSE1, dy_SSE1, dz_SSE1;
-    __m128                  dx_SSE2, dy_SSE2, dz_SSE2;
-    __m128                  dx_SSE3, dy_SSE3, dz_SSE3;
-    __m128                  rsq_SSE0, rinv_SSE0, irsq_SSE0, idr4_SSE0, idr6_SSE0;
-    __m128                  rsq_SSE1, rinv_SSE1, irsq_SSE1, idr4_SSE1, idr6_SSE1;
-    __m128                  rsq_SSE2, rinv_SSE2, irsq_SSE2, idr4_SSE2, idr6_SSE2;
-    __m128                  rsq_SSE3, rinv_SSE3, irsq_SSE3, idr4_SSE3, idr6_SSE3;
-    __m128                  raj_SSE, vaj_SSE, prod_SSE;
-    __m128                  rvdw_SSE0, ratio_SSE0;
-    __m128                  rvdw_SSE1, ratio_SSE1;
-    __m128                  rvdw_SSE2, ratio_SSE2;
-    __m128                  rvdw_SSE3, ratio_SSE3;
-    __m128                  theta_SSE0, sinq_SSE0, cosq_SSE0, term_SSE0;
-    __m128                  theta_SSE1, sinq_SSE1, cosq_SSE1, term_SSE1;
-    __m128                  theta_SSE2, sinq_SSE2, cosq_SSE2, term_SSE2;
-    __m128                  theta_SSE3, sinq_SSE3, cosq_SSE3, term_SSE3;
-    __m128                  ccf_SSE0, dccf_SSE0;
-    __m128                  ccf_SSE1, dccf_SSE1;
-    __m128                  ccf_SSE2, dccf_SSE2;
-    __m128                  ccf_SSE3, dccf_SSE3;
-    __m128                  icf4_SSE0, icf6_SSE0;
-    __m128                  icf4_SSE1, icf6_SSE1;
-    __m128                  icf4_SSE2, icf6_SSE2;
-    __m128                  icf4_SSE3, icf6_SSE3;
-    __m128                  half_SSE, one_SSE, two_SSE, four_SSE;
-    __m128                  still_p4_SSE, still_p5inv_SSE, still_pip5_SSE;
-
-    natoms              = mdatoms->nr;
-    ni0                 = 0;
-    ni1                 = mdatoms->homenr;
-
-    n = 0;
-
-    aadata = *((gmx_allvsallgb2_data_t **)paadata);
-
-
-    if (aadata == NULL)
-    {
-        genborn_allvsall_setup(&aadata, top, born, mdatoms, 0.0,
-                               egbSTILL, FALSE, FALSE, TRUE);
-        *((gmx_allvsallgb2_data_t **)paadata) = aadata;
-    }
-
-    x_align = aadata->x_align;
-    y_align = aadata->y_align;
-    z_align = aadata->z_align;
-
-    gb_radius = aadata->gb_radius;
-    vsolv     = aadata->workparam;
-    work      = aadata->work;
-    jindex    = aadata->jindex_gb;
-    dadx      = fr->dadx;
-
-    still_p4_SSE    = _mm_set1_ps(STILL_P4);
-    still_p5inv_SSE = _mm_set1_ps(STILL_P5INV);
-    still_pip5_SSE  = _mm_set1_ps(STILL_PIP5);
-    half_SSE        = _mm_set1_ps(0.5);
-    one_SSE         = _mm_set1_ps(1.0);
-    two_SSE         = _mm_set1_ps(2.0);
-    four_SSE        = _mm_set1_ps(4.0);
-
-    /* This will be summed, so it has to extend to natoms + buffer */
-    for (i = 0; i < natoms+1+natoms/2; i++)
-    {
-        work[i] = 0;
-    }
-
-    for (i = ni0; i < ni1+1+natoms/2; i++)
-    {
-        k           = i%natoms;
-        x_align[i]  = x[3*k];
-        y_align[i]  = x[3*k+1];
-        z_align[i]  = x[3*k+2];
-        work[i]     = 0;
-    }
-
-
-    for (i = ni0; i < ni1; i += UNROLLI)
-    {
-        /* We assume shifts are NOT used for all-vs-all interactions */
-
-        /* Load i atom data */
-        ix_SSE0          = _mm_load1_ps(x_align+i);
-        iy_SSE0          = _mm_load1_ps(y_align+i);
-        iz_SSE0          = _mm_load1_ps(z_align+i);
-        ix_SSE1          = _mm_load1_ps(x_align+i+1);
-        iy_SSE1          = _mm_load1_ps(y_align+i+1);
-        iz_SSE1          = _mm_load1_ps(z_align+i+1);
-        ix_SSE2          = _mm_load1_ps(x_align+i+2);
-        iy_SSE2          = _mm_load1_ps(y_align+i+2);
-        iz_SSE2          = _mm_load1_ps(z_align+i+2);
-        ix_SSE3          = _mm_load1_ps(x_align+i+3);
-        iy_SSE3          = _mm_load1_ps(y_align+i+3);
-        iz_SSE3          = _mm_load1_ps(z_align+i+3);
-
-        gpi_SSE0         = _mm_setzero_ps();
-        gpi_SSE1         = _mm_setzero_ps();
-        gpi_SSE2         = _mm_setzero_ps();
-        gpi_SSE3         = _mm_setzero_ps();
-
-        rai_SSE0         = _mm_load1_ps(gb_radius+i);
-        rai_SSE1         = _mm_load1_ps(gb_radius+i+1);
-        rai_SSE2         = _mm_load1_ps(gb_radius+i+2);
-        rai_SSE3         = _mm_load1_ps(gb_radius+i+3);
-
-        prod_ai_SSE0     = _mm_set1_ps(STILL_P4*vsolv[i]);
-        prod_ai_SSE1     = _mm_set1_ps(STILL_P4*vsolv[i+1]);
-        prod_ai_SSE2     = _mm_set1_ps(STILL_P4*vsolv[i+2]);
-        prod_ai_SSE3     = _mm_set1_ps(STILL_P4*vsolv[i+3]);
-
-        /* Load limits for loop over neighbors */
-        nj0              = jindex[4*i];
-        nj1              = jindex[4*i+1];
-        nj2              = jindex[4*i+2];
-        nj3              = jindex[4*i+3];
-
-        pmask0           = aadata->prologue_mask_gb[i];
-        pmask1           = aadata->prologue_mask_gb[i+1];
-        pmask2           = aadata->prologue_mask_gb[i+2];
-        pmask3           = aadata->prologue_mask_gb[i+3];
-        emask0           = aadata->epilogue_mask[i];
-        emask1           = aadata->epilogue_mask[i+1];
-        emask2           = aadata->epilogue_mask[i+2];
-        emask3           = aadata->epilogue_mask[i+3];
-
-        imask_SSE0        = _mm_load1_ps((real *)(aadata->imask+i));
-        imask_SSE1        = _mm_load1_ps((real *)(aadata->imask+i+1));
-        imask_SSE2        = _mm_load1_ps((real *)(aadata->imask+i+2));
-        imask_SSE3        = _mm_load1_ps((real *)(aadata->imask+i+3));
-
-        /* Prologue part, including exclusion mask */
-        for (j = nj0; j < nj1; j += UNROLLJ)
-        {
-            jmask_SSE0 = _mm_load_ps((real *)pmask0);
-            jmask_SSE1 = _mm_load_ps((real *)pmask1);
-            jmask_SSE2 = _mm_load_ps((real *)pmask2);
-            jmask_SSE3 = _mm_load_ps((real *)pmask3);
-            pmask0    += UNROLLJ;
-            pmask1    += UNROLLJ;
-            pmask2    += UNROLLJ;
-            pmask3    += UNROLLJ;
-
-            /* load j atom coordinates */
-            jx_SSE            = _mm_load_ps(x_align+j);
-            jy_SSE            = _mm_load_ps(y_align+j);
-            jz_SSE            = _mm_load_ps(z_align+j);
-
-            /* Calculate distance */
-            dx_SSE0            = _mm_sub_ps(ix_SSE0, jx_SSE);
-            dy_SSE0            = _mm_sub_ps(iy_SSE0, jy_SSE);
-            dz_SSE0            = _mm_sub_ps(iz_SSE0, jz_SSE);
-            dx_SSE1            = _mm_sub_ps(ix_SSE1, jx_SSE);
-            dy_SSE1            = _mm_sub_ps(iy_SSE1, jy_SSE);
-            dz_SSE1            = _mm_sub_ps(iz_SSE1, jz_SSE);
-            dx_SSE2            = _mm_sub_ps(ix_SSE2, jx_SSE);
-            dy_SSE2            = _mm_sub_ps(iy_SSE2, jy_SSE);
-            dz_SSE2            = _mm_sub_ps(iz_SSE2, jz_SSE);
-            dx_SSE3            = _mm_sub_ps(ix_SSE3, jx_SSE);
-            dy_SSE3            = _mm_sub_ps(iy_SSE3, jy_SSE);
-            dz_SSE3            = _mm_sub_ps(iz_SSE3, jz_SSE);
-
-            /* rsq = dx*dx+dy*dy+dz*dz */
-            rsq_SSE0           = gmx_mm_calc_rsq_ps(dx_SSE0, dy_SSE0, dz_SSE0);
-            rsq_SSE1           = gmx_mm_calc_rsq_ps(dx_SSE1, dy_SSE1, dz_SSE1);
-            rsq_SSE2           = gmx_mm_calc_rsq_ps(dx_SSE2, dy_SSE2, dz_SSE2);
-            rsq_SSE3           = gmx_mm_calc_rsq_ps(dx_SSE3, dy_SSE3, dz_SSE3);
-
-            /* Combine masks */
-            jmask_SSE0         = _mm_and_ps(jmask_SSE0, imask_SSE0);
-            jmask_SSE1         = _mm_and_ps(jmask_SSE1, imask_SSE1);
-            jmask_SSE2         = _mm_and_ps(jmask_SSE2, imask_SSE2);
-            jmask_SSE3         = _mm_and_ps(jmask_SSE3, imask_SSE3);
-
-            /* Calculate 1/r and 1/r2 */
-            rinv_SSE0          = gmx_mm_invsqrt_ps(rsq_SSE0);
-            rinv_SSE1          = gmx_mm_invsqrt_ps(rsq_SSE1);
-            rinv_SSE2          = gmx_mm_invsqrt_ps(rsq_SSE2);
-            rinv_SSE3          = gmx_mm_invsqrt_ps(rsq_SSE3);
-
-            /* Apply mask */
-            rinv_SSE0          = _mm_and_ps(rinv_SSE0, jmask_SSE0);
-            rinv_SSE1          = _mm_and_ps(rinv_SSE1, jmask_SSE1);
-            rinv_SSE2          = _mm_and_ps(rinv_SSE2, jmask_SSE2);
-            rinv_SSE3          = _mm_and_ps(rinv_SSE3, jmask_SSE3);
-
-            irsq_SSE0          = _mm_mul_ps(rinv_SSE0, rinv_SSE0);
-            irsq_SSE1          = _mm_mul_ps(rinv_SSE1, rinv_SSE1);
-            irsq_SSE2          = _mm_mul_ps(rinv_SSE2, rinv_SSE2);
-            irsq_SSE3          = _mm_mul_ps(rinv_SSE3, rinv_SSE3);
-            idr4_SSE0          = _mm_mul_ps(irsq_SSE0, irsq_SSE0);
-            idr4_SSE1          = _mm_mul_ps(irsq_SSE1, irsq_SSE1);
-            idr4_SSE2          = _mm_mul_ps(irsq_SSE2, irsq_SSE2);
-            idr4_SSE3          = _mm_mul_ps(irsq_SSE3, irsq_SSE3);
-            idr6_SSE0          = _mm_mul_ps(idr4_SSE0, irsq_SSE0);
-            idr6_SSE1          = _mm_mul_ps(idr4_SSE1, irsq_SSE1);
-            idr6_SSE2          = _mm_mul_ps(idr4_SSE2, irsq_SSE2);
-            idr6_SSE3          = _mm_mul_ps(idr4_SSE3, irsq_SSE3);
-
-            raj_SSE            = _mm_load_ps(gb_radius+j);
-            vaj_SSE            = _mm_load_ps(vsolv+j);
-
-            rvdw_SSE0          = _mm_add_ps(rai_SSE0, raj_SSE);
-            rvdw_SSE1          = _mm_add_ps(rai_SSE1, raj_SSE);
-            rvdw_SSE2          = _mm_add_ps(rai_SSE2, raj_SSE);
-            rvdw_SSE3          = _mm_add_ps(rai_SSE3, raj_SSE);
-
-            ratio_SSE0         = _mm_mul_ps(rsq_SSE0, gmx_mm_inv_ps( _mm_mul_ps(rvdw_SSE0, rvdw_SSE0)));
-            ratio_SSE1         = _mm_mul_ps(rsq_SSE1, gmx_mm_inv_ps( _mm_mul_ps(rvdw_SSE1, rvdw_SSE1)));
-            ratio_SSE2         = _mm_mul_ps(rsq_SSE2, gmx_mm_inv_ps( _mm_mul_ps(rvdw_SSE2, rvdw_SSE2)));
-            ratio_SSE3         = _mm_mul_ps(rsq_SSE3, gmx_mm_inv_ps( _mm_mul_ps(rvdw_SSE3, rvdw_SSE3)));
-
-            ratio_SSE0         = _mm_min_ps(ratio_SSE0, still_p5inv_SSE);
-            ratio_SSE1         = _mm_min_ps(ratio_SSE1, still_p5inv_SSE);
-            ratio_SSE2         = _mm_min_ps(ratio_SSE2, still_p5inv_SSE);
-            ratio_SSE3         = _mm_min_ps(ratio_SSE3, still_p5inv_SSE);
-            theta_SSE0         = _mm_mul_ps(ratio_SSE0, still_pip5_SSE);
-            theta_SSE1         = _mm_mul_ps(ratio_SSE1, still_pip5_SSE);
-            theta_SSE2         = _mm_mul_ps(ratio_SSE2, still_pip5_SSE);
-            theta_SSE3         = _mm_mul_ps(ratio_SSE3, still_pip5_SSE);
-            gmx_mm_sincos_ps(theta_SSE0, &sinq_SSE0, &cosq_SSE0);
-            gmx_mm_sincos_ps(theta_SSE1, &sinq_SSE1, &cosq_SSE1);
-            gmx_mm_sincos_ps(theta_SSE2, &sinq_SSE2, &cosq_SSE2);
-            gmx_mm_sincos_ps(theta_SSE3, &sinq_SSE3, &cosq_SSE3);
-            term_SSE0          = _mm_mul_ps(half_SSE, _mm_sub_ps(one_SSE, cosq_SSE0));
-            term_SSE1          = _mm_mul_ps(half_SSE, _mm_sub_ps(one_SSE, cosq_SSE1));
-            term_SSE2          = _mm_mul_ps(half_SSE, _mm_sub_ps(one_SSE, cosq_SSE2));
-            term_SSE3          = _mm_mul_ps(half_SSE, _mm_sub_ps(one_SSE, cosq_SSE3));
-            ccf_SSE0           = _mm_mul_ps(term_SSE0, term_SSE0);
-            ccf_SSE1           = _mm_mul_ps(term_SSE1, term_SSE1);
-            ccf_SSE2           = _mm_mul_ps(term_SSE2, term_SSE2);
-            ccf_SSE3           = _mm_mul_ps(term_SSE3, term_SSE3);
-            dccf_SSE0          = _mm_mul_ps(_mm_mul_ps(two_SSE, term_SSE0),
-                                            _mm_mul_ps(sinq_SSE0, theta_SSE0));
-            dccf_SSE1          = _mm_mul_ps(_mm_mul_ps(two_SSE, term_SSE1),
-                                            _mm_mul_ps(sinq_SSE1, theta_SSE1));
-            dccf_SSE2          = _mm_mul_ps(_mm_mul_ps(two_SSE, term_SSE2),
-                                            _mm_mul_ps(sinq_SSE2, theta_SSE2));
-            dccf_SSE3          = _mm_mul_ps(_mm_mul_ps(two_SSE, term_SSE3),
-                                            _mm_mul_ps(sinq_SSE3, theta_SSE3));
-
-            prod_SSE           = _mm_mul_ps(still_p4_SSE, vaj_SSE);
-            icf4_SSE0          = _mm_mul_ps(ccf_SSE0, idr4_SSE0);
-            icf4_SSE1          = _mm_mul_ps(ccf_SSE1, idr4_SSE1);
-            icf4_SSE2          = _mm_mul_ps(ccf_SSE2, idr4_SSE2);
-            icf4_SSE3          = _mm_mul_ps(ccf_SSE3, idr4_SSE3);
-            icf6_SSE0          = _mm_mul_ps( _mm_sub_ps( _mm_mul_ps(four_SSE, ccf_SSE0), dccf_SSE0), idr6_SSE0);
-            icf6_SSE1          = _mm_mul_ps( _mm_sub_ps( _mm_mul_ps(four_SSE, ccf_SSE1), dccf_SSE1), idr6_SSE1);
-            icf6_SSE2          = _mm_mul_ps( _mm_sub_ps( _mm_mul_ps(four_SSE, ccf_SSE2), dccf_SSE2), idr6_SSE2);
-            icf6_SSE3          = _mm_mul_ps( _mm_sub_ps( _mm_mul_ps(four_SSE, ccf_SSE3), dccf_SSE3), idr6_SSE3);
-
-            _mm_store_ps(work+j, _mm_add_ps(_mm_load_ps(work+j),
-                                            gmx_mm_sum4_ps(_mm_mul_ps(prod_ai_SSE0, icf4_SSE0),
-                                                           _mm_mul_ps(prod_ai_SSE1, icf4_SSE1),
-                                                           _mm_mul_ps(prod_ai_SSE2, icf4_SSE2),
-                                                           _mm_mul_ps(prod_ai_SSE3, icf4_SSE3))));
-
-            gpi_SSE0           = _mm_add_ps(gpi_SSE0, _mm_mul_ps(prod_SSE, icf4_SSE0));
-            gpi_SSE1           = _mm_add_ps(gpi_SSE1, _mm_mul_ps(prod_SSE, icf4_SSE1));
-            gpi_SSE2           = _mm_add_ps(gpi_SSE2, _mm_mul_ps(prod_SSE, icf4_SSE2));
-            gpi_SSE3           = _mm_add_ps(gpi_SSE3, _mm_mul_ps(prod_SSE, icf4_SSE3));
-
-            /* Save ai->aj and aj->ai chain rule terms */
-            _mm_store_ps(dadx, _mm_mul_ps(prod_SSE, icf6_SSE0));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_mul_ps(prod_SSE, icf6_SSE1));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_mul_ps(prod_SSE, icf6_SSE2));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_mul_ps(prod_SSE, icf6_SSE3));
-            dadx += 4;
-
-            _mm_store_ps(dadx, _mm_mul_ps(prod_ai_SSE0, icf6_SSE0));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_mul_ps(prod_ai_SSE1, icf6_SSE1));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_mul_ps(prod_ai_SSE2, icf6_SSE2));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_mul_ps(prod_ai_SSE3, icf6_SSE3));
-            dadx += 4;
-        }
-
-        /* Main part, no exclusions */
-        for (j = nj1; j < nj2; j += UNROLLJ)
-        {
-            /* load j atom coordinates */
-            jx_SSE            = _mm_load_ps(x_align+j);
-            jy_SSE            = _mm_load_ps(y_align+j);
-            jz_SSE            = _mm_load_ps(z_align+j);
-
-            /* Calculate distance */
-            dx_SSE0            = _mm_sub_ps(ix_SSE0, jx_SSE);
-            dy_SSE0            = _mm_sub_ps(iy_SSE0, jy_SSE);
-            dz_SSE0            = _mm_sub_ps(iz_SSE0, jz_SSE);
-            dx_SSE1            = _mm_sub_ps(ix_SSE1, jx_SSE);
-            dy_SSE1            = _mm_sub_ps(iy_SSE1, jy_SSE);
-            dz_SSE1            = _mm_sub_ps(iz_SSE1, jz_SSE);
-            dx_SSE2            = _mm_sub_ps(ix_SSE2, jx_SSE);
-            dy_SSE2            = _mm_sub_ps(iy_SSE2, jy_SSE);
-            dz_SSE2            = _mm_sub_ps(iz_SSE2, jz_SSE);
-            dx_SSE3            = _mm_sub_ps(ix_SSE3, jx_SSE);
-            dy_SSE3            = _mm_sub_ps(iy_SSE3, jy_SSE);
-            dz_SSE3            = _mm_sub_ps(iz_SSE3, jz_SSE);
-
-            /* rsq = dx*dx+dy*dy+dz*dz */
-            rsq_SSE0           = gmx_mm_calc_rsq_ps(dx_SSE0, dy_SSE0, dz_SSE0);
-            rsq_SSE1           = gmx_mm_calc_rsq_ps(dx_SSE1, dy_SSE1, dz_SSE1);
-            rsq_SSE2           = gmx_mm_calc_rsq_ps(dx_SSE2, dy_SSE2, dz_SSE2);
-            rsq_SSE3           = gmx_mm_calc_rsq_ps(dx_SSE3, dy_SSE3, dz_SSE3);
-
-            /* Calculate 1/r and 1/r2 */
-            rinv_SSE0          = gmx_mm_invsqrt_ps(rsq_SSE0);
-            rinv_SSE1          = gmx_mm_invsqrt_ps(rsq_SSE1);
-            rinv_SSE2          = gmx_mm_invsqrt_ps(rsq_SSE2);
-            rinv_SSE3          = gmx_mm_invsqrt_ps(rsq_SSE3);
-
-            /* Apply mask */
-            rinv_SSE0          = _mm_and_ps(rinv_SSE0, imask_SSE0);
-            rinv_SSE1          = _mm_and_ps(rinv_SSE1, imask_SSE1);
-            rinv_SSE2          = _mm_and_ps(rinv_SSE2, imask_SSE2);
-            rinv_SSE3          = _mm_and_ps(rinv_SSE3, imask_SSE3);
-
-            irsq_SSE0          = _mm_mul_ps(rinv_SSE0, rinv_SSE0);
-            irsq_SSE1          = _mm_mul_ps(rinv_SSE1, rinv_SSE1);
-            irsq_SSE2          = _mm_mul_ps(rinv_SSE2, rinv_SSE2);
-            irsq_SSE3          = _mm_mul_ps(rinv_SSE3, rinv_SSE3);
-            idr4_SSE0          = _mm_mul_ps(irsq_SSE0, irsq_SSE0);
-            idr4_SSE1          = _mm_mul_ps(irsq_SSE1, irsq_SSE1);
-            idr4_SSE2          = _mm_mul_ps(irsq_SSE2, irsq_SSE2);
-            idr4_SSE3          = _mm_mul_ps(irsq_SSE3, irsq_SSE3);
-            idr6_SSE0          = _mm_mul_ps(idr4_SSE0, irsq_SSE0);
-            idr6_SSE1          = _mm_mul_ps(idr4_SSE1, irsq_SSE1);
-            idr6_SSE2          = _mm_mul_ps(idr4_SSE2, irsq_SSE2);
-            idr6_SSE3          = _mm_mul_ps(idr4_SSE3, irsq_SSE3);
-
-            raj_SSE            = _mm_load_ps(gb_radius+j);
-
-            rvdw_SSE0          = _mm_add_ps(rai_SSE0, raj_SSE);
-            rvdw_SSE1          = _mm_add_ps(rai_SSE1, raj_SSE);
-            rvdw_SSE2          = _mm_add_ps(rai_SSE2, raj_SSE);
-            rvdw_SSE3          = _mm_add_ps(rai_SSE3, raj_SSE);
-            vaj_SSE            = _mm_load_ps(vsolv+j);
-
-            ratio_SSE0         = _mm_mul_ps(rsq_SSE0, gmx_mm_inv_ps( _mm_mul_ps(rvdw_SSE0, rvdw_SSE0)));
-            ratio_SSE1         = _mm_mul_ps(rsq_SSE1, gmx_mm_inv_ps( _mm_mul_ps(rvdw_SSE1, rvdw_SSE1)));
-            ratio_SSE2         = _mm_mul_ps(rsq_SSE2, gmx_mm_inv_ps( _mm_mul_ps(rvdw_SSE2, rvdw_SSE2)));
-            ratio_SSE3         = _mm_mul_ps(rsq_SSE3, gmx_mm_inv_ps( _mm_mul_ps(rvdw_SSE3, rvdw_SSE3)));
-
-            ratio_SSE0         = _mm_min_ps(ratio_SSE0, still_p5inv_SSE);
-            ratio_SSE1         = _mm_min_ps(ratio_SSE1, still_p5inv_SSE);
-            ratio_SSE2         = _mm_min_ps(ratio_SSE2, still_p5inv_SSE);
-            ratio_SSE3         = _mm_min_ps(ratio_SSE3, still_p5inv_SSE);
-            theta_SSE0         = _mm_mul_ps(ratio_SSE0, still_pip5_SSE);
-            theta_SSE1         = _mm_mul_ps(ratio_SSE1, still_pip5_SSE);
-            theta_SSE2         = _mm_mul_ps(ratio_SSE2, still_pip5_SSE);
-            theta_SSE3         = _mm_mul_ps(ratio_SSE3, still_pip5_SSE);
-            gmx_mm_sincos_ps(theta_SSE0, &sinq_SSE0, &cosq_SSE0);
-            gmx_mm_sincos_ps(theta_SSE1, &sinq_SSE1, &cosq_SSE1);
-            gmx_mm_sincos_ps(theta_SSE2, &sinq_SSE2, &cosq_SSE2);
-            gmx_mm_sincos_ps(theta_SSE3, &sinq_SSE3, &cosq_SSE3);
-            term_SSE0          = _mm_mul_ps(half_SSE, _mm_sub_ps(one_SSE, cosq_SSE0));
-            term_SSE1          = _mm_mul_ps(half_SSE, _mm_sub_ps(one_SSE, cosq_SSE1));
-            term_SSE2          = _mm_mul_ps(half_SSE, _mm_sub_ps(one_SSE, cosq_SSE2));
-            term_SSE3          = _mm_mul_ps(half_SSE, _mm_sub_ps(one_SSE, cosq_SSE3));
-            ccf_SSE0           = _mm_mul_ps(term_SSE0, term_SSE0);
-            ccf_SSE1           = _mm_mul_ps(term_SSE1, term_SSE1);
-            ccf_SSE2           = _mm_mul_ps(term_SSE2, term_SSE2);
-            ccf_SSE3           = _mm_mul_ps(term_SSE3, term_SSE3);
-            dccf_SSE0          = _mm_mul_ps(_mm_mul_ps(two_SSE, term_SSE0),
-                                            _mm_mul_ps(sinq_SSE0, theta_SSE0));
-            dccf_SSE1          = _mm_mul_ps(_mm_mul_ps(two_SSE, term_SSE1),
-                                            _mm_mul_ps(sinq_SSE1, theta_SSE1));
-            dccf_SSE2          = _mm_mul_ps(_mm_mul_ps(two_SSE, term_SSE2),
-                                            _mm_mul_ps(sinq_SSE2, theta_SSE2));
-            dccf_SSE3          = _mm_mul_ps(_mm_mul_ps(two_SSE, term_SSE3),
-                                            _mm_mul_ps(sinq_SSE3, theta_SSE3));
-
-            prod_SSE           = _mm_mul_ps(still_p4_SSE, vaj_SSE );
-            icf4_SSE0          = _mm_mul_ps(ccf_SSE0, idr4_SSE0);
-            icf4_SSE1          = _mm_mul_ps(ccf_SSE1, idr4_SSE1);
-            icf4_SSE2          = _mm_mul_ps(ccf_SSE2, idr4_SSE2);
-            icf4_SSE3          = _mm_mul_ps(ccf_SSE3, idr4_SSE3);
-            icf6_SSE0          = _mm_mul_ps( _mm_sub_ps( _mm_mul_ps(four_SSE, ccf_SSE0), dccf_SSE0), idr6_SSE0);
-            icf6_SSE1          = _mm_mul_ps( _mm_sub_ps( _mm_mul_ps(four_SSE, ccf_SSE1), dccf_SSE1), idr6_SSE1);
-            icf6_SSE2          = _mm_mul_ps( _mm_sub_ps( _mm_mul_ps(four_SSE, ccf_SSE2), dccf_SSE2), idr6_SSE2);
-            icf6_SSE3          = _mm_mul_ps( _mm_sub_ps( _mm_mul_ps(four_SSE, ccf_SSE3), dccf_SSE3), idr6_SSE3);
-
-            _mm_store_ps(work+j, _mm_add_ps(_mm_load_ps(work+j),
-                                            gmx_mm_sum4_ps(_mm_mul_ps(prod_ai_SSE0, icf4_SSE0),
-                                                           _mm_mul_ps(prod_ai_SSE1, icf4_SSE1),
-                                                           _mm_mul_ps(prod_ai_SSE2, icf4_SSE2),
-                                                           _mm_mul_ps(prod_ai_SSE3, icf4_SSE3))));
-
-            gpi_SSE0           = _mm_add_ps(gpi_SSE0, _mm_mul_ps(prod_SSE, icf4_SSE0));
-            gpi_SSE1           = _mm_add_ps(gpi_SSE1, _mm_mul_ps(prod_SSE, icf4_SSE1));
-            gpi_SSE2           = _mm_add_ps(gpi_SSE2, _mm_mul_ps(prod_SSE, icf4_SSE2));
-            gpi_SSE3           = _mm_add_ps(gpi_SSE3, _mm_mul_ps(prod_SSE, icf4_SSE3));
-
-            /* Save ai->aj and aj->ai chain rule terms */
-            _mm_store_ps(dadx, _mm_mul_ps(prod_SSE, icf6_SSE0));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_mul_ps(prod_SSE, icf6_SSE1));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_mul_ps(prod_SSE, icf6_SSE2));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_mul_ps(prod_SSE, icf6_SSE3));
-            dadx += 4;
-
-            _mm_store_ps(dadx, _mm_mul_ps(prod_ai_SSE0, icf6_SSE0));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_mul_ps(prod_ai_SSE1, icf6_SSE1));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_mul_ps(prod_ai_SSE2, icf6_SSE2));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_mul_ps(prod_ai_SSE3, icf6_SSE3));
-            dadx += 4;
-        }
-        /* Epilogue part, including exclusion mask */
-        for (j = nj2; j < nj3; j += UNROLLJ)
-        {
-            jmask_SSE0 = _mm_load_ps((real *)emask0);
-            jmask_SSE1 = _mm_load_ps((real *)emask1);
-            jmask_SSE2 = _mm_load_ps((real *)emask2);
-            jmask_SSE3 = _mm_load_ps((real *)emask3);
-            emask0    += UNROLLJ;
-            emask1    += UNROLLJ;
-            emask2    += UNROLLJ;
-            emask3    += UNROLLJ;
-
-            /* load j atom coordinates */
-            jx_SSE            = _mm_load_ps(x_align+j);
-            jy_SSE            = _mm_load_ps(y_align+j);
-            jz_SSE            = _mm_load_ps(z_align+j);
-
-            /* Calculate distance */
-            dx_SSE0            = _mm_sub_ps(ix_SSE0, jx_SSE);
-            dy_SSE0            = _mm_sub_ps(iy_SSE0, jy_SSE);
-            dz_SSE0            = _mm_sub_ps(iz_SSE0, jz_SSE);
-            dx_SSE1            = _mm_sub_ps(ix_SSE1, jx_SSE);
-            dy_SSE1            = _mm_sub_ps(iy_SSE1, jy_SSE);
-            dz_SSE1            = _mm_sub_ps(iz_SSE1, jz_SSE);
-            dx_SSE2            = _mm_sub_ps(ix_SSE2, jx_SSE);
-            dy_SSE2            = _mm_sub_ps(iy_SSE2, jy_SSE);
-            dz_SSE2            = _mm_sub_ps(iz_SSE2, jz_SSE);
-            dx_SSE3            = _mm_sub_ps(ix_SSE3, jx_SSE);
-            dy_SSE3            = _mm_sub_ps(iy_SSE3, jy_SSE);
-            dz_SSE3            = _mm_sub_ps(iz_SSE3, jz_SSE);
-
-            /* rsq = dx*dx+dy*dy+dz*dz */
-            rsq_SSE0           = gmx_mm_calc_rsq_ps(dx_SSE0, dy_SSE0, dz_SSE0);
-            rsq_SSE1           = gmx_mm_calc_rsq_ps(dx_SSE1, dy_SSE1, dz_SSE1);
-            rsq_SSE2           = gmx_mm_calc_rsq_ps(dx_SSE2, dy_SSE2, dz_SSE2);
-            rsq_SSE3           = gmx_mm_calc_rsq_ps(dx_SSE3, dy_SSE3, dz_SSE3);
-
-            /* Combine masks */
-            jmask_SSE0         = _mm_and_ps(jmask_SSE0, imask_SSE0);
-            jmask_SSE1         = _mm_and_ps(jmask_SSE1, imask_SSE1);
-            jmask_SSE2         = _mm_and_ps(jmask_SSE2, imask_SSE2);
-            jmask_SSE3         = _mm_and_ps(jmask_SSE3, imask_SSE3);
-
-            /* Calculate 1/r and 1/r2 */
-            rinv_SSE0          = gmx_mm_invsqrt_ps(rsq_SSE0);
-            rinv_SSE1          = gmx_mm_invsqrt_ps(rsq_SSE1);
-            rinv_SSE2          = gmx_mm_invsqrt_ps(rsq_SSE2);
-            rinv_SSE3          = gmx_mm_invsqrt_ps(rsq_SSE3);
-
-            /* Apply mask */
-            rinv_SSE0          = _mm_and_ps(rinv_SSE0, jmask_SSE0);
-            rinv_SSE1          = _mm_and_ps(rinv_SSE1, jmask_SSE1);
-            rinv_SSE2          = _mm_and_ps(rinv_SSE2, jmask_SSE2);
-            rinv_SSE3          = _mm_and_ps(rinv_SSE3, jmask_SSE3);
-
-            irsq_SSE0          = _mm_mul_ps(rinv_SSE0, rinv_SSE0);
-            irsq_SSE1          = _mm_mul_ps(rinv_SSE1, rinv_SSE1);
-            irsq_SSE2          = _mm_mul_ps(rinv_SSE2, rinv_SSE2);
-            irsq_SSE3          = _mm_mul_ps(rinv_SSE3, rinv_SSE3);
-            idr4_SSE0          = _mm_mul_ps(irsq_SSE0, irsq_SSE0);
-            idr4_SSE1          = _mm_mul_ps(irsq_SSE1, irsq_SSE1);
-            idr4_SSE2          = _mm_mul_ps(irsq_SSE2, irsq_SSE2);
-            idr4_SSE3          = _mm_mul_ps(irsq_SSE3, irsq_SSE3);
-            idr6_SSE0          = _mm_mul_ps(idr4_SSE0, irsq_SSE0);
-            idr6_SSE1          = _mm_mul_ps(idr4_SSE1, irsq_SSE1);
-            idr6_SSE2          = _mm_mul_ps(idr4_SSE2, irsq_SSE2);
-            idr6_SSE3          = _mm_mul_ps(idr4_SSE3, irsq_SSE3);
-
-            raj_SSE            = _mm_load_ps(gb_radius+j);
-            vaj_SSE            = _mm_load_ps(vsolv+j);
-
-            rvdw_SSE0          = _mm_add_ps(rai_SSE0, raj_SSE);
-            rvdw_SSE1          = _mm_add_ps(rai_SSE1, raj_SSE);
-            rvdw_SSE2          = _mm_add_ps(rai_SSE2, raj_SSE);
-            rvdw_SSE3          = _mm_add_ps(rai_SSE3, raj_SSE);
-
-            ratio_SSE0         = _mm_mul_ps(rsq_SSE0, gmx_mm_inv_ps( _mm_mul_ps(rvdw_SSE0, rvdw_SSE0)));
-            ratio_SSE1         = _mm_mul_ps(rsq_SSE1, gmx_mm_inv_ps( _mm_mul_ps(rvdw_SSE1, rvdw_SSE1)));
-            ratio_SSE2         = _mm_mul_ps(rsq_SSE2, gmx_mm_inv_ps( _mm_mul_ps(rvdw_SSE2, rvdw_SSE2)));
-            ratio_SSE3         = _mm_mul_ps(rsq_SSE3, gmx_mm_inv_ps( _mm_mul_ps(rvdw_SSE3, rvdw_SSE3)));
-
-            ratio_SSE0         = _mm_min_ps(ratio_SSE0, still_p5inv_SSE);
-            ratio_SSE1         = _mm_min_ps(ratio_SSE1, still_p5inv_SSE);
-            ratio_SSE2         = _mm_min_ps(ratio_SSE2, still_p5inv_SSE);
-            ratio_SSE3         = _mm_min_ps(ratio_SSE3, still_p5inv_SSE);
-            theta_SSE0         = _mm_mul_ps(ratio_SSE0, still_pip5_SSE);
-            theta_SSE1         = _mm_mul_ps(ratio_SSE1, still_pip5_SSE);
-            theta_SSE2         = _mm_mul_ps(ratio_SSE2, still_pip5_SSE);
-            theta_SSE3         = _mm_mul_ps(ratio_SSE3, still_pip5_SSE);
-            gmx_mm_sincos_ps(theta_SSE0, &sinq_SSE0, &cosq_SSE0);
-            gmx_mm_sincos_ps(theta_SSE1, &sinq_SSE1, &cosq_SSE1);
-            gmx_mm_sincos_ps(theta_SSE2, &sinq_SSE2, &cosq_SSE2);
-            gmx_mm_sincos_ps(theta_SSE3, &sinq_SSE3, &cosq_SSE3);
-            term_SSE0          = _mm_mul_ps(half_SSE, _mm_sub_ps(one_SSE, cosq_SSE0));
-            term_SSE1          = _mm_mul_ps(half_SSE, _mm_sub_ps(one_SSE, cosq_SSE1));
-            term_SSE2          = _mm_mul_ps(half_SSE, _mm_sub_ps(one_SSE, cosq_SSE2));
-            term_SSE3          = _mm_mul_ps(half_SSE, _mm_sub_ps(one_SSE, cosq_SSE3));
-            ccf_SSE0           = _mm_mul_ps(term_SSE0, term_SSE0);
-            ccf_SSE1           = _mm_mul_ps(term_SSE1, term_SSE1);
-            ccf_SSE2           = _mm_mul_ps(term_SSE2, term_SSE2);
-            ccf_SSE3           = _mm_mul_ps(term_SSE3, term_SSE3);
-            dccf_SSE0          = _mm_mul_ps(_mm_mul_ps(two_SSE, term_SSE0),
-                                            _mm_mul_ps(sinq_SSE0, theta_SSE0));
-            dccf_SSE1          = _mm_mul_ps(_mm_mul_ps(two_SSE, term_SSE1),
-                                            _mm_mul_ps(sinq_SSE1, theta_SSE1));
-            dccf_SSE2          = _mm_mul_ps(_mm_mul_ps(two_SSE, term_SSE2),
-                                            _mm_mul_ps(sinq_SSE2, theta_SSE2));
-            dccf_SSE3          = _mm_mul_ps(_mm_mul_ps(two_SSE, term_SSE3),
-                                            _mm_mul_ps(sinq_SSE3, theta_SSE3));
-
-            prod_SSE           = _mm_mul_ps(still_p4_SSE, vaj_SSE);
-            icf4_SSE0          = _mm_mul_ps(ccf_SSE0, idr4_SSE0);
-            icf4_SSE1          = _mm_mul_ps(ccf_SSE1, idr4_SSE1);
-            icf4_SSE2          = _mm_mul_ps(ccf_SSE2, idr4_SSE2);
-            icf4_SSE3          = _mm_mul_ps(ccf_SSE3, idr4_SSE3);
-            icf6_SSE0          = _mm_mul_ps( _mm_sub_ps( _mm_mul_ps(four_SSE, ccf_SSE0), dccf_SSE0), idr6_SSE0);
-            icf6_SSE1          = _mm_mul_ps( _mm_sub_ps( _mm_mul_ps(four_SSE, ccf_SSE1), dccf_SSE1), idr6_SSE1);
-            icf6_SSE2          = _mm_mul_ps( _mm_sub_ps( _mm_mul_ps(four_SSE, ccf_SSE2), dccf_SSE2), idr6_SSE2);
-            icf6_SSE3          = _mm_mul_ps( _mm_sub_ps( _mm_mul_ps(four_SSE, ccf_SSE3), dccf_SSE3), idr6_SSE3);
-
-            _mm_store_ps(work+j, _mm_add_ps(_mm_load_ps(work+j),
-                                            gmx_mm_sum4_ps(_mm_mul_ps(prod_ai_SSE0, icf4_SSE0),
-                                                           _mm_mul_ps(prod_ai_SSE1, icf4_SSE1),
-                                                           _mm_mul_ps(prod_ai_SSE2, icf4_SSE2),
-                                                           _mm_mul_ps(prod_ai_SSE3, icf4_SSE3))));
-
-            gpi_SSE0           = _mm_add_ps(gpi_SSE0, _mm_mul_ps(prod_SSE, icf4_SSE0));
-            gpi_SSE1           = _mm_add_ps(gpi_SSE1, _mm_mul_ps(prod_SSE, icf4_SSE1));
-            gpi_SSE2           = _mm_add_ps(gpi_SSE2, _mm_mul_ps(prod_SSE, icf4_SSE2));
-            gpi_SSE3           = _mm_add_ps(gpi_SSE3, _mm_mul_ps(prod_SSE, icf4_SSE3));
-
-            /* Save ai->aj and aj->ai chain rule terms */
-            _mm_store_ps(dadx, _mm_mul_ps(prod_SSE, icf6_SSE0));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_mul_ps(prod_SSE, icf6_SSE1));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_mul_ps(prod_SSE, icf6_SSE2));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_mul_ps(prod_SSE, icf6_SSE3));
-            dadx += 4;
-
-            _mm_store_ps(dadx, _mm_mul_ps(prod_ai_SSE0, icf6_SSE0));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_mul_ps(prod_ai_SSE1, icf6_SSE1));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_mul_ps(prod_ai_SSE2, icf6_SSE2));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_mul_ps(prod_ai_SSE3, icf6_SSE3));
-            dadx += 4;
-        }
-        _MM_TRANSPOSE4_PS(gpi_SSE0, gpi_SSE1, gpi_SSE2, gpi_SSE3);
-        gpi_SSE0 = _mm_add_ps(gpi_SSE0, gpi_SSE1);
-        gpi_SSE2 = _mm_add_ps(gpi_SSE2, gpi_SSE3);
-        gpi_SSE0 = _mm_add_ps(gpi_SSE0, gpi_SSE2);
-        _mm_store_ps(work+i, _mm_add_ps(gpi_SSE0, _mm_load_ps(work+i)));
-    }
-
-    /* In case we have written anything beyond natoms, move it back.
-     * Never mind that we leave stuff above natoms; that will not
-     * be accessed later in the routine.
-     * In principle this should be a move rather than sum, but this
-     * way we don't have to worry about even/odd offsets...
-     */
-    for (i = natoms; i < ni1+1+natoms/2; i++)
-    {
-        work[i-natoms] += work[i];
-    }
-
-    /* Parallel summations would go here if ever implemented with DD */
-
-    factor  = 0.5 * ONE_4PI_EPS0;
-    /* Calculate the radii - should we do all atoms, or just our local ones? */
-    for (i = 0; i < natoms; i++)
-    {
-        if (born->use[i] != 0)
-        {
-            gpi             = born->gpol[i]+work[i];
-            gpi2            = gpi * gpi;
-            born->bRad[i]   = factor*gmx_invsqrt(gpi2);
-            fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
-        }
-    }
-
-    return 0;
-}
-
-
-
-int
-genborn_allvsall_calc_hct_obc_radii_sse2_single(t_forcerec *           fr,
-                                                t_mdatoms *            mdatoms,
-                                                gmx_genborn_t *        born,
-                                                int                    gb_algorithm,
-                                                gmx_localtop_t *       top,
-                                                real *                 x,
-                                                t_commrec *            cr,
-                                                void *                 paadata)
-{
-    gmx_allvsallgb2_data_t *aadata;
-    int                     natoms;
-    int                     ni0, ni1;
-    int                     nj0, nj1, nj2, nj3;
-    int                     i, j, k, n;
-    int              *      mask;
-    int              *      pmask0;
-    int              *      pmask1;
-    int              *      pmask2;
-    int              *      pmask3;
-    int              *      emask0;
-    int              *      emask1;
-    int              *      emask2;
-    int              *      emask3;
-    real              *     gb_radius;
-    real              *     vsolv;
-    real              *     work;
-    real                    tmpsum[4];
-    real              *     x_align;
-    real              *     y_align;
-    real              *     z_align;
-    int              *      jindex;
-    real              *     dadx;
-    real              *     obc_param;
-    real                    rad, min_rad;
-    real                    rai, rai_inv, rai_inv2, sum_ai, sum_ai2, sum_ai3, tsum, tchain;
-
-    __m128                  ix_SSE0, iy_SSE0, iz_SSE0;
-    __m128                  ix_SSE1, iy_SSE1, iz_SSE1;
-    __m128                  ix_SSE2, iy_SSE2, iz_SSE2;
-    __m128                  ix_SSE3, iy_SSE3, iz_SSE3;
-    __m128                  gpi_SSE0, rai_SSE0, prod_ai_SSE0;
-    __m128                  gpi_SSE1, rai_SSE1, prod_ai_SSE1;
-    __m128                  gpi_SSE2, rai_SSE2, prod_ai_SSE2;
-    __m128                  gpi_SSE3, rai_SSE3, prod_ai_SSE3;
-    __m128                  imask_SSE0, jmask_SSE0;
-    __m128                  imask_SSE1, jmask_SSE1;
-    __m128                  imask_SSE2, jmask_SSE2;
-    __m128                  imask_SSE3, jmask_SSE3;
-    __m128                  jx_SSE, jy_SSE, jz_SSE;
-    __m128                  dx_SSE0, dy_SSE0, dz_SSE0;
-    __m128                  dx_SSE1, dy_SSE1, dz_SSE1;
-    __m128                  dx_SSE2, dy_SSE2, dz_SSE2;
-    __m128                  dx_SSE3, dy_SSE3, dz_SSE3;
-    __m128                  rsq_SSE0, rinv_SSE0, irsq_SSE0, idr4_SSE0, idr6_SSE0;
-    __m128                  rsq_SSE1, rinv_SSE1, irsq_SSE1, idr4_SSE1, idr6_SSE1;
-    __m128                  rsq_SSE2, rinv_SSE2, irsq_SSE2, idr4_SSE2, idr6_SSE2;
-    __m128                  rsq_SSE3, rinv_SSE3, irsq_SSE3, idr4_SSE3, idr6_SSE3;
-    __m128                  raj_SSE, raj_inv_SSE, sk_aj_SSE, sk2_aj_SSE;
-    __m128                  ccf_SSE0, dccf_SSE0, prod_SSE0;
-    __m128                  ccf_SSE1, dccf_SSE1, prod_SSE1;
-    __m128                  ccf_SSE2, dccf_SSE2, prod_SSE2;
-    __m128                  ccf_SSE3, dccf_SSE3, prod_SSE3;
-    __m128                  icf4_SSE0, icf6_SSE0;
-    __m128                  icf4_SSE1, icf6_SSE1;
-    __m128                  icf4_SSE2, icf6_SSE2;
-    __m128                  icf4_SSE3, icf6_SSE3;
-    __m128                  oneeighth_SSE, onefourth_SSE, half_SSE, one_SSE, two_SSE, four_SSE;
-    __m128                  still_p4_SSE, still_p5inv_SSE, still_pip5_SSE;
-    __m128                  rai_inv_SSE0;
-    __m128                  rai_inv_SSE1;
-    __m128                  rai_inv_SSE2;
-    __m128                  rai_inv_SSE3;
-    __m128                  sk_ai_SSE0, sk2_ai_SSE0, sum_ai_SSE0;
-    __m128                  sk_ai_SSE1, sk2_ai_SSE1, sum_ai_SSE1;
-    __m128                  sk_ai_SSE2, sk2_ai_SSE2, sum_ai_SSE2;
-    __m128                  sk_ai_SSE3, sk2_ai_SSE3, sum_ai_SSE3;
-    __m128                  lij_inv_SSE0, sk2_rinv_SSE0;
-    __m128                  lij_inv_SSE1, sk2_rinv_SSE1;
-    __m128                  lij_inv_SSE2, sk2_rinv_SSE2;
-    __m128                  lij_inv_SSE3, sk2_rinv_SSE3;
-    __m128                  dr_SSE0;
-    __m128                  dr_SSE1;
-    __m128                  dr_SSE2;
-    __m128                  dr_SSE3;
-    __m128                  t1_SSE0, t2_SSE0, t3_SSE0, t4_SSE0;
-    __m128                  t1_SSE1, t2_SSE1, t3_SSE1, t4_SSE1;
-    __m128                  t1_SSE2, t2_SSE2, t3_SSE2, t4_SSE2;
-    __m128                  t1_SSE3, t2_SSE3, t3_SSE3, t4_SSE3;
-    __m128                  obc_mask1_SSE0, obc_mask2_SSE0, obc_mask3_SSE0;
-    __m128                  obc_mask1_SSE1, obc_mask2_SSE1, obc_mask3_SSE1;
-    __m128                  obc_mask1_SSE2, obc_mask2_SSE2, obc_mask3_SSE2;
-    __m128                  obc_mask1_SSE3, obc_mask2_SSE3, obc_mask3_SSE3;
-    __m128                  uij_SSE0, uij2_SSE0, uij3_SSE0;
-    __m128                  uij_SSE1, uij2_SSE1, uij3_SSE1;
-    __m128                  uij_SSE2, uij2_SSE2, uij3_SSE2;
-    __m128                  uij_SSE3, uij2_SSE3, uij3_SSE3;
-    __m128                  lij_SSE0, lij2_SSE0, lij3_SSE0;
-    __m128                  lij_SSE1, lij2_SSE1, lij3_SSE1;
-    __m128                  lij_SSE2, lij2_SSE2, lij3_SSE2;
-    __m128                  lij_SSE3, lij2_SSE3, lij3_SSE3;
-    __m128                  dlij_SSE0, diff2_SSE0, logterm_SSE0;
-    __m128                  dlij_SSE1, diff2_SSE1, logterm_SSE1;
-    __m128                  dlij_SSE2, diff2_SSE2, logterm_SSE2;
-    __m128                  dlij_SSE3, diff2_SSE3, logterm_SSE3;
-    __m128                  doffset_SSE;
-
-    natoms              = mdatoms->nr;
-    ni0                 = 0;
-    ni1                 = mdatoms->homenr;
-
-    n = 0;
-
-    aadata = *((gmx_allvsallgb2_data_t **)paadata);
-
-
-    if (aadata == NULL)
-    {
-        genborn_allvsall_setup(&aadata, top, born, mdatoms, born->gb_doffset,
-                               egbOBC, TRUE, TRUE, TRUE);
-        *((gmx_allvsallgb2_data_t **)paadata) = aadata;
-    }
-
-    x_align = aadata->x_align;
-    y_align = aadata->y_align;
-    z_align = aadata->z_align;
-
-    gb_radius = aadata->gb_radius;
-    work      = aadata->work;
-    jindex    = aadata->jindex_gb;
-    dadx      = fr->dadx;
-    obc_param = aadata->workparam;
-
-    oneeighth_SSE   = _mm_set1_ps(0.125);
-    onefourth_SSE   = _mm_set1_ps(0.25);
-    half_SSE        = _mm_set1_ps(0.5);
-    one_SSE         = _mm_set1_ps(1.0);
-    two_SSE         = _mm_set1_ps(2.0);
-    four_SSE        = _mm_set1_ps(4.0);
-    doffset_SSE     = _mm_set1_ps(born->gb_doffset);
-
-    for (i = 0; i < natoms; i++)
-    {
-        x_align[i]  = x[3*i];
-        y_align[i]  = x[3*i+1];
-        z_align[i]  = x[3*i+2];
-    }
-
-    /* Copy again */
-    for (i = 0; i < natoms/2+1; i++)
-    {
-        x_align[natoms+i]  = x_align[i];
-        y_align[natoms+i]  = y_align[i];
-        z_align[natoms+i]  = z_align[i];
-    }
-
-    for (i = 0; i < natoms+natoms/2+1; i++)
-    {
-        work[i] = 0;
-    }
-
-    for (i = ni0; i < ni1; i += UNROLLI)
-    {
-        /* We assume shifts are NOT used for all-vs-all interactions */
-
-        /* Load i atom data */
-        ix_SSE0          = _mm_load1_ps(x_align+i);
-        iy_SSE0          = _mm_load1_ps(y_align+i);
-        iz_SSE0          = _mm_load1_ps(z_align+i);
-        ix_SSE1          = _mm_load1_ps(x_align+i+1);
-        iy_SSE1          = _mm_load1_ps(y_align+i+1);
-        iz_SSE1          = _mm_load1_ps(z_align+i+1);
-        ix_SSE2          = _mm_load1_ps(x_align+i+2);
-        iy_SSE2          = _mm_load1_ps(y_align+i+2);
-        iz_SSE2          = _mm_load1_ps(z_align+i+2);
-        ix_SSE3          = _mm_load1_ps(x_align+i+3);
-        iy_SSE3          = _mm_load1_ps(y_align+i+3);
-        iz_SSE3          = _mm_load1_ps(z_align+i+3);
-
-        rai_SSE0         = _mm_load1_ps(gb_radius+i);
-        rai_SSE1         = _mm_load1_ps(gb_radius+i+1);
-        rai_SSE2         = _mm_load1_ps(gb_radius+i+2);
-        rai_SSE3         = _mm_load1_ps(gb_radius+i+3);
-        rai_inv_SSE0     = gmx_mm_inv_ps(rai_SSE0);
-        rai_inv_SSE1     = gmx_mm_inv_ps(rai_SSE1);
-        rai_inv_SSE2     = gmx_mm_inv_ps(rai_SSE2);
-        rai_inv_SSE3     = gmx_mm_inv_ps(rai_SSE3);
-
-        sk_ai_SSE0       = _mm_load1_ps(obc_param+i);
-        sk_ai_SSE1       = _mm_load1_ps(obc_param+i+1);
-        sk_ai_SSE2       = _mm_load1_ps(obc_param+i+2);
-        sk_ai_SSE3       = _mm_load1_ps(obc_param+i+3);
-        sk2_ai_SSE0      = _mm_mul_ps(sk_ai_SSE0, sk_ai_SSE0);
-        sk2_ai_SSE1      = _mm_mul_ps(sk_ai_SSE1, sk_ai_SSE1);
-        sk2_ai_SSE2      = _mm_mul_ps(sk_ai_SSE2, sk_ai_SSE2);
-        sk2_ai_SSE3      = _mm_mul_ps(sk_ai_SSE3, sk_ai_SSE3);
-
-        sum_ai_SSE0      = _mm_setzero_ps();
-        sum_ai_SSE1      = _mm_setzero_ps();
-        sum_ai_SSE2      = _mm_setzero_ps();
-        sum_ai_SSE3      = _mm_setzero_ps();
-
-        /* Load limits for loop over neighbors */
-        nj0              = jindex[4*i];
-        nj1              = jindex[4*i+1];
-        nj2              = jindex[4*i+2];
-        nj3              = jindex[4*i+3];
-
-        pmask0           = aadata->prologue_mask_gb[i];
-        pmask1           = aadata->prologue_mask_gb[i+1];
-        pmask2           = aadata->prologue_mask_gb[i+2];
-        pmask3           = aadata->prologue_mask_gb[i+3];
-        emask0           = aadata->epilogue_mask[i];
-        emask1           = aadata->epilogue_mask[i+1];
-        emask2           = aadata->epilogue_mask[i+2];
-        emask3           = aadata->epilogue_mask[i+3];
-
-        imask_SSE0        = _mm_load1_ps((real *)(aadata->imask+i));
-        imask_SSE1        = _mm_load1_ps((real *)(aadata->imask+i+1));
-        imask_SSE2        = _mm_load1_ps((real *)(aadata->imask+i+2));
-        imask_SSE3        = _mm_load1_ps((real *)(aadata->imask+i+3));
-
-        /* Prologue part, including exclusion mask */
-        for (j = nj0; j < nj1; j += UNROLLJ)
-        {
-            jmask_SSE0 = _mm_load_ps((real *)pmask0);
-            jmask_SSE1 = _mm_load_ps((real *)pmask1);
-            jmask_SSE2 = _mm_load_ps((real *)pmask2);
-            jmask_SSE3 = _mm_load_ps((real *)pmask3);
-            pmask0    += UNROLLJ;
-            pmask1    += UNROLLJ;
-            pmask2    += UNROLLJ;
-            pmask3    += UNROLLJ;
-
-            /* load j atom coordinates */
-            jx_SSE            = _mm_load_ps(x_align+j);
-            jy_SSE            = _mm_load_ps(y_align+j);
-            jz_SSE            = _mm_load_ps(z_align+j);
-
-            /* Calculate distance */
-            dx_SSE0            = _mm_sub_ps(ix_SSE0, jx_SSE);
-            dy_SSE0            = _mm_sub_ps(iy_SSE0, jy_SSE);
-            dz_SSE0            = _mm_sub_ps(iz_SSE0, jz_SSE);
-            dx_SSE1            = _mm_sub_ps(ix_SSE1, jx_SSE);
-            dy_SSE1            = _mm_sub_ps(iy_SSE1, jy_SSE);
-            dz_SSE1            = _mm_sub_ps(iz_SSE1, jz_SSE);
-            dx_SSE2            = _mm_sub_ps(ix_SSE2, jx_SSE);
-            dy_SSE2            = _mm_sub_ps(iy_SSE2, jy_SSE);
-            dz_SSE2            = _mm_sub_ps(iz_SSE2, jz_SSE);
-            dx_SSE3            = _mm_sub_ps(ix_SSE3, jx_SSE);
-            dy_SSE3            = _mm_sub_ps(iy_SSE3, jy_SSE);
-            dz_SSE3            = _mm_sub_ps(iz_SSE3, jz_SSE);
-
-            /* rsq = dx*dx+dy*dy+dz*dz */
-            rsq_SSE0           = gmx_mm_calc_rsq_ps(dx_SSE0, dy_SSE0, dz_SSE0);
-            rsq_SSE1           = gmx_mm_calc_rsq_ps(dx_SSE1, dy_SSE1, dz_SSE1);
-            rsq_SSE2           = gmx_mm_calc_rsq_ps(dx_SSE2, dy_SSE2, dz_SSE2);
-            rsq_SSE3           = gmx_mm_calc_rsq_ps(dx_SSE3, dy_SSE3, dz_SSE3);
-
-            /* Combine masks */
-            jmask_SSE0         = _mm_and_ps(jmask_SSE0, imask_SSE0);
-            jmask_SSE1         = _mm_and_ps(jmask_SSE1, imask_SSE1);
-            jmask_SSE2         = _mm_and_ps(jmask_SSE2, imask_SSE2);
-            jmask_SSE3         = _mm_and_ps(jmask_SSE3, imask_SSE3);
-
-            /* Calculate 1/r and 1/r2 */
-            rinv_SSE0          = gmx_mm_invsqrt_ps(rsq_SSE0);
-            rinv_SSE1          = gmx_mm_invsqrt_ps(rsq_SSE1);
-            rinv_SSE2          = gmx_mm_invsqrt_ps(rsq_SSE2);
-            rinv_SSE3          = gmx_mm_invsqrt_ps(rsq_SSE3);
-
-            /* Apply mask */
-            rinv_SSE0          = _mm_and_ps(rinv_SSE0, jmask_SSE0);
-            rinv_SSE1          = _mm_and_ps(rinv_SSE1, jmask_SSE1);
-            rinv_SSE2          = _mm_and_ps(rinv_SSE2, jmask_SSE2);
-            rinv_SSE3          = _mm_and_ps(rinv_SSE3, jmask_SSE3);
-
-            dr_SSE0            = _mm_mul_ps(rsq_SSE0, rinv_SSE0);
-            dr_SSE1            = _mm_mul_ps(rsq_SSE1, rinv_SSE1);
-            dr_SSE2            = _mm_mul_ps(rsq_SSE2, rinv_SSE2);
-            dr_SSE3            = _mm_mul_ps(rsq_SSE3, rinv_SSE3);
-
-            sk_aj_SSE          = _mm_load_ps(obc_param+j);
-            raj_SSE            = _mm_load_ps(gb_radius+j);
-            raj_inv_SSE        = gmx_mm_inv_ps(raj_SSE);
-
-            /* Evaluate influence of atom aj -> ai */
-            t1_SSE0            = _mm_add_ps(dr_SSE0, sk_aj_SSE);
-            t1_SSE1            = _mm_add_ps(dr_SSE1, sk_aj_SSE);
-            t1_SSE2            = _mm_add_ps(dr_SSE2, sk_aj_SSE);
-            t1_SSE3            = _mm_add_ps(dr_SSE3, sk_aj_SSE);
-            t2_SSE0            = _mm_sub_ps(dr_SSE0, sk_aj_SSE);
-            t2_SSE1            = _mm_sub_ps(dr_SSE1, sk_aj_SSE);
-            t2_SSE2            = _mm_sub_ps(dr_SSE2, sk_aj_SSE);
-            t2_SSE3            = _mm_sub_ps(dr_SSE3, sk_aj_SSE);
-            t3_SSE0            = _mm_sub_ps(sk_aj_SSE, dr_SSE0);
-            t3_SSE1            = _mm_sub_ps(sk_aj_SSE, dr_SSE1);
-            t3_SSE2            = _mm_sub_ps(sk_aj_SSE, dr_SSE2);
-            t3_SSE3            = _mm_sub_ps(sk_aj_SSE, dr_SSE3);
-
-            obc_mask1_SSE0     = _mm_cmplt_ps(rai_SSE0, t1_SSE0);
-            obc_mask1_SSE1     = _mm_cmplt_ps(rai_SSE1, t1_SSE1);
-            obc_mask1_SSE2     = _mm_cmplt_ps(rai_SSE2, t1_SSE2);
-            obc_mask1_SSE3     = _mm_cmplt_ps(rai_SSE3, t1_SSE3);
-            obc_mask2_SSE0     = _mm_cmplt_ps(rai_SSE0, t2_SSE0);
-            obc_mask2_SSE1     = _mm_cmplt_ps(rai_SSE1, t2_SSE1);
-            obc_mask2_SSE2     = _mm_cmplt_ps(rai_SSE2, t2_SSE2);
-            obc_mask2_SSE3     = _mm_cmplt_ps(rai_SSE3, t2_SSE3);
-            obc_mask3_SSE0     = _mm_cmplt_ps(rai_SSE0, t3_SSE0);
-            obc_mask3_SSE1     = _mm_cmplt_ps(rai_SSE1, t3_SSE1);
-            obc_mask3_SSE2     = _mm_cmplt_ps(rai_SSE2, t3_SSE2);
-            obc_mask3_SSE3     = _mm_cmplt_ps(rai_SSE3, t3_SSE3);
-            obc_mask1_SSE0     = _mm_and_ps(obc_mask1_SSE0, jmask_SSE0);
-            obc_mask1_SSE1     = _mm_and_ps(obc_mask1_SSE1, jmask_SSE1);
-            obc_mask1_SSE2     = _mm_and_ps(obc_mask1_SSE2, jmask_SSE2);
-            obc_mask1_SSE3     = _mm_and_ps(obc_mask1_SSE3, jmask_SSE3);
-
-            uij_SSE0           = gmx_mm_inv_ps(t1_SSE0);
-            uij_SSE1           = gmx_mm_inv_ps(t1_SSE1);
-            uij_SSE2           = gmx_mm_inv_ps(t1_SSE2);
-            uij_SSE3           = gmx_mm_inv_ps(t1_SSE3);
-            lij_SSE0           = _mm_or_ps(   _mm_and_ps(obc_mask2_SSE0, gmx_mm_inv_ps(t2_SSE0)),
-                                              _mm_andnot_ps(obc_mask2_SSE0, rai_inv_SSE0));
-            lij_SSE1           = _mm_or_ps(   _mm_and_ps(obc_mask2_SSE1, gmx_mm_inv_ps(t2_SSE1)),
-                                              _mm_andnot_ps(obc_mask2_SSE1, rai_inv_SSE1));
-            lij_SSE2           = _mm_or_ps(   _mm_and_ps(obc_mask2_SSE2, gmx_mm_inv_ps(t2_SSE2)),
-                                              _mm_andnot_ps(obc_mask2_SSE2, rai_inv_SSE2));
-            lij_SSE3           = _mm_or_ps(   _mm_and_ps(obc_mask2_SSE3, gmx_mm_inv_ps(t2_SSE3)),
-                                              _mm_andnot_ps(obc_mask2_SSE3, rai_inv_SSE3));
-            dlij_SSE0          = _mm_and_ps(one_SSE, obc_mask2_SSE0);
-            dlij_SSE1          = _mm_and_ps(one_SSE, obc_mask2_SSE1);
-            dlij_SSE2          = _mm_and_ps(one_SSE, obc_mask2_SSE2);
-            dlij_SSE3          = _mm_and_ps(one_SSE, obc_mask2_SSE3);
-
-            uij2_SSE0          = _mm_mul_ps(uij_SSE0, uij_SSE0);
-            uij2_SSE1          = _mm_mul_ps(uij_SSE1, uij_SSE1);
-            uij2_SSE2          = _mm_mul_ps(uij_SSE2, uij_SSE2);
-            uij2_SSE3          = _mm_mul_ps(uij_SSE3, uij_SSE3);
-            uij3_SSE0          = _mm_mul_ps(uij2_SSE0, uij_SSE0);
-            uij3_SSE1          = _mm_mul_ps(uij2_SSE1, uij_SSE1);
-            uij3_SSE2          = _mm_mul_ps(uij2_SSE2, uij_SSE2);
-            uij3_SSE3          = _mm_mul_ps(uij2_SSE3, uij_SSE3);
-            lij2_SSE0          = _mm_mul_ps(lij_SSE0, lij_SSE0);
-            lij2_SSE1          = _mm_mul_ps(lij_SSE1, lij_SSE1);
-            lij2_SSE2          = _mm_mul_ps(lij_SSE2, lij_SSE2);
-            lij2_SSE3          = _mm_mul_ps(lij_SSE3, lij_SSE3);
-            lij3_SSE0          = _mm_mul_ps(lij2_SSE0, lij_SSE0);
-            lij3_SSE1          = _mm_mul_ps(lij2_SSE1, lij_SSE1);
-            lij3_SSE2          = _mm_mul_ps(lij2_SSE2, lij_SSE2);
-            lij3_SSE3          = _mm_mul_ps(lij2_SSE3, lij_SSE3);
-
-            diff2_SSE0         = _mm_sub_ps(uij2_SSE0, lij2_SSE0);
-            diff2_SSE1         = _mm_sub_ps(uij2_SSE1, lij2_SSE1);
-            diff2_SSE2         = _mm_sub_ps(uij2_SSE2, lij2_SSE2);
-            diff2_SSE3         = _mm_sub_ps(uij2_SSE3, lij2_SSE3);
-            lij_inv_SSE0       = gmx_mm_invsqrt_ps(lij2_SSE0);
-            lij_inv_SSE1       = gmx_mm_invsqrt_ps(lij2_SSE1);
-            lij_inv_SSE2       = gmx_mm_invsqrt_ps(lij2_SSE2);
-            lij_inv_SSE3       = gmx_mm_invsqrt_ps(lij2_SSE3);
-            sk2_aj_SSE         = _mm_mul_ps(sk_aj_SSE, sk_aj_SSE);
-            sk2_rinv_SSE0      = _mm_mul_ps(sk2_aj_SSE, rinv_SSE0);
-            sk2_rinv_SSE1      = _mm_mul_ps(sk2_aj_SSE, rinv_SSE1);
-            sk2_rinv_SSE2      = _mm_mul_ps(sk2_aj_SSE, rinv_SSE2);
-            sk2_rinv_SSE3      = _mm_mul_ps(sk2_aj_SSE, rinv_SSE3);
-            prod_SSE0          = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE0);
-            prod_SSE1          = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE1);
-            prod_SSE2          = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE2);
-            prod_SSE3          = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE3);
-
-            logterm_SSE0       = gmx_mm_log_ps(_mm_mul_ps(uij_SSE0, lij_inv_SSE0));
-            logterm_SSE1       = gmx_mm_log_ps(_mm_mul_ps(uij_SSE1, lij_inv_SSE1));
-            logterm_SSE2       = gmx_mm_log_ps(_mm_mul_ps(uij_SSE2, lij_inv_SSE2));
-            logterm_SSE3       = gmx_mm_log_ps(_mm_mul_ps(uij_SSE3, lij_inv_SSE3));
-
-            t1_SSE0            = _mm_sub_ps(lij_SSE0, uij_SSE0);
-            t1_SSE1            = _mm_sub_ps(lij_SSE1, uij_SSE1);
-            t1_SSE2            = _mm_sub_ps(lij_SSE2, uij_SSE2);
-            t1_SSE3            = _mm_sub_ps(lij_SSE3, uij_SSE3);
-            t2_SSE0            = _mm_mul_ps(diff2_SSE0,
-                                            _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE0),
-                                                       prod_SSE0));
-            t2_SSE1            = _mm_mul_ps(diff2_SSE1,
-                                            _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE1),
-                                                       prod_SSE1));
-            t2_SSE2            = _mm_mul_ps(diff2_SSE2,
-                                            _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE2),
-                                                       prod_SSE2));
-            t2_SSE3            = _mm_mul_ps(diff2_SSE3,
-                                            _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE3),
-                                                       prod_SSE3));
-
-            t3_SSE0            = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE0, logterm_SSE0));
-            t3_SSE1            = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE1, logterm_SSE1));
-            t3_SSE2            = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE2, logterm_SSE2));
-            t3_SSE3            = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE3, logterm_SSE3));
-            t1_SSE0            = _mm_add_ps(t1_SSE0, _mm_add_ps(t2_SSE0, t3_SSE0));
-            t1_SSE1            = _mm_add_ps(t1_SSE1, _mm_add_ps(t2_SSE1, t3_SSE1));
-            t1_SSE2            = _mm_add_ps(t1_SSE2, _mm_add_ps(t2_SSE2, t3_SSE2));
-            t1_SSE3            = _mm_add_ps(t1_SSE3, _mm_add_ps(t2_SSE3, t3_SSE3));
-            t4_SSE0            = _mm_mul_ps(two_SSE, _mm_sub_ps(rai_inv_SSE0, lij_SSE0));
-            t4_SSE1            = _mm_mul_ps(two_SSE, _mm_sub_ps(rai_inv_SSE1, lij_SSE1));
-            t4_SSE2            = _mm_mul_ps(two_SSE, _mm_sub_ps(rai_inv_SSE2, lij_SSE2));
-            t4_SSE3            = _mm_mul_ps(two_SSE, _mm_sub_ps(rai_inv_SSE3, lij_SSE3));
-            t4_SSE0            = _mm_and_ps(t4_SSE0, obc_mask3_SSE0);
-            t4_SSE1            = _mm_and_ps(t4_SSE1, obc_mask3_SSE1);
-            t4_SSE2            = _mm_and_ps(t4_SSE2, obc_mask3_SSE2);
-            t4_SSE3            = _mm_and_ps(t4_SSE3, obc_mask3_SSE3);
-            t1_SSE0            = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE0, t4_SSE0));
-            t1_SSE1            = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE1, t4_SSE1));
-            t1_SSE2            = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE2, t4_SSE2));
-            t1_SSE3            = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE3, t4_SSE3));
-
-            sum_ai_SSE0        = _mm_add_ps(sum_ai_SSE0, _mm_and_ps(t1_SSE0, obc_mask1_SSE0));
-            sum_ai_SSE1        = _mm_add_ps(sum_ai_SSE1, _mm_and_ps(t1_SSE1, obc_mask1_SSE1));
-            sum_ai_SSE2        = _mm_add_ps(sum_ai_SSE2, _mm_and_ps(t1_SSE2, obc_mask1_SSE2));
-            sum_ai_SSE3        = _mm_add_ps(sum_ai_SSE3, _mm_and_ps(t1_SSE3, obc_mask1_SSE3));
-
-            t1_SSE0            = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE0),
-                                            _mm_mul_ps(prod_SSE0, lij3_SSE0));
-            t1_SSE1            = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE1),
-                                            _mm_mul_ps(prod_SSE1, lij3_SSE1));
-            t1_SSE2            = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE2),
-                                            _mm_mul_ps(prod_SSE2, lij3_SSE2));
-            t1_SSE3            = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE3),
-                                            _mm_mul_ps(prod_SSE3, lij3_SSE3));
-            t1_SSE0            = _mm_sub_ps(t1_SSE0,
-                                            _mm_mul_ps(onefourth_SSE,
-                                                       _mm_add_ps(_mm_mul_ps(lij_SSE0, rinv_SSE0),
-                                                                  _mm_mul_ps(lij3_SSE0, dr_SSE0))));
-            t1_SSE1            = _mm_sub_ps(t1_SSE1,
-                                            _mm_mul_ps(onefourth_SSE,
-                                                       _mm_add_ps(_mm_mul_ps(lij_SSE1, rinv_SSE1),
-                                                                  _mm_mul_ps(lij3_SSE1, dr_SSE1))));
-            t1_SSE2            = _mm_sub_ps(t1_SSE2,
-                                            _mm_mul_ps(onefourth_SSE,
-                                                       _mm_add_ps(_mm_mul_ps(lij_SSE2, rinv_SSE2),
-                                                                  _mm_mul_ps(lij3_SSE2, dr_SSE2))));
-            t1_SSE3            = _mm_sub_ps(t1_SSE3,
-                                            _mm_mul_ps(onefourth_SSE,
-                                                       _mm_add_ps(_mm_mul_ps(lij_SSE3, rinv_SSE3),
-                                                                  _mm_mul_ps(lij3_SSE3, dr_SSE3))));
-
-            t2_SSE0            = _mm_mul_ps(onefourth_SSE,
-                                            _mm_add_ps(_mm_mul_ps(uij_SSE0, rinv_SSE0),
-                                                       _mm_mul_ps(uij3_SSE0, dr_SSE0)));
-            t2_SSE1            = _mm_mul_ps(onefourth_SSE,
-                                            _mm_add_ps(_mm_mul_ps(uij_SSE1, rinv_SSE1),
-                                                       _mm_mul_ps(uij3_SSE1, dr_SSE1)));
-            t2_SSE2            = _mm_mul_ps(onefourth_SSE,
-                                            _mm_add_ps(_mm_mul_ps(uij_SSE2, rinv_SSE2),
-                                                       _mm_mul_ps(uij3_SSE2, dr_SSE2)));
-            t2_SSE3            = _mm_mul_ps(onefourth_SSE,
-                                            _mm_add_ps(_mm_mul_ps(uij_SSE3, rinv_SSE3),
-                                                       _mm_mul_ps(uij3_SSE3, dr_SSE3)));
-            t2_SSE0            = _mm_sub_ps(t2_SSE0,
-                                            _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE0),
-                                                       _mm_mul_ps(prod_SSE0, uij3_SSE0)));
-            t2_SSE1            = _mm_sub_ps(t2_SSE1,
-                                            _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE1),
-                                                       _mm_mul_ps(prod_SSE1, uij3_SSE1)));
-            t2_SSE2            = _mm_sub_ps(t2_SSE2,
-                                            _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE2),
-                                                       _mm_mul_ps(prod_SSE2, uij3_SSE2)));
-            t2_SSE3            = _mm_sub_ps(t2_SSE3,
-                                            _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE3),
-                                                       _mm_mul_ps(prod_SSE3, uij3_SSE3)));
-            t3_SSE0            = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE0),
-                                            _mm_mul_ps(rinv_SSE0, rinv_SSE0));
-            t3_SSE1            = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE1),
-                                            _mm_mul_ps(rinv_SSE1, rinv_SSE1));
-            t3_SSE2            = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE2),
-                                            _mm_mul_ps(rinv_SSE2, rinv_SSE2));
-            t3_SSE3            = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE3),
-                                            _mm_mul_ps(rinv_SSE3, rinv_SSE3));
-            t3_SSE0            = _mm_sub_ps(t3_SSE0,
-                                            _mm_mul_ps(_mm_mul_ps(diff2_SSE0, oneeighth_SSE),
-                                                       _mm_add_ps(one_SSE,
-                                                                  _mm_mul_ps(sk2_rinv_SSE0, rinv_SSE0))));
-            t3_SSE1            = _mm_sub_ps(t3_SSE1,
-                                            _mm_mul_ps(_mm_mul_ps(diff2_SSE1, oneeighth_SSE),
-                                                       _mm_add_ps(one_SSE,
-                                                                  _mm_mul_ps(sk2_rinv_SSE1, rinv_SSE1))));
-            t3_SSE2            = _mm_sub_ps(t3_SSE2,
-                                            _mm_mul_ps(_mm_mul_ps(diff2_SSE2, oneeighth_SSE),
-                                                       _mm_add_ps(one_SSE,
-                                                                  _mm_mul_ps(sk2_rinv_SSE2, rinv_SSE2))));
-            t3_SSE3            = _mm_sub_ps(t3_SSE3,
-                                            _mm_mul_ps(_mm_mul_ps(diff2_SSE3, oneeighth_SSE),
-                                                       _mm_add_ps(one_SSE,
-                                                                  _mm_mul_ps(sk2_rinv_SSE3, rinv_SSE3))));
-
-            t1_SSE0            = _mm_mul_ps(rinv_SSE0,
-                                            _mm_add_ps(_mm_mul_ps(dlij_SSE0, t1_SSE0),
-                                                       _mm_add_ps(t2_SSE0, t3_SSE0)));
-            t1_SSE1            = _mm_mul_ps(rinv_SSE1,
-                                            _mm_add_ps(_mm_mul_ps(dlij_SSE1, t1_SSE1),
-                                                       _mm_add_ps(t2_SSE1, t3_SSE1)));
-            t1_SSE2            = _mm_mul_ps(rinv_SSE2,
-                                            _mm_add_ps(_mm_mul_ps(dlij_SSE2, t1_SSE2),
-                                                       _mm_add_ps(t2_SSE2, t3_SSE2)));
-            t1_SSE3            = _mm_mul_ps(rinv_SSE3,
-                                            _mm_add_ps(_mm_mul_ps(dlij_SSE3, t1_SSE3),
-                                                       _mm_add_ps(t2_SSE3, t3_SSE3)));
-
-            _mm_store_ps(dadx, _mm_and_ps(t1_SSE0, obc_mask1_SSE0));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_and_ps(t1_SSE1, obc_mask1_SSE1));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_and_ps(t1_SSE2, obc_mask1_SSE2));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_and_ps(t1_SSE3, obc_mask1_SSE3));
-            dadx += 4;
-
-            /* Evaluate influence of atom ai -> aj */
-            t1_SSE0            = _mm_add_ps(dr_SSE0, sk_ai_SSE0);
-            t1_SSE1            = _mm_add_ps(dr_SSE1, sk_ai_SSE1);
-            t1_SSE2            = _mm_add_ps(dr_SSE2, sk_ai_SSE2);
-            t1_SSE3            = _mm_add_ps(dr_SSE3, sk_ai_SSE3);
-            t2_SSE0            = _mm_sub_ps(dr_SSE0, sk_ai_SSE0);
-            t2_SSE1            = _mm_sub_ps(dr_SSE1, sk_ai_SSE1);
-            t2_SSE2            = _mm_sub_ps(dr_SSE2, sk_ai_SSE2);
-            t2_SSE3            = _mm_sub_ps(dr_SSE3, sk_ai_SSE3);
-            t3_SSE0            = _mm_sub_ps(sk_ai_SSE0, dr_SSE0);
-            t3_SSE1            = _mm_sub_ps(sk_ai_SSE1, dr_SSE1);
-            t3_SSE2            = _mm_sub_ps(sk_ai_SSE2, dr_SSE2);
-            t3_SSE3            = _mm_sub_ps(sk_ai_SSE3, dr_SSE3);
-
-            obc_mask1_SSE0     = _mm_cmplt_ps(raj_SSE, t1_SSE0);
-            obc_mask1_SSE1     = _mm_cmplt_ps(raj_SSE, t1_SSE1);
-            obc_mask1_SSE2     = _mm_cmplt_ps(raj_SSE, t1_SSE2);
-            obc_mask1_SSE3     = _mm_cmplt_ps(raj_SSE, t1_SSE3);
-            obc_mask2_SSE0     = _mm_cmplt_ps(raj_SSE, t2_SSE0);
-            obc_mask2_SSE1     = _mm_cmplt_ps(raj_SSE, t2_SSE1);
-            obc_mask2_SSE2     = _mm_cmplt_ps(raj_SSE, t2_SSE2);
-            obc_mask2_SSE3     = _mm_cmplt_ps(raj_SSE, t2_SSE3);
-            obc_mask3_SSE0     = _mm_cmplt_ps(raj_SSE, t3_SSE0);
-            obc_mask3_SSE1     = _mm_cmplt_ps(raj_SSE, t3_SSE1);
-            obc_mask3_SSE2     = _mm_cmplt_ps(raj_SSE, t3_SSE2);
-            obc_mask3_SSE3     = _mm_cmplt_ps(raj_SSE, t3_SSE3);
-            obc_mask1_SSE0     = _mm_and_ps(obc_mask1_SSE0, jmask_SSE0);
-            obc_mask1_SSE1     = _mm_and_ps(obc_mask1_SSE1, jmask_SSE1);
-            obc_mask1_SSE2     = _mm_and_ps(obc_mask1_SSE2, jmask_SSE2);
-            obc_mask1_SSE3     = _mm_and_ps(obc_mask1_SSE3, jmask_SSE3);
-
-            uij_SSE0           = gmx_mm_inv_ps(t1_SSE0);
-            uij_SSE1           = gmx_mm_inv_ps(t1_SSE1);
-            uij_SSE2           = gmx_mm_inv_ps(t1_SSE2);
-            uij_SSE3           = gmx_mm_inv_ps(t1_SSE3);
-            lij_SSE0           = _mm_or_ps(   _mm_and_ps(obc_mask2_SSE0, gmx_mm_inv_ps(t2_SSE0)),
-                                              _mm_andnot_ps(obc_mask2_SSE0, raj_inv_SSE));
-            lij_SSE1           = _mm_or_ps(   _mm_and_ps(obc_mask2_SSE1, gmx_mm_inv_ps(t2_SSE1)),
-                                              _mm_andnot_ps(obc_mask2_SSE1, raj_inv_SSE));
-            lij_SSE2           = _mm_or_ps(   _mm_and_ps(obc_mask2_SSE2, gmx_mm_inv_ps(t2_SSE2)),
-                                              _mm_andnot_ps(obc_mask2_SSE2, raj_inv_SSE));
-            lij_SSE3           = _mm_or_ps(   _mm_and_ps(obc_mask2_SSE3, gmx_mm_inv_ps(t2_SSE3)),
-                                              _mm_andnot_ps(obc_mask2_SSE3, raj_inv_SSE));
-            dlij_SSE0          = _mm_and_ps(one_SSE, obc_mask2_SSE0);
-            dlij_SSE1          = _mm_and_ps(one_SSE, obc_mask2_SSE1);
-            dlij_SSE2          = _mm_and_ps(one_SSE, obc_mask2_SSE2);
-            dlij_SSE3          = _mm_and_ps(one_SSE, obc_mask2_SSE3);
-
-            uij2_SSE0          = _mm_mul_ps(uij_SSE0, uij_SSE0);
-            uij2_SSE1          = _mm_mul_ps(uij_SSE1, uij_SSE1);
-            uij2_SSE2          = _mm_mul_ps(uij_SSE2, uij_SSE2);
-            uij2_SSE3          = _mm_mul_ps(uij_SSE3, uij_SSE3);
-            uij3_SSE0          = _mm_mul_ps(uij2_SSE0, uij_SSE0);
-            uij3_SSE1          = _mm_mul_ps(uij2_SSE1, uij_SSE1);
-            uij3_SSE2          = _mm_mul_ps(uij2_SSE2, uij_SSE2);
-            uij3_SSE3          = _mm_mul_ps(uij2_SSE3, uij_SSE3);
-            lij2_SSE0          = _mm_mul_ps(lij_SSE0, lij_SSE0);
-            lij2_SSE1          = _mm_mul_ps(lij_SSE1, lij_SSE1);
-            lij2_SSE2          = _mm_mul_ps(lij_SSE2, lij_SSE2);
-            lij2_SSE3          = _mm_mul_ps(lij_SSE3, lij_SSE3);
-            lij3_SSE0          = _mm_mul_ps(lij2_SSE0, lij_SSE0);
-            lij3_SSE1          = _mm_mul_ps(lij2_SSE1, lij_SSE1);
-            lij3_SSE2          = _mm_mul_ps(lij2_SSE2, lij_SSE2);
-            lij3_SSE3          = _mm_mul_ps(lij2_SSE3, lij_SSE3);
-
-            diff2_SSE0         = _mm_sub_ps(uij2_SSE0, lij2_SSE0);
-            diff2_SSE1         = _mm_sub_ps(uij2_SSE1, lij2_SSE1);
-            diff2_SSE2         = _mm_sub_ps(uij2_SSE2, lij2_SSE2);
-            diff2_SSE3         = _mm_sub_ps(uij2_SSE3, lij2_SSE3);
-            lij_inv_SSE0       = gmx_mm_invsqrt_ps(lij2_SSE0);
-            lij_inv_SSE1       = gmx_mm_invsqrt_ps(lij2_SSE1);
-            lij_inv_SSE2       = gmx_mm_invsqrt_ps(lij2_SSE2);
-            lij_inv_SSE3       = gmx_mm_invsqrt_ps(lij2_SSE3);
-            sk2_rinv_SSE0      = _mm_mul_ps(sk2_ai_SSE0, rinv_SSE0);
-            sk2_rinv_SSE1      = _mm_mul_ps(sk2_ai_SSE1, rinv_SSE1);
-            sk2_rinv_SSE2      = _mm_mul_ps(sk2_ai_SSE2, rinv_SSE2);
-            sk2_rinv_SSE3      = _mm_mul_ps(sk2_ai_SSE3, rinv_SSE3);
-            prod_SSE0          = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE0);
-            prod_SSE1          = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE1);
-            prod_SSE2          = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE2);
-            prod_SSE3          = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE3);
-
-            logterm_SSE0       = gmx_mm_log_ps(_mm_mul_ps(uij_SSE0, lij_inv_SSE0));
-            logterm_SSE1       = gmx_mm_log_ps(_mm_mul_ps(uij_SSE1, lij_inv_SSE1));
-            logterm_SSE2       = gmx_mm_log_ps(_mm_mul_ps(uij_SSE2, lij_inv_SSE2));
-            logterm_SSE3       = gmx_mm_log_ps(_mm_mul_ps(uij_SSE3, lij_inv_SSE3));
-            t1_SSE0            = _mm_sub_ps(lij_SSE0, uij_SSE0);
-            t1_SSE1            = _mm_sub_ps(lij_SSE1, uij_SSE1);
-            t1_SSE2            = _mm_sub_ps(lij_SSE2, uij_SSE2);
-            t1_SSE3            = _mm_sub_ps(lij_SSE3, uij_SSE3);
-            t2_SSE0            = _mm_mul_ps(diff2_SSE0,
-                                            _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE0),
-                                                       prod_SSE0));
-            t2_SSE1            = _mm_mul_ps(diff2_SSE1,
-                                            _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE1),
-                                                       prod_SSE1));
-            t2_SSE2            = _mm_mul_ps(diff2_SSE2,
-                                            _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE2),
-                                                       prod_SSE2));
-            t2_SSE3            = _mm_mul_ps(diff2_SSE3,
-                                            _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE3),
-                                                       prod_SSE3));
-            t3_SSE0            = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE0, logterm_SSE0));
-            t3_SSE1            = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE1, logterm_SSE1));
-            t3_SSE2            = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE2, logterm_SSE2));
-            t3_SSE3            = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE3, logterm_SSE3));
-            t1_SSE0            = _mm_add_ps(t1_SSE0, _mm_add_ps(t2_SSE0, t3_SSE0));
-            t1_SSE1            = _mm_add_ps(t1_SSE1, _mm_add_ps(t2_SSE1, t3_SSE1));
-            t1_SSE2            = _mm_add_ps(t1_SSE2, _mm_add_ps(t2_SSE2, t3_SSE2));
-            t1_SSE3            = _mm_add_ps(t1_SSE3, _mm_add_ps(t2_SSE3, t3_SSE3));
-            t4_SSE0            = _mm_mul_ps(two_SSE, _mm_sub_ps(raj_inv_SSE, lij_SSE0));
-            t4_SSE1            = _mm_mul_ps(two_SSE, _mm_sub_ps(raj_inv_SSE, lij_SSE1));
-            t4_SSE2            = _mm_mul_ps(two_SSE, _mm_sub_ps(raj_inv_SSE, lij_SSE2));
-            t4_SSE3            = _mm_mul_ps(two_SSE, _mm_sub_ps(raj_inv_SSE, lij_SSE3));
-            t4_SSE0            = _mm_and_ps(t4_SSE0, obc_mask3_SSE0);
-            t4_SSE1            = _mm_and_ps(t4_SSE1, obc_mask3_SSE1);
-            t4_SSE2            = _mm_and_ps(t4_SSE2, obc_mask3_SSE2);
-            t4_SSE3            = _mm_and_ps(t4_SSE3, obc_mask3_SSE3);
-            t1_SSE0            = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE0, t4_SSE0));
-            t1_SSE1            = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE1, t4_SSE1));
-            t1_SSE2            = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE2, t4_SSE2));
-            t1_SSE3            = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE3, t4_SSE3));
-
-            _mm_store_ps(work+j, _mm_add_ps(_mm_load_ps(work+j),
-                                            gmx_mm_sum4_ps(_mm_and_ps(t1_SSE0, obc_mask1_SSE0),
-                                                           _mm_and_ps(t1_SSE1, obc_mask1_SSE1),
-                                                           _mm_and_ps(t1_SSE2, obc_mask1_SSE2),
-                                                           _mm_and_ps(t1_SSE3, obc_mask1_SSE3))));
-
-            t1_SSE0            = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE0),
-                                            _mm_mul_ps(prod_SSE0, lij3_SSE0));
-            t1_SSE1            = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE1),
-                                            _mm_mul_ps(prod_SSE1, lij3_SSE1));
-            t1_SSE2            = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE2),
-                                            _mm_mul_ps(prod_SSE2, lij3_SSE2));
-            t1_SSE3            = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE3),
-                                            _mm_mul_ps(prod_SSE3, lij3_SSE3));
-            t1_SSE0            = _mm_sub_ps(t1_SSE0,
-                                            _mm_mul_ps(onefourth_SSE,
-                                                       _mm_add_ps(_mm_mul_ps(lij_SSE0, rinv_SSE0),
-                                                                  _mm_mul_ps(lij3_SSE0, dr_SSE0))));
-            t1_SSE1            = _mm_sub_ps(t1_SSE1,
-                                            _mm_mul_ps(onefourth_SSE,
-                                                       _mm_add_ps(_mm_mul_ps(lij_SSE1, rinv_SSE1),
-                                                                  _mm_mul_ps(lij3_SSE1, dr_SSE1))));
-            t1_SSE2            = _mm_sub_ps(t1_SSE2,
-                                            _mm_mul_ps(onefourth_SSE,
-                                                       _mm_add_ps(_mm_mul_ps(lij_SSE2, rinv_SSE2),
-                                                                  _mm_mul_ps(lij3_SSE2, dr_SSE2))));
-            t1_SSE3            = _mm_sub_ps(t1_SSE3,
-                                            _mm_mul_ps(onefourth_SSE,
-                                                       _mm_add_ps(_mm_mul_ps(lij_SSE3, rinv_SSE3),
-                                                                  _mm_mul_ps(lij3_SSE3, dr_SSE3))));
-            t2_SSE0            = _mm_mul_ps(onefourth_SSE,
-                                            _mm_add_ps(_mm_mul_ps(uij_SSE0, rinv_SSE0),
-                                                       _mm_mul_ps(uij3_SSE0, dr_SSE0)));
-            t2_SSE1            = _mm_mul_ps(onefourth_SSE,
-                                            _mm_add_ps(_mm_mul_ps(uij_SSE1, rinv_SSE1),
-                                                       _mm_mul_ps(uij3_SSE1, dr_SSE1)));
-            t2_SSE2            = _mm_mul_ps(onefourth_SSE,
-                                            _mm_add_ps(_mm_mul_ps(uij_SSE2, rinv_SSE2),
-                                                       _mm_mul_ps(uij3_SSE2, dr_SSE2)));
-            t2_SSE3            = _mm_mul_ps(onefourth_SSE,
-                                            _mm_add_ps(_mm_mul_ps(uij_SSE3, rinv_SSE3),
-                                                       _mm_mul_ps(uij3_SSE3, dr_SSE3)));
-            t2_SSE0            = _mm_sub_ps(t2_SSE0,
-                                            _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE0),
-                                                       _mm_mul_ps(prod_SSE0, uij3_SSE0)));
-            t2_SSE1            = _mm_sub_ps(t2_SSE1,
-                                            _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE1),
-                                                       _mm_mul_ps(prod_SSE1, uij3_SSE1)));
-            t2_SSE2            = _mm_sub_ps(t2_SSE2,
-                                            _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE2),
-                                                       _mm_mul_ps(prod_SSE2, uij3_SSE2)));
-            t2_SSE3            = _mm_sub_ps(t2_SSE3,
-                                            _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE3),
-                                                       _mm_mul_ps(prod_SSE3, uij3_SSE3)));
-
-            t3_SSE0            = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE0),
-                                            _mm_mul_ps(rinv_SSE0, rinv_SSE0));
-            t3_SSE1            = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE1),
-                                            _mm_mul_ps(rinv_SSE1, rinv_SSE1));
-            t3_SSE2            = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE2),
-                                            _mm_mul_ps(rinv_SSE2, rinv_SSE2));
-            t3_SSE3            = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE3),
-                                            _mm_mul_ps(rinv_SSE3, rinv_SSE3));
-
-            t3_SSE0            = _mm_sub_ps(t3_SSE0,
-                                            _mm_mul_ps(_mm_mul_ps(diff2_SSE0, oneeighth_SSE),
-                                                       _mm_add_ps(one_SSE,
-                                                                  _mm_mul_ps(sk2_rinv_SSE0, rinv_SSE0))));
-            t3_SSE1            = _mm_sub_ps(t3_SSE1,
-                                            _mm_mul_ps(_mm_mul_ps(diff2_SSE1, oneeighth_SSE),
-                                                       _mm_add_ps(one_SSE,
-                                                                  _mm_mul_ps(sk2_rinv_SSE1, rinv_SSE1))));
-            t3_SSE2            = _mm_sub_ps(t3_SSE2,
-                                            _mm_mul_ps(_mm_mul_ps(diff2_SSE2, oneeighth_SSE),
-                                                       _mm_add_ps(one_SSE,
-                                                                  _mm_mul_ps(sk2_rinv_SSE2, rinv_SSE2))));
-            t3_SSE3            = _mm_sub_ps(t3_SSE3,
-                                            _mm_mul_ps(_mm_mul_ps(diff2_SSE3, oneeighth_SSE),
-                                                       _mm_add_ps(one_SSE,
-                                                                  _mm_mul_ps(sk2_rinv_SSE3, rinv_SSE3))));
-
-
-            t1_SSE0            = _mm_mul_ps(rinv_SSE0,
-                                            _mm_add_ps(_mm_mul_ps(dlij_SSE0, t1_SSE0),
-                                                       _mm_add_ps(t2_SSE0, t3_SSE0)));
-            t1_SSE1            = _mm_mul_ps(rinv_SSE1,
-                                            _mm_add_ps(_mm_mul_ps(dlij_SSE1, t1_SSE1),
-                                                       _mm_add_ps(t2_SSE1, t3_SSE1)));
-            t1_SSE2            = _mm_mul_ps(rinv_SSE2,
-                                            _mm_add_ps(_mm_mul_ps(dlij_SSE2, t1_SSE2),
-                                                       _mm_add_ps(t2_SSE2, t3_SSE2)));
-            t1_SSE3            = _mm_mul_ps(rinv_SSE3,
-                                            _mm_add_ps(_mm_mul_ps(dlij_SSE3, t1_SSE3),
-                                                       _mm_add_ps(t2_SSE3, t3_SSE3)));
-
-            _mm_store_ps(dadx, _mm_and_ps(t1_SSE0, obc_mask1_SSE0));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_and_ps(t1_SSE1, obc_mask1_SSE1));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_and_ps(t1_SSE2, obc_mask1_SSE2));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_and_ps(t1_SSE3, obc_mask1_SSE3));
-            dadx += 4;
-
-        }
-
-        /* Main part, no exclusions */
-        for (j = nj1; j < nj2; j += UNROLLJ)
-        {
-            /* load j atom coordinates */
-            jx_SSE            = _mm_load_ps(x_align+j);
-            jy_SSE            = _mm_load_ps(y_align+j);
-            jz_SSE            = _mm_load_ps(z_align+j);
-
-            /* Calculate distance */
-            dx_SSE0            = _mm_sub_ps(ix_SSE0, jx_SSE);
-            dy_SSE0            = _mm_sub_ps(iy_SSE0, jy_SSE);
-            dz_SSE0            = _mm_sub_ps(iz_SSE0, jz_SSE);
-            dx_SSE1            = _mm_sub_ps(ix_SSE1, jx_SSE);
-            dy_SSE1            = _mm_sub_ps(iy_SSE1, jy_SSE);
-            dz_SSE1            = _mm_sub_ps(iz_SSE1, jz_SSE);
-            dx_SSE2            = _mm_sub_ps(ix_SSE2, jx_SSE);
-            dy_SSE2            = _mm_sub_ps(iy_SSE2, jy_SSE);
-            dz_SSE2            = _mm_sub_ps(iz_SSE2, jz_SSE);
-            dx_SSE3            = _mm_sub_ps(ix_SSE3, jx_SSE);
-            dy_SSE3            = _mm_sub_ps(iy_SSE3, jy_SSE);
-            dz_SSE3            = _mm_sub_ps(iz_SSE3, jz_SSE);
-
-            /* rsq = dx*dx+dy*dy+dz*dz */
-            rsq_SSE0           = gmx_mm_calc_rsq_ps(dx_SSE0, dy_SSE0, dz_SSE0);
-            rsq_SSE1           = gmx_mm_calc_rsq_ps(dx_SSE1, dy_SSE1, dz_SSE1);
-            rsq_SSE2           = gmx_mm_calc_rsq_ps(dx_SSE2, dy_SSE2, dz_SSE2);
-            rsq_SSE3           = gmx_mm_calc_rsq_ps(dx_SSE3, dy_SSE3, dz_SSE3);
-
-            /* Calculate 1/r and 1/r2 */
-            rinv_SSE0          = gmx_mm_invsqrt_ps(rsq_SSE0);
-            rinv_SSE1          = gmx_mm_invsqrt_ps(rsq_SSE1);
-            rinv_SSE2          = gmx_mm_invsqrt_ps(rsq_SSE2);
-            rinv_SSE3          = gmx_mm_invsqrt_ps(rsq_SSE3);
-
-            /* Apply mask */
-            rinv_SSE0          = _mm_and_ps(rinv_SSE0, imask_SSE0);
-            rinv_SSE1          = _mm_and_ps(rinv_SSE1, imask_SSE1);
-            rinv_SSE2          = _mm_and_ps(rinv_SSE2, imask_SSE2);
-            rinv_SSE3          = _mm_and_ps(rinv_SSE3, imask_SSE3);
-
-            dr_SSE0            = _mm_mul_ps(rsq_SSE0, rinv_SSE0);
-            dr_SSE1            = _mm_mul_ps(rsq_SSE1, rinv_SSE1);
-            dr_SSE2            = _mm_mul_ps(rsq_SSE2, rinv_SSE2);
-            dr_SSE3            = _mm_mul_ps(rsq_SSE3, rinv_SSE3);
-
-            sk_aj_SSE          = _mm_load_ps(obc_param+j);
-            raj_SSE            = _mm_load_ps(gb_radius+j);
-
-            raj_inv_SSE        = gmx_mm_inv_ps(raj_SSE);
-
-            /* Evaluate influence of atom aj -> ai */
-            t1_SSE0            = _mm_add_ps(dr_SSE0, sk_aj_SSE);
-            t1_SSE1            = _mm_add_ps(dr_SSE1, sk_aj_SSE);
-            t1_SSE2            = _mm_add_ps(dr_SSE2, sk_aj_SSE);
-            t1_SSE3            = _mm_add_ps(dr_SSE3, sk_aj_SSE);
-            t2_SSE0            = _mm_sub_ps(dr_SSE0, sk_aj_SSE);
-            t2_SSE1            = _mm_sub_ps(dr_SSE1, sk_aj_SSE);
-            t2_SSE2            = _mm_sub_ps(dr_SSE2, sk_aj_SSE);
-            t2_SSE3            = _mm_sub_ps(dr_SSE3, sk_aj_SSE);
-            t3_SSE0            = _mm_sub_ps(sk_aj_SSE, dr_SSE0);
-            t3_SSE1            = _mm_sub_ps(sk_aj_SSE, dr_SSE1);
-            t3_SSE2            = _mm_sub_ps(sk_aj_SSE, dr_SSE2);
-            t3_SSE3            = _mm_sub_ps(sk_aj_SSE, dr_SSE3);
-
-            obc_mask1_SSE0     = _mm_cmplt_ps(rai_SSE0, t1_SSE0);
-            obc_mask1_SSE1     = _mm_cmplt_ps(rai_SSE1, t1_SSE1);
-            obc_mask1_SSE2     = _mm_cmplt_ps(rai_SSE2, t1_SSE2);
-            obc_mask1_SSE3     = _mm_cmplt_ps(rai_SSE3, t1_SSE3);
-            obc_mask2_SSE0     = _mm_cmplt_ps(rai_SSE0, t2_SSE0);
-            obc_mask2_SSE1     = _mm_cmplt_ps(rai_SSE1, t2_SSE1);
-            obc_mask2_SSE2     = _mm_cmplt_ps(rai_SSE2, t2_SSE2);
-            obc_mask2_SSE3     = _mm_cmplt_ps(rai_SSE3, t2_SSE3);
-            obc_mask3_SSE0     = _mm_cmplt_ps(rai_SSE0, t3_SSE0);
-            obc_mask3_SSE1     = _mm_cmplt_ps(rai_SSE1, t3_SSE1);
-            obc_mask3_SSE2     = _mm_cmplt_ps(rai_SSE2, t3_SSE2);
-            obc_mask3_SSE3     = _mm_cmplt_ps(rai_SSE3, t3_SSE3);
-            obc_mask1_SSE0     = _mm_and_ps(obc_mask1_SSE0, imask_SSE0);
-            obc_mask1_SSE1     = _mm_and_ps(obc_mask1_SSE1, imask_SSE1);
-            obc_mask1_SSE2     = _mm_and_ps(obc_mask1_SSE2, imask_SSE2);
-            obc_mask1_SSE3     = _mm_and_ps(obc_mask1_SSE3, imask_SSE3);
-
-            uij_SSE0           = gmx_mm_inv_ps(t1_SSE0);
-            uij_SSE1           = gmx_mm_inv_ps(t1_SSE1);
-            uij_SSE2           = gmx_mm_inv_ps(t1_SSE2);
-            uij_SSE3           = gmx_mm_inv_ps(t1_SSE3);
-            lij_SSE0           = _mm_or_ps(   _mm_and_ps(obc_mask2_SSE0, gmx_mm_inv_ps(t2_SSE0)),
-                                              _mm_andnot_ps(obc_mask2_SSE0, rai_inv_SSE0));
-            lij_SSE1           = _mm_or_ps(   _mm_and_ps(obc_mask2_SSE1, gmx_mm_inv_ps(t2_SSE1)),
-                                              _mm_andnot_ps(obc_mask2_SSE1, rai_inv_SSE1));
-            lij_SSE2           = _mm_or_ps(   _mm_and_ps(obc_mask2_SSE2, gmx_mm_inv_ps(t2_SSE2)),
-                                              _mm_andnot_ps(obc_mask2_SSE2, rai_inv_SSE2));
-            lij_SSE3           = _mm_or_ps(   _mm_and_ps(obc_mask2_SSE3, gmx_mm_inv_ps(t2_SSE3)),
-                                              _mm_andnot_ps(obc_mask2_SSE3, rai_inv_SSE3));
-            dlij_SSE0          = _mm_and_ps(one_SSE, obc_mask2_SSE0);
-            dlij_SSE1          = _mm_and_ps(one_SSE, obc_mask2_SSE1);
-            dlij_SSE2          = _mm_and_ps(one_SSE, obc_mask2_SSE2);
-            dlij_SSE3          = _mm_and_ps(one_SSE, obc_mask2_SSE3);
-
-            uij2_SSE0          = _mm_mul_ps(uij_SSE0, uij_SSE0);
-            uij2_SSE1          = _mm_mul_ps(uij_SSE1, uij_SSE1);
-            uij2_SSE2          = _mm_mul_ps(uij_SSE2, uij_SSE2);
-            uij2_SSE3          = _mm_mul_ps(uij_SSE3, uij_SSE3);
-            uij3_SSE0          = _mm_mul_ps(uij2_SSE0, uij_SSE0);
-            uij3_SSE1          = _mm_mul_ps(uij2_SSE1, uij_SSE1);
-            uij3_SSE2          = _mm_mul_ps(uij2_SSE2, uij_SSE2);
-            uij3_SSE3          = _mm_mul_ps(uij2_SSE3, uij_SSE3);
-            lij2_SSE0          = _mm_mul_ps(lij_SSE0, lij_SSE0);
-            lij2_SSE1          = _mm_mul_ps(lij_SSE1, lij_SSE1);
-            lij2_SSE2          = _mm_mul_ps(lij_SSE2, lij_SSE2);
-            lij2_SSE3          = _mm_mul_ps(lij_SSE3, lij_SSE3);
-            lij3_SSE0          = _mm_mul_ps(lij2_SSE0, lij_SSE0);
-            lij3_SSE1          = _mm_mul_ps(lij2_SSE1, lij_SSE1);
-            lij3_SSE2          = _mm_mul_ps(lij2_SSE2, lij_SSE2);
-            lij3_SSE3          = _mm_mul_ps(lij2_SSE3, lij_SSE3);
-
-            diff2_SSE0         = _mm_sub_ps(uij2_SSE0, lij2_SSE0);
-            diff2_SSE1         = _mm_sub_ps(uij2_SSE1, lij2_SSE1);
-            diff2_SSE2         = _mm_sub_ps(uij2_SSE2, lij2_SSE2);
-            diff2_SSE3         = _mm_sub_ps(uij2_SSE3, lij2_SSE3);
-            lij_inv_SSE0       = gmx_mm_invsqrt_ps(lij2_SSE0);
-            lij_inv_SSE1       = gmx_mm_invsqrt_ps(lij2_SSE1);
-            lij_inv_SSE2       = gmx_mm_invsqrt_ps(lij2_SSE2);
-            lij_inv_SSE3       = gmx_mm_invsqrt_ps(lij2_SSE3);
-            sk2_aj_SSE         = _mm_mul_ps(sk_aj_SSE, sk_aj_SSE);
-            sk2_rinv_SSE0      = _mm_mul_ps(sk2_aj_SSE, rinv_SSE0);
-            sk2_rinv_SSE1      = _mm_mul_ps(sk2_aj_SSE, rinv_SSE1);
-            sk2_rinv_SSE2      = _mm_mul_ps(sk2_aj_SSE, rinv_SSE2);
-            sk2_rinv_SSE3      = _mm_mul_ps(sk2_aj_SSE, rinv_SSE3);
-            prod_SSE0          = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE0);
-            prod_SSE1          = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE1);
-            prod_SSE2          = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE2);
-            prod_SSE3          = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE3);
-
-            logterm_SSE0       = gmx_mm_log_ps(_mm_mul_ps(uij_SSE0, lij_inv_SSE0));
-            logterm_SSE1       = gmx_mm_log_ps(_mm_mul_ps(uij_SSE1, lij_inv_SSE1));
-            logterm_SSE2       = gmx_mm_log_ps(_mm_mul_ps(uij_SSE2, lij_inv_SSE2));
-            logterm_SSE3       = gmx_mm_log_ps(_mm_mul_ps(uij_SSE3, lij_inv_SSE3));
-
-            t1_SSE0            = _mm_sub_ps(lij_SSE0, uij_SSE0);
-            t1_SSE1            = _mm_sub_ps(lij_SSE1, uij_SSE1);
-            t1_SSE2            = _mm_sub_ps(lij_SSE2, uij_SSE2);
-            t1_SSE3            = _mm_sub_ps(lij_SSE3, uij_SSE3);
-            t2_SSE0            = _mm_mul_ps(diff2_SSE0,
-                                            _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE0),
-                                                       prod_SSE0));
-            t2_SSE1            = _mm_mul_ps(diff2_SSE1,
-                                            _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE1),
-                                                       prod_SSE1));
-            t2_SSE2            = _mm_mul_ps(diff2_SSE2,
-                                            _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE2),
-                                                       prod_SSE2));
-            t2_SSE3            = _mm_mul_ps(diff2_SSE3,
-                                            _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE3),
-                                                       prod_SSE3));
-
-            t3_SSE0            = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE0, logterm_SSE0));
-            t3_SSE1            = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE1, logterm_SSE1));
-            t3_SSE2            = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE2, logterm_SSE2));
-            t3_SSE3            = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE3, logterm_SSE3));
-            t1_SSE0            = _mm_add_ps(t1_SSE0, _mm_add_ps(t2_SSE0, t3_SSE0));
-            t1_SSE1            = _mm_add_ps(t1_SSE1, _mm_add_ps(t2_SSE1, t3_SSE1));
-            t1_SSE2            = _mm_add_ps(t1_SSE2, _mm_add_ps(t2_SSE2, t3_SSE2));
-            t1_SSE3            = _mm_add_ps(t1_SSE3, _mm_add_ps(t2_SSE3, t3_SSE3));
-            t4_SSE0            = _mm_mul_ps(two_SSE, _mm_sub_ps(rai_inv_SSE0, lij_SSE0));
-            t4_SSE1            = _mm_mul_ps(two_SSE, _mm_sub_ps(rai_inv_SSE1, lij_SSE1));
-            t4_SSE2            = _mm_mul_ps(two_SSE, _mm_sub_ps(rai_inv_SSE2, lij_SSE2));
-            t4_SSE3            = _mm_mul_ps(two_SSE, _mm_sub_ps(rai_inv_SSE3, lij_SSE3));
-            t4_SSE0            = _mm_and_ps(t4_SSE0, obc_mask3_SSE0);
-            t4_SSE1            = _mm_and_ps(t4_SSE1, obc_mask3_SSE1);
-            t4_SSE2            = _mm_and_ps(t4_SSE2, obc_mask3_SSE2);
-            t4_SSE3            = _mm_and_ps(t4_SSE3, obc_mask3_SSE3);
-            t1_SSE0            = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE0, t4_SSE0));
-            t1_SSE1            = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE1, t4_SSE1));
-            t1_SSE2            = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE2, t4_SSE2));
-            t1_SSE3            = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE3, t4_SSE3));
-
-            sum_ai_SSE0        = _mm_add_ps(sum_ai_SSE0, _mm_and_ps(t1_SSE0, obc_mask1_SSE0));
-            sum_ai_SSE1        = _mm_add_ps(sum_ai_SSE1, _mm_and_ps(t1_SSE1, obc_mask1_SSE1));
-            sum_ai_SSE2        = _mm_add_ps(sum_ai_SSE2, _mm_and_ps(t1_SSE2, obc_mask1_SSE2));
-            sum_ai_SSE3        = _mm_add_ps(sum_ai_SSE3, _mm_and_ps(t1_SSE3, obc_mask1_SSE3));
-
-            t1_SSE0            = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE0),
-                                            _mm_mul_ps(prod_SSE0, lij3_SSE0));
-            t1_SSE1            = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE1),
-                                            _mm_mul_ps(prod_SSE1, lij3_SSE1));
-            t1_SSE2            = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE2),
-                                            _mm_mul_ps(prod_SSE2, lij3_SSE2));
-            t1_SSE3            = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE3),
-                                            _mm_mul_ps(prod_SSE3, lij3_SSE3));
-            t1_SSE0            = _mm_sub_ps(t1_SSE0,
-                                            _mm_mul_ps(onefourth_SSE,
-                                                       _mm_add_ps(_mm_mul_ps(lij_SSE0, rinv_SSE0),
-                                                                  _mm_mul_ps(lij3_SSE0, dr_SSE0))));
-            t1_SSE1            = _mm_sub_ps(t1_SSE1,
-                                            _mm_mul_ps(onefourth_SSE,
-                                                       _mm_add_ps(_mm_mul_ps(lij_SSE1, rinv_SSE1),
-                                                                  _mm_mul_ps(lij3_SSE1, dr_SSE1))));
-            t1_SSE2            = _mm_sub_ps(t1_SSE2,
-                                            _mm_mul_ps(onefourth_SSE,
-                                                       _mm_add_ps(_mm_mul_ps(lij_SSE2, rinv_SSE2),
-                                                                  _mm_mul_ps(lij3_SSE2, dr_SSE2))));
-            t1_SSE3            = _mm_sub_ps(t1_SSE3,
-                                            _mm_mul_ps(onefourth_SSE,
-                                                       _mm_add_ps(_mm_mul_ps(lij_SSE3, rinv_SSE3),
-                                                                  _mm_mul_ps(lij3_SSE3, dr_SSE3))));
-
-            t2_SSE0            = _mm_mul_ps(onefourth_SSE,
-                                            _mm_add_ps(_mm_mul_ps(uij_SSE0, rinv_SSE0),
-                                                       _mm_mul_ps(uij3_SSE0, dr_SSE0)));
-            t2_SSE1            = _mm_mul_ps(onefourth_SSE,
-                                            _mm_add_ps(_mm_mul_ps(uij_SSE1, rinv_SSE1),
-                                                       _mm_mul_ps(uij3_SSE1, dr_SSE1)));
-            t2_SSE2            = _mm_mul_ps(onefourth_SSE,
-                                            _mm_add_ps(_mm_mul_ps(uij_SSE2, rinv_SSE2),
-                                                       _mm_mul_ps(uij3_SSE2, dr_SSE2)));
-            t2_SSE3            = _mm_mul_ps(onefourth_SSE,
-                                            _mm_add_ps(_mm_mul_ps(uij_SSE3, rinv_SSE3),
-                                                       _mm_mul_ps(uij3_SSE3, dr_SSE3)));
-            t2_SSE0            = _mm_sub_ps(t2_SSE0,
-                                            _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE0),
-                                                       _mm_mul_ps(prod_SSE0, uij3_SSE0)));
-            t2_SSE1            = _mm_sub_ps(t2_SSE1,
-                                            _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE1),
-                                                       _mm_mul_ps(prod_SSE1, uij3_SSE1)));
-            t2_SSE2            = _mm_sub_ps(t2_SSE2,
-                                            _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE2),
-                                                       _mm_mul_ps(prod_SSE2, uij3_SSE2)));
-            t2_SSE3            = _mm_sub_ps(t2_SSE3,
-                                            _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE3),
-                                                       _mm_mul_ps(prod_SSE3, uij3_SSE3)));
-            t3_SSE0            = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE0),
-                                            _mm_mul_ps(rinv_SSE0, rinv_SSE0));
-            t3_SSE1            = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE1),
-                                            _mm_mul_ps(rinv_SSE1, rinv_SSE1));
-            t3_SSE2            = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE2),
-                                            _mm_mul_ps(rinv_SSE2, rinv_SSE2));
-            t3_SSE3            = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE3),
-                                            _mm_mul_ps(rinv_SSE3, rinv_SSE3));
-            t3_SSE0            = _mm_sub_ps(t3_SSE0,
-                                            _mm_mul_ps(_mm_mul_ps(diff2_SSE0, oneeighth_SSE),
-                                                       _mm_add_ps(one_SSE,
-                                                                  _mm_mul_ps(sk2_rinv_SSE0, rinv_SSE0))));
-            t3_SSE1            = _mm_sub_ps(t3_SSE1,
-                                            _mm_mul_ps(_mm_mul_ps(diff2_SSE1, oneeighth_SSE),
-                                                       _mm_add_ps(one_SSE,
-                                                                  _mm_mul_ps(sk2_rinv_SSE1, rinv_SSE1))));
-            t3_SSE2            = _mm_sub_ps(t3_SSE2,
-                                            _mm_mul_ps(_mm_mul_ps(diff2_SSE2, oneeighth_SSE),
-                                                       _mm_add_ps(one_SSE,
-                                                                  _mm_mul_ps(sk2_rinv_SSE2, rinv_SSE2))));
-            t3_SSE3            = _mm_sub_ps(t3_SSE3,
-                                            _mm_mul_ps(_mm_mul_ps(diff2_SSE3, oneeighth_SSE),
-                                                       _mm_add_ps(one_SSE,
-                                                                  _mm_mul_ps(sk2_rinv_SSE3, rinv_SSE3))));
-
-            t1_SSE0            = _mm_mul_ps(rinv_SSE0,
-                                            _mm_add_ps(_mm_mul_ps(dlij_SSE0, t1_SSE0),
-                                                       _mm_add_ps(t2_SSE0, t3_SSE0)));
-            t1_SSE1            = _mm_mul_ps(rinv_SSE1,
-                                            _mm_add_ps(_mm_mul_ps(dlij_SSE1, t1_SSE1),
-                                                       _mm_add_ps(t2_SSE1, t3_SSE1)));
-            t1_SSE2            = _mm_mul_ps(rinv_SSE2,
-                                            _mm_add_ps(_mm_mul_ps(dlij_SSE2, t1_SSE2),
-                                                       _mm_add_ps(t2_SSE2, t3_SSE2)));
-            t1_SSE3            = _mm_mul_ps(rinv_SSE3,
-                                            _mm_add_ps(_mm_mul_ps(dlij_SSE3, t1_SSE3),
-                                                       _mm_add_ps(t2_SSE3, t3_SSE3)));
-
-            _mm_store_ps(dadx, _mm_and_ps(t1_SSE0, obc_mask1_SSE0));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_and_ps(t1_SSE1, obc_mask1_SSE1));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_and_ps(t1_SSE2, obc_mask1_SSE2));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_and_ps(t1_SSE3, obc_mask1_SSE3));
-            dadx += 4;
-
-            /* Evaluate influence of atom ai -> aj */
-            t1_SSE0            = _mm_add_ps(dr_SSE0, sk_ai_SSE0);
-            t1_SSE1            = _mm_add_ps(dr_SSE1, sk_ai_SSE1);
-            t1_SSE2            = _mm_add_ps(dr_SSE2, sk_ai_SSE2);
-            t1_SSE3            = _mm_add_ps(dr_SSE3, sk_ai_SSE3);
-            t2_SSE0            = _mm_sub_ps(dr_SSE0, sk_ai_SSE0);
-            t2_SSE1            = _mm_sub_ps(dr_SSE1, sk_ai_SSE1);
-            t2_SSE2            = _mm_sub_ps(dr_SSE2, sk_ai_SSE2);
-            t2_SSE3            = _mm_sub_ps(dr_SSE3, sk_ai_SSE3);
-            t3_SSE0            = _mm_sub_ps(sk_ai_SSE0, dr_SSE0);
-            t3_SSE1            = _mm_sub_ps(sk_ai_SSE1, dr_SSE1);
-            t3_SSE2            = _mm_sub_ps(sk_ai_SSE2, dr_SSE2);
-            t3_SSE3            = _mm_sub_ps(sk_ai_SSE3, dr_SSE3);
-
-            obc_mask1_SSE0     = _mm_cmplt_ps(raj_SSE, t1_SSE0);
-            obc_mask1_SSE1     = _mm_cmplt_ps(raj_SSE, t1_SSE1);
-            obc_mask1_SSE2     = _mm_cmplt_ps(raj_SSE, t1_SSE2);
-            obc_mask1_SSE3     = _mm_cmplt_ps(raj_SSE, t1_SSE3);
-            obc_mask2_SSE0     = _mm_cmplt_ps(raj_SSE, t2_SSE0);
-            obc_mask2_SSE1     = _mm_cmplt_ps(raj_SSE, t2_SSE1);
-            obc_mask2_SSE2     = _mm_cmplt_ps(raj_SSE, t2_SSE2);
-            obc_mask2_SSE3     = _mm_cmplt_ps(raj_SSE, t2_SSE3);
-            obc_mask3_SSE0     = _mm_cmplt_ps(raj_SSE, t3_SSE0);
-            obc_mask3_SSE1     = _mm_cmplt_ps(raj_SSE, t3_SSE1);
-            obc_mask3_SSE2     = _mm_cmplt_ps(raj_SSE, t3_SSE2);
-            obc_mask3_SSE3     = _mm_cmplt_ps(raj_SSE, t3_SSE3);
-            obc_mask1_SSE0     = _mm_and_ps(obc_mask1_SSE0, imask_SSE0);
-            obc_mask1_SSE1     = _mm_and_ps(obc_mask1_SSE1, imask_SSE1);
-            obc_mask1_SSE2     = _mm_and_ps(obc_mask1_SSE2, imask_SSE2);
-            obc_mask1_SSE3     = _mm_and_ps(obc_mask1_SSE3, imask_SSE3);
-
-            uij_SSE0           = gmx_mm_inv_ps(t1_SSE0);
-            uij_SSE1           = gmx_mm_inv_ps(t1_SSE1);
-            uij_SSE2           = gmx_mm_inv_ps(t1_SSE2);
-            uij_SSE3           = gmx_mm_inv_ps(t1_SSE3);
-            lij_SSE0           = _mm_or_ps(   _mm_and_ps(obc_mask2_SSE0, gmx_mm_inv_ps(t2_SSE0)),
-                                              _mm_andnot_ps(obc_mask2_SSE0, raj_inv_SSE));
-            lij_SSE1           = _mm_or_ps(   _mm_and_ps(obc_mask2_SSE1, gmx_mm_inv_ps(t2_SSE1)),
-                                              _mm_andnot_ps(obc_mask2_SSE1, raj_inv_SSE));
-            lij_SSE2           = _mm_or_ps(   _mm_and_ps(obc_mask2_SSE2, gmx_mm_inv_ps(t2_SSE2)),
-                                              _mm_andnot_ps(obc_mask2_SSE2, raj_inv_SSE));
-            lij_SSE3           = _mm_or_ps(   _mm_and_ps(obc_mask2_SSE3, gmx_mm_inv_ps(t2_SSE3)),
-                                              _mm_andnot_ps(obc_mask2_SSE3, raj_inv_SSE));
-            dlij_SSE0          = _mm_and_ps(one_SSE, obc_mask2_SSE0);
-            dlij_SSE1          = _mm_and_ps(one_SSE, obc_mask2_SSE1);
-            dlij_SSE2          = _mm_and_ps(one_SSE, obc_mask2_SSE2);
-            dlij_SSE3          = _mm_and_ps(one_SSE, obc_mask2_SSE3);
-
-            uij2_SSE0          = _mm_mul_ps(uij_SSE0, uij_SSE0);
-            uij2_SSE1          = _mm_mul_ps(uij_SSE1, uij_SSE1);
-            uij2_SSE2          = _mm_mul_ps(uij_SSE2, uij_SSE2);
-            uij2_SSE3          = _mm_mul_ps(uij_SSE3, uij_SSE3);
-            uij3_SSE0          = _mm_mul_ps(uij2_SSE0, uij_SSE0);
-            uij3_SSE1          = _mm_mul_ps(uij2_SSE1, uij_SSE1);
-            uij3_SSE2          = _mm_mul_ps(uij2_SSE2, uij_SSE2);
-            uij3_SSE3          = _mm_mul_ps(uij2_SSE3, uij_SSE3);
-            lij2_SSE0          = _mm_mul_ps(lij_SSE0, lij_SSE0);
-            lij2_SSE1          = _mm_mul_ps(lij_SSE1, lij_SSE1);
-            lij2_SSE2          = _mm_mul_ps(lij_SSE2, lij_SSE2);
-            lij2_SSE3          = _mm_mul_ps(lij_SSE3, lij_SSE3);
-            lij3_SSE0          = _mm_mul_ps(lij2_SSE0, lij_SSE0);
-            lij3_SSE1          = _mm_mul_ps(lij2_SSE1, lij_SSE1);
-            lij3_SSE2          = _mm_mul_ps(lij2_SSE2, lij_SSE2);
-            lij3_SSE3          = _mm_mul_ps(lij2_SSE3, lij_SSE3);
-
-            diff2_SSE0         = _mm_sub_ps(uij2_SSE0, lij2_SSE0);
-            diff2_SSE1         = _mm_sub_ps(uij2_SSE1, lij2_SSE1);
-            diff2_SSE2         = _mm_sub_ps(uij2_SSE2, lij2_SSE2);
-            diff2_SSE3         = _mm_sub_ps(uij2_SSE3, lij2_SSE3);
-            lij_inv_SSE0       = gmx_mm_invsqrt_ps(lij2_SSE0);
-            lij_inv_SSE1       = gmx_mm_invsqrt_ps(lij2_SSE1);
-            lij_inv_SSE2       = gmx_mm_invsqrt_ps(lij2_SSE2);
-            lij_inv_SSE3       = gmx_mm_invsqrt_ps(lij2_SSE3);
-            sk2_rinv_SSE0      = _mm_mul_ps(sk2_ai_SSE0, rinv_SSE0);
-            sk2_rinv_SSE1      = _mm_mul_ps(sk2_ai_SSE1, rinv_SSE1);
-            sk2_rinv_SSE2      = _mm_mul_ps(sk2_ai_SSE2, rinv_SSE2);
-            sk2_rinv_SSE3      = _mm_mul_ps(sk2_ai_SSE3, rinv_SSE3);
-            prod_SSE0          = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE0);
-            prod_SSE1          = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE1);
-            prod_SSE2          = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE2);
-            prod_SSE3          = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE3);
-
-            logterm_SSE0       = gmx_mm_log_ps(_mm_mul_ps(uij_SSE0, lij_inv_SSE0));
-            logterm_SSE1       = gmx_mm_log_ps(_mm_mul_ps(uij_SSE1, lij_inv_SSE1));
-            logterm_SSE2       = gmx_mm_log_ps(_mm_mul_ps(uij_SSE2, lij_inv_SSE2));
-            logterm_SSE3       = gmx_mm_log_ps(_mm_mul_ps(uij_SSE3, lij_inv_SSE3));
-            t1_SSE0            = _mm_sub_ps(lij_SSE0, uij_SSE0);
-            t1_SSE1            = _mm_sub_ps(lij_SSE1, uij_SSE1);
-            t1_SSE2            = _mm_sub_ps(lij_SSE2, uij_SSE2);
-            t1_SSE3            = _mm_sub_ps(lij_SSE3, uij_SSE3);
-            t2_SSE0            = _mm_mul_ps(diff2_SSE0,
-                                            _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE0),
-                                                       prod_SSE0));
-            t2_SSE1            = _mm_mul_ps(diff2_SSE1,
-                                            _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE1),
-                                                       prod_SSE1));
-            t2_SSE2            = _mm_mul_ps(diff2_SSE2,
-                                            _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE2),
-                                                       prod_SSE2));
-            t2_SSE3            = _mm_mul_ps(diff2_SSE3,
-                                            _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE3),
-                                                       prod_SSE3));
-            t3_SSE0            = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE0, logterm_SSE0));
-            t3_SSE1            = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE1, logterm_SSE1));
-            t3_SSE2            = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE2, logterm_SSE2));
-            t3_SSE3            = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE3, logterm_SSE3));
-            t1_SSE0            = _mm_add_ps(t1_SSE0, _mm_add_ps(t2_SSE0, t3_SSE0));
-            t1_SSE1            = _mm_add_ps(t1_SSE1, _mm_add_ps(t2_SSE1, t3_SSE1));
-            t1_SSE2            = _mm_add_ps(t1_SSE2, _mm_add_ps(t2_SSE2, t3_SSE2));
-            t1_SSE3            = _mm_add_ps(t1_SSE3, _mm_add_ps(t2_SSE3, t3_SSE3));
-            t4_SSE0            = _mm_mul_ps(two_SSE, _mm_sub_ps(raj_inv_SSE, lij_SSE0));
-            t4_SSE1            = _mm_mul_ps(two_SSE, _mm_sub_ps(raj_inv_SSE, lij_SSE1));
-            t4_SSE2            = _mm_mul_ps(two_SSE, _mm_sub_ps(raj_inv_SSE, lij_SSE2));
-            t4_SSE3            = _mm_mul_ps(two_SSE, _mm_sub_ps(raj_inv_SSE, lij_SSE3));
-            t4_SSE0            = _mm_and_ps(t4_SSE0, obc_mask3_SSE0);
-            t4_SSE1            = _mm_and_ps(t4_SSE1, obc_mask3_SSE1);
-            t4_SSE2            = _mm_and_ps(t4_SSE2, obc_mask3_SSE2);
-            t4_SSE3            = _mm_and_ps(t4_SSE3, obc_mask3_SSE3);
-            t1_SSE0            = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE0, t4_SSE0));
-            t1_SSE1            = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE1, t4_SSE1));
-            t1_SSE2            = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE2, t4_SSE2));
-            t1_SSE3            = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE3, t4_SSE3));
-
-            _mm_store_ps(work+j, _mm_add_ps(_mm_load_ps(work+j),
-                                            gmx_mm_sum4_ps(_mm_and_ps(t1_SSE0, obc_mask1_SSE0),
-                                                           _mm_and_ps(t1_SSE1, obc_mask1_SSE1),
-                                                           _mm_and_ps(t1_SSE2, obc_mask1_SSE2),
-                                                           _mm_and_ps(t1_SSE3, obc_mask1_SSE3))));
-
-            t1_SSE0            = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE0),
-                                            _mm_mul_ps(prod_SSE0, lij3_SSE0));
-            t1_SSE1            = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE1),
-                                            _mm_mul_ps(prod_SSE1, lij3_SSE1));
-            t1_SSE2            = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE2),
-                                            _mm_mul_ps(prod_SSE2, lij3_SSE2));
-            t1_SSE3            = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE3),
-                                            _mm_mul_ps(prod_SSE3, lij3_SSE3));
-            t1_SSE0            = _mm_sub_ps(t1_SSE0,
-                                            _mm_mul_ps(onefourth_SSE,
-                                                       _mm_add_ps(_mm_mul_ps(lij_SSE0, rinv_SSE0),
-                                                                  _mm_mul_ps(lij3_SSE0, dr_SSE0))));
-            t1_SSE1            = _mm_sub_ps(t1_SSE1,
-                                            _mm_mul_ps(onefourth_SSE,
-                                                       _mm_add_ps(_mm_mul_ps(lij_SSE1, rinv_SSE1),
-                                                                  _mm_mul_ps(lij3_SSE1, dr_SSE1))));
-            t1_SSE2            = _mm_sub_ps(t1_SSE2,
-                                            _mm_mul_ps(onefourth_SSE,
-                                                       _mm_add_ps(_mm_mul_ps(lij_SSE2, rinv_SSE2),
-                                                                  _mm_mul_ps(lij3_SSE2, dr_SSE2))));
-            t1_SSE3            = _mm_sub_ps(t1_SSE3,
-                                            _mm_mul_ps(onefourth_SSE,
-                                                       _mm_add_ps(_mm_mul_ps(lij_SSE3, rinv_SSE3),
-                                                                  _mm_mul_ps(lij3_SSE3, dr_SSE3))));
-            t2_SSE0            = _mm_mul_ps(onefourth_SSE,
-                                            _mm_add_ps(_mm_mul_ps(uij_SSE0, rinv_SSE0),
-                                                       _mm_mul_ps(uij3_SSE0, dr_SSE0)));
-            t2_SSE1            = _mm_mul_ps(onefourth_SSE,
-                                            _mm_add_ps(_mm_mul_ps(uij_SSE1, rinv_SSE1),
-                                                       _mm_mul_ps(uij3_SSE1, dr_SSE1)));
-            t2_SSE2            = _mm_mul_ps(onefourth_SSE,
-                                            _mm_add_ps(_mm_mul_ps(uij_SSE2, rinv_SSE2),
-                                                       _mm_mul_ps(uij3_SSE2, dr_SSE2)));
-            t2_SSE3            = _mm_mul_ps(onefourth_SSE,
-                                            _mm_add_ps(_mm_mul_ps(uij_SSE3, rinv_SSE3),
-                                                       _mm_mul_ps(uij3_SSE3, dr_SSE3)));
-            t2_SSE0            = _mm_sub_ps(t2_SSE0,
-                                            _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE0),
-                                                       _mm_mul_ps(prod_SSE0, uij3_SSE0)));
-            t2_SSE1            = _mm_sub_ps(t2_SSE1,
-                                            _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE1),
-                                                       _mm_mul_ps(prod_SSE1, uij3_SSE1)));
-            t2_SSE2            = _mm_sub_ps(t2_SSE2,
-                                            _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE2),
-                                                       _mm_mul_ps(prod_SSE2, uij3_SSE2)));
-            t2_SSE3            = _mm_sub_ps(t2_SSE3,
-                                            _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE3),
-                                                       _mm_mul_ps(prod_SSE3, uij3_SSE3)));
-
-            t3_SSE0            = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE0),
-                                            _mm_mul_ps(rinv_SSE0, rinv_SSE0));
-            t3_SSE1            = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE1),
-                                            _mm_mul_ps(rinv_SSE1, rinv_SSE1));
-            t3_SSE2            = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE2),
-                                            _mm_mul_ps(rinv_SSE2, rinv_SSE2));
-            t3_SSE3            = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE3),
-                                            _mm_mul_ps(rinv_SSE3, rinv_SSE3));
-
-            t3_SSE0            = _mm_sub_ps(t3_SSE0,
-                                            _mm_mul_ps(_mm_mul_ps(diff2_SSE0, oneeighth_SSE),
-                                                       _mm_add_ps(one_SSE,
-                                                                  _mm_mul_ps(sk2_rinv_SSE0, rinv_SSE0))));
-            t3_SSE1            = _mm_sub_ps(t3_SSE1,
-                                            _mm_mul_ps(_mm_mul_ps(diff2_SSE1, oneeighth_SSE),
-                                                       _mm_add_ps(one_SSE,
-                                                                  _mm_mul_ps(sk2_rinv_SSE1, rinv_SSE1))));
-            t3_SSE2            = _mm_sub_ps(t3_SSE2,
-                                            _mm_mul_ps(_mm_mul_ps(diff2_SSE2, oneeighth_SSE),
-                                                       _mm_add_ps(one_SSE,
-                                                                  _mm_mul_ps(sk2_rinv_SSE2, rinv_SSE2))));
-            t3_SSE3            = _mm_sub_ps(t3_SSE3,
-                                            _mm_mul_ps(_mm_mul_ps(diff2_SSE3, oneeighth_SSE),
-                                                       _mm_add_ps(one_SSE,
-                                                                  _mm_mul_ps(sk2_rinv_SSE3, rinv_SSE3))));
-
-            t1_SSE0            = _mm_mul_ps(rinv_SSE0,
-                                            _mm_add_ps(_mm_mul_ps(dlij_SSE0, t1_SSE0),
-                                                       _mm_add_ps(t2_SSE0, t3_SSE0)));
-            t1_SSE1            = _mm_mul_ps(rinv_SSE1,
-                                            _mm_add_ps(_mm_mul_ps(dlij_SSE1, t1_SSE1),
-                                                       _mm_add_ps(t2_SSE1, t3_SSE1)));
-            t1_SSE2            = _mm_mul_ps(rinv_SSE2,
-                                            _mm_add_ps(_mm_mul_ps(dlij_SSE2, t1_SSE2),
-                                                       _mm_add_ps(t2_SSE2, t3_SSE2)));
-            t1_SSE3            = _mm_mul_ps(rinv_SSE3,
-                                            _mm_add_ps(_mm_mul_ps(dlij_SSE3, t1_SSE3),
-                                                       _mm_add_ps(t2_SSE3, t3_SSE3)));
-
-            _mm_store_ps(dadx, _mm_and_ps(t1_SSE0, obc_mask1_SSE0));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_and_ps(t1_SSE1, obc_mask1_SSE1));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_and_ps(t1_SSE2, obc_mask1_SSE2));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_and_ps(t1_SSE3, obc_mask1_SSE3));
-            dadx += 4;
-        }
-
-        /* Epilogue part, including exclusion mask */
-        for (j = nj2; j < nj3; j += UNROLLJ)
-        {
-            jmask_SSE0 = _mm_load_ps((real *)emask0);
-            jmask_SSE1 = _mm_load_ps((real *)emask1);
-            jmask_SSE2 = _mm_load_ps((real *)emask2);
-            jmask_SSE3 = _mm_load_ps((real *)emask3);
-            emask0    += UNROLLJ;
-            emask1    += UNROLLJ;
-            emask2    += UNROLLJ;
-            emask3    += UNROLLJ;
-
-            /* load j atom coordinates */
-            jx_SSE            = _mm_load_ps(x_align+j);
-            jy_SSE            = _mm_load_ps(y_align+j);
-            jz_SSE            = _mm_load_ps(z_align+j);
-
-            /* Calculate distance */
-            dx_SSE0            = _mm_sub_ps(ix_SSE0, jx_SSE);
-            dy_SSE0            = _mm_sub_ps(iy_SSE0, jy_SSE);
-            dz_SSE0            = _mm_sub_ps(iz_SSE0, jz_SSE);
-            dx_SSE1            = _mm_sub_ps(ix_SSE1, jx_SSE);
-            dy_SSE1            = _mm_sub_ps(iy_SSE1, jy_SSE);
-            dz_SSE1            = _mm_sub_ps(iz_SSE1, jz_SSE);
-            dx_SSE2            = _mm_sub_ps(ix_SSE2, jx_SSE);
-            dy_SSE2            = _mm_sub_ps(iy_SSE2, jy_SSE);
-            dz_SSE2            = _mm_sub_ps(iz_SSE2, jz_SSE);
-            dx_SSE3            = _mm_sub_ps(ix_SSE3, jx_SSE);
-            dy_SSE3            = _mm_sub_ps(iy_SSE3, jy_SSE);
-            dz_SSE3            = _mm_sub_ps(iz_SSE3, jz_SSE);
-
-            /* rsq = dx*dx+dy*dy+dz*dz */
-            rsq_SSE0           = gmx_mm_calc_rsq_ps(dx_SSE0, dy_SSE0, dz_SSE0);
-            rsq_SSE1           = gmx_mm_calc_rsq_ps(dx_SSE1, dy_SSE1, dz_SSE1);
-            rsq_SSE2           = gmx_mm_calc_rsq_ps(dx_SSE2, dy_SSE2, dz_SSE2);
-            rsq_SSE3           = gmx_mm_calc_rsq_ps(dx_SSE3, dy_SSE3, dz_SSE3);
-
-            /* Combine masks */
-            jmask_SSE0         = _mm_and_ps(jmask_SSE0, imask_SSE0);
-            jmask_SSE1         = _mm_and_ps(jmask_SSE1, imask_SSE1);
-            jmask_SSE2         = _mm_and_ps(jmask_SSE2, imask_SSE2);
-            jmask_SSE3         = _mm_and_ps(jmask_SSE3, imask_SSE3);
-
-            /* Calculate 1/r and 1/r2 */
-            rinv_SSE0          = gmx_mm_invsqrt_ps(rsq_SSE0);
-            rinv_SSE1          = gmx_mm_invsqrt_ps(rsq_SSE1);
-            rinv_SSE2          = gmx_mm_invsqrt_ps(rsq_SSE2);
-            rinv_SSE3          = gmx_mm_invsqrt_ps(rsq_SSE3);
-
-            /* Apply mask */
-            rinv_SSE0          = _mm_and_ps(rinv_SSE0, jmask_SSE0);
-            rinv_SSE1          = _mm_and_ps(rinv_SSE1, jmask_SSE1);
-            rinv_SSE2          = _mm_and_ps(rinv_SSE2, jmask_SSE2);
-            rinv_SSE3          = _mm_and_ps(rinv_SSE3, jmask_SSE3);
-
-            dr_SSE0            = _mm_mul_ps(rsq_SSE0, rinv_SSE0);
-            dr_SSE1            = _mm_mul_ps(rsq_SSE1, rinv_SSE1);
-            dr_SSE2            = _mm_mul_ps(rsq_SSE2, rinv_SSE2);
-            dr_SSE3            = _mm_mul_ps(rsq_SSE3, rinv_SSE3);
-
-            sk_aj_SSE          = _mm_load_ps(obc_param+j);
-            raj_SSE            = _mm_load_ps(gb_radius+j);
-
-            raj_inv_SSE        = gmx_mm_inv_ps(raj_SSE);
-
-            /* Evaluate influence of atom aj -> ai */
-            t1_SSE0            = _mm_add_ps(dr_SSE0, sk_aj_SSE);
-            t1_SSE1            = _mm_add_ps(dr_SSE1, sk_aj_SSE);
-            t1_SSE2            = _mm_add_ps(dr_SSE2, sk_aj_SSE);
-            t1_SSE3            = _mm_add_ps(dr_SSE3, sk_aj_SSE);
-            t2_SSE0            = _mm_sub_ps(dr_SSE0, sk_aj_SSE);
-            t2_SSE1            = _mm_sub_ps(dr_SSE1, sk_aj_SSE);
-            t2_SSE2            = _mm_sub_ps(dr_SSE2, sk_aj_SSE);
-            t2_SSE3            = _mm_sub_ps(dr_SSE3, sk_aj_SSE);
-            t3_SSE0            = _mm_sub_ps(sk_aj_SSE, dr_SSE0);
-            t3_SSE1            = _mm_sub_ps(sk_aj_SSE, dr_SSE1);
-            t3_SSE2            = _mm_sub_ps(sk_aj_SSE, dr_SSE2);
-            t3_SSE3            = _mm_sub_ps(sk_aj_SSE, dr_SSE3);
-
-            obc_mask1_SSE0     = _mm_cmplt_ps(rai_SSE0, t1_SSE0);
-            obc_mask1_SSE1     = _mm_cmplt_ps(rai_SSE1, t1_SSE1);
-            obc_mask1_SSE2     = _mm_cmplt_ps(rai_SSE2, t1_SSE2);
-            obc_mask1_SSE3     = _mm_cmplt_ps(rai_SSE3, t1_SSE3);
-            obc_mask2_SSE0     = _mm_cmplt_ps(rai_SSE0, t2_SSE0);
-            obc_mask2_SSE1     = _mm_cmplt_ps(rai_SSE1, t2_SSE1);
-            obc_mask2_SSE2     = _mm_cmplt_ps(rai_SSE2, t2_SSE2);
-            obc_mask2_SSE3     = _mm_cmplt_ps(rai_SSE3, t2_SSE3);
-            obc_mask3_SSE0     = _mm_cmplt_ps(rai_SSE0, t3_SSE0);
-            obc_mask3_SSE1     = _mm_cmplt_ps(rai_SSE1, t3_SSE1);
-            obc_mask3_SSE2     = _mm_cmplt_ps(rai_SSE2, t3_SSE2);
-            obc_mask3_SSE3     = _mm_cmplt_ps(rai_SSE3, t3_SSE3);
-            obc_mask1_SSE0     = _mm_and_ps(obc_mask1_SSE0, jmask_SSE0);
-            obc_mask1_SSE1     = _mm_and_ps(obc_mask1_SSE1, jmask_SSE1);
-            obc_mask1_SSE2     = _mm_and_ps(obc_mask1_SSE2, jmask_SSE2);
-            obc_mask1_SSE3     = _mm_and_ps(obc_mask1_SSE3, jmask_SSE3);
-
-            uij_SSE0           = gmx_mm_inv_ps(t1_SSE0);
-            uij_SSE1           = gmx_mm_inv_ps(t1_SSE1);
-            uij_SSE2           = gmx_mm_inv_ps(t1_SSE2);
-            uij_SSE3           = gmx_mm_inv_ps(t1_SSE3);
-            lij_SSE0           = _mm_or_ps(   _mm_and_ps(obc_mask2_SSE0, gmx_mm_inv_ps(t2_SSE0)),
-                                              _mm_andnot_ps(obc_mask2_SSE0, rai_inv_SSE0));
-            lij_SSE1           = _mm_or_ps(   _mm_and_ps(obc_mask2_SSE1, gmx_mm_inv_ps(t2_SSE1)),
-                                              _mm_andnot_ps(obc_mask2_SSE1, rai_inv_SSE1));
-            lij_SSE2           = _mm_or_ps(   _mm_and_ps(obc_mask2_SSE2, gmx_mm_inv_ps(t2_SSE2)),
-                                              _mm_andnot_ps(obc_mask2_SSE2, rai_inv_SSE2));
-            lij_SSE3           = _mm_or_ps(   _mm_and_ps(obc_mask2_SSE3, gmx_mm_inv_ps(t2_SSE3)),
-                                              _mm_andnot_ps(obc_mask2_SSE3, rai_inv_SSE3));
-            dlij_SSE0          = _mm_and_ps(one_SSE, obc_mask2_SSE0);
-            dlij_SSE1          = _mm_and_ps(one_SSE, obc_mask2_SSE1);
-            dlij_SSE2          = _mm_and_ps(one_SSE, obc_mask2_SSE2);
-            dlij_SSE3          = _mm_and_ps(one_SSE, obc_mask2_SSE3);
-
-            uij2_SSE0          = _mm_mul_ps(uij_SSE0, uij_SSE0);
-            uij2_SSE1          = _mm_mul_ps(uij_SSE1, uij_SSE1);
-            uij2_SSE2          = _mm_mul_ps(uij_SSE2, uij_SSE2);
-            uij2_SSE3          = _mm_mul_ps(uij_SSE3, uij_SSE3);
-            uij3_SSE0          = _mm_mul_ps(uij2_SSE0, uij_SSE0);
-            uij3_SSE1          = _mm_mul_ps(uij2_SSE1, uij_SSE1);
-            uij3_SSE2          = _mm_mul_ps(uij2_SSE2, uij_SSE2);
-            uij3_SSE3          = _mm_mul_ps(uij2_SSE3, uij_SSE3);
-            lij2_SSE0          = _mm_mul_ps(lij_SSE0, lij_SSE0);
-            lij2_SSE1          = _mm_mul_ps(lij_SSE1, lij_SSE1);
-            lij2_SSE2          = _mm_mul_ps(lij_SSE2, lij_SSE2);
-            lij2_SSE3          = _mm_mul_ps(lij_SSE3, lij_SSE3);
-            lij3_SSE0          = _mm_mul_ps(lij2_SSE0, lij_SSE0);
-            lij3_SSE1          = _mm_mul_ps(lij2_SSE1, lij_SSE1);
-            lij3_SSE2          = _mm_mul_ps(lij2_SSE2, lij_SSE2);
-            lij3_SSE3          = _mm_mul_ps(lij2_SSE3, lij_SSE3);
-
-            diff2_SSE0         = _mm_sub_ps(uij2_SSE0, lij2_SSE0);
-            diff2_SSE1         = _mm_sub_ps(uij2_SSE1, lij2_SSE1);
-            diff2_SSE2         = _mm_sub_ps(uij2_SSE2, lij2_SSE2);
-            diff2_SSE3         = _mm_sub_ps(uij2_SSE3, lij2_SSE3);
-            lij_inv_SSE0       = gmx_mm_invsqrt_ps(lij2_SSE0);
-            lij_inv_SSE1       = gmx_mm_invsqrt_ps(lij2_SSE1);
-            lij_inv_SSE2       = gmx_mm_invsqrt_ps(lij2_SSE2);
-            lij_inv_SSE3       = gmx_mm_invsqrt_ps(lij2_SSE3);
-            sk2_aj_SSE         = _mm_mul_ps(sk_aj_SSE, sk_aj_SSE);
-            sk2_rinv_SSE0      = _mm_mul_ps(sk2_aj_SSE, rinv_SSE0);
-            sk2_rinv_SSE1      = _mm_mul_ps(sk2_aj_SSE, rinv_SSE1);
-            sk2_rinv_SSE2      = _mm_mul_ps(sk2_aj_SSE, rinv_SSE2);
-            sk2_rinv_SSE3      = _mm_mul_ps(sk2_aj_SSE, rinv_SSE3);
-            prod_SSE0          = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE0);
-            prod_SSE1          = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE1);
-            prod_SSE2          = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE2);
-            prod_SSE3          = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE3);
-
-            logterm_SSE0       = gmx_mm_log_ps(_mm_mul_ps(uij_SSE0, lij_inv_SSE0));
-            logterm_SSE1       = gmx_mm_log_ps(_mm_mul_ps(uij_SSE1, lij_inv_SSE1));
-            logterm_SSE2       = gmx_mm_log_ps(_mm_mul_ps(uij_SSE2, lij_inv_SSE2));
-            logterm_SSE3       = gmx_mm_log_ps(_mm_mul_ps(uij_SSE3, lij_inv_SSE3));
-
-            t1_SSE0            = _mm_sub_ps(lij_SSE0, uij_SSE0);
-            t1_SSE1            = _mm_sub_ps(lij_SSE1, uij_SSE1);
-            t1_SSE2            = _mm_sub_ps(lij_SSE2, uij_SSE2);
-            t1_SSE3            = _mm_sub_ps(lij_SSE3, uij_SSE3);
-            t2_SSE0            = _mm_mul_ps(diff2_SSE0,
-                                            _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE0),
-                                                       prod_SSE0));
-            t2_SSE1            = _mm_mul_ps(diff2_SSE1,
-                                            _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE1),
-                                                       prod_SSE1));
-            t2_SSE2            = _mm_mul_ps(diff2_SSE2,
-                                            _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE2),
-                                                       prod_SSE2));
-            t2_SSE3            = _mm_mul_ps(diff2_SSE3,
-                                            _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE3),
-                                                       prod_SSE3));
-
-            t3_SSE0            = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE0, logterm_SSE0));
-            t3_SSE1            = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE1, logterm_SSE1));
-            t3_SSE2            = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE2, logterm_SSE2));
-            t3_SSE3            = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE3, logterm_SSE3));
-            t1_SSE0            = _mm_add_ps(t1_SSE0, _mm_add_ps(t2_SSE0, t3_SSE0));
-            t1_SSE1            = _mm_add_ps(t1_SSE1, _mm_add_ps(t2_SSE1, t3_SSE1));
-            t1_SSE2            = _mm_add_ps(t1_SSE2, _mm_add_ps(t2_SSE2, t3_SSE2));
-            t1_SSE3            = _mm_add_ps(t1_SSE3, _mm_add_ps(t2_SSE3, t3_SSE3));
-            t4_SSE0            = _mm_mul_ps(two_SSE, _mm_sub_ps(rai_inv_SSE0, lij_SSE0));
-            t4_SSE1            = _mm_mul_ps(two_SSE, _mm_sub_ps(rai_inv_SSE1, lij_SSE1));
-            t4_SSE2            = _mm_mul_ps(two_SSE, _mm_sub_ps(rai_inv_SSE2, lij_SSE2));
-            t4_SSE3            = _mm_mul_ps(two_SSE, _mm_sub_ps(rai_inv_SSE3, lij_SSE3));
-            t4_SSE0            = _mm_and_ps(t4_SSE0, obc_mask3_SSE0);
-            t4_SSE1            = _mm_and_ps(t4_SSE1, obc_mask3_SSE1);
-            t4_SSE2            = _mm_and_ps(t4_SSE2, obc_mask3_SSE2);
-            t4_SSE3            = _mm_and_ps(t4_SSE3, obc_mask3_SSE3);
-            t1_SSE0            = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE0, t4_SSE0));
-            t1_SSE1            = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE1, t4_SSE1));
-            t1_SSE2            = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE2, t4_SSE2));
-            t1_SSE3            = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE3, t4_SSE3));
-
-            sum_ai_SSE0        = _mm_add_ps(sum_ai_SSE0, _mm_and_ps(t1_SSE0, obc_mask1_SSE0));
-            sum_ai_SSE1        = _mm_add_ps(sum_ai_SSE1, _mm_and_ps(t1_SSE1, obc_mask1_SSE1));
-            sum_ai_SSE2        = _mm_add_ps(sum_ai_SSE2, _mm_and_ps(t1_SSE2, obc_mask1_SSE2));
-            sum_ai_SSE3        = _mm_add_ps(sum_ai_SSE3, _mm_and_ps(t1_SSE3, obc_mask1_SSE3));
-
-            t1_SSE0            = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE0),
-                                            _mm_mul_ps(prod_SSE0, lij3_SSE0));
-            t1_SSE1            = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE1),
-                                            _mm_mul_ps(prod_SSE1, lij3_SSE1));
-            t1_SSE2            = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE2),
-                                            _mm_mul_ps(prod_SSE2, lij3_SSE2));
-            t1_SSE3            = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE3),
-                                            _mm_mul_ps(prod_SSE3, lij3_SSE3));
-            t1_SSE0            = _mm_sub_ps(t1_SSE0,
-                                            _mm_mul_ps(onefourth_SSE,
-                                                       _mm_add_ps(_mm_mul_ps(lij_SSE0, rinv_SSE0),
-                                                                  _mm_mul_ps(lij3_SSE0, dr_SSE0))));
-            t1_SSE1            = _mm_sub_ps(t1_SSE1,
-                                            _mm_mul_ps(onefourth_SSE,
-                                                       _mm_add_ps(_mm_mul_ps(lij_SSE1, rinv_SSE1),
-                                                                  _mm_mul_ps(lij3_SSE1, dr_SSE1))));
-            t1_SSE2            = _mm_sub_ps(t1_SSE2,
-                                            _mm_mul_ps(onefourth_SSE,
-                                                       _mm_add_ps(_mm_mul_ps(lij_SSE2, rinv_SSE2),
-                                                                  _mm_mul_ps(lij3_SSE2, dr_SSE2))));
-            t1_SSE3            = _mm_sub_ps(t1_SSE3,
-                                            _mm_mul_ps(onefourth_SSE,
-                                                       _mm_add_ps(_mm_mul_ps(lij_SSE3, rinv_SSE3),
-                                                                  _mm_mul_ps(lij3_SSE3, dr_SSE3))));
-
-            t2_SSE0            = _mm_mul_ps(onefourth_SSE,
-                                            _mm_add_ps(_mm_mul_ps(uij_SSE0, rinv_SSE0),
-                                                       _mm_mul_ps(uij3_SSE0, dr_SSE0)));
-            t2_SSE1            = _mm_mul_ps(onefourth_SSE,
-                                            _mm_add_ps(_mm_mul_ps(uij_SSE1, rinv_SSE1),
-                                                       _mm_mul_ps(uij3_SSE1, dr_SSE1)));
-            t2_SSE2            = _mm_mul_ps(onefourth_SSE,
-                                            _mm_add_ps(_mm_mul_ps(uij_SSE2, rinv_SSE2),
-                                                       _mm_mul_ps(uij3_SSE2, dr_SSE2)));
-            t2_SSE3            = _mm_mul_ps(onefourth_SSE,
-                                            _mm_add_ps(_mm_mul_ps(uij_SSE3, rinv_SSE3),
-                                                       _mm_mul_ps(uij3_SSE3, dr_SSE3)));
-            t2_SSE0            = _mm_sub_ps(t2_SSE0,
-                                            _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE0),
-                                                       _mm_mul_ps(prod_SSE0, uij3_SSE0)));
-            t2_SSE1            = _mm_sub_ps(t2_SSE1,
-                                            _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE1),
-                                                       _mm_mul_ps(prod_SSE1, uij3_SSE1)));
-            t2_SSE2            = _mm_sub_ps(t2_SSE2,
-                                            _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE2),
-                                                       _mm_mul_ps(prod_SSE2, uij3_SSE2)));
-            t2_SSE3            = _mm_sub_ps(t2_SSE3,
-                                            _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE3),
-                                                       _mm_mul_ps(prod_SSE3, uij3_SSE3)));
-            t3_SSE0            = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE0),
-                                            _mm_mul_ps(rinv_SSE0, rinv_SSE0));
-            t3_SSE1            = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE1),
-                                            _mm_mul_ps(rinv_SSE1, rinv_SSE1));
-            t3_SSE2            = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE2),
-                                            _mm_mul_ps(rinv_SSE2, rinv_SSE2));
-            t3_SSE3            = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE3),
-                                            _mm_mul_ps(rinv_SSE3, rinv_SSE3));
-            t3_SSE0            = _mm_sub_ps(t3_SSE0,
-                                            _mm_mul_ps(_mm_mul_ps(diff2_SSE0, oneeighth_SSE),
-                                                       _mm_add_ps(one_SSE,
-                                                                  _mm_mul_ps(sk2_rinv_SSE0, rinv_SSE0))));
-            t3_SSE1            = _mm_sub_ps(t3_SSE1,
-                                            _mm_mul_ps(_mm_mul_ps(diff2_SSE1, oneeighth_SSE),
-                                                       _mm_add_ps(one_SSE,
-                                                                  _mm_mul_ps(sk2_rinv_SSE1, rinv_SSE1))));
-            t3_SSE2            = _mm_sub_ps(t3_SSE2,
-                                            _mm_mul_ps(_mm_mul_ps(diff2_SSE2, oneeighth_SSE),
-                                                       _mm_add_ps(one_SSE,
-                                                                  _mm_mul_ps(sk2_rinv_SSE2, rinv_SSE2))));
-            t3_SSE3            = _mm_sub_ps(t3_SSE3,
-                                            _mm_mul_ps(_mm_mul_ps(diff2_SSE3, oneeighth_SSE),
-                                                       _mm_add_ps(one_SSE,
-                                                                  _mm_mul_ps(sk2_rinv_SSE3, rinv_SSE3))));
-
-            t1_SSE0            = _mm_mul_ps(rinv_SSE0,
-                                            _mm_add_ps(_mm_mul_ps(dlij_SSE0, t1_SSE0),
-                                                       _mm_add_ps(t2_SSE0, t3_SSE0)));
-            t1_SSE1            = _mm_mul_ps(rinv_SSE1,
-                                            _mm_add_ps(_mm_mul_ps(dlij_SSE1, t1_SSE1),
-                                                       _mm_add_ps(t2_SSE1, t3_SSE1)));
-            t1_SSE2            = _mm_mul_ps(rinv_SSE2,
-                                            _mm_add_ps(_mm_mul_ps(dlij_SSE2, t1_SSE2),
-                                                       _mm_add_ps(t2_SSE2, t3_SSE2)));
-            t1_SSE3            = _mm_mul_ps(rinv_SSE3,
-                                            _mm_add_ps(_mm_mul_ps(dlij_SSE3, t1_SSE3),
-                                                       _mm_add_ps(t2_SSE3, t3_SSE3)));
-
-            _mm_store_ps(dadx, _mm_and_ps(t1_SSE0, obc_mask1_SSE0));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_and_ps(t1_SSE1, obc_mask1_SSE1));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_and_ps(t1_SSE2, obc_mask1_SSE2));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_and_ps(t1_SSE3, obc_mask1_SSE3));
-            dadx += 4;
-
-            /* Evaluate influence of atom ai -> aj */
-            t1_SSE0            = _mm_add_ps(dr_SSE0, sk_ai_SSE0);
-            t1_SSE1            = _mm_add_ps(dr_SSE1, sk_ai_SSE1);
-            t1_SSE2            = _mm_add_ps(dr_SSE2, sk_ai_SSE2);
-            t1_SSE3            = _mm_add_ps(dr_SSE3, sk_ai_SSE3);
-            t2_SSE0            = _mm_sub_ps(dr_SSE0, sk_ai_SSE0);
-            t2_SSE1            = _mm_sub_ps(dr_SSE1, sk_ai_SSE1);
-            t2_SSE2            = _mm_sub_ps(dr_SSE2, sk_ai_SSE2);
-            t2_SSE3            = _mm_sub_ps(dr_SSE3, sk_ai_SSE3);
-            t3_SSE0            = _mm_sub_ps(sk_ai_SSE0, dr_SSE0);
-            t3_SSE1            = _mm_sub_ps(sk_ai_SSE1, dr_SSE1);
-            t3_SSE2            = _mm_sub_ps(sk_ai_SSE2, dr_SSE2);
-            t3_SSE3            = _mm_sub_ps(sk_ai_SSE3, dr_SSE3);
-
-            obc_mask1_SSE0     = _mm_cmplt_ps(raj_SSE, t1_SSE0);
-            obc_mask1_SSE1     = _mm_cmplt_ps(raj_SSE, t1_SSE1);
-            obc_mask1_SSE2     = _mm_cmplt_ps(raj_SSE, t1_SSE2);
-            obc_mask1_SSE3     = _mm_cmplt_ps(raj_SSE, t1_SSE3);
-            obc_mask2_SSE0     = _mm_cmplt_ps(raj_SSE, t2_SSE0);
-            obc_mask2_SSE1     = _mm_cmplt_ps(raj_SSE, t2_SSE1);
-            obc_mask2_SSE2     = _mm_cmplt_ps(raj_SSE, t2_SSE2);
-            obc_mask2_SSE3     = _mm_cmplt_ps(raj_SSE, t2_SSE3);
-            obc_mask3_SSE0     = _mm_cmplt_ps(raj_SSE, t3_SSE0);
-            obc_mask3_SSE1     = _mm_cmplt_ps(raj_SSE, t3_SSE1);
-            obc_mask3_SSE2     = _mm_cmplt_ps(raj_SSE, t3_SSE2);
-            obc_mask3_SSE3     = _mm_cmplt_ps(raj_SSE, t3_SSE3);
-            obc_mask1_SSE0     = _mm_and_ps(obc_mask1_SSE0, jmask_SSE0);
-            obc_mask1_SSE1     = _mm_and_ps(obc_mask1_SSE1, jmask_SSE1);
-            obc_mask1_SSE2     = _mm_and_ps(obc_mask1_SSE2, jmask_SSE2);
-            obc_mask1_SSE3     = _mm_and_ps(obc_mask1_SSE3, jmask_SSE3);
-
-            uij_SSE0           = gmx_mm_inv_ps(t1_SSE0);
-            uij_SSE1           = gmx_mm_inv_ps(t1_SSE1);
-            uij_SSE2           = gmx_mm_inv_ps(t1_SSE2);
-            uij_SSE3           = gmx_mm_inv_ps(t1_SSE3);
-            lij_SSE0           = _mm_or_ps(   _mm_and_ps(obc_mask2_SSE0, gmx_mm_inv_ps(t2_SSE0)),
-                                              _mm_andnot_ps(obc_mask2_SSE0, raj_inv_SSE));
-            lij_SSE1           = _mm_or_ps(   _mm_and_ps(obc_mask2_SSE1, gmx_mm_inv_ps(t2_SSE1)),
-                                              _mm_andnot_ps(obc_mask2_SSE1, raj_inv_SSE));
-            lij_SSE2           = _mm_or_ps(   _mm_and_ps(obc_mask2_SSE2, gmx_mm_inv_ps(t2_SSE2)),
-                                              _mm_andnot_ps(obc_mask2_SSE2, raj_inv_SSE));
-            lij_SSE3           = _mm_or_ps(   _mm_and_ps(obc_mask2_SSE3, gmx_mm_inv_ps(t2_SSE3)),
-                                              _mm_andnot_ps(obc_mask2_SSE3, raj_inv_SSE));
-            dlij_SSE0          = _mm_and_ps(one_SSE, obc_mask2_SSE0);
-            dlij_SSE1          = _mm_and_ps(one_SSE, obc_mask2_SSE1);
-            dlij_SSE2          = _mm_and_ps(one_SSE, obc_mask2_SSE2);
-            dlij_SSE3          = _mm_and_ps(one_SSE, obc_mask2_SSE3);
-
-            uij2_SSE0          = _mm_mul_ps(uij_SSE0, uij_SSE0);
-            uij2_SSE1          = _mm_mul_ps(uij_SSE1, uij_SSE1);
-            uij2_SSE2          = _mm_mul_ps(uij_SSE2, uij_SSE2);
-            uij2_SSE3          = _mm_mul_ps(uij_SSE3, uij_SSE3);
-            uij3_SSE0          = _mm_mul_ps(uij2_SSE0, uij_SSE0);
-            uij3_SSE1          = _mm_mul_ps(uij2_SSE1, uij_SSE1);
-            uij3_SSE2          = _mm_mul_ps(uij2_SSE2, uij_SSE2);
-            uij3_SSE3          = _mm_mul_ps(uij2_SSE3, uij_SSE3);
-            lij2_SSE0          = _mm_mul_ps(lij_SSE0, lij_SSE0);
-            lij2_SSE1          = _mm_mul_ps(lij_SSE1, lij_SSE1);
-            lij2_SSE2          = _mm_mul_ps(lij_SSE2, lij_SSE2);
-            lij2_SSE3          = _mm_mul_ps(lij_SSE3, lij_SSE3);
-            lij3_SSE0          = _mm_mul_ps(lij2_SSE0, lij_SSE0);
-            lij3_SSE1          = _mm_mul_ps(lij2_SSE1, lij_SSE1);
-            lij3_SSE2          = _mm_mul_ps(lij2_SSE2, lij_SSE2);
-            lij3_SSE3          = _mm_mul_ps(lij2_SSE3, lij_SSE3);
-
-            diff2_SSE0         = _mm_sub_ps(uij2_SSE0, lij2_SSE0);
-            diff2_SSE1         = _mm_sub_ps(uij2_SSE1, lij2_SSE1);
-            diff2_SSE2         = _mm_sub_ps(uij2_SSE2, lij2_SSE2);
-            diff2_SSE3         = _mm_sub_ps(uij2_SSE3, lij2_SSE3);
-            lij_inv_SSE0       = gmx_mm_invsqrt_ps(lij2_SSE0);
-            lij_inv_SSE1       = gmx_mm_invsqrt_ps(lij2_SSE1);
-            lij_inv_SSE2       = gmx_mm_invsqrt_ps(lij2_SSE2);
-            lij_inv_SSE3       = gmx_mm_invsqrt_ps(lij2_SSE3);
-            sk2_rinv_SSE0      = _mm_mul_ps(sk2_ai_SSE0, rinv_SSE0);
-            sk2_rinv_SSE1      = _mm_mul_ps(sk2_ai_SSE1, rinv_SSE1);
-            sk2_rinv_SSE2      = _mm_mul_ps(sk2_ai_SSE2, rinv_SSE2);
-            sk2_rinv_SSE3      = _mm_mul_ps(sk2_ai_SSE3, rinv_SSE3);
-            prod_SSE0          = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE0);
-            prod_SSE1          = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE1);
-            prod_SSE2          = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE2);
-            prod_SSE3          = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE3);
-
-            logterm_SSE0       = gmx_mm_log_ps(_mm_mul_ps(uij_SSE0, lij_inv_SSE0));
-            logterm_SSE1       = gmx_mm_log_ps(_mm_mul_ps(uij_SSE1, lij_inv_SSE1));
-            logterm_SSE2       = gmx_mm_log_ps(_mm_mul_ps(uij_SSE2, lij_inv_SSE2));
-            logterm_SSE3       = gmx_mm_log_ps(_mm_mul_ps(uij_SSE3, lij_inv_SSE3));
-            t1_SSE0            = _mm_sub_ps(lij_SSE0, uij_SSE0);
-            t1_SSE1            = _mm_sub_ps(lij_SSE1, uij_SSE1);
-            t1_SSE2            = _mm_sub_ps(lij_SSE2, uij_SSE2);
-            t1_SSE3            = _mm_sub_ps(lij_SSE3, uij_SSE3);
-            t2_SSE0            = _mm_mul_ps(diff2_SSE0,
-                                            _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE0),
-                                                       prod_SSE0));
-            t2_SSE1            = _mm_mul_ps(diff2_SSE1,
-                                            _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE1),
-                                                       prod_SSE1));
-            t2_SSE2            = _mm_mul_ps(diff2_SSE2,
-                                            _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE2),
-                                                       prod_SSE2));
-            t2_SSE3            = _mm_mul_ps(diff2_SSE3,
-                                            _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE3),
-                                                       prod_SSE3));
-            t3_SSE0            = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE0, logterm_SSE0));
-            t3_SSE1            = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE1, logterm_SSE1));
-            t3_SSE2            = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE2, logterm_SSE2));
-            t3_SSE3            = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE3, logterm_SSE3));
-            t1_SSE0            = _mm_add_ps(t1_SSE0, _mm_add_ps(t2_SSE0, t3_SSE0));
-            t1_SSE1            = _mm_add_ps(t1_SSE1, _mm_add_ps(t2_SSE1, t3_SSE1));
-            t1_SSE2            = _mm_add_ps(t1_SSE2, _mm_add_ps(t2_SSE2, t3_SSE2));
-            t1_SSE3            = _mm_add_ps(t1_SSE3, _mm_add_ps(t2_SSE3, t3_SSE3));
-            t4_SSE0            = _mm_mul_ps(two_SSE, _mm_sub_ps(raj_inv_SSE, lij_SSE0));
-            t4_SSE1            = _mm_mul_ps(two_SSE, _mm_sub_ps(raj_inv_SSE, lij_SSE1));
-            t4_SSE2            = _mm_mul_ps(two_SSE, _mm_sub_ps(raj_inv_SSE, lij_SSE2));
-            t4_SSE3            = _mm_mul_ps(two_SSE, _mm_sub_ps(raj_inv_SSE, lij_SSE3));
-            t4_SSE0            = _mm_and_ps(t4_SSE0, obc_mask3_SSE0);
-            t4_SSE1            = _mm_and_ps(t4_SSE1, obc_mask3_SSE1);
-            t4_SSE2            = _mm_and_ps(t4_SSE2, obc_mask3_SSE2);
-            t4_SSE3            = _mm_and_ps(t4_SSE3, obc_mask3_SSE3);
-            t1_SSE0            = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE0, t4_SSE0));
-            t1_SSE1            = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE1, t4_SSE1));
-            t1_SSE2            = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE2, t4_SSE2));
-            t1_SSE3            = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE3, t4_SSE3));
-
-            _mm_store_ps(work+j, _mm_add_ps(_mm_load_ps(work+j),
-                                            gmx_mm_sum4_ps(_mm_and_ps(t1_SSE0, obc_mask1_SSE0),
-                                                           _mm_and_ps(t1_SSE1, obc_mask1_SSE1),
-                                                           _mm_and_ps(t1_SSE2, obc_mask1_SSE2),
-                                                           _mm_and_ps(t1_SSE3, obc_mask1_SSE3))));
-
-            t1_SSE0            = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE0),
-                                            _mm_mul_ps(prod_SSE0, lij3_SSE0));
-            t1_SSE1            = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE1),
-                                            _mm_mul_ps(prod_SSE1, lij3_SSE1));
-            t1_SSE2            = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE2),
-                                            _mm_mul_ps(prod_SSE2, lij3_SSE2));
-            t1_SSE3            = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE3),
-                                            _mm_mul_ps(prod_SSE3, lij3_SSE3));
-            t1_SSE0            = _mm_sub_ps(t1_SSE0,
-                                            _mm_mul_ps(onefourth_SSE,
-                                                       _mm_add_ps(_mm_mul_ps(lij_SSE0, rinv_SSE0),
-                                                                  _mm_mul_ps(lij3_SSE0, dr_SSE0))));
-            t1_SSE1            = _mm_sub_ps(t1_SSE1,
-                                            _mm_mul_ps(onefourth_SSE,
-                                                       _mm_add_ps(_mm_mul_ps(lij_SSE1, rinv_SSE1),
-                                                                  _mm_mul_ps(lij3_SSE1, dr_SSE1))));
-            t1_SSE2            = _mm_sub_ps(t1_SSE2,
-                                            _mm_mul_ps(onefourth_SSE,
-                                                       _mm_add_ps(_mm_mul_ps(lij_SSE2, rinv_SSE2),
-                                                                  _mm_mul_ps(lij3_SSE2, dr_SSE2))));
-            t1_SSE3            = _mm_sub_ps(t1_SSE3,
-                                            _mm_mul_ps(onefourth_SSE,
-                                                       _mm_add_ps(_mm_mul_ps(lij_SSE3, rinv_SSE3),
-                                                                  _mm_mul_ps(lij3_SSE3, dr_SSE3))));
-            t2_SSE0            = _mm_mul_ps(onefourth_SSE,
-                                            _mm_add_ps(_mm_mul_ps(uij_SSE0, rinv_SSE0),
-                                                       _mm_mul_ps(uij3_SSE0, dr_SSE0)));
-            t2_SSE1            = _mm_mul_ps(onefourth_SSE,
-                                            _mm_add_ps(_mm_mul_ps(uij_SSE1, rinv_SSE1),
-                                                       _mm_mul_ps(uij3_SSE1, dr_SSE1)));
-            t2_SSE2            = _mm_mul_ps(onefourth_SSE,
-                                            _mm_add_ps(_mm_mul_ps(uij_SSE2, rinv_SSE2),
-                                                       _mm_mul_ps(uij3_SSE2, dr_SSE2)));
-            t2_SSE3            = _mm_mul_ps(onefourth_SSE,
-                                            _mm_add_ps(_mm_mul_ps(uij_SSE3, rinv_SSE3),
-                                                       _mm_mul_ps(uij3_SSE3, dr_SSE3)));
-            t2_SSE0            = _mm_sub_ps(t2_SSE0,
-                                            _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE0),
-                                                       _mm_mul_ps(prod_SSE0, uij3_SSE0)));
-            t2_SSE1            = _mm_sub_ps(t2_SSE1,
-                                            _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE1),
-                                                       _mm_mul_ps(prod_SSE1, uij3_SSE1)));
-            t2_SSE2            = _mm_sub_ps(t2_SSE2,
-                                            _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE2),
-                                                       _mm_mul_ps(prod_SSE2, uij3_SSE2)));
-            t2_SSE3            = _mm_sub_ps(t2_SSE3,
-                                            _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE3),
-                                                       _mm_mul_ps(prod_SSE3, uij3_SSE3)));
-
-            t3_SSE0            = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE0),
-                                            _mm_mul_ps(rinv_SSE0, rinv_SSE0));
-            t3_SSE1            = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE1),
-                                            _mm_mul_ps(rinv_SSE1, rinv_SSE1));
-            t3_SSE2            = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE2),
-                                            _mm_mul_ps(rinv_SSE2, rinv_SSE2));
-            t3_SSE3            = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE3),
-                                            _mm_mul_ps(rinv_SSE3, rinv_SSE3));
-
-            t3_SSE0            = _mm_sub_ps(t3_SSE0,
-                                            _mm_mul_ps(_mm_mul_ps(diff2_SSE0, oneeighth_SSE),
-                                                       _mm_add_ps(one_SSE,
-                                                                  _mm_mul_ps(sk2_rinv_SSE0, rinv_SSE0))));
-            t3_SSE1            = _mm_sub_ps(t3_SSE1,
-                                            _mm_mul_ps(_mm_mul_ps(diff2_SSE1, oneeighth_SSE),
-                                                       _mm_add_ps(one_SSE,
-                                                                  _mm_mul_ps(sk2_rinv_SSE1, rinv_SSE1))));
-            t3_SSE2            = _mm_sub_ps(t3_SSE2,
-                                            _mm_mul_ps(_mm_mul_ps(diff2_SSE2, oneeighth_SSE),
-                                                       _mm_add_ps(one_SSE,
-                                                                  _mm_mul_ps(sk2_rinv_SSE2, rinv_SSE2))));
-            t3_SSE3            = _mm_sub_ps(t3_SSE3,
-                                            _mm_mul_ps(_mm_mul_ps(diff2_SSE3, oneeighth_SSE),
-                                                       _mm_add_ps(one_SSE,
-                                                                  _mm_mul_ps(sk2_rinv_SSE3, rinv_SSE3))));
-
-
-            t1_SSE0            = _mm_mul_ps(rinv_SSE0,
-                                            _mm_add_ps(_mm_mul_ps(dlij_SSE0, t1_SSE0),
-                                                       _mm_add_ps(t2_SSE0, t3_SSE0)));
-            t1_SSE1            = _mm_mul_ps(rinv_SSE1,
-                                            _mm_add_ps(_mm_mul_ps(dlij_SSE1, t1_SSE1),
-                                                       _mm_add_ps(t2_SSE1, t3_SSE1)));
-            t1_SSE2            = _mm_mul_ps(rinv_SSE2,
-                                            _mm_add_ps(_mm_mul_ps(dlij_SSE2, t1_SSE2),
-                                                       _mm_add_ps(t2_SSE2, t3_SSE2)));
-            t1_SSE3            = _mm_mul_ps(rinv_SSE3,
-                                            _mm_add_ps(_mm_mul_ps(dlij_SSE3, t1_SSE3),
-                                                       _mm_add_ps(t2_SSE3, t3_SSE3)));
-
-            _mm_store_ps(dadx, _mm_and_ps(t1_SSE0, obc_mask1_SSE0));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_and_ps(t1_SSE1, obc_mask1_SSE1));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_and_ps(t1_SSE2, obc_mask1_SSE2));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_and_ps(t1_SSE3, obc_mask1_SSE3));
-            dadx += 4;
-        }
-        _MM_TRANSPOSE4_PS(sum_ai_SSE0, sum_ai_SSE1, sum_ai_SSE2, sum_ai_SSE3);
-        sum_ai_SSE0 = _mm_add_ps(sum_ai_SSE0, sum_ai_SSE1);
-        sum_ai_SSE2 = _mm_add_ps(sum_ai_SSE2, sum_ai_SSE3);
-        sum_ai_SSE0 = _mm_add_ps(sum_ai_SSE0, sum_ai_SSE2);
-        _mm_store_ps(work+i, _mm_add_ps(sum_ai_SSE0, _mm_load_ps(work+i)));
-    }
-
-
-    for (i = 0; i < natoms/2+1; i++)
-    {
-        work[i] += work[natoms+i];
-    }
-
-    /* Parallel summations would go here if ever implemented with DD */
-
-    if (gb_algorithm == egbHCT)
-    {
-        /* HCT */
-        for (i = 0; i < natoms; i++)
-        {
-            if (born->use[i] != 0)
-            {
-                rai     = top->atomtypes.gb_radius[mdatoms->typeA[i]]-born->gb_doffset;
-                sum_ai  = 1.0/rai - work[i];
-                min_rad = rai + born->gb_doffset;
-                rad     = 1.0/sum_ai;
-
-                born->bRad[i]   = rad > min_rad ? rad : min_rad;
-                fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
-            }
-        }
-
-    }
-    else
-    {
-        /* OBC */
-
-        /* Calculate the radii */
-        for (i = 0; i < natoms; i++)
-        {
-
-            if (born->use[i] != 0)
-            {
-                rai        = top->atomtypes.gb_radius[mdatoms->typeA[i]];
-                rai_inv2   = 1.0/rai;
-                rai        = rai-born->gb_doffset;
-                rai_inv    = 1.0/rai;
-                sum_ai     = rai * work[i];
-                sum_ai2    = sum_ai  * sum_ai;
-                sum_ai3    = sum_ai2 * sum_ai;
-
-                tsum          = tanh(born->obc_alpha*sum_ai-born->obc_beta*sum_ai2+born->obc_gamma*sum_ai3);
-                born->bRad[i] = rai_inv - tsum*rai_inv2;
-                born->bRad[i] = 1.0 / born->bRad[i];
-
-                fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
-
-                tchain         = rai * (born->obc_alpha-2*born->obc_beta*sum_ai+3*born->obc_gamma*sum_ai2);
-                born->drobc[i] = (1.0-tsum*tsum)*tchain*rai_inv2;
-            }
-        }
-    }
-
-    return 0;
-}
-
-
-
-
-
-
-
-
-int
-genborn_allvsall_calc_chainrule_sse2_single(t_forcerec *           fr,
-                                            t_mdatoms *            mdatoms,
-                                            gmx_genborn_t *        born,
-                                            real *                 x,
-                                            real *                 f,
-                                            int                    gb_algorithm,
-                                            void *                 paadata)
-{
-    gmx_allvsallgb2_data_t *aadata;
-    int                     natoms;
-    int                     ni0, ni1;
-    int                     nj0, nj1, nj2, nj3;
-    int                     i, j, k, n;
-    int                     idx;
-    int              *      mask;
-    int              *      pmask0;
-    int              *      emask0;
-    int              *      jindex;
-
-    real                    ix, iy, iz;
-    real                    fix, fiy, fiz;
-    real                    jx, jy, jz;
-    real                    dx, dy, dz;
-    real                    tx, ty, tz;
-    real                    rbai, rbaj, fgb, fgb_ai, rbi;
-    real              *     rb;
-    real              *     dadx;
-    real              *     x_align;
-    real              *     y_align;
-    real              *     z_align;
-    real              *     fx_align;
-    real              *     fy_align;
-    real              *     fz_align;
-    real                    tmpsum[4];
-
-    __m128                  jmask_SSE0, jmask_SSE1, jmask_SSE2, jmask_SSE3;
-    __m128                  ix_SSE0, iy_SSE0, iz_SSE0;
-    __m128                  ix_SSE1, iy_SSE1, iz_SSE1;
-    __m128                  ix_SSE2, iy_SSE2, iz_SSE2;
-    __m128                  ix_SSE3, iy_SSE3, iz_SSE3;
-    __m128                  fix_SSE0, fiy_SSE0, fiz_SSE0;
-    __m128                  fix_SSE1, fiy_SSE1, fiz_SSE1;
-    __m128                  fix_SSE2, fiy_SSE2, fiz_SSE2;
-    __m128                  fix_SSE3, fiy_SSE3, fiz_SSE3;
-    __m128                  rbai_SSE0, rbai_SSE1, rbai_SSE2, rbai_SSE3;
-    __m128                  imask_SSE0, imask_SSE1, imask_SSE2, imask_SSE3;
-    __m128                  jx_SSE, jy_SSE, jz_SSE, rbaj_SSE;
-    __m128                  dx_SSE0, dy_SSE0, dz_SSE0;
-    __m128                  dx_SSE1, dy_SSE1, dz_SSE1;
-    __m128                  dx_SSE2, dy_SSE2, dz_SSE2;
-    __m128                  dx_SSE3, dy_SSE3, dz_SSE3;
-    __m128                  fgb_SSE0, fgb_ai_SSE0;
-    __m128                  fgb_SSE1, fgb_ai_SSE1;
-    __m128                  fgb_SSE2, fgb_ai_SSE2;
-    __m128                  fgb_SSE3, fgb_ai_SSE3;
-    __m128                  tx_SSE0, ty_SSE0, tz_SSE0;
-    __m128                  tx_SSE1, ty_SSE1, tz_SSE1;
-    __m128                  tx_SSE2, ty_SSE2, tz_SSE2;
-    __m128                  tx_SSE3, ty_SSE3, tz_SSE3;
-    __m128                  t1, t2;
-
-    natoms              = mdatoms->nr;
-    ni0                 = 0;
-    ni1                 = mdatoms->homenr;
-    dadx                = fr->dadx;
-
-    aadata = (gmx_allvsallgb2_data_t *)paadata;
-
-    x_align  = aadata->x_align;
-    y_align  = aadata->y_align;
-    z_align  = aadata->z_align;
-    fx_align = aadata->fx_align;
-    fy_align = aadata->fy_align;
-    fz_align = aadata->fz_align;
-
-    jindex    = aadata->jindex_gb;
-    dadx      = fr->dadx;
-
-    n  = 0;
-    rb = aadata->work;
-
-    /* Loop to get the proper form for the Born radius term */
-    if (gb_algorithm == egbSTILL)
-    {
-        for (i = 0; i < natoms; i++)
-        {
-            rbi   = born->bRad[i];
-            rb[i] = (2 * rbi * rbi * fr->dvda[i])/ONE_4PI_EPS0;
-        }
-    }
-    else if (gb_algorithm == egbHCT)
-    {
-        for (i = 0; i < natoms; i++)
-        {
-            rbi   = born->bRad[i];
-            rb[i] = rbi * rbi * fr->dvda[i];
-        }
-    }
-    else if (gb_algorithm == egbOBC)
-    {
-        for (idx = 0; idx < natoms; idx++)
-        {
-            rbi     = born->bRad[idx];
-            rb[idx] = rbi * rbi * born->drobc[idx] * fr->dvda[idx];
-        }
-    }
-
-    for (i = 0; i < 2*natoms; i++)
-    {
-        fx_align[i]       = 0;
-        fy_align[i]       = 0;
-        fz_align[i]       = 0;
-    }
-
-
-    for (i = 0; i < natoms; i++)
-    {
-        rb[i+natoms] = rb[i];
-    }
-
-    for (i = ni0; i < ni1; i += UNROLLI)
-    {
-        /* We assume shifts are NOT used for all-vs-all interactions */
-
-        /* Load i atom data */
-        ix_SSE0          = _mm_load1_ps(x_align+i);
-        iy_SSE0          = _mm_load1_ps(y_align+i);
-        iz_SSE0          = _mm_load1_ps(z_align+i);
-        ix_SSE1          = _mm_load1_ps(x_align+i+1);
-        iy_SSE1          = _mm_load1_ps(y_align+i+1);
-        iz_SSE1          = _mm_load1_ps(z_align+i+1);
-        ix_SSE2          = _mm_load1_ps(x_align+i+2);
-        iy_SSE2          = _mm_load1_ps(y_align+i+2);
-        iz_SSE2          = _mm_load1_ps(z_align+i+2);
-        ix_SSE3          = _mm_load1_ps(x_align+i+3);
-        iy_SSE3          = _mm_load1_ps(y_align+i+3);
-        iz_SSE3          = _mm_load1_ps(z_align+i+3);
-
-        fix_SSE0         = _mm_setzero_ps();
-        fiy_SSE0         = _mm_setzero_ps();
-        fiz_SSE0         = _mm_setzero_ps();
-        fix_SSE1         = _mm_setzero_ps();
-        fiy_SSE1         = _mm_setzero_ps();
-        fiz_SSE1         = _mm_setzero_ps();
-        fix_SSE2         = _mm_setzero_ps();
-        fiy_SSE2         = _mm_setzero_ps();
-        fiz_SSE2         = _mm_setzero_ps();
-        fix_SSE3         = _mm_setzero_ps();
-        fiy_SSE3         = _mm_setzero_ps();
-        fiz_SSE3         = _mm_setzero_ps();
-
-        rbai_SSE0        = _mm_load1_ps(rb+i);
-        rbai_SSE1        = _mm_load1_ps(rb+i+1);
-        rbai_SSE2        = _mm_load1_ps(rb+i+2);
-        rbai_SSE3        = _mm_load1_ps(rb+i+3);
-
-        /* Load limits for loop over neighbors */
-        nj0              = jindex[4*i];
-        nj3              = jindex[4*i+3];
-
-        /* No masks necessary, since the stored chain rule derivatives will be zero in those cases! */
-        for (j = nj0; j < nj3; j += UNROLLJ)
-        {
-            /* load j atom coordinates */
-            jx_SSE           = _mm_load_ps(x_align+j);
-            jy_SSE           = _mm_load_ps(y_align+j);
-            jz_SSE           = _mm_load_ps(z_align+j);
-
-            /* Calculate distance */
-            dx_SSE0          = _mm_sub_ps(ix_SSE0, jx_SSE);
-            dy_SSE0          = _mm_sub_ps(iy_SSE0, jy_SSE);
-            dz_SSE0          = _mm_sub_ps(iz_SSE0, jz_SSE);
-            dx_SSE1          = _mm_sub_ps(ix_SSE1, jx_SSE);
-            dy_SSE1          = _mm_sub_ps(iy_SSE1, jy_SSE);
-            dz_SSE1          = _mm_sub_ps(iz_SSE1, jz_SSE);
-            dx_SSE2          = _mm_sub_ps(ix_SSE2, jx_SSE);
-            dy_SSE2          = _mm_sub_ps(iy_SSE2, jy_SSE);
-            dz_SSE2          = _mm_sub_ps(iz_SSE2, jz_SSE);
-            dx_SSE3          = _mm_sub_ps(ix_SSE3, jx_SSE);
-            dy_SSE3          = _mm_sub_ps(iy_SSE3, jy_SSE);
-            dz_SSE3          = _mm_sub_ps(iz_SSE3, jz_SSE);
-
-            rbaj_SSE         = _mm_load_ps(rb+j);
-
-            fgb_SSE0         = _mm_mul_ps(rbai_SSE0, _mm_load_ps(dadx));
-            dadx            += 4;
-            fgb_SSE1         = _mm_mul_ps(rbai_SSE1, _mm_load_ps(dadx));
-            dadx            += 4;
-            fgb_SSE2         = _mm_mul_ps(rbai_SSE2, _mm_load_ps(dadx));
-            dadx            += 4;
-            fgb_SSE3         = _mm_mul_ps(rbai_SSE3, _mm_load_ps(dadx));
-            dadx            += 4;
-
-            fgb_ai_SSE0      = _mm_mul_ps(rbaj_SSE, _mm_load_ps(dadx));
-            dadx            += 4;
-            fgb_ai_SSE1      = _mm_mul_ps(rbaj_SSE, _mm_load_ps(dadx));
-            dadx            += 4;
-            fgb_ai_SSE2      = _mm_mul_ps(rbaj_SSE, _mm_load_ps(dadx));
-            dadx            += 4;
-            fgb_ai_SSE3      = _mm_mul_ps(rbaj_SSE, _mm_load_ps(dadx));
-            dadx            += 4;
-
-            /* Total force between ai and aj is the sum of ai->aj and aj->ai */
-            fgb_SSE0         = _mm_add_ps(fgb_SSE0, fgb_ai_SSE0);
-            fgb_SSE1         = _mm_add_ps(fgb_SSE1, fgb_ai_SSE1);
-            fgb_SSE2         = _mm_add_ps(fgb_SSE2, fgb_ai_SSE2);
-            fgb_SSE3         = _mm_add_ps(fgb_SSE3, fgb_ai_SSE3);
-
-            /* Calculate temporary vectorial force */
-            tx_SSE0            = _mm_mul_ps(fgb_SSE0, dx_SSE0);
-            ty_SSE0            = _mm_mul_ps(fgb_SSE0, dy_SSE0);
-            tz_SSE0            = _mm_mul_ps(fgb_SSE0, dz_SSE0);
-            tx_SSE1            = _mm_mul_ps(fgb_SSE1, dx_SSE1);
-            ty_SSE1            = _mm_mul_ps(fgb_SSE1, dy_SSE1);
-            tz_SSE1            = _mm_mul_ps(fgb_SSE1, dz_SSE1);
-            tx_SSE2            = _mm_mul_ps(fgb_SSE2, dx_SSE2);
-            ty_SSE2            = _mm_mul_ps(fgb_SSE2, dy_SSE2);
-            tz_SSE2            = _mm_mul_ps(fgb_SSE2, dz_SSE2);
-            tx_SSE3            = _mm_mul_ps(fgb_SSE3, dx_SSE3);
-            ty_SSE3            = _mm_mul_ps(fgb_SSE3, dy_SSE3);
-            tz_SSE3            = _mm_mul_ps(fgb_SSE3, dz_SSE3);
-
-            /* Increment i atom force */
-            fix_SSE0          = _mm_add_ps(fix_SSE0, tx_SSE0);
-            fiy_SSE0          = _mm_add_ps(fiy_SSE0, ty_SSE0);
-            fiz_SSE0          = _mm_add_ps(fiz_SSE0, tz_SSE0);
-            fix_SSE1          = _mm_add_ps(fix_SSE1, tx_SSE1);
-            fiy_SSE1          = _mm_add_ps(fiy_SSE1, ty_SSE1);
-            fiz_SSE1          = _mm_add_ps(fiz_SSE1, tz_SSE1);
-            fix_SSE2          = _mm_add_ps(fix_SSE2, tx_SSE2);
-            fiy_SSE2          = _mm_add_ps(fiy_SSE2, ty_SSE2);
-            fiz_SSE2          = _mm_add_ps(fiz_SSE2, tz_SSE2);
-            fix_SSE3          = _mm_add_ps(fix_SSE3, tx_SSE3);
-            fiy_SSE3          = _mm_add_ps(fiy_SSE3, ty_SSE3);
-            fiz_SSE3          = _mm_add_ps(fiz_SSE3, tz_SSE3);
-
-            /* Decrement j atom force */
-            _mm_store_ps(fx_align+j,
-                         _mm_sub_ps( _mm_load_ps(fx_align+j), gmx_mm_sum4_ps(tx_SSE0, tx_SSE1, tx_SSE2, tx_SSE3) ));
-            _mm_store_ps(fy_align+j,
-                         _mm_sub_ps( _mm_load_ps(fy_align+j), gmx_mm_sum4_ps(ty_SSE0, ty_SSE1, ty_SSE2, ty_SSE3) ));
-            _mm_store_ps(fz_align+j,
-                         _mm_sub_ps( _mm_load_ps(fz_align+j), gmx_mm_sum4_ps(tz_SSE0, tz_SSE1, tz_SSE2, tz_SSE3) ));
-        }
-        /* Add i forces to mem and shifted force list */
-        _MM_TRANSPOSE4_PS(fix_SSE0, fix_SSE1, fix_SSE2, fix_SSE3);
-        fix_SSE0 = _mm_add_ps(fix_SSE0, fix_SSE1);
-        fix_SSE2 = _mm_add_ps(fix_SSE2, fix_SSE3);
-        fix_SSE0 = _mm_add_ps(fix_SSE0, fix_SSE2);
-        _mm_store_ps(fx_align+i, _mm_add_ps(fix_SSE0, _mm_load_ps(fx_align+i)));
-
-        _MM_TRANSPOSE4_PS(fiy_SSE0, fiy_SSE1, fiy_SSE2, fiy_SSE3);
-        fiy_SSE0 = _mm_add_ps(fiy_SSE0, fiy_SSE1);
-        fiy_SSE2 = _mm_add_ps(fiy_SSE2, fiy_SSE3);
-        fiy_SSE0 = _mm_add_ps(fiy_SSE0, fiy_SSE2);
-        _mm_store_ps(fy_align+i, _mm_add_ps(fiy_SSE0, _mm_load_ps(fy_align+i)));
-
-        _MM_TRANSPOSE4_PS(fiz_SSE0, fiz_SSE1, fiz_SSE2, fiz_SSE3);
-        fiz_SSE0 = _mm_add_ps(fiz_SSE0, fiz_SSE1);
-        fiz_SSE2 = _mm_add_ps(fiz_SSE2, fiz_SSE3);
-        fiz_SSE0 = _mm_add_ps(fiz_SSE0, fiz_SSE2);
-        _mm_store_ps(fz_align+i, _mm_add_ps(fiz_SSE0, _mm_load_ps(fz_align+i)));
-    }
-
-    for (i = 0; i < natoms; i++)
-    {
-        f[3*i]       += fx_align[i] + fx_align[natoms+i];
-        f[3*i+1]     += fy_align[i] + fy_align[natoms+i];
-        f[3*i+2]     += fz_align[i] + fz_align[natoms+i];
-    }
-
-    return 0;
-}
-
-#else
-/* dummy variable when not using SSE */
-int genborn_allvsall_sse2_single_dummy;
-
-
-#endif
diff --git a/src/gromacs/mdlib/genborn_allvsall_sse2_single.h b/src/gromacs/mdlib/genborn_allvsall_sse2_single.h
deleted file mode 100644 (file)
index d1e908a..0000000
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team.
- * Copyright (c) 2010,2014, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#ifndef _GENBORN_ALLVSALL_SSE2_SINGLE_H
-#define _GENBORN_ALLVSALL_SSE2_SINGLE_H
-
-#include "gromacs/legacyheaders/typedefs.h"
-#include "gromacs/legacyheaders/types/simple.h"
-
-int
-genborn_allvsall_calc_still_radii_sse2_single(t_forcerec *           fr,
-                                              t_mdatoms *            mdatoms,
-                                              gmx_genborn_t *        born,
-                                              gmx_localtop_t *       top,
-                                              real *                 x,
-                                              t_commrec *            cr,
-                                              void *                 work);
-
-int
-genborn_allvsall_calc_hct_obc_radii_sse2_single(t_forcerec *           fr,
-                                                t_mdatoms *            mdatoms,
-                                                gmx_genborn_t *        born,
-                                                int                    gb_algorithm,
-                                                gmx_localtop_t *       top,
-                                                real *                 x,
-                                                t_commrec *            cr,
-                                                void *                 work);
-
-int
-genborn_allvsall_calc_chainrule_sse2_single(t_forcerec *           fr,
-                                            t_mdatoms *            mdatoms,
-                                            gmx_genborn_t *        born,
-                                            real *                 x,
-                                            real *                 f,
-                                            int                    gb_algorithm,
-                                            void *                 work);
-
-#endif
diff --git a/src/gromacs/mdlib/genborn_sse2_double.c b/src/gromacs/mdlib/genborn_sse2_double.c
deleted file mode 100644 (file)
index 62cab4b..0000000
+++ /dev/null
@@ -1,918 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2008, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#include "gmxpre.h"
-
-#include <math.h>
-#include <string.h>
-
-#include "gromacs/domdec/domdec.h"
-#include "gromacs/fileio/pdbio.h"
-#include "gromacs/legacyheaders/genborn.h"
-#include "gromacs/legacyheaders/names.h"
-#include "gromacs/legacyheaders/network.h"
-#include "gromacs/legacyheaders/typedefs.h"
-#include "gromacs/math/units.h"
-#include "gromacs/math/vec.h"
-#include "gromacs/utility/fatalerror.h"
-#include "gromacs/utility/gmxmpi.h"
-#include "gromacs/utility/smalloc.h"
-
-/* Only compile this file if SSE2 intrinsics are available */
-#if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER)
-#include "genborn_sse2_double.h"
-
-#include <emmintrin.h>
-#include <gmx_sse2_double.h>
-
-int
-calc_gb_rad_still_sse2_double(t_commrec *cr, t_forcerec *fr,
-                              int natoms, gmx_localtop_t *top,
-                              double *x, t_nblist *nl,
-                              gmx_genborn_t *born)
-{
-    int           i, k, n, ii, is3, ii3, nj0, nj1, offset;
-    int           jnrA, jnrB, j3A, j3B;
-    int          *mdtype;
-    double        shX, shY, shZ;
-    int          *jjnr;
-    double       *shiftvec;
-
-    double        gpi_ai, gpi2;
-    double        factor;
-    double       *gb_radius;
-    double       *vsolv;
-    double       *work;
-    double       *dadx;
-
-    __m128d       ix, iy, iz;
-    __m128d       jx, jy, jz;
-    __m128d       dx, dy, dz;
-    __m128d       tx, ty, tz;
-    __m128d       rsq, rinv, rinv2, rinv4, rinv6;
-    __m128d       ratio, gpi, rai, raj, vai, vaj, rvdw;
-    __m128d       ccf, dccf, theta, cosq, term, sinq, res, prod, prod_ai, tmp;
-    __m128d       mask, icf4, icf6, mask_cmp;
-
-    const __m128d half   = _mm_set1_pd(0.5);
-    const __m128d three  = _mm_set1_pd(3.0);
-    const __m128d one    = _mm_set1_pd(1.0);
-    const __m128d two    = _mm_set1_pd(2.0);
-    const __m128d zero   = _mm_set1_pd(0.0);
-    const __m128d four   = _mm_set1_pd(4.0);
-
-    const __m128d still_p5inv  = _mm_set1_pd(STILL_P5INV);
-    const __m128d still_pip5   = _mm_set1_pd(STILL_PIP5);
-    const __m128d still_p4     = _mm_set1_pd(STILL_P4);
-
-    factor  = 0.5 * ONE_4PI_EPS0;
-
-    gb_radius = born->gb_radius;
-    vsolv     = born->vsolv;
-    work      = born->gpol_still_work;
-    jjnr      = nl->jjnr;
-    shiftvec  = fr->shift_vec[0];
-    dadx      = fr->dadx;
-
-    jnrA = jnrB = 0;
-    jx   = _mm_setzero_pd();
-    jy   = _mm_setzero_pd();
-    jz   = _mm_setzero_pd();
-
-    n = 0;
-
-    for (i = 0; i < natoms; i++)
-    {
-        work[i] = 0;
-    }
-
-    for (i = 0; i < nl->nri; i++)
-    {
-        ii     = nl->iinr[i];
-        ii3    = ii*3;
-        is3    = 3*nl->shift[i];
-        shX    = shiftvec[is3];
-        shY    = shiftvec[is3+1];
-        shZ    = shiftvec[is3+2];
-        nj0    = nl->jindex[i];
-        nj1    = nl->jindex[i+1];
-
-        ix     = _mm_set1_pd(shX+x[ii3+0]);
-        iy     = _mm_set1_pd(shY+x[ii3+1]);
-        iz     = _mm_set1_pd(shZ+x[ii3+2]);
-
-
-        /* Polarization energy for atom ai */
-        gpi    = _mm_setzero_pd();
-
-        rai     = _mm_load1_pd(gb_radius+ii);
-        prod_ai = _mm_set1_pd(STILL_P4*vsolv[ii]);
-
-        for (k = nj0; k < nj1-1; k += 2)
-        {
-            jnrA        = jjnr[k];
-            jnrB        = jjnr[k+1];
-
-            j3A         = 3*jnrA;
-            j3B         = 3*jnrB;
-
-            GMX_MM_LOAD_1RVEC_2POINTERS_PD(x+j3A, x+j3B, jx, jy, jz);
-
-            GMX_MM_LOAD_2VALUES_PD(gb_radius+jnrA, gb_radius+jnrB, raj);
-            GMX_MM_LOAD_2VALUES_PD(vsolv+jnrA, vsolv+jnrB, vaj);
-
-            dx          = _mm_sub_pd(ix, jx);
-            dy          = _mm_sub_pd(iy, jy);
-            dz          = _mm_sub_pd(iz, jz);
-
-            rsq         = gmx_mm_calc_rsq_pd(dx, dy, dz);
-            rinv        = gmx_mm_invsqrt_pd(rsq);
-            rinv2       = _mm_mul_pd(rinv, rinv);
-            rinv4       = _mm_mul_pd(rinv2, rinv2);
-            rinv6       = _mm_mul_pd(rinv4, rinv2);
-
-            rvdw        = _mm_add_pd(rai, raj);
-            ratio       = _mm_mul_pd(rsq, gmx_mm_inv_pd( _mm_mul_pd(rvdw, rvdw)));
-
-            mask_cmp    = _mm_cmple_pd(ratio, still_p5inv);
-
-            /* gmx_mm_sincos_pd() is quite expensive, so avoid calculating it if we can! */
-            if (0 == _mm_movemask_pd(mask_cmp) )
-            {
-                /* if ratio>still_p5inv for ALL elements */
-                ccf         = one;
-                dccf        = _mm_setzero_pd();
-            }
-            else
-            {
-                ratio       = _mm_min_pd(ratio, still_p5inv);
-                theta       = _mm_mul_pd(ratio, still_pip5);
-                gmx_mm_sincos_pd(theta, &sinq, &cosq);
-                term        = _mm_mul_pd(half, _mm_sub_pd(one, cosq));
-                ccf         = _mm_mul_pd(term, term);
-                dccf        = _mm_mul_pd(_mm_mul_pd(two, term),
-                                         _mm_mul_pd(sinq, theta));
-            }
-
-            prod        = _mm_mul_pd(still_p4, vaj);
-            icf4        = _mm_mul_pd(ccf, rinv4);
-            icf6        = _mm_mul_pd( _mm_sub_pd( _mm_mul_pd(four, ccf), dccf), rinv6);
-
-            GMX_MM_INCREMENT_2VALUES_PD(work+jnrA, work+jnrB, _mm_mul_pd(prod_ai, icf4));
-
-            gpi           = _mm_add_pd(gpi, _mm_mul_pd(prod, icf4) );
-
-            _mm_store_pd(dadx, _mm_mul_pd(prod, icf6));
-            dadx += 2;
-            _mm_store_pd(dadx, _mm_mul_pd(prod_ai, icf6));
-            dadx += 2;
-        }
-
-        if (k < nj1)
-        {
-            jnrA        = jjnr[k];
-
-            j3A         = 3*jnrA;
-
-            GMX_MM_LOAD_1RVEC_1POINTER_PD(x+j3A, jx, jy, jz);
-
-            GMX_MM_LOAD_1VALUE_PD(gb_radius+jnrA, raj);
-            GMX_MM_LOAD_1VALUE_PD(vsolv+jnrA, vaj);
-
-            dx          = _mm_sub_sd(ix, jx);
-            dy          = _mm_sub_sd(iy, jy);
-            dz          = _mm_sub_sd(iz, jz);
-
-            rsq         = gmx_mm_calc_rsq_pd(dx, dy, dz);
-            rinv        = gmx_mm_invsqrt_pd(rsq);
-            rinv2       = _mm_mul_sd(rinv, rinv);
-            rinv4       = _mm_mul_sd(rinv2, rinv2);
-            rinv6       = _mm_mul_sd(rinv4, rinv2);
-
-            rvdw        = _mm_add_sd(rai, raj);
-            ratio       = _mm_mul_sd(rsq, gmx_mm_inv_pd( _mm_mul_pd(rvdw, rvdw)));
-
-            mask_cmp    = _mm_cmple_sd(ratio, still_p5inv);
-
-            /* gmx_mm_sincos_pd() is quite expensive, so avoid calculating it if we can! */
-            if (0 == _mm_movemask_pd(mask_cmp) )
-            {
-                /* if ratio>still_p5inv for ALL elements */
-                ccf         = one;
-                dccf        = _mm_setzero_pd();
-            }
-            else
-            {
-                ratio       = _mm_min_sd(ratio, still_p5inv);
-                theta       = _mm_mul_sd(ratio, still_pip5);
-                gmx_mm_sincos_pd(theta, &sinq, &cosq);
-                term        = _mm_mul_sd(half, _mm_sub_sd(one, cosq));
-                ccf         = _mm_mul_sd(term, term);
-                dccf        = _mm_mul_sd(_mm_mul_sd(two, term),
-                                         _mm_mul_sd(sinq, theta));
-            }
-
-            prod        = _mm_mul_sd(still_p4, vaj);
-            icf4        = _mm_mul_sd(ccf, rinv4);
-            icf6        = _mm_mul_sd( _mm_sub_sd( _mm_mul_sd(four, ccf), dccf), rinv6);
-
-            GMX_MM_INCREMENT_1VALUE_PD(work+jnrA, _mm_mul_sd(prod_ai, icf4));
-
-            gpi           = _mm_add_sd(gpi, _mm_mul_sd(prod, icf4) );
-
-            _mm_store_pd(dadx, _mm_mul_pd(prod, icf6));
-            dadx += 2;
-            _mm_store_pd(dadx, _mm_mul_pd(prod_ai, icf6));
-            dadx += 2;
-        }
-        gmx_mm_update_1pot_pd(gpi, work+ii);
-    }
-
-    /* Sum up the polarization energy from other nodes */
-    if (DOMAINDECOMP(cr))
-    {
-        dd_atom_sum_real(cr->dd, work);
-    }
-
-    /* Compute the radii */
-    for (i = 0; i < fr->natoms_force; i++) /* PELA born->nr */
-    {
-        if (born->use[i] != 0)
-        {
-            gpi_ai           = born->gpol[i] + work[i]; /* add gpi to the initial pol energy gpi_ai*/
-            gpi2             = gpi_ai * gpi_ai;
-            born->bRad[i]    = factor*gmx_invsqrt(gpi2);
-            fr->invsqrta[i]  = gmx_invsqrt(born->bRad[i]);
-        }
-    }
-
-    /* Extra (local) communication required for DD */
-    if (DOMAINDECOMP(cr))
-    {
-        dd_atom_spread_real(cr->dd, born->bRad);
-        dd_atom_spread_real(cr->dd, fr->invsqrta);
-    }
-
-    return 0;
-}
-
-
-int
-calc_gb_rad_hct_obc_sse2_double(t_commrec *cr, t_forcerec * fr, int natoms, gmx_localtop_t *top,
-                                double *x, t_nblist *nl, gmx_genborn_t *born, t_mdatoms *md, int gb_algorithm)
-{
-    int           i, ai, k, n, ii, ii3, is3, nj0, nj1, at0, at1, offset;
-    int           jnrA, jnrB;
-    int           j3A, j3B;
-    double        shX, shY, shZ;
-    double        rr, rr_inv, rr_inv2, sum_tmp, sum, sum2, sum3, gbr;
-    double        sum_ai2, sum_ai3, tsum, tchain, doffset;
-    double       *obc_param;
-    double       *gb_radius;
-    double       *work;
-    int        *  jjnr;
-    double       *dadx;
-    double       *shiftvec;
-    double        min_rad, rad;
-
-    __m128d       ix, iy, iz, jx, jy, jz;
-    __m128d       dx, dy, dz, t1, t2, t3, t4;
-    __m128d       rsq, rinv, r;
-    __m128d       rai, rai_inv, raj, raj_inv, rai_inv2, sk, sk2, lij, dlij, duij;
-    __m128d       uij, lij2, uij2, lij3, uij3, diff2;
-    __m128d       lij_inv, sk2_inv, prod, log_term, tmp, tmp_sum;
-    __m128d       sum_ai, tmp_ai, sk_ai, sk_aj, sk2_ai, sk2_aj, sk2_rinv;
-    __m128d       dadx1, dadx2;
-    __m128d       logterm;
-    __m128d       mask;
-    __m128d       obc_mask1, obc_mask2, obc_mask3;
-
-    __m128d       oneeighth   = _mm_set1_pd(0.125);
-    __m128d       onefourth   = _mm_set1_pd(0.25);
-
-    const __m128d half  = _mm_set1_pd(0.5);
-    const __m128d three = _mm_set1_pd(3.0);
-    const __m128d one   = _mm_set1_pd(1.0);
-    const __m128d two   = _mm_set1_pd(2.0);
-    const __m128d zero  = _mm_set1_pd(0.0);
-    const __m128d neg   = _mm_set1_pd(-1.0);
-
-    /* Set the dielectric offset */
-    doffset   = born->gb_doffset;
-    gb_radius = born->gb_radius;
-    obc_param = born->param;
-    work      = born->gpol_hct_work;
-    jjnr      = nl->jjnr;
-    dadx      = fr->dadx;
-    shiftvec  = fr->shift_vec[0];
-
-    jx        = _mm_setzero_pd();
-    jy        = _mm_setzero_pd();
-    jz        = _mm_setzero_pd();
-
-    jnrA = jnrB = 0;
-
-    for (i = 0; i < born->nr; i++)
-    {
-        work[i] = 0;
-    }
-
-    for (i = 0; i < nl->nri; i++)
-    {
-        ii     = nl->iinr[i];
-        ii3    = ii*3;
-        is3    = 3*nl->shift[i];
-        shX    = shiftvec[is3];
-        shY    = shiftvec[is3+1];
-        shZ    = shiftvec[is3+2];
-        nj0    = nl->jindex[i];
-        nj1    = nl->jindex[i+1];
-
-        ix     = _mm_set1_pd(shX+x[ii3+0]);
-        iy     = _mm_set1_pd(shY+x[ii3+1]);
-        iz     = _mm_set1_pd(shZ+x[ii3+2]);
-
-        rai     = _mm_load1_pd(gb_radius+ii);
-        rai_inv = gmx_mm_inv_pd(rai);
-
-        sum_ai = _mm_setzero_pd();
-
-        sk_ai  = _mm_load1_pd(born->param+ii);
-        sk2_ai = _mm_mul_pd(sk_ai, sk_ai);
-
-        for (k = nj0; k < nj1-1; k += 2)
-        {
-            jnrA        = jjnr[k];
-            jnrB        = jjnr[k+1];
-
-            j3A         = 3*jnrA;
-            j3B         = 3*jnrB;
-
-            GMX_MM_LOAD_1RVEC_2POINTERS_PD(x+j3A, x+j3B, jx, jy, jz);
-            GMX_MM_LOAD_2VALUES_PD(gb_radius+jnrA, gb_radius+jnrB, raj);
-            GMX_MM_LOAD_2VALUES_PD(obc_param+jnrA, obc_param+jnrB, sk_aj);
-
-            dx    = _mm_sub_pd(ix, jx);
-            dy    = _mm_sub_pd(iy, jy);
-            dz    = _mm_sub_pd(iz, jz);
-
-            rsq         = gmx_mm_calc_rsq_pd(dx, dy, dz);
-
-            rinv        = gmx_mm_invsqrt_pd(rsq);
-            r           = _mm_mul_pd(rsq, rinv);
-
-            /* Compute raj_inv aj1-4 */
-            raj_inv     = gmx_mm_inv_pd(raj);
-
-            /* Evaluate influence of atom aj -> ai */
-            t1            = _mm_add_pd(r, sk_aj);
-            t2            = _mm_sub_pd(r, sk_aj);
-            t3            = _mm_sub_pd(sk_aj, r);
-            obc_mask1     = _mm_cmplt_pd(rai, t1);
-            obc_mask2     = _mm_cmplt_pd(rai, t2);
-            obc_mask3     = _mm_cmplt_pd(rai, t3);
-
-            uij           = gmx_mm_inv_pd(t1);
-            lij           = _mm_or_pd(   _mm_and_pd(obc_mask2, gmx_mm_inv_pd(t2)),
-                                         _mm_andnot_pd(obc_mask2, rai_inv));
-            dlij          = _mm_and_pd(one, obc_mask2);
-            uij2          = _mm_mul_pd(uij, uij);
-            uij3          = _mm_mul_pd(uij2, uij);
-            lij2          = _mm_mul_pd(lij, lij);
-            lij3          = _mm_mul_pd(lij2, lij);
-
-            diff2         = _mm_sub_pd(uij2, lij2);
-            lij_inv       = gmx_mm_invsqrt_pd(lij2);
-            sk2_aj        = _mm_mul_pd(sk_aj, sk_aj);
-            sk2_rinv      = _mm_mul_pd(sk2_aj, rinv);
-            prod          = _mm_mul_pd(onefourth, sk2_rinv);
-
-            logterm       = gmx_mm_log_pd(_mm_mul_pd(uij, lij_inv));
-
-            t1            = _mm_sub_pd(lij, uij);
-            t2            = _mm_mul_pd(diff2,
-                                       _mm_sub_pd(_mm_mul_pd(onefourth, r),
-                                                  prod));
-            t3            = _mm_mul_pd(half, _mm_mul_pd(rinv, logterm));
-            t1            = _mm_add_pd(t1, _mm_add_pd(t2, t3));
-            t4            = _mm_mul_pd(two, _mm_sub_pd(rai_inv, lij));
-            t4            = _mm_and_pd(t4, obc_mask3);
-            t1            = _mm_mul_pd(half, _mm_add_pd(t1, t4));
-
-            sum_ai        = _mm_add_pd(sum_ai, _mm_and_pd(t1, obc_mask1) );
-
-            t1            = _mm_add_pd(_mm_mul_pd(half, lij2),
-                                       _mm_mul_pd(prod, lij3));
-            t1            = _mm_sub_pd(t1,
-                                       _mm_mul_pd(onefourth,
-                                                  _mm_add_pd(_mm_mul_pd(lij, rinv),
-                                                             _mm_mul_pd(lij3, r))));
-            t2            = _mm_mul_pd(onefourth,
-                                       _mm_add_pd(_mm_mul_pd(uij, rinv),
-                                                  _mm_mul_pd(uij3, r)));
-            t2            = _mm_sub_pd(t2,
-                                       _mm_add_pd(_mm_mul_pd(half, uij2),
-                                                  _mm_mul_pd(prod, uij3)));
-            t3            = _mm_mul_pd(_mm_mul_pd(onefourth, logterm),
-                                       _mm_mul_pd(rinv, rinv));
-            t3            = _mm_sub_pd(t3,
-                                       _mm_mul_pd(_mm_mul_pd(diff2, oneeighth),
-                                                  _mm_add_pd(one,
-                                                             _mm_mul_pd(sk2_rinv, rinv))));
-            t1            = _mm_mul_pd(rinv,
-                                       _mm_add_pd(_mm_mul_pd(dlij, t1),
-                                                  _mm_add_pd(t2, t3)));
-
-            dadx1         = _mm_and_pd(t1, obc_mask1);
-
-            /* Evaluate influence of atom ai -> aj */
-            t1            = _mm_add_pd(r, sk_ai);
-            t2            = _mm_sub_pd(r, sk_ai);
-            t3            = _mm_sub_pd(sk_ai, r);
-            obc_mask1     = _mm_cmplt_pd(raj, t1);
-            obc_mask2     = _mm_cmplt_pd(raj, t2);
-            obc_mask3     = _mm_cmplt_pd(raj, t3);
-
-            uij           = gmx_mm_inv_pd(t1);
-            lij           = _mm_or_pd(   _mm_and_pd(obc_mask2, gmx_mm_inv_pd(t2)),
-                                         _mm_andnot_pd(obc_mask2, raj_inv));
-            dlij          = _mm_and_pd(one, obc_mask2);
-            uij2          = _mm_mul_pd(uij, uij);
-            uij3          = _mm_mul_pd(uij2, uij);
-            lij2          = _mm_mul_pd(lij, lij);
-            lij3          = _mm_mul_pd(lij2, lij);
-
-            diff2         = _mm_sub_pd(uij2, lij2);
-            lij_inv       = gmx_mm_invsqrt_pd(lij2);
-            sk2_rinv      = _mm_mul_pd(sk2_ai, rinv);
-            prod          = _mm_mul_pd(onefourth, sk2_rinv);
-
-            logterm       = gmx_mm_log_pd(_mm_mul_pd(uij, lij_inv));
-
-            t1            = _mm_sub_pd(lij, uij);
-            t2            = _mm_mul_pd(diff2,
-                                       _mm_sub_pd(_mm_mul_pd(onefourth, r),
-                                                  prod));
-            t3            = _mm_mul_pd(half, _mm_mul_pd(rinv, logterm));
-            t1            = _mm_add_pd(t1, _mm_add_pd(t2, t3));
-            t4            = _mm_mul_pd(two, _mm_sub_pd(raj_inv, lij));
-            t4            = _mm_and_pd(t4, obc_mask3);
-            t1            = _mm_mul_pd(half, _mm_add_pd(t1, t4));
-
-            GMX_MM_INCREMENT_2VALUES_PD(work+jnrA, work+jnrB, _mm_and_pd(t1, obc_mask1));
-
-            t1            = _mm_add_pd(_mm_mul_pd(half, lij2),
-                                       _mm_mul_pd(prod, lij3));
-            t1            = _mm_sub_pd(t1,
-                                       _mm_mul_pd(onefourth,
-                                                  _mm_add_pd(_mm_mul_pd(lij, rinv),
-                                                             _mm_mul_pd(lij3, r))));
-            t2            = _mm_mul_pd(onefourth,
-                                       _mm_add_pd(_mm_mul_pd(uij, rinv),
-                                                  _mm_mul_pd(uij3, r)));
-            t2            = _mm_sub_pd(t2,
-                                       _mm_add_pd(_mm_mul_pd(half, uij2),
-                                                  _mm_mul_pd(prod, uij3)));
-            t3            = _mm_mul_pd(_mm_mul_pd(onefourth, logterm),
-                                       _mm_mul_pd(rinv, rinv));
-            t3            = _mm_sub_pd(t3,
-                                       _mm_mul_pd(_mm_mul_pd(diff2, oneeighth),
-                                                  _mm_add_pd(one,
-                                                             _mm_mul_pd(sk2_rinv, rinv))));
-            t1            = _mm_mul_pd(rinv,
-                                       _mm_add_pd(_mm_mul_pd(dlij, t1),
-                                                  _mm_add_pd(t2, t3)));
-
-            dadx2         = _mm_and_pd(t1, obc_mask1);
-
-            _mm_store_pd(dadx, dadx1);
-            dadx += 2;
-            _mm_store_pd(dadx, dadx2);
-            dadx += 2;
-        } /* end normal inner loop */
-
-        if (k < nj1)
-        {
-            jnrA        = jjnr[k];
-
-            j3A         = 3*jnrA;
-
-            GMX_MM_LOAD_1RVEC_1POINTER_PD(x+j3A, jx, jy, jz);
-            GMX_MM_LOAD_1VALUE_PD(gb_radius+jnrA, raj);
-            GMX_MM_LOAD_1VALUE_PD(obc_param+jnrA, sk_aj);
-
-            dx    = _mm_sub_sd(ix, jx);
-            dy    = _mm_sub_sd(iy, jy);
-            dz    = _mm_sub_sd(iz, jz);
-
-            rsq         = gmx_mm_calc_rsq_pd(dx, dy, dz);
-
-            rinv        = gmx_mm_invsqrt_pd(rsq);
-            r           = _mm_mul_sd(rsq, rinv);
-
-            /* Compute raj_inv aj1-4 */
-            raj_inv     = gmx_mm_inv_pd(raj);
-
-            /* Evaluate influence of atom aj -> ai */
-            t1            = _mm_add_sd(r, sk_aj);
-            t2            = _mm_sub_sd(r, sk_aj);
-            t3            = _mm_sub_sd(sk_aj, r);
-            obc_mask1     = _mm_cmplt_sd(rai, t1);
-            obc_mask2     = _mm_cmplt_sd(rai, t2);
-            obc_mask3     = _mm_cmplt_sd(rai, t3);
-
-            uij           = gmx_mm_inv_pd(t1);
-            lij           = _mm_or_pd(_mm_and_pd(obc_mask2, gmx_mm_inv_pd(t2)),
-                                      _mm_andnot_pd(obc_mask2, rai_inv));
-            dlij          = _mm_and_pd(one, obc_mask2);
-            uij2          = _mm_mul_sd(uij, uij);
-            uij3          = _mm_mul_sd(uij2, uij);
-            lij2          = _mm_mul_sd(lij, lij);
-            lij3          = _mm_mul_sd(lij2, lij);
-
-            diff2         = _mm_sub_sd(uij2, lij2);
-            lij_inv       = gmx_mm_invsqrt_pd(lij2);
-            sk2_aj        = _mm_mul_sd(sk_aj, sk_aj);
-            sk2_rinv      = _mm_mul_sd(sk2_aj, rinv);
-            prod          = _mm_mul_sd(onefourth, sk2_rinv);
-
-            logterm       = gmx_mm_log_pd(_mm_mul_sd(uij, lij_inv));
-
-            t1            = _mm_sub_sd(lij, uij);
-            t2            = _mm_mul_sd(diff2,
-                                       _mm_sub_sd(_mm_mul_pd(onefourth, r),
-                                                  prod));
-            t3            = _mm_mul_sd(half, _mm_mul_sd(rinv, logterm));
-            t1            = _mm_add_sd(t1, _mm_add_sd(t2, t3));
-            t4            = _mm_mul_sd(two, _mm_sub_sd(rai_inv, lij));
-            t4            = _mm_and_pd(t4, obc_mask3);
-            t1            = _mm_mul_sd(half, _mm_add_sd(t1, t4));
-
-            sum_ai        = _mm_add_sd(sum_ai, _mm_and_pd(t1, obc_mask1) );
-
-            t1            = _mm_add_sd(_mm_mul_sd(half, lij2),
-                                       _mm_mul_sd(prod, lij3));
-            t1            = _mm_sub_sd(t1,
-                                       _mm_mul_sd(onefourth,
-                                                  _mm_add_sd(_mm_mul_sd(lij, rinv),
-                                                             _mm_mul_sd(lij3, r))));
-            t2            = _mm_mul_sd(onefourth,
-                                       _mm_add_sd(_mm_mul_sd(uij, rinv),
-                                                  _mm_mul_sd(uij3, r)));
-            t2            = _mm_sub_sd(t2,
-                                       _mm_add_sd(_mm_mul_sd(half, uij2),
-                                                  _mm_mul_sd(prod, uij3)));
-            t3            = _mm_mul_sd(_mm_mul_sd(onefourth, logterm),
-                                       _mm_mul_sd(rinv, rinv));
-            t3            = _mm_sub_sd(t3,
-                                       _mm_mul_sd(_mm_mul_sd(diff2, oneeighth),
-                                                  _mm_add_sd(one,
-                                                             _mm_mul_sd(sk2_rinv, rinv))));
-            t1            = _mm_mul_sd(rinv,
-                                       _mm_add_sd(_mm_mul_sd(dlij, t1),
-                                                  _mm_add_pd(t2, t3)));
-
-            dadx1         = _mm_and_pd(t1, obc_mask1);
-
-            /* Evaluate influence of atom ai -> aj */
-            t1            = _mm_add_sd(r, sk_ai);
-            t2            = _mm_sub_sd(r, sk_ai);
-            t3            = _mm_sub_sd(sk_ai, r);
-            obc_mask1     = _mm_cmplt_sd(raj, t1);
-            obc_mask2     = _mm_cmplt_sd(raj, t2);
-            obc_mask3     = _mm_cmplt_sd(raj, t3);
-
-            uij           = gmx_mm_inv_pd(t1);
-            lij           = _mm_or_pd(   _mm_and_pd(obc_mask2, gmx_mm_inv_pd(t2)),
-                                         _mm_andnot_pd(obc_mask2, raj_inv));
-            dlij          = _mm_and_pd(one, obc_mask2);
-            uij2          = _mm_mul_sd(uij, uij);
-            uij3          = _mm_mul_sd(uij2, uij);
-            lij2          = _mm_mul_sd(lij, lij);
-            lij3          = _mm_mul_sd(lij2, lij);
-
-            diff2         = _mm_sub_sd(uij2, lij2);
-            lij_inv       = gmx_mm_invsqrt_pd(lij2);
-            sk2_rinv      = _mm_mul_sd(sk2_ai, rinv);
-            prod          = _mm_mul_sd(onefourth, sk2_rinv);
-
-            logterm       = gmx_mm_log_pd(_mm_mul_sd(uij, lij_inv));
-
-            t1            = _mm_sub_sd(lij, uij);
-            t2            = _mm_mul_sd(diff2,
-                                       _mm_sub_sd(_mm_mul_sd(onefourth, r),
-                                                  prod));
-            t3            = _mm_mul_sd(half, _mm_mul_sd(rinv, logterm));
-            t1            = _mm_add_sd(t1, _mm_add_sd(t2, t3));
-            t4            = _mm_mul_sd(two, _mm_sub_sd(raj_inv, lij));
-            t4            = _mm_and_pd(t4, obc_mask3);
-            t1            = _mm_mul_sd(half, _mm_add_sd(t1, t4));
-
-            GMX_MM_INCREMENT_1VALUE_PD(work+jnrA, _mm_and_pd(t1, obc_mask1));
-
-            t1            = _mm_add_sd(_mm_mul_sd(half, lij2),
-                                       _mm_mul_sd(prod, lij3));
-            t1            = _mm_sub_sd(t1,
-                                       _mm_mul_sd(onefourth,
-                                                  _mm_add_sd(_mm_mul_sd(lij, rinv),
-                                                             _mm_mul_sd(lij3, r))));
-            t2            = _mm_mul_sd(onefourth,
-                                       _mm_add_sd(_mm_mul_sd(uij, rinv),
-                                                  _mm_mul_sd(uij3, r)));
-            t2            = _mm_sub_sd(t2,
-                                       _mm_add_sd(_mm_mul_sd(half, uij2),
-                                                  _mm_mul_sd(prod, uij3)));
-            t3            = _mm_mul_sd(_mm_mul_sd(onefourth, logterm),
-                                       _mm_mul_sd(rinv, rinv));
-            t3            = _mm_sub_sd(t3,
-                                       _mm_mul_sd(_mm_mul_sd(diff2, oneeighth),
-                                                  _mm_add_sd(one,
-                                                             _mm_mul_sd(sk2_rinv, rinv))));
-            t1            = _mm_mul_sd(rinv,
-                                       _mm_add_sd(_mm_mul_sd(dlij, t1),
-                                                  _mm_add_sd(t2, t3)));
-
-            dadx2         = _mm_and_pd(t1, obc_mask1);
-
-            _mm_store_pd(dadx, dadx1);
-            dadx += 2;
-            _mm_store_pd(dadx, dadx2);
-            dadx += 2;
-        }
-        gmx_mm_update_1pot_pd(sum_ai, work+ii);
-
-    }
-
-    /* Parallel summations */
-    if (DOMAINDECOMP(cr))
-    {
-        dd_atom_sum_real(cr->dd, work);
-    }
-
-    if (gb_algorithm == egbHCT)
-    {
-        /* HCT */
-        for (i = 0; i < fr->natoms_force; i++) /* PELA born->nr */
-        {
-            if (born->use[i] != 0)
-            {
-                rr      = top->atomtypes.gb_radius[md->typeA[i]]-doffset;
-                sum     = 1.0/rr - work[i];
-                min_rad = rr + doffset;
-                rad     = 1.0/sum;
-
-                born->bRad[i]   = rad > min_rad ? rad : min_rad;
-                fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
-            }
-        }
-
-        /* Extra communication required for DD */
-        if (DOMAINDECOMP(cr))
-        {
-            dd_atom_spread_real(cr->dd, born->bRad);
-            dd_atom_spread_real(cr->dd, fr->invsqrta);
-        }
-    }
-    else
-    {
-        /* OBC */
-        for (i = 0; i < fr->natoms_force; i++) /* PELA born->nr */
-        {
-            if (born->use[i] != 0)
-            {
-                rr      = top->atomtypes.gb_radius[md->typeA[i]];
-                rr_inv2 = 1.0/rr;
-                rr      = rr-doffset;
-                rr_inv  = 1.0/rr;
-                sum     = rr * work[i];
-                sum2    = sum  * sum;
-                sum3    = sum2 * sum;
-
-                tsum          = tanh(born->obc_alpha*sum-born->obc_beta*sum2+born->obc_gamma*sum3);
-                born->bRad[i] = rr_inv - tsum*rr_inv2;
-                born->bRad[i] = 1.0 / born->bRad[i];
-
-                fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
-
-                tchain         = rr * (born->obc_alpha-2*born->obc_beta*sum+3*born->obc_gamma*sum2);
-                born->drobc[i] = (1.0-tsum*tsum)*tchain*rr_inv2;
-            }
-        }
-        /* Extra (local) communication required for DD */
-        if (DOMAINDECOMP(cr))
-        {
-            dd_atom_spread_real(cr->dd, born->bRad);
-            dd_atom_spread_real(cr->dd, fr->invsqrta);
-            dd_atom_spread_real(cr->dd, born->drobc);
-        }
-    }
-
-
-
-    return 0;
-}
-
-
-int
-calc_gb_chainrule_sse2_double(int natoms, t_nblist *nl, double *dadx, double *dvda,
-                              double *x, double *f, double *fshift, double *shiftvec,
-                              int gb_algorithm, gmx_genborn_t *born, t_mdatoms *md)
-{
-    int           i, k, n, ii, jnr, ii3, is3, nj0, nj1, n0, n1;
-    int           jnrA, jnrB;
-    int           j3A, j3B;
-    int        *  jjnr;
-
-    double        rbi, shX, shY, shZ;
-    double       *rb;
-
-    __m128d       ix, iy, iz;
-    __m128d       jx, jy, jz;
-    __m128d       fix, fiy, fiz;
-    __m128d       dx, dy, dz;
-    __m128d       tx, ty, tz;
-
-    __m128d       rbai, rbaj, f_gb, f_gb_ai;
-    __m128d       xmm1, xmm2, xmm3;
-
-    const __m128d two = _mm_set1_pd(2.0);
-
-    rb     = born->work;
-
-    jjnr   = nl->jjnr;
-
-    /* Loop to get the proper form for the Born radius term, sse style */
-    n0 = 0;
-    n1 = natoms;
-
-    if (gb_algorithm == egbSTILL)
-    {
-        for (i = n0; i < n1; i++)
-        {
-            rbi   = born->bRad[i];
-            rb[i] = (2 * rbi * rbi * dvda[i])/ONE_4PI_EPS0;
-        }
-    }
-    else if (gb_algorithm == egbHCT)
-    {
-        for (i = n0; i < n1; i++)
-        {
-            rbi   = born->bRad[i];
-            rb[i] = rbi * rbi * dvda[i];
-        }
-    }
-    else if (gb_algorithm == egbOBC)
-    {
-        for (i = n0; i < n1; i++)
-        {
-            rbi   = born->bRad[i];
-            rb[i] = rbi * rbi * born->drobc[i] * dvda[i];
-        }
-    }
-
-    jz = _mm_setzero_pd();
-
-    n = j3A = j3B = 0;
-
-    for (i = 0; i < nl->nri; i++)
-    {
-        ii     = nl->iinr[i];
-        ii3    = ii*3;
-        is3    = 3*nl->shift[i];
-        shX    = shiftvec[is3];
-        shY    = shiftvec[is3+1];
-        shZ    = shiftvec[is3+2];
-        nj0    = nl->jindex[i];
-        nj1    = nl->jindex[i+1];
-
-        ix     = _mm_set1_pd(shX+x[ii3+0]);
-        iy     = _mm_set1_pd(shY+x[ii3+1]);
-        iz     = _mm_set1_pd(shZ+x[ii3+2]);
-
-        rbai   = _mm_load1_pd(rb+ii);
-        fix    = _mm_setzero_pd();
-        fiy    = _mm_setzero_pd();
-        fiz    = _mm_setzero_pd();
-
-
-        for (k = nj0; k < nj1-1; k += 2)
-        {
-            jnrA        = jjnr[k];
-            jnrB        = jjnr[k+1];
-
-            j3A         = 3*jnrA;
-            j3B         = 3*jnrB;
-
-            GMX_MM_LOAD_1RVEC_2POINTERS_PD(x+j3A, x+j3B, jx, jy, jz);
-
-            dx          = _mm_sub_pd(ix, jx);
-            dy          = _mm_sub_pd(iy, jy);
-            dz          = _mm_sub_pd(iz, jz);
-
-            GMX_MM_LOAD_2VALUES_PD(rb+jnrA, rb+jnrB, rbaj);
-
-            /* load chain rule terms for j1-4 */
-            f_gb        = _mm_load_pd(dadx);
-            dadx       += 2;
-            f_gb_ai     = _mm_load_pd(dadx);
-            dadx       += 2;
-
-            /* calculate scalar force */
-            f_gb    = _mm_mul_pd(f_gb, rbai);
-            f_gb_ai = _mm_mul_pd(f_gb_ai, rbaj);
-            f_gb    = _mm_add_pd(f_gb, f_gb_ai);
-
-            tx     = _mm_mul_pd(f_gb, dx);
-            ty     = _mm_mul_pd(f_gb, dy);
-            tz     = _mm_mul_pd(f_gb, dz);
-
-            fix    = _mm_add_pd(fix, tx);
-            fiy    = _mm_add_pd(fiy, ty);
-            fiz    = _mm_add_pd(fiz, tz);
-
-            GMX_MM_DECREMENT_1RVEC_2POINTERS_PD(f+j3A, f+j3B, tx, ty, tz);
-        }
-
-        /*deal with odd elements */
-        if (k < nj1)
-        {
-            jnrA        = jjnr[k];
-            j3A         = 3*jnrA;
-
-            GMX_MM_LOAD_1RVEC_1POINTER_PD(x+j3A, jx, jy, jz);
-
-            dx          = _mm_sub_sd(ix, jx);
-            dy          = _mm_sub_sd(iy, jy);
-            dz          = _mm_sub_sd(iz, jz);
-
-            GMX_MM_LOAD_1VALUE_PD(rb+jnrA, rbaj);
-
-            /* load chain rule terms */
-            f_gb        = _mm_load_pd(dadx);
-            dadx       += 2;
-            f_gb_ai     = _mm_load_pd(dadx);
-            dadx       += 2;
-
-            /* calculate scalar force */
-            f_gb    = _mm_mul_sd(f_gb, rbai);
-            f_gb_ai = _mm_mul_sd(f_gb_ai, rbaj);
-            f_gb    = _mm_add_sd(f_gb, f_gb_ai);
-
-            tx     = _mm_mul_sd(f_gb, dx);
-            ty     = _mm_mul_sd(f_gb, dy);
-            tz     = _mm_mul_sd(f_gb, dz);
-
-            fix    = _mm_add_sd(fix, tx);
-            fiy    = _mm_add_sd(fiy, ty);
-            fiz    = _mm_add_sd(fiz, tz);
-
-            GMX_MM_DECREMENT_1RVEC_1POINTER_PD(f+j3A, tx, ty, tz);
-        }
-
-        /* fix/fiy/fiz now contain four partial force terms, that all should be
-         * added to the i particle forces and shift forces.
-         */
-        gmx_mm_update_iforce_1atom_pd(&fix, &fiy, &fiz, f+ii3, fshift+is3);
-    }
-
-    return 0;
-}
-
-#else
-/* keep compiler happy */
-int genborn_sse2_dummy;
-
-#endif /* SSE2 intrinsics available */
diff --git a/src/gromacs/mdlib/genborn_sse2_single.c b/src/gromacs/mdlib/genborn_sse2_single.c
deleted file mode 100644 (file)
index accbb6e..0000000
+++ /dev/null
@@ -1,1510 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2008, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#include "gmxpre.h"
-
-#include <math.h>
-#include <string.h>
-
-#include "gromacs/domdec/domdec.h"
-#include "gromacs/fileio/pdbio.h"
-#include "gromacs/legacyheaders/genborn.h"
-#include "gromacs/legacyheaders/names.h"
-#include "gromacs/legacyheaders/network.h"
-#include "gromacs/legacyheaders/typedefs.h"
-#include "gromacs/math/units.h"
-#include "gromacs/math/vec.h"
-#include "gromacs/utility/fatalerror.h"
-#include "gromacs/utility/gmxmpi.h"
-#include "gromacs/utility/smalloc.h"
-
-
-/* Only compile this file if SSE intrinsics are available */
-#if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER)
-
-#include "genborn_sse2_single.h"
-
-#include <emmintrin.h>
-#include <gmx_sse2_single.h>
-
-
-int
-calc_gb_rad_still_sse2_single(t_commrec *cr, t_forcerec *fr,
-                              int natoms, gmx_localtop_t *top,
-                              float *x, t_nblist *nl,
-                              gmx_genborn_t *born)
-{
-    int          i, k, n, ii, is3, ii3, nj0, nj1, offset;
-    int          jnrA, jnrB, jnrC, jnrD, j3A, j3B, j3C, j3D;
-    int          jnrE, jnrF, jnrG, jnrH, j3E, j3F, j3G, j3H;
-    int          shift;
-    int         *mdtype;
-    real         shX, shY, shZ;
-    int         *jjnr;
-    real        *shiftvec;
-
-    float        gpi_ai, gpi2;
-    float        factor;
-    float       *gb_radius;
-    float       *vsolv;
-    float       *work;
-    float       *dadx;
-
-    __m128       ix, iy, iz;
-    __m128       jx, jy, jz;
-    __m128       dx, dy, dz;
-    __m128       tx, ty, tz;
-    __m128       jxB, jyB, jzB;
-    __m128       dxB, dyB, dzB;
-    __m128       txB, tyB, tzB;
-    __m128       rsq, rinv, rinv2, rinv4, rinv6;
-    __m128       rsqB, rinvB, rinv2B, rinv4B, rinv6B;
-    __m128       ratio, gpi, rai, raj, vai, vaj, rvdw;
-    __m128       ratioB, rajB, vajB, rvdwB;
-    __m128       ccf, dccf, theta, cosq, term, sinq, res, prod, prod_ai, tmp;
-    __m128       ccfB, dccfB, thetaB, cosqB, termB, sinqB, resB, prodB;
-    __m128       mask, icf4, icf6, mask_cmp;
-    __m128       icf4B, icf6B, mask_cmpB;
-
-    __m128       mask1 = gmx_mm_castsi128_ps( _mm_set_epi32(0, 0, 0, 0xffffffff) );
-    __m128       mask2 = gmx_mm_castsi128_ps( _mm_set_epi32(0, 0, 0xffffffff, 0xffffffff) );
-    __m128       mask3 = gmx_mm_castsi128_ps( _mm_set_epi32(0, 0xffffffff, 0xffffffff, 0xffffffff) );
-
-    const __m128 half   = _mm_set1_ps(0.5f);
-    const __m128 three  = _mm_set1_ps(3.0f);
-    const __m128 one    = _mm_set1_ps(1.0f);
-    const __m128 two    = _mm_set1_ps(2.0f);
-    const __m128 zero   = _mm_set1_ps(0.0f);
-    const __m128 four   = _mm_set1_ps(4.0f);
-
-    const __m128 still_p5inv  = _mm_set1_ps(STILL_P5INV);
-    const __m128 still_pip5   = _mm_set1_ps(STILL_PIP5);
-    const __m128 still_p4     = _mm_set1_ps(STILL_P4);
-
-    factor  = 0.5 * ONE_4PI_EPS0;
-
-    gb_radius = born->gb_radius;
-    vsolv     = born->vsolv;
-    work      = born->gpol_still_work;
-    jjnr      = nl->jjnr;
-    shiftvec  = fr->shift_vec[0];
-    dadx      = fr->dadx;
-
-    jnrA = jnrB = jnrC = jnrD = 0;
-    jx   = _mm_setzero_ps();
-    jy   = _mm_setzero_ps();
-    jz   = _mm_setzero_ps();
-
-    n = 0;
-
-    for (i = 0; i < natoms; i++)
-    {
-        work[i] = 0;
-    }
-
-    for (i = 0; i < nl->nri; i++)
-    {
-        ii     = nl->iinr[i];
-        ii3    = ii*3;
-        is3    = 3*nl->shift[i];
-        shX    = shiftvec[is3];
-        shY    = shiftvec[is3+1];
-        shZ    = shiftvec[is3+2];
-        nj0    = nl->jindex[i];
-        nj1    = nl->jindex[i+1];
-
-        ix     = _mm_set1_ps(shX+x[ii3+0]);
-        iy     = _mm_set1_ps(shY+x[ii3+1]);
-        iz     = _mm_set1_ps(shZ+x[ii3+2]);
-
-        offset = (nj1-nj0)%4;
-
-        /* Polarization energy for atom ai */
-        gpi    = _mm_setzero_ps();
-
-        rai     = _mm_load1_ps(gb_radius+ii);
-        prod_ai = _mm_set1_ps(STILL_P4*vsolv[ii]);
-
-        for (k = nj0; k < nj1-4-offset; k += 8)
-        {
-            jnrA        = jjnr[k];
-            jnrB        = jjnr[k+1];
-            jnrC        = jjnr[k+2];
-            jnrD        = jjnr[k+3];
-            jnrE        = jjnr[k+4];
-            jnrF        = jjnr[k+5];
-            jnrG        = jjnr[k+6];
-            jnrH        = jjnr[k+7];
-
-            j3A         = 3*jnrA;
-            j3B         = 3*jnrB;
-            j3C         = 3*jnrC;
-            j3D         = 3*jnrD;
-            j3E         = 3*jnrE;
-            j3F         = 3*jnrF;
-            j3G         = 3*jnrG;
-            j3H         = 3*jnrH;
-
-            GMX_MM_LOAD_1RVEC_4POINTERS_PS(x+j3A, x+j3B, x+j3C, x+j3D, jx, jy, jz);
-            GMX_MM_LOAD_1RVEC_4POINTERS_PS(x+j3E, x+j3F, x+j3G, x+j3H, jxB, jyB, jzB);
-
-            GMX_MM_LOAD_4VALUES_PS(gb_radius+jnrA, gb_radius+jnrB, gb_radius+jnrC, gb_radius+jnrD, raj);
-            GMX_MM_LOAD_4VALUES_PS(gb_radius+jnrE, gb_radius+jnrF, gb_radius+jnrG, gb_radius+jnrH, rajB);
-            GMX_MM_LOAD_4VALUES_PS(vsolv+jnrA, vsolv+jnrB, vsolv+jnrC, vsolv+jnrD, vaj);
-            GMX_MM_LOAD_4VALUES_PS(vsolv+jnrE, vsolv+jnrF, vsolv+jnrG, vsolv+jnrH, vajB);
-
-            dx          = _mm_sub_ps(ix, jx);
-            dy          = _mm_sub_ps(iy, jy);
-            dz          = _mm_sub_ps(iz, jz);
-            dxB         = _mm_sub_ps(ix, jxB);
-            dyB         = _mm_sub_ps(iy, jyB);
-            dzB         = _mm_sub_ps(iz, jzB);
-
-            rsq         = gmx_mm_calc_rsq_ps(dx, dy, dz);
-            rsqB        = gmx_mm_calc_rsq_ps(dxB, dyB, dzB);
-            rinv        = gmx_mm_invsqrt_ps(rsq);
-            rinvB       = gmx_mm_invsqrt_ps(rsqB);
-            rinv2       = _mm_mul_ps(rinv, rinv);
-            rinv2B      = _mm_mul_ps(rinvB, rinvB);
-            rinv4       = _mm_mul_ps(rinv2, rinv2);
-            rinv4B      = _mm_mul_ps(rinv2B, rinv2B);
-            rinv6       = _mm_mul_ps(rinv4, rinv2);
-            rinv6B      = _mm_mul_ps(rinv4B, rinv2B);
-
-            rvdw        = _mm_add_ps(rai, raj);
-            rvdwB       = _mm_add_ps(rai, rajB);
-            ratio       = _mm_mul_ps(rsq, gmx_mm_inv_ps( _mm_mul_ps(rvdw, rvdw)));
-            ratioB      = _mm_mul_ps(rsqB, gmx_mm_inv_ps( _mm_mul_ps(rvdwB, rvdwB)));
-
-            mask_cmp    = _mm_cmple_ps(ratio, still_p5inv);
-            mask_cmpB   = _mm_cmple_ps(ratioB, still_p5inv);
-
-            /* gmx_mm_sincos_ps() is quite expensive, so avoid calculating it if we can! */
-            if (0 == _mm_movemask_ps(mask_cmp) )
-            {
-                /* if ratio>still_p5inv for ALL elements */
-                ccf         = one;
-                dccf        = _mm_setzero_ps();
-            }
-            else
-            {
-                ratio       = _mm_min_ps(ratio, still_p5inv);
-                theta       = _mm_mul_ps(ratio, still_pip5);
-                gmx_mm_sincos_ps(theta, &sinq, &cosq);
-                term        = _mm_mul_ps(half, _mm_sub_ps(one, cosq));
-                ccf         = _mm_mul_ps(term, term);
-                dccf        = _mm_mul_ps(_mm_mul_ps(two, term),
-                                         _mm_mul_ps(sinq, theta));
-            }
-            if (0 == _mm_movemask_ps(mask_cmpB) )
-            {
-                /* if ratio>still_p5inv for ALL elements */
-                ccfB        = one;
-                dccfB       = _mm_setzero_ps();
-            }
-            else
-            {
-                ratioB      = _mm_min_ps(ratioB, still_p5inv);
-                thetaB      = _mm_mul_ps(ratioB, still_pip5);
-                gmx_mm_sincos_ps(thetaB, &sinqB, &cosqB);
-                termB       = _mm_mul_ps(half, _mm_sub_ps(one, cosqB));
-                ccfB        = _mm_mul_ps(termB, termB);
-                dccfB       = _mm_mul_ps(_mm_mul_ps(two, termB),
-                                         _mm_mul_ps(sinqB, thetaB));
-            }
-
-            prod        = _mm_mul_ps(still_p4, vaj);
-            prodB       = _mm_mul_ps(still_p4, vajB);
-            icf4        = _mm_mul_ps(ccf, rinv4);
-            icf4B       = _mm_mul_ps(ccfB, rinv4B);
-            icf6        = _mm_mul_ps( _mm_sub_ps( _mm_mul_ps(four, ccf), dccf), rinv6);
-            icf6B       = _mm_mul_ps( _mm_sub_ps( _mm_mul_ps(four, ccfB), dccfB), rinv6B);
-
-            GMX_MM_INCREMENT_4VALUES_PS(work+jnrA, work+jnrB, work+jnrC, work+jnrD, _mm_mul_ps(prod_ai, icf4));
-            GMX_MM_INCREMENT_4VALUES_PS(work+jnrE, work+jnrF, work+jnrG, work+jnrH, _mm_mul_ps(prod_ai, icf4B));
-
-            gpi           = _mm_add_ps(gpi, _mm_add_ps( _mm_mul_ps(prod, icf4), _mm_mul_ps(prodB, icf4B) ) );
-
-            _mm_store_ps(dadx, _mm_mul_ps(prod, icf6));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_mul_ps(prod_ai, icf6));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_mul_ps(prodB, icf6B));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_mul_ps(prod_ai, icf6B));
-            dadx += 4;
-        }
-
-        for (; k < nj1-offset; k += 4)
-        {
-            jnrA        = jjnr[k];
-            jnrB        = jjnr[k+1];
-            jnrC        = jjnr[k+2];
-            jnrD        = jjnr[k+3];
-
-            j3A         = 3*jnrA;
-            j3B         = 3*jnrB;
-            j3C         = 3*jnrC;
-            j3D         = 3*jnrD;
-
-            GMX_MM_LOAD_1RVEC_4POINTERS_PS(x+j3A, x+j3B, x+j3C, x+j3D, jx, jy, jz);
-
-            GMX_MM_LOAD_4VALUES_PS(gb_radius+jnrA, gb_radius+jnrB, gb_radius+jnrC, gb_radius+jnrD, raj);
-            GMX_MM_LOAD_4VALUES_PS(vsolv+jnrA, vsolv+jnrB, vsolv+jnrC, vsolv+jnrD, vaj);
-
-            dx          = _mm_sub_ps(ix, jx);
-            dy          = _mm_sub_ps(iy, jy);
-            dz          = _mm_sub_ps(iz, jz);
-
-            rsq         = gmx_mm_calc_rsq_ps(dx, dy, dz);
-            rinv        = gmx_mm_invsqrt_ps(rsq);
-            rinv2       = _mm_mul_ps(rinv, rinv);
-            rinv4       = _mm_mul_ps(rinv2, rinv2);
-            rinv6       = _mm_mul_ps(rinv4, rinv2);
-
-            rvdw        = _mm_add_ps(rai, raj);
-            ratio       = _mm_mul_ps(rsq, gmx_mm_inv_ps( _mm_mul_ps(rvdw, rvdw)));
-
-            mask_cmp    = _mm_cmple_ps(ratio, still_p5inv);
-
-            /* gmx_mm_sincos_ps() is quite expensive, so avoid calculating it if we can! */
-            if (0 == _mm_movemask_ps(mask_cmp))
-            {
-                /* if ratio>still_p5inv for ALL elements */
-                ccf         = one;
-                dccf        = _mm_setzero_ps();
-            }
-            else
-            {
-                ratio       = _mm_min_ps(ratio, still_p5inv);
-                theta       = _mm_mul_ps(ratio, still_pip5);
-                gmx_mm_sincos_ps(theta, &sinq, &cosq);
-                term        = _mm_mul_ps(half, _mm_sub_ps(one, cosq));
-                ccf         = _mm_mul_ps(term, term);
-                dccf        = _mm_mul_ps(_mm_mul_ps(two, term),
-                                         _mm_mul_ps(sinq, theta));
-            }
-
-            prod        = _mm_mul_ps(still_p4, vaj);
-            icf4        = _mm_mul_ps(ccf, rinv4);
-            icf6        = _mm_mul_ps( _mm_sub_ps( _mm_mul_ps(four, ccf), dccf), rinv6);
-
-            GMX_MM_INCREMENT_4VALUES_PS(work+jnrA, work+jnrB, work+jnrC, work+jnrD, _mm_mul_ps(prod_ai, icf4));
-
-            gpi           = _mm_add_ps(gpi, _mm_mul_ps(prod, icf4));
-
-            _mm_store_ps(dadx, _mm_mul_ps(prod, icf6));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_mul_ps(prod_ai, icf6));
-            dadx += 4;
-        }
-
-        if (offset != 0)
-        {
-            if (offset == 1)
-            {
-                jnrA        = jjnr[k];
-                j3A         = 3*jnrA;
-                GMX_MM_LOAD_1RVEC_1POINTER_PS(x+j3A, jx, jy, jz);
-                GMX_MM_LOAD_1VALUE_PS(gb_radius+jnrA, raj);
-                GMX_MM_LOAD_1VALUE_PS(vsolv+jnrA, vaj);
-                mask        = mask1;
-            }
-            else if (offset == 2)
-            {
-                jnrA        = jjnr[k];
-                jnrB        = jjnr[k+1];
-                j3A         = 3*jnrA;
-                j3B         = 3*jnrB;
-                GMX_MM_LOAD_1RVEC_2POINTERS_PS(x+j3A, x+j3B, jx, jy, jz);
-                GMX_MM_LOAD_2VALUES_PS(gb_radius+jnrA, gb_radius+jnrB, raj);
-                GMX_MM_LOAD_2VALUES_PS(vsolv+jnrA, vsolv+jnrB, vaj);
-                mask        = mask2;
-            }
-            else
-            {
-                /* offset must be 3 */
-                jnrA        = jjnr[k];
-                jnrB        = jjnr[k+1];
-                jnrC        = jjnr[k+2];
-                j3A         = 3*jnrA;
-                j3B         = 3*jnrB;
-                j3C         = 3*jnrC;
-                GMX_MM_LOAD_1RVEC_3POINTERS_PS(x+j3A, x+j3B, x+j3C, jx, jy, jz);
-                GMX_MM_LOAD_3VALUES_PS(gb_radius+jnrA, gb_radius+jnrB, gb_radius+jnrC, raj);
-                GMX_MM_LOAD_3VALUES_PS(vsolv+jnrA, vsolv+jnrB, vsolv+jnrC, vaj);
-                mask        = mask3;
-            }
-
-            dx          = _mm_sub_ps(ix, jx);
-            dy          = _mm_sub_ps(iy, jy);
-            dz          = _mm_sub_ps(iz, jz);
-
-            rsq         = gmx_mm_calc_rsq_ps(dx, dy, dz);
-            rinv        = gmx_mm_invsqrt_ps(rsq);
-            rinv2       = _mm_mul_ps(rinv, rinv);
-            rinv4       = _mm_mul_ps(rinv2, rinv2);
-            rinv6       = _mm_mul_ps(rinv4, rinv2);
-
-            rvdw        = _mm_add_ps(rai, raj);
-            ratio       = _mm_mul_ps(rsq, gmx_mm_inv_ps( _mm_mul_ps(rvdw, rvdw)));
-
-            mask_cmp    = _mm_cmple_ps(ratio, still_p5inv);
-
-            if (0 == _mm_movemask_ps(mask_cmp))
-            {
-                /* if ratio>still_p5inv for ALL elements */
-                ccf         = one;
-                dccf        = _mm_setzero_ps();
-            }
-            else
-            {
-                ratio       = _mm_min_ps(ratio, still_p5inv);
-                theta       = _mm_mul_ps(ratio, still_pip5);
-                gmx_mm_sincos_ps(theta, &sinq, &cosq);
-                term        = _mm_mul_ps(half, _mm_sub_ps(one, cosq));
-                ccf         = _mm_mul_ps(term, term);
-                dccf        = _mm_mul_ps(_mm_mul_ps(two, term),
-                                         _mm_mul_ps(sinq, theta));
-            }
-
-            prod        = _mm_mul_ps(still_p4, vaj);
-            icf4        = _mm_mul_ps(ccf, rinv4);
-            icf6        = _mm_mul_ps( _mm_sub_ps( _mm_mul_ps(four, ccf), dccf), rinv6);
-
-            gpi           = _mm_add_ps(gpi, _mm_mul_ps(prod, icf4));
-
-            _mm_store_ps(dadx, _mm_mul_ps(prod, icf6));
-            dadx += 4;
-            _mm_store_ps(dadx, _mm_mul_ps(prod_ai, icf6));
-            dadx += 4;
-
-            tmp = _mm_mul_ps(prod_ai, icf4);
-
-            if (offset == 1)
-            {
-                GMX_MM_INCREMENT_1VALUE_PS(work+jnrA, tmp);
-            }
-            else if (offset == 2)
-            {
-                GMX_MM_INCREMENT_2VALUES_PS(work+jnrA, work+jnrB, tmp);
-            }
-            else
-            {
-                /* offset must be 3 */
-                GMX_MM_INCREMENT_3VALUES_PS(work+jnrA, work+jnrB, work+jnrC, tmp);
-            }
-        }
-        GMX_MM_UPDATE_1POT_PS(gpi, work+ii);
-    }
-
-    /* Sum up the polarization energy from other nodes */
-    if (DOMAINDECOMP(cr))
-    {
-        dd_atom_sum_real(cr->dd, work);
-    }
-
-    /* Compute the radii */
-    for (i = 0; i < fr->natoms_force; i++) /* PELA born->nr */
-    {
-        if (born->use[i] != 0)
-        {
-            gpi_ai           = born->gpol[i] + work[i]; /* add gpi to the initial pol energy gpi_ai*/
-            gpi2             = gpi_ai * gpi_ai;
-            born->bRad[i]    = factor*gmx_invsqrt(gpi2);
-            fr->invsqrta[i]  = gmx_invsqrt(born->bRad[i]);
-        }
-    }
-
-    /* Extra (local) communication required for DD */
-    if (DOMAINDECOMP(cr))
-    {
-        dd_atom_spread_real(cr->dd, born->bRad);
-        dd_atom_spread_real(cr->dd, fr->invsqrta);
-    }
-
-    return 0;
-}
-
-
-int
-calc_gb_rad_hct_obc_sse2_single(t_commrec *cr, t_forcerec * fr, int natoms, gmx_localtop_t *top,
-                                float *x, t_nblist *nl, gmx_genborn_t *born, t_mdatoms *md, int gb_algorithm)
-{
-    int          i, ai, k, n, ii, ii3, is3, nj0, nj1, at0, at1, offset;
-    int          jnrA, jnrB, jnrC, jnrD;
-    int          j3A, j3B, j3C, j3D;
-    int          jnrE, jnrF, jnrG, jnrH;
-    int          j3E, j3F, j3G, j3H;
-    float        shX, shY, shZ;
-    float        rr, rr_inv, rr_inv2, sum_tmp, sum, sum2, sum3, gbr;
-    float        sum_ai2, sum_ai3, tsum, tchain, doffset;
-    float       *obc_param;
-    float       *gb_radius;
-    float       *work;
-    int       *  jjnr;
-    float       *dadx;
-    float       *shiftvec;
-    float        min_rad, rad;
-
-    __m128       ix, iy, iz, jx, jy, jz;
-    __m128       dx, dy, dz, t1, t2, t3, t4;
-    __m128       rsq, rinv, r;
-    __m128       rai, rai_inv, raj, raj_inv, rai_inv2, sk, sk2, lij, dlij, duij;
-    __m128       uij, lij2, uij2, lij3, uij3, diff2;
-    __m128       lij_inv, sk2_inv, prod, log_term, tmp, tmp_sum;
-    __m128       sum_ai, tmp_ai, sk_ai, sk_aj, sk2_ai, sk2_aj, sk2_rinv;
-    __m128       dadx1, dadx2;
-    __m128       logterm;
-    __m128       mask;
-    __m128       obc_mask1, obc_mask2, obc_mask3;
-    __m128       jxB, jyB, jzB, t1B, t2B, t3B, t4B;
-    __m128       dxB, dyB, dzB, rsqB, rinvB, rB;
-    __m128       rajB, raj_invB, rai_inv2B, sk2B, lijB, dlijB, duijB;
-    __m128       uijB, lij2B, uij2B, lij3B, uij3B, diff2B;
-    __m128       lij_invB, sk2_invB, prodB;
-    __m128       sk_ajB, sk2_ajB, sk2_rinvB;
-    __m128       dadx1B, dadx2B;
-    __m128       logtermB;
-    __m128       obc_mask1B, obc_mask2B, obc_mask3B;
-
-    __m128       mask1 = gmx_mm_castsi128_ps( _mm_set_epi32(0, 0, 0, 0xffffffff) );
-    __m128       mask2 = gmx_mm_castsi128_ps( _mm_set_epi32(0, 0, 0xffffffff, 0xffffffff) );
-    __m128       mask3 = gmx_mm_castsi128_ps( _mm_set_epi32(0, 0xffffffff, 0xffffffff, 0xffffffff) );
-
-    __m128       oneeighth   = _mm_set1_ps(0.125);
-    __m128       onefourth   = _mm_set1_ps(0.25);
-
-    const __m128 half  = _mm_set1_ps(0.5f);
-    const __m128 three = _mm_set1_ps(3.0f);
-    const __m128 one   = _mm_set1_ps(1.0f);
-    const __m128 two   = _mm_set1_ps(2.0f);
-    const __m128 zero  = _mm_set1_ps(0.0f);
-    const __m128 neg   = _mm_set1_ps(-1.0f);
-
-    /* Set the dielectric offset */
-    doffset   = born->gb_doffset;
-    gb_radius = born->gb_radius;
-    obc_param = born->param;
-    work      = born->gpol_hct_work;
-    jjnr      = nl->jjnr;
-    dadx      = fr->dadx;
-    shiftvec  = fr->shift_vec[0];
-
-    jx        = _mm_setzero_ps();
-    jy        = _mm_setzero_ps();
-    jz        = _mm_setzero_ps();
-
-    jnrA = jnrB = jnrC = jnrD = 0;
-
-    for (i = 0; i < born->nr; i++)
-    {
-        work[i] = 0;
-    }
-
-    for (i = 0; i < nl->nri; i++)
-    {
-        ii     = nl->iinr[i];
-        ii3    = ii*3;
-        is3    = 3*nl->shift[i];
-        shX    = shiftvec[is3];
-        shY    = shiftvec[is3+1];
-        shZ    = shiftvec[is3+2];
-        nj0    = nl->jindex[i];
-        nj1    = nl->jindex[i+1];
-
-        ix     = _mm_set1_ps(shX+x[ii3+0]);
-        iy     = _mm_set1_ps(shY+x[ii3+1]);
-        iz     = _mm_set1_ps(shZ+x[ii3+2]);
-
-        offset = (nj1-nj0)%4;
-
-        rai     = _mm_load1_ps(gb_radius+ii);
-        rai_inv = gmx_mm_inv_ps(rai);
-
-        sum_ai = _mm_setzero_ps();
-
-        sk_ai  = _mm_load1_ps(born->param+ii);
-        sk2_ai = _mm_mul_ps(sk_ai, sk_ai);
-
-        for (k = nj0; k < nj1-4-offset; k += 8)
-        {
-            jnrA        = jjnr[k];
-            jnrB        = jjnr[k+1];
-            jnrC        = jjnr[k+2];
-            jnrD        = jjnr[k+3];
-            jnrE        = jjnr[k+4];
-            jnrF        = jjnr[k+5];
-            jnrG        = jjnr[k+6];
-            jnrH        = jjnr[k+7];
-
-            j3A         = 3*jnrA;
-            j3B         = 3*jnrB;
-            j3C         = 3*jnrC;
-            j3D         = 3*jnrD;
-            j3E         = 3*jnrE;
-            j3F         = 3*jnrF;
-            j3G         = 3*jnrG;
-            j3H         = 3*jnrH;
-
-            GMX_MM_LOAD_1RVEC_4POINTERS_PS(x+j3A, x+j3B, x+j3C, x+j3D, jx, jy, jz);
-            GMX_MM_LOAD_1RVEC_4POINTERS_PS(x+j3E, x+j3F, x+j3G, x+j3H, jxB, jyB, jzB);
-            GMX_MM_LOAD_4VALUES_PS(gb_radius+jnrA, gb_radius+jnrB, gb_radius+jnrC, gb_radius+jnrD, raj);
-            GMX_MM_LOAD_4VALUES_PS(gb_radius+jnrE, gb_radius+jnrF, gb_radius+jnrG, gb_radius+jnrH, rajB);
-            GMX_MM_LOAD_4VALUES_PS(obc_param+jnrA, obc_param+jnrB, obc_param+jnrC, obc_param+jnrD, sk_aj);
-            GMX_MM_LOAD_4VALUES_PS(obc_param+jnrE, obc_param+jnrF, obc_param+jnrG, obc_param+jnrH, sk_ajB);
-
-            dx    = _mm_sub_ps(ix, jx);
-            dy    = _mm_sub_ps(iy, jy);
-            dz    = _mm_sub_ps(iz, jz);
-            dxB   = _mm_sub_ps(ix, jxB);
-            dyB   = _mm_sub_ps(iy, jyB);
-            dzB   = _mm_sub_ps(iz, jzB);
-
-            rsq         = gmx_mm_calc_rsq_ps(dx, dy, dz);
-            rsqB        = gmx_mm_calc_rsq_ps(dxB, dyB, dzB);
-
-            rinv        = gmx_mm_invsqrt_ps(rsq);
-            r           = _mm_mul_ps(rsq, rinv);
-            rinvB       = gmx_mm_invsqrt_ps(rsqB);
-            rB          = _mm_mul_ps(rsqB, rinvB);
-
-            /* Compute raj_inv aj1-4 */
-            raj_inv     = gmx_mm_inv_ps(raj);
-            raj_invB    = gmx_mm_inv_ps(rajB);
-
-            /* Evaluate influence of atom aj -> ai */
-            t1            = _mm_add_ps(r, sk_aj);
-            t2            = _mm_sub_ps(r, sk_aj);
-            t3            = _mm_sub_ps(sk_aj, r);
-            t1B           = _mm_add_ps(rB, sk_ajB);
-            t2B           = _mm_sub_ps(rB, sk_ajB);
-            t3B           = _mm_sub_ps(sk_ajB, rB);
-            obc_mask1     = _mm_cmplt_ps(rai, t1);
-            obc_mask2     = _mm_cmplt_ps(rai, t2);
-            obc_mask3     = _mm_cmplt_ps(rai, t3);
-            obc_mask1B    = _mm_cmplt_ps(rai, t1B);
-            obc_mask2B    = _mm_cmplt_ps(rai, t2B);
-            obc_mask3B    = _mm_cmplt_ps(rai, t3B);
-
-            uij           = gmx_mm_inv_ps(t1);
-            lij           = _mm_or_ps(   _mm_and_ps(obc_mask2, gmx_mm_inv_ps(t2)),
-                                         _mm_andnot_ps(obc_mask2, rai_inv));
-            dlij          = _mm_and_ps(one, obc_mask2);
-            uij2          = _mm_mul_ps(uij, uij);
-            uij3          = _mm_mul_ps(uij2, uij);
-            lij2          = _mm_mul_ps(lij, lij);
-            lij3          = _mm_mul_ps(lij2, lij);
-
-            uijB          = gmx_mm_inv_ps(t1B);
-            lijB          = _mm_or_ps(   _mm_and_ps(obc_mask2B, gmx_mm_inv_ps(t2B)),
-                                         _mm_andnot_ps(obc_mask2B, rai_inv));
-            dlijB         = _mm_and_ps(one, obc_mask2B);
-            uij2B         = _mm_mul_ps(uijB, uijB);
-            uij3B         = _mm_mul_ps(uij2B, uijB);
-            lij2B         = _mm_mul_ps(lijB, lijB);
-            lij3B         = _mm_mul_ps(lij2B, lijB);
-
-            diff2         = _mm_sub_ps(uij2, lij2);
-            lij_inv       = gmx_mm_invsqrt_ps(lij2);
-            sk2_aj        = _mm_mul_ps(sk_aj, sk_aj);
-            sk2_rinv      = _mm_mul_ps(sk2_aj, rinv);
-            prod          = _mm_mul_ps(onefourth, sk2_rinv);
-
-            diff2B        = _mm_sub_ps(uij2B, lij2B);
-            lij_invB      = gmx_mm_invsqrt_ps(lij2B);
-            sk2_ajB       = _mm_mul_ps(sk_ajB, sk_ajB);
-            sk2_rinvB     = _mm_mul_ps(sk2_ajB, rinvB);
-            prodB         = _mm_mul_ps(onefourth, sk2_rinvB);
-
-            logterm       = gmx_mm_log_ps(_mm_mul_ps(uij, lij_inv));
-            logtermB      = gmx_mm_log_ps(_mm_mul_ps(uijB, lij_invB));
-
-            t1            = _mm_sub_ps(lij, uij);
-            t2            = _mm_mul_ps(diff2,
-                                       _mm_sub_ps(_mm_mul_ps(onefourth, r),
-                                                  prod));
-            t3            = _mm_mul_ps(half, _mm_mul_ps(rinv, logterm));
-            t1            = _mm_add_ps(t1, _mm_add_ps(t2, t3));
-            t4            = _mm_mul_ps(two, _mm_sub_ps(rai_inv, lij));
-            t4            = _mm_and_ps(t4, obc_mask3);
-            t1            = _mm_mul_ps(half, _mm_add_ps(t1, t4));
-
-            t1B           = _mm_sub_ps(lijB, uijB);
-            t2B           = _mm_mul_ps(diff2B,
-                                       _mm_sub_ps(_mm_mul_ps(onefourth, rB),
-                                                  prodB));
-            t3B           = _mm_mul_ps(half, _mm_mul_ps(rinvB, logtermB));
-            t1B           = _mm_add_ps(t1B, _mm_add_ps(t2B, t3B));
-            t4B           = _mm_mul_ps(two, _mm_sub_ps(rai_inv, lijB));
-            t4B           = _mm_and_ps(t4B, obc_mask3B);
-            t1B           = _mm_mul_ps(half, _mm_add_ps(t1B, t4B));
-
-            sum_ai        = _mm_add_ps(sum_ai, _mm_add_ps( _mm_and_ps(t1, obc_mask1), _mm_and_ps(t1B, obc_mask1B) ));
-
-            t1            = _mm_add_ps(_mm_mul_ps(half, lij2),
-                                       _mm_mul_ps(prod, lij3));
-            t1            = _mm_sub_ps(t1,
-                                       _mm_mul_ps(onefourth,
-                                                  _mm_add_ps(_mm_mul_ps(lij, rinv),
-                                                             _mm_mul_ps(lij3, r))));
-            t2            = _mm_mul_ps(onefourth,
-                                       _mm_add_ps(_mm_mul_ps(uij, rinv),
-                                                  _mm_mul_ps(uij3, r)));
-            t2            = _mm_sub_ps(t2,
-                                       _mm_add_ps(_mm_mul_ps(half, uij2),
-                                                  _mm_mul_ps(prod, uij3)));
-            t3            = _mm_mul_ps(_mm_mul_ps(onefourth, logterm),
-                                       _mm_mul_ps(rinv, rinv));
-            t3            = _mm_sub_ps(t3,
-                                       _mm_mul_ps(_mm_mul_ps(diff2, oneeighth),
-                                                  _mm_add_ps(one,
-                                                             _mm_mul_ps(sk2_rinv, rinv))));
-            t1            = _mm_mul_ps(rinv,
-                                       _mm_add_ps(_mm_mul_ps(dlij, t1),
-                                                  _mm_add_ps(t2, t3)));
-
-
-
-            t1B           = _mm_add_ps(_mm_mul_ps(half, lij2B),
-                                       _mm_mul_ps(prodB, lij3B));
-            t1B           = _mm_sub_ps(t1B,
-                                       _mm_mul_ps(onefourth,
-                                                  _mm_add_ps(_mm_mul_ps(lijB, rinvB),
-                                                             _mm_mul_ps(lij3B, rB))));
-            t2B           = _mm_mul_ps(onefourth,
-                                       _mm_add_ps(_mm_mul_ps(uijB, rinvB),
-                                                  _mm_mul_ps(uij3B, rB)));
-            t2B           = _mm_sub_ps(t2B,
-                                       _mm_add_ps(_mm_mul_ps(half, uij2B),
-                                                  _mm_mul_ps(prodB, uij3B)));
-            t3B           = _mm_mul_ps(_mm_mul_ps(onefourth, logtermB),
-                                       _mm_mul_ps(rinvB, rinvB));
-            t3B           = _mm_sub_ps(t3B,
-                                       _mm_mul_ps(_mm_mul_ps(diff2B, oneeighth),
-                                                  _mm_add_ps(one,
-                                                             _mm_mul_ps(sk2_rinvB, rinvB))));
-            t1B           = _mm_mul_ps(rinvB,
-                                       _mm_add_ps(_mm_mul_ps(dlijB, t1B),
-                                                  _mm_add_ps(t2B, t3B)));
-
-            dadx1         = _mm_and_ps(t1, obc_mask1);
-            dadx1B        = _mm_and_ps(t1B, obc_mask1B);
-
-
-            /* Evaluate influence of atom ai -> aj */
-            t1            = _mm_add_ps(r, sk_ai);
-            t2            = _mm_sub_ps(r, sk_ai);
-            t3            = _mm_sub_ps(sk_ai, r);
-            t1B           = _mm_add_ps(rB, sk_ai);
-            t2B           = _mm_sub_ps(rB, sk_ai);
-            t3B           = _mm_sub_ps(sk_ai, rB);
-            obc_mask1     = _mm_cmplt_ps(raj, t1);
-            obc_mask2     = _mm_cmplt_ps(raj, t2);
-            obc_mask3     = _mm_cmplt_ps(raj, t3);
-            obc_mask1B    = _mm_cmplt_ps(rajB, t1B);
-            obc_mask2B    = _mm_cmplt_ps(rajB, t2B);
-            obc_mask3B    = _mm_cmplt_ps(rajB, t3B);
-
-            uij           = gmx_mm_inv_ps(t1);
-            lij           = _mm_or_ps(   _mm_and_ps(obc_mask2, gmx_mm_inv_ps(t2)),
-                                         _mm_andnot_ps(obc_mask2, raj_inv));
-            dlij          = _mm_and_ps(one, obc_mask2);
-            uij2          = _mm_mul_ps(uij, uij);
-            uij3          = _mm_mul_ps(uij2, uij);
-            lij2          = _mm_mul_ps(lij, lij);
-            lij3          = _mm_mul_ps(lij2, lij);
-
-            uijB          = gmx_mm_inv_ps(t1B);
-            lijB          = _mm_or_ps(   _mm_and_ps(obc_mask2B, gmx_mm_inv_ps(t2B)),
-                                         _mm_andnot_ps(obc_mask2B, raj_invB));
-            dlijB         = _mm_and_ps(one, obc_mask2B);
-            uij2B         = _mm_mul_ps(uijB, uijB);
-            uij3B         = _mm_mul_ps(uij2B, uijB);
-            lij2B         = _mm_mul_ps(lijB, lijB);
-            lij3B         = _mm_mul_ps(lij2B, lijB);
-
-            diff2         = _mm_sub_ps(uij2, lij2);
-            lij_inv       = gmx_mm_invsqrt_ps(lij2);
-            sk2_rinv      = _mm_mul_ps(sk2_ai, rinv);
-            prod          = _mm_mul_ps(onefourth, sk2_rinv);
-
-            diff2B        = _mm_sub_ps(uij2B, lij2B);
-            lij_invB      = gmx_mm_invsqrt_ps(lij2B);
-            sk2_rinvB     = _mm_mul_ps(sk2_ai, rinvB);
-            prodB         = _mm_mul_ps(onefourth, sk2_rinvB);
-
-            logterm       = gmx_mm_log_ps(_mm_mul_ps(uij, lij_inv));
-            logtermB      = gmx_mm_log_ps(_mm_mul_ps(uijB, lij_invB));
-
-            t1            = _mm_sub_ps(lij, uij);
-            t2            = _mm_mul_ps(diff2,
-                                       _mm_sub_ps(_mm_mul_ps(onefourth, r),
-                                                  prod));
-            t3            = _mm_mul_ps(half, _mm_mul_ps(rinv, logterm));
-            t1            = _mm_add_ps(t1, _mm_add_ps(t2, t3));
-            t4            = _mm_mul_ps(two, _mm_sub_ps(raj_inv, lij));
-            t4            = _mm_and_ps(t4, obc_mask3);
-            t1            = _mm_mul_ps(half, _mm_add_ps(t1, t4));
-
-            t1B           = _mm_sub_ps(lijB, uijB);
-            t2B           = _mm_mul_ps(diff2B,
-                                       _mm_sub_ps(_mm_mul_ps(onefourth, rB),
-                                                  prodB));
-            t3B           = _mm_mul_ps(half, _mm_mul_ps(rinvB, logtermB));
-            t1B           = _mm_add_ps(t1B, _mm_add_ps(t2B, t3B));
-            t4B           = _mm_mul_ps(two, _mm_sub_ps(raj_invB, lijB));
-            t4B           = _mm_and_ps(t4B, obc_mask3B);
-            t1B           = _mm_mul_ps(half, _mm_add_ps(t1B, t4B));
-
-            GMX_MM_INCREMENT_4VALUES_PS(work+jnrA, work+jnrB, work+jnrC, work+jnrD, _mm_and_ps(t1, obc_mask1));
-            GMX_MM_INCREMENT_4VALUES_PS(work+jnrE, work+jnrF, work+jnrG, work+jnrH, _mm_and_ps(t1B, obc_mask1B));
-
-            t1            = _mm_add_ps(_mm_mul_ps(half, lij2),
-                                       _mm_mul_ps(prod, lij3));
-            t1            = _mm_sub_ps(t1,
-                                       _mm_mul_ps(onefourth,
-                                                  _mm_add_ps(_mm_mul_ps(lij, rinv),
-                                                             _mm_mul_ps(lij3, r))));
-            t2            = _mm_mul_ps(onefourth,
-                                       _mm_add_ps(_mm_mul_ps(uij, rinv),
-                                                  _mm_mul_ps(uij3, r)));
-            t2            = _mm_sub_ps(t2,
-                                       _mm_add_ps(_mm_mul_ps(half, uij2),
-                                                  _mm_mul_ps(prod, uij3)));
-            t3            = _mm_mul_ps(_mm_mul_ps(onefourth, logterm),
-                                       _mm_mul_ps(rinv, rinv));
-            t3            = _mm_sub_ps(t3,
-                                       _mm_mul_ps(_mm_mul_ps(diff2, oneeighth),
-                                                  _mm_add_ps(one,
-                                                             _mm_mul_ps(sk2_rinv, rinv))));
-            t1            = _mm_mul_ps(rinv,
-                                       _mm_add_ps(_mm_mul_ps(dlij, t1),
-                                                  _mm_add_ps(t2, t3)));
-
-
-            t1B           = _mm_add_ps(_mm_mul_ps(half, lij2B),
-                                       _mm_mul_ps(prodB, lij3B));
-            t1B           = _mm_sub_ps(t1B,
-                                       _mm_mul_ps(onefourth,
-                                                  _mm_add_ps(_mm_mul_ps(lijB, rinvB),
-                                                             _mm_mul_ps(lij3B, rB))));
-            t2B           = _mm_mul_ps(onefourth,
-                                       _mm_add_ps(_mm_mul_ps(uijB, rinvB),
-                                                  _mm_mul_ps(uij3B, rB)));
-            t2B           = _mm_sub_ps(t2B,
-                                       _mm_add_ps(_mm_mul_ps(half, uij2B),
-                                                  _mm_mul_ps(prodB, uij3B)));
-            t3B           = _mm_mul_ps(_mm_mul_ps(onefourth, logtermB),
-                                       _mm_mul_ps(rinvB, rinvB));
-            t3B           = _mm_sub_ps(t3B,
-                                       _mm_mul_ps(_mm_mul_ps(diff2B, oneeighth),
-                                                  _mm_add_ps(one,
-                                                             _mm_mul_ps(sk2_rinvB, rinvB))));
-            t1B           = _mm_mul_ps(rinvB,
-                                       _mm_add_ps(_mm_mul_ps(dlijB, t1B),
-                                                  _mm_add_ps(t2B, t3B)));
-
-
-            dadx2         = _mm_and_ps(t1, obc_mask1);
-            dadx2B        = _mm_and_ps(t1B, obc_mask1B);
-
-            _mm_store_ps(dadx, dadx1);
-            dadx += 4;
-            _mm_store_ps(dadx, dadx2);
-            dadx += 4;
-            _mm_store_ps(dadx, dadx1B);
-            dadx += 4;
-            _mm_store_ps(dadx, dadx2B);
-            dadx += 4;
-
-        } /* end normal inner loop */
-
-        for (; k < nj1-offset; k += 4)
-        {
-            jnrA        = jjnr[k];
-            jnrB        = jjnr[k+1];
-            jnrC        = jjnr[k+2];
-            jnrD        = jjnr[k+3];
-
-            j3A         = 3*jnrA;
-            j3B         = 3*jnrB;
-            j3C         = 3*jnrC;
-            j3D         = 3*jnrD;
-
-            GMX_MM_LOAD_1RVEC_4POINTERS_PS(x+j3A, x+j3B, x+j3C, x+j3D, jx, jy, jz);
-            GMX_MM_LOAD_4VALUES_PS(gb_radius+jnrA, gb_radius+jnrB, gb_radius+jnrC, gb_radius+jnrD, raj);
-            GMX_MM_LOAD_4VALUES_PS(obc_param+jnrA, obc_param+jnrB, obc_param+jnrC, obc_param+jnrD, sk_aj);
-
-            dx    = _mm_sub_ps(ix, jx);
-            dy    = _mm_sub_ps(iy, jy);
-            dz    = _mm_sub_ps(iz, jz);
-
-            rsq         = gmx_mm_calc_rsq_ps(dx, dy, dz);
-
-            rinv        = gmx_mm_invsqrt_ps(rsq);
-            r           = _mm_mul_ps(rsq, rinv);
-
-            /* Compute raj_inv aj1-4 */
-            raj_inv     = gmx_mm_inv_ps(raj);
-
-            /* Evaluate influence of atom aj -> ai */
-            t1            = _mm_add_ps(r, sk_aj);
-            obc_mask1     = _mm_cmplt_ps(rai, t1);
-
-            if (_mm_movemask_ps(obc_mask1))
-            {
-                /* If any of the elements has rai<dr+sk, this is executed */
-                t2            = _mm_sub_ps(r, sk_aj);
-                t3            = _mm_sub_ps(sk_aj, r);
-
-                obc_mask2     = _mm_cmplt_ps(rai, t2);
-                obc_mask3     = _mm_cmplt_ps(rai, t3);
-
-                uij           = gmx_mm_inv_ps(t1);
-                lij           = _mm_or_ps(   _mm_and_ps(obc_mask2, gmx_mm_inv_ps(t2)),
-                                             _mm_andnot_ps(obc_mask2, rai_inv));
-                dlij          = _mm_and_ps(one, obc_mask2);
-                uij2          = _mm_mul_ps(uij, uij);
-                uij3          = _mm_mul_ps(uij2, uij);
-                lij2          = _mm_mul_ps(lij, lij);
-                lij3          = _mm_mul_ps(lij2, lij);
-                diff2         = _mm_sub_ps(uij2, lij2);
-                lij_inv       = gmx_mm_invsqrt_ps(lij2);
-                sk2_aj        = _mm_mul_ps(sk_aj, sk_aj);
-                sk2_rinv      = _mm_mul_ps(sk2_aj, rinv);
-                prod          = _mm_mul_ps(onefourth, sk2_rinv);
-                logterm       = gmx_mm_log_ps(_mm_mul_ps(uij, lij_inv));
-                t1            = _mm_sub_ps(lij, uij);
-                t2            = _mm_mul_ps(diff2,
-                                           _mm_sub_ps(_mm_mul_ps(onefourth, r),
-                                                      prod));
-                t3            = _mm_mul_ps(half, _mm_mul_ps(rinv, logterm));
-                t1            = _mm_add_ps(t1, _mm_add_ps(t2, t3));
-                t4            = _mm_mul_ps(two, _mm_sub_ps(rai_inv, lij));
-                t4            = _mm_and_ps(t4, obc_mask3);
-                t1            = _mm_mul_ps(half, _mm_add_ps(t1, t4));
-                sum_ai        = _mm_add_ps(sum_ai, _mm_and_ps(t1, obc_mask1));
-                t1            = _mm_add_ps(_mm_mul_ps(half, lij2),
-                                           _mm_mul_ps(prod, lij3));
-                t1            = _mm_sub_ps(t1,
-                                           _mm_mul_ps(onefourth,
-                                                      _mm_add_ps(_mm_mul_ps(lij, rinv),
-                                                                 _mm_mul_ps(lij3, r))));
-                t2            = _mm_mul_ps(onefourth,
-                                           _mm_add_ps(_mm_mul_ps(uij, rinv),
-                                                      _mm_mul_ps(uij3, r)));
-                t2            = _mm_sub_ps(t2,
-                                           _mm_add_ps(_mm_mul_ps(half, uij2),
-                                                      _mm_mul_ps(prod, uij3)));
-                t3            = _mm_mul_ps(_mm_mul_ps(onefourth, logterm),
-                                           _mm_mul_ps(rinv, rinv));
-                t3            = _mm_sub_ps(t3,
-                                           _mm_mul_ps(_mm_mul_ps(diff2, oneeighth),
-                                                      _mm_add_ps(one,
-                                                                 _mm_mul_ps(sk2_rinv, rinv))));
-                t1            = _mm_mul_ps(rinv,
-                                           _mm_add_ps(_mm_mul_ps(dlij, t1),
-                                                      _mm_add_ps(t2, t3)));
-
-                dadx1         = _mm_and_ps(t1, obc_mask1);
-            }
-            else
-            {
-                dadx1         = _mm_setzero_ps();
-            }
-
-            /* Evaluate influence of atom ai -> aj */
-            t1            = _mm_add_ps(r, sk_ai);
-            obc_mask1     = _mm_cmplt_ps(raj, t1);
-
-            if (_mm_movemask_ps(obc_mask1))
-            {
-                t2            = _mm_sub_ps(r, sk_ai);
-                t3            = _mm_sub_ps(sk_ai, r);
-                obc_mask2     = _mm_cmplt_ps(raj, t2);
-                obc_mask3     = _mm_cmplt_ps(raj, t3);
-
-                uij           = gmx_mm_inv_ps(t1);
-                lij           = _mm_or_ps(   _mm_and_ps(obc_mask2, gmx_mm_inv_ps(t2)),
-                                             _mm_andnot_ps(obc_mask2, raj_inv));
-                dlij          = _mm_and_ps(one, obc_mask2);
-                uij2          = _mm_mul_ps(uij, uij);
-                uij3          = _mm_mul_ps(uij2, uij);
-                lij2          = _mm_mul_ps(lij, lij);
-                lij3          = _mm_mul_ps(lij2, lij);
-                diff2         = _mm_sub_ps(uij2, lij2);
-                lij_inv       = gmx_mm_invsqrt_ps(lij2);
-                sk2_rinv      = _mm_mul_ps(sk2_ai, rinv);
-                prod          = _mm_mul_ps(onefourth, sk2_rinv);
-                logterm       = gmx_mm_log_ps(_mm_mul_ps(uij, lij_inv));
-                t1            = _mm_sub_ps(lij, uij);
-                t2            = _mm_mul_ps(diff2,
-                                           _mm_sub_ps(_mm_mul_ps(onefourth, r),
-                                                      prod));
-                t3            = _mm_mul_ps(half, _mm_mul_ps(rinv, logterm));
-                t1            = _mm_add_ps(t1, _mm_add_ps(t2, t3));
-                t4            = _mm_mul_ps(two, _mm_sub_ps(raj_inv, lij));
-                t4            = _mm_and_ps(t4, obc_mask3);
-                t1            = _mm_mul_ps(half, _mm_add_ps(t1, t4));
-
-                GMX_MM_INCREMENT_4VALUES_PS(work+jnrA, work+jnrB, work+jnrC, work+jnrD, _mm_and_ps(t1, obc_mask1));
-
-                t1            = _mm_add_ps(_mm_mul_ps(half, lij2),
-                                           _mm_mul_ps(prod, lij3));
-                t1            = _mm_sub_ps(t1,
-                                           _mm_mul_ps(onefourth,
-                                                      _mm_add_ps(_mm_mul_ps(lij, rinv),
-                                                                 _mm_mul_ps(lij3, r))));
-                t2            = _mm_mul_ps(onefourth,
-                                           _mm_add_ps(_mm_mul_ps(uij, rinv),
-                                                      _mm_mul_ps(uij3, r)));
-                t2            = _mm_sub_ps(t2,
-                                           _mm_add_ps(_mm_mul_ps(half, uij2),
-                                                      _mm_mul_ps(prod, uij3)));
-                t3            = _mm_mul_ps(_mm_mul_ps(onefourth, logterm),
-                                           _mm_mul_ps(rinv, rinv));
-                t3            = _mm_sub_ps(t3,
-                                           _mm_mul_ps(_mm_mul_ps(diff2, oneeighth),
-                                                      _mm_add_ps(one,
-                                                                 _mm_mul_ps(sk2_rinv, rinv))));
-                t1            = _mm_mul_ps(rinv,
-                                           _mm_add_ps(_mm_mul_ps(dlij, t1),
-                                                      _mm_add_ps(t2, t3)));
-                dadx2         = _mm_and_ps(t1, obc_mask1);
-            }
-            else
-            {
-                dadx2         = _mm_setzero_ps();
-            }
-
-            _mm_store_ps(dadx, dadx1);
-            dadx += 4;
-            _mm_store_ps(dadx, dadx2);
-            dadx += 4;
-        } /* end normal inner loop */
-
-        if (offset != 0)
-        {
-            if (offset == 1)
-            {
-                jnrA        = jjnr[k];
-                j3A         = 3*jnrA;
-                GMX_MM_LOAD_1RVEC_1POINTER_PS(x+j3A, jx, jy, jz);
-                GMX_MM_LOAD_1VALUE_PS(gb_radius+jnrA, raj);
-                GMX_MM_LOAD_1VALUE_PS(obc_param+jnrA, sk_aj);
-                mask        = mask1;
-            }
-            else if (offset == 2)
-            {
-                jnrA        = jjnr[k];
-                jnrB        = jjnr[k+1];
-                j3A         = 3*jnrA;
-                j3B         = 3*jnrB;
-                GMX_MM_LOAD_1RVEC_2POINTERS_PS(x+j3A, x+j3B, jx, jy, jz);
-                GMX_MM_LOAD_2VALUES_PS(gb_radius+jnrA, gb_radius+jnrB, raj);
-                GMX_MM_LOAD_2VALUES_PS(obc_param+jnrA, obc_param+jnrB, sk_aj);
-                mask        = mask2;
-            }
-            else
-            {
-                /* offset must be 3 */
-                jnrA        = jjnr[k];
-                jnrB        = jjnr[k+1];
-                jnrC        = jjnr[k+2];
-                j3A         = 3*jnrA;
-                j3B         = 3*jnrB;
-                j3C         = 3*jnrC;
-                GMX_MM_LOAD_1RVEC_3POINTERS_PS(x+j3A, x+j3B, x+j3C, jx, jy, jz);
-                GMX_MM_LOAD_3VALUES_PS(gb_radius+jnrA, gb_radius+jnrB, gb_radius+jnrC, raj);
-                GMX_MM_LOAD_3VALUES_PS(obc_param+jnrA, obc_param+jnrB, obc_param+jnrC, sk_aj);
-                mask        = mask3;
-            }
-
-            dx    = _mm_sub_ps(ix, jx);
-            dy    = _mm_sub_ps(iy, jy);
-            dz    = _mm_sub_ps(iz, jz);
-
-            rsq         = gmx_mm_calc_rsq_ps(dx, dy, dz);
-
-            rinv        = gmx_mm_invsqrt_ps(rsq);
-            r           = _mm_mul_ps(rsq, rinv);
-
-            /* Compute raj_inv aj1-4 */
-            raj_inv     = gmx_mm_inv_ps(raj);
-
-            /* Evaluate influence of atom aj -> ai */
-            t1            = _mm_add_ps(r, sk_aj);
-            obc_mask1     = _mm_cmplt_ps(rai, t1);
-            obc_mask1     = _mm_and_ps(obc_mask1, mask);
-
-            if (_mm_movemask_ps(obc_mask1))
-            {
-                t2            = _mm_sub_ps(r, sk_aj);
-                t3            = _mm_sub_ps(sk_aj, r);
-                obc_mask2     = _mm_cmplt_ps(rai, t2);
-                obc_mask3     = _mm_cmplt_ps(rai, t3);
-
-                uij           = gmx_mm_inv_ps(t1);
-                lij           = _mm_or_ps(   _mm_and_ps(obc_mask2, gmx_mm_inv_ps(t2)),
-                                             _mm_andnot_ps(obc_mask2, rai_inv));
-                dlij           = _mm_and_ps(one, obc_mask2);
-                uij2           = _mm_mul_ps(uij, uij);
-                uij3           = _mm_mul_ps(uij2, uij);
-                lij2           = _mm_mul_ps(lij, lij);
-                lij3           = _mm_mul_ps(lij2, lij);
-                diff2          = _mm_sub_ps(uij2, lij2);
-                lij_inv        = gmx_mm_invsqrt_ps(lij2);
-                sk2_aj         = _mm_mul_ps(sk_aj, sk_aj);
-                sk2_rinv       = _mm_mul_ps(sk2_aj, rinv);
-                prod           = _mm_mul_ps(onefourth, sk2_rinv);
-                logterm        = gmx_mm_log_ps(_mm_mul_ps(uij, lij_inv));
-                t1             = _mm_sub_ps(lij, uij);
-                t2             = _mm_mul_ps(diff2,
-                                            _mm_sub_ps(_mm_mul_ps(onefourth, r),
-                                                       prod));
-                t3            = _mm_mul_ps(half, _mm_mul_ps(rinv, logterm));
-                t1            = _mm_add_ps(t1, _mm_add_ps(t2, t3));
-                t4            = _mm_mul_ps(two, _mm_sub_ps(rai_inv, lij));
-                t4            = _mm_and_ps(t4, obc_mask3);
-                t1            = _mm_mul_ps(half, _mm_add_ps(t1, t4));
-                sum_ai        = _mm_add_ps(sum_ai, _mm_and_ps(t1, obc_mask1));
-                t1            = _mm_add_ps(_mm_mul_ps(half, lij2),
-                                           _mm_mul_ps(prod, lij3));
-                t1            = _mm_sub_ps(t1,
-                                           _mm_mul_ps(onefourth,
-                                                      _mm_add_ps(_mm_mul_ps(lij, rinv),
-                                                                 _mm_mul_ps(lij3, r))));
-                t2            = _mm_mul_ps(onefourth,
-                                           _mm_add_ps(_mm_mul_ps(uij, rinv),
-                                                      _mm_mul_ps(uij3, r)));
-                t2            = _mm_sub_ps(t2,
-                                           _mm_add_ps(_mm_mul_ps(half, uij2),
-                                                      _mm_mul_ps(prod, uij3)));
-                t3            = _mm_mul_ps(_mm_mul_ps(onefourth, logterm),
-                                           _mm_mul_ps(rinv, rinv));
-                t3            = _mm_sub_ps(t3,
-                                           _mm_mul_ps(_mm_mul_ps(diff2, oneeighth),
-                                                      _mm_add_ps(one,
-                                                                 _mm_mul_ps(sk2_rinv, rinv))));
-                t1            = _mm_mul_ps(rinv,
-                                           _mm_add_ps(_mm_mul_ps(dlij, t1),
-                                                      _mm_add_ps(t2, t3)));
-                dadx1         = _mm_and_ps(t1, obc_mask1);
-            }
-            else
-            {
-                dadx1         = _mm_setzero_ps();
-            }
-
-            /* Evaluate influence of atom ai -> aj */
-            t1            = _mm_add_ps(r, sk_ai);
-            obc_mask1     = _mm_cmplt_ps(raj, t1);
-            obc_mask1     = _mm_and_ps(obc_mask1, mask);
-
-            if (_mm_movemask_ps(obc_mask1))
-            {
-                t2            = _mm_sub_ps(r, sk_ai);
-                t3            = _mm_sub_ps(sk_ai, r);
-                obc_mask2     = _mm_cmplt_ps(raj, t2);
-                obc_mask3     = _mm_cmplt_ps(raj, t3);
-
-                uij           = gmx_mm_inv_ps(t1);
-                lij           = _mm_or_ps(_mm_and_ps(obc_mask2, gmx_mm_inv_ps(t2)),
-                                          _mm_andnot_ps(obc_mask2, raj_inv));
-                dlij          = _mm_and_ps(one, obc_mask2);
-                uij2          = _mm_mul_ps(uij, uij);
-                uij3          = _mm_mul_ps(uij2, uij);
-                lij2          = _mm_mul_ps(lij, lij);
-                lij3          = _mm_mul_ps(lij2, lij);
-                diff2         = _mm_sub_ps(uij2, lij2);
-                lij_inv       = gmx_mm_invsqrt_ps(lij2);
-                sk2_rinv      = _mm_mul_ps(sk2_ai, rinv);
-                prod          = _mm_mul_ps(onefourth, sk2_rinv);
-                logterm       = gmx_mm_log_ps(_mm_mul_ps(uij, lij_inv));
-                t1            = _mm_sub_ps(lij, uij);
-                t2            = _mm_mul_ps(diff2,
-                                           _mm_sub_ps(_mm_mul_ps(onefourth, r),
-                                                      prod));
-                t3            = _mm_mul_ps(half, _mm_mul_ps(rinv, logterm));
-                t1            = _mm_add_ps(t1, _mm_add_ps(t2, t3));
-                t4            = _mm_mul_ps(two, _mm_sub_ps(raj_inv, lij));
-                t4            = _mm_and_ps(t4, obc_mask3);
-                t1            = _mm_mul_ps(half, _mm_add_ps(t1, t4));
-
-                tmp           = _mm_and_ps(t1, obc_mask1);
-
-                t1            = _mm_add_ps(_mm_mul_ps(half, lij2),
-                                           _mm_mul_ps(prod, lij3));
-                t1            = _mm_sub_ps(t1,
-                                           _mm_mul_ps(onefourth,
-                                                      _mm_add_ps(_mm_mul_ps(lij, rinv),
-                                                                 _mm_mul_ps(lij3, r))));
-                t2            = _mm_mul_ps(onefourth,
-                                           _mm_add_ps(_mm_mul_ps(uij, rinv),
-                                                      _mm_mul_ps(uij3, r)));
-                t2            = _mm_sub_ps(t2,
-                                           _mm_add_ps(_mm_mul_ps(half, uij2),
-                                                      _mm_mul_ps(prod, uij3)));
-                t3            = _mm_mul_ps(_mm_mul_ps(onefourth, logterm),
-                                           _mm_mul_ps(rinv, rinv));
-                t3            = _mm_sub_ps(t3,
-                                           _mm_mul_ps(_mm_mul_ps(diff2, oneeighth),
-                                                      _mm_add_ps(one,
-                                                                 _mm_mul_ps(sk2_rinv, rinv))));
-                t1            = _mm_mul_ps(rinv,
-                                           _mm_add_ps(_mm_mul_ps(dlij, t1),
-                                                      _mm_add_ps(t2, t3)));
-                dadx2         = _mm_and_ps(t1, obc_mask1);
-            }
-            else
-            {
-                dadx2         = _mm_setzero_ps();
-                tmp           = _mm_setzero_ps();
-            }
-
-            _mm_store_ps(dadx, dadx1);
-            dadx += 4;
-            _mm_store_ps(dadx, dadx2);
-            dadx += 4;
-
-            if (offset == 1)
-            {
-                GMX_MM_INCREMENT_1VALUE_PS(work+jnrA, tmp);
-            }
-            else if (offset == 2)
-            {
-                GMX_MM_INCREMENT_2VALUES_PS(work+jnrA, work+jnrB, tmp);
-            }
-            else
-            {
-                /* offset must be 3 */
-                GMX_MM_INCREMENT_3VALUES_PS(work+jnrA, work+jnrB, work+jnrC, tmp);
-            }
-
-        }
-        GMX_MM_UPDATE_1POT_PS(sum_ai, work+ii);
-
-    }
-
-    /* Parallel summations */
-    if (DOMAINDECOMP(cr))
-    {
-        dd_atom_sum_real(cr->dd, work);
-    }
-
-    if (gb_algorithm == egbHCT)
-    {
-        /* HCT */
-        for (i = 0; i < fr->natoms_force; i++) /* PELA born->nr */
-        {
-            if (born->use[i] != 0)
-            {
-                rr      = top->atomtypes.gb_radius[md->typeA[i]]-doffset;
-                sum     = 1.0/rr - work[i];
-                min_rad = rr + doffset;
-                rad     = 1.0/sum;
-
-                born->bRad[i]   = rad > min_rad ? rad : min_rad;
-                fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
-            }
-        }
-
-        /* Extra communication required for DD */
-        if (DOMAINDECOMP(cr))
-        {
-            dd_atom_spread_real(cr->dd, born->bRad);
-            dd_atom_spread_real(cr->dd, fr->invsqrta);
-        }
-    }
-    else
-    {
-        /* OBC */
-        for (i = 0; i < fr->natoms_force; i++) /* PELA born->nr */
-        {
-            if (born->use[i] != 0)
-            {
-                rr      = top->atomtypes.gb_radius[md->typeA[i]];
-                rr_inv2 = 1.0/rr;
-                rr      = rr-doffset;
-                rr_inv  = 1.0/rr;
-                sum     = rr * work[i];
-                sum2    = sum  * sum;
-                sum3    = sum2 * sum;
-
-                tsum          = tanh(born->obc_alpha*sum-born->obc_beta*sum2+born->obc_gamma*sum3);
-                born->bRad[i] = rr_inv - tsum*rr_inv2;
-                born->bRad[i] = 1.0 / born->bRad[i];
-
-                fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
-
-                tchain         = rr * (born->obc_alpha-2*born->obc_beta*sum+3*born->obc_gamma*sum2);
-                born->drobc[i] = (1.0-tsum*tsum)*tchain*rr_inv2;
-            }
-        }
-        /* Extra (local) communication required for DD */
-        if (DOMAINDECOMP(cr))
-        {
-            dd_atom_spread_real(cr->dd, born->bRad);
-            dd_atom_spread_real(cr->dd, fr->invsqrta);
-            dd_atom_spread_real(cr->dd, born->drobc);
-        }
-    }
-
-
-
-    return 0;
-}
-
-
-
-float calc_gb_chainrule_sse2_single(int natoms, t_nblist *nl, float *dadx, float *dvda,
-                                    float *x, float *f, float *fshift, float *shiftvec,
-                                    int gb_algorithm, gmx_genborn_t *born, t_mdatoms *md)
-{
-    int          i, k, n, ii, jnr, ii3, is3, nj0, nj1, offset, n0, n1;
-    int          jnrA, jnrB, jnrC, jnrD;
-    int          j3A, j3B, j3C, j3D;
-    int          jnrE, jnrF, jnrG, jnrH;
-    int          j3E, j3F, j3G, j3H;
-    int       *  jjnr;
-
-    float        rbi, shX, shY, shZ;
-    float       *rb;
-
-    __m128       ix, iy, iz;
-    __m128       jx, jy, jz;
-    __m128       jxB, jyB, jzB;
-    __m128       fix, fiy, fiz;
-    __m128       dx, dy, dz;
-    __m128       tx, ty, tz;
-    __m128       dxB, dyB, dzB;
-    __m128       txB, tyB, tzB;
-
-    __m128       rbai, rbaj, rbajB, f_gb, f_gb_ai, f_gbB, f_gb_aiB;
-    __m128       xmm1, xmm2, xmm3;
-
-    const __m128 two = _mm_set1_ps(2.0f);
-
-    rb     = born->work;
-
-    jjnr   = nl->jjnr;
-
-    /* Loop to get the proper form for the Born radius term, sse style */
-    offset = natoms%4;
-
-    n0 = 0;
-    n1 = natoms;
-
-    if (gb_algorithm == egbSTILL)
-    {
-        for (i = n0; i < n1; i++)
-        {
-            rbi   = born->bRad[i];
-            rb[i] = (2 * rbi * rbi * dvda[i])/ONE_4PI_EPS0;
-        }
-    }
-    else if (gb_algorithm == egbHCT)
-    {
-        for (i = n0; i < n1; i++)
-        {
-            rbi   = born->bRad[i];
-            rb[i] = rbi * rbi * dvda[i];
-        }
-    }
-    else if (gb_algorithm == egbOBC)
-    {
-        for (i = n0; i < n1; i++)
-        {
-            rbi   = born->bRad[i];
-            rb[i] = rbi * rbi * born->drobc[i] * dvda[i];
-        }
-    }
-
-    jz = _mm_setzero_ps();
-
-    n = j3A = j3B = j3C = j3D = 0;
-
-    for (i = 0; i < nl->nri; i++)
-    {
-        ii     = nl->iinr[i];
-        ii3    = ii*3;
-        is3    = 3*nl->shift[i];
-        shX    = shiftvec[is3];
-        shY    = shiftvec[is3+1];
-        shZ    = shiftvec[is3+2];
-        nj0    = nl->jindex[i];
-        nj1    = nl->jindex[i+1];
-
-        ix     = _mm_set1_ps(shX+x[ii3+0]);
-        iy     = _mm_set1_ps(shY+x[ii3+1]);
-        iz     = _mm_set1_ps(shZ+x[ii3+2]);
-
-        offset = (nj1-nj0)%4;
-
-        rbai   = _mm_load1_ps(rb+ii);
-        fix    = _mm_setzero_ps();
-        fiy    = _mm_setzero_ps();
-        fiz    = _mm_setzero_ps();
-
-
-        for (k = nj0; k < nj1-offset; k += 4)
-        {
-            jnrA        = jjnr[k];
-            jnrB        = jjnr[k+1];
-            jnrC        = jjnr[k+2];
-            jnrD        = jjnr[k+3];
-
-            j3A         = 3*jnrA;
-            j3B         = 3*jnrB;
-            j3C         = 3*jnrC;
-            j3D         = 3*jnrD;
-
-            GMX_MM_LOAD_1RVEC_4POINTERS_PS(x+j3A, x+j3B, x+j3C, x+j3D, jx, jy, jz);
-
-            dx          = _mm_sub_ps(ix, jx);
-            dy          = _mm_sub_ps(iy, jy);
-            dz          = _mm_sub_ps(iz, jz);
-
-            GMX_MM_LOAD_4VALUES_PS(rb+jnrA, rb+jnrB, rb+jnrC, rb+jnrD, rbaj);
-
-            /* load chain rule terms for j1-4 */
-            f_gb        = _mm_load_ps(dadx);
-            dadx       += 4;
-            f_gb_ai     = _mm_load_ps(dadx);
-            dadx       += 4;
-
-            /* calculate scalar force */
-            f_gb    = _mm_mul_ps(f_gb, rbai);
-            f_gb_ai = _mm_mul_ps(f_gb_ai, rbaj);
-            f_gb    = _mm_add_ps(f_gb, f_gb_ai);
-
-            tx     = _mm_mul_ps(f_gb, dx);
-            ty     = _mm_mul_ps(f_gb, dy);
-            tz     = _mm_mul_ps(f_gb, dz);
-
-            fix    = _mm_add_ps(fix, tx);
-            fiy    = _mm_add_ps(fiy, ty);
-            fiz    = _mm_add_ps(fiz, tz);
-
-            GMX_MM_DECREMENT_1RVEC_4POINTERS_PS(f+j3A, f+j3B, f+j3C, f+j3D, tx, ty, tz);
-        }
-
-        /*deal with odd elements */
-        if (offset != 0)
-        {
-            if (offset == 1)
-            {
-                jnrA        = jjnr[k];
-                j3A         = 3*jnrA;
-                GMX_MM_LOAD_1RVEC_1POINTER_PS(x+j3A, jx, jy, jz);
-                GMX_MM_LOAD_1VALUE_PS(rb+jnrA, rbaj);
-            }
-            else if (offset == 2)
-            {
-                jnrA        = jjnr[k];
-                jnrB        = jjnr[k+1];
-                j3A         = 3*jnrA;
-                j3B         = 3*jnrB;
-                GMX_MM_LOAD_1RVEC_2POINTERS_PS(x+j3A, x+j3B, jx, jy, jz);
-                GMX_MM_LOAD_2VALUES_PS(rb+jnrA, rb+jnrB, rbaj);
-            }
-            else
-            {
-                /* offset must be 3 */
-                jnrA        = jjnr[k];
-                jnrB        = jjnr[k+1];
-                jnrC        = jjnr[k+2];
-                j3A         = 3*jnrA;
-                j3B         = 3*jnrB;
-                j3C         = 3*jnrC;
-                GMX_MM_LOAD_1RVEC_3POINTERS_PS(x+j3A, x+j3B, x+j3C, jx, jy, jz);
-                GMX_MM_LOAD_3VALUES_PS(rb+jnrA, rb+jnrB, rb+jnrC, rbaj);
-            }
-
-            dx          = _mm_sub_ps(ix, jx);
-            dy          = _mm_sub_ps(iy, jy);
-            dz          = _mm_sub_ps(iz, jz);
-
-            /* load chain rule terms for j1-4 */
-            f_gb        = _mm_load_ps(dadx);
-            dadx       += 4;
-            f_gb_ai     = _mm_load_ps(dadx);
-            dadx       += 4;
-
-            /* calculate scalar force */
-            f_gb    = _mm_mul_ps(f_gb, rbai);
-            f_gb_ai = _mm_mul_ps(f_gb_ai, rbaj);
-            f_gb    = _mm_add_ps(f_gb, f_gb_ai);
-
-            tx     = _mm_mul_ps(f_gb, dx);
-            ty     = _mm_mul_ps(f_gb, dy);
-            tz     = _mm_mul_ps(f_gb, dz);
-
-            fix    = _mm_add_ps(fix, tx);
-            fiy    = _mm_add_ps(fiy, ty);
-            fiz    = _mm_add_ps(fiz, tz);
-
-            if (offset == 1)
-            {
-                GMX_MM_DECREMENT_1RVEC_1POINTER_PS(f+j3A, tx, ty, tz);
-            }
-            else if (offset == 2)
-            {
-                GMX_MM_DECREMENT_1RVEC_2POINTERS_PS(f+j3A, f+j3B, tx, ty, tz);
-            }
-            else
-            {
-                /* offset must be 3 */
-                GMX_MM_DECREMENT_1RVEC_3POINTERS_PS(f+j3A, f+j3B, f+j3C, tx, ty, tz);
-            }
-        }
-
-        /* fix/fiy/fiz now contain four partial force terms, that all should be
-         * added to the i particle forces and shift forces.
-         */
-        gmx_mm_update_iforce_1atom_ps(&fix, &fiy, &fiz, f+ii3, fshift+is3);
-    }
-
-    return 0;
-}
-
-
-#else
-/* keep compiler happy */
-int genborn_sse_dummy;
-
-#endif /* SSE intrinsics available */
index f6ee45829229e765c638bc25ddce8074884db7bb..e09de482f9e9d3dd0291780243b03a242a3111f1 100644 (file)
@@ -297,16 +297,16 @@ struct gmx_nbnxn_ocl_t
     cl_command_queue    stream[2];      /**< local and non-local GPU queues                             */
 
     /** events used for synchronization */
-    cl_event nonlocal_done;              /**< event triggered when the non-local non-bonded kernel
-                                            is done (and the local transfer can proceed) */
-    cl_event isc_ops_and_local_H2D_done; /**< event triggered when the tasks issued in
-                                            the local stream that need to precede the
-                                            non-local force calculations are done
-                                            (e.g. f buffer 0-ing, local x/q H2D) */
-
-    cl_bool                     bDoTime; /**< True if event-based timing is enabled.                     */
-    cl_timers_t                *timers;  /**< OpenCL event-based timers.                                 */
-    struct gmx_wallclock_gpu_t *timings; /**< Timing data.                                               */
+    cl_event nonlocal_done;               /**< event triggered when the non-local non-bonded kernel
+                                             is done (and the local transfer can proceed) */
+    cl_event misc_ops_and_local_H2D_done; /**< event triggered when the tasks issued in
+                                             the local stream that need to precede the
+                                             non-local force calculations are done
+                                             (e.g. f buffer 0-ing, local x/q H2D) */
+
+    cl_bool                     bDoTime;  /**< True if event-based timing is enabled.                     */
+    cl_timers_t                *timers;   /**< OpenCL event-based timers.                                 */
+    struct gmx_wallclock_gpu_t *timings;  /**< Timing data.                                               */
 };
 
 #ifdef __cplusplus
index a3720823567862c8d3740ef5973d352417be082c..16fd3b010127634444485d6a7b183a2192331ad9 100644 (file)
@@ -4014,7 +4014,8 @@ static void split_sci_entry(nbnxn_pairlist_t *nbl,
                 nsp_cj4 += (nbl->cj4[cj4].imei[0].imask >> p) & 1;
             }
 
-            if (nsp_cj4 > 0 && nsp + nsp_cj4 > nsp_max)
+            /* Check if we should split at this cj4 to get a list of size nsp */
+            if (nsp > 0 && nsp + nsp_cj4 > nsp_max)
             {
                 /* Split the list at cj4 */
                 nbl->sci[sci].cj4_ind_end = cj4;
@@ -4379,7 +4380,7 @@ static void get_nsubpair_target(const nbnxn_search_t  nbs,
 
     ls[XX] = (grid->c1[XX] - grid->c0[XX])/(grid->ncx*GPU_NSUBCELL_X);
     ls[YY] = (grid->c1[YY] - grid->c0[YY])/(grid->ncy*GPU_NSUBCELL_Y);
-    ls[ZZ] = (grid->c1[ZZ] - grid->c0[ZZ])*grid->ncx*grid->ncy/(grid->nc*GPU_NSUBCELL_Z);
+    ls[ZZ] = grid->na_c/(grid->atom_density*ls[XX]*ls[YY]);
 
     /* The average squared length of the diagonal of a sub cell */
     xy_diag2 = ls[XX]*ls[XX] + ls[YY]*ls[YY] + ls[ZZ]*ls[ZZ];
@@ -4409,11 +4410,25 @@ static void get_nsubpair_target(const nbnxn_search_t  nbs,
         /* 4 octants of a sphere */
         vol_est += 0.5*4.0/3.0*M_PI*pow(r_eff_sup, 3);
 
+        /* Estimate the number of cluster pairs as the local number of
+         * clusters times the volume they interact with times the density.
+         */
         nsp_est = grid->nsubc_tot*vol_est*grid->atom_density/grid->na_c;
 
         /* Subtract the non-local pair count */
         nsp_est -= nsp_est_nl;
 
+        /* For small cut-offs nsp_est will be an underestimate.
+         * With DD nsp_est_nl is an overestimate so nsp_est can get negative.
+         * So to avoid too small or negative nsp_est we set a minimum of
+         * all cells interacting with all 3^3 direct neighbors (3^3-1)/2+1=14.
+         * This might be a slight overestimate for small non-periodic groups of
+         * atoms as will occur for a local domain with DD, but for small
+         * groups of atoms we'll anyhow be limited by nsubpair_target_min,
+         * so this overestimation will not matter.
+         */
+        nsp_est = max(nsp_est, grid->nsubc_tot*14.0);
+
         if (debug)
         {
             fprintf(debug, "nsp_est local %5.1f non-local %5.1f\n",
similarity index 97%
rename from src/gromacs/mdlib/ns.c
rename to src/gromacs/mdlib/ns.cpp
index d6e9314fb16a70eeca45b33d9fb3774d8463a34c..e3f37fc45a6b089a0ae6715d3b82b81f0bc98023 100644 (file)
 #include <stdlib.h>
 #include <string.h>
 
+#include <cmath>
+
+#include <algorithm>
+
 #include "gromacs/domdec/domdec.h"
 #include "gromacs/legacyheaders/force.h"
 #include "gromacs/legacyheaders/macros.h"
@@ -93,7 +97,7 @@ static gmx_bool NOTEXCL_(t_excl e[], atom_id i, atom_id j)
 static int
 round_up_to_simd_width(int length, int simd_width)
 {
-    int offset, newlength;
+    int offset;
 
     offset = (simd_width > 0) ? length % simd_width : 0;
 
@@ -132,7 +136,7 @@ static void init_nblist(FILE *log, t_nblist *nl_sr, t_nblist *nl_lr,
 {
     t_nblist *nl;
     int       homenr;
-    int       i, nn;
+    int       i;
 
     for (i = 0; (i < 2); i++)
     {
@@ -199,7 +203,6 @@ void init_neighbor_list(FILE *log, t_forcerec *fr, int homenr)
      */
     int        maxsr, maxsr_wat, maxlr, maxlr_wat;
     int        ielec, ivdw, ielecmod, ivdwmod, type;
-    int        solvent;
     int        igeometry_def, igeometry_w, igeometry_ww;
     int        i;
     gmx_bool   bElecAndVdwSwitchDiffers;
@@ -217,11 +220,11 @@ void init_neighbor_list(FILE *log, t_forcerec *fr, int homenr)
      * all the nlist arrays many times in a row.
      * The numbers seem very accurate, but they are uncritical.
      */
-    maxsr_wat = min(fr->nWatMol, (homenr+2)/3);
+    maxsr_wat = std::min(fr->nWatMol, (homenr+2)/3);
     if (fr->bTwinRange)
     {
         maxlr     = 50;
-        maxlr_wat = min(maxsr_wat, maxlr);
+        maxlr_wat = std::min(maxsr_wat, maxlr);
     }
     else
     {
@@ -363,9 +366,7 @@ static void reset_neighbor_lists(t_forcerec *fr, gmx_bool bResetSR, gmx_bool bRe
 
 static gmx_inline void new_i_nblist(t_nblist *nlist, atom_id i_atom, int shift, int gid)
 {
-    int    i, k, nri, nshift;
-
-    nri = nlist->nri;
+    int    nri = nlist->nri;
 
     /* Check whether we have to increase the i counter */
     if ((nri == -1) ||
@@ -594,14 +595,14 @@ put_in_list_at(gmx_bool              bHaveVdW[],
     t_nblist  *   vdwc_ww    = NULL;
     t_nblist  *   coul_ww    = NULL;
 
-    int           i, j, jcg, igid, gid, nbl_ind, ind_ij;
+    int           i, j, jcg, igid, gid, nbl_ind;
     atom_id       jj, jj0, jj1, i_atom;
-    int           i0, nicg, len;
+    int           i0, nicg;
 
     int          *cginfo;
     int          *type, *typeB;
     real         *charge, *chargeB;
-    real          qi, qiB, qq, rlj;
+    real          qi, qiB;
     gmx_bool      bFreeEnergy, bFree, bFreeJ, bNotEx, *bPert;
     gmx_bool      bDoVdW_i, bDoCoul_i, bDoCoul_i_sol;
     int           iwater, jwater;
@@ -1094,7 +1095,7 @@ put_in_list_adress(gmx_bool              bHaveVdW[],
                    gmx_bool              bLR,
                    gmx_bool              bDoVdW,
                    gmx_bool              bDoCoul,
-                   int                   solvent_opt)
+                   int       gmx_unused  solvent_opt)
 {
     /* The a[] index has been removed,
      * to put it back in i_atom should be a[i0] and jj should be a[jj].
@@ -1105,33 +1106,26 @@ put_in_list_adress(gmx_bool              bHaveVdW[],
     t_nblist  *   vdwc_adress  = NULL;
     t_nblist  *   vdw_adress   = NULL;
     t_nblist  *   coul_adress  = NULL;
-    t_nblist  *   vdwc_ww      = NULL;
-    t_nblist  *   coul_ww      = NULL;
 
     int           i, j, jcg, igid, gid, nbl_ind, nbl_ind_adress;
     atom_id       jj, jj0, jj1, i_atom;
-    int           i0, nicg, len;
+    int           i0, nicg;
 
     int          *cginfo;
-    int          *type, *typeB;
-    real         *charge, *chargeB;
+    int          *type;
+    real         *charge;
     real         *wf;
-    real          qi, qiB, qq, rlj;
-    gmx_bool      bFreeEnergy, bFree, bFreeJ, bNotEx, *bPert;
-    gmx_bool      bDoVdW_i, bDoCoul_i, bDoCoul_i_sol;
+    real          qi;
+    gmx_bool      bNotEx;
+    gmx_bool      bDoVdW_i, bDoCoul_i;
     gmx_bool      b_hybrid;
-    gmx_bool      j_all_atom;
-    int           iwater, jwater;
     t_nblist     *nlist, *nlist_adress;
     gmx_bool      bEnergyGroupCG;
 
     /* Copy some pointers */
     cginfo  = fr->cginfo;
     charge  = md->chargeA;
-    chargeB = md->chargeB;
     type    = md->typeA;
-    typeB   = md->typeB;
-    bPert   = md->bPerturbed;
     wf      = md->wf;
 
     /* Get atom range */
@@ -1141,8 +1135,6 @@ put_in_list_adress(gmx_bool              bHaveVdW[],
     /* Get the i charge group info */
     igid   = GET_CGINFO_GID(cginfo[icg]);
 
-    iwater = (solvent_opt != esolNO) ? GET_CGINFO_SOLOPT(cginfo[icg]) : esolNO;
-
     if (md->nPerturbed)
     {
         gmx_fatal(FARGS, "AdResS does not support free energy pertubation\n");
@@ -1585,18 +1577,18 @@ static real calc_image_tric(rvec xi, rvec xj, matrix box,
     /* Perform NINT operation, using trunc operation, therefore
      * we first add 2.5 then subtract 2 again
      */
-    tz  = dz*b_inv[ZZ] + h25;
+    tz  = static_cast<int>(dz*b_inv[ZZ] + h25);
     tz -= 2;
     dz -= tz*box[ZZ][ZZ];
     dy -= tz*box[ZZ][YY];
     dx -= tz*box[ZZ][XX];
 
-    ty  = dy*b_inv[YY] + h25;
+    ty  = static_cast<int>(dy*b_inv[YY] + h25);
     ty -= 2;
     dy -= ty*box[YY][YY];
     dx -= ty*box[YY][XX];
 
-    tx  = dx*b_inv[XX]+h25;
+    tx  = static_cast<int>(dx*b_inv[XX]+h25);
     tx -= 2;
     dx -= tx*box[XX][XX];
 
@@ -1625,9 +1617,9 @@ static real calc_image_rect(rvec xi, rvec xj, rvec box_size,
     /* Perform NINT operation, using trunc operation, therefore
      * we first add 1.5 then subtract 1 again
      */
-    tx = dx*b_inv[XX] + h15;
-    ty = dy*b_inv[YY] + h15;
-    tz = dz*b_inv[ZZ] + h15;
+    tx = static_cast<int>(dx*b_inv[XX] + h15);
+    ty = static_cast<int>(dy*b_inv[YY] + h15);
+    tz = static_cast<int>(dz*b_inv[ZZ] + h15);
     tx--;
     ty--;
     tz--;
@@ -1645,7 +1637,7 @@ static real calc_image_rect(rvec xi, rvec xj, rvec box_size,
     return r2;
 }
 
-static void add_simple(t_ns_buf *nsbuf, int nrj, atom_id cg_j,
+static void add_simple(t_ns_buf * nsbuf, int nrj, atom_id cg_j,
                        gmx_bool bHaveVdW[], int ngid, t_mdatoms *md,
                        int icg, int jgid, t_block *cgs, t_excl bexcl[],
                        int shift, t_forcerec *fr, put_in_list_t *put_in_list)
@@ -1673,7 +1665,6 @@ static void ns_inner_tric(rvec x[], int icg, int *i_egp_flags,
     int       j, nrj, jgid;
     int      *cginfo = fr->cginfo;
     atom_id   cg_j, *cgindex;
-    t_ns_buf *nsbuf;
 
     cgindex = cgs->index;
     shift   = CENTRAL;
@@ -1706,7 +1697,6 @@ static void ns_inner_rect(rvec x[], int icg, int *i_egp_flags,
     int       j, nrj, jgid;
     int      *cginfo = fr->cginfo;
     atom_id   cg_j, *cgindex;
-    t_ns_buf *nsbuf;
 
     cgindex = cgs->index;
     if (bBox)
@@ -1760,7 +1750,7 @@ static int ns_simple_core(t_forcerec *fr,
 {
     int          naaj, k;
     real         rlist2;
-    int          nsearch, icg, jcg, igid, i0, nri, nn;
+    int          nsearch, icg, igid, nn;
     int         *cginfo;
     t_ns_buf    *nsbuf;
     /* atom_id  *i_atoms; */
@@ -2044,8 +2034,8 @@ static void get_cutoff2(t_forcerec *fr, gmx_bool bDoLongRange,
         *rvdw2  = *rs2;
         *rcoul2 = *rs2;
     }
-    *rm2 = min(*rvdw2, *rcoul2);
-    *rl2 = max(*rvdw2, *rcoul2);
+    *rm2 = std::min(*rvdw2, *rcoul2);
+    *rl2 = std::max(*rvdw2, *rcoul2);
 }
 
 static void init_nsgrid_lists(t_forcerec *fr, int ngid, gmx_ns_t *ns)
@@ -2107,15 +2097,15 @@ static int nsgrid_core(t_commrec *cr, t_forcerec *fr,
 #endif
     int           dx0, dx1, dy0, dy1, dz0, dz1;
     int           Nx, Ny, Nz, shift = -1, j, nrj, nns, nn = -1;
-    real          gridx, gridy, gridz, grid_x, grid_y, grid_z;
+    real          gridx, gridy, gridz, grid_x, grid_y;
     real         *dcx2, *dcy2, *dcz2;
     int           zgi, ygi, xgi;
-    int           cg0, cg1, icg = -1, cgsnr, i0, igid, nri, naaj, max_jcg;
+    int           cg0, cg1, icg = -1, cgsnr, i0, igid, naaj, max_jcg;
     int           jcg0, jcg1, jjcg, cgj0, jgid;
     int          *grida, *gridnra, *gridind;
     gmx_bool      rvdw_lt_rcoul, rcoul_lt_rvdw;
-    rvec          xi, *cgcm, grid_offset;
-    real          r2, rs2, rvdw2, rcoul2, rm2, rl2, XI, YI, ZI, dcx, dcy, dcz, tmp1, tmp2;
+    rvec         *cgcm, grid_offset;
+    real          r2, rs2, rvdw2, rcoul2, rm2, rl2, XI, YI, ZI, tmp1, tmp2;
     int          *i_egp_flags;
     gmx_bool      bDomDec, bTriclinicX, bTriclinicY;
     ivec          ncpddc;
@@ -2167,7 +2157,6 @@ static int nsgrid_core(t_commrec *cr, t_forcerec *fr,
     gridz      = grid->cell_size[ZZ];
     grid_x     = 1/gridx;
     grid_y     = 1/gridy;
-    grid_z     = 1/gridz;
     copy_rvec(grid->cell_offset, grid_offset);
     copy_ivec(grid->ncpddc, ncpddc);
     dcx2       = grid->dcx2;
@@ -2213,7 +2202,7 @@ static int nsgrid_core(t_commrec *cr, t_forcerec *fr,
         else
         {
             if (d == XX &&
-                box[XX][XX] - fabs(box[YY][XX]) - fabs(box[ZZ][XX]) < sqrt(rl2))
+                box[XX][XX] - fabs(box[YY][XX]) - fabs(box[ZZ][XX]) < std::sqrt(rl2))
             {
                 shp[d] = 2;
             }
@@ -2512,7 +2501,6 @@ static int nsgrid_core(t_commrec *cr, t_forcerec *fr,
                 }
             }
         }
-        /* setexcl(nri,i_atoms,&top->atoms.excl,FALSE,bexcl); */
         setexcl(cgs->index[icg], cgs->index[icg+1], &top->excls, FALSE, bexcl);
     }
     /* No need to perform any left-over force calculations anymore (as we used to do here)
@@ -2548,7 +2536,6 @@ void init_ns(FILE *fplog, const t_commrec *cr,
 {
     int  mt, icg, nr_in_cg, maxcg, i, j, jcg, ngid, ncg;
     t_block *cgs;
-    char *ptr;
 
     /* Compute largest charge groups size (# atoms) */
     nr_in_cg = 1;
@@ -2557,7 +2544,7 @@ void init_ns(FILE *fplog, const t_commrec *cr,
         cgs = &mtop->moltype[mt].cgs;
         for (icg = 0; (icg < cgs->nr); icg++)
         {
-            nr_in_cg = max(nr_in_cg, (int)(cgs->index[icg+1]-cgs->index[icg]));
+            nr_in_cg = std::max(nr_in_cg, (int)(cgs->index[icg+1]-cgs->index[icg]));
         }
     }
 
@@ -2669,13 +2656,11 @@ int search_neighbours(FILE *log, t_forcerec *fr,
 {
     t_block  *cgs = &(top->cgs);
     rvec     box_size, grid_x0, grid_x1;
-    int      i, j, m, ngid;
+    int      m, ngid;
     real     min_size, grid_dens;
     int      nsearch;
     gmx_bool     bGrid;
-    char     *ptr;
-    gmx_bool     *i_egp_flags;
-    int      cg_start, cg_end, start, end;
+    int      start, end;
     gmx_ns_t *ns;
     t_grid   *grid;
     gmx_domdec_zones_t *dd_zones;
@@ -2700,7 +2685,7 @@ int search_neighbours(FILE *log, t_forcerec *fr,
         }
         if (!bGrid)
         {
-            min_size = min(box_size[XX], min(box_size[YY], box_size[ZZ]));
+            min_size = std::min(box_size[XX], std::min(box_size[YY], box_size[ZZ]));
             if (2*fr->rlistlong >= min_size)
             {
                 gmx_fatal(FARGS, "One of the box diagonal elements has become smaller than twice the cut-off length.");
similarity index 96%
rename from src/gromacs/mdlib/nsgrid.c
rename to src/gromacs/mdlib/nsgrid.cpp
index 75e689e8f4112e029502bd3310de05d301667817..63649e3bee792e81ffb6af9f5954d14e7c5ef38e 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
 #include <stdio.h>
 #include <stdlib.h>
 
+#include <cmath>
+
+#include <algorithm>
+
 #include "gromacs/domdec/domdec.h"
 #include "gromacs/fileio/pdbio.h"
 #include "gromacs/legacyheaders/macros.h"
@@ -90,7 +94,7 @@ static void calc_x_av_stddev(int n, rvec *x, rvec av, rvec stddev)
     for (d = 0; d < DIM; d++)
     {
         av[d]     = s1[d];
-        stddev[d] = sqrt(s2[d] - s1[d]*s1[d]);
+        stddev[d] = std::sqrt(s2[d] - s1[d]*s1[d]);
     }
 }
 
@@ -202,7 +206,6 @@ static void set_grid_sizes(matrix box, rvec izones_x0, rvec izones_x1, real rlis
 {
     int      i, j;
     gmx_bool bDD, bDDRect;
-    rvec     av, stddev;
     rvec     izones_size;
     real     inv_r_ideal, size, add_tric, radd;
 
@@ -218,7 +221,7 @@ static void set_grid_sizes(matrix box, rvec izones_x0, rvec izones_x1, real rlis
     }
 
     /* Use the ideal number of cg's per cell to set the ideal cell size */
-    inv_r_ideal = pow(grid_density/grid->ncg_ideal, 1.0/3.0);
+    inv_r_ideal = std::pow((real)(grid_density/grid->ncg_ideal), (real)(1.0/3.0));
     if (rlist > 0 && inv_r_ideal*rlist < 1)
     {
         inv_r_ideal = 1/rlist;
@@ -247,7 +250,7 @@ static void set_grid_sizes(matrix box, rvec izones_x0, rvec izones_x1, real rlis
              * direction has uniform DD cell boundaries.
              */
             bDDRect = !(ddbox->tric_dir[i] ||
-                        (dd->bGridJump && i != dd->dim[0]));
+                        (dd_dlb_is_on(dd) && i != dd->dim[0]));
 
             radd = rlist;
             if (i >= ddbox->npbcdim &&
@@ -273,8 +276,11 @@ static void set_grid_sizes(matrix box, rvec izones_x0, rvec izones_x1, real rlis
 
             /* Check if the cell boundary in this direction is
              * perpendicular to the Cartesian axis.
+             * Since grid->npbcdim is an integer that in principle can take
+             * any value, we help the compiler avoid warnings and potentially
+             * optimize by ensuring that j < DIM here.
              */
-            for (j = i+1; j < grid->npbcdim; j++)
+            for (j = i+1; j < grid->npbcdim && j < DIM; j++)
             {
                 if (box[j][i] != 0)
                 {
@@ -357,7 +363,6 @@ static void set_grid_sizes(matrix box, rvec izones_x0, rvec izones_x1, real rlis
 
 t_grid *init_grid(FILE *fplog, t_forcerec *fr)
 {
-    int     d, m;
     char   *ptr;
     t_grid *grid;
 
@@ -475,7 +480,6 @@ void grid_first(FILE *fplog, t_grid *grid,
                 real rlistlong, real grid_density)
 {
     int    i, m;
-    ivec   cx;
 
     set_grid_sizes(box, izones_x0, izones_x1, rlistlong, dd, ddbox, grid, grid_density);
 
@@ -495,7 +499,7 @@ void grid_first(FILE *fplog, t_grid *grid,
         }
     }
 
-    m = max(grid->n[XX], max(grid->n[YY], grid->n[ZZ]));
+    m = std::max(grid->n[XX], std::max(grid->n[YY], grid->n[ZZ]));
     if (m > grid->dc_nalloc)
     {
         /* Allocate with double the initial size for box scaling */
@@ -642,7 +646,7 @@ void fill_grid(gmx_domdec_zones_t *dd_zones,
                int cg0, int cg1, rvec cg_cm[])
 {
     int       *cell_index;
-    int        nrx, nry, nrz;
+    int        nry, nrz;
     rvec       n_box, offset;
     int        zone, ccg0, ccg1, cg, d, not_used;
     ivec       shift0, useall, b0, b1, ind;
@@ -661,7 +665,6 @@ void fill_grid(gmx_domdec_zones_t *dd_zones,
     cell_index = grid->cell_index;
 
     /* Initiate cell borders */
-    nrx = grid->n[XX];
     nry = grid->n[YY];
     nrz = grid->n[ZZ];
     for (d = 0; d < DIM; d++)
@@ -689,7 +692,7 @@ void fill_grid(gmx_domdec_zones_t *dd_zones,
         {
             for (d = 0; d < DIM; d++)
             {
-                ind[d] = (cg_cm[cg][d] - offset[d])*n_box[d];
+                ind[d] = static_cast<int>((cg_cm[cg][d] - offset[d])*n_box[d]);
                 /* With pbc we should be done here.
                  * Without pbc cg's outside the grid
                  * should be assigned to the closest grid cell.
@@ -764,7 +767,7 @@ void fill_grid(gmx_domdec_zones_t *dd_zones,
                 bUse = TRUE;
                 for (d = 0; d < DIM; d++)
                 {
-                    ind[d] = (cg_cm[cg][d] - offset[d])*n_box[d];
+                    ind[d] = static_cast<int>((cg_cm[cg][d] - offset[d])*n_box[d]);
                     /* Here we have to correct for rounding problems,
                      * as this cg_cm to cell index operation is not necessarily
                      * binary identical to the operation for the DD zone assignment
similarity index 95%
rename from src/gromacs/mdlib/qm_gamess.c
rename to src/gromacs/mdlib/qm_gamess.cpp
index d913f7f48925abd33818aa8e90317dac5d590b2a..b7c729b29d34b8b7c9c896eda8e5ca5f9970e1ac 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -55,6 +55,7 @@
 #include "gromacs/legacyheaders/qmmm.h"
 #include "gromacs/legacyheaders/txtdump.h"
 #include "gromacs/legacyheaders/typedefs.h"
+#include "gromacs/legacyheaders/types/commrec.h"
 #include "gromacs/math/units.h"
 #include "gromacs/math/vec.h"
 #include "gromacs/utility/fatalerror.h"
 
 
 void
-F77_FUNC(inigms, IMIGMS) (void);
+    F77_FUNC(inigms, IMIGMS) (void);
 
 void
-F77_FUNC(endgms, ENDGMS) (void);
+    F77_FUNC(endgms, ENDGMS) (void);
 
 void
-F77_FUNC(grads, GRADS) (int *nrqmat, real *qmcrd, int *nrmmat, real *mmchrg,
-                        real *mmcrd, real *qmgrad, real *mmgrad, real *energy);
+    F77_FUNC(grads, GRADS) (int *nrqmat, real *qmcrd, int *nrmmat, real *mmchrg,
+                            real *mmcrd, real *qmgrad, real *mmgrad, real *energy);
 
 
 
@@ -88,7 +89,7 @@ void init_gamess(t_commrec *cr, t_QMrec *qm, t_MMrec *mm)
      * dynamics simulations. 7-6-2002 (London)
      */
     int
-        i, j, rank;
+        i, j;
     FILE
        *out;
     char
@@ -228,7 +229,7 @@ void init_gamess(t_commrec *cr, t_QMrec *qm, t_MMrec *mm)
     }
 }
 
-real call_gamess(t_commrec *cr, t_forcerec *fr, t_QMrec *qm, t_MMrec *mm,
+real call_gamess(t_forcerec *fr, t_QMrec *qm, t_MMrec *mm,
                  rvec f[], rvec fshift[])
 {
     /* do the actual QMMM calculation using GAMESS-UK. In this
@@ -237,7 +238,7 @@ real call_gamess(t_commrec *cr, t_forcerec *fr, t_QMrec *qm, t_MMrec *mm,
      * gradient routines linked directly
      */
     int
-        i, j, rank;
+        i, j;
     real
         QMener = 0.0, *qmgrad, *mmgrad, *mmcrd, *qmcrd, energy;
     t_QMMMrec
similarity index 97%
rename from src/gromacs/mdlib/qm_gaussian.c
rename to src/gromacs/mdlib/qm_gaussian.cpp
index d90232c7e86a55fae13a1b0ca5d9a31f4c1fb51f..6f519c26b3f64f322f1d876215fa04b548e8f9ea 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
 #include "gromacs/legacyheaders/typedefs.h"
 #include "gromacs/math/units.h"
 #include "gromacs/math/vec.h"
+#include "gromacs/utility/cstringutil.h"
 #include "gromacs/utility/fatalerror.h"
 #include "gromacs/utility/smalloc.h"
 
-
 /* TODO: this should be made thread-safe */
 
 /* Gaussian interface routines */
 
-void init_gaussian(t_commrec *cr, t_QMrec *qm, t_MMrec *mm)
+void init_gaussian(t_QMrec *qm)
 {
-    FILE
-       *rffile = NULL, *out = NULL;
+    FILE *out = NULL;
     ivec
-        basissets[eQMbasisNR] = {{0, 3, 0},
+          basissets[eQMbasisNR] = {{0, 3, 0},
                                  {0, 3, 0}, /*added for double sto-3g entry in names.c*/
                                  {5, 0, 0},
                                  {5, 0, 1},
@@ -81,9 +80,9 @@ void init_gaussian(t_commrec *cr, t_QMrec *qm, t_MMrec *mm)
                                  {1, 6, 11},
                                  {4, 6, 0}};
     char
-       *buf = NULL;
+         *buf = NULL;
     int
-        i;
+          i;
 
     /* using the ivec above to convert the basis read form the mdp file
      * in a human readable format into some numbers for the gaussian
@@ -695,8 +694,7 @@ void write_gaussian_input(int step, t_forcerec *fr, t_QMrec *qm, t_MMrec *mm)
 
 }  /* write_gaussian_input */
 
-real read_gaussian_output(rvec QMgrad[], rvec MMgrad[], int step,
-                          t_QMrec *qm, t_MMrec *mm)
+real read_gaussian_output(rvec QMgrad[], rvec MMgrad[], t_QMrec *qm, t_MMrec *mm)
 {
     int
         i, j, atnum;
@@ -801,8 +799,7 @@ real read_gaussian_output(rvec QMgrad[], rvec MMgrad[], int step,
     return(QMener);
 }
 
-real read_gaussian_SH_output(rvec QMgrad[], rvec MMgrad[], int step,
-                             gmx_bool swapped, t_QMrec *qm, t_MMrec *mm)
+real read_gaussian_SH_output(rvec QMgrad[], rvec MMgrad[], int step, t_QMrec *qm, t_MMrec *mm)
 {
     int
         i;
@@ -1023,8 +1020,7 @@ void do_gaussian(int step, char *exe)
     }
 }
 
-real call_gaussian(t_commrec *cr,  t_forcerec *fr,
-                   t_QMrec *qm, t_MMrec *mm, rvec f[], rvec fshift[])
+real call_gaussian(t_forcerec *fr, t_QMrec *qm, t_MMrec *mm, rvec f[], rvec fshift[])
 {
     /* normal gaussian jobs */
     static int
@@ -1045,7 +1041,7 @@ real call_gaussian(t_commrec *cr,  t_forcerec *fr,
 
     write_gaussian_input(step, fr, qm, mm);
     do_gaussian(step, exe);
-    QMener = read_gaussian_output(QMgrad, MMgrad, step, qm, mm);
+    QMener = read_gaussian_output(QMgrad, MMgrad, qm, mm);
     /* put the QMMM forces in the force array and to the fshift
      */
     for (i = 0; i < qm->nrQMatoms; i++)
@@ -1071,8 +1067,7 @@ real call_gaussian(t_commrec *cr,  t_forcerec *fr,
 
 } /* call_gaussian */
 
-real call_gaussian_SH(t_commrec *cr, t_forcerec *fr, t_QMrec *qm, t_MMrec *mm,
-                      rvec f[], rvec fshift[])
+real call_gaussian_SH(t_forcerec *fr, t_QMrec *qm, t_MMrec *mm, rvec f[], rvec fshift[])
 {
     /* a gaussian call routine intended for doing diabatic surface
      * "sliding". See the manual for the theoretical background of this
@@ -1128,7 +1123,7 @@ real call_gaussian_SH(t_commrec *cr, t_forcerec *fr, t_QMrec *qm, t_MMrec *mm,
     write_gaussian_SH_input(step, swapped, fr, qm, mm);
 
     do_gaussian(step, exe);
-    QMener = read_gaussian_SH_output(QMgrad, MMgrad, step, swapped, qm, mm);
+    QMener = read_gaussian_SH_output(QMgrad, MMgrad, step, qm, mm);
 
     /* check for a surface hop. Only possible if we were already state
      * averaging.
@@ -1149,7 +1144,7 @@ real call_gaussian_SH(t_commrec *cr, t_forcerec *fr, t_QMrec *qm, t_MMrec *mm,
         {
             write_gaussian_SH_input(step, swapped, fr, qm, mm);
             do_gaussian(step, exe);
-            QMener = read_gaussian_SH_output(QMgrad, MMgrad, step, swapped, qm, mm);
+            QMener = read_gaussian_SH_output(QMgrad, MMgrad, step, qm, mm);
         }
     }
     /* add the QMMM forces to the gmx force array and fshift
similarity index 91%
rename from src/gromacs/mdlib/qm_mopac.c
rename to src/gromacs/mdlib/qm_mopac.cpp
index 6eb689cd3ea5d5f4002da26408af314e9154625c..7496965bf09be4db5fcce49db1edc2266f4ff30f 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
 
 /* mopac interface routines */
 void
-F77_FUNC(domldt, DOMLDT) (int *nrqmat, int labels[], char keywords[]);
+    F77_FUNC(domldt, DOMLDT) (int *nrqmat, int labels[], char keywords[]);
 
 void
-F77_FUNC(domop, DOMOP) (int *nrqmat, double qmcrd[], int *nrmmat,
-                        double mmchrg[], double mmcrd[], double qmgrad[],
-                        double mmgrad[], double *energy, double qmcharges[]);
+    F77_FUNC(domop, DOMOP) (int *nrqmat, double qmcrd[], int *nrmmat,
+                            double mmchrg[], double mmcrd[], double qmgrad[],
+                            double mmgrad[], double *energy, double qmcharges[]);
 
 
 
-void init_mopac(t_commrec *cr, t_QMrec *qm, t_MMrec *mm)
+void init_mopac(t_QMrec *qm)
 {
     /* initializes the mopac routines ans sets up the semiempirical
      * computation by calling moldat(). The inline mopac routines can
@@ -104,8 +104,7 @@ void init_mopac(t_commrec *cr, t_QMrec *qm, t_MMrec *mm)
 
 } /* init_mopac */
 
-real call_mopac(t_commrec *cr, t_forcerec *fr, t_QMrec *qm, t_MMrec *mm,
-                rvec f[], rvec fshift[])
+real call_mopac(t_QMrec *qm, t_MMrec *mm, rvec f[], rvec fshift[])
 {
     /* do the actual QMMM calculation using directly linked mopac subroutines
      */
@@ -167,8 +166,7 @@ real call_mopac(t_commrec *cr, t_forcerec *fr, t_QMrec *qm, t_MMrec *mm,
     return (QMener);
 }
 
-real call_mopac_SH(t_commrec *cr, t_forcerec *fr, t_QMrec *qm, t_MMrec *mm,
-                   rvec f[], rvec fshift[])
+real call_mopac_SH(t_QMrec *qm, t_MMrec *mm, rvec f[], rvec fshift[])
 {
     /* do the actual SH QMMM calculation using directly linked mopac
        subroutines */
similarity index 98%
rename from src/gromacs/mdlib/qm_orca.c
rename to src/gromacs/mdlib/qm_orca.cpp
index 11ddfd325b901c82b2b25aa211aa228f59c9546b..2f121806fd3ef98fec128f7ad797d0208067ca73 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2008, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -284,7 +284,7 @@ real read_orca_output(rvec QMgrad[], rvec MMgrad[], t_forcerec *fr,
     int
         i, j, atnum;
     char
-        buf[300], tmp[300], orca_xyzFilename[300], orca_pcgradFilename[300], orca_engradFilename[300];
+        buf[300], orca_xyzFilename[300], orca_pcgradFilename[300], orca_engradFilename[300];
     real
         QMener;
     FILE
@@ -320,8 +320,8 @@ real read_orca_output(rvec QMgrad[], rvec MMgrad[], t_forcerec *fr,
                 gmx_fatal(FARGS, "Unexpected end of ORCA output");
             }
 #ifdef GMX_DOUBLE
-            sscanf(buf, "%s%lf%lf%lf\n",
-                   tmp,
+            sscanf(buf, "%d%lf%lf%lf\n",
+                   &atnum,
                    &qm->xQM[i][XX],
                    &qm->xQM[i][YY],
                    &qm->xQM[i][ZZ]);
similarity index 95%
rename from src/gromacs/mdlib/qmmm.c
rename to src/gromacs/mdlib/qmmm.cpp
index bb83405a5548e083d481bcf88616aed3c422a2b5..cf7f7b344082ac850676c7b20f2412245ac43d3d 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
 #include <stdlib.h>
 #include <string.h>
 
+#include <cmath>
+
+#include <algorithm>
+
 #include "gromacs/fileio/confio.h"
 #include "gromacs/legacyheaders/force.h"
 #include "gromacs/legacyheaders/macros.h"
@@ -73,36 +77,32 @@ void
 init_gamess(t_commrec *cr, t_QMrec *qm, t_MMrec *mm);
 
 real
-call_gamess(t_commrec *cr, t_forcerec *fr,
+call_gamess(t_forcerec *fr,
             t_QMrec *qm, t_MMrec *mm, rvec f[], rvec fshift[]);
 
 #elif defined GMX_QMMM_MOPAC
 /* MOPAC interface */
 
 void
-init_mopac(t_commrec *cr, t_QMrec *qm, t_MMrec *mm);
+init_mopac(t_QMrec *qm);
 
 real
-call_mopac(t_commrec *cr, t_forcerec *fr, t_QMrec *qm,
-           t_MMrec *mm, rvec f[], rvec fshift[]);
+call_mopac(t_QMrec *qm, t_MMrec *mm, rvec f[], rvec fshift[]);
 
 real
-call_mopac_SH(t_commrec *cr, t_forcerec *fr, t_QMrec *qm,
-              t_MMrec *mm, rvec f[], rvec fshift[]);
+call_mopac_SH(t_QMrec *qm, t_MMrec *mm, rvec f[], rvec fshift[]);
 
 #elif defined GMX_QMMM_GAUSSIAN
 /* GAUSSIAN interface */
 
 void
-init_gaussian(t_commrec *cr, t_QMrec *qm, t_MMrec *mm);
+init_gaussian(t_QMrec *qm);
 
 real
-call_gaussian_SH(t_commrec *cr, t_forcerec *fr, t_QMrec *qm,
-                 t_MMrec *mm, rvec f[], rvec fshift[]);
+call_gaussian_SH(t_forcerec *fr, t_QMrec *qm, t_MMrec *mm, rvec f[], rvec fshift[]);
 
 real
-call_gaussian(t_commrec *cr, t_forcerec *fr, t_QMrec *qm,
-              t_MMrec *mm, rvec f[], rvec fshift[]);
+call_gaussian(t_forcerec *fr, t_QMrec *qm, t_MMrec *mm, rvec f[], rvec fshift[]);
 
 #elif defined GMX_QMMM_ORCA
 /* ORCA interface */
@@ -165,11 +165,11 @@ real call_QMroutine(t_commrec gmx_unused *cr, t_forcerec gmx_unused *fr, t_QMrec
 #ifdef GMX_QMMM_MOPAC
         if (qm->bSH)
         {
-            QMener = call_mopac_SH(cr, fr, qm, mm, f, fshift);
+            QMener = call_mopac_SH(qm, mm, f, fshift);
         }
         else
         {
-            QMener = call_mopac(cr, fr, qm, mm, f, fshift);
+            QMener = call_mopac(qm, mm, f, fshift);
         }
 #else
         gmx_fatal(FARGS, "Semi-empirical QM only supported with Mopac.");
@@ -181,7 +181,7 @@ real call_QMroutine(t_commrec gmx_unused *cr, t_forcerec gmx_unused *fr, t_QMrec
         if (qm->bSH && qm->QMmethod == eQMmethodCASSCF)
         {
 #ifdef GMX_QMMM_GAUSSIAN
-            QMener = call_gaussian_SH(cr, fr, qm, mm, f, fshift);
+            QMener = call_gaussian_SH(fr, qm, mm, f, fshift);
 #else
             gmx_fatal(FARGS, "Ab-initio Surface-hopping only supported with Gaussian.");
 #endif
@@ -189,9 +189,9 @@ real call_QMroutine(t_commrec gmx_unused *cr, t_forcerec gmx_unused *fr, t_QMrec
         else
         {
 #ifdef GMX_QMMM_GAMESS
-            QMener = call_gamess(cr, fr, qm, mm, f, fshift);
+            QMener = call_gamess(fr, qm, mm, f, fshift);
 #elif defined GMX_QMMM_GAUSSIAN
-            QMener = call_gaussian(cr, fr, qm, mm, f, fshift);
+            QMener = call_gaussian(fr, qm, mm, f, fshift);
 #elif defined GMX_QMMM_ORCA
             QMener = call_orca(fr, qm, mm, f, fshift);
 #else
@@ -210,7 +210,7 @@ void init_QMroutine(t_commrec gmx_unused *cr, t_QMrec gmx_unused *qm, t_MMrec gm
     {
 #ifdef GMX_QMMM_MOPAC
         /* do a semi-empiprical calculation */
-        init_mopac(cr, qm, mm);
+        init_mopac(qm);
 #else
         gmx_fatal(FARGS, "Semi-empirical QM only supported with Mopac.");
 #endif
@@ -221,7 +221,7 @@ void init_QMroutine(t_commrec gmx_unused *cr, t_QMrec gmx_unused *qm, t_MMrec gm
 #ifdef GMX_QMMM_GAMESS
         init_gamess(cr, qm, mm);
 #elif defined GMX_QMMM_GAUSSIAN
-        init_gaussian(cr, qm, mm);
+        init_gaussian(qm);
 #elif defined GMX_QMMM_ORCA
         init_orca(qm);
 #else
@@ -262,7 +262,7 @@ static void punch_QMMM_excl(t_QMrec *qm, t_MMrec *mm, t_blocka *excls)
     FILE
        *out = NULL;
     int
-        i, j, k, nrexcl = 0, *excluded = NULL, max = 0;
+        i, j, k, nrexcl = 0, *excluded = NULL, max_excl = 0;
 
 
     out = fopen("QMMMexcl.dat", "w");
@@ -280,10 +280,10 @@ static void punch_QMMM_excl(t_QMrec *qm, t_MMrec *mm, t_blocka *excls)
             {
                 if (mm->indexMM[k] == excls->a[j]) /* the excluded MM atom */
                 {
-                    if (nrexcl >= max)
+                    if (nrexcl >= max_excl)
                     {
-                        max += 1000;
-                        srenew(excluded, max);
+                        max_excl += 1000;
+                        srenew(excluded, max_excl);
                     }
                     excluded[nrexcl++] = k;
                     continue;
@@ -485,8 +485,8 @@ void init_QMMMrec(t_commrec  *cr,
     t_ilist                 *ilist_mol;
     gmx_mtop_atomlookup_t    alook;
 
-    c6au  = (HARTREE2KJ*AVOGADRO*pow(BOHR2NM, 6));
-    c12au = (HARTREE2KJ*AVOGADRO*pow(BOHR2NM, 12));
+    c6au  = (HARTREE2KJ*AVOGADRO*std::pow(BOHR2NM, 6));
+    c12au = (HARTREE2KJ*AVOGADRO*std::pow(BOHR2NM, 12));
     /* issue a fatal if the user wants to run with more than one node */
     if (PAR(cr))
     {
@@ -753,7 +753,7 @@ void init_QMMMrec(t_commrec  *cr,
         {
 #ifdef GMX_QMMM_MOPAC
             /* semi-empiprical 1-layer ONIOM calculation requested (mopac93) */
-            init_mopac(cr, qr->qm[0], qr->mm);
+            init_mopac(qr->qm[0]);
 #else
             gmx_fatal(FARGS, "Semi-empirical QM only supported with Mopac.");
 #endif
@@ -764,7 +764,7 @@ void init_QMMMrec(t_commrec  *cr,
 #ifdef GMX_QMMM_GAMESS
             init_gamess(cr, qr->qm[0], qr->mm);
 #elif defined GMX_QMMM_GAUSSIAN
-            init_gaussian(cr, qr->qm[0], qr->mm);
+            init_gaussian(qr->qm[0]);
 #elif defined GMX_QMMM_ORCA
             init_orca(qr->qm[0]);
 #else
@@ -797,8 +797,6 @@ void update_QMMMrec(t_commrec      *cr,
         QMMMlist;
     rvec
         dx, crd;
-    int
-       *MMatoms;
     t_QMrec
        *qm;
     t_MMrec
@@ -810,8 +808,8 @@ void update_QMMMrec(t_commrec      *cr,
     real
         c12au, c6au;
 
-    c6au  = (HARTREE2KJ*AVOGADRO*pow(BOHR2NM, 6));
-    c12au = (HARTREE2KJ*AVOGADRO*pow(BOHR2NM, 12));
+    c6au  = (HARTREE2KJ*AVOGADRO*std::pow(BOHR2NM, 6));
+    c12au = (HARTREE2KJ*AVOGADRO*std::pow(BOHR2NM, 12));
 
     /* every cpu has this array. On every processor we fill this array
      * with 1's and 0's. 1's indicate the atoms is a QM atom on the
@@ -873,7 +871,7 @@ void update_QMMMrec(t_commrec      *cr,
                 crd[0] = IS2X(QMMMlist.shift[i]) + IS2X(qm_i_particles[i].shift);
                 crd[1] = IS2Y(QMMMlist.shift[i]) + IS2Y(qm_i_particles[i].shift);
                 crd[2] = IS2Z(QMMMlist.shift[i]) + IS2Z(qm_i_particles[i].shift);
-                is     = XYZ2IS(crd[0], crd[1], crd[2]);
+                is     = static_cast<int>(XYZ2IS(crd[0], crd[1], crd[2]));
                 for (j = QMMMlist.jindex[i];
                      j < QMMMlist.jindex[i+1];
                      j++)
@@ -897,9 +895,13 @@ void update_QMMMrec(t_commrec      *cr,
             qsort(qm_i_particles, QMMMlist.nri,
                   (size_t)sizeof(qm_i_particles[0]),
                   struct_comp);
-            qsort(mm_j_particles, mm_nr,
-                  (size_t)sizeof(mm_j_particles[0]),
-                  struct_comp);
+            /* The mm_j_particles argument to qsort is not allowed to be NULL */
+            if (mm_nr > 0)
+            {
+                qsort(mm_j_particles, mm_nr,
+                      (size_t)sizeof(mm_j_particles[0]),
+                      struct_comp);
+            }
             /* remove multiples in the QM shift array, since in init_QMMM() we
              * went through the atom numbers from 0 to md.nr, the order sorted
              * here matches the one of QMindex already.
index 99e16425b5639626944d081b034be8413d745ffb..bf99fdd7c4771a4435fa3aecdd4c1bdbb2705302 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -50,7 +50,6 @@
 namespace gmx
 {
 
-class File;
 class HelpWriterContext;
 
 /*! \libinternal \brief
index 58be839ba3222b2400c08a6551fe92605a80cf9f..6a3678ff6c5c708085237a4ed693b43aa543fc70 100644 (file)
@@ -49,9 +49,9 @@
 #include "gromacs/onlinehelp/helpformat.h"
 #include "gromacs/onlinehelp/helpwritercontext.h"
 #include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/file.h"
 #include "gromacs/utility/gmxassert.h"
 #include "gromacs/utility/stringutil.h"
+#include "gromacs/utility/textwriter.h"
 
 namespace gmx
 {
@@ -180,7 +180,7 @@ AbstractCompositeHelpTopic::writeSubTopicList(const HelpWriterContext &context,
     {
         return false;
     }
-    File              &file = context.outputFile();
+    TextWriter        &file = context.outputFile();
     TextTableFormatter formatter;
     formatter.addColumn(NULL, maxNameLength + 1, false);
     formatter.addColumn(NULL, 72 - maxNameLength, true);
index f4deb7918619be1e1dd3cb568a7312a1d80cdb4c..07d2f29401e29023d20f6f9fa5e25662cfcac391 100644 (file)
 
 #include "gromacs/onlinehelp/helpformat.h"
 #include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/file.h"
 #include "gromacs/utility/gmxassert.h"
 #include "gromacs/utility/programcontext.h"
 #include "gromacs/utility/stringutil.h"
+#include "gromacs/utility/textwriter.h"
 
 #include "rstparser.h"
 
@@ -438,9 +438,9 @@ class HelpWriterContext::Impl
         {
             public:
                 //! Initializes the state with the given parameters.
-                SharedState(File *file, HelpOutputFormat format,
+                SharedState(TextOutputStream *stream, HelpOutputFormat format,
                             const HelpLinks *links)
-                    : file_(*file), format_(format), links_(links)
+                    : file_(stream), format_(format), links_(links)
                 {
                 }
 
@@ -466,8 +466,8 @@ class HelpWriterContext::Impl
                     return *consoleOptionsFormatter_;
                 }
 
-                //! Output file to which the help is written.
-                File                   &file_;
+                //! Writer for writing the help.
+                TextWriter              file_;
                 //! Output format for the help output.
                 HelpOutputFormat        format_;
                 //! Links to use.
@@ -598,14 +598,14 @@ void HelpWriterContext::Impl::processMarkup(const std::string &text,
  * HelpWriterContext
  */
 
-HelpWriterContext::HelpWriterContext(File *file, HelpOutputFormat format)
-    : impl_(new Impl(Impl::StatePointer(new Impl::SharedState(file, format, NULL)), 0))
+HelpWriterContext::HelpWriterContext(TextOutputStream *stream, HelpOutputFormat format)
+    : impl_(new Impl(Impl::StatePointer(new Impl::SharedState(stream, format, NULL)), 0))
 {
 }
 
-HelpWriterContext::HelpWriterContext(File *file, HelpOutputFormat format,
+HelpWriterContext::HelpWriterContext(TextOutputStream *stream, HelpOutputFormat format,
                                      const HelpLinks *links)
-    : impl_(new Impl(Impl::StatePointer(new Impl::SharedState(file, format, links)), 0))
+    : impl_(new Impl(Impl::StatePointer(new Impl::SharedState(stream, format, links)), 0))
 {
     if (links != NULL)
     {
@@ -639,9 +639,10 @@ HelpOutputFormat HelpWriterContext::outputFormat() const
     return impl_->state_->format_;
 }
 
-File &HelpWriterContext::outputFile() const
+TextWriter &HelpWriterContext::outputFile() const
 {
-    return impl_->state_->file_;
+    // TODO: Consider how to deal with the const/non-const difference better.
+    return const_cast<TextWriter &>(impl_->state_->file_);
 }
 
 void HelpWriterContext::enterSubSection(const std::string &title)
@@ -676,7 +677,7 @@ void HelpWriterContext::writeTitle(const std::string &title) const
     {
         return;
     }
-    File &file = outputFile();
+    TextWriter &file = outputFile();
     switch (outputFormat())
     {
         case eHelpOutputFormat_Console:
@@ -714,7 +715,7 @@ void HelpWriterContext::writeOptionItem(const std::string &name,
                                         const std::string &info,
                                         const std::string &description) const
 {
-    File &file = outputFile();
+    TextWriter &file = outputFile();
     switch (outputFormat())
     {
         case eHelpOutputFormat_Console:
index 1cbce8e6b5e5533eaeb4af8e7b8565f15bcc4d9d..95cac6d739a1d347ce540bad92b459c2c97b6faf 100644 (file)
@@ -51,8 +51,9 @@
 namespace gmx
 {
 
-class File;
 class TextLineWrapperSettings;
+class TextOutputStream;
+class TextWriter;
 
 /*! \cond libapi */
 //! \libinternal Output format for help writing.
@@ -132,13 +133,13 @@ class HelpWriterContext
 {
     public:
         /*! \brief
-         * Initializes a context with the given output file and format.
+         * Initializes a context with the given output stream and format.
          *
          * \throws std::bad_alloc if out of memory.
          */
-        HelpWriterContext(File *file, HelpOutputFormat format);
+        HelpWriterContext(TextOutputStream *stream, HelpOutputFormat format);
         /*! \brief
-         * Initializes a context with the given output file, format and links.
+         * Initializes a context with the given output stream, format and links.
          *
          * \throws std::bad_alloc if out of memory.
          *
@@ -146,7 +147,7 @@ class HelpWriterContext
          * is destructed.  The caller is responsible for ensuring that the
          * links object remains valid long enough.
          */
-        HelpWriterContext(File *file, HelpOutputFormat format,
+        HelpWriterContext(TextOutputStream *stream, HelpOutputFormat format,
                           const HelpLinks *links);
         //! Creates a copy of the context.
         HelpWriterContext(const HelpWriterContext &other);
@@ -174,15 +175,15 @@ class HelpWriterContext
          */
         HelpOutputFormat outputFormat() const;
         /*! \brief
-         * Returns the raw output file for writing the help.
+         * Returns the raw writer for writing the help.
          *
-         * Using this file directly should be avoided, as it requires one to
+         * Using this writer directly should be avoided, as it requires one to
          * have different code for each output format.
          * Using other methods in this class should be preferred.
          *
          * Does not throw.
          */
-        File &outputFile() const;
+        TextWriter &outputFile() const;
 
         /*! \brief
          * Creates a subsection in the output help.
index f28b0f8e12cdca9a5bc176cd78eeaf1968c0522c..05dfb2438202bc87f0774dcc69adee9053572f83 100644 (file)
 #include "gromacs/onlinehelp/helptopic.h"
 #include "gromacs/onlinehelp/helpwritercontext.h"
 #include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/file.h"
+#include "gromacs/utility/stringstream.h"
 
 #include "gromacs/onlinehelp/tests/mock_helptopic.h"
 #include "testutils/stringtest.h"
 #include "testutils/testasserts.h"
-#include "testutils/testfilemanager.h"
 
 namespace
 {
@@ -68,18 +67,14 @@ class HelpTestBase : public gmx::test::StringTestBase
     public:
         HelpTestBase();
 
-        gmx::test::TestFileManager tempFiles_;
         MockHelpTopic              rootTopic_;
-        std::string                filename_;
-        gmx::File                  helpFile_;
+        gmx::StringOutputStream    helpFile_;
         gmx::HelpWriterContext     context_;
         gmx::HelpManager           manager_;
 };
 
 HelpTestBase::HelpTestBase()
     : rootTopic_("", NULL, "Root topic text"),
-      filename_(tempFiles_.getTemporaryFilePath("helptext.txt")),
-      helpFile_(filename_, "w"),
       context_(&helpFile_, gmx::eHelpOutputFormat_Console),
       manager_(rootTopic_, context_)
 {
@@ -158,7 +153,7 @@ void HelpTopicFormattingTest::checkHelpFormatting()
     ASSERT_NO_THROW_GMX(manager_.writeCurrentTopic());
     helpFile_.close();
 
-    checkFileContents(filename_, "HelpText");
+    checkText(helpFile_.toString(), "HelpText");
 }
 
 TEST_F(HelpTopicFormattingTest, FormatsSimpleTopic)
index 2e85724ac4d8e43ee679f2e55269a643fb5c03eb..8aecd065e7ed9199bc4e74abb0a4d0d4e245aab9 100644 (file)
@@ -56,6 +56,8 @@
 #include "gromacs/utility/exceptions.h"
 #include "gromacs/utility/gmxassert.h"
 #include "gromacs/utility/smalloc.h"
+#include "gromacs/utility/stringutil.h"
+#include "gromacs/utility/textwriter.h"
 
 /********************************************************************
  * gmx_ana_indexgrps_t functions
@@ -261,18 +263,19 @@ gmx_ana_indexgrps_find(gmx_ana_index_t *dest, std::string *destName,
 }
 
 /*!
- * \param[in]  fp     Where to print the output.
+ * \param[in]  writer Writer to use for output.
  * \param[in]  g      Index groups to print.
  * \param[in]  maxn   Maximum number of indices to print
  *      (-1 = print all, 0 = print only names).
  */
 void
-gmx_ana_indexgrps_print(FILE *fp, gmx_ana_indexgrps_t *g, int maxn)
+gmx_ana_indexgrps_print(gmx::TextWriter *writer, gmx_ana_indexgrps_t *g, int maxn)
 {
     for (int i = 0; i < g->nr; ++i)
     {
-        fprintf(fp, " Group %2d \"%s\" ", i, g->names[i].c_str());
-        gmx_ana_index_dump(fp, &g->g[i], maxn);
+        writer->writeString(gmx::formatString(" Group %2d \"%s\" ",
+                                              i, g->names[i].c_str()));
+        gmx_ana_index_dump(writer, &g->g[i], maxn);
     }
 }
 
@@ -394,34 +397,32 @@ gmx_ana_index_copy(gmx_ana_index_t *dest, gmx_ana_index_t *src, bool bAlloc)
 }
 
 /*!
- * \param[in]  fp     Where to print the output.
+ * \param[in]  writer Writer to use for output.
  * \param[in]  g      Index group to print.
  * \param[in]  maxn   Maximum number of indices to print (-1 = print all).
  */
 void
-gmx_ana_index_dump(FILE *fp, gmx_ana_index_t *g, int maxn)
+gmx_ana_index_dump(gmx::TextWriter *writer, gmx_ana_index_t *g, int maxn)
 {
-    int  j, n;
-
-    fprintf(fp, "(%d atoms)", g->isize);
+    writer->writeString(gmx::formatString("(%d atoms)", g->isize));
     if (maxn != 0)
     {
-        fprintf(fp, ":");
-        n = g->isize;
+        writer->writeString(":");
+        int n = g->isize;
         if (maxn >= 0 && n > maxn)
         {
             n = maxn;
         }
-        for (j = 0; j < n; ++j)
+        for (int j = 0; j < n; ++j)
         {
-            fprintf(fp, " %d", g->index[j]+1);
+            writer->writeString(gmx::formatString(" %d", g->index[j]+1));
         }
         if (n < g->isize)
         {
-            fprintf(fp, " ...");
+            writer->writeString(" ...");
         }
     }
-    fprintf(fp, "\n");
+    writer->writeLine();
 }
 
 int
index 7ec5bd96916b0d2610fcedbe87cd9ab2868b4784..9ac9364008ba028178a82269af688450ca373f41 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2009,2010,2011,2012,2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2009,2010,2011,2012,2013,2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
 #include "gromacs/legacyheaders/types/simple.h"
 #include "gromacs/topology/block.h"
 
+namespace gmx
+{
+class TextWriter;
+}
+
 struct t_topology;
 
 /** Stores a set of index groups. */
@@ -205,7 +210,7 @@ gmx_ana_indexgrps_find(gmx_ana_index_t *dest, std::string *destName,
 
 /** Writes out a list of index groups. */
 void
-gmx_ana_indexgrps_print(FILE *fp, gmx_ana_indexgrps_t *g, int maxn);
+gmx_ana_indexgrps_print(gmx::TextWriter *writer, gmx_ana_indexgrps_t *g, int maxn);
 /*@}*/
 
 /*! \name Functions for handling gmx_ana_index_t
@@ -235,7 +240,7 @@ gmx_ana_index_copy(gmx_ana_index_t *dest, gmx_ana_index_t *src, bool bAlloc);
 
 /** Writes out the contents of a index group. */
 void
-gmx_ana_index_dump(FILE *fp, gmx_ana_index_t *g, int maxn);
+gmx_ana_index_dump(gmx::TextWriter *writer, gmx_ana_index_t *g, int maxn);
 
 /*! \brief
  * Returns maximum atom index that appears in an index group.
index 768be462d87beaa763113edb6043f3e43ebb38dc..86fdcbefaed185a097378354f35f05ef366567f3 100644 (file)
@@ -62,8 +62,6 @@
 #include <algorithm>
 #include <vector>
 
-#include "thread_mpi/mutex.h"
-
 #include "gromacs/legacyheaders/names.h"
 #include "gromacs/math/vec.h"
 #include "gromacs/pbcutil/pbc.h"
@@ -72,6 +70,7 @@
 #include "gromacs/utility/arrayref.h"
 #include "gromacs/utility/exceptions.h"
 #include "gromacs/utility/gmxassert.h"
+#include "gromacs/utility/mutex.h"
 #include "gromacs/utility/stringutil.h"
 
 namespace gmx
@@ -309,7 +308,7 @@ class AnalysisNeighborhoodSearchImpl
         //! Data structure to hold the grid cell contents.
         CellList                cells_;
 
-        tMPI::mutex             createPairSearchMutex_;
+        Mutex                   createPairSearchMutex_;
         PairSearchList          pairSearchList_;
 
         friend class AnalysisNeighborhoodPairSearchImpl;
@@ -439,7 +438,7 @@ AnalysisNeighborhoodSearchImpl::~AnalysisNeighborhoodSearchImpl()
 AnalysisNeighborhoodSearchImpl::PairSearchImplPointer
 AnalysisNeighborhoodSearchImpl::getPairSearch()
 {
-    tMPI::lock_guard<tMPI::mutex> lock(createPairSearchMutex_);
+    lock_guard<Mutex> lock(createPairSearchMutex_);
     // TODO: Consider whether this needs to/can be faster, e.g., by keeping a
     // separate pool of unused search objects.
     PairSearchList::const_iterator i;
@@ -1252,7 +1251,7 @@ class AnalysisNeighborhood::Impl
 
         SearchImplPointer getSearch();
 
-        tMPI::mutex             createSearchMutex_;
+        Mutex                   createSearchMutex_;
         SearchList              searchList_;
         real                    cutoff_;
         const t_blocka         *excls_;
@@ -1263,7 +1262,7 @@ class AnalysisNeighborhood::Impl
 AnalysisNeighborhood::Impl::SearchImplPointer
 AnalysisNeighborhood::Impl::getSearch()
 {
-    tMPI::lock_guard<tMPI::mutex> lock(createSearchMutex_);
+    lock_guard<Mutex> lock(createSearchMutex_);
     // TODO: Consider whether this needs to/can be faster, e.g., by keeping a
     // separate pool of unused search objects.
     SearchList::const_iterator i;
index d3d9096f18211cda5374801599cbd1adb69b096d..d1eb39fcf3081e1f5e1769c12b36677cea8d2f9e 100644 (file)
@@ -66,8 +66,9 @@
  *    methods and initializes the children of the method element.
  *  - selectioncollection.h, selectioncollection.cpp:
  *    These files define the high-level public interface to the parser
- *    through SelectionCollection::parseFromStdin(),
- *    SelectionCollection::parseFromFile() and
+ *    through SelectionCollection::parseInteractive(),
+ *    SelectionCollection::parseFromStdin(),
+ *    SelectionCollection::parseFromFile(), and
  *    SelectionCollection::parseFromString().
  *
  * The basic control flow in the parser is as follows: when a parser function
 #include "gromacs/selection/selection.h"
 #include "gromacs/utility/cstringutil.h"
 #include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/file.h"
 #include "gromacs/utility/smalloc.h"
 #include "gromacs/utility/stringutil.h"
+#include "gromacs/utility/textwriter.h"
 
 #include "keywords.h"
 #include "poscalc.h"
@@ -310,9 +311,11 @@ _gmx_selparser_handle_error(yyscan_t scanner)
     catch (gmx::UserInputError &ex)
     {
         ex.prependContext(context);
-        if (_gmx_sel_is_lexer_interactive(scanner))
+        gmx::TextWriter *statusWriter
+            = _gmx_sel_lexer_get_status_writer(scanner);
+        if (statusWriter != NULL)
         {
-            gmx::formatExceptionMessageToFile(stderr, ex);
+            gmx::formatExceptionMessageToWriter(statusWriter, ex);
             return true;
         }
         throw;
@@ -1062,10 +1065,13 @@ _gmx_sel_init_selection(const char                             *name,
     root->fillNameIfMissing(_gmx_sel_lexer_pselstr(scanner));
 
     /* Print out some information if the parser is interactive */
-    if (_gmx_sel_is_lexer_interactive(scanner))
+    gmx::TextWriter *statusWriter = _gmx_sel_lexer_get_status_writer(scanner);
+    if (statusWriter != NULL)
     {
-        fprintf(stderr, "Selection '%s' parsed\n",
-                _gmx_sel_lexer_pselstr(scanner));
+        const std::string message
+            = gmx::formatString("Selection '%s' parsed",
+                                _gmx_sel_lexer_pselstr(scanner));
+        statusWriter->writeLine(message);
     }
 
     return root;
@@ -1129,9 +1135,12 @@ _gmx_sel_assign_variable(const char                             *name,
     srenew(sc->varstrs, sc->nvars + 1);
     sc->varstrs[sc->nvars] = gmx_strdup(pselstr);
     ++sc->nvars;
-    if (_gmx_sel_is_lexer_interactive(scanner))
+    gmx::TextWriter *statusWriter = _gmx_sel_lexer_get_status_writer(scanner);
+    if (statusWriter != NULL)
     {
-        fprintf(stderr, "Variable '%s' parsed\n", pselstr);
+        const std::string message
+            = gmx::formatString("Variable '%s' parsed", pselstr);
+        statusWriter->writeLine(message);
     }
     return root;
 }
index a37f3e8cf77fd65b7dd0ab6913d1c72cd78eeb24..3cdfb93e73b11554cb8984336cb319257c61e377 100644 (file)
@@ -495,7 +495,7 @@ static yyconst flex_int16_t yy_chk[151] =
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2009,2010,2011,2012,2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2009,2010,2011,2012,2013,2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -932,7 +932,7 @@ case 6:
 YY_RULE_SETUP
 #line 137 "scanner.l"
 {
-                    if (yytext[0] == ';' || state->bInteractive)
+                    if (yytext[0] == ';' || state->statusWriter != NULL)
                     {
                         rtrim(state->pselstr);
                         state->bCmdStart = true;
index ad7fcf56ce3e33a90ec1e620803edb1bdc5bd388..b918ccc5fd375f9266eb75d7a7f3378505bbfef1 100644 (file)
 
 #include "parser.h"
 
+namespace gmx
+{
+class TextWriter;
+}
+
 struct gmx_ana_indexgrps_t;
 struct gmx_ana_selcollection_t;
 
@@ -62,7 +67,7 @@ typedef void *yyscan_t;
 /** Initializes the selection scanner. */
 void
 _gmx_sel_init_lexer(yyscan_t *scannerp, struct gmx_ana_selcollection_t *sc,
-                    bool bInteractive, int maxnr, bool bGroups,
+                    gmx::TextWriter *statusWriter, int maxnr, bool bGroups,
                     struct gmx_ana_indexgrps_t *grps);
 /** Frees memory allocated for the selection scanner. */
 void
@@ -77,9 +82,9 @@ _gmx_sel_lexer_set_exception(yyscan_t                    scanner,
 void
 _gmx_sel_lexer_rethrow_exception_if_occurred(yyscan_t scanner);
 
-/** Returns true if the scanner is interactive. */
-bool
-_gmx_sel_is_lexer_interactive(yyscan_t scanner);
+/** Returns writer for status output (if not NULL, the scanner is interactive). */
+gmx::TextWriter *
+_gmx_sel_lexer_get_status_writer(yyscan_t scanner);
 /** Returns the selection collection for the scanner. */
 struct gmx_ana_selcollection_t *
 _gmx_sel_lexer_selcollection(yyscan_t scanner);
index f41234b2de01ea6d47b9acfe7560e89d1990633c..d2d2fd97ecbd8bc7edcfd52df34ceeaf32a89d0d 100644 (file)
@@ -135,7 +135,7 @@ COMMENT    (#.*)
 
 \\\n            { _gmx_sel_lexer_add_token(yylloc, " ", 1, state); break; }
 ";"|\n          {
-                    if (yytext[0] == ';' || state->bInteractive)
+                    if (yytext[0] == ';' || state->statusWriter != NULL)
                     {
                         rtrim(state->pselstr);
                         state->bCmdStart = true;
index 1f3be505e5d192e406ce9824d30b916a1942ad54..d66d647b58dfb524cc635ae20429cea8ae68ccdc 100644 (file)
@@ -384,8 +384,8 @@ _gmx_sel_lexer_add_token(YYLTYPE *yylloc, const char *str, int len,
 
 void
 _gmx_sel_init_lexer(yyscan_t *scannerp, struct gmx_ana_selcollection_t *sc,
-                    bool bInteractive, int maxnr, bool bGroups,
-                    struct gmx_ana_indexgrps_t *grps)
+                    gmx::TextWriter *statusWriter, int maxnr,
+                    bool bGroups, struct gmx_ana_indexgrps_t *grps)
 {
     int rc = _gmx_sel_yylex_init(scannerp);
     if (rc != 0)
@@ -401,7 +401,7 @@ _gmx_sel_init_lexer(yyscan_t *scannerp, struct gmx_ana_selcollection_t *sc,
     state->grps      = grps;
     state->nexpsel   = (maxnr > 0 ? static_cast<int>(sc->sel.size()) + maxnr : -1);
 
-    state->bInteractive = bInteractive;
+    state->statusWriter = statusWriter;
 
     snew(state->pselstr, STRSTORE_ALLOCSTEP);
     state->pselstr[0]                 = 0;
@@ -461,11 +461,11 @@ _gmx_sel_lexer_rethrow_exception_if_occurred(yyscan_t scanner)
     }
 }
 
-bool
-_gmx_sel_is_lexer_interactive(yyscan_t scanner)
+gmx::TextWriter *
+_gmx_sel_lexer_get_status_writer(yyscan_t scanner)
 {
     gmx_sel_lexer_t *state = _gmx_sel_yyget_extra(scanner);
-    return state->bInteractive;
+    return state->statusWriter;
 }
 
 struct gmx_ana_selcollection_t *
index 5bd8eb0e433718259cf383c4135cef7a1894af1b..7b01b81e27346ea116fa2a2d81c63e51ab1cb9ea 100644 (file)
@@ -48,6 +48,7 @@
 namespace gmx
 {
 class SelectionParserSymbol;
+class TextWriter;
 }
 
 /* These need to be defined before including scanner_flex.h, because it
@@ -81,8 +82,8 @@ typedef struct gmx_sel_lexer_t
     //! Number of selections at which the parser should stop.
     int                              nexpsel;
 
-    //! Whether the parser is interactive.
-    bool                             bInteractive;
+    //! Writer to use for status output (if not NULL, parser is interactive).
+    gmx::TextWriter                 *statusWriter;
 
     //! Pretty-printed version of the string parsed since last clear.
     char                            *pselstr;
index 7b77d5c5ac4845a31d286e680517d69395f48180..8c81ca51ac6feba05cfc00d6d9d67b2b14f6f969 100644 (file)
@@ -51,6 +51,7 @@
 #include "gromacs/utility/exceptions.h"
 #include "gromacs/utility/gmxassert.h"
 #include "gromacs/utility/stringutil.h"
+#include "gromacs/utility/textwriter.h"
 
 #include "selelem.h"
 #include "selvalue.h"
@@ -320,7 +321,8 @@ Selection::printDebugInfo(FILE *fp, int nmaxind) const
     fprintf(fp, "    Group ");
     gmx_ana_index_t g;
     gmx_ana_index_set(&g, p.m.mapb.nra, p.m.mapb.a, 0);
-    gmx_ana_index_dump(fp, &g, nmaxind);
+    TextWriter      writer(fp);
+    gmx_ana_index_dump(&writer, &g, nmaxind);
 
     fprintf(fp, "    Block (size=%d):", p.m.mapb.nr);
     if (!p.m.mapb.index)
index e18078814b489a5bfdfc32878001dd145bad0b7a..b7ee4d0b2d67b75835196179a9a6a4878c6f5727 100644 (file)
@@ -49,6 +49,7 @@
 #include <string>
 #include <vector>
 
+#include <boost/scoped_ptr.hpp>
 #include <boost/shared_ptr.hpp>
 
 #include "gromacs/fileio/trx.h"
 #include "gromacs/selection/selhelp.h"
 #include "gromacs/topology/topology.h"
 #include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/file.h"
+#include "gromacs/utility/filestream.h"
 #include "gromacs/utility/gmxassert.h"
 #include "gromacs/utility/smalloc.h"
 #include "gromacs/utility/stringutil.h"
+#include "gromacs/utility/textwriter.h"
 
 #include "compiler.h"
 #include "mempool.h"
@@ -129,43 +131,44 @@ namespace
 /*! \brief
  * Reads a single selection line from stdin.
  *
- * \param[in]  infile        File to read from (typically File::standardInput()).
- * \param[in]  bInteractive  Whether to print interactive prompts.
+ * \param[in]  inputStream   Stream to read from (typically the StandardInputStream).
+ * \param[in]  statusWriter  Stream to print prompts to (if NULL, no output is done).
  * \param[out] line          The read line is stored here.
  * \returns true if something was read, false if at end of input.
  *
  * Handles line continuation, reading also the continuing line(s) in one call.
  */
-bool promptLine(File *infile, bool bInteractive, std::string *line)
+bool promptLine(TextInputStream *inputStream, TextWriter *statusWriter,
+                std::string *line)
 {
-    if (bInteractive)
+    if (statusWriter != NULL)
     {
-        fprintf(stderr, "> ");
+        statusWriter->writeString("> ");
     }
-    if (!infile->readLineWithTrailingSpace(line))
+    if (!inputStream->readLine(line))
     {
         return false;
     }
     while (endsWith(*line, "\\\n"))
     {
         line->resize(line->length() - 2);
-        if (bInteractive)
+        if (statusWriter != NULL)
         {
-            fprintf(stderr, "... ");
+            statusWriter->writeString("... ");
         }
         std::string buffer;
         // Return value ignored, buffer remains empty and works correctly
         // if there is nothing to read.
-        infile->readLineWithTrailingSpace(&buffer);
+        inputStream->readLine(&buffer);
         line->append(buffer);
     }
     if (endsWith(*line, "\n"))
     {
         line->resize(line->length() - 1);
     }
-    else if (bInteractive)
+    else if (statusWriter != NULL)
     {
-        fprintf(stderr, "\n");
+        statusWriter->writeLine();
     }
     return true;
 }
@@ -185,26 +188,14 @@ int runParserLoop(yyscan_t scanner, _gmx_sel_yypstate *parserState,
                   bool bInteractive)
 {
     int status    = YYPUSH_MORE;
-    int prevToken = 0;
     do
     {
         YYSTYPE value;
         YYLTYPE location;
         int     token = _gmx_sel_yylex(&value, &location, scanner);
-        if (bInteractive)
+        if (bInteractive && token == 0)
         {
-            if (token == 0)
-            {
-                break;
-            }
-            // Empty commands cause the interactive parser to print out
-            // status information. This avoids producing those unnecessarily,
-            // e.g., from "resname RA;;".
-            if (prevToken == CMD_SEP && token == CMD_SEP)
-            {
-                continue;
-            }
-            prevToken = token;
+            break;
         }
         status = _gmx_sel_yypush_parse(parserState, token, &value, &location, scanner);
     }
@@ -216,6 +207,7 @@ int runParserLoop(yyscan_t scanner, _gmx_sel_yypstate *parserState,
 /*! \brief
  * Print current status in response to empty line in interactive input.
  *
+ * \param[in] writer         Writer to use for the output.
  * \param[in] sc             Selection collection data structure.
  * \param[in] grps           Available index groups.
  * \param[in] firstSelection Index of first selection from this interactive
@@ -227,52 +219,54 @@ int runParserLoop(yyscan_t scanner, _gmx_sel_yypstate *parserState,
  *
  * Prints the available index groups and currently provided selections.
  */
-void printCurrentStatus(gmx_ana_selcollection_t *sc, gmx_ana_indexgrps_t *grps,
-                        size_t firstSelection, int maxCount,
-                        const std::string &context, bool bFirst)
+void printCurrentStatus(TextWriter *writer, gmx_ana_selcollection_t *sc,
+                        gmx_ana_indexgrps_t *grps, size_t firstSelection,
+                        int maxCount, const std::string &context, bool bFirst)
 {
     if (grps != NULL)
     {
-        std::fprintf(stderr, "Available static index groups:\n");
-        gmx_ana_indexgrps_print(stderr, grps, 0);
+        writer->writeLine("Available static index groups:");
+        gmx_ana_indexgrps_print(writer, grps, 0);
     }
-    std::fprintf(stderr, "Specify ");
+    writer->writeString("Specify ");
     if (maxCount < 0)
     {
-        std::fprintf(stderr, "any number of selections");
+        writer->writeString("any number of selections");
     }
     else if (maxCount == 1)
     {
-        std::fprintf(stderr, "a selection");
+        writer->writeString("a selection");
     }
     else
     {
-        std::fprintf(stderr, "%d selections", maxCount);
+        writer->writeString(formatString("%d selections", maxCount));
     }
-    std::fprintf(stderr, "%s%s:\n",
-                 context.empty() ? "" : " ", context.c_str());
-    std::fprintf(stderr,
-                 "(one per line, <enter> for status/groups, 'help' for help%s)\n",
-                 maxCount < 0 ? ", Ctrl-D to end" : "");
+    writer->writeString(formatString("%s%s:\n",
+                                     context.empty() ? "" : " ", context.c_str()));
+    writer->writeString(formatString(
+                                "(one per line, <enter> for status/groups, 'help' for help%s)\n",
+                                maxCount < 0 ? ", Ctrl-D to end" : ""));
     if (!bFirst && (sc->nvars > 0 || sc->sel.size() > firstSelection))
     {
-        std::fprintf(stderr, "Currently provided selections:\n");
+        writer->writeLine("Currently provided selections:");
         for (int i = 0; i < sc->nvars; ++i)
         {
-            std::fprintf(stderr, "     %s\n", sc->varstrs[i]);
+            writer->writeString(formatString("     %s\n", sc->varstrs[i]));
         }
         for (size_t i = firstSelection; i < sc->sel.size(); ++i)
         {
-            std::fprintf(stderr, " %2d. %s\n",
-                         static_cast<int>(i - firstSelection + 1),
-                         sc->sel[i]->selectionText());
+            writer->writeString(formatString(
+                                        " %2d. %s\n",
+                                        static_cast<int>(i - firstSelection + 1),
+                                        sc->sel[i]->selectionText()));
         }
         if (maxCount > 0)
         {
             const int remaining
                 = maxCount - static_cast<int>(sc->sel.size() - firstSelection);
-            std::fprintf(stderr, "(%d more selection%s required)\n",
-                         remaining, remaining > 1 ? "s" : "");
+            writer->writeString(formatString(
+                                        "(%d more selection%s required)\n",
+                                        remaining, remaining > 1 ? "s" : ""));
         }
     }
 }
@@ -280,20 +274,21 @@ void printCurrentStatus(gmx_ana_selcollection_t *sc, gmx_ana_indexgrps_t *grps,
 /*! \brief
  * Prints selection help in interactive selection input.
  *
+ * \param[in] writer Writer to use for the output.
  * \param[in] sc    Selection collection data structure.
  * \param[in] line  Line of user input requesting help (starting with `help`).
  *
  * Initializes the selection help if not yet initialized, and finds the help
  * topic based on words on the input line.
  */
-void printHelp(gmx_ana_selcollection_t *sc, const std::string &line)
+void printHelp(TextWriter *writer, gmx_ana_selcollection_t *sc,
+               const std::string &line)
 {
     if (sc->rootHelp.get() == NULL)
     {
         sc->rootHelp = createSelectionHelpTopic();
     }
-    HelpWriterContext context(&File::standardError(),
-                              eHelpOutputFormat_Console);
+    HelpWriterContext context(&writer->stream(), eHelpOutputFormat_Console);
     HelpManager       manager(*sc->rootHelp, context);
     try
     {
@@ -307,7 +302,7 @@ void printHelp(gmx_ana_selcollection_t *sc, const std::string &line)
     }
     catch (const InvalidInputError &ex)
     {
-        fprintf(stderr, "%s\n", ex.what());
+        writer->writeLine(ex.what());
         return;
     }
     manager.writeCurrentTopic();
@@ -317,8 +312,10 @@ void printHelp(gmx_ana_selcollection_t *sc, const std::string &line)
  * Helper function that runs the parser once the tokenizer has been
  * initialized.
  *
- * \param[in,out] scanner Scanner data structure.
- * \param[in]     bStdIn  Whether to use a line-based reading
+ * \param[in,out] scanner       Scanner data structure.
+ * \param[in]     inputStream   Stream to use for input (currently only with
+ *      `bInteractive==true`).
+ * \param[in]     bInteractive  Whether to use a line-based reading
  *      algorithm designed for interactive input.
  * \param[in]     maxnr   Maximum number of selections to parse
  *      (if -1, parse as many as provided by the user).
@@ -327,11 +324,11 @@ void printHelp(gmx_ana_selcollection_t *sc, const std::string &line)
  * \throws        std::bad_alloc if out of memory.
  * \throws        InvalidInputError if there is a parsing error.
  *
- * Used internally to implement parseFromStdin(), parseFromFile() and
+ * Used internally to implement parseInteractive(), parseFromFile() and
  * parseFromString().
  */
-SelectionList runParser(yyscan_t scanner, bool bStdIn, int maxnr,
-                        const std::string &context)
+SelectionList runParser(yyscan_t scanner, TextInputStream *inputStream,
+                        bool bInteractive, int maxnr, const std::string &context)
 {
     boost::shared_ptr<void>  scannerGuard(scanner, &_gmx_sel_free_lexer);
     gmx_ana_selcollection_t *sc   = _gmx_sel_lexer_selcollection(scanner);
@@ -341,30 +338,29 @@ SelectionList runParser(yyscan_t scanner, bool bStdIn, int maxnr,
     {
         boost::shared_ptr<_gmx_sel_yypstate> parserState(
                 _gmx_sel_yypstate_new(), &_gmx_sel_yypstate_delete);
-        if (bStdIn)
+        if (bInteractive)
         {
-            File       &stdinFile(File::standardInput());
-            const bool  bInteractive = _gmx_sel_is_lexer_interactive(scanner);
-            if (bInteractive)
+            TextWriter *statusWriter = _gmx_sel_lexer_get_status_writer(scanner);
+            if (statusWriter != NULL)
             {
-                printCurrentStatus(sc, grps, oldCount, maxnr, context, true);
+                printCurrentStatus(statusWriter, sc, grps, oldCount, maxnr, context, true);
             }
             std::string line;
             int         status;
-            while (promptLine(&stdinFile, bInteractive, &line))
+            while (promptLine(inputStream, statusWriter, &line))
             {
-                if (bInteractive)
+                if (statusWriter != NULL)
                 {
                     line = stripString(line);
                     if (line.empty())
                     {
-                        printCurrentStatus(sc, grps, oldCount, maxnr, context, false);
+                        printCurrentStatus(statusWriter, sc, grps, oldCount, maxnr, context, false);
                         continue;
                     }
                     if (startsWith(line, "help")
                         && (line[4] == 0 || std::isspace(line[4])))
                     {
-                        printHelp(sc, line);
+                        printHelp(statusWriter, sc, line);
                         continue;
                     }
                 }
@@ -673,17 +669,32 @@ SelectionCollection::requiresTopology() const
     return false;
 }
 
-
 SelectionList
-SelectionCollection::parseFromStdin(int nr, bool bInteractive,
+SelectionCollection::parseFromStdin(int count, bool bInteractive,
                                     const std::string &context)
+{
+    return parseInteractive(count, &StandardInputStream::instance(),
+                            bInteractive ? &TextOutputFile::standardError() : NULL,
+                            context);
+}
+
+SelectionList
+SelectionCollection::parseInteractive(int                count,
+                                      TextInputStream   *inputStream,
+                                      TextOutputStream  *statusStream,
+                                      const std::string &context)
 {
     yyscan_t scanner;
 
-    _gmx_sel_init_lexer(&scanner, &impl_->sc_, bInteractive, nr,
-                        impl_->bExternalGroupsSet_,
-                        impl_->grps_);
-    return runParser(scanner, true, nr, context);
+    boost::scoped_ptr<TextWriter> statusWriter;
+    if (statusStream != NULL)
+    {
+        statusWriter.reset(new TextWriter(statusStream));
+        statusWriter->wrapperSettings().setLineLength(78);
+    }
+    _gmx_sel_init_lexer(&scanner, &impl_->sc_, statusWriter.get(),
+                        count, impl_->bExternalGroupsSet_, impl_->grps_);
+    return runParser(scanner, inputStream, true, count, context);
 }
 
 
@@ -693,14 +704,14 @@ SelectionCollection::parseFromFile(const std::string &filename)
 
     try
     {
-        yyscan_t scanner;
-        File     file(filename, "r");
+        yyscan_t      scanner;
+        TextInputFile file(filename);
         // TODO: Exception-safe way of using the lexer.
-        _gmx_sel_init_lexer(&scanner, &impl_->sc_, false, -1,
+        _gmx_sel_init_lexer(&scanner, &impl_->sc_, NULL, -1,
                             impl_->bExternalGroupsSet_,
                             impl_->grps_);
         _gmx_sel_set_lex_input_file(scanner, file.handle());
-        return runParser(scanner, false, -1, std::string());
+        return runParser(scanner, NULL, false, -1, std::string());
     }
     catch (GromacsException &ex)
     {
@@ -717,11 +728,11 @@ SelectionCollection::parseFromString(const std::string &str)
 {
     yyscan_t scanner;
 
-    _gmx_sel_init_lexer(&scanner, &impl_->sc_, false, -1,
+    _gmx_sel_init_lexer(&scanner, &impl_->sc_, NULL, -1,
                         impl_->bExternalGroupsSet_,
                         impl_->grps_);
     _gmx_sel_set_lex_input_str(scanner, str.c_str());
-    return runParser(scanner, false, -1, std::string());
+    return runParser(scanner, NULL, false, -1, std::string());
 }
 
 
index 8ec7ed1f230554acb65ed5cbbbbb094be6af8a80..826f20a04d1a85c3a8ddd23f6e2afda3995d8ec4 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2010,2011,2012,2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2010,2011,2012,2013,2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -63,6 +63,8 @@ namespace gmx
 class Options;
 class SelectionCompiler;
 class SelectionEvaluator;
+class TextInputStream;
+class TextOutputStream;
 
 /*! \brief
  * Collection of selections.
@@ -79,7 +81,7 @@ class SelectionEvaluator;
  * initialization options.
  *
  * After setting the default values, one or more selections can be parsed with
- * one or more calls to parseFromStdin(), parseFromFile(), and/or
+ * one or more calls to parseInteractive(), parseFromStdin(), parseFromFile(), and/or
  * parseFromString().  After all selections are parsed, the topology must be
  * set with setTopology() unless requiresTopology() returns false (the topology
  * can also be set earlier).
@@ -257,6 +259,31 @@ class SelectionCollection
          */
         SelectionList parseFromStdin(int count, bool bInteractive,
                                      const std::string &context);
+        /*! \brief
+         * Parses selection(s) interactively using provided streams.
+         *
+         * \param[in]  count    Number of selections to parse
+         *      (if -1, parse as many as provided by the user).
+         * \param[in]  inputStream  Stream to use for input.
+         * \param[in]  outputStream Stream to use for output
+         *      (if NULL, the parser runs non-interactively and does not
+         *      produce any status messages).
+         * \param[in]  context  Context to print for interactive input.
+         * \returns    Vector of parsed selections.
+         * \throws     std::bad_alloc if out of memory.
+         * \throws     InvalidInputError if there is a parsing error
+         *      (an interactive parser only throws this if too few selections
+         *      are provided and the user forced the end of input).
+         *
+         * Works the same as parseFromStdin(), except that the caller can
+         * provide streams to use instead of `stdin` and `stderr`.
+         *
+         * Mainly usable for unit testing interactive input.
+         */
+        SelectionList parseInteractive(int                count,
+                                       TextInputStream   *inputStream,
+                                       TextOutputStream  *outputStream,
+                                       const std::string &context);
         /*! \brief
          * Parses selection(s) from a file.
          *
index e96a955f716b7198179e1e9b130efd1b3918c217..131e9d25e8b55f1f742b0d0c6925a42ee616f1ec 100644 (file)
@@ -53,9 +53,9 @@
 #include "gromacs/onlinehelp/helptopic.h"
 #include "gromacs/onlinehelp/helpwritercontext.h"
 #include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/file.h"
 #include "gromacs/utility/gmxassert.h"
 #include "gromacs/utility/stringutil.h"
+#include "gromacs/utility/textwriter.h"
 
 #include "selmethod.h"
 #include "symrec.h"
@@ -708,7 +708,7 @@ void KeywordsHelpTopic::printKeywordList(const HelpWriterContext &context,
                                          e_selvalue_t             type,
                                          bool                     bModifiers) const
 {
-    File &file = context.outputFile();
+    TextWriter                &file = context.outputFile();
     MethodList::const_iterator iter;
     for (iter = methods_.begin(); iter != methods_.end(); ++iter)
     {
diff --git a/src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesBasicInput.xml b/src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesBasicInput.xml
new file mode 100644 (file)
index 0000000..b6dab1a
--- /dev/null
@@ -0,0 +1,30 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+  <InteractiveSession Name="Interactive">
+    <String Name="Output0"><![CDATA[
+Specify any number of selections for test context:
+(one per line, <enter> for status/groups, 'help' for help, Ctrl-D to end)
+> ]]></String>
+    <String Name="Input1"><![CDATA[
+foo = resname RA
+]]></String>
+    <String Name="Output1"><![CDATA[
+Variable 'foo = resname RA' parsed
+> ]]></String>
+    <String Name="Input2"><![CDATA[
+resname RB
+]]></String>
+    <String Name="Output2"><![CDATA[
+Selection 'resname RB' parsed
+> ]]></String>
+    <String Name="Input3"><![CDATA[
+"Name" resname RC
+]]></String>
+    <String Name="Output3"><![CDATA[
+Selection '"Name" resname RC' parsed
+> ]]></String>
+    <String Name="Input4"><![CDATA[
+]]></String>
+  </InteractiveSession>
+</ReferenceData>
diff --git a/src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesContinuation.xml b/src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesContinuation.xml
new file mode 100644 (file)
index 0000000..55bba6e
--- /dev/null
@@ -0,0 +1,23 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+  <InteractiveSession Name="Interactive">
+    <String Name="Output0"><![CDATA[
+Specify any number of selections for test context:
+(one per line, <enter> for status/groups, 'help' for help, Ctrl-D to end)
+> ]]></String>
+    <String Name="Input1"><![CDATA[
+resname RB and \
+]]></String>
+    <String Name="Output1"><![CDATA[
+... ]]></String>
+    <String Name="Input2"><![CDATA[
+resname RC
+]]></String>
+    <String Name="Output2"><![CDATA[
+Selection 'resname RB and resname RC' parsed
+> ]]></String>
+    <String Name="Input3"><![CDATA[
+]]></String>
+  </InteractiveSession>
+</ReferenceData>
diff --git a/src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesEmptySelections.xml b/src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesEmptySelections.xml
new file mode 100644 (file)
index 0000000..3f50325
--- /dev/null
@@ -0,0 +1,39 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+  <InteractiveSession Name="Interactive">
+    <String Name="Output0"><![CDATA[
+Specify any number of selections for test context:
+(one per line, <enter> for status/groups, 'help' for help, Ctrl-D to end)
+> ]]></String>
+    <String Name="Input1"><![CDATA[
+resname RA;
+]]></String>
+    <String Name="Output1"><![CDATA[
+Selection 'resname RA' parsed
+> ]]></String>
+    <String Name="Input2"><![CDATA[
+; resname RB;;
+]]></String>
+    <String Name="Output2"><![CDATA[
+Selection 'resname RB' parsed
+> ]]></String>
+    <String Name="Input3"><![CDATA[
+]]></String>
+    <String Name="Output3"><![CDATA[
+Specify any number of selections for test context:
+(one per line, <enter> for status/groups, 'help' for help, Ctrl-D to end)
+Currently provided selections:
+  1. resname RA
+  2. resname RB
+> ]]></String>
+    <String Name="Input4"><![CDATA[
+;
+]]></String>
+    <String Name="Output4"><![CDATA[
+> ]]></String>
+    <String Name="Input5"><![CDATA[
+]]></String>
+  </InteractiveSession>
+</ReferenceData>
diff --git a/src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesMultiSelectionInputStatus.xml b/src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesMultiSelectionInputStatus.xml
new file mode 100644 (file)
index 0000000..8a1d9bf
--- /dev/null
@@ -0,0 +1,34 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+  <InteractiveSession Name="Interactive">
+    <String Name="Output0"><![CDATA[
+Specify any number of selections for test context:
+(one per line, <enter> for status/groups, 'help' for help, Ctrl-D to end)
+> ]]></String>
+    <String Name="Input1"><![CDATA[
+"Sel" resname RA
+]]></String>
+    <String Name="Output1"><![CDATA[
+Selection '"Sel" resname RA' parsed
+> ]]></String>
+    <String Name="Input2"><![CDATA[
+"Sel2" resname RB
+]]></String>
+    <String Name="Output2"><![CDATA[
+Selection '"Sel2" resname RB' parsed
+> ]]></String>
+    <String Name="Input3"><![CDATA[
+
+]]></String>
+    <String Name="Output3"><![CDATA[
+Specify any number of selections for test context:
+(one per line, <enter> for status/groups, 'help' for help, Ctrl-D to end)
+Currently provided selections:
+  1. "Sel" resname RA
+  2. "Sel2" resname RB
+> ]]></String>
+    <String Name="Input4"><![CDATA[
+]]></String>
+  </InteractiveSession>
+</ReferenceData>
diff --git a/src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesMultipleSelectionsOnLine.xml b/src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesMultipleSelectionsOnLine.xml
new file mode 100644 (file)
index 0000000..6f4135d
--- /dev/null
@@ -0,0 +1,22 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+  <InteractiveSession Name="Interactive">
+    <String Name="Output0"><![CDATA[
+Specify 2 selections for test context:
+(one per line, <enter> for status/groups, 'help' for help)
+> ]]></String>
+    <String Name="Input1"><![CDATA[
+resname RA; resname RB and \
+]]></String>
+    <String Name="Output1"><![CDATA[
+... ]]></String>
+    <String Name="Input2"><![CDATA[
+resname RC
+]]></String>
+    <String Name="Output2"><![CDATA[
+Selection 'resname RA' parsed
+Selection 'resname RB and resname RC' parsed
+]]></String>
+  </InteractiveSession>
+</ReferenceData>
diff --git a/src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesNoFinalNewline.xml b/src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesNoFinalNewline.xml
new file mode 100644 (file)
index 0000000..9555a9b
--- /dev/null
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+  <InteractiveSession Name="Interactive">
+    <String Name="Output0"><![CDATA[
+Specify any number of selections for test context:
+(one per line, <enter> for status/groups, 'help' for help, Ctrl-D to end)
+> ]]></String>
+    <String Name="Input1"><![CDATA[
+resname RA]]></String>
+    <String Name="Output1"><![CDATA[
+
+Selection 'resname RA' parsed
+> ]]></String>
+    <String Name="Input2"><![CDATA[
+]]></String>
+  </InteractiveSession>
+</ReferenceData>
diff --git a/src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesNoninteractiveInput.xml b/src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesNoninteractiveInput.xml
new file mode 100644 (file)
index 0000000..515754e
--- /dev/null
@@ -0,0 +1,17 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+  <InteractiveSession Name="Interactive">
+    <String Name="Input1"><![CDATA[
+foo = resname RA
+]]></String>
+    <String Name="Input2"><![CDATA[
+resname RB
+]]></String>
+    <String Name="Input3"><![CDATA[
+"Name" resname RC
+]]></String>
+    <String Name="Input4"><![CDATA[
+]]></String>
+  </InteractiveSession>
+</ReferenceData>
diff --git a/src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesSingleSelectionInput.xml b/src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesSingleSelectionInput.xml
new file mode 100644 (file)
index 0000000..7e469d3
--- /dev/null
@@ -0,0 +1,22 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+  <InteractiveSession Name="Interactive">
+    <String Name="Output0"><![CDATA[
+Specify a selection for test context:
+(one per line, <enter> for status/groups, 'help' for help)
+> ]]></String>
+    <String Name="Input1"><![CDATA[
+foo = resname RA
+]]></String>
+    <String Name="Output1"><![CDATA[
+Variable 'foo = resname RA' parsed
+> ]]></String>
+    <String Name="Input2"><![CDATA[
+resname RA
+]]></String>
+    <String Name="Output2"><![CDATA[
+Selection 'resname RA' parsed
+]]></String>
+  </InteractiveSession>
+</ReferenceData>
diff --git a/src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesSingleSelectionInputNoninteractively.xml b/src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesSingleSelectionInputNoninteractively.xml
new file mode 100644 (file)
index 0000000..32abef4
--- /dev/null
@@ -0,0 +1,12 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+  <InteractiveSession Name="Interactive">
+    <String Name="Input1"><![CDATA[
+foo = resname RA
+]]></String>
+    <String Name="Input2"><![CDATA[
+resname RA
+]]></String>
+  </InteractiveSession>
+</ReferenceData>
diff --git a/src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesSingleSelectionInputStatus.xml b/src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesSingleSelectionInputStatus.xml
new file mode 100644 (file)
index 0000000..1e83b16
--- /dev/null
@@ -0,0 +1,32 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+  <InteractiveSession Name="Interactive">
+    <String Name="Output0"><![CDATA[
+Specify a selection for test context:
+(one per line, <enter> for status/groups, 'help' for help)
+> ]]></String>
+    <String Name="Input1"><![CDATA[
+foo = resname RA
+]]></String>
+    <String Name="Output1"><![CDATA[
+Variable 'foo = resname RA' parsed
+> ]]></String>
+    <String Name="Input2"><![CDATA[
+
+]]></String>
+    <String Name="Output2"><![CDATA[
+Specify a selection for test context:
+(one per line, <enter> for status/groups, 'help' for help)
+Currently provided selections:
+     foo = resname RA
+(1 more selection required)
+> ]]></String>
+    <String Name="Input3"><![CDATA[
+resname RB
+]]></String>
+    <String Name="Output3"><![CDATA[
+Selection 'resname RB' parsed
+]]></String>
+  </InteractiveSession>
+</ReferenceData>
diff --git a/src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesStatusWithExistingSelections.xml b/src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesStatusWithExistingSelections.xml
new file mode 100644 (file)
index 0000000..fc32bf7
--- /dev/null
@@ -0,0 +1,44 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+  <InteractiveSession Name="Interactive">
+    <String Name="Output0"><![CDATA[
+Specify any number of selections for test context:
+(one per line, <enter> for status/groups, 'help' for help, Ctrl-D to end)
+> ]]></String>
+    <String Name="Input1"><![CDATA[
+
+]]></String>
+    <String Name="Output1"><![CDATA[
+Specify any number of selections for test context:
+(one per line, <enter> for status/groups, 'help' for help, Ctrl-D to end)
+Currently provided selections:
+     foo = resname RA
+> ]]></String>
+    <String Name="Input2"><![CDATA[
+bar = resname RC
+]]></String>
+    <String Name="Output2"><![CDATA[
+Variable 'bar = resname RC' parsed
+> ]]></String>
+    <String Name="Input3"><![CDATA[
+resname RA
+]]></String>
+    <String Name="Output3"><![CDATA[
+Selection 'resname RA' parsed
+> ]]></String>
+    <String Name="Input4"><![CDATA[
+
+]]></String>
+    <String Name="Output4"><![CDATA[
+Specify any number of selections for test context:
+(one per line, <enter> for status/groups, 'help' for help, Ctrl-D to end)
+Currently provided selections:
+     foo = resname RA
+     bar = resname RC
+  1. resname RA
+> ]]></String>
+    <String Name="Input5"><![CDATA[
+]]></String>
+  </InteractiveSession>
+</ReferenceData>
diff --git a/src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesStatusWithGroups.xml b/src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesStatusWithGroups.xml
new file mode 100644 (file)
index 0000000..1bc4c4e
--- /dev/null
@@ -0,0 +1,35 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+  <InteractiveSession Name="Interactive">
+    <String Name="Output0"><![CDATA[
+Available static index groups:
+ Group  0 "GrpA" (5 atoms)
+ Group  1 "GrpB" (5 atoms)
+ Group  2 "GrpUnsorted" (8 atoms)
+Specify any number of selections for test context:
+(one per line, <enter> for status/groups, 'help' for help, Ctrl-D to end)
+> ]]></String>
+    <String Name="Input1"><![CDATA[
+resname RA
+]]></String>
+    <String Name="Output1"><![CDATA[
+Selection 'resname RA' parsed
+> ]]></String>
+    <String Name="Input2"><![CDATA[
+
+]]></String>
+    <String Name="Output2"><![CDATA[
+Available static index groups:
+ Group  0 "GrpA" (5 atoms)
+ Group  1 "GrpB" (5 atoms)
+ Group  2 "GrpUnsorted" (8 atoms)
+Specify any number of selections for test context:
+(one per line, <enter> for status/groups, 'help' for help, Ctrl-D to end)
+Currently provided selections:
+  1. resname RA
+> ]]></String>
+    <String Name="Input3"><![CDATA[
+]]></String>
+  </InteractiveSession>
+</ReferenceData>
diff --git a/src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesTwoSelectionInput.xml b/src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesTwoSelectionInput.xml
new file mode 100644 (file)
index 0000000..cef23d4
--- /dev/null
@@ -0,0 +1,22 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+  <InteractiveSession Name="Interactive">
+    <String Name="Output0"><![CDATA[
+Specify 2 selections for test context:
+(one per line, <enter> for status/groups, 'help' for help)
+> ]]></String>
+    <String Name="Input1"><![CDATA[
+resname RA
+]]></String>
+    <String Name="Output1"><![CDATA[
+Selection 'resname RA' parsed
+> ]]></String>
+    <String Name="Input2"><![CDATA[
+resname RB
+]]></String>
+    <String Name="Output2"><![CDATA[
+Selection 'resname RB' parsed
+]]></String>
+  </InteractiveSession>
+</ReferenceData>
diff --git a/src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesTwoSelectionInputStatus.xml b/src/gromacs/selection/tests/refdata/SelectionCollectionInteractiveTest_HandlesTwoSelectionInputStatus.xml
new file mode 100644 (file)
index 0000000..2f77680
--- /dev/null
@@ -0,0 +1,32 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+  <InteractiveSession Name="Interactive">
+    <String Name="Output0"><![CDATA[
+Specify 2 selections for test context:
+(one per line, <enter> for status/groups, 'help' for help)
+> ]]></String>
+    <String Name="Input1"><![CDATA[
+"Sel" resname RA
+]]></String>
+    <String Name="Output1"><![CDATA[
+Selection '"Sel" resname RA' parsed
+> ]]></String>
+    <String Name="Input2"><![CDATA[
+
+]]></String>
+    <String Name="Output2"><![CDATA[
+Specify 2 selections for test context:
+(one per line, <enter> for status/groups, 'help' for help)
+Currently provided selections:
+  1. "Sel" resname RA
+(1 more selection required)
+> ]]></String>
+    <String Name="Input3"><![CDATA[
+resname RB
+]]></String>
+    <String Name="Output3"><![CDATA[
+Selection 'resname RB' parsed
+]]></String>
+  </InteractiveSession>
+</ReferenceData>
index 1ae38d84525b8dfc62b0f5178ad9305cb3948268..b6e9bcfdb67f16b17e6b79715d4980024cf82b08 100644 (file)
@@ -68,4 +68,31 @@ and use the copy_xsl.sh script to copy it to relevant locations.
     <xsl:value-of select="."/>
 </xsl:template>
 
+<xsl:template match="InteractiveSession">
+    <pre>
+        <xsl:for-each select="*">
+            <xsl:choose>
+                <xsl:when test="starts-with(@Name, 'Output')">
+                    <xsl:value-of select="substring(.,2)"/>
+                </xsl:when>
+                <xsl:when test="string-length(.)=1">
+                    <xsl:text>&#x25ba;</xsl:text>
+                    <xsl:text>&#xb6;</xsl:text>
+                </xsl:when>
+                <xsl:when test="contains(substring(.,2), '&#10;')">
+                    <xsl:text>&#x25ba;</xsl:text>
+                    <xsl:value-of select="translate(substring(.,2), '&#10;', '&#x23ce;')"/>
+                    <xsl:text>&#10;</xsl:text>
+                </xsl:when>
+                <xsl:otherwise>
+                    <xsl:text>&#x25ba;</xsl:text>
+                    <xsl:value-of select="substring(.,2)"/>
+                    <xsl:text>&#xb6;</xsl:text>
+                </xsl:otherwise>
+            </xsl:choose>
+        </xsl:for-each>
+        <xsl:text>[EOF]</xsl:text>
+    </pre>
+</xsl:template>
+
 </xsl:stylesheet>
index d26d2e2b18ff1d13c151e92af80d45a3e50887e4..57048feab7ecfbd598cf2b7aa52d163c550c5929 100644 (file)
 
 <xsl:key name="SelectionName" match="ParsedSelections/ParsedSelection" use="@Name"/>
 
+<xsl:template match="InteractiveSession">
+    <h2>Interactive Session</h2>
+    <xsl:apply-imports />
+</xsl:template>
+
 <xsl:template match="ParsedSelections">
     <h2>Parsed Selections</h2>
     <table border="1">
index c04539a03fe54ac7c4631ef734fe1af79824c7e9..436abee87dafae069916d136e170ff7adefb6a08 100644 (file)
@@ -57,6 +57,7 @@
 #include "gromacs/utility/gmxregex.h"
 #include "gromacs/utility/stringutil.h"
 
+#include "testutils/interactivetest.h"
 #include "testutils/refdata.h"
 #include "testutils/testasserts.h"
 #include "testutils/testfilemanager.h"
@@ -152,6 +153,39 @@ SelectionCollectionTest::loadIndexGroups(const char *filename)
 }
 
 
+/********************************************************************
+ * Test fixture for interactive SelectionCollection tests
+ */
+
+class SelectionCollectionInteractiveTest : public SelectionCollectionTest
+{
+    public:
+        SelectionCollectionInteractiveTest()
+            : helper_(data_.rootChecker())
+        {
+        }
+
+        void runTest(int count, bool bInteractive,
+                     const gmx::ConstArrayRef<const char *> &input);
+
+        gmx::test::TestReferenceData      data_;
+        gmx::test::InteractiveTestHelper  helper_;
+};
+
+void SelectionCollectionInteractiveTest::runTest(
+        int count, bool bInteractive,
+        const gmx::ConstArrayRef<const char *> &inputLines)
+{
+    helper_.setInputLines(inputLines);
+    // TODO: Check something about the returned selections as well.
+    ASSERT_NO_THROW_GMX(sc_.parseInteractive(
+                                count, &helper_.inputStream(),
+                                bInteractive ? &helper_.outputStream() : NULL,
+                                "for test context"));
+    helper_.checkSession();
+}
+
+
 /********************************************************************
  * Test fixture for selection testing with reference data
  */
@@ -617,6 +651,150 @@ TEST_F(SelectionCollectionTest, HandlesFramesWithTooSmallAtomSubsets3)
 
 // TODO: Tests for more evaluation errors
 
+/********************************************************************
+ * Tests for interactive selection input
+ */
+
+TEST_F(SelectionCollectionInteractiveTest, HandlesBasicInput)
+{
+    const char *const input[] = {
+        "foo = resname RA",
+        "resname RB",
+        "\"Name\" resname RC"
+    };
+    runTest(-1, true, input);
+}
+
+TEST_F(SelectionCollectionInteractiveTest, HandlesContinuation)
+{
+    const char *const input[] = {
+        "resname RB and \\",
+        "resname RC"
+    };
+    runTest(-1, true, input);
+}
+
+TEST_F(SelectionCollectionInteractiveTest, HandlesSingleSelectionInput)
+{
+    const char *const input[] = {
+        "foo = resname RA",
+        "resname RA"
+    };
+    runTest(1, true, input);
+}
+
+TEST_F(SelectionCollectionInteractiveTest, HandlesTwoSelectionInput)
+{
+    const char *const input[] = {
+        "resname RA",
+        "resname RB"
+    };
+    runTest(2, true, input);
+}
+
+TEST_F(SelectionCollectionInteractiveTest, HandlesStatusWithGroups)
+{
+    const char *const input[] = {
+        "resname RA",
+        ""
+    };
+    loadIndexGroups("simple.ndx");
+    runTest(-1, true, input);
+}
+
+TEST_F(SelectionCollectionInteractiveTest, HandlesStatusWithExistingSelections)
+{
+    const char *const input[] = {
+        "",
+        "bar = resname RC",
+        "resname RA",
+        ""
+    };
+    ASSERT_NO_THROW_GMX(sc_.parseFromString("foo = resname RA"));
+    ASSERT_NO_THROW_GMX(sc_.parseFromString("resname RB"));
+    runTest(-1, true, input);
+}
+
+TEST_F(SelectionCollectionInteractiveTest, HandlesSingleSelectionInputStatus)
+{
+    const char *const input[] = {
+        "foo = resname RA",
+        "",
+        "resname RB"
+    };
+    runTest(1, true, input);
+}
+
+TEST_F(SelectionCollectionInteractiveTest, HandlesTwoSelectionInputStatus)
+{
+    const char *const input[] = {
+        "\"Sel\" resname RA",
+        "",
+        "resname RB"
+    };
+    runTest(2, true, input);
+}
+
+TEST_F(SelectionCollectionInteractiveTest, HandlesMultiSelectionInputStatus)
+{
+    const char *const input[] = {
+        "\"Sel\" resname RA",
+        "\"Sel2\" resname RB",
+        ""
+    };
+    runTest(-1, true, input);
+}
+
+TEST_F(SelectionCollectionInteractiveTest, HandlesNoFinalNewline)
+{
+    // TODO: There is an extra prompt printed after the input is finished; it
+    // would be cleaner not to have it, but it's only a cosmetic issue.
+    const char *const input[] = {
+        "resname RA"
+    };
+    helper_.setLastNewline(false);
+    runTest(-1, true, input);
+}
+
+TEST_F(SelectionCollectionInteractiveTest, HandlesEmptySelections)
+{
+    const char *const input[] = {
+        "resname RA;",
+        "; resname RB;;",
+        " ",
+        ";"
+    };
+    runTest(-1, true, input);
+}
+
+TEST_F(SelectionCollectionInteractiveTest, HandlesMultipleSelectionsOnLine)
+{
+    const char *const input[] = {
+        "resname RA; resname RB and \\",
+        "resname RC"
+    };
+    runTest(2, true, input);
+}
+
+TEST_F(SelectionCollectionInteractiveTest, HandlesNoninteractiveInput)
+{
+    const char *const input[] = {
+        "foo = resname RA",
+        "resname RB",
+        "\"Name\" resname RC"
+    };
+    runTest(-1, false, input);
+}
+
+TEST_F(SelectionCollectionInteractiveTest, HandlesSingleSelectionInputNoninteractively)
+{
+    const char *const input[] = {
+        "foo = resname RA",
+        "resname RA"
+    };
+    runTest(1, false, input);
+}
+
 
 /********************************************************************
  * Tests for selection keywords
index ceec518fe6a0b676c3bdbaae0d708bbe3c511021..061fcf2168067e3762951214c67d1e13c90dc179 100644 (file)
@@ -1,7 +1,7 @@
 #
 # This file is part of the GROMACS molecular simulation package.
 #
-# Copyright (c) 2014, by the GROMACS development team, led by
+# Copyright (c) 2014,2015, by the GROMACS development team, led by
 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 # and including many others, as listed in the AUTHORS file in the
 # top-level source directory and at http://www.gromacs.org.
@@ -32,7 +32,7 @@
 # To help us fund GROMACS development, we humbly ask that you cite
 # the research papers on the package. Check out http://www.gromacs.org.
 
-file(GLOB TOOLS_SOURCES *.cpp *.c)
+file(GLOB TOOLS_SOURCES *.cpp)
 set(LIBGROMACS_SOURCES ${LIBGROMACS_SOURCES} ${TOOLS_SOURCES} PARENT_SCOPE)
 
 if (BUILD_TESTING)
similarity index 97%
rename from src/gromacs/tools/check.c
rename to src/gromacs/tools/check.cpp
index d54b84ce1408df1de6d8933bd9a2eacfc6d598f6..cc19905b072b0d06a8d053070335c61e36d3908d 100644 (file)
@@ -36,9 +36,9 @@
  */
 #include "gmxpre.h"
 
-#include <math.h>
-#include <stdio.h>
-#include <string.h>
+#include <cmath>
+#include <cstdio>
+#include <cstring>
 
 #include "gromacs/commandline/pargs.h"
 #include "gromacs/fileio/confio.h"
@@ -85,7 +85,7 @@ typedef struct {
 
 static void tpx2system(FILE *fp, gmx_mtop_t *mtop)
 {
-    int                       i, nmol, nvsite = 0;
+    int                       nmol, nvsite = 0;
     gmx_mtop_atomloop_block_t aloop;
     t_atom                   *atom;
 
@@ -144,7 +144,6 @@ static void tpx2params(FILE *fp, t_inputrec *ir)
 static void tpx2methods(const char *tpx, const char *tex)
 {
     FILE         *fp;
-    t_tpxheader   sh;
     t_inputrec    ir;
     t_state       state;
     gmx_mtop_t    mtop;
@@ -223,12 +222,11 @@ static void chk_forces(int frame, int natoms, rvec *f)
 
 static void chk_bonds(t_idef *idef, int ePBC, rvec *x, matrix box, real tol)
 {
-    int   ftype, i, k, ai, aj, type;
-    real  b0, blen, deviation, devtot;
+    int   ftype, k, ai, aj, type;
+    real  b0, blen, deviation;
     t_pbc pbc;
     rvec  dx;
 
-    devtot = 0;
     set_pbc(&pbc, ePBC, box);
     for (ftype = 0; (ftype < F_NRE); ftype++)
     {
@@ -246,7 +244,7 @@ static void chk_bonds(t_idef *idef, int ePBC, rvec *x, matrix box, real tol)
                         b0 = idef->iparams[type].harmonic.rA;
                         break;
                     case F_G96BONDS:
-                        b0 = sqrt(idef->iparams[type].harmonic.rA);
+                        b0 = std::sqrt(idef->iparams[type].harmonic.rA);
                         break;
                     case F_MORSE:
                         b0 = idef->iparams[type].morse.b0A;
@@ -265,7 +263,7 @@ static void chk_bonds(t_idef *idef, int ePBC, rvec *x, matrix box, real tol)
                     pbc_dx(&pbc, x[ai], x[aj], dx);
                     blen      = norm(dx);
                     deviation = sqr(blen-b0);
-                    if (sqrt(deviation/sqr(b0) > tol))
+                    if (std::sqrt(deviation/sqr(b0)) > tol)
                     {
                         fprintf(stderr, "Distance between atoms %d and %d is %.3f, should be %.3f\n", ai+1, aj+1, blen, b0);
                     }
@@ -281,8 +279,8 @@ void chk_trj(const output_env_t oenv, const char *fn, const char *tpr, real tol)
     t_count          count;
     t_fr_time        first, last;
     int              j = -1, new_natoms, natoms;
-    real             rdum, tt, old_t1, old_t2, prec;
-    gmx_bool         bShowTimestep = TRUE, bOK, newline = FALSE;
+    real             old_t1, old_t2;
+    gmx_bool         bShowTimestep = TRUE, newline = FALSE;
     t_trxstatus     *status;
     gmx_mtop_t       mtop;
     gmx_localtop_t  *top = NULL;
@@ -549,7 +547,7 @@ void chk_tps(const char *fn, real vdw_fac, real bon_lo, real bon_hi)
                             *(atoms->resinfo[atoms->atom[j].resind].name),
                             atoms->resinfo[atoms->atom[j].resind].nr,
                             atom_vdw[j],
-                            sqrt(r2) );
+                            std::sqrt(r2) );
                 }
             }
         }
@@ -616,7 +614,7 @@ void chk_ndx(const char *fn)
 {
     t_blocka *grps;
     char    **grpname;
-    int       i, j;
+    int       i;
 
     grps = init_index(fn, &grpname);
     if (debug)
@@ -646,7 +644,7 @@ void chk_ndx(const char *fn)
 
 void chk_enx(const char *fn)
 {
-    int            nre, fnr, ndr;
+    int            nre, fnr;
     ener_file_t    in;
     gmx_enxnm_t   *enm = NULL;
     t_enxframe    *fr;
index 3eb6acd56f68f3112be85e6e1a6269a7ac4a00d5..435cd0d88ad0cd918827208e96a2ad6efb88109f 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2014, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
 #ifndef GMX_TOOLS_CHECK_H
 #define GMX_TOOLS_CHECK_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
 /*! \brief Implements gmx check
  *
  * \param[in] argc  argc value passed to main().
@@ -49,8 +42,4 @@ extern "C" {
  */
 int gmx_check(int argc, char *argv[]);
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif
similarity index 96%
rename from src/gromacs/tools/compare.c
rename to src/gromacs/tools/compare.cpp
index 975b62f53697e3e5335b22f24e159683cef54754..bdbd351d3157bcb5d12b36c49ba5e89678386014 100644 (file)
  * the research papers on the package. Check out http://www.gromacs.org.
  */
 /* This file is completely threadsafe - keep it that way! */
+
 #include "gmxpre.h"
 
-#include <math.h>
-#include <stdio.h>
-#include <string.h>
+#include <cmath>
+#include <cstdio>
+#include <cstring>
+
+#include <algorithm>
 
 #include "gromacs/fileio/enxio.h"
 #include "gromacs/fileio/tpxio.h"
@@ -73,9 +76,9 @@ static void cmp_int64(FILE *fp, const char *s, gmx_int64_t i1, gmx_int64_t i2)
     if (i1 != i2)
     {
         fprintf(fp, "%s (", s);
-        fprintf(fp, "%"GMX_PRId64, i1);
+        fprintf(fp, "%" GMX_PRId64, i1);
         fprintf(fp, " - ");
-        fprintf(fp, "%"GMX_PRId64, i2);
+        fprintf(fp, "%" GMX_PRId64, i2);
         fprintf(fp, ")\n");
     }
 }
@@ -147,7 +150,7 @@ static gmx_bool cmp_bool(FILE *fp, const char *s, int index, gmx_bool b1, gmx_bo
 static void cmp_str(FILE *fp, const char *s, int index,
                     const char *s1, const char *s2)
 {
-    if (strcmp(s1, s2) != 0)
+    if (std::strcmp(s1, s2) != 0)
     {
         if (index != -1)
         {
@@ -347,7 +350,7 @@ static void cmp_idef(FILE *fp, t_idef *id1, t_idef *id2, real ftol, real abstol)
     {
         cmp_int(fp, "idef->ntypes", -1, id1->ntypes, id2->ntypes);
         cmp_int(fp, "idef->atnr",  -1, id1->atnr, id2->atnr);
-        for (i = 0; (i < min(id1->ntypes, id2->ntypes)); i++)
+        for (i = 0; (i < std::min(id1->ntypes, id2->ntypes)); i++)
         {
             sprintf(buf1, "idef->functype[%d]", i);
             sprintf(buf2, "idef->iparam[%d]", i);
@@ -372,7 +375,6 @@ static void cmp_idef(FILE *fp, t_idef *id1, t_idef *id2, real ftol, real abstol)
 
 static void cmp_block(FILE *fp, t_block *b1, t_block *b2, const char *s)
 {
-    int  i, j, k;
     char buf[32];
 
     fprintf(fp, "comparing block %s\n", s);
@@ -382,7 +384,6 @@ static void cmp_block(FILE *fp, t_block *b1, t_block *b2, const char *s)
 
 static void cmp_blocka(FILE *fp, t_blocka *b1, t_blocka *b2, const char *s)
 {
-    int  i, j, k;
     char buf[32];
 
     fprintf(fp, "comparing blocka %s\n", s);
@@ -394,9 +395,6 @@ static void cmp_blocka(FILE *fp, t_blocka *b1, t_blocka *b2, const char *s)
 
 static void cmp_atom(FILE *fp, int index, t_atom *a1, t_atom *a2, real ftol, real abstol)
 {
-    int  i;
-    char buf[256];
-
     if (a2)
     {
         cmp_us(fp, "atom.type", index, a1->type, a2->type);
@@ -442,8 +440,6 @@ static void cmp_atoms(FILE *fp, t_atoms *a1, t_atoms *a2, real ftol, real abstol
 
 static void cmp_top(FILE *fp, t_topology *t1, t_topology *t2, real ftol, real abstol)
 {
-    int i;
-
     fprintf(fp, "comparing top\n");
     if (t2)
     {
@@ -464,7 +460,7 @@ static void cmp_top(FILE *fp, t_topology *t1, t_topology *t2, real ftol, real ab
 static void cmp_groups(FILE *fp, gmx_groups_t *g0, gmx_groups_t *g1,
                        int natoms0, int natoms1)
 {
-    int  i, j, ndiff;
+    int  i, j;
     char buf[32];
 
     fprintf(fp, "comparing groups\n");
@@ -515,7 +511,7 @@ static void cmp_rvecs(FILE *fp, const char *title, int n, rvec x1[], rvec x2[],
                 ssd += d*d;
             }
         }
-        fprintf(fp, "%s RMSD %g\n", title, sqrt(ssd/n));
+        fprintf(fp, "%s RMSD %g\n", title, std::sqrt(ssd/n));
     }
     else
     {
@@ -562,7 +558,7 @@ static void cmp_rvecs_rmstol(FILE *fp, const char *title, int n, rvec x1[], rvec
             rms_x1 += d*d;
         }
     }
-    rms_x1 = sqrt(rms_x1/(DIM*n));
+    rms_x1 = std::sqrt(rms_x1/(DIM*n));
     /* And now do the actual comparision with a hopefully realistic abstol. */
     for (i = 0; (i < n); i++)
     {
@@ -579,7 +575,7 @@ static void cmp_grpopts(FILE *fp, t_grpopts *opt1, t_grpopts *opt2, real ftol, r
     cmp_int(fp, "inputrec->grpopts.ngacc", -1, opt1->ngacc, opt2->ngacc);
     cmp_int(fp, "inputrec->grpopts.ngfrz", -1, opt1->ngfrz, opt2->ngfrz);
     cmp_int(fp, "inputrec->grpopts.ngener", -1, opt1->ngener, opt2->ngener);
-    for (i = 0; (i < min(opt1->ngtc, opt2->ngtc)); i++)
+    for (i = 0; (i < std::min(opt1->ngtc, opt2->ngtc)); i++)
     {
         cmp_real(fp, "inputrec->grpopts.nrdf", i, opt1->nrdf[i], opt2->nrdf[i], ftol, abstol);
         cmp_real(fp, "inputrec->grpopts.ref_t", i, opt1->ref_t[i], opt2->ref_t[i], ftol, abstol);
@@ -611,11 +607,11 @@ static void cmp_grpopts(FILE *fp, t_grpopts *opt1, t_grpopts *opt2, real ftol, r
             }
         }
     }
-    for (i = 0; (i < min(opt1->ngacc, opt2->ngacc)); i++)
+    for (i = 0; (i < std::min(opt1->ngacc, opt2->ngacc)); i++)
     {
         cmp_rvec(fp, "inputrec->grpopts.acc", i, opt1->acc[i], opt2->acc[i], ftol, abstol);
     }
-    for (i = 0; (i < min(opt1->ngfrz, opt2->ngfrz)); i++)
+    for (i = 0; (i < std::min(opt1->ngfrz, opt2->ngfrz)); i++)
     {
         cmp_ivec(fp, "inputrec->grpopts.nFreeze", i, opt1->nFreeze[i], opt2->nFreeze[i]);
     }
@@ -630,7 +626,7 @@ static void cmp_cosines(FILE *fp, const char *s, t_cosines c1[DIM], t_cosines c2
     {
         sprintf(buf, "inputrec->%s[%d]", s, m);
         cmp_int(fp, buf, 0, c1->n, c2->n);
-        for (i = 0; (i < min(c1->n, c2->n)); i++)
+        for (i = 0; (i < std::min(c1->n, c2->n)); i++)
         {
             cmp_real(fp, buf, i, c1->a[i], c2->a[i], ftol, abstol);
             cmp_real(fp, buf, i, c1->phi[i], c2->phi[i], ftol, abstol);
@@ -712,7 +708,7 @@ static void cmp_fepvals(FILE *fp, t_lambda *fep1, t_lambda *fep2, real ftol, rea
     cmp_int(fp, "inputrec->fepvals->n_lambda", -1, fep1->n_lambda, fep2->n_lambda);
     for (i = 0; i < efptNR; i++)
     {
-        for (j = 0; j < min(fep1->n_lambda, fep2->n_lambda); j++)
+        for (j = 0; j < std::min(fep1->n_lambda, fep2->n_lambda); j++)
         {
             cmp_double(fp, "inputrec->fepvals->all_lambda", -1, fep1->all_lambda[i][j], fep2->all_lambda[i][j], ftol, abstol);
         }
@@ -822,12 +818,12 @@ static void cmp_inputrec(FILE *fp, t_inputrec *ir1, t_inputrec *ir2, real ftol,
     cmp_int(fp, "inputrec->bSimTemp", -1, ir1->bSimTemp, ir2->bSimTemp);
     if ((ir1->bSimTemp == ir2->bSimTemp) && (ir1->bSimTemp))
     {
-        cmp_simtempvals(fp, ir1->simtempvals, ir2->simtempvals, min(ir1->fepvals->n_lambda, ir2->fepvals->n_lambda), ftol, abstol);
+        cmp_simtempvals(fp, ir1->simtempvals, ir2->simtempvals, std::min(ir1->fepvals->n_lambda, ir2->fepvals->n_lambda), ftol, abstol);
     }
     cmp_int(fp, "inputrec->bExpanded", -1, ir1->bExpanded, ir2->bExpanded);
     if ((ir1->bExpanded == ir2->bExpanded) && (ir1->bExpanded))
     {
-        cmp_expandedvals(fp, ir1->expandedvals, ir2->expandedvals, min(ir1->fepvals->n_lambda, ir2->fepvals->n_lambda), ftol, abstol);
+        cmp_expandedvals(fp, ir1->expandedvals, ir2->expandedvals, std::min(ir1->fepvals->n_lambda, ir2->fepvals->n_lambda), ftol, abstol);
     }
     cmp_int(fp, "inputrec->nwall", -1, ir1->nwall, ir2->nwall);
     cmp_int(fp, "inputrec->wall_type", -1, ir1->wall_type, ir2->wall_type);
@@ -976,7 +972,6 @@ void comp_tpx(const char *fn1, const char *fn2,
               gmx_bool bRMSD, real ftol, real abstol)
 {
     const char  *ff[2];
-    t_tpxheader  sh[2];
     t_inputrec   ir[2];
     t_state      state[2];
     gmx_mtop_t   mtop[2];
@@ -1060,21 +1055,21 @@ void comp_frame(FILE *fp, t_trxframe *fr1, t_trxframe *fr2,
     }
     if (cmp_bool(fp, "bX", -1, fr1->bX, fr2->bX))
     {
-        cmp_rvecs(fp, "x", min(fr1->natoms, fr2->natoms), fr1->x, fr2->x, bRMSD, ftol, abstol);
+        cmp_rvecs(fp, "x", std::min(fr1->natoms, fr2->natoms), fr1->x, fr2->x, bRMSD, ftol, abstol);
     }
     if (cmp_bool(fp, "bV", -1, fr1->bV, fr2->bV))
     {
-        cmp_rvecs(fp, "v", min(fr1->natoms, fr2->natoms), fr1->v, fr2->v, bRMSD, ftol, abstol);
+        cmp_rvecs(fp, "v", std::min(fr1->natoms, fr2->natoms), fr1->v, fr2->v, bRMSD, ftol, abstol);
     }
     if (cmp_bool(fp, "bF", -1, fr1->bF, fr2->bF))
     {
         if (bRMSD)
         {
-            cmp_rvecs(fp, "f", min(fr1->natoms, fr2->natoms), fr1->f, fr2->f, bRMSD, ftol, abstol);
+            cmp_rvecs(fp, "f", std::min(fr1->natoms, fr2->natoms), fr1->f, fr2->f, bRMSD, ftol, abstol);
         }
         else
         {
-            cmp_rvecs_rmstol(fp, "f", min(fr1->natoms, fr2->natoms), fr1->f, fr2->f, ftol, abstol);
+            cmp_rvecs_rmstol(fp, "f", std::min(fr1->natoms, fr2->natoms), fr1->f, fr2->f, ftol, abstol);
         }
     }
     if (cmp_bool(fp, "bBox", -1, fr1->bBox, fr2->bBox))
@@ -1133,25 +1128,25 @@ static real ener_tensor_diag(int n, int *ind1, int *ind2,
                              int *tensi, int i,
                              t_energy e1[], t_energy e2[])
 {
-    int  d1, d2;
-    int  len;
-    int  j;
-    real prod1, prod2;
-    int  nfound;
+    int    d1, d2;
+    int    j;
+    real   prod1, prod2;
+    int    nfound;
+    size_t len;
 
     d1 = tensi[i]/DIM;
     d2 = tensi[i] - d1*DIM;
 
     /* Find the diagonal elements d1 and d2 */
-    len    = strlen(enm1[ind1[i]].name);
+    len    = std::strlen(enm1[ind1[i]].name);
     prod1  = 1;
     prod2  = 1;
     nfound = 0;
     for (j = 0; j < n; j++)
     {
         if (tensi[j] >= 0 &&
-            strlen(enm1[ind1[j]].name) == len &&
-            strncmp(enm1[ind1[i]].name, enm1[ind1[j]].name, len-2) == 0 &&
+            std::strlen(enm1[ind1[j]].name) == len &&
+            std::strncmp(enm1[ind1[i]].name, enm1[ind1[j]].name, len-2) == 0 &&
             (tensi[j] == d1*DIM+d1 || tensi[j] == d2*DIM+d2))
         {
             prod1 *= fabs(e1[ind1[j]].e);
@@ -1162,7 +1157,7 @@ static real ener_tensor_diag(int n, int *ind1, int *ind2,
 
     if (nfound == 2)
     {
-        return 0.5*(sqrt(prod1) + sqrt(prod2));
+        return 0.5*(std::sqrt(prod1) + std::sqrt(prod2));
     }
     else
     {
@@ -1174,15 +1169,15 @@ static gmx_bool enernm_equal(const char *nm1, const char *nm2)
 {
     int len1, len2;
 
-    len1 = strlen(nm1);
-    len2 = strlen(nm2);
+    len1 = std::strlen(nm1);
+    len2 = std::strlen(nm2);
 
     /* Remove " (bar)" at the end of a name */
-    if (len1 > 6 && strcmp(nm1+len1-6, " (bar)") == 0)
+    if (len1 > 6 && std::strcmp(nm1+len1-6, " (bar)") == 0)
     {
         len1 -= 6;
     }
-    if (len2 > 6 && strcmp(nm2+len2-6, " (bar)") == 0)
+    if (len2 > 6 && std::strcmp(nm2+len2-6, " (bar)") == 0)
     {
         len2 -= 6;
     }
@@ -1206,7 +1201,7 @@ static void cmp_energies(FILE *fp, int step1, int step2,
     {
         ii       = ind1[i];
         tensi[i] = -1;
-        len      = strlen(enm1[ii].name);
+        len      = std::strlen(enm1[ii].name);
         if (len > 3 && enm1[ii].name[len-3] == '-')
         {
             d1 = enm1[ii].name[len-2] - 'X';
@@ -1238,7 +1233,7 @@ static void cmp_energies(FILE *fp, int step1, int step2,
             if (abstol_i > 0)
             {
                 /* We found a diagonal, we need to check with the minimum tolerance */
-                abstol_i = min(abstol_i, abstol);
+                abstol_i = std::min(abstol_i, abstol);
             }
             else
             {
@@ -1374,10 +1369,9 @@ static void cmp_eblocks(t_enxframe *fr1, t_enxframe *fr2, real ftol, real abstol
 
 void comp_enx(const char *fn1, const char *fn2, real ftol, real abstol, const char *lastener)
 {
-    int            nre, nre1, nre2, block;
+    int            nre, nre1, nre2;
     ener_file_t    in1, in2;
     int            i, j, maxener, *ind1, *ind2, *have;
-    char           buf[256];
     gmx_enxnm_t   *enm1 = NULL, *enm2 = NULL;
     t_enxframe    *fr1, *fr2;
     gmx_bool       b1, b2;
@@ -1431,7 +1425,7 @@ void comp_enx(const char *fn1, const char *fn2, real ftol, real abstol, const ch
     maxener = nre;
     for (i = 0; i < nre; i++)
     {
-        if ((lastener != NULL) && (strstr(enm1[i].name, lastener) != NULL))
+        if ((lastener != NULL) && (std::strstr(enm1[i].name, lastener) != NULL))
         {
             maxener = i+1;
             break;
similarity index 98%
rename from src/gromacs/tools/convert_tpr.c
rename to src/gromacs/tools/convert_tpr.cpp
index ff2fd1044719719d083a14534f8da82df4c32f86..696f61c20289b3a56dcf575a392e33062d06cbbd 100644 (file)
@@ -36,7 +36,7 @@
  */
 #include "gmxpre.h"
 
-#include <math.h>
+#include <cmath>
 
 #include "gromacs/commandline/pargs.h"
 #include "gromacs/fileio/enxio.h"
@@ -360,8 +360,7 @@ int gmx_convert_tpr(int argc, char *argv[])
     gmx_bool          bFrame, bUse, bSel, bNeedEner, bReadEner, bScanEner, bFepState;
     gmx_mtop_t        mtop;
     t_atoms           atoms;
-    t_inputrec       *ir, *irnew = NULL;
-    t_gromppopts     *gopts;
+    t_inputrec       *ir;
     t_state           state;
     rvec             *newx = NULL, *newv = NULL, *tmpx, *tmpv;
     matrix            newbox;
@@ -405,7 +404,6 @@ int gmx_convert_tpr(int argc, char *argv[])
         { "-init_fep_state", FALSE, etINT, {&init_fep_state},
           "fep state to initialize from" },
     };
-    int             nerror = 0;
 
     /* Parse the command line */
     if (!parse_common_args(&argc, argv, 0, NFILE, fnm, asize(pa), pa,
@@ -467,9 +465,9 @@ int gmx_convert_tpr(int argc, char *argv[])
 
         if (EI_SD(ir->eI) || ir->eI == eiBD)
         {
-            fprintf(stderr, "\nChanging ld-seed from %"GMX_PRId64 " ", ir->ld_seed);
+            fprintf(stderr, "\nChanging ld-seed from %" GMX_PRId64 " ", ir->ld_seed);
             ir->ld_seed = (gmx_int64_t)gmx_rng_make_seed();
-            fprintf(stderr, "to %"GMX_PRId64 "\n\n", ir->ld_seed);
+            fprintf(stderr, "to %" GMX_PRId64 "\n\n", ir->ld_seed);
         }
 
         frame_fn = ftp2fn(efTRN, NFILE, fnm);
index bb7fc46c445bb55f2f4219956b5eea05451a3ece..23c52ffa7e3ceff1cf8163e6fc502e05b99968fa 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2014, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
 #ifndef GMX_TOOLS_CONVERT_TPR_H
 #define GMX_TOOLS_CONVERT_TPR_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
 /*! \brief Implements gmx convert-tpr
  *
  * \param[in] argc  argc value passed to main().
@@ -49,8 +42,4 @@ extern "C" {
  */
 int gmx_convert_tpr(int argc, char *argv[]);
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif
similarity index 98%
rename from src/gromacs/tools/dump.c
rename to src/gromacs/tools/dump.cpp
index 76dd6f7df7dac4bd19899a80646ec63caa663166..d9b20863d961305c046d3dc13fb2d1adca760c2c 100644 (file)
 
 #include "config.h"
 
-#include <assert.h>
-#include <math.h>
-#include <stdio.h>
-#include <string.h>
+#include <cassert>
+#include <cmath>
+#include <cstdio>
+#include <cstring>
 
 #include "gromacs/commandline/pargs.h"
 #include "gromacs/fileio/enxio.h"
@@ -67,7 +67,7 @@ static void list_tpx(const char *fn, gmx_bool bShowNumbers, const char *mdpfn,
                      gmx_bool bSysTop)
 {
     FILE         *gp;
-    int           fp, indent, i, j, **gcount, atot;
+    int           indent, i, j, **gcount, atot;
     t_state       state;
     rvec         *f = NULL;
     t_inputrec    ir;
@@ -100,10 +100,9 @@ static void list_tpx(const char *fn, gmx_bool bShowNumbers, const char *mdpfn,
         if (available(stdout, &tpx, 0, fn))
         {
             indent = 0;
-            indent = pr_title(stdout, indent, fn);
+            pr_title(stdout, indent, fn);
             pr_inputrec(stdout, 0, "inputrec", tpx.bIr ? &(ir) : NULL, FALSE);
 
-            indent = 0;
             pr_header(stdout, indent, "header", &(tpx));
 
             if (!bSysTop)
@@ -419,13 +418,11 @@ void list_trx(const char *fn)
 
 void list_ene(const char *fn)
 {
-    int            ndr;
     ener_file_t    in;
     gmx_bool       bCont;
     gmx_enxnm_t   *enm = NULL;
     t_enxframe    *fr;
     int            i, j, nre, b;
-    real           rav, minthird;
     char           buf[22];
 
     printf("gmx dump: %s\n", fn);
@@ -439,7 +436,6 @@ void list_ene(const char *fn)
         printf("%5d  %-24s (%s)\n", i, enm[i].name, enm[i].unit);
     }
 
-    minthird = -1.0/3.0;
     snew(fr, 1);
     do
     {
index cf0195e1e3f3379586942d8bfa602983a0a0e100..c1457d097e506da4862c8f283d4ae45cb334b8ef 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2014, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
 #ifndef GMX_TOOLS_DUMP_H
 #define GMX_TOOLS_DUMP_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
 /*! \brief Implements gmx dump
  *
  * \param[in] argc  argc value passed to main().
@@ -49,8 +42,4 @@ extern "C" {
  */
 int gmx_dump(int argc, char *argv[]);
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif
index f3e12f4170fff3fb8118006eb60c4f0c5cf005ab..3dc042e97cbdc77dd178365afa9555a15eb399ae 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2010,2011,2012,2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2010,2011,2012,2013,2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -58,7 +58,7 @@
 #include "gromacs/trajectoryanalysis/analysismodule.h"
 #include "gromacs/trajectoryanalysis/analysissettings.h"
 #include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/file.h"
+#include "gromacs/utility/filestream.h"
 #include "gromacs/utility/gmxassert.h"
 
 #include "runnercommon.h"
@@ -145,7 +145,7 @@ TrajectoryAnalysisCommandLineRunner::Impl::parseOptions(
 
     common->initIndexGroups(selections, bUseDefaultGroups_);
 
-    const bool bInteractive = File::standardInput().isInteractive();
+    const bool bInteractive = StandardInputStream::instance().isInteractive();
     seloptManager.parseRequestedFromStdin(bInteractive);
 
     common->doneIndexGroups(selections);
index 96f752b5d54425812a8b7d2dbdda0a8346b67969..4b07d1483a4f5f81da6bba393b2f4a65d0c736bb 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2010,2011,2012,2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2010,2011,2012,2013,2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -271,7 +271,7 @@ Distance::initAnalysis(const TrajectoryAnalysisSettings &settings,
     {
         AnalysisDataPlotModulePointer plotm(
                 new AnalysisDataPlotModule(settings.plotSettings()));
-        plotm->setFileName(fnAll_);
+        plotm->setFileName(fnXYZ_);
         plotm->setTitle("Distance");
         plotm->setXAxisIsTime();
         plotm->setYLabel("Distance (nm)");
index 1ae38d84525b8dfc62b0f5178ad9305cb3948268..b6e9bcfdb67f16b17e6b79715d4980024cf82b08 100644 (file)
@@ -68,4 +68,31 @@ and use the copy_xsl.sh script to copy it to relevant locations.
     <xsl:value-of select="."/>
 </xsl:template>
 
+<xsl:template match="InteractiveSession">
+    <pre>
+        <xsl:for-each select="*">
+            <xsl:choose>
+                <xsl:when test="starts-with(@Name, 'Output')">
+                    <xsl:value-of select="substring(.,2)"/>
+                </xsl:when>
+                <xsl:when test="string-length(.)=1">
+                    <xsl:text>&#x25ba;</xsl:text>
+                    <xsl:text>&#xb6;</xsl:text>
+                </xsl:when>
+                <xsl:when test="contains(substring(.,2), '&#10;')">
+                    <xsl:text>&#x25ba;</xsl:text>
+                    <xsl:value-of select="translate(substring(.,2), '&#10;', '&#x23ce;')"/>
+                    <xsl:text>&#10;</xsl:text>
+                </xsl:when>
+                <xsl:otherwise>
+                    <xsl:text>&#x25ba;</xsl:text>
+                    <xsl:value-of select="substring(.,2)"/>
+                    <xsl:text>&#xb6;</xsl:text>
+                </xsl:otherwise>
+            </xsl:choose>
+        </xsl:for-each>
+        <xsl:text>[EOF]</xsl:text>
+    </pre>
+</xsl:template>
+
 </xsl:stylesheet>
index 213f698280ab6760a42773a19d6d95e314bb8439..7dc5f007f4822fb7f8b11b5ff36d950cbc9808cd 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2010,2011,2012,2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2010,2011,2012,2013,2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
  * containers to simplify implementation of other code.  Contents of the module
  * are discussed in more details under the different headings below.
  * Some of the code in installed headers in the module is intended for use
- * directly from code outside the Gromacs library, but a significant portion is
- * exposed only because other public headers depend on it.
+ * directly from code outside the \Gromacs library, but a significant portion
+ * is exposed only because other public headers depend on it.
  *
  * Since this module implements error handling, it should be at the lowest
  * level: it should not depend on other modules.  Any functionality needed by
  * the error handling code should also be kept in this module.
  *
- * <H3>Error Handling</H3>
+ * <H3>Error handling</H3>
  *
  * Exception classes used in the library are declared in the exceptions.h header
- * file.  Most Gromacs-specific exceptions derive from gmx::GromacsException.
+ * file.  Most \Gromacs-specific exceptions derive from gmx::GromacsException.
  *
  * This header also declares a ::GMX_THROW macro that should be used for
  * throwing exceptions.  ::GMX_THROW_WITH_ERRNO is also provided for reporting
  * \endif
  *
  *
- * <H3>Basic %File Handling</H3>
+ * \if libapi
+ *
+ * <H3>Basic file handling and streams</H3>
  *
- * The header file.h declares a gmx::File class for basic I/O support.
+ * The header textstream.h declares interfaces for simple text format streams.
+ * Headers filestream.h and stringstream.h provide implementations for these
+ * streams for reading/writing files and for writing to in-memory strings.
  *
- * The header path.h declares helpers for manipulating paths and for managing
- * directories.
+ * The header fileredirector.h provides interfaces for redirecting file input
+ * and/or output to alternative streams, for use in testing, as well as default
+ * implementations for these interfaces that just use the file system.
  *
- * The fate of these headers depends on what is decided in Redmine issue #950.
+ * The header textwriter.h provides gmx::TextWriter for more formatting support
+ * when writing to a text stream.  Similarly, textreader.h provides more
+ * formatting support when reading from a text stream.
  *
+ * The header path.h declares helpers for manipulating paths as strings and for
+ * managing directories and files.
+ * The fate of this header depends on what is decided in Redmine issue #950.
+ *
+ * \endif
  *
- * <H3>Implementation Helpers</H3>
+ * <H3>Implementation helpers</H3>
  *
  * The header basedefinitions.h contains common definitions and macros used
  * throughout \Gromacs.  It includes fixed-width integer types (`gmx_int64_t`
  * safety when using bit flag fields.
  *
  *
- * <H3>Other Functionality</H3>
+ * <H3>Other functionality</H3>
  *
  * The header init.h declares gmx::init() and gmx::finalize() for initializing
  * and deinitializing the \Gromacs library.
index 1933e660a6ce406932b430e016d7cb4439e5d129..1ce2ce4b20fd1ea87e347a8b6dbe4823279b16d9 100644 (file)
@@ -44,7 +44,6 @@ gmx_install_headers(
     errorcodes.h
     exceptions.h
     fatalerror.h
-    file.h
     flags.h
     futil.h
     gmxassert.h
index 2bc6a9b875015052bd5bfca2fec774ca94c33a37..e773fff84b2a55db6f2c69199d288f9e43088ab0 100644 (file)
@@ -51,7 +51,7 @@
 #include "buildinfo.h"
 #include "gromacs/utility/directoryenumerator.h"
 #include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/file.h"
+#include "gromacs/utility/filestream.h"
 #include "gromacs/utility/path.h"
 #include "gromacs/utility/programcontext.h"
 #include "gromacs/utility/stringutil.h"
@@ -133,7 +133,7 @@ FILE *DataFileFinder::openFile(const DataFileOptions &options) const
         fprintf(debug, "Opening library file %s\n", fn);
     }
 #endif
-    return File::openRawHandle(filename, "r");
+    return TextInputFile::openRawHandle(filename);
 }
 
 std::string DataFileFinder::findFile(const DataFileOptions &options) const
index 00c57aa67c4ac86d3f71fba0b1c96aaedf72d883..59df7ce47edf9f71a23b65f437ffb2a3c5438bb4 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2010,2011,2012,2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2010,2011,2012,2013,2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -45,7 +45,7 @@
 
 #include <cstdlib>
 
-#include "thread_mpi/mutex.h"
+#include "gromacs/utility/mutex.h"
 
 #include "errorformat.h"
 
@@ -100,7 +100,7 @@ void standardErrorHandler(int retcode, const char *msg,
 //! Global error handler set with setFatalErrorHandler().
 ErrorHandlerFunc g_errorHandler = standardErrorHandler;
 //! Mutex for protecting access to ::g_errorHandler.
-tMPI::mutex      handler_mutex;
+Mutex            handler_mutex;
 
 //! \}
 
@@ -117,8 +117,8 @@ const char *getErrorCodeString(int errorcode)
 
 ErrorHandlerFunc setFatalErrorHandler(ErrorHandlerFunc handler)
 {
-    tMPI::lock_guard<tMPI::mutex> lock(handler_mutex);
-    ErrorHandlerFunc              oldHandler = g_errorHandler;
+    lock_guard<Mutex> lock(handler_mutex);
+    ErrorHandlerFunc  oldHandler = g_errorHandler;
     g_errorHandler = handler;
     return oldHandler;
 }
@@ -131,7 +131,7 @@ void fatalError(int retcode, const char *msg, const char *file, int line)
 {
     ErrorHandlerFunc handler = NULL;
     {
-        tMPI::lock_guard<tMPI::mutex> lock(handler_mutex);
+        lock_guard<Mutex> lock(handler_mutex);
         handler = g_errorHandler;
     }
     if (handler != NULL)
index 14fe155e434e892d378d644b6eecefa24b8e4377..cb683dafb298772770a0a40534e2a967ce335ef7 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2011,2012,2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2011,2012,2013,2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -60,6 +60,7 @@
 #include "gromacs/utility/errorcodes.h"
 #include "gromacs/utility/gmxassert.h"
 #include "gromacs/utility/stringutil.h"
+#include "gromacs/utility/textwriter.h"
 
 #include "errorformat.h"
 
@@ -308,6 +309,39 @@ class MessageWriterFileNoThrow : public MessageWriterInterface
         FILE                   *fp_;
 };
 
+/*! \brief
+ * Exception information writer that formats messages through a TextWriter.
+ */
+class MessageWriterTextWriter : public MessageWriterInterface
+{
+    public:
+        //! Initializes a writer that writes using \p writer (pointer is stored, not deleted here).
+        explicit MessageWriterTextWriter(TextWriter *writer) : writer_(writer)
+        {
+        }
+
+        virtual void writeLine(const char *text, int indent)
+        {
+            writer_->wrapperSettings().setIndent(indent); // indent is set on the shared writer and not reset afterwards
+            writer_->writeLine(text);
+        }
+        virtual void writeErrNoInfo(int errorNumber, const char *funcName,
+                                    int indent)
+        {
+            writer_->wrapperSettings().setIndent(indent); // same indent applies to both lines written below
+            writer_->writeLine(formatString("Reason: %s", std::strerror(errorNumber)));
+            if (funcName != NULL) // funcName is optional; the call-detail line is skipped when it is NULL
+            {
+                writer_->writeLine(
+                        formatString("(call to %s() returned error code %d)",
+                                     funcName, errorNumber));
+            }
+        }
+
+    private:
+        TextWriter     *writer_; //!< Writer that receives all output from this object.
+};
+
 /*! \brief
  * Exception information writer to format into an std::string.
  */
@@ -519,6 +553,13 @@ void formatExceptionMessageToFile(FILE *fp, const std::exception &ex)
     formatExceptionMessageInternal(&writer, ex, 0);
 }
 
+void formatExceptionMessageToWriter(TextWriter           *writer,
+                                    const std::exception &ex)
+{
+    MessageWriterTextWriter messageWriter(writer);
+    formatExceptionMessageInternal(&messageWriter, ex, 0);
+}
+
 int processExceptionAtExit(const std::exception & /*ex*/)
 {
     int returnCode = 1;
index 40d723b8f516190447e1de215306c071d46d9a0d..493ee6bb6052a5b8d13e3de8d4318a24e57193c1 100644 (file)
@@ -60,6 +60,8 @@
 namespace gmx
 {
 
+class TextWriter;
+
 namespace internal
 {
 //! Internal container type for storing a list of nested exceptions.
@@ -444,6 +446,15 @@ std::string formatExceptionMessageToString(const std::exception &ex);
  * \throws    std::bad_alloc if out of memory.
  */
 void formatExceptionMessageToFile(FILE *fp, const std::exception &ex);
+/*! \brief
+ * Formats an error message for reporting an exception and writes it with a TextWriter.
+ *
+ * \param     writer  Writer to use for writing the message.
+ * \param[in] ex      Exception to format.
+ * \throws    std::bad_alloc if out of memory.
+ */
+void formatExceptionMessageToWriter(TextWriter           *writer,
+                                    const std::exception &ex);
 /*! \brief
  * Handles an exception that is causing the program to terminate.
  *
diff --git a/src/gromacs/utility/file.cpp b/src/gromacs/utility/file.cpp
deleted file mode 100644 (file)
index 7614734..0000000
+++ /dev/null
@@ -1,402 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*! \internal \file
- * \brief
- * Implements gmx::File.
- *
- * \author Teemu Murtola <teemu.murtola@gmail.com>
- * \ingroup module_utility
- */
-#include "gmxpre.h"
-
-#include "file.h"
-
-#include "config.h"
-
-#include <cerrno>
-#include <cstdio>
-#include <cstring>
-
-#include <algorithm>
-#include <string>
-#include <vector>
-
-#include <sys/stat.h>
-
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-
-#include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/gmxassert.h"
-#include "gromacs/utility/stringutil.h"
-
-namespace gmx
-{
-
-/*! \internal \brief
- * Private implementation class for File.
- *
- * \ingroup module_utility
- */
-class File::Impl
-{
-    public:
-        /*! \brief
-         * Initialize a file object with the given handle.
-         *
-         * \param[in]  fp     %File handle to use (may be NULL).
-         * \param[in]  bClose Whether this object should close its file handle.
-         */
-        Impl(FILE *fp, bool bClose);
-        ~Impl();
-
-        //! File handle for this object (may be NULL).
-        FILE                   *fp_;
-        /*! \brief
-         * Whether \p fp_ should be closed by this object.
-         *
-         * Can be true if \p fp_ is NULL.
-         */
-        bool                    bClose_;
-};
-
-File::Impl::Impl(FILE *fp, bool bClose)
-    : fp_(fp), bClose_(bClose)
-{
-}
-
-File::Impl::~Impl()
-{
-    if (fp_ != NULL && bClose_)
-    {
-        if (fclose(fp_) != 0)
-        {
-            // TODO: Log the error somewhere
-        }
-    }
-}
-
-// static
-FILE *File::openRawHandle(const char *filename, const char *mode)
-{
-    FILE *fp = fopen(filename, mode);
-    if (fp == NULL)
-    {
-        GMX_THROW_WITH_ERRNO(
-                FileIOError(formatString("Could not open file '%s'", filename)),
-                "fopen", errno);
-    }
-    return fp;
-}
-
-// static
-FILE *File::openRawHandle(const std::string &filename, const char *mode)
-{
-    return openRawHandle(filename.c_str(), mode);
-}
-
-File::File(const char *filename, const char *mode)
-    : impl_(new Impl(NULL, true))
-{
-    open(filename, mode);
-}
-
-File::File(const std::string &filename, const char *mode)
-    : impl_(new Impl(NULL, true))
-{
-    open(filename, mode);
-}
-
-File::File(const FileInitializer &initializer)
-    : impl_(new Impl(NULL, true))
-{
-    open(initializer.filename_, initializer.mode_);
-}
-
-File::File(FILE *fp, bool bClose)
-    : impl_(new Impl(fp, bClose))
-{
-}
-
-File::~File()
-{
-}
-
-void File::open(const char *filename, const char *mode)
-{
-    GMX_RELEASE_ASSERT(impl_->fp_ == NULL,
-                       "Attempted to open the same file object twice");
-    // TODO: Port all necessary functionality from gmx_ffopen() here.
-    impl_->fp_ = openRawHandle(filename, mode);
-}
-
-void File::open(const std::string &filename, const char *mode)
-{
-    open(filename.c_str(), mode);
-}
-
-void File::close()
-{
-    GMX_RELEASE_ASSERT(impl_->fp_ != NULL,
-                       "Attempted to close a file object that is not open");
-    GMX_RELEASE_ASSERT(impl_->bClose_,
-                       "Attempted to close a file object that should not be");
-    bool bOk = (fclose(impl_->fp_) == 0);
-    impl_->fp_ = NULL;
-    if (!bOk)
-    {
-        GMX_THROW_WITH_ERRNO(
-                FileIOError("Error while closing file"), "fclose", errno);
-    }
-}
-
-bool File::isInteractive() const
-{
-    GMX_RELEASE_ASSERT(impl_->fp_ != NULL,
-                       "Attempted to access a file object that is not open");
-#ifdef HAVE_UNISTD_H
-    return isatty(fileno(impl_->fp_));
-#else
-    return true;
-#endif
-}
-
-FILE *File::handle()
-{
-    GMX_RELEASE_ASSERT(impl_->fp_ != NULL,
-                       "Attempted to access a file object that is not open");
-    return impl_->fp_;
-}
-
-void File::readBytes(void *buffer, size_t bytes)
-{
-    errno = 0;
-    FILE  *fp = handle();
-    // TODO: Retry based on errno or something else?
-    size_t bytesRead = std::fread(buffer, 1, bytes, fp);
-    if (bytesRead != bytes)
-    {
-        if (feof(fp))
-        {
-            GMX_THROW(FileIOError(
-                              formatString("Premature end of file\n"
-                                           "Attempted to read: %d bytes\n"
-                                           "Successfully read: %d bytes",
-                                           static_cast<int>(bytes),
-                                           static_cast<int>(bytesRead))));
-        }
-        else
-        {
-            GMX_THROW_WITH_ERRNO(FileIOError("Error while reading file"),
-                                 "fread", errno);
-        }
-    }
-}
-
-bool File::readLine(std::string *line)
-{
-    if (!readLineWithTrailingSpace(line))
-    {
-        return false;
-    }
-    size_t endPos = line->find_last_not_of(" \t\r\n");
-    if (endPos != std::string::npos)
-    {
-        line->resize(endPos + 1);
-    }
-    return true;
-}
-
-bool File::readLineWithTrailingSpace(std::string *line)
-{
-    line->clear();
-    const size_t bufsize = 256;
-    std::string  result;
-    char         buf[bufsize];
-    buf[0] = '\0';
-    FILE        *fp = handle();
-    while (fgets(buf, bufsize, fp) != NULL)
-    {
-        size_t length = std::strlen(buf);
-        result.append(buf, length);
-        if (length < bufsize - 1 || buf[length - 1] == '\n')
-        {
-            break;
-        }
-    }
-    if (ferror(fp))
-    {
-        GMX_THROW_WITH_ERRNO(FileIOError("Error while reading file"),
-                             "fgets", errno);
-    }
-    *line = result;
-    return !result.empty() || !feof(fp);
-}
-
-void File::writeString(const char *str)
-{
-    if (fprintf(handle(), "%s", str) < 0)
-    {
-        GMX_THROW_WITH_ERRNO(FileIOError("Writing to file failed"),
-                             "fprintf", errno);
-    }
-}
-
-void File::writeLine(const char *line)
-{
-    size_t length = std::strlen(line);
-
-    writeString(line);
-    if (length == 0 || line[length-1] != '\n')
-    {
-        writeString("\n");
-    }
-}
-
-void File::writeLine()
-{
-    writeString("\n");
-}
-
-// static
-bool File::exists(const char *filename)
-{
-    if (filename == NULL)
-    {
-        return false;
-    }
-    FILE *test = fopen(filename, "r");
-    if (test == NULL)
-    {
-        return false;
-    }
-    else
-    {
-        fclose(test);
-        // Windows doesn't allow fopen of directory, so we don't need to check
-        // this separately.
-#ifndef GMX_NATIVE_WINDOWS
-        struct stat st_buf;
-        int         status = stat(filename, &st_buf);
-        if (status != 0 || !S_ISREG(st_buf.st_mode))
-        {
-            return false;
-        }
-#endif
-        return true;
-    }
-}
-
-// static
-bool File::exists(const std::string &filename)
-{
-    return exists(filename.c_str());
-}
-
-// static
-File &File::standardInput()
-{
-    static File stdinObject(stdin, false);
-    return stdinObject;
-}
-
-// static
-File &File::standardOutput()
-{
-    static File stdoutObject(stdout, false);
-    return stdoutObject;
-}
-
-// static
-File &File::standardError()
-{
-    static File stderrObject(stderr, false);
-    return stderrObject;
-}
-
-// static
-std::string File::readToString(const char *filename)
-{
-    // Binary mode is required on Windows to be able to determine a size
-    // that can be passed to fread().
-    File  file(filename, "rb");
-    FILE *fp = file.handle();
-
-    if (std::fseek(fp, 0L, SEEK_END) != 0)
-    {
-        GMX_THROW_WITH_ERRNO(FileIOError("Seeking to end of file failed"),
-                             "fseek", errno);
-    }
-    long len = std::ftell(fp);
-    if (len == -1)
-    {
-        GMX_THROW_WITH_ERRNO(FileIOError("Reading file length failed"),
-                             "ftell", errno);
-    }
-    if (std::fseek(fp, 0L, SEEK_SET) != 0)
-    {
-        GMX_THROW_WITH_ERRNO(FileIOError("Seeking to start of file failed"),
-                             "fseek", errno);
-    }
-
-    std::vector<char> data(len);
-    file.readBytes(&data[0], len);
-    file.close();
-
-    std::string result(&data[0], len);
-    // The below is necessary on Windows to make newlines stay as '\n' on a
-    // roundtrip.
-    result = replaceAll(result, "\r\n", "\n");
-
-    return result;
-}
-
-// static
-std::string File::readToString(const std::string &filename)
-{
-    return readToString(filename.c_str());
-}
-
-// static
-void File::writeFileFromString(const std::string &filename,
-                               const std::string &text)
-{
-    File file(filename, "w");
-    file.writeString(text);
-    file.close();
-}
-
-} // namespace gmx
diff --git a/src/gromacs/utility/file.h b/src/gromacs/utility/file.h
deleted file mode 100644 (file)
index 5e45736..0000000
+++ /dev/null
@@ -1,337 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*! \file
- * \brief
- * Declares gmx::File.
- *
- * \author Teemu Murtola <teemu.murtola@gmail.com>
- * \inpublicapi
- * \ingroup module_utility
- */
-#ifndef GMX_UTILITY_FILE_H
-#define GMX_UTILITY_FILE_H
-
-#include <cstdio>
-
-#include <string>
-
-#include "gromacs/utility/classhelpers.h"
-
-namespace gmx
-{
-
-class File;
-
-/*! \brief
- * Parameters for creating a File object.
- *
- * This class (mostly) replaces the ability to return a File object from a
- * function (since File is not copyable): returning a FileInitializer instead
- * allows the caller to construct the File object.
- *
- * \inpublicapi
- * \ingroup module_utility
- */
-class FileInitializer
-{
-    public:
-        /*! \brief
-         * Creates the initializer with given parameters.
-         *
-         * The passed strings must remain valid until the initializer is used
-         * to construct a File object.
-         */
-        FileInitializer(const char *filename, const char *mode)
-            : filename_(filename), mode_(mode)
-        {
-        }
-
-    private:
-        const char *filename_;
-        const char *mode_;
-
-        /*! \brief
-         * Needed to allow access to the parameters without otherwise
-         * unnecessary accessors.
-         */
-        friend class File;
-};
-
-/*! \brief
- * Basic file object.
- *
- * This class provides basic file I/O functionality and uses exceptions
- * (FileIOError) for error reporting.
- *
- * \inpublicapi
- * \ingroup module_utility
- */
-class File
-{
-    public:
-        /*! \brief
-         * Opens a file and returns a `FILE` handle.
-         *
-         * \param[in] filename  Path of the file to open.
-         * \param[in] mode      Mode to open the file in (for fopen()).
-         * \throws    FileIOError on any I/O error.
-         *
-         * Instead of returning `NULL` on errors, throws an exception with
-         * additional details (including the file name and `errno`).
-         */
-        static FILE *openRawHandle(const char *filename, const char *mode);
-        //! \copydoc openRawHandle(const char *, const char *)
-        static FILE *openRawHandle(const std::string &filename, const char *mode);
-        /*! \brief
-         * Creates a file object and opens a file.
-         *
-         * \param[in] filename  Path of the file to open.
-         * \param[in] mode      Mode to open the file in (for fopen()).
-         * \throws    std::bad_alloc if out of memory.
-         * \throws    FileIOError on any I/O error.
-         *
-         * \see open(const char *, const char *)
-         */
-        File(const char *filename, const char *mode);
-        //! \copydoc File(const char *, const char *)
-        File(const std::string &filename, const char *mode);
-        /*! \brief
-         * Creates a file object and opens a file.
-         *
-         * \param[in] initializer  Parameters to open the file.
-         * \throws    std::bad_alloc if out of memory.
-         * \throws    FileIOError on any I/O error.
-         */
-        File(const FileInitializer &initializer);
-        /*! \brief
-         * Destroys the file object.
-         *
-         * If the file is still open, it is closed.
-         * Any error conditions will be ignored.
-         */
-        ~File();
-
-        /*! \brief
-         * Opens a file.
-         *
-         * \param[in] filename  Path of the file to open.
-         * \param[in] mode      Mode to open the file in (for fopen()).
-         * \throws    FileIOError on any I/O error.
-         *
-         * The file object must not be open.
-         */
-        void open(const char *filename, const char *mode);
-        //! \copydoc open(const char *, const char *)
-        void open(const std::string &filename, const char *mode);
-        /*! \brief
-         * Closes the file object.
-         *
-         * \throws  FileIOError on any I/O error.
-         *
-         * The file must be open.
-         */
-        void close();
-
-        /*! \brief
-         * Returns whether the file is an interactive terminal.
-         *
-         * Only works on Unix, otherwise always returns true.
-         * It only makes sense to call this for File::standardInput() and
-         * friends.
-         *
-         * Thie file must be open.
-         * Does not throw.
-         */
-        bool isInteractive() const;
-        /*! \brief
-         * Returns a file handle for interfacing with C functions.
-         *
-         * The file must be open.
-         * Does not throw.
-         */
-        FILE *handle();
-
-        /*! \brief
-         * Reads given number of bytes from the file.
-         *
-         * \param[out] buffer  Pointer to buffer that receives the bytes.
-         * \param[in]  bytes   Number of bytes to read.
-         * \throws     FileIOError on any I/O error.
-         *
-         * The file must be open.
-         */
-        void readBytes(void *buffer, size_t bytes);
-        /*! \brief
-         * Reads a single line from the file.
-         *
-         * \param[out] line    String to receive the line.
-         * \returns    false if nothing was read because the file ended.
-         * \throws     std::bad_alloc if out of memory.
-         * \throws     FileIOError on any I/O error.
-         *
-         * On error or when false is returned, \p line will be empty.
-         * Trailing space will be removed from the line.
-         * To loop over all lines in the file, use:
-         * \code
-           std::string line;
-           while (file.readLine(&line))
-           {
-               // ...
-           }
-           \endcode
-         */
-        bool readLine(std::string *line);
-        /*! \brief
-         * Reads a single line from the file.
-         *
-         * \param[out] line    String to receive the line.
-         * \returns    false if nothing was read because the file ended.
-         * \throws     std::bad_alloc if out of memory.
-         * \throws     FileIOError on any I/O error.
-         *
-         * On error or when false is returned, \p line will be empty.
-         * Works as readLine(), except that terminating newline will be present
-         * in \p line if it was present in the file.
-         *
-         * \see readLine()
-         */
-        bool readLineWithTrailingSpace(std::string *line);
-
-        /*! \brief
-         * Writes a string to the file.
-         *
-         * \param[in]  str  String to write.
-         * \throws     FileIOError on any I/O error.
-         *
-         * The file must be open.
-         */
-        void writeString(const char *str);
-        //! \copydoc writeString(const char *)
-        void writeString(const std::string &str) { writeString(str.c_str()); }
-        /*! \brief
-         * Writes a line to the file.
-         *
-         * \param[in]  line  Line to write.
-         * \throws     FileIOError on any I/O error.
-         *
-         * If \p line does not end in a newline, one newline is appended.
-         * Otherwise, works as writeString().
-         *
-         * The file must be open.
-         */
-        void writeLine(const char *line);
-        //! \copydoc writeLine(const char *)
-        void writeLine(const std::string &line) { writeLine(line.c_str()); }
-        /*! \brief
-         * Writes a newline to the file.
-         *
-         * \throws     FileIOError on any I/O error.
-         */
-        void writeLine();
-
-        /*! \brief
-         * Checks whether a file exists and is a regular file.
-         *
-         * \param[in] filename  Path to the file to check.
-         * \returns   true if \p filename exists and is accessible.
-         *
-         * Does not throw.
-         */
-        static bool exists(const char *filename);
-        //! \copydoc exists(const char *)
-        static bool exists(const std::string &filename);
-
-        /*! \brief
-         * Returns a File object for accessing stdin.
-         *
-         * \throws    std::bad_alloc if out of memory (only on first call).
-         */
-        static File &standardInput();
-        /*! \brief
-         * Returns a File object for accessing stdout.
-         *
-         * \throws    std::bad_alloc if out of memory (only on first call).
-         */
-        static File &standardOutput();
-        /*! \brief
-         * Returns a File object for accessing stderr.
-         *
-         * \throws    std::bad_alloc if out of memory (only on first call).
-         */
-        static File &standardError();
-
-        /*! \brief
-         * Reads contents of a file to a std::string.
-         *
-         * \param[in] filename  Name of the file to read.
-         * \returns   The contents of \p filename.
-         * \throws    std::bad_alloc if out of memory.
-         * \throws    FileIOError on any I/O error.
-         */
-        static std::string readToString(const char *filename);
-        //! \copydoc readToString(const char *)
-        static std::string readToString(const std::string &filename);
-        /*! \brief
-         * Convenience method for writing a file from a string in a single call.
-         *
-         * \param[in] filename  Name of the file to read.
-         * \param[in] text      String to write to \p filename.
-         * \throws    FileIOError on any I/O error.
-         *
-         * If \p filename exists, it is overwritten.
-         */
-        static void writeFileFromString(const std::string &filename,
-                                        const std::string &text);
-
-    private:
-        /*! \brief
-         * Initialize file object from an existing file handle.
-         *
-         * \param[in]  fp     %File handle to use (may be NULL).
-         * \param[in]  bClose Whether this object should close its file handle.
-         * \throws     std::bad_alloc if out of memory.
-         *
-         * Used internally to implement standardOutput() and standardError().
-         */
-        File(FILE *fp, bool bClose);
-
-        class Impl;
-
-        PrivateImplPointer<Impl> impl_;
-};
-
-} // namespace gmx
-
-#endif
index 405702bd7280e180f17002a0abd6713d32adab96..68e595c477d214a4a84019c2a03681639bdaa544 100644 (file)
@@ -43,7 +43,8 @@
 
 #include "fileredirector.h"
 
-#include "gromacs/utility/file.h"
+#include "gromacs/utility/filestream.h"
+#include "gromacs/utility/path.h"
 
 namespace gmx
 {
@@ -88,13 +89,13 @@ class DefaultInputRedirector : public FileInputRedirectorInterface
 class DefaultOutputRedirector : public FileOutputRedirectorInterface
 {
     public:
-        virtual File &standardOutput()
+        virtual TextOutputStream &standardOutput()
         {
-            return File::standardOutput();
+            return TextOutputFile::standardOutput();
         }
-        virtual FileInitializer openFileForWriting(const char *filename)
+        virtual TextOutputStreamPointer openTextOutputFile(const char *filename)
         {
-            return FileInitializer(filename, "w");
+            return TextOutputStreamPointer(new TextOutputFile(filename));
         }
 };
 
index d0a7c6e47edf55ffaac4a34be22d3f100ce53acb..407d57b924e65e958d3bc4404971dd8a60bd84c3 100644 (file)
@@ -45,7 +45,7 @@
 
 #include <string>
 
-#include "gromacs/utility/file.h"
+#include "gromacs/utility/textstream.h"
 
 namespace gmx
 {
@@ -55,11 +55,10 @@ namespace gmx
  *
  * The calling code should take in this interface and use the methods in it
  * all file system operations that need to support this redirection.
- * By default, the code can then use defaultFileInputRedirector() in case no
- * redirection is needed.
  *
  * This allows tests to override the file existence checks without actually
- * using the file system.
+ * using the file system.  See FileOutputRedirectorInterface for notes on
+ * a typical usage pattern.
  *
  * With some further refactoring of the File class, this could also support
  * redirecting input files from in-memory buffers as well, but for now the
@@ -88,16 +87,25 @@ class FileInputRedirectorInterface
 /*! \libinternal \brief
  * Allows capturing `stdout` and file output from code that supports it.
  *
- * The calling code should take in this interface and use the File objects
+ * The calling code should take in this interface and use the stream objects
  * it returns for all output that needs to support this redirection.
- * By default, the code can then use defaultFileOutputRedirector() in case no
- * redirection is needed.
  *
- * This allows tests to capture the file output without duplicating the
- * knowledge of which files are actually produced.  With some further
- * refactoring of the File class, this could support capturing the output into
- * in-memory buffers as well, but for now the current capabilities are
- * sufficient.
+ * Currently, the (nearly) only purpose for this interface is for unit tests to
+ * capture the file output without duplicating the knowledge of which files are
+ * actually produced.  The tests can also replace actual files with in-memory
+ * streams (e.g., a StringOutputStream), and test the output without actually
+ * accessing the file system and managing actual files.
+ *
+ * As the main users of non-default implementations of this interface are tests,
+ * code using this interface generally uses a pattern where the redirector is
+ * initialized to defaultFileOutputRedirector(), and a separate setter is
+ * provided for tests to change the default.  This allows code outside the
+ * tests (and outside the code actually calling the redirector) to be written
+ * as if this interface did not exist (i.e., they do not need to pass the
+ * default instance).
+ *
+ * Also, the interface only supports text files, but can be generalized if/when
+ * there is a need for binary streams (see also TextOutputStream).
  *
  * \inlibraryapi
  * \ingroup module_utility
@@ -108,20 +116,20 @@ class FileOutputRedirectorInterface
         virtual ~FileOutputRedirectorInterface();
 
         /*! \brief
-         * Returns a File object to use for `stdout` output.
+         * Returns a stream to use for `stdout` output.
          */
-        virtual File &standardOutput() = 0;
+        virtual TextOutputStream &standardOutput() = 0;
         /*! \brief
-         * Returns a File object to use for output to a given file.
+         * Returns a stream to use for output to a file at a given path.
          *
          * \param[in] filename  Requested file name.
          */
-        virtual FileInitializer openFileForWriting(const char *filename) = 0;
+        virtual TextOutputStreamPointer openTextOutputFile(const char *filename) = 0;
 
-        //! Convenience method to open a file using an std::string path.
-        FileInitializer openFileForWriting(const std::string &filename)
+        //! Convenience method to open a stream using an std::string path.
+        TextOutputStreamPointer openTextOutputFile(const std::string &filename)
         {
-            return openFileForWriting(filename.c_str());
+            return openTextOutputFile(filename.c_str());
         }
 };
 
diff --git a/src/gromacs/utility/filestream.cpp b/src/gromacs/utility/filestream.cpp
new file mode 100644 (file)
index 0000000..5f24585
--- /dev/null
@@ -0,0 +1,285 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2015, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ * \brief
+ * Implements classes from filestream.h.
+ *
+ * \author Teemu Murtola <teemu.murtola@gmail.com>
+ * \ingroup module_utility
+ */
+#include "gmxpre.h"
+
+#include "filestream.h"
+
+#include "config.h"
+
+#include <cerrno>
+#include <cstdio>
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#include "gromacs/utility/exceptions.h"
+#include "gromacs/utility/gmxassert.h"
+#include "gromacs/utility/stringutil.h"
+
+namespace gmx
+{
+
+namespace
+{
+
+//! Helper function for implementing readLine() for input streams.
+bool readLineImpl(FILE *fp, std::string *line)
+{
+    line->clear();
+    const size_t bufsize = 256;
+    std::string  result;
+    char         buf[bufsize];
+    buf[0] = '\0';
+    while (std::fgets(buf, bufsize, fp) != NULL)
+    {
+        const size_t length = std::strlen(buf);
+        result.append(buf, length);
+        if (length < bufsize - 1 || buf[length - 1] == '\n')
+        {
+            break;
+        }
+    }
+    if (std::ferror(fp))
+    {
+        GMX_THROW_WITH_ERRNO(FileIOError("Error while reading file"),
+                             "fgets", errno);
+    }
+    *line = result;
+    return !result.empty() || !std::feof(fp);
+}
+
+}   // namespace
+
+namespace internal
+{
+
+/********************************************************************
+ * FileStreamImpl
+ */
+
+class FileStreamImpl
+{
+    public:
+        explicit FileStreamImpl(FILE *fp)
+            : fp_(fp), bClose_(false)
+        {
+        }
+        FileStreamImpl(const char *filename, const char *mode)
+            : fp_(NULL), bClose_(true)
+        {
+            fp_ = std::fopen(filename, mode);
+            if (fp_ == NULL)
+            {
+                GMX_THROW_WITH_ERRNO(
+                        FileIOError(formatString("Could not open file '%s'", filename)),
+                        "fopen", errno);
+            }
+        }
+        ~FileStreamImpl()
+        {
+            if (fp_ != NULL && bClose_)
+            {
+                if (std::fclose(fp_) != 0)
+                {
+                    // TODO: Log the error somewhere
+                }
+            }
+        }
+
+        FILE *handle()
+        {
+            GMX_RELEASE_ASSERT(fp_ != NULL,
+                               "Attempted to access a file object that is not open");
+            return fp_;
+        }
+
+        void close()
+        {
+            GMX_RELEASE_ASSERT(fp_ != NULL,
+                               "Attempted to close a file object that is not open");
+            GMX_RELEASE_ASSERT(bClose_,
+                               "Attempted to close a file object that should not be");
+            const bool bOk = (std::fclose(fp_) == 0);
+            fp_ = NULL;
+            if (!bOk)
+            {
+                GMX_THROW_WITH_ERRNO(
+                        FileIOError("Error while closing file"), "fclose", errno);
+            }
+        }
+
+    private:
+        //! File handle for this object (NULL if the stream has been closed).
+        FILE  *fp_;
+        //! Whether \p fp_ should be closed by this object.
+        bool   bClose_;
+};
+
+}   // namespace internal
+
+using internal::FileStreamImpl;
+
+/********************************************************************
+ * StandardInputStream
+ */
+
+bool StandardInputStream::isInteractive() const
+{
+#ifdef HAVE_UNISTD_H
+    return isatty(fileno(stdin));
+#else
+    return true;
+#endif
+}
+
+bool StandardInputStream::readLine(std::string *line)
+{
+    return readLineImpl(stdin, line);
+}
+
+// static
+StandardInputStream &StandardInputStream::instance()
+{
+    static StandardInputStream stdinObject;
+    return stdinObject;
+}
+
+/********************************************************************
+ * TextInputFile
+ */
+
+// static
+FILE *TextInputFile::openRawHandle(const char *filename)
+{
+    FILE *fp = fopen(filename, "r");
+    if (fp == NULL)
+    {
+        GMX_THROW_WITH_ERRNO(
+                FileIOError(formatString("Could not open file '%s'", filename)),
+                "fopen", errno);
+    }
+    return fp;
+}
+
+// static
+FILE *TextInputFile::openRawHandle(const std::string &filename)
+{
+    return openRawHandle(filename.c_str());
+}
+
+TextInputFile::TextInputFile(const std::string &filename)
+    : impl_(new FileStreamImpl(filename.c_str(), "r"))
+{
+}
+
+TextInputFile::TextInputFile(FILE *fp)
+    : impl_(new FileStreamImpl(fp))
+{
+}
+
+TextInputFile::~TextInputFile()
+{
+}
+
+FILE *TextInputFile::handle()
+{
+    return impl_->handle();
+}
+
+bool TextInputFile::readLine(std::string *line)
+{
+    return readLineImpl(impl_->handle(), line);
+}
+
+void TextInputFile::close()
+{
+    impl_->close();
+}
+
+/********************************************************************
+ * TextOutputFile
+ */
+
+TextOutputFile::TextOutputFile(const std::string &filename)
+    : impl_(new FileStreamImpl(filename.c_str(), "w"))
+{
+}
+
+TextOutputFile::TextOutputFile(FILE *fp)
+    : impl_(new FileStreamImpl(fp))
+{
+}
+
+TextOutputFile::~TextOutputFile()
+{
+}
+
+void TextOutputFile::write(const char *str)
+{
+    if (std::fprintf(impl_->handle(), "%s", str) < 0)
+    {
+        GMX_THROW_WITH_ERRNO(FileIOError("Writing to file failed"),
+                             "fprintf", errno);
+    }
+}
+
+void TextOutputFile::close()
+{
+    impl_->close();
+}
+
+// static
+TextOutputFile &TextOutputFile::standardOutput()
+{
+    static TextOutputFile stdoutObject(stdout);
+    return stdoutObject;
+}
+
+// static
+TextOutputFile &TextOutputFile::standardError()
+{
+    static TextOutputFile stderrObject(stderr);
+    return stderrObject;
+}
+
+} // namespace gmx
diff --git a/src/gromacs/utility/filestream.h b/src/gromacs/utility/filestream.h
new file mode 100644 (file)
index 0000000..23c354c
--- /dev/null
@@ -0,0 +1,195 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2015, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \libinternal \file
+ * \brief
+ * Declares implementations for textstream.h interfaces for file input/output.
+ *
+ * \author Teemu Murtola <teemu.murtola@gmail.com>
+ * \inlibraryapi
+ * \ingroup module_utility
+ */
+#ifndef GMX_UTILITY_FILESTREAM_H
+#define GMX_UTILITY_FILESTREAM_H
+
+#include <cstdio>
+
+#include <string>
+
+#include "gromacs/utility/classhelpers.h"
+#include "gromacs/utility/textstream.h"
+
+namespace gmx
+{
+
+namespace internal
+{
+class FileStreamImpl;
+}
+
+/*! \libinternal \brief
+ * Text input stream implementation for reading from `stdin`.
+ *
+ * Implementations for the TextInputStream methods throw FileIOError on any
+ * I/O error.
+ *
+ * \inlibraryapi
+ * \ingroup module_utility
+ */
+class StandardInputStream : public TextInputStream
+{
+    public:
+        /*! \brief
+         * Returns whether `stdin` is an interactive terminal.
+         *
+         * Only works on Unix, otherwise always returns true.
+         *
+         * Does not throw.
+         */
+        bool isInteractive() const;
+
+        // From TextInputStream
+        virtual bool readLine(std::string *line);
+        virtual void close() {}
+
+        /*! \brief
+         * Returns a stream for accessing `stdin`.
+         *
+         * Does not throw.
+         */
+        static StandardInputStream &instance();
+};
+
+/*! \libinternal \brief
+ * Text input stream implementation for reading from a file.
+ *
+ * Implementations for the TextInputStream methods throw FileIOError on any
+ * I/O error.
+ *
+ * \inlibraryapi
+ * \ingroup module_utility
+ */
+class TextInputFile : public TextInputStream
+{
+    public:
+        /*! \brief
+         * Opens a file and returns a `FILE` handle.
+         *
+         * \param[in] filename  Path of the file to open.
+         * \throws    FileIOError on any I/O error.
+         *
+         * Instead of returning `NULL` on errors, throws an exception with
+         * additional details (including the file name and `errno`).
+         */
+        static FILE *openRawHandle(const char *filename);
+        //! \copydoc openRawHandle(const char *)
+        static FILE *openRawHandle(const std::string &filename);
+
+        /*! \brief
+         * Opens a text file as a stream.
+         *
+         * \param[in]  filename  Path to the file to open.
+         * \throws     std::bad_alloc if out of memory.
+         * \throws     FileIOError on any I/O error.
+         */
+        explicit TextInputFile(const std::string &filename);
+        /*! \brief
+         * Initializes file object from an existing file handle.
+         *
+         * \param[in]  fp     File handle to use.
+         * \throws     std::bad_alloc if out of memory.
+         *
+         * The caller is responsible for closing the file; close() must not be
+         * called on an object constructed this way (it triggers an assertion).
+         */
+        explicit TextInputFile(FILE *fp);
+        virtual ~TextInputFile();
+
+        /*! \brief
+         * Returns a raw handle to the input file.
+         *
+         * This is provided for interoperability with older C-like code.
+         */
+        FILE *handle();
+
+        // From TextInputStream
+        virtual bool readLine(std::string *line);
+        virtual void close();
+
+    private:
+        PrivateImplPointer<internal::FileStreamImpl> impl_;
+};
+
+/*! \libinternal \brief
+ * Text output stream implementation for writing to a file.
+ *
+ * Implementations for the TextOutputStream methods throw FileIOError on any
+ * I/O error.
+ *
+ * \inlibraryapi
+ * \ingroup module_utility
+ */
+class TextOutputFile : public TextOutputStream
+{
+    public:
+        //! \copydoc TextInputFile::TextInputFile(const std::string &)
+        explicit TextOutputFile(const std::string &filename);
+        //! \copydoc TextInputFile::TextInputFile(FILE *)
+        explicit TextOutputFile(FILE *fp);
+        virtual ~TextOutputFile();
+
+        // From TextOutputStream
+        virtual void write(const char *text);
+        virtual void close();
+
+        /*! \brief
+         * Returns a stream for accessing `stdout`.
+         *
+         * \throws    std::bad_alloc if out of memory (only on first call).
+         */
+        static TextOutputFile &standardOutput();
+        /*! \brief
+         * Returns a stream for accessing `stderr`.
+         *
+         * \throws    std::bad_alloc if out of memory (only on first call).
+         */
+        static TextOutputFile &standardError();
+
+    private:
+        PrivateImplPointer<internal::FileStreamImpl> impl_;
+};
+
+} // namespace gmx
+
+#endif
similarity index 63%
rename from src/gromacs/mdlib/genborn_sse2_single.h
rename to src/gromacs/utility/mutex.h
index 6753e0e17f8068226d74d8cb77857220a9d9f267..34c7375f223d96e588633e3118cbec3a8050f247 100644 (file)
@@ -1,9 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2008, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
  * To help us fund GROMACS development, we humbly ask that you cite
  * the research papers on the package. Check out http://www.gromacs.org.
  */
-#ifndef _genborn_sse_h
-#define _genborn_sse_h
+/*! \libinternal \file
+ * \brief
+ * Declares C++11-style basic threading primitives
+ * (gmx::Mutex, gmx::lock_guard).
+ *
+ * For now, the implementation is imported from thread-MPI.
+ *
+ * \author Teemu Murtola <teemu.murtola@gmail.com>
+ * \inlibraryapi
+ * \ingroup module_utility
+ */
+#ifndef GMX_THREADING_MUTEX_H
+#define GMX_THREADING_MUTEX_H
 
-#include "gromacs/legacyheaders/typedefs.h"
+#include "thread_mpi/mutex.h"
 
-float
-calc_gb_chainrule_sse2_single(int natoms, t_nblist *nl, float *dadx, float *dvda,
-                              float *xd, float *f, float *fshift, float *shift_vec,
-                              int gb_algorithm, gmx_genborn_t *born, t_mdatoms *md);
+namespace gmx
+{
 
-int
-calc_gb_rad_still_sse2_single(t_commrec *cr, t_forcerec *fr, int natoms, gmx_localtop_t *top,
-                              float *x, t_nblist *nl, gmx_genborn_t *born);
+//! \cond libapi
+/*! \libinternal \brief
+ * C++11-compatible basic mutex.
+ */
+typedef tMPI::mutex Mutex;
+//! \endcond
+using tMPI::lock_guard;
 
-int
-calc_gb_rad_hct_obc_sse2_single(t_commrec *cr, t_forcerec * fr, int natoms, gmx_localtop_t *top,
-                                float *x, t_nblist *nl, gmx_genborn_t *born, t_mdatoms *md, int gb_algorithm);
+} // namespace gmx
 
-#endif /* _genborn_sse_h */
+#endif
diff --git a/src/gromacs/utility/nodelete.h b/src/gromacs/utility/nodelete.h
new file mode 100644 (file)
index 0000000..7f9fe0c
--- /dev/null
@@ -0,0 +1,71 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2015, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \libinternal \file
+ * \brief
+ * Declares no_delete deleter for boost::shared_ptr.
+ *
+ * \author Teemu Murtola <teemu.murtola@gmail.com>
+ * \inlibraryapi
+ * \ingroup module_utility
+ */
+#ifndef GMX_UTILITY_NODELETE_H
+#define GMX_UTILITY_NODELETE_H
+
+namespace gmx
+{
+
+/*! \libinternal \brief
+ * Deleter for boost::shared_ptr that does nothing.
+ *
+ * This is useful for cases where a class needs to keep a reference to another
+ * class, and optionally also manage the lifetime of that other class.
+ * The simplest construct (that does not force all callers to use heap
+ * allocation and boost::shared_ptr for the referenced class) is to use a
+ * single boost::shared_ptr to hold that reference, and use no_delete as the
+ * deleter if the lifetime is managed externally.
+ *
+ * \inlibraryapi
+ * \ingroup module_utility
+ */
+template <class T>
+struct no_delete
+{
+    //! Deleter that does nothing.
+    void operator()(T *) {}
+};
+
+} // namespace gmx
+
+#endif
index d6aa17140d65db27cd77cd0f31ef291dd0a6903a..60c0e0cd7adbb164f5c3a1ad20f47f7762e71b16 100644 (file)
 
 #include <cctype>
 #include <cerrno>
+#include <cstdio>
 #include <cstdlib>
 #include <cstring>
 
 #include <algorithm>
+#include <string>
 
 #include <sys/stat.h>
 
@@ -386,6 +388,44 @@ std::string Path::resolveSymlinks(const std::string &path)
     return result;
 }
 
+/********************************************************************
+ * File
+ */
+
+// static
+bool File::exists(const char *filename)
+{
+    if (filename == NULL)
+    {
+        return false;
+    }
+    FILE *test = std::fopen(filename, "r");
+    if (test == NULL)
+    {
+        return false;
+    }
+    else
+    {
+        std::fclose(test);
+        // Windows doesn't allow fopen of directory, so we don't need to check
+        // this separately.
+#ifndef GMX_NATIVE_WINDOWS
+        struct stat st_buf;
+        int         status = stat(filename, &st_buf);
+        if (status != 0 || !S_ISREG(st_buf.st_mode))
+        {
+            return false;
+        }
+#endif
+        return true;
+    }
+}
+
+// static
+bool File::exists(const std::string &filename)
+{
+    return exists(filename.c_str());
+}
 
 /********************************************************************
  * Directory
index ef2b3e884879ca7c7ecbc8ea4f282037379b8911..8dba41e84c0f1947cb21c62b907eb601e4ad911e 100644 (file)
@@ -85,6 +85,25 @@ class Path
         Path();
 };
 
+class File
+{
+    public:
+        /*! \brief
+         * Checks whether a file exists and is a regular file.
+         *
+         * \param[in] filename  Path to the file to check.
+         * \returns   `true` if \p filename exists and is accessible.
+         *
+         * Does not throw.
+         */
+        static bool exists(const char *filename);
+        //! \copydoc exists(const char *)
+        static bool exists(const std::string &filename);
+
+    private:
+        // Disallow instantiation.
+        File();
+};
 
 class Directory
 {
similarity index 62%
rename from src/gromacs/mdlib/genborn_sse2_double.h
rename to src/gromacs/utility/stringstream.cpp
index 0bf4ea9d6946ac565442f67e81510ab86c353c8e..7a93d3472c8d752fcc2c462f6d5ce04575d4d65e 100644 (file)
@@ -1,9 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2008, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
  * To help us fund GROMACS development, we humbly ask that you cite
  * the research papers on the package. Check out http://www.gromacs.org.
  */
-#ifndef _genborn_sse2_double_h
-#define _genborn_sse2_double_h
+/*! \internal \file
+ * \brief
+ * Implements classes from stringstream.h.
+ *
+ * \author Teemu Murtola <teemu.murtola@gmail.com>
+ * \ingroup module_utility
+ */
+#include "gmxpre.h"
+
+#include "stringstream.h"
 
-#include "gromacs/legacyheaders/typedefs.h"
+#include <string>
 
-int
-calc_gb_rad_still_sse2_double(t_commrec *cr, t_forcerec *fr, int natoms, gmx_localtop_t *top,
-                              double *x, t_nblist *nl, gmx_genborn_t *born);
+namespace gmx
+{
 
-int
-calc_gb_chainrule_sse2_double(int natoms, t_nblist *nl, double *dadx, double *dvda, double *xd, double *f,
-                              double *fshift, double *shift_vec, int gb_algorithm,
-                              gmx_genborn_t *born, t_mdatoms *md);
+void StringOutputStream::write(const char *str)
+{
+    str_.append(str);
+}
 
-int
-calc_gb_rad_hct_obc_sse2_double(t_commrec *cr, t_forcerec *fr, int natoms, gmx_localtop_t *top,
-                                double *x, t_nblist *nl, gmx_genborn_t *born, t_mdatoms *md, int gb_algorithm);
+void StringOutputStream::close()
+{
+}
 
-#endif /* _genborn_sse2_double_h */
+} // namespace gmx
diff --git a/src/gromacs/utility/stringstream.h b/src/gromacs/utility/stringstream.h
new file mode 100644 (file)
index 0000000..899a304
--- /dev/null
@@ -0,0 +1,80 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2015, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \libinternal \file
+ * \brief
+ * Declares implementations for textstream.h interfaces for input/output to
+ * in-memory strings.
+ *
+ * \author Teemu Murtola <teemu.murtola@gmail.com>
+ * \inlibraryapi
+ * \ingroup module_utility
+ */
+#ifndef GMX_UTILITY_STRINGSTREAM_H
+#define GMX_UTILITY_STRINGSTREAM_H
+
+#include <string>
+
+#include "gromacs/utility/classhelpers.h"
+#include "gromacs/utility/textstream.h"
+
+namespace gmx
+{
+
+/*! \libinternal \brief
+ * Text output stream implementation for writing to an in-memory string.
+ *
+ * Implementations for the TextOutputStream methods throw std::bad_alloc if
+ * reallocation of the string fails.
+ *
+ * \inlibraryapi
+ * \ingroup module_utility
+ */
+class StringOutputStream : public TextOutputStream
+{
+    public:
+        //! Returns the text written to the stream so far.
+        const std::string &toString() const { return str_; }
+
+        // From TextOutputStream
+        virtual void write(const char *text);
+        virtual void close();
+
+    private:
+        std::string str_;
+};
+
+} // namespace gmx
+
+#endif
index 86abf82a085e9c370569609e6a6d188f994eb06f..54b32214926a49b5cd98658a72dbbbc0fe5f284b 100644 (file)
 namespace gmx
 {
 
-bool endsWith(const std::string &str, const char *suffix)
+bool endsWith(const char *str, const char *suffix)
 {
-    if (suffix == NULL || suffix[0] == '\0')
+    if (isNullOrEmpty(suffix))
     {
         return true;
     }
-    size_t length = std::strlen(suffix);
-    return (str.length() >= length
-            && str.compare(str.length() - length, length, suffix) == 0);
+    const size_t strLength    = std::strlen(str);
+    const size_t suffixLength = std::strlen(suffix);
+    return (strLength >= suffixLength
+            && std::strcmp(&str[strLength - suffixLength], suffix) == 0);
 }
 
 std::string stripSuffixIfPresent(const std::string &str, const char *suffix)
@@ -247,7 +248,7 @@ replaceAllWords(const std::string &input, const std::string &from,
 
 TextLineWrapperSettings::TextLineWrapperSettings()
     : maxLength_(0), indent_(0), firstLineIndent_(-1),
-      bStripLeadingWhitespace_(false), continuationChar_('\0')
+      bKeepFinalSpaces_(false), continuationChar_('\0')
 {
 }
 
@@ -256,13 +257,19 @@ TextLineWrapperSettings::TextLineWrapperSettings()
  * TextLineWrapper
  */
 
+/* A wrapper counts as trivial only when it sets no line length and no
+ * indentation.  The raw firstLineIndent_ value is compared with <= 0 so
+ * that its -1 sentinel (meaning "use indent_") is accepted as well as an
+ * explicit zero. */
+bool TextLineWrapper::isTrivial() const
+{
+    return settings_.lineLength() == 0 && settings_.indent() == 0
+           && settings_.firstLineIndent_ <= 0;
+}
+
 size_t
 TextLineWrapper::findNextLine(const char *input, size_t lineStart) const
 {
     size_t inputLength = std::strlen(input);
     bool   bFirstLine  = (lineStart == 0 || input[lineStart - 1] == '\n');
     // Ignore leading whitespace if necessary.
-    if (!bFirstLine || settings_.bStripLeadingWhitespace_)
+    if (!bFirstLine)
     {
         lineStart += std::strspn(input + lineStart, " ");
         if (lineStart >= inputLength)
@@ -307,7 +314,7 @@ TextLineWrapper::formatLine(const std::string &input,
     size_t inputLength = input.length();
     bool   bFirstLine  = (lineStart == 0 || input[lineStart - 1] == '\n');
     // Strip leading whitespace if necessary.
-    if (!bFirstLine || settings_.bStripLeadingWhitespace_)
+    if (!bFirstLine)
     {
         lineStart = input.find_first_not_of(' ', lineStart);
         if (lineStart >= inputLength)
@@ -318,9 +325,12 @@ TextLineWrapper::formatLine(const std::string &input,
     int  indent        = (bFirstLine ? settings_.firstLineIndent() : settings_.indent());
     bool bContinuation = (lineEnd < inputLength && input[lineEnd - 1] != '\n');
     // Strip trailing whitespace.
-    while (lineEnd > lineStart && std::isspace(input[lineEnd - 1]))
+    if (!settings_.bKeepFinalSpaces_ || lineEnd < inputLength || input[inputLength - 1] == '\n')
     {
-        --lineEnd;
+        while (lineEnd > lineStart && std::isspace(input[lineEnd - 1]))
+        {
+            --lineEnd;
+        }
     }
 
     const size_t lineLength = lineEnd - lineStart;
index e7e11a6d9ef5c9bf5d1d3a04a7a7dd22add7df77..004daae3e0353708bfa75d731a5fe6fd2db4b0db 100644 (file)
@@ -59,7 +59,7 @@ namespace gmx
  *
  * Does not throw.
  */
-bool inline isNullOrEmpty(const char *str)
+static inline bool isNullOrEmpty(const char *str)
 {
     return str == NULL || str[0] == '\0';
 }
@@ -74,12 +74,12 @@ bool inline isNullOrEmpty(const char *str)
  * Returns true if \p prefix is empty.
  * Does not throw.
  */
-bool inline startsWith(const std::string &str, const std::string &prefix)
+static inline bool startsWith(const std::string &str, const std::string &prefix)
 {
     return str.compare(0, prefix.length(), prefix) == 0;
 }
 //! \copydoc startsWith(const std::string &, const std::string &)
-bool inline startsWith(const char *str, const char *prefix)
+static inline bool startsWith(const char *str, const char *prefix)
 {
     return std::strncmp(str, prefix, std::strlen(prefix)) == 0;
 }
@@ -94,7 +94,12 @@ bool inline startsWith(const char *str, const char *prefix)
  * Returns true if \p suffix is NULL or empty.
  * Does not throw.
  */
-bool endsWith(const std::string &str, const char *suffix);
+bool endsWith(const char *str, const char *suffix);
+//! \copydoc endsWith(const char *, const char *)
+static inline bool endsWith(const std::string &str, const char *suffix)
+{
+    return endsWith(str.c_str(), suffix);
+}
 
 /*! \brief
  * Removes a suffix from a string.
@@ -335,7 +340,7 @@ class TextLineWrapperSettings
          *  - No maximum line width (only explicit line breaks).
          *  - No indentation.
          *  - No continuation characters.
-         *  - Ignore whitespace after an explicit newline.
+         *  - Do not keep final spaces in input strings.
          */
         TextLineWrapperSettings();
 
@@ -366,18 +371,18 @@ class TextLineWrapperSettings
          */
         void setFirstLineIndent(int indent) { firstLineIndent_ = indent; }
         /*! \brief
-         * Sets whether to remove spaces after an explicit newline.
+         * Sets whether final spaces in input should be kept.
          *
-         * \param[in] bStrip  If true, spaces after newline are ignored.
+         * \param[in] bKeep  Whether to keep spaces at the end of the input.
          *
-         * If not removed, the space is added to the indentation set with
-         * setIndent().
-         * The default is to not strip such whitespace.
+         * This means that wrapping a string that ends in spaces also keeps
+         * those spaces in the output.  This allows using the wrapper for
+         * partial lines where the initial part of the line may end in a space.
+         * By default, all trailing whitespace is removed.  Note that this
+         * option does not affect spaces before an explicit newline: those are
+         * always removed.
          */
-        void setStripLeadingWhitespace(bool bStrip)
-        {
-            bStripLeadingWhitespace_ = bStrip;
-        }
+        void setKeepFinalSpaces(bool bKeep) { bKeepFinalSpaces_ = bKeep; }
         /*! \brief
          * Sets a continuation marker for wrapped lines.
          *
@@ -424,8 +429,8 @@ class TextLineWrapperSettings
          * If -1, \a indent_ is used.
          */
         int                     firstLineIndent_;
-        //! Whether to ignore or preserve space after a newline.
-        bool                    bStripLeadingWhitespace_;
+        //! Whether to keep spaces at end of input.
+        bool                    bKeepFinalSpaces_;
         //! If not \c '\0', mark each wrapping point with this character.
         char                    continuationChar_;
 
@@ -503,6 +508,9 @@ class TextLineWrapper
          */
         TextLineWrapperSettings &settings() { return settings_; }
 
+        //! Returns true if the wrapper would not modify the input string.
+        bool isTrivial() const;
+
         /*! \brief
          * Finds the next line to be wrapped.
          *
index a002c762c3f3d91d2d001eca20826f516356bf27..c980a24363ae2883e27f62dcf377ce9c0b1e2117 100644 (file)
@@ -35,4 +35,6 @@
 gmx_add_unit_test(UtilityUnitTests utility-test
                   arrayref.cpp
                   bitmask32.cpp bitmask64.cpp bitmask128.cpp
-                  stringutil.cpp)
+                  stringutil.cpp
+                  textwriter.cpp
+                  )
index f277fc930b97c4ba9836c38f9b0d203ea8c22603..1bb9fb422799f30449e826cc210bca64490a9c0a 100644 (file)
@@ -1,12 +1,7 @@
 <?xml version="1.0"?>
 <?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
 <ReferenceData>
-  <String Name="WrappedAt14StripLeading"><![CDATA[
-A quick brown
-fox jumps
-over the lazy
-dog]]></String>
-  <String Name="WrappedAt14PreserveLeading"><![CDATA[
+  <String Name="WrappedAt14"><![CDATA[
  A quick brown
 fox jumps
  over the lazy
diff --git a/src/gromacs/utility/tests/refdata/TextWriterTest_WritesLines.xml b/src/gromacs/utility/tests/refdata/TextWriterTest_WritesLines.xml
new file mode 100644 (file)
index 0000000..25f4a2f
--- /dev/null
@@ -0,0 +1,11 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+  <String Name="Output"><![CDATA[
+Explicit newline
+Implicit newline
+Explicit newline
+Implicit newline
+
+]]></String>
+</ReferenceData>
diff --git a/src/gromacs/utility/tests/refdata/TextWriterTest_WritesLinesInParts.xml b/src/gromacs/utility/tests/refdata/TextWriterTest_WritesLinesInParts.xml
new file mode 100644 (file)
index 0000000..703467f
--- /dev/null
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+  <String Name="Output"><![CDATA[
+Partial spaced line
+Partial spaced line
+]]></String>
+</ReferenceData>
diff --git a/src/gromacs/utility/tests/refdata/TextWriterTest_WritesLinesInPartsWithWrapper.xml b/src/gromacs/utility/tests/refdata/TextWriterTest_WritesLinesInPartsWithWrapper.xml
new file mode 100644 (file)
index 0000000..703467f
--- /dev/null
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+  <String Name="Output"><![CDATA[
+Partial spaced line
+Partial spaced line
+]]></String>
+</ReferenceData>
diff --git a/src/gromacs/utility/tests/refdata/TextWriterTest_WritesWrappedLines.xml b/src/gromacs/utility/tests/refdata/TextWriterTest_WritesWrappedLines.xml
new file mode 100644 (file)
index 0000000..a87ee7d
--- /dev/null
@@ -0,0 +1,11 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+  <String Name="Output"><![CDATA[
+  Wrapped and
+  indented text
+  Wrapped and
+  indented text
+
+]]></String>
+</ReferenceData>
index ba1683d06d236ae5a99f15c9506bb203c601a243..7145fdb109a48cddf46ce992ad2fa340e5fce318 100644 (file)
@@ -256,7 +256,23 @@ TEST_F(TextLineWrapperTest, HandlesEmptyStrings)
     EXPECT_EQ("", wrapper.wrapToString(""));
     EXPECT_EQ("", wrapper.wrapToString("   "));
     EXPECT_TRUE(wrapper.wrapToVector("").empty());
-    EXPECT_TRUE(wrapper.wrapToString("   ").empty());
+    {
+        std::vector<std::string> wrapped(wrapper.wrapToVector("   "));
+        ASSERT_EQ(1U, wrapped.size());
+        EXPECT_EQ("", wrapped[0]);
+    }
+}
+
+TEST_F(TextLineWrapperTest, HandlesTrailingWhitespace)
+{
+    gmx::TextLineWrapper wrapper;
+
+    EXPECT_EQ("line", wrapper.wrapToString("line   "));
+    EXPECT_EQ("line\n", wrapper.wrapToString("line   \n"));
+
+    wrapper.settings().setKeepFinalSpaces(true);
+    EXPECT_EQ("line   ", wrapper.wrapToString("line   "));
+    EXPECT_EQ("line\n", wrapper.wrapToString("line   \n"));
 }
 
 TEST_F(TextLineWrapperTest, HandlesTrailingNewlines)
@@ -368,14 +384,10 @@ TEST_F(TextLineWrapperTest, HandlesContinuationCharacter)
 TEST_F(TextLineWrapperTest, WrapsCorrectlyWithExtraWhitespace)
 {
     gmx::TextLineWrapper wrapper;
-
     wrapper.settings().setLineLength(14);
-    wrapper.settings().setStripLeadingWhitespace(true);
-    checkText(wrapper.wrapToString(g_wrapTextWhitespace),
-              "WrappedAt14StripLeading");
-    wrapper.settings().setStripLeadingWhitespace(false);
+
     checkText(wrapper.wrapToString(g_wrapTextWhitespace),
-              "WrappedAt14PreserveLeading");
+              "WrappedAt14");
 }
 
 } // namespace
diff --git a/src/gromacs/utility/tests/textwriter.cpp b/src/gromacs/utility/tests/textwriter.cpp
new file mode 100644 (file)
index 0000000..d1cc1ed
--- /dev/null
@@ -0,0 +1,121 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2015, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ * \brief
+ * Tests for gmx::TextWriter.
+ *
+ * \author Teemu Murtola <teemu.murtola@gmail.com>
+ * \ingroup module_utility
+ */
+#include "gmxpre.h"
+
+#include "gromacs/utility/textwriter.h"
+
+#include <string>
+
+#include <gtest/gtest.h>
+
+#include "gromacs/utility/stringstream.h"
+#include "gromacs/utility/stringutil.h"
+
+#include "testutils/stringtest.h"
+
+namespace
+{
+
+/* Test fixture that points a gmx::TextWriter at an in-memory
+ * gmx::StringOutputStream, so that the written text can be compared against
+ * reference data. */
+class TextWriterTest : public gmx::test::StringTestBase
+{
+    public:
+        TextWriterTest() : writer_(&stream_)
+        {
+        }
+
+        //! Checks everything written so far against the "Output" reference string.
+        void checkOutput()
+        {
+            checkText(stream_.toString(), "Output");
+        }
+
+        // stream_ is declared before writer_, so it is constructed before
+        // the writer that receives its address in the constructor.
+        gmx::StringOutputStream  stream_;
+        gmx::TextWriter          writer_;
+};
+
+// Each writeLine() call should produce exactly one line in the reference
+// output, whether or not the argument already ends in a newline, for both
+// the const char * and the std::string overloads.
+TEST_F(TextWriterTest, WritesLines)
+{
+    writer_.writeLine("Explicit newline\n");
+    writer_.writeLine("Implicit newline");
+    writer_.writeLine(std::string("Explicit newline\n"));
+    writer_.writeLine(std::string("Implicit newline"));
+    // The argument-less overload contributes the empty line at the end.
+    writer_.writeLine();
+    checkOutput();
+}
+
+// A line assembled from several writeString() calls must keep the spaces at
+// the boundaries between the parts ("Partial " + "spaced" + " line").
+TEST_F(TextWriterTest, WritesLinesInParts)
+{
+    writer_.writeString("Partial ");
+    writer_.writeString("spaced");
+    writer_.writeString(" line");
+    writer_.writeLine();
+    // Same sequence through the std::string overloads.
+    writer_.writeString(std::string("Partial "));
+    writer_.writeString(std::string("spaced"));
+    writer_.writeString(std::string(" line"));
+    writer_.writeLine();
+    checkOutput();
+}
+
+// With an indent of 2 and a line length of 15, the reference output has the
+// text split into two indented lines ("  Wrapped and" / "  indented text").
+TEST_F(TextWriterTest, WritesWrappedLines)
+{
+    writer_.wrapperSettings().setIndent(2);
+    writer_.wrapperSettings().setLineLength(15);
+    writer_.writeLine("Wrapped and indented text");
+    writer_.writeLine(std::string("Wrapped and indented text"));
+    writer_.writeLine();
+    checkOutput();
+}
+
+// Same as WritesLinesInParts, but with a line length set: the reference
+// output is identical, i.e., the trailing space of "Partial " must survive.
+// NOTE(review): presumably this relies on the wrapper's "keep final spaces"
+// setting being enabled by the writer -- confirm in textwriter.cpp.
+TEST_F(TextWriterTest, WritesLinesInPartsWithWrapper)
+{
+    writer_.wrapperSettings().setLineLength(50);
+    writer_.writeString("Partial ");
+    writer_.writeString("spaced");
+    writer_.writeString(" line");
+    writer_.writeLine();
+    writer_.writeString(std::string("Partial "));
+    writer_.writeString(std::string("spaced"));
+    writer_.writeString(std::string(" line"));
+    writer_.writeLine();
+    checkOutput();
+}
+
+} // namespace
diff --git a/src/gromacs/utility/textreader.cpp b/src/gromacs/utility/textreader.cpp
new file mode 100644 (file)
index 0000000..4366149
--- /dev/null
@@ -0,0 +1,127 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2015, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ * \brief
+ * Implements gmx::TextReader.
+ *
+ * \author Teemu Murtola <teemu.murtola@gmail.com>
+ * \ingroup module_utility
+ */
+#include "gmxpre.h"
+
+#include "textreader.h"
+
+#include "gromacs/utility/filestream.h"
+#include "gromacs/utility/nodelete.h"
+#include "gromacs/utility/textstream.h"
+
+namespace gmx
+{
+
+// static
+std::string TextReader::readFileToString(const char *filename)
+{
+    // Concatenate the raw lines: readLine() returns each line with its
+    // newline (if present), so the file text is reproduced unchanged.
+    TextReader  reader(filename);
+    std::string contents;
+    for (std::string currentLine; reader.readLine(&currentLine); )
+    {
+        contents += currentLine;
+    }
+    // Close explicitly instead of relying on the destructor.
+    reader.close();
+    return contents;
+}
+
+// static
+std::string TextReader::readFileToString(const std::string &filename)
+{
+    // Convenience overload: forwards to the const char * variant.
+    return readFileToString(filename.c_str());
+}
+
+/* Private implementation class for TextReader: holds the smart pointer to
+ * the stream that the reader pulls its lines from. */
+class TextReader::Impl
+{
+    public:
+        explicit Impl(const TextInputStreamPointer &stream)
+            : stream_(stream)
+        {
+        }
+
+        //! Stream to read from; readLine() and close() forward to it.
+        TextInputStreamPointer stream_;
+};
+
+// Creates a TextInputFile for the given path and stores it in the shared
+// pointer held by the implementation object.
+TextReader::TextReader(const std::string &filename)
+    : impl_(new Impl(TextInputStreamPointer(new TextInputFile(filename))))
+{
+}
+
+// Wraps the raw pointer with a no_delete deleter (presumably a no-op; see
+// nodelete.h), matching the documented contract that the caller keeps
+// responsibility for the stream's lifetime.
+TextReader::TextReader(TextInputStream *stream)
+    : impl_(new Impl(TextInputStreamPointer(stream, no_delete<TextInputStream>())))
+{
+}
+
+// Copies the smart pointer, so the reader keeps its own reference to the
+// stream.
+TextReader::TextReader(const TextInputStreamPointer &stream)
+    : impl_(new Impl(stream))
+{
+}
+
+// Defined out of line in the source file.
+// NOTE(review): presumably required so that Impl is a complete type where
+// the PrivateImplPointer member is destroyed -- confirm in classhelpers.h.
+TextReader::~TextReader()
+{
+}
+
+bool TextReader::readLine(std::string *line)
+{
+    // Plain pass-through to the underlying stream; no processing of the line.
+    return impl_->stream_->readLine(line);
+}
+
+bool TextReader::readLineTrimmed(std::string *line)
+{
+    if (!readLine(line))
+    {
+        return false;
+    }
+    // find_last_not_of() returns npos when the line holds only whitespace
+    // (e.g., a blank line "\n"); such a line must be trimmed to empty as
+    // well instead of being returned untouched.
+    const size_t endPos = line->find_last_not_of(" \t\r\n");
+    line->resize(endPos == std::string::npos ? 0 : endPos + 1);
+    return true;
+}
+
+void TextReader::close()
+{
+    // Forwards to the stream's close(); see TextInputStream::close() for
+    // why closing is a method separate from the destructor.
+    impl_->stream_->close();
+}
+
+} // namespace gmx
diff --git a/src/gromacs/utility/textreader.h b/src/gromacs/utility/textreader.h
new file mode 100644 (file)
index 0000000..62607c6
--- /dev/null
@@ -0,0 +1,165 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2015, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \libinternal \file
+ * \brief
+ * Declares gmx::TextReader.
+ *
+ * \author Teemu Murtola <teemu.murtola@gmail.com>
+ * \inlibraryapi
+ * \ingroup module_utility
+ */
+#ifndef GMX_UTILITY_TEXTREADER_H
+#define GMX_UTILITY_TEXTREADER_H
+
+#include <string>
+
+#include "gromacs/utility/classhelpers.h"
+#include "gromacs/utility/textstream.h"
+
+namespace gmx
+{
+
+/*! \libinternal \brief
+ * Reads text from a TextInputStream.
+ *
+ * This class provides more formatted reading capabilities than reading raw
+ * lines from the stream (and a natural place to implement more such
+ * capabilities).
+ *
+ * All methods that read from the stream can throw any exceptions that the
+ * underlying stream throws.
+ *
+ * \inlibraryapi
+ * \ingroup module_utility
+ */
+class TextReader
+{
+    public:
+        /*! \brief
+         * Reads contents of a file to a std::string.
+         *
+         * \param[in] filename  Name of the file to read.
+         * \returns   The contents of \p filename.
+         * \throws    std::bad_alloc if out of memory.
+         * \throws    FileIOError on any I/O error.
+         */
+        static std::string readFileToString(const char *filename);
+        //! \copydoc readFileToString(const char *)
+        static std::string readFileToString(const std::string &filename);
+
+        /*! \brief
+         * Creates a reader that reads from specified file.
+         *
+         * \param[in]  filename  Path to the file to open.
+         * \throws     std::bad_alloc if out of memory.
+         * \throws     FileIOError on any I/O error.
+         *
+         * This constructor is provided for convenience for reading directly
+         * from a file, without the need to construct multiple objects.
+         */
+        explicit TextReader(const std::string &filename);
+        /*! \brief
+         * Creates a reader that reads from specified stream.
+         *
+         * \param[in]  stream  Stream to read from.
+         * \throws     std::bad_alloc if out of memory.
+         *
+         * The caller is responsible for the lifetime of the stream (it must
+         * remain in existence as long as the reader exists).
+         *
+         * This constructor is provided for convenience for cases where the
+         * stream is not allocated with `new` and/or not managed by a
+         * boost::shared_ptr (e.g., if the stream is an object on the stack).
+         */
+        explicit TextReader(TextInputStream *stream);
+        /*! \brief
+         * Creates a reader that reads from specified stream.
+         *
+         * \param[in]  stream  Stream to read from.
+         * \throws     std::bad_alloc if out of memory.
+         *
+         * The reader keeps a reference to the stream, so the caller can pass
+         * in a temporary if necessary.
+         */
+        explicit TextReader(const TextInputStreamPointer &stream);
+        ~TextReader();
+
+        /*! \brief
+         * Reads a single line (including newline) from the stream.
+         *
+         * \param[out] line    String to receive the line.
+         * \returns    `false` if nothing was read because the file ended.
+         *
+         * On error or when false is returned, \p line will be empty.
+         * Newlines will be returned as part of \p line if it was present in
+         * the stream.
+         * To loop over all lines in the stream, use:
+         * \code
+           std::string line;
+           while (reader.readLine(&line))
+           {
+               // ...
+           }
+           \endcode
+         */
+        bool readLine(std::string *line);
+        /*! \brief
+         * Reads a single line from the stream.
+         *
+         * \param[out] line    String to receive the line.
+         * \returns    false if nothing was read because the file ended.
+         *
+         * On error or when false is returned, \p line will be empty.
+         * Works as readLine(), except that trailing whitespace will be removed
+         * from \p line.
+         *
+         * \see readLine()
+         */
+        bool readLineTrimmed(std::string *line);
+
+        /*! \brief
+         * Closes the underlying stream.
+         */
+        void close();
+
+    private:
+        class Impl;
+
+        PrivateImplPointer<Impl> impl_;
+};
+
+} // namespace gmx
+
+#endif
diff --git a/src/gromacs/utility/textstream.h b/src/gromacs/utility/textstream.h
new file mode 100644 (file)
index 0000000..14fbfd4
--- /dev/null
@@ -0,0 +1,148 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2015, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \libinternal \file
+ * \brief
+ * Declares interfaces for simple input/output streams.
+ *
+ * \author Teemu Murtola <teemu.murtola@gmail.com>
+ * \inlibraryapi
+ * \ingroup module_utility
+ */
+#ifndef GMX_UTILITY_TEXTSTREAM_H
+#define GMX_UTILITY_TEXTSTREAM_H
+
+#include <string>
+
+#include <boost/shared_ptr.hpp>
+
+namespace gmx
+{
+
+/*! \libinternal \brief
+ * Interface for reading text.
+ *
+ * Concrete implementations can read the text from, e.g., a file or an in-memory
+ * string.  The main use is to allow unit tests to inject in-memory buffers
+ * instead of writing files to be read by the code under test, but there are
+ * also use cases outside the tests where it is useful to abstract out whether
+ * the input is from a real file or something else.
+ *
+ * To use more advanced formatting than reading raw lines, use TextReader.
+ *
+ * Both methods in the interface can throw std::bad_alloc or other exceptions
+ * that indicate failures to read from the stream.
+ *
+ * \inlibraryapi
+ * \ingroup module_utility
+ */
+class TextInputStream
+{
+    public:
+        virtual ~TextInputStream() {}
+
+        /*! \brief
+         * Reads a line (with newline included) from the stream.
+         *
+         * \param[out] line    String to receive the line.
+         * \returns    `false` if nothing was read because the stream ended.
+         *
+         * On error or when `false` is returned, \p line will be empty.
+         */
+        virtual bool readLine(std::string *line) = 0;
+        /*! \brief
+         * Closes the stream.
+         *
+         * It is not allowed to read from a stream after it has been closed.
+         * See TextOutputStream::close() for rationale for a close() method
+         * separate from the destructor.  For input, failures during close
+         * should be rare, but it is clearer to keep the interface symmetric.
+         */
+        virtual void close() = 0;
+};
+
+/*! \libinternal \brief
+ * Interface for writing text.
+ *
+ * Concrete implementations can write the text to, e.g., a file or an in-memory
+ * string.  The main use is to allow unit tests to inject in-memory buffers
+ * instead of reading in files produced by the code under test, but there are
+ * also use cases outside the tests where it is useful to abstract out whether
+ * the output is into a real file or something else.
+ *
+ * To use more advanced formatting than writing plain strings, use TextWriter.
+ *
+ * The current implementation assumes text-only output in several places, but
+ * this interface could possibly be generalized also for binary files.
+ * However, since all binary files currently written by \Gromacs are either
+ * XDR- or TNG-based, they may require a different approach.  Also, it is worth
+ * keeping the distinction between text and binary files clear, since Windows
+ * does transparent `LF`-`CRLF` newline translation for text files, so mixing
+ * modes when reading and/or writing the same file can cause subtle issues.
+ *
+ * Both methods in the interface can throw std::bad_alloc or other exceptions
+ * that indicate failures to write to the stream.
+ *
+ * \inlibraryapi
+ * \ingroup module_utility
+ */
+class TextOutputStream
+{
+    public:
+        virtual ~TextOutputStream() {}
+
+        /*! \brief
+         * Writes a given string to the stream.
+         */
+        virtual void write(const char *text) = 0;
+        /*! \brief
+         * Closes the stream.
+         *
+         * It is not allowed to write to a stream after it has been closed.
+         * A method separate from the destructor is provided such that errors
+         * that occur while closing the stream (e.g., when closing the file)
+         * can be handled using exceptions.
+         * The destructor is not allowed to throw, so code that wants to
+         * observe such errors needs to call close() after it has finished
+         * writing to the stream.
+         */
+        virtual void close() = 0;
+};
+
+//! Shorthand for a smart pointer to a TextInputStream.
+typedef boost::shared_ptr<TextInputStream> TextInputStreamPointer;
+//! Shorthand for a smart pointer to a TextOutputStream.
+typedef boost::shared_ptr<TextOutputStream> TextOutputStreamPointer;
+
+} // namespace gmx
+
+#endif
diff --git a/src/gromacs/utility/textwriter.cpp b/src/gromacs/utility/textwriter.cpp
new file mode 100644 (file)
index 0000000..4b1e58c
--- /dev/null
@@ -0,0 +1,162 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2015, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ * \brief
+ * Implements gmx::TextWriter.
+ *
+ * \author Teemu Murtola <teemu.murtola@gmail.com>
+ * \ingroup module_utility
+ */
+#include "gmxpre.h"
+
+#include "textwriter.h"
+
+#include <cstring>
+
+#include "gromacs/utility/filestream.h"
+#include "gromacs/utility/nodelete.h"
+#include "gromacs/utility/stringutil.h"
+#include "gromacs/utility/textstream.h"
+
+namespace gmx
+{
+
+class TextWriter::Impl
+{
+    public:
+        explicit Impl(const TextOutputStreamPointer &stream)
+            : stream_(stream)
+        {
+            wrapper_.settings().setKeepFinalSpaces(true);
+        }
+
+        void writeWrappedString(const std::string &str)
+        {
+            stream_->write(wrapper_.wrapToString(str).c_str());
+        }
+
+        TextOutputStreamPointer stream_;
+        TextLineWrapper         wrapper_;
+};
+
+// static
+void TextWriter::writeFileFromString(const std::string &filename,
+                                     const std::string &text)
+{
+    TextWriter file(filename);
+    file.writeString(text);
+    file.close();
+}
+
+TextWriter::TextWriter(const std::string &filename)
+    : impl_(new Impl(TextOutputStreamPointer(new TextOutputFile(filename))))
+{
+}
+
+TextWriter::TextWriter(FILE *fp)
+    : impl_(new Impl(TextOutputStreamPointer(new TextOutputFile(fp))))
+{
+}
+
+TextWriter::TextWriter(TextOutputStream *stream)
+    : impl_(new Impl(TextOutputStreamPointer(stream, no_delete<TextOutputStream>())))
+{
+}
+
+TextWriter::TextWriter(const TextOutputStreamPointer &stream)
+    : impl_(new Impl(stream))
+{
+}
+
+TextWriter::~TextWriter()
+{
+}
+
+TextOutputStream &TextWriter::stream()
+{
+    return *impl_->stream_;
+}
+
+TextLineWrapperSettings &TextWriter::wrapperSettings()
+{
+    return impl_->wrapper_.settings();
+}
+
+void TextWriter::writeString(const char *str)
+{
+    if (impl_->wrapper_.isTrivial())
+    {
+        impl_->stream_->write(str);
+    }
+    else
+    {
+        impl_->writeWrappedString(str);
+    }
+}
+
+void TextWriter::writeString(const std::string &str)
+{
+    impl_->writeWrappedString(str);
+}
+
+void TextWriter::writeLine(const char *line)
+{
+    writeString(line);
+    if (!endsWith(line, "\n"))
+    {
+        writeLine();
+    }
+}
+
+void TextWriter::writeLine(const std::string &line)
+{
+    writeString(line);
+    if (!endsWith(line, "\n"))
+    {
+        writeLine();
+    }
+}
+
+void TextWriter::writeLine()
+{
+    writeString("\n");
+}
+
+void TextWriter::close()
+{
+    impl_->stream_->close();
+}
+
+} // namespace gmx
diff --git a/src/gromacs/utility/textwriter.h b/src/gromacs/utility/textwriter.h
new file mode 100644 (file)
index 0000000..5f70083
--- /dev/null
@@ -0,0 +1,185 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2015, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \libinternal \file
+ * \brief
+ * Declares gmx::TextWriter.
+ *
+ * \author Teemu Murtola <teemu.murtola@gmail.com>
+ * \inlibraryapi
+ * \ingroup module_utility
+ */
+#ifndef GMX_UTILITY_TEXTWRITER_H
+#define GMX_UTILITY_TEXTWRITER_H
+
+#include <cstdio>
+
+#include <string>
+
+#include "gromacs/utility/classhelpers.h"
+#include "gromacs/utility/textstream.h"
+
+namespace gmx
+{
+
+class TextLineWrapperSettings;
+
+/*! \libinternal \brief
+ * Writes text into a TextOutputStream.
+ *
+ * This class provides more formatting and line-oriented writing capabilities
+ * than writing raw strings into the stream.
+ *
+ * All methods that write to the stream can throw any exceptions that the
+ * underlying stream throws.
+ *
+ * \inlibraryapi
+ * \ingroup module_utility
+ */
+class TextWriter
+{
+    public:
+        /*! \brief
+         * Convenience method for writing a file from a string in a single call.
+         *
+         * \param[in] filename  Name of the file to write.
+         * \param[in] text      String to write to \p filename.
+         * \throws    std::bad_alloc if out of memory.
+         * \throws    FileIOError on any I/O error.
+         *
+         * If \p filename exists, it is overwritten.
+         */
+        static void writeFileFromString(const std::string &filename,
+                                        const std::string &text);
+
+        /*! \brief
+         * Creates a writer that writes to specified file.
+         *
+         * \param[in]  filename  Path to the file to open.
+         * \throws     std::bad_alloc if out of memory.
+         * \throws     FileIOError on any I/O error.
+         *
+         * This constructor is provided for convenience for writing directly to
+         * a file, without the need to construct multiple objects.
+         */
+        explicit TextWriter(const std::string &filename);
+        /*! \brief
+         * Creates a writer that writes to specified file.
+         *
+         * \param[in]  fp  File handle to write to.
+         * \throws     std::bad_alloc if out of memory.
+         * \throws     FileIOError on any I/O error.
+         *
+         * This constructor is provided for interoperability with C-like code
+         * for writing directly to an already opened file, without the need to
+         * construct multiple objects.
+         *
+         * The caller is responsible for closing \p fp; it is not allowed to
+         * call close() on the writer.
+         */
+        explicit TextWriter(FILE *fp);
+        /*! \brief
+         * Creates a writer that writes to specified stream.
+         *
+         * \param[in]  stream  Stream to write to.
+         * \throws     std::bad_alloc if out of memory.
+         *
+         * The caller is responsible for the lifetime of the stream (should
+         * remain in existence as long as the writer exists).
+         *
+         * This constructor is provided for convenience for cases where the
+         * stream is not allocated with `new` and/or not managed by a
+         * boost::shared_ptr (e.g., if the stream is an object on the stack).
+         */
+        explicit TextWriter(TextOutputStream *stream);
+        /*! \brief
+         * Creates a writer that writes to specified stream.
+         *
+         * \param[in]  stream  Stream to write to.
+         * \throws     std::bad_alloc if out of memory.
+         *
+         * The writer keeps a reference to the stream, so the caller can pass
+         * in a temporary if necessary.
+         */
+        explicit TextWriter(const TextOutputStreamPointer &stream);
+        ~TextWriter();
+
+        //! Returns the underlying stream for this writer.
+        TextOutputStream &stream();
+
+        /*! \brief
+         * Allows adjusting wrapping settings for the writer.
+         *
+         * \todo
+         * Wrapping is not currently implemented for code that writes partial
+         * lines with writeString().
+         */
+        TextLineWrapperSettings &wrapperSettings();
+
+        /*! \brief
+         * Writes a string to the stream.
+         *
+         * \param[in]  str  String to write.
+         */
+        void writeString(const char *str);
+        //! \copydoc writeString(const char *)
+        void writeString(const std::string &str);
+        /*! \brief
+         * Writes a line to the stream.
+         *
+         * \param[in]  line  Line to write.
+         *
+         * If \p line does not end in a newline, one newline is appended.
+         * Otherwise, works as writeString().
+         */
+        void writeLine(const char *line);
+        //! \copydoc writeLine(const char *)
+        void writeLine(const std::string &line);
+        //! Writes a newline to the stream.
+        void writeLine();
+
+        /*! \brief
+         * Closes the underlying stream.
+         */
+        void close();
+
+    private:
+        class Impl;
+
+        PrivateImplPointer<Impl> impl_;
+};
+
+} // namespace gmx
+
+#endif
index 8378845cc1d9ffa213dbe37176fe0f7dda6bccf1..9042aee8a9c860ac8a4399d3abf55c3357de6c8e 100644 (file)
 #include "gromacs/utility/fatalerror.h"
 
 
+/* DISCLAIMER: All the atom counts and thread numbers below are heuristic.
+ * The real switching points will depend on the simulated system,
+ * the algorithms used and the hardware it's running on, as well as if there
+ * are other jobs running on the same machine. We try to take into account
+ * factors that have a large influence, such as recent Intel CPUs being
+ * much better at wide multi-threading. The remaining factors should
+ * (hopefully) have a small influence, such that the performance just before
+ * and after a switch point doesn't change too much.
+ */
+
+#ifdef GMX_OPENMP
+static const bool bOMP = true;
+#else
+static const bool bOMP = false;
+#endif
+
 #ifdef GMX_THREAD_MPI
 /* The minimum number of atoms per tMPI thread. With fewer atoms than this,
  * the number of threads will get lowered.
@@ -64,7 +80,7 @@ static const int min_atoms_per_gpu        = 900;
 /* TODO choose nthreads_omp based on hardware topology
    when we have a hardware topology detection library */
 /* First we consider the case of no MPI (1 MPI rank).
- * In general, when running up to 4 threads, OpenMP should be faster.
+ * In general, when running up to 8 threads, OpenMP should be faster.
  * Note: on AMD Bulldozer we should avoid running OpenMP over two dies.
  * On Intel>=Nehalem running OpenMP on a single CPU is always faster,
  * even on two CPUs it's usually faster (but with many OpenMP threads
@@ -77,16 +93,16 @@ static const int min_atoms_per_gpu        = 900;
  * Sandy/Ivy Bridge, Has/Broadwell. By checking for AVX instead of
  * model numbers we ensure also future Intel CPUs are covered.
  */
-const int nthreads_omp_always_faster_default   =  6;
-const int nthreads_omp_always_faster_Nehalem   = 12;
-const int nthreads_omp_always_faster_Intel_AVX = 16;
+const int nthreads_omp_faster_default   =  8;
+const int nthreads_omp_faster_Nehalem   = 12;
+const int nthreads_omp_faster_Intel_AVX = 16;
 /* For CPU only runs the fastest options are usually MPI or OpenMP only.
  * With one GPU, using MPI only is almost never optimal, so we need to
  * compare running pure OpenMP with combined MPI+OpenMP. This means higher
  * OpenMP threads counts can still be ok. Multiplying the numbers above
  * by a factor of 2 seems to be a good estimate.
  */
-const int nthreads_omp_always_faster_gpu_fac   =  2;
+const int nthreads_omp_faster_gpu_fac   =  2;
 
 /* This is the case with MPI (2 or more MPI PP ranks).
  * By default we will terminate with a fatal error when more than 8
@@ -104,34 +120,30 @@ const int nthreads_omp_mpi_ok_min_gpu          =  2;
 const int nthreads_omp_mpi_target_max          =  6;
 
 
-#ifdef GMX_USE_OPENCL
-static const bool bGpuSharingSupported = false;
-#else
-static const bool bGpuSharingSupported = true;
-#endif
-
-
-static int nthreads_omp_always_faster(gmx_cpuid_t cpuid_info, gmx_bool bUseGPU)
+/* Returns the maximum OpenMP thread count for which using a single MPI rank
+ * should be faster than using multiple ranks with the same total thread count.
+ */
+static int nthreads_omp_faster(gmx_cpuid_t cpuid_info, gmx_bool bUseGPU)
 {
     int nth;
 
     if (gmx_cpuid_vendor(cpuid_info) == GMX_CPUID_VENDOR_INTEL &&
         gmx_cpuid_feature(cpuid_info, GMX_CPUID_FEATURE_X86_AVX))
     {
-        nth = nthreads_omp_always_faster_Intel_AVX;
+        nth = nthreads_omp_faster_Intel_AVX;
     }
     else if (gmx_cpuid_is_intel_nehalem(cpuid_info))
     {
-        nth = nthreads_omp_always_faster_Nehalem;
+        nth = nthreads_omp_faster_Nehalem;
     }
     else
     {
-        nth = nthreads_omp_always_faster_default;
+        nth = nthreads_omp_faster_default;
     }
 
     if (bUseGPU)
     {
-        nth *= nthreads_omp_always_faster_gpu_fac;
+        nth *= nthreads_omp_faster_gpu_fac;
     }
 
     nth = std::min(nth, GMX_OPENMP_MAX_THREADS);
@@ -139,6 +151,26 @@ static int nthreads_omp_always_faster(gmx_cpuid_t cpuid_info, gmx_bool bUseGPU)
     return nth;
 }
 
+/* Returns the maximum OpenMP thread count that passes the efficiency check */
+static int nthreads_omp_efficient_max(int gmx_unused nrank,
+                                      gmx_cpuid_t    cpuid_info,
+                                      gmx_bool       bUseGPU)
+{
+#if defined GMX_OPENMP && defined GMX_MPI
+    if (nrank > 1)
+    {
+        return nthreads_omp_mpi_ok_max;
+    }
+    else
+#endif
+    {
+        return nthreads_omp_faster(cpuid_info, bUseGPU);
+    }
+}
+
+/* Return the number of thread-MPI ranks to use.
+ * This is chosen such that we can always obey our own efficiency checks.
+ */
 static int get_tmpi_omp_thread_division(const gmx_hw_info_t *hwinfo,
                                         const gmx_hw_opt_t  *hw_opt,
                                         int                  nthreads_tot,
@@ -161,9 +193,9 @@ static int get_tmpi_omp_thread_division(const gmx_hw_info_t *hwinfo,
             /* #thread < #gpu is very unlikely, but if so: waste gpu(s) */
             nrank = nthreads_tot;
         }
-        else if (bGpuSharingSupported &&
-                 (nthreads_tot > nthreads_omp_always_faster(hwinfo->cpuid_info,
-                                                            ngpu > 0) ||
+        else if (gmx_gpu_sharing_supported() &&
+                 (nthreads_tot > nthreads_omp_faster(hwinfo->cpuid_info,
+                                                     ngpu > 0) ||
                   (ngpu > 1 && nthreads_tot/ngpu > nthreads_omp_mpi_target_max)))
         {
             /* The high OpenMP thread count will likely result in sub-optimal
@@ -193,8 +225,7 @@ static int get_tmpi_omp_thread_division(const gmx_hw_info_t *hwinfo,
     }
     else
     {
-        if (nthreads_tot <= nthreads_omp_always_faster(hwinfo->cpuid_info,
-                                                       ngpu > 0))
+        if (nthreads_tot <= nthreads_omp_faster(hwinfo->cpuid_info, ngpu > 0))
         {
             /* Use pure OpenMP parallelization */
             nrank = 1;
@@ -210,6 +241,34 @@ static int get_tmpi_omp_thread_division(const gmx_hw_info_t *hwinfo,
 }
 
 
+static int getMaxGpuUsable(FILE *fplog, const t_commrec *cr, const gmx_hw_info_t *hwinfo, int cutoff_scheme)
+{
+    /* This code relies on the fact that GPUs are not detected when GPU
+     * acceleration was disabled at run time by the user.
+     */
+    if (cutoff_scheme == ecutsVERLET &&
+        hwinfo->gpu_info.n_dev_compatible > 0)
+    {
+        if (gmx_multiple_gpu_per_node_supported())
+        {
+            return hwinfo->gpu_info.n_dev_compatible;
+        }
+        else
+        {
+            if (hwinfo->gpu_info.n_dev_compatible > 1)
+            {
+                md_print_warn(cr, fplog, "More than one compatible GPU is available, but GROMACS can only use one of them. Using a single thread-MPI rank.\n");
+            }
+            return 1;
+        }
+    }
+    else
+    {
+        return 0;
+    }
+}
+
+
 #ifdef GMX_THREAD_MPI
 /* Get the number of MPI ranks to use for thread-MPI based on how many
  * were requested, which algorithms we're using,
@@ -219,7 +278,7 @@ static int get_tmpi_omp_thread_division(const gmx_hw_info_t *hwinfo,
  * with the hardware, except that ntmpi could be larger than #GPU.
  */
 int get_nthreads_mpi(const gmx_hw_info_t *hwinfo,
-                     const gmx_hw_opt_t  *hw_opt,
+                     gmx_hw_opt_t        *hw_opt,
                      const t_inputrec    *inputrec,
                      const gmx_mtop_t    *mtop,
                      const t_commrec     *cr,
@@ -227,16 +286,15 @@ int get_nthreads_mpi(const gmx_hw_info_t *hwinfo,
 {
     int      nthreads_hw, nthreads_tot_max, nrank, ngpu;
     int      min_atoms_per_mpi_rank;
-    gmx_bool bCanUseGPU;
 
     /* Check if an algorithm does not support parallel simulation.  */
     if (inputrec->eI == eiLBFGS ||
         inputrec->coulombtype == eelEWALD)
     {
-        md_print_warn(cr, fplog, "The integration or electrostatics algorithm doesn't support parallel runs. Using a single thread-MPI thread.\n");
+        md_print_warn(cr, fplog, "The integration or electrostatics algorithm doesn't support parallel runs. Using a single thread-MPI rank.\n");
         if (hw_opt->nthreads_tmpi > 1)
         {
-            gmx_fatal(FARGS, "You asked for more than 1 thread-MPI thread, but an algorithm doesn't support that");
+            gmx_fatal(FARGS, "You asked for more than 1 thread-MPI rank, but an algorithm doesn't support that");
         }
 
         return 1;
@@ -266,16 +324,7 @@ int get_nthreads_mpi(const gmx_hw_info_t *hwinfo,
         nthreads_tot_max = nthreads_hw;
     }
 
-    bCanUseGPU = (inputrec->cutoff_scheme == ecutsVERLET &&
-                  hwinfo->gpu_info.n_dev_compatible > 0);
-    if (bCanUseGPU)
-    {
-        ngpu = hwinfo->gpu_info.n_dev_compatible;
-    }
-    else
-    {
-        ngpu = 0;
-    }
+    ngpu = getMaxGpuUsable(fplog, cr, hwinfo, inputrec->cutoff_scheme);
 
     if (inputrec->cutoff_scheme == ecutsGROUP)
     {
@@ -297,7 +346,7 @@ int get_nthreads_mpi(const gmx_hw_info_t *hwinfo,
     }
     else
     {
-        if (bCanUseGPU)
+        if (ngpu >= 1)
         {
             min_atoms_per_mpi_rank = min_atoms_per_gpu;
         }
@@ -362,6 +411,26 @@ int get_nthreads_mpi(const gmx_hw_info_t *hwinfo,
 
         nrank = nrank_new;
 
+        /* We reduced the number of tMPI ranks, which means we might violate
+         * our own efficiency checks if we simply use all hardware threads.
+         */
+        if (bOMP && hw_opt->nthreads_omp <= 0 && hw_opt->nthreads_tot <= 0)
+        {
+            /* The user set neither the total nor the OpenMP thread count,
+             * we should use all hardware threads, unless we will violate
+             * our own efficiency limitation on the thread count.
+             */
+            int  nt_omp_max;
+
+            nt_omp_max = nthreads_omp_efficient_max(nrank, hwinfo->cpuid_info, ngpu >= 1);
+
+            if (nrank*nt_omp_max < hwinfo->nthreads_hw_avail)
+            {
+                /* Limit the number of OpenMP threads to start */
+                hw_opt->nthreads_omp = nt_omp_max;
+            }
+        }
+
         fprintf(stderr, "\n");
         fprintf(stderr, "NOTE: Parallelization is limited by the small number of atoms,\n");
         fprintf(stderr, "      only starting %d thread-MPI ranks.\n", nrank);
@@ -375,7 +444,7 @@ int get_nthreads_mpi(const gmx_hw_info_t *hwinfo,
 
 void check_resource_division_efficiency(const gmx_hw_info_t *hwinfo,
                                         const gmx_hw_opt_t  *hw_opt,
-                                        gmx_bool             bNTOptSet,
+                                        gmx_bool             bNtOmpOptionSet,
                                         t_commrec           *cr,
                                         FILE                *fplog)
 {
@@ -392,6 +461,7 @@ void check_resource_division_efficiency(const gmx_hw_info_t *hwinfo,
      * OpenMP have been initialized. Check that here.
      */
 #ifdef GMX_THREAD_MPI
+    assert(nthreads_omp_faster_default >= nthreads_omp_mpi_ok_max);
     assert(hw_opt->nthreads_tmpi >= 1);
 #endif
     assert(gmx_omp_nthreads_get(emntDefault) >= 1);
@@ -434,7 +504,7 @@ void check_resource_division_efficiency(const gmx_hw_info_t *hwinfo,
     if (DOMAINDECOMP(cr) && cr->nnodes > 1)
     {
         if (nth_omp_max < nthreads_omp_mpi_ok_min ||
-            (!(ngpu > 0 && !bGpuSharingSupported) &&
+            (!(ngpu > 0 && !gmx_gpu_sharing_supported()) &&
              nth_omp_max > nthreads_omp_mpi_ok_max))
         {
             /* Note that we print target_max here, not ok_max */
@@ -443,7 +513,7 @@ void check_resource_division_efficiency(const gmx_hw_info_t *hwinfo,
                     nthreads_omp_mpi_ok_min,
                     nthreads_omp_mpi_target_max);
 
-            if (bNTOptSet)
+            if (bNtOmpOptionSet)
             {
                 md_print_warn(cr, fplog, "NOTE: %s\n", buf);
             }
@@ -460,8 +530,8 @@ void check_resource_division_efficiency(const gmx_hw_info_t *hwinfo,
     else
     {
         /* No domain decomposition (or only one domain) */
-        if (!(ngpu > 0 && !bGpuSharingSupported) &&
-            nth_omp_max > nthreads_omp_always_faster(hwinfo->cpuid_info, ngpu > 0))
+        if (!(ngpu > 0 && !gmx_gpu_sharing_supported()) &&
+            nth_omp_max > nthreads_omp_faster(hwinfo->cpuid_info, ngpu > 0))
         {
             /* To arrive here, the user/system set #ranks and/or #OMPthreads */
             gmx_bool bEnvSet;
@@ -469,7 +539,7 @@ void check_resource_division_efficiency(const gmx_hw_info_t *hwinfo,
 
             bEnvSet = (getenv("OMP_NUM_THREADS") != NULL);
 
-            if (bNTOptSet || bEnvSet)
+            if (bNtOmpOptionSet || bEnvSet)
             {
                 sprintf(buf2, "You requested %d OpenMP threads", nth_omp_max);
             }
@@ -489,7 +559,7 @@ void check_resource_division_efficiency(const gmx_hw_info_t *hwinfo,
              * with different values per rank or node, since in that case
              * the user can not set -ntomp to override the error.
              */
-            if (bNTOptSet || (bEnvSet && nth_omp_min != nth_omp_max))
+            if (bNtOmpOptionSet || (bEnvSet && nth_omp_min != nth_omp_max))
             {
                 md_print_warn(cr, fplog, "NOTE: %s\n", buf);
             }
@@ -502,7 +572,7 @@ void check_resource_division_efficiency(const gmx_hw_info_t *hwinfo,
 #else /* GMX_OPENMP && GMX_MPI */
       /* No OpenMP and/or MPI: it doesn't make much sense to check */
     GMX_UNUSED_VALUE(hw_opt);
-    GMX_UNUSED_VALUE(bNTOptSet);
+    GMX_UNUSED_VALUE(bNtOmpOptionSet);
     /* Check if we have more than 1 physical core, if detected,
      * or more than 1 hardware thread if physical cores were not detected.
      */
@@ -538,17 +608,18 @@ void check_and_update_hw_opt_1(gmx_hw_opt_t *hw_opt,
     }
     if (hw_opt->nthreads_tmpi > 0)
     {
-        gmx_fatal(FARGS, "Setting the number of thread-MPI threads is only supported with thread-MPI and GROMACS was compiled without thread-MPI");
+        gmx_fatal(FARGS, "Setting the number of thread-MPI ranks is only supported with thread-MPI and GROMACS was compiled without thread-MPI");
     }
 #endif
 
-#ifndef GMX_OPENMP
-    if (hw_opt->nthreads_omp > 1)
+    if (!bOMP)
     {
-        gmx_fatal(FARGS, "More than 1 OpenMP thread requested, but GROMACS was compiled without OpenMP support");
+        if (hw_opt->nthreads_omp > 1)
+        {
+            gmx_fatal(FARGS, "More than 1 OpenMP thread requested, but GROMACS was compiled without OpenMP support");
+        }
+        hw_opt->nthreads_omp = 1;
     }
-    hw_opt->nthreads_omp = 1;
-#endif
 
     if (hw_opt->nthreads_tot > 0 && hw_opt->nthreads_omp_pme <= 0)
     {
@@ -559,14 +630,14 @@ void check_and_update_hw_opt_1(gmx_hw_opt_t *hw_opt,
             hw_opt->nthreads_omp > 0 &&
             hw_opt->nthreads_tot != hw_opt->nthreads_tmpi*hw_opt->nthreads_omp)
         {
-            gmx_fatal(FARGS, "The total number of threads requested (%d) does not match the thread-MPI threads (%d) times the OpenMP threads (%d) requested",
+            gmx_fatal(FARGS, "The total number of threads requested (%d) does not match the thread-MPI ranks (%d) times the OpenMP threads (%d) requested",
                       hw_opt->nthreads_tot, hw_opt->nthreads_tmpi, hw_opt->nthreads_omp);
         }
 
         if (hw_opt->nthreads_tmpi > 0 &&
             hw_opt->nthreads_tot % hw_opt->nthreads_tmpi != 0)
         {
-            gmx_fatal(FARGS, "The total number of threads requested (%d) is not divisible by the number of thread-MPI threads requested (%d)",
+            gmx_fatal(FARGS, "The total number of threads requested (%d) is not divisible by the number of thread-MPI ranks requested (%d)",
                       hw_opt->nthreads_tot, hw_opt->nthreads_tmpi);
         }
 
@@ -584,12 +655,10 @@ void check_and_update_hw_opt_1(gmx_hw_opt_t *hw_opt,
         }
     }
 
-#ifndef GMX_OPENMP
-    if (hw_opt->nthreads_omp > 1)
+    if (!bOMP && hw_opt->nthreads_omp > 1)
     {
         gmx_fatal(FARGS, "OpenMP threads are requested, but GROMACS was compiled without OpenMP support");
     }
-#endif
 
     if (hw_opt->nthreads_omp_pme > 0 && hw_opt->nthreads_omp <= 0)
     {
index ca982377414f2c575358fdf0e5018513f01d9173..42f4f05182472753a829148947068115549e7129 100644 (file)
  * At the point we have already called check_and_update_hw_opt.
  * Thus all options should be internally consistent and consistent
  * with the hardware, except that ntmpi could be larger than #GPU.
+ * If necessary, this function will modify hw_opt->nthreads_omp.
  */
 int get_nthreads_mpi(const gmx_hw_info_t *hwinfo,
-                     const gmx_hw_opt_t  *hw_opt,
+                     gmx_hw_opt_t        *hw_opt,
                      const t_inputrec    *inputrec,
                      const gmx_mtop_t    *mtop,
                      const t_commrec     *cr,
@@ -58,12 +59,12 @@ int get_nthreads_mpi(const gmx_hw_info_t *hwinfo,
  * intended to catch cases where the user starts 1 MPI rank per hardware
  * thread or 1 rank per physical node.
  * With a sub-optimal setup a note is printed to fplog and stderr when
- * bNtOptSet==TRUE; with bNtOptSet==FALSE a fatal error is issued.
+ * bNtOmpOptionSet==TRUE; with bNtOmpOptionSet==FALSE a fatal error is issued.
  * This function should be called after thread-MPI and OpenMP are set up.
  */
 void check_resource_division_efficiency(const gmx_hw_info_t *hwinfo,
                                         const gmx_hw_opt_t  *hw_opt,
-                                        gmx_bool             bNTOptSet,
+                                        gmx_bool             bNtOmpOptionSet,
                                         t_commrec           *cr,
                                         FILE                *fplog);
 
index a917c62ec581adecdcb06d00ab9b4d4d070b2a29..f25ad46c9b9c3b7bf35ba5d9927ce902b21bae19 100644 (file)
@@ -781,10 +781,6 @@ int mdrunner(gmx_hw_opt_t *hw_opt,
                                                  hw_opt,
                                                  inputrec, mtop,
                                                  cr, fplog);
-        if (hw_opt->nthreads_tot > 0 && hw_opt->nthreads_omp <= 0)
-        {
-            hw_opt->nthreads_omp = hw_opt->nthreads_tot/hw_opt->nthreads_tmpi;
-        }
 
         if (hw_opt->nthreads_tmpi > 1)
         {
index 845aa58ac0fadebb683eebd647bb366c4567b342..b793af46c56d853cbbeb4e3a69ef496b1ca47ca3 100644 (file)
@@ -50,8 +50,8 @@
 #include "gromacs/options/options.h"
 #include "gromacs/utility/basedefinitions.h"
 #include "gromacs/utility/basenetwork.h"
-#include "gromacs/utility/file.h"
 #include "gromacs/utility/gmxmpi.h"
+#include "gromacs/utility/textwriter.h"
 #include "programs/mdrun/mdrun_main.h"
 
 #include "testutils/cmdlinetest.h"
@@ -133,13 +133,13 @@ SimulationRunner::useStringAsMdpFile(const char *mdpString)
 void
 SimulationRunner::useStringAsMdpFile(const std::string &mdpString)
 {
-    gmx::File::writeFileFromString(mdpInputFileName_, mdpString);
+    gmx::TextWriter::writeFileFromString(mdpInputFileName_, mdpString);
 }
 
 void
 SimulationRunner::useStringAsNdxFile(const char *ndxString)
 {
-    gmx::File::writeFileFromString(ndxFileName_, ndxString);
+    gmx::TextWriter::writeFileFromString(ndxFileName_, ndxString);
 }
 
 void
index 131d9351f5de5ed471f404d874e6d7fa806807b4..a2a5cdde513b344965bc02c57a480a67d217da30 100644 (file)
@@ -37,6 +37,7 @@ include_directories(${LIBXML2_INCLUDE_DIR})
 set(TESTUTILS_SOURCES
     cmdlinetest.cpp
     integrationtests.cpp
+    interactivetest.cpp
     mpi-printer.cpp
     refdata.cpp
     stringtest.cpp
index 3d5a956fca8c09fd79a121b81dddc618572cb3f8..6b336b27f313f3cb8f4ba78760c3a1b8c1460bb7 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
 #include "gromacs/commandline/cmdlineoptionsmodule.h"
 #include "gromacs/commandline/cmdlineprogramcontext.h"
 #include "gromacs/utility/arrayref.h"
-#include "gromacs/utility/file.h"
 #include "gromacs/utility/gmxassert.h"
 #include "gromacs/utility/stringutil.h"
+#include "gromacs/utility/textreader.h"
+#include "gromacs/utility/textwriter.h"
 
 #include "testutils/refdata.h"
 #include "testutils/testfilemanager.h"
@@ -297,7 +298,7 @@ void CommandLineTestHelper::setInputFileContents(
     GMX_ASSERT(extension[0] != '.', "Extension should not contain a dot");
     std::string fullFilename = impl_->fileManager_.getTemporaryFilePath(
                 formatString("%d.%s", args->argc(), extension));
-    File::writeFileFromString(fullFilename, contents);
+    TextWriter::writeFileFromString(fullFilename, contents);
     args->addOption(option, fullFilename);
 }
 
@@ -308,7 +309,7 @@ void CommandLineTestHelper::setInputFileContents(
     GMX_ASSERT(extension[0] != '.', "Extension should not contain a dot");
     std::string fullFilename = impl_->fileManager_.getTemporaryFilePath(
                 formatString("%d.%s", args->argc(), extension));
-    File        file(fullFilename, "w");
+    TextWriter  file(fullFilename);
     ConstArrayRef<const char *>::const_iterator i;
     for (i = contents.begin(); i != contents.end(); ++i)
     {
@@ -345,7 +346,7 @@ void CommandLineTestHelper::checkOutputFiles(TestReferenceChecker checker) const
              outfile != impl_->outputFiles_.end();
              ++outfile)
         {
-            std::string output = File::readToString(outfile->path);
+            std::string output = TextReader::readFileToString(outfile->path);
             outputChecker.checkStringBlock(output, outfile->option.c_str());
         }
     }
index 1ae38d84525b8dfc62b0f5178ad9305cb3948268..b6e9bcfdb67f16b17e6b79715d4980024cf82b08 100644 (file)
@@ -68,4 +68,31 @@ and use the copy_xsl.sh script to copy it to relevant locations.
     <xsl:value-of select="."/>
 </xsl:template>
 
+<xsl:template match="InteractiveSession">
+    <pre>
+        <xsl:for-each select="*">
+            <xsl:choose>
+                <xsl:when test="starts-with(@Name, 'Output')">
+                    <xsl:value-of select="substring(.,2)"/>
+                </xsl:when>
+                <xsl:when test="string-length(.)=1">
+                    <xsl:text>&#x25ba;</xsl:text>
+                    <xsl:text>&#xb6;</xsl:text>
+                </xsl:when>
+                <xsl:when test="contains(substring(.,2), '&#10;')">
+                    <xsl:text>&#x25ba;</xsl:text>
+                    <xsl:value-of select="translate(substring(.,2), '&#10;', '&#x23ce;')"/>
+                    <xsl:text>&#10;</xsl:text>
+                </xsl:when>
+                <xsl:otherwise>
+                    <xsl:text>&#x25ba;</xsl:text>
+                    <xsl:value-of select="substring(.,2)"/>
+                    <xsl:text>&#xb6;</xsl:text>
+                </xsl:otherwise>
+            </xsl:choose>
+        </xsl:for-each>
+        <xsl:text>[EOF]</xsl:text>
+    </pre>
+</xsl:template>
+
 </xsl:stylesheet>
index 20fa85f566a0159ee8007e9fe9290f90fca5c850..9913f6053ee00b7ff7e115d0596b669d8434aa62 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -46,7 +46,7 @@
 #include <stdio.h>
 
 #include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/file.h"
+#include "gromacs/utility/textwriter.h"
 
 namespace gmx
 {
@@ -69,7 +69,7 @@ void
 IntegrationTestFixture::redirectStringToStdin(const char* theString)
 {
     std::string fakeStdin("fake-stdin");
-    gmx::File::writeFileFromString(fakeStdin, theString);
+    gmx::TextWriter::writeFileFromString(fakeStdin, theString);
     if (NULL == std::freopen(fakeStdin.c_str(), "r", stdin))
     {
         GMX_THROW_WITH_ERRNO(FileIOError("Failed to redirect a string to stdin"),
diff --git a/src/testutils/interactivetest.cpp b/src/testutils/interactivetest.cpp
new file mode 100644 (file)
index 0000000..ce8c854
--- /dev/null
@@ -0,0 +1,187 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2015, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ * \brief
+ * Implements classes from interactivetest.h.
+ *
+ * \author Teemu Murtola <teemu.murtola@gmail.com>
+ * \ingroup module_testutils
+ */
+#include "gmxpre.h"
+
+#include "interactivetest.h"
+
+#include <string>
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+#include "gromacs/utility/arrayref.h"
+#include "gromacs/utility/stringutil.h"
+#include "gromacs/utility/textstream.h"
+
+#include "testutils/refdata.h"
+#include "testutils/stringtest.h"
+
+namespace gmx
+{
+namespace test
+{
+
+// These two classes cannot be in an unnamed namespace (easily), since
+// then their use as members below would trigger warnings.
+// But if anyone needs these outside this file, they can easily be moved to a
+// separate header.
+
+class MockTextInputStream : public TextInputStream
+{
+    public:
+        MOCK_METHOD1(readLine, bool(std::string *));
+        MOCK_METHOD0(close, void());
+};
+
+class MockTextOutputStream : public TextOutputStream
+{
+    public:
+        MOCK_METHOD1(write, void(const char *));
+        MOCK_METHOD0(close, void());
+};
+
+class InteractiveTestHelper::Impl
+{
+    public:
+        explicit Impl(TestReferenceChecker checker)
+            : checker_(checker), bLastNewline_(true),
+              currentLine_(0), bHasOutput_(false)
+        {
+            using ::testing::_;
+            using ::testing::Invoke;
+            EXPECT_CALL(inputStream_, readLine(_))
+                .WillRepeatedly(Invoke(this, &Impl::readInputLine));
+            EXPECT_CALL(inputStream_, close()).Times(0);
+            EXPECT_CALL(outputStream_, write(_))
+                .WillRepeatedly(Invoke(this, &Impl::addOutput));
+            EXPECT_CALL(outputStream_, close()).Times(0);
+        }
+
+        bool readInputLine(std::string *line)
+        {
+            checkOutput();
+            line->clear();
+            const bool bPresent = (currentLine_ < inputLines_.size());
+            if (bPresent)
+            {
+                line->assign(inputLines_[currentLine_]);
+                if (bLastNewline_ || currentLine_ + 1 < inputLines_.size())
+                {
+                    line->append("\n");
+                }
+            }
+            ++currentLine_;
+            const std::string id = formatString("Input%d", static_cast<int>(currentLine_));
+            StringTestBase::checkText(&checker_, *line, id.c_str());
+            return bPresent;
+        }
+        void addOutput(const char *str)
+        {
+            bHasOutput_ = true;
+            currentOutput_.append(str);
+        }
+
+        void checkOutput()
+        {
+            const std::string id = formatString("Output%d", static_cast<int>(currentLine_));
+            if (checker_.checkPresent(bHasOutput_, id.c_str()))
+            {
+                StringTestBase::checkText(&checker_, currentOutput_, id.c_str());
+            }
+            bHasOutput_ = false;
+            currentOutput_.clear();
+        }
+        void checkPendingInput()
+        {
+            const std::string id = formatString("Input%d", static_cast<int>(currentLine_+1));
+            checker_.checkPresent(false, id.c_str());
+        }
+
+        TestReferenceChecker             checker_;
+        ConstArrayRef<const char *>      inputLines_;
+        bool                             bLastNewline_;
+        size_t                           currentLine_;
+        bool                             bHasOutput_;
+        std::string                      currentOutput_;
+        MockTextInputStream              inputStream_;
+        MockTextOutputStream             outputStream_;
+};
+
+InteractiveTestHelper::InteractiveTestHelper(TestReferenceChecker checker)
+    : impl_(new Impl(checker.checkCompound("InteractiveSession", "Interactive")))
+{
+}
+
+InteractiveTestHelper::~InteractiveTestHelper()
+{
+}
+
+void InteractiveTestHelper::setLastNewline(bool bInclude)
+{
+    impl_->bLastNewline_ = bInclude;
+}
+
+void InteractiveTestHelper::setInputLines(
+        const ConstArrayRef<const char *> &inputLines)
+{
+    impl_->inputLines_  = inputLines;
+    impl_->currentLine_ = 0;
+}
+
+TextInputStream &InteractiveTestHelper::inputStream()
+{
+    return impl_->inputStream_;
+}
+
+TextOutputStream &InteractiveTestHelper::outputStream()
+{
+    return impl_->outputStream_;
+}
+
+void InteractiveTestHelper::checkSession()
+{
+    impl_->checkOutput();
+    impl_->checkPendingInput();
+}
+
+} // namespace test
+} // namespace gmx
diff --git a/src/testutils/interactivetest.h b/src/testutils/interactivetest.h
new file mode 100644 (file)
index 0000000..66167f1
--- /dev/null
@@ -0,0 +1,126 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2015, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \libinternal \file
+ * \brief
+ * Provides helper classes for testing interactive prompts.
+ *
+ * \author Teemu Murtola <teemu.murtola@gmail.com>
+ * \inlibraryapi
+ * \ingroup module_testutils
+ */
+#ifndef GMX_TESTUTILS_INTERACTIVETEST_H
+#define GMX_TESTUTILS_INTERACTIVETEST_H
+
+#include "gromacs/utility/arrayref.h"
+#include "gromacs/utility/classhelpers.h"
+
+namespace gmx
+{
+
+class TextInputStream;
+class TextOutputStream;
+
+namespace test
+{
+
+class TestReferenceChecker;
+
+/*! \libinternal \brief
+ * Helper class for testing interactive sessions.
+ *
+ * The calling test can set the user input using setInputLines() (and possibly
+ * setLastNewline()), pass the streams from inputStream() and outputStream() to
+ * the code that executes the interactive session, and then call checkSession()
+ * after the session is finished.
+ * The input is provided from the array set with setInputLines(), and all
+ * output is checked using the reference data framework.
+ * The reference XML data can be viewed with the XSLT stylesheet to show
+ * exactly how the session went.
+ *
+ * \inlibraryapi
+ * \ingroup module_testutils
+ */
+class InteractiveTestHelper
+{
+    public:
+        /*! \brief
+         * Initializes the helper.
+         *
+         * \param[in] checker  Parent reference checker to use.
+         *
+         * The helper creates a compound item under \p checker for the
+         * interactive session it tests.
+         */
+        explicit InteractiveTestHelper(gmx::test::TestReferenceChecker checker);
+        ~InteractiveTestHelper();
+
+        //! Sets whether the last input line contains a newline (by default, it does).
+        void setLastNewline(bool bInclude);
+        /*! \brief
+         * Sets the input lines for the interactive session.
+         *
+         * Calls to TextInputStream::readLine() will return strings from this
+         * array in sequence.
+         * Newlines are added at the end automatically (except for the last
+         * line if `setLastNewline(false)` has been called).
+         * If there are more `readLine()` calls than there are input lines,
+         * the remaining calls return end-of-input.
+         */
+        void setInputLines(const ConstArrayRef<const char *> &inputLines);
+
+        //! Returns the input stream for the session.
+        TextInputStream  &inputStream();
+        //! Returns the output stream for the session.
+        TextOutputStream &outputStream();
+
+        /*! \brief
+         * Finalizes the checking for the session.
+         *
+         * This must be called after all input and output from a session has
+         * occurred, as the helper will not otherwise know when output after
+         * the last input has finished.  This method also checks that the
+         * required number of input lines were read in the session.
+         */
+        void checkSession();
+
+    private:
+        class Impl;
+
+        PrivateImplPointer<Impl> impl_;
+};
+} // namespace test
+} // namespace gmx
+
+#endif
index 7ce6008345f5ee323fcd8ee11bf075a04f09d39c..ce37bdfee5f873631851795b37dd32500df8ea05 100644 (file)
 
 #include "stringtest.h"
 
-#include <algorithm>
 #include <string>
-#include <utility>
-#include <vector>
 
 #include <boost/scoped_ptr.hpp>
 
 #include "gromacs/options/basicoptions.h"
 #include "gromacs/options/options.h"
-#include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/file.h"
-#include "gromacs/utility/fileredirector.h"
+#include "gromacs/utility/textreader.h"
 
 #include "testutils/refdata.h"
-#include "testutils/testexceptions.h"
-#include "testutils/testfilemanager.h"
 #include "testutils/testoptions.h"
 
 namespace gmx
@@ -70,27 +63,6 @@ namespace
 {
 //! Stores the -stdout flag value to print out values instead of checking them.
 bool g_bWriteToStdOut = false;
-
-/*! \brief
- * Helper for checking a block of text, e.g., implementing the `-stdout`
- * option.
- *
- * \ingroup module_testutils
- */
-void checkTextImpl(TestReferenceChecker *checker, const std::string &text,
-                   const char *id)
-{
-    if (g_bWriteToStdOut)
-    {
-        printf("%s:\n", id);
-        printf("%s[END]\n", text.c_str());
-    }
-    else
-    {
-        checker->checkStringBlock(text, id);
-    }
-}
-
 }
 
 // TODO: Only add this option to those test binaries that actually need it
@@ -106,74 +78,6 @@ GMX_TEST_OPTIONS(StringTestOptions, options)
 }
 //! \endcond
 
-/********************************************************************
- * TestFileOutputRedirector
- */
-
-/*! \internal
- * \brief
- * Implementation of FileOutputRedirectorInterface for tests.
- *
- * This class redirects all output files to temporary files managed by a
- * TestFileManager, and supports checking the contents of these files using the
- * reference data framework.
- *
- * \ingroup module_testutils
- */
-class TestFileOutputRedirector : public FileOutputRedirectorInterface
-{
-    public:
-        //! Initializes the redirector with the given file manager.
-        explicit TestFileOutputRedirector(TestFileManager *fileManager)
-            : fileManager_(*fileManager)
-        {
-        }
-
-        virtual File &standardOutput()
-        {
-            if (!stdoutFile_)
-            {
-                const std::string path = fileManager_.getTemporaryFilePath("stdout.txt");
-                stdoutFile_.reset(new File(path, "w"));
-                fileList_.push_back(FileListEntry("<stdout>", path));
-            }
-            return *stdoutFile_;
-        }
-        virtual FileInitializer openFileForWriting(const char *filename)
-        {
-            std::string       suffix = filename;
-            std::replace(suffix.begin(), suffix.end(), '/', '_');
-            const std::string path = fileManager_.getTemporaryFilePath(suffix);
-            fileList_.push_back(FileListEntry(filename, path));
-            return FileInitializer(fileList_.back().second.c_str(), "w");
-        }
-
-        /*! \brief
-         * Checks the contents of all redirected files.
-         */
-        void checkRedirectedFiles(TestReferenceChecker *checker)
-        {
-            if (stdoutFile_)
-            {
-                stdoutFile_->close();
-                stdoutFile_.reset();
-            }
-            std::vector<FileListEntry>::const_iterator i;
-            for (i = fileList_.begin(); i != fileList_.end(); ++i)
-            {
-                const std::string text = File::readToString(i->second);
-                checkTextImpl(checker, text, i->first.c_str());
-            }
-        }
-
-    private:
-        typedef std::pair<std::string, std::string> FileListEntry;
-
-        TestFileManager            &fileManager_;
-        boost::scoped_ptr<File>     stdoutFile_;
-        std::vector<FileListEntry>  fileList_;
-};
-
 /********************************************************************
  * StringTestBase::Impl
  */
@@ -183,31 +87,34 @@ class StringTestBase::Impl
     public:
         TestReferenceData                           data_;
         boost::scoped_ptr<TestReferenceChecker>     checker_;
-        boost::scoped_ptr<TestFileOutputRedirector> redirector_;
 };
 
 /********************************************************************
  * StringTestBase
  */
 
-StringTestBase::StringTestBase()
-    : impl_(new Impl)
+// static
+void StringTestBase::checkText(TestReferenceChecker *checker,
+                               const std::string &text, const char *id)
 {
+    if (g_bWriteToStdOut)
+    {
+        printf("%s:\n", id);
+        printf("%s[END]\n", text.c_str());
+    }
+    else
+    {
+        checker->checkStringBlock(text, id);
+    }
 }
 
-StringTestBase::~StringTestBase()
+StringTestBase::StringTestBase()
+    : impl_(new Impl)
 {
 }
 
-FileOutputRedirectorInterface &
-StringTestBase::initOutputRedirector(TestFileManager *fileManager)
+StringTestBase::~StringTestBase()
 {
-    if (impl_->redirector_)
-    {
-        GMX_THROW(TestException("initOutputRedirector() called more than once"));
-    }
-    impl_->redirector_.reset(new TestFileOutputRedirector(fileManager));
-    return *impl_->redirector_;
 }
 
 TestReferenceChecker &
@@ -223,25 +130,15 @@ StringTestBase::checker()
 void
 StringTestBase::checkText(const std::string &text, const char *id)
 {
-    checkTextImpl(&checker(), text, id);
+    checkText(&checker(), text, id);
 }
 
 void
 StringTestBase::checkFileContents(const std::string &filename, const char *id)
 {
-    const std::string text = File::readToString(filename);
+    const std::string text = TextReader::readFileToString(filename);
     checkText(text, id);
 }
 
-void
-StringTestBase::checkRedirectedOutputFiles()
-{
-    if (!impl_->redirector_)
-    {
-        GMX_THROW(TestException("initOutputRedirector() not called"));
-    }
-    impl_->redirector_->checkRedirectedFiles(&checker());
-}
-
 } // namespace test
 } // namespace gmx
index 67e89113a3b7e6dc0d2b06275ae0b53eae0c241c..ce0f00af86084ab4b25caeef6283a45be62a1787 100644 (file)
 namespace gmx
 {
 
-class FileOutputRedirectorInterface;
-
 namespace test
 {
 
-class TestFileManager;
 class TestReferenceChecker;
 
 /*! \libinternal \brief
@@ -74,20 +71,18 @@ class TestReferenceChecker;
 class StringTestBase : public ::testing::Test
 {
     public:
-        StringTestBase();
-        ~StringTestBase();
-
         /*! \brief
-         * Creates a redirector that directs all output to temporary files.
+         * Checks a block of text.
          *
-         * \param[in] fileManager  File manager to use for temporary files.
-         *
-         * Can only be called once in a test.
-         *
-         * \see checkRedirectedOutputFiles()
+         * This static method is provided for code that does not derive from
+         * StringTestBase to use the same functionality, e.g., implementing the
+         * `-stdout` option.
          */
-        FileOutputRedirectorInterface &
-        initOutputRedirector(TestFileManager *fileManager);
+        static void checkText(TestReferenceChecker *checker,
+                              const std::string &text, const char *id);
+
+        StringTestBase();
+        ~StringTestBase();
 
         /*! \brief
          * Returns the root checker for this test's reference data.
@@ -114,20 +109,6 @@ class StringTestBase : public ::testing::Test
          * single string and calls checkText().
          */
         void checkFileContents(const std::string &filename, const char *id);
-        /*! \brief
-         * Checks contents of all files redirected with initOutputRedirector().
-         *
-         * Uses the same logic as checkFileContents() to check each file
-         * (including `stdout`) that has been created using the redirector
-         * returned by initOutputRedirector().
-         *
-         * initOutputRedirector() must have been called.
-         * This method should not be called if the redirector will still be
-         * used for further output in the test.  Behavior is not designed for
-         * checking in the middle of the test, although that could potentially
-         * be changed if necessary.
-         */
-        void checkRedirectedOutputFiles();
 
     private:
         class Impl;
index 62ea64812b3d056c6e82da0aa1e57986329f63b2..eebcca434fa37645c1f27f06930c69a323a335f6 100644 (file)
 
 #include <set>
 #include <string>
+#include <utility>
+#include <vector>
+
+#include <boost/shared_ptr.hpp>
+
+#include "gromacs/utility/stringstream.h"
+
+#include "testutils/stringtest.h"
 
 namespace gmx
 {
 namespace test
 {
 
+/********************************************************************
+ * TestFileInputRedirector
+ */
+
 TestFileInputRedirector::TestFileInputRedirector()
 {
 }
@@ -69,5 +81,59 @@ bool TestFileInputRedirector::fileExists(const char *filename) const
     return existingFiles_.count(filename) > 0;
 }
 
+/********************************************************************
+ * TestFileOutputRedirector::Impl
+ */
+
+class TestFileOutputRedirector::Impl
+{
+    public:
+        typedef boost::shared_ptr<StringOutputStream> StringStreamPointer;
+        typedef std::pair<std::string, StringStreamPointer> FileListEntry;
+
+        StringStreamPointer         stdoutStream_;
+        std::vector<FileListEntry>  fileList_;
+};
+
+/********************************************************************
+ * TestFileOutputRedirector
+ */
+
+TestFileOutputRedirector::TestFileOutputRedirector()
+    : impl_(new Impl)
+{
+}
+
+TestFileOutputRedirector::~TestFileOutputRedirector()
+{
+}
+
+TextOutputStream &TestFileOutputRedirector::standardOutput()
+{
+    if (!impl_->stdoutStream_)
+    {
+        impl_->stdoutStream_.reset(new StringOutputStream);
+        impl_->fileList_.push_back(Impl::FileListEntry("<stdout>", impl_->stdoutStream_));
+    }
+    return *impl_->stdoutStream_;
+}
+
+TextOutputStreamPointer
+TestFileOutputRedirector::openTextOutputFile(const char *filename)
+{
+    Impl::StringStreamPointer stream(new StringOutputStream);
+    impl_->fileList_.push_back(Impl::FileListEntry(filename, stream));
+    return stream;
+}
+
+void TestFileOutputRedirector::checkRedirectedFiles(TestReferenceChecker *checker)
+{
+    std::vector<Impl::FileListEntry>::const_iterator i;
+    for (i = impl_->fileList_.begin(); i != impl_->fileList_.end(); ++i)
+    {
+        StringTestBase::checkText(checker, i->second->toString(), i->first.c_str());
+    }
+}
+
 } // namespace test
 } // namespace gmx
index 711c6c8ac14e735cc573e100b912c270b10ef87d..ccb3a69490e43780c3356f01b6fc2d1388ba375e 100644 (file)
@@ -54,6 +54,8 @@ namespace gmx
 namespace test
 {
 
+class TestReferenceChecker;
+
 /*! \libinternal \brief
  * In-memory implementation for FileInputRedirectorInterface for tests.
  *
@@ -87,6 +89,40 @@ class TestFileInputRedirector : public FileInputRedirectorInterface
         GMX_DISALLOW_COPY_AND_ASSIGN(TestFileInputRedirector);
 };
 
+/*! \libinternal \brief
+ * In-memory implementation of FileOutputRedirectorInterface for tests.
+ *
+ * This class redirects all output files to in-memory buffers, and supports
+ * checking the contents of these files using the reference data framework.
+ *
+ * \ingroup module_testutils
+ */
+class TestFileOutputRedirector : public FileOutputRedirectorInterface
+{
+    public:
+        TestFileOutputRedirector();
+        virtual ~TestFileOutputRedirector();
+
+        /*! \brief
+         * Checks contents of all redirected files (including stdout).
+         *
+         * This method should not be called if the redirector will still be
+         * used for further output in the test.  Behavior is not designed for
+         * checking in the middle of the test, although that could potentially
+         * be changed if necessary.
+         */
+        void checkRedirectedFiles(TestReferenceChecker *checker);
+
+        // From FileOutputRedirectorInterface
+        virtual TextOutputStream &standardOutput();
+        virtual TextOutputStreamPointer openTextOutputFile(const char *filename);
+
+    private:
+        class Impl;
+
+        PrivateImplPointer<Impl> impl_;
+};
+
 } // namespace test
 } // namespace gmx
 
index 0fd8d1100b1784407475280e25f382c6930a47f9..a17ed2309df7d2906f046f887641fcd2264ceebf 100644 (file)
@@ -60,7 +60,7 @@
 #include "gromacs/options/options.h"
 #include "gromacs/utility/errorcodes.h"
 #include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/file.h"
+#include "gromacs/utility/filestream.h"
 #include "gromacs/utility/futil.h"
 #include "gromacs/utility/path.h"
 #include "gromacs/utility/programcontext.h"
@@ -142,7 +142,7 @@ void printHelp(const Options &options)
     std::fprintf(stderr,
                  "\nYou can use the following GROMACS-specific command-line flags\n"
                  "to control the behavior of the tests:\n\n");
-    CommandLineHelpContext context(&File::standardError(),
+    CommandLineHelpContext context(&TextOutputFile::standardError(),
                                    eHelpOutputFormat_Console, NULL, program);
     context.setModuleDisplayName(program);
     CommandLineHelpWriter(options).writeHelp(context);
index 9bc036f51dd9cd3463a9a4bade3bec64e62579f1..976b8d79892061c74a6ac35928e68428165fdd30 100644 (file)
@@ -45,9 +45,8 @@
 
 #include <list>
 
-#include "thread_mpi/mutex.h"
-
 #include "gromacs/utility/classhelpers.h"
+#include "gromacs/utility/mutex.h"
 
 namespace gmx
 {
@@ -75,7 +74,7 @@ class TestOptionsRegistry
         //! Adds a provider into the registry.
         void add(const char * /*name*/, TestOptionsProvider *provider)
         {
-            tMPI::lock_guard<tMPI::mutex> lock(listMutex_);
+            lock_guard<Mutex> lock(listMutex_);
             providerList_.push_back(provider);
         }
 
@@ -87,7 +86,7 @@ class TestOptionsRegistry
 
         typedef std::list<TestOptionsProvider *> ProviderList;
 
-        tMPI::mutex             listMutex_;
+        Mutex                   listMutex_;
         ProviderList            providerList_;
 
         GMX_DISALLOW_COPY_AND_ASSIGN(TestOptionsRegistry);
@@ -97,7 +96,7 @@ void TestOptionsRegistry::initOptions(Options *options)
 {
     // TODO: Have some deterministic order for the options; now it depends on
     // the order in which the global initializers are run.
-    tMPI::lock_guard<tMPI::mutex> lock(listMutex_);
+    lock_guard<Mutex>             lock(listMutex_);
     ProviderList::const_iterator  i;
     for (i = providerList_.begin(); i != providerList_.end(); ++i)
     {
index 045e7c5616b2e4ace992cdb142f168d2f0d3db8f..351584dab7ea6fa08b6fb86b64cc3588ca669f93 100644 (file)
@@ -1,7 +1,7 @@
 #
 # This file is part of the GROMACS molecular simulation package.
 #
-# Copyright (c) 2011,2012,2014, by the GROMACS development team, led by
+# Copyright (c) 2011,2012,2014,2015, by the GROMACS development team, led by
 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 # and including many others, as listed in the AUTHORS file in the
 # top-level source directory and at http://www.gromacs.org.
@@ -33,5 +33,6 @@
 # the research papers on the package. Check out http://www.gromacs.org.
 
 gmx_add_unit_test(TestUtilsUnitTests testutils-test
+                  interactivetest.cpp
                   refdata_tests.cpp
                   testasserts_tests.cpp)
diff --git a/src/testutils/tests/interactivetest.cpp b/src/testutils/tests/interactivetest.cpp
new file mode 100644 (file)
index 0000000..4a3d390
--- /dev/null
@@ -0,0 +1,378 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2015, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ * \brief
+ * Self-tests for interactive test helpers.
+ *
+ * \author Teemu Murtola <teemu.murtola@gmail.com>
+ * \ingroup module_testutils
+ */
+#include "gmxpre.h"
+
+#include "testutils/interactivetest.h"
+
+#include <vector>
+
+#include <gtest/gtest.h>
+#include <gtest/gtest-spi.h>
+
+#include "gromacs/utility/textstream.h"
+
+#include "testutils/refdata.h"
+
+namespace
+{
+
+class InteractiveSession
+{
+    public:
+        InteractiveSession(gmx::test::ReferenceDataMode mode)
+            : data_(mode), helper_(data_.rootChecker()), nextInputLine_(0)
+        {
+        }
+
+        void addOutput(const char *output)
+        {
+            events_.push_back(Event(WriteOutput, output));
+        }
+        void addInputLine(const char *inputLine)
+        {
+            inputLines_.push_back(inputLine);
+        }
+        void addReadInput()
+        {
+            events_.push_back(Event(ReadInput, ""));
+        }
+        void addInput(const char *inputLine)
+        {
+            addInputLine(inputLine);
+            addReadInput();
+        }
+        void addInputNoNewline(const char *inputLine)
+        {
+            addInputLine(inputLine);
+            helper_.setLastNewline(false);
+            events_.push_back(Event(ReadInputNoNewline, ""));
+        }
+
+        void run()
+        {
+            gmx::TextInputStream              &input  = helper_.inputStream();
+            gmx::TextOutputStream             &output = helper_.outputStream();
+            helper_.setInputLines(inputLines_);
+            std::vector<Event>::const_iterator event;
+            for (event = events_.begin(); event != events_.end(); ++event)
+            {
+                if (event->first == WriteOutput)
+                {
+                    output.write(event->second);
+                }
+                else
+                {
+                    std::string expectedLine;
+                    const bool  bInputRemaining = (nextInputLine_ < inputLines_.size());
+                    if (bInputRemaining)
+                    {
+                        expectedLine = inputLines_[nextInputLine_];
+                        if (event->first != ReadInputNoNewline)
+                        {
+                            expectedLine.append("\n");
+                        }
+                    }
+                    ++nextInputLine_;
+                    std::string line;
+                    EXPECT_EQ(bInputRemaining, input.readLine(&line));
+                    EXPECT_EQ(expectedLine, line);
+                }
+            }
+            helper_.checkSession();
+        }
+
+    private:
+        enum EventType
+        {
+            ReadInput,
+            ReadInputNoNewline,
+            WriteOutput
+        };
+        // The second member is the text to write; it is empty for read events.
+        typedef std::pair<EventType, const char *> Event;
+
+        gmx::test::TestReferenceData     data_;
+        gmx::test::InteractiveTestHelper helper_;
+        std::vector<const char *>        inputLines_;
+        size_t                           nextInputLine_;
+        std::vector<Event>               events_;
+};
+
+TEST(InteractiveTestHelperTest, ChecksSimpleSession)
+{
+    {
+        InteractiveSession session(gmx::test::erefdataUpdateAll);
+        session.addOutput("First line\n");
+        session.addOutput("> ");
+        session.addInput("input");
+        session.addOutput("Second line\n");
+        session.addOutput("> ");
+        session.addReadInput();
+        session.addOutput("\n");
+        session.addOutput(".\n");
+        session.run();
+    }
+    {
+        InteractiveSession session(gmx::test::erefdataCompare);
+        session.addOutput("First line\n");
+        session.addOutput("> ");
+        session.addInput("input");
+        session.addOutput("Second line\n");
+        session.addOutput("> ");
+        session.addReadInput();
+        session.addOutput("\n");
+        session.addOutput(".\n");
+        session.run();
+    }
+}
+
+TEST(InteractiveTestHelperTest, ChecksSessionWithoutLastNewline)
+{
+    {
+        InteractiveSession session(gmx::test::erefdataUpdateAll);
+        session.addOutput("First line\n");
+        session.addOutput("> ");
+        session.addInput("input");
+        session.addOutput("Second line\n");
+        session.addOutput("> ");
+        session.addInputNoNewline("input2");
+        session.addOutput("\n");
+        session.addOutput(".\n");
+        session.run();
+    }
+    {
+        InteractiveSession session(gmx::test::erefdataCompare);
+        session.addOutput("First line\n");
+        session.addOutput("> ");
+        session.addInput("input");
+        session.addOutput("Second line\n");
+        session.addOutput("> ");
+        session.addInputNoNewline("input2");
+        session.addOutput("\n");
+        session.addOutput(".\n");
+        session.run();
+    }
+}
+
+TEST(InteractiveTestHelperTest, ChecksSessionWithMissingOutput)
+{
+    {
+        InteractiveSession session(gmx::test::erefdataUpdateAll);
+        session.addOutput("First line\n> ");
+        session.addInput("input");
+        session.addInput("input2");
+        session.addOutput("Second line\n> ");
+        session.addReadInput();
+        session.addOutput("\n.\n");
+        session.run();
+    }
+    {
+        InteractiveSession session(gmx::test::erefdataCompare);
+        session.addOutput("First line\n> ");
+        session.addInput("input");
+        session.addInput("input2");
+        session.addOutput("Second line\n> ");
+        session.addReadInput();
+        session.addOutput("\n.\n");
+        session.run();
+    }
+}
+
+TEST(InteractiveTestHelperTest, ChecksSessionWithEquivalentOutput)
+{
+    {
+        InteractiveSession session(gmx::test::erefdataUpdateAll);
+        session.addOutput("First line\n");
+        session.addOutput("> ");
+        session.addInput("input");
+        session.addOutput("Second line\n> ");
+        session.addReadInput();
+        session.addOutput("\n");
+        session.addOutput(".\n");
+        session.run();
+    }
+    {
+        InteractiveSession session(gmx::test::erefdataCompare);
+        session.addOutput("First line\n> ");
+        session.addInput("input");
+        session.addOutput("Second line\n");
+        session.addOutput("> ");
+        session.addReadInput();
+        session.addOutput("\n.\n");
+        session.run();
+    }
+}
+
+TEST(InteractiveTestHelperTest, DetectsIncorrectOutput)
+{
+    {
+        InteractiveSession session(gmx::test::erefdataUpdateAll);
+        session.addOutput("First line\n> ");
+        session.addInput("input");
+        session.addOutput("Second line\n> ");
+        session.addReadInput();
+        session.addOutput("\n.\n");
+        session.run();
+    }
+    {
+        InteractiveSession session(gmx::test::erefdataCompare);
+        session.addOutput("First line\n> ");
+        session.addInput("input");
+        session.addOutput("Incorrect line\n> ");
+        session.addReadInput();
+        session.addOutput("\n.\n");
+        EXPECT_NONFATAL_FAILURE(session.run(), "");
+    }
+}
+
+TEST(InteractiveTestHelperTest, DetectsMissingOutput)
+{
+    {
+        InteractiveSession session(gmx::test::erefdataUpdateAll);
+        session.addOutput("First line\n> ");
+        session.addInput("input");
+        session.addOutput("Second line\n> ");
+        session.addInput("input2");
+        session.addOutput("Third line\n> ");
+        session.addReadInput();
+        session.addOutput("\n.\n");
+        session.run();
+    }
+    {
+        InteractiveSession session(gmx::test::erefdataCompare);
+        session.addOutput("First line\n> ");
+        session.addInput("input");
+        session.addInput("input2");
+        session.addOutput("Third line\n> ");
+        session.addReadInput();
+        session.addOutput("\n.\n");
+        EXPECT_NONFATAL_FAILURE(session.run(), "");
+    }
+}
+
+TEST(InteractiveTestHelperTest, DetectsMissingFinalOutput)
+{
+    {
+        InteractiveSession session(gmx::test::erefdataUpdateAll);
+        session.addOutput("First line\n> ");
+        session.addInput("input");
+        session.addOutput("Second line\n> ");
+        session.addReadInput();
+        session.addOutput("\n.\n");
+        session.run();
+    }
+    {
+        InteractiveSession session(gmx::test::erefdataCompare);
+        session.addOutput("First line\n> ");
+        session.addInput("input");
+        session.addOutput("Second line\n> ");
+        session.addReadInput();
+        EXPECT_NONFATAL_FAILURE(session.run(), "");
+    }
+}
+
+TEST(InteractiveTestHelperTest, DetectsExtraOutput)
+{
+    {
+        InteractiveSession session(gmx::test::erefdataUpdateAll);
+        session.addOutput("First line\n> ");
+        session.addInput("input");
+        session.addInput("input2");
+        session.addOutput("More output\n> ");
+        session.addReadInput();
+        session.addOutput("\n.\n");
+        session.run();
+    }
+    {
+        InteractiveSession session(gmx::test::erefdataCompare);
+        session.addOutput("First line\n> ");
+        session.addInput("input");
+        session.addOutput("Extra output\n> ");
+        session.addInput("input2");
+        session.addOutput("More output\n> ");
+        session.addReadInput();
+        session.addOutput("\n.\n");
+        EXPECT_NONFATAL_FAILURE(session.run(), "");
+    }
+}
+
+TEST(InteractiveTestHelperTest, DetectsMissingInput)
+{
+    {
+        InteractiveSession session(gmx::test::erefdataUpdateAll);
+        session.addInput("input");
+        session.addInput("input2");
+        session.addReadInput();
+        session.run();
+    }
+    {
+        InteractiveSession session(gmx::test::erefdataCompare);
+        session.addInputLine("input");
+        session.addInputLine("input2");
+        session.addReadInput();
+        session.addReadInput();
+        EXPECT_NONFATAL_FAILURE(session.run(), "");
+    }
+}
+
+TEST(InteractiveTestHelperTest, DetectsExtraInput)
+{
+    {
+        InteractiveSession session(gmx::test::erefdataUpdateAll);
+        session.addInput("input");
+        session.addInput("input2");
+        session.addReadInput();
+        session.run();
+    }
+    {
+        InteractiveSession session(gmx::test::erefdataCompare);
+        session.addInputLine("input");
+        session.addInputLine("input2");
+        session.addReadInput();
+        session.addReadInput();
+        session.addReadInput();
+        session.addReadInput();
+        EXPECT_NONFATAL_FAILURE(session.run(), "");
+    }
+}
+
+} // namespace
index a99fccd9dac05911488d62b1c6d4d85fa27b274a..a82192dc4dca93c0ad47c63ad3ecbaf33bd9f114 100644 (file)
  *  - gmx::test::TestFileInputRedirector (in testfileredirector.h) provides
  *    functionality for capturing file existence checks in code that uses
  *    gmx::FileInputRedirectorInterface.
+ *  - gmx::test::TestFileOutputRedirector (in testfileredirector.h) provides
+ *    functionality for capturing file output (including `stdout`) from code
+ *    that uses gmx::FileOutputRedirectorInterface, and checking that output
+ *    against reference data.
+ *  - gmx::test::InteractiveTestHelper (in interactivetest.h) provides
+ *    a helper class for testing an interactive session that uses
+ *    gmx::TextInputStream and gmx::TextOutputStream for prompting input and
+ *    printing status messages.
  *  - #GMX_TEST_OPTIONS macro provides facilities for adding custom command
  *    line options for the test binary.
  *  - testasserts.h provides several custom test assertions for better