# To help us fund GROMACS development, we humbly ask that you cite
# the research papers on the package. Check out http://www.gromacs.org.
-# Adapted from code posted on cmake-users by Mark Moll
+# Adapted from code posted on cmake-users by Mark Moll (the execute_process()
+# call remains, but other things have been rewritten for nicer behavior).
find_package(PythonInterp)
-function(find_python_module module)
- string(TOUPPER ${module} module_upper)
- if(NOT PYTHONMODULE_${module_upper})
- if(ARGC GREATER 1 AND ARGV1 STREQUAL "REQUIRED")
- set(${module}_FIND_REQUIRED TRUE)
- endif()
- if (NOT PYTHON_EXECUTABLE)
- message(STATUS "Cannot find python module ${module} because no python executable is known")
- else()
- # A module's location is usually a directory, but for binary modules
- # it's a .so file.
- execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
- "import re, ${module}; print re.compile('/__init__.py.*').sub('',${module}.__file__)"
- RESULT_VARIABLE _${module}_status
- OUTPUT_VARIABLE _${module}_location
- ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+# Locates a Python module by importing it with the detected interpreter.
+#
+# Usage:
+#   find_python_module(<module> [QUIET] [REQUIRED])
+#
+# The location reported by Python (usually a directory, or a .so file for
+# binary modules) is stored in the cache variable PYTHONMODULE_<MODULE>,
+# where <MODULE> is the upper-cased module name.  The variable is set to
+# <module>-NOTFOUND if the import fails or PYTHON_EXECUTABLE is not set.
+# QUIET and REQUIRED are translated into <module>_FIND_QUIETLY and
+# <module>_FIND_REQUIRED before find_package_handle_standard_args() is
+# called; any other extra argument is a fatal error.
+function (find_python_module module)
+ string(TOUPPER "${module}" _module_upper)
+ set(_find_package_module ${module})
+ set(_out_var PYTHONMODULE_${_module_upper})
+
+ include(CMakeParseArguments)
+ set(_options QUIET REQUIRED)
+ cmake_parse_arguments(ARG "${_options}" "" "" ${ARGN})
+ if (ARG_UNPARSED_ARGUMENTS)
+ message(FATAL_ERROR "Unknown arguments: ${ARG_UNPARSED_ARGUMENTS}")
+ endif()
+ if (ARG_REQUIRED)
+ set(${_find_package_module}_FIND_REQUIRED TRUE)
+ endif()
+ if (ARG_QUIET)
+ set(${_find_package_module}_FIND_QUIETLY TRUE)
+ endif()
+
+ # Only probe when no usable location is cached yet.
+ if (NOT ${_out_var})
+ # Non-zero status means "not found"; it stays that way if no interpreter
+ # is available to run the probe.
+ set(_status 1)
+ if (PYTHON_EXECUTABLE)
+ # A module's location is usually a directory, but for binary modules
+ # it's a .so file.
+ # print() is called as a function so that the probe runs under both
+ # Python 2 and Python 3.
+ execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
+ "import re, ${module}; print(re.compile('/__init__.py.*').sub('',${module}.__file__))"
+ RESULT_VARIABLE _status
+ OUTPUT_VARIABLE _location
+ ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
+ endif()
+ if(_status)
+ set(_location ${_find_package_module}-NOTFOUND)
endif()
- if(NOT _${module}_status)
- set(PYTHONMODULE_${module_upper} ${_${module}_location} CACHE STRING
- "Location of Python module ${module}")
- endif()
+ # Quoted so that an empty location cannot shift the CACHE arguments.
+ set(${_out_var} "${_location}" CACHE STRING
+ "Location of Python module ${module}" FORCE)
+ mark_as_advanced(${_out_var})
endif()
- find_package_handle_standard_args(PYTHONMODULE_${module} DEFAULT_MSG PYTHONMODULE_${module_upper})
+ include(FindPackageHandleStandardArgs)
+ find_package_handle_standard_args(
+ ${_find_package_module} DEFAULT_MSG
+ ${_out_var} PYTHON_EXECUTABLE)
endfunction()
mark_as_advanced(SPHINX_EXECUTABLE)
# Detect Sphinx version
-
-if(SPHINX_FOUND AND NOT DEFINED SPHINX_EXECUTABLE_VERSION)
+if (SPHINX_EXECUTABLE AND NOT DEFINED SPHINX_EXECUTABLE_VERSION)
execute_process(
COMMAND ${SPHINX_EXECUTABLE} --version
OUTPUT_VARIABLE SPHINX_VERSION_OUTPUT_VARIABLE
ERROR_QUIET
OUTPUT_STRIP_TRAILING_WHITESPACE
)
- string(REGEX REPLACE "Sphinx \\(${SPHINX_EXECUTABLE}\\) ([^ ]+)" "\\1" SPHINX_EXECUTABLE_VERSION ${SPHINX_VERSION_OUTPUT_VARIABLE})
+ # The input is quoted: if the command produced no output on stdout, an
+ # unquoted expansion would disappear and string(REGEX REPLACE) would fail
+ # for lack of an input argument instead of yielding an empty version.
+ string(REGEX REPLACE "Sphinx \\([^)]*\\) ([^ ]+)" "\\1" SPHINX_EXECUTABLE_VERSION "${SPHINX_VERSION_OUTPUT_VARIABLE}")
set(SPHINX_EXECUTABLE_VERSION "${SPHINX_EXECUTABLE_VERSION}" CACHE INTERNAL "Version of ${SPHINX_EXECUTABLE}")
endif()
+set(_find_deps_options)
+if (Sphinx_FIND_QUIETLY)
+ set(_find_deps_options QUIET)
+endif()
include(FindPythonModule)
-find_python_module(pygments)
-
-if(PYTHONMODULE_PYGMENTS)
+find_python_module(pygments ${_find_deps_options})
+if (PYTHONMODULE_PYGMENTS)
set(Sphinx_pygments_FOUND 1)
endif()
# The GROMACS convention is that these are the version number of the next
# release that is going to be made from this branch.
set(GMX_VERSION_MAJOR 5)
-set(GMX_VERSION_MINOR 1)
+set(GMX_VERSION_MINOR 2)
set(GMX_VERSION_PATCH 0)
# The suffix, on the other hand, is used mainly for betas and release
# candidates, where it signifies the most recent such release from
# this branch; it will be empty before the first such release, as well
# as after the final release is out.
-set(GMX_VERSION_SUFFIX "-beta1")
+set(GMX_VERSION_SUFFIX "")
# Conventionally with libtool, any ABI change must change the major
# version number, the minor version number should change if it's just
# here. The important thing is to minimize the chance of third-party
# code being able to dynamically link with a version of libgromacs
# that might not work.
-set(LIBRARY_SOVERSION_MAJOR 1)
+set(LIBRARY_SOVERSION_MAJOR 2)
set(LIBRARY_SOVERSION_MINOR 0)
set(LIBRARY_VERSION ${LIBRARY_SOVERSION_MAJOR}.${LIBRARY_SOVERSION_MINOR}.0)
# each release. It's hard to test because it is only used for
# REGRESSIONTEST_DOWNLOAD, which doesn't work until that tarball has
# been placed on the server.
-set(REGRESSIONTEST_MD5SUM "6f8531a6e3c2a8912327b9cd450d8745" CACHE INTERNAL "MD5 sum of the regressiontests tarball")
+set(REGRESSIONTEST_MD5SUM "bb67f145095249e9d4a93227fc4c352e" CACHE INTERNAL "MD5 sum of the regressiontests tarball")
math(EXPR GMX_VERSION_NUMERIC
"${GMX_VERSION_MAJOR}*10000 + ${GMX_VERSION_MINOR}*100 + ${GMX_VERSION_PATCH}")
set(EXPECTED_DOXYGEN_VERSION 1.8.5)
find_package(PythonInterp)
-find_package(Sphinx 1.2.3 COMPONENTS pygments)
+find_package(Sphinx 1.2.3 QUIET COMPONENTS pygments)
# Even if we aren't going to make the full webpage, set up to put all
# the documentation output in the same place, for convenience
COMMAND ${CMAKE_COMMAND} -E echo
"HTML pages cannot be built because Sphinx is not available"
VERBATIM)
+ add_custom_target(install-guide
+ COMMAND ${CMAKE_COMMAND} -E echo
+ "INSTALL cannot be built because Sphinx is not available"
+ VERBATIM)
add_custom_target(man
COMMAND ${CMAKE_COMMAND} -E echo
"man pages cannot be built because Sphinx is not available"
"""Scan the file contents and initialize information based on it."""
# TODO: Consider a more robust regex.
include_re = r'^\s*#\s*include\s+(?P<quote>["<])(?P<path>[^">]*)[">]'
- define_re = r'^\s*#.*define\s+(\w*)'
+ define_re = r'^\s*#.*define(?:01)?\s+(\w*)'
current_block = None
with open(self._abspath, 'r') as scanfile:
contents = scanfile.read()
system, *e.g.* a water slab (see Engin et al. J. Chem. Phys. B
2010).
-.. mdp:: pull-coord1-groups
-
- The two groups indices should be given on which this pull
- coordinate will operate. The first index can be 0, in which case an
- absolute reference of :mdp:`pull-coord1-origin` is used. With an
- absolute reference the system is no longer translation invariant
- and one should think about what to do with the center of mass
- motion. Note that (only) for :mdp:`pull-coord1-geometry` =
- :mdp-value:`direction-relative` four groups are required.
-
.. mdp:: pull-coord1-type:
.. mdp-value:: umbrella
component. This geometry is not supported with constraint
pulling.
+.. mdp:: pull-coord1-groups
+
+ The indices of the two groups on which this pull coordinate will
+ operate should be given. The first index can be 0, in which case an
+ absolute reference of :mdp:`pull-coord1-origin` is used. With an
+ absolute reference the system is no longer translation invariant
+ and one should think about what to do with the center of mass
+ motion. Note that (only) for :mdp:`pull-coord1-geometry` =
+ :mdp-value:`direction-relative` four groups are required.
+
.. mdp:: pull-coord1-dim
(Y Y Y)
-586
+592
If You Want Something Done You Have to Do It Yourself_(Highlander II)
I Live the Life They Wish They Did_(Tricky)
Jesus Built My Hotrod_(Ministry)
In science, truth always wins._(Max Perutz)
Creativity in science, as in art, cannot be organized. It arises spontaneously from individual talent. Well-run laboratories can foster it, but hierarchical organizations, inflexible bureaucratic rules, and mountains of futile paperwork can kill it._(Max Perutz)
Every electron is sacred._(Greg McMullan, on Cryo-EM detectors)
+Science adjusts its views based on what's observed. Faith is the denial of observation so that belief can be preserved._(Tim Minchin)
+Isn’t this enough? Just this world? Just this beautiful, complex wonderfully unfathomable world? How does it so fail to hold our attention that we have to diminish it with the invention of cheap, man-made myths and monsters?_(Tim Minchin)
+If you open your mind too much, your brains will fall out._(Tim Minchin)
+"Everything organic and natural is good" - ignoring the fact that organic natural substances include arsenic and poo and crocodiles. And everything chemical is bad, ignoring the fact that... everything is chemicals._(Tim Minchin)
+A program that has not been tested does not work._(Bjarne Stroustrup)
+You could give Aristotle a tutorial. And you could thrill him to the core of his being. Such is the privilege of living after Newton, Darwin, Einstein, Planck, Watson, Crick and their colleagues._(Richard Dawkins)
/* IEEE754 floating-point format. Memory layout is defined by macros
* GMX_IEEE754_BIG_ENDIAN_BYTE_ORDER and GMX_IEEE754_BIG_ENDIAN_WORD_ORDER.
*/
-#cmakedefine GMX_FLOAT_FORMAT_IEEE754
+#cmakedefine01 GMX_FLOAT_FORMAT_IEEE754
/* Work around broken calloc() */
#cmakedefine GMX_BROKEN_CALLOC
/* Do not optimize FFTW setups (not needed with SSE FFT kernels) */
-#cmakedefine GMX_DISABLE_FFTW_MEASURE
-
-/* Use Built-in FFTPACK FFT library */
-#cmakedefine GMX_FFT_FFTPACK
+#cmakedefine01 GMX_DISABLE_FFTW_MEASURE
/* Use FFTW3 FFT library */
-#cmakedefine GMX_FFT_FFTW3
-
-/* Use Intel MKL FFT library */
-#cmakedefine GMX_FFT_MKL
+#cmakedefine01 GMX_FFT_FFTW3
/* Target platform is x86 or x86_64 */
#cmakedefine GMX_TARGET_X86
#define GMX_SIMD_ACCURACY_BITS_DOUBLE @GMX_SIMD_ACCURACY_BITS_DOUBLE@
/* Integer byte order is big endian. */
-#cmakedefine GMX_INTEGER_BIG_ENDIAN
+#cmakedefine01 GMX_INTEGER_BIG_ENDIAN
/* Use our own instead of system XDR libraries */
-#cmakedefine GMX_INTERNAL_XDR
+#cmakedefine01 GMX_INTERNAL_XDR
/* Compile to use TNG library */
#cmakedefine GMX_USE_TNG
/* Bytes in IEEE fp word are in big-endian order if set, little-endian if not.
Only relevant when FLOAT_FORMAT_IEEE754 is defined. */
-#cmakedefine GMX_IEEE754_BIG_ENDIAN_BYTE_ORDER
+#cmakedefine01 GMX_IEEE754_BIG_ENDIAN_BYTE_ORDER
/* The two words in a double precision variable are in big-endian order if
set, little-endian if not. Do NOT assume this is the same as the byte
order! Only relevant when FLOAT_FORMAT_IEEE754 is defined. */
-#cmakedefine GMX_IEEE754_BIG_ENDIAN_WORD_ORDER
+#cmakedefine01 GMX_IEEE754_BIG_ENDIAN_WORD_ORDER
/* Define if SIGUSR1 is present */
#cmakedefine HAVE_SIGUSR1
printf("Going to open %s\n",fn);
fpread = open_trn(fn,"r");
fpwrite = open_tpx(NULL,"w");
- gmx_fio_setdebug(fpwrite,TRUE);
mmm=mass[0]+2*mass[1];
for(i=0; (i<5); i++)
<xsl:value-of select="."/>
</xsl:template>
+<!--
+  Renders an InteractiveSession element as preformatted text.
+  Children whose Name attribute starts with 'Output' are printed as-is,
+  minus their first character.  Every other child is prefixed with a
+  marker; its first character is skipped as well.  A child with no further
+  content is shown as a lone pilcrow, a child containing line breaks has
+  them displayed as return symbols followed by a real line break, and any
+  other child is terminated with a pilcrow.  The listing ends with [EOF].
+  The newline is written as a character reference because a literal line
+  break inside an attribute value is normalized to a space by XML parsers.
+-->
+<xsl:template match="InteractiveSession">
+    <pre>
+        <xsl:for-each select="*">
+            <xsl:choose>
+                <xsl:when test="starts-with(@Name, 'Output')">
+                    <xsl:value-of select="substring(.,2)"/>
+                </xsl:when>
+                <xsl:when test="string-length(.)=1">
+                    <xsl:text>►</xsl:text>
+                    <xsl:text>¶</xsl:text>
+                </xsl:when>
+                <xsl:when test="contains(substring(.,2), '&#10;')">
+                    <xsl:text>►</xsl:text>
+                    <xsl:value-of select="translate(substring(.,2), '&#10;', '⏎')"/>
+                    <xsl:text>&#10;</xsl:text>
+                </xsl:when>
+                <xsl:otherwise>
+                    <xsl:text>►</xsl:text>
+                    <xsl:value-of select="substring(.,2)"/>
+                    <xsl:text>¶</xsl:text>
+                </xsl:otherwise>
+            </xsl:choose>
+        </xsl:for-each>
+        <xsl:text>[EOF]</xsl:text>
+    </pre>
+</xsl:template>
+
</xsl:stylesheet>
{
public:
//! Creates the implementation class and the low-level context.
- Impl(File *file, HelpOutputFormat format, const HelpLinks *links)
- : writerContext_(file, format, links), moduleDisplayName_("gmx"),
+ Impl(TextOutputStream *stream, HelpOutputFormat format,
+ const HelpLinks *links)
+ : writerContext_(stream, format, links), moduleDisplayName_("gmx"),
completionWriter_(NULL), bHidden_(false)
{
}
};
CommandLineHelpContext::CommandLineHelpContext(
- File *file, HelpOutputFormat format, const HelpLinks *links,
- const std::string &programName)
- : impl_(new Impl(file, format, links))
+ TextOutputStream *stream, HelpOutputFormat format,
+ const HelpLinks *links, const std::string &programName)
+ : impl_(new Impl(stream, format, links))
{
impl_->writerContext_.setReplacement("[PROGRAM]", programName);
}
CommandLineHelpContext::CommandLineHelpContext(
ShellCompletionWriter *writer)
- : impl_(new Impl(writer->outputFile(), eHelpOutputFormat_Other, NULL))
+ : impl_(new Impl(&writer->outputStream(), eHelpOutputFormat_Other, NULL))
{
impl_->completionWriter_ = writer;
}
*
* Wraps the constructor of HelpWriterContext.
*/
- CommandLineHelpContext(File *file, HelpOutputFormat format,
- const HelpLinks *links,
+ CommandLineHelpContext(TextOutputStream *stream,
+ HelpOutputFormat format, const HelpLinks *links,
const std::string &programName);
//! Creates a context for a particular HelpWriterContext.
explicit CommandLineHelpContext(const HelpWriterContext &writerContext);
#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/baseversion.h"
#include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/file.h"
#include "gromacs/utility/fileredirector.h"
#include "gromacs/utility/gmxassert.h"
+#include "gromacs/utility/path.h"
#include "gromacs/utility/programcontext.h"
+#include "gromacs/utility/stringstream.h"
#include "gromacs/utility/stringutil.h"
+#include "gromacs/utility/textreader.h"
+#include "gromacs/utility/textstream.h"
+#include "gromacs/utility/textwriter.h"
#include "shellcompletions.h"
context.writeTextBlock(
"Usage: [PROGRAM] [<options>] <command> [<args>][PAR]"
"Available commands:");
- File &file = context.outputFile();
+ TextWriter &file = context.outputFile();
TextTableFormatter formatter;
formatter.addColumn(NULL, maxNameLength + 1, false);
formatter.addColumn(NULL, 72 - maxNameLength, true);
{
public:
//! Initializes reST exporter.
- explicit HelpExportReStructuredText(
- const CommandLineHelpModuleImpl &helpModule);
+ HelpExportReStructuredText(
+ const CommandLineHelpModuleImpl &helpModule,
+ FileOutputRedirectorInterface *outputRedirector);
virtual void startModuleExport();
virtual void exportModuleHelp(
FileOutputRedirectorInterface *outputRedirector_;
const std::string &binaryName_;
HelpLinks links_;
- boost::scoped_ptr<File> indexFile_;
- boost::scoped_ptr<File> manPagesFile_;
+ boost::scoped_ptr<TextWriter> indexFile_;
+ boost::scoped_ptr<TextWriter> manPagesFile_;
};
HelpExportReStructuredText::HelpExportReStructuredText(
- const CommandLineHelpModuleImpl &helpModule)
- : outputRedirector_(helpModule.outputRedirector_),
+ const CommandLineHelpModuleImpl &helpModule,
+ FileOutputRedirectorInterface *outputRedirector)
+ : outputRedirector_(outputRedirector),
binaryName_(helpModule.binaryName_),
links_(eHelpOutputFormat_Rst)
{
- File linksFile("links.dat", "r");
- std::string line;
- while (linksFile.readLine(&line))
+ TextReader linksFile("links.dat");
+ std::string line;
+ while (linksFile.readLineTrimmed(&line))
{
links_.addLink("[REF]." + line + "[ref]",
formatString(":ref:`.%s <%s>`", line.c_str(), line.c_str()),
void HelpExportReStructuredText::startModuleExport()
{
indexFile_.reset(
- new File(outputRedirector_->openFileForWriting("fragments/byname.rst")));
+ new TextWriter(
+ outputRedirector_->openTextOutputFile("fragments/byname.rst")));
indexFile_->writeLine(formatString("* :doc:`%s </onlinehelp/%s>` - %s",
binaryName_.c_str(), binaryName_.c_str(),
RootHelpText::title));
manPagesFile_.reset(
- new File(outputRedirector_->openFileForWriting("conf-man.py")));
+ new TextWriter(
+ outputRedirector_->openTextOutputFile("conf-man.py")));
manPagesFile_->writeLine("man_pages = [");
}
const std::string &tag,
const std::string &displayName)
{
- // TODO: Ideally, the file would only be touched if it really changes.
- // This would make Sphinx reruns much faster.
- File file(outputRedirector_->openFileForWriting("onlinehelp/" + tag + ".rst"));
- file.writeLine(formatString(".. _%s:", displayName.c_str()));
+ TextOutputStreamPointer file
+ = outputRedirector_->openTextOutputFile("onlinehelp/" + tag + ".rst");
+ TextWriter writer(file);
+ writer.writeLine(formatString(".. _%s:", displayName.c_str()));
if (0 == displayName.compare(binaryName_ + " mdrun"))
{
// Make an extra link target for the convenience of
// MPI-specific documentation
- file.writeLine(".. _mdrun_mpi:");
+ writer.writeLine(".. _mdrun_mpi:");
}
- file.writeLine();
+ writer.writeLine();
- CommandLineHelpContext context(&file, eHelpOutputFormat_Rst, &links_, binaryName_);
+ CommandLineHelpContext context(file.get(), eHelpOutputFormat_Rst, &links_, binaryName_);
context.enterSubSection(displayName);
context.setModuleDisplayName(displayName);
module.writeHelp(context);
- file.writeLine();
- file.writeLine(".. only:: man");
- file.writeLine();
- file.writeLine(" See also");
- file.writeLine(" --------");
- file.writeLine();
- file.writeLine(formatString(" :manpage:`%s(1)`", binaryName_.c_str()));
- file.writeLine();
- file.writeLine(" More information about |Gromacs| is available at <http://www.gromacs.org/>.");
- file.close();
+ writer.writeLine();
+ writer.writeLine(".. only:: man");
+ writer.writeLine();
+ writer.writeLine(" See also");
+ writer.writeLine(" --------");
+ writer.writeLine();
+ writer.writeLine(formatString(" :manpage:`%s(1)`", binaryName_.c_str()));
+ writer.writeLine();
+ writer.writeLine(" More information about |Gromacs| is available at <http://www.gromacs.org/>.");
+ file->close();
indexFile_->writeLine(formatString("* :doc:`%s </onlinehelp/%s>` - %s",
displayName.c_str(), tag.c_str(),
void HelpExportReStructuredText::startModuleGroupExport()
{
indexFile_.reset(
- new File(outputRedirector_->openFileForWriting("fragments/bytopic.rst")));
+ new TextWriter(
+ outputRedirector_->openTextOutputFile("fragments/bytopic.rst")));
manPagesFile_.reset(
- new File(outputRedirector_->openFileForWriting("fragments/bytopic-man.rst")));
+ new TextWriter(
+ outputRedirector_->openTextOutputFile("fragments/bytopic-man.rst")));
}
void HelpExportReStructuredText::exportModuleGroup(
void HelpExportReStructuredText::exportTopic(const HelpTopicInterface &topic)
{
- const std::string path("onlinehelp/" + std::string(topic.name()) + ".rst");
- File file(outputRedirector_->openFileForWriting(path));
- CommandLineHelpContext context(&file, eHelpOutputFormat_Rst, &links_,
- binaryName_);
- HelpManager manager(topic, context.writerContext());
+ const std::string path("onlinehelp/" + std::string(topic.name()) + ".rst");
+ TextOutputStreamPointer file(outputRedirector_->openTextOutputFile(path));
+ CommandLineHelpContext context(file.get(), eHelpOutputFormat_Rst, &links_,
+ binaryName_);
+ HelpManager manager(topic, context.writerContext());
manager.writeCurrentTopic();
+ file->close();
}
/********************************************************************
rootTopic_->exportHelp(exporter);
}
+namespace
+{
+
+/********************************************************************
+ * ModificationCheckingFileOutputStream
+ */
+
+/*! \brief
+ * Text output stream that rewrites its target file only when needed.
+ *
+ * Everything written to the stream is buffered in memory.  On close(), the
+ * buffer is compared with the current contents of the file at the given
+ * path, if that file exists.  The file is opened through the wrapped
+ * redirector and written only if it is missing or its contents differ, so
+ * an unchanged file is never reopened for writing.
+ */
+class ModificationCheckingFileOutputStream : public TextOutputStream
+{
+    public:
+        /*! \brief
+         * Creates a stream that will eventually write to \p path.
+         *
+         * \param[in] path        Path of the file to (conditionally) write.
+         * \param[in] redirector  Redirector used to open the file if it
+         *     needs to be written; the pointer is stored and used in close().
+         */
+        ModificationCheckingFileOutputStream(
+            const char                    *path,
+            FileOutputRedirectorInterface *redirector)
+            : path_(path), redirector_(redirector)
+        {
+        }
+
+        //! Appends \p str to the in-memory buffer; nothing is written to disk.
+        virtual void write(const char *str) { contents_.write(str); }
+        //! Writes the buffered text to the file unless the file already holds it.
+        virtual void close()
+        {
+            const std::string &newContents = contents_.toString();
+            // TODO: Redirect these for unit tests.
+            if (File::exists(path_))
+            {
+                const std::string originalContents
+                    = TextReader::readFileToString(path_);
+                if (originalContents == newContents)
+                {
+                    // Identical contents: leave the existing file untouched.
+                    return;
+                }
+            }
+            TextWriter writer(redirector_->openTextOutputFile(path_));
+            writer.writeString(newContents);
+        }
+
+    private:
+        std::string                     path_;
+        StringOutputStream              contents_;
+        FileOutputRedirectorInterface  *redirector_;
+};
+
+/********************************************************************
+ * ModificationCheckingFileOutputRedirector
+ */
+
+/*! \brief
+ * Redirector that hands out ModificationCheckingFileOutputStream objects.
+ *
+ * Standard output is forwarded unchanged to the wrapped redirector; file
+ * output goes through a stream that defers opening the real file (via the
+ * wrapped redirector) until it is closed.
+ */
+class ModificationCheckingFileOutputRedirector : public FileOutputRedirectorInterface
+{
+    public:
+        //! Wraps \p redirector; the pointer is stored, not owned.
+        explicit ModificationCheckingFileOutputRedirector(
+            FileOutputRedirectorInterface *redirector)
+            : redirector_(redirector)
+        {
+        }
+
+        virtual TextOutputStream &standardOutput()
+        {
+            return redirector_->standardOutput();
+        }
+        virtual TextOutputStreamPointer openTextOutputFile(const char *filename)
+        {
+            return TextOutputStreamPointer(
+                    new ModificationCheckingFileOutputStream(filename, redirector_));
+        }
+
+    private:
+        FileOutputRedirectorInterface  *redirector_;
+};
+
+} // namespace
+
/********************************************************************
* CommandLineHelpModule
*/
CommandLineParser(&options).parse(&argc, argv);
if (!exportFormat.empty())
{
- boost::scoped_ptr<HelpExportInterface> exporter;
+ ModificationCheckingFileOutputRedirector redirector(impl_->outputRedirector_);
+ boost::scoped_ptr<HelpExportInterface> exporter;
if (exportFormat == "rst")
{
- exporter.reset(new HelpExportReStructuredText(*impl_));
+ exporter.reset(new HelpExportReStructuredText(*impl_, &redirector));
}
else if (exportFormat == "completion")
{
return 0;
}
- File &outputFile = impl_->outputRedirector_->standardOutput();
+ TextOutputStream &outputFile = impl_->outputRedirector_->standardOutput();
HelpLinks links(eHelpOutputFormat_Console);
initProgramLinks(&links, *impl_);
CommandLineHelpContext context(&outputFile, eHelpOutputFormat_Console, &links,
#include "gromacs/options/timeunitmanager.h"
#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/file.h"
#include "gromacs/utility/stringutil.h"
+#include "gromacs/utility/textwriter.h"
#include "shellcompletions.h"
{
currentLength_ = std::strlen(name) + 1;
indent_ = std::min(currentLength_, 13);
- File &file = context_.outputFile();
+ TextWriter &file = context_.outputFile();
switch (context_.outputFormat())
{
case eHelpOutputFormat_Console:
void SynopsisFormatter::finish()
{
- File &file = context_.outputFile();
+ TextWriter &file = context_.outputFile();
file.writeLine();
file.writeLine();
}
}
fullOptionText.append(bFormatted_ ? "`]" : "]");
- File &file = context_.outputFile();
+ TextWriter &file = context_.outputFile();
currentLength_ += totalLength;
if (currentLength_ >= lineLength_)
{
#include <boost/scoped_ptr.hpp>
-#include "thread_mpi/mutex.h"
-
#include "buildinfo.h"
#include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/file.h"
#include "gromacs/utility/gmxassert.h"
+#include "gromacs/utility/mutex.h"
#include "gromacs/utility/path.h"
#include "gromacs/utility/stringutil.h"
mutable std::string fullBinaryPath_;
mutable std::string installationPrefix_;
mutable bool bSourceLayout_;
- mutable tMPI::mutex binaryPathMutex_;
+ mutable Mutex binaryPathMutex_;
};
CommandLineProgramContext::Impl::Impl()
const char *CommandLineProgramContext::fullBinaryPath() const
{
- tMPI::lock_guard<tMPI::mutex> lock(impl_->binaryPathMutex_);
+ lock_guard<Mutex> lock(impl_->binaryPathMutex_);
impl_->findBinaryPath();
return impl_->fullBinaryPath_.c_str();
}
InstallationPrefixInfo CommandLineProgramContext::installationPrefix() const
{
- tMPI::lock_guard<tMPI::mutex> lock(impl_->binaryPathMutex_);
+ lock_guard<Mutex> lock(impl_->binaryPathMutex_);
if (impl_->installationPrefix_.empty())
{
impl_->findBinaryPath();
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "gromacs/options/optionsvisitor.h"
#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/file.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/stringutil.h"
+#include "gromacs/utility/textwriter.h"
namespace gmx
{
class OptionCompletionWriter : public OptionsVisitor
{
public:
- explicit OptionCompletionWriter(File *out) : out_(*out) {}
+ explicit OptionCompletionWriter(TextWriter *out) : out_(*out) {}
virtual void visitSubSection(const Options &section)
{
void writeOptionCompletion(const OptionInfo &option,
const std::string &completion);
- File &out_;
+ TextWriter &out_;
};
void OptionCompletionWriter::visitOption(const OptionInfo &option)
return formatString("_%s_%s_compl", binaryName_.c_str(), moduleName);
}
- std::string binaryName_;
- boost::scoped_ptr<File> file_;
+ std::string binaryName_;
+ boost::scoped_ptr<TextWriter> file_;
};
ShellCompletionWriter::ShellCompletionWriter(const std::string &binaryName,
{
}
-File *ShellCompletionWriter::outputFile()
+TextOutputStream &ShellCompletionWriter::outputStream()
{
- return impl_->file_.get();
+ return impl_->file_->stream();
}
void ShellCompletionWriter::startCompletions()
{
- impl_->file_.reset(new File(impl_->binaryName_ + "-completion.bash", "w"));
+ impl_->file_.reset(new TextWriter(impl_->binaryName_ + "-completion.bash"));
impl_->file_->writeLine("shopt -s extglob");
}
const char *moduleName,
const Options &options)
{
- File &out = *impl_->file_;
+ TextWriter &out = *impl_->file_;
out.writeLine(formatString("%s() {", impl_->completionFunctionName(moduleName).c_str()));
out.writeLine("local IFS=$'\\n'");
out.writeLine("local c=${COMP_WORDS[COMP_CWORD]}");
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
{
class CommandLineHelpContext;
-class File;
class Options;
+class TextOutputStream;
//! \cond internal
//! \addtogroup module_commandline
ShellCompletionFormat format);
~ShellCompletionWriter();
- File *outputFile();
+ TextOutputStream &outputStream();
void startCompletions();
void writeModuleCompletions(const char *moduleName,
#include "gromacs/commandline/cmdlinemodulemanager.h"
#include "gromacs/options/basicoptions.h"
#include "gromacs/options/options.h"
-#include "gromacs/utility/file.h"
+#include "gromacs/utility/textwriter.h"
#include "gromacs/onlinehelp/tests/mock_helptopic.h"
#include "testutils/cmdlinetest.h"
};
CommandLine args(cmdline);
initManager(args, "test");
- redirectManagerOutput();
addModule("module", "First module");
addModule("other", "Second module");
addHelpTopic("topic", "Test topic");
int rc = 0;
ASSERT_NO_THROW_GMX(rc = manager().run(args.argc(), args.argv()));
ASSERT_EQ(0, rc);
- checkRedirectedOutputFiles();
+ checkRedirectedOutput();
}
TEST_F(CommandLineHelpModuleTest, PrintsHelpOnTopic)
};
CommandLine args(cmdline);
initManager(args, "test");
- redirectManagerOutput();
addModule("module", "First module");
MockHelpTopic &topic = addHelpTopic("topic", "Test topic");
topic.addSubTopic("sub1", "Subtopic 1", "");
int rc = 0;
ASSERT_NO_THROW_GMX(rc = manager().run(args.argc(), args.argv()));
ASSERT_EQ(0, rc);
- checkRedirectedOutputFiles();
+ checkRedirectedOutput();
}
/*! \brief
"test", "help", "-export", "rst"
};
// TODO: Find a more elegant solution, or get rid of the links.dat altogether.
- gmx::File::writeFileFromString("links.dat", "");
- CommandLine args(cmdline);
+ gmx::TextWriter::writeFileFromString("links.dat", "");
+ CommandLine args(cmdline);
initManager(args, "test");
- redirectManagerOutput();
MockOptionsModule &mod1 = addOptionsModule("module", "First module");
MockOptionsModule &mod2 = addOptionsModule("other", "Second module");
{
int rc = 0;
ASSERT_NO_THROW_GMX(rc = manager().run(args.argc(), args.argv()));
ASSERT_EQ(0, rc);
- checkRedirectedOutputFiles();
+ checkRedirectedOutput();
std::remove("links.dat");
}
#include "gromacs/options/basicoptions.h"
#include "gromacs/options/filenameoption.h"
#include "gromacs/options/options.h"
-#include "gromacs/utility/file.h"
+#include "gromacs/utility/stringstream.h"
#include "testutils/stringtest.h"
-#include "testutils/testfilemanager.h"
namespace
{
void checkHelp(gmx::CommandLineHelpWriter *writer);
- gmx::test::TestFileManager tempFiles_;
bool bHidden_;
};
void CommandLineHelpWriterTest::checkHelp(gmx::CommandLineHelpWriter *writer)
{
- std::string filename = tempFiles_.getTemporaryFilePath("helptext.txt");
- gmx::File file(filename, "w");
- gmx::CommandLineHelpContext context(&file, gmx::eHelpOutputFormat_Console,
+ gmx::StringOutputStream stream;
+ gmx::CommandLineHelpContext context(&stream, gmx::eHelpOutputFormat_Console,
NULL, "test");
context.setShowHidden(bHidden_);
writer->writeHelp(context);
- file.close();
+ stream.close();
- checkFileContents(filename, "HelpText");
+ checkText(stream.toString(), "HelpText");
}
#include "gromacs/onlinehelp/tests/mock_helptopic.h"
#include "testutils/cmdlinetest.h"
-#include "testutils/testfilemanager.h"
+#include "testutils/testfileredirector.h"
namespace gmx
{
class CommandLineModuleManagerTestBase::Impl
{
public:
+ TestFileOutputRedirector redirector_;
boost::scoped_ptr<CommandLineProgramContext> programContext_;
boost::scoped_ptr<CommandLineModuleManager> manager_;
- TestFileManager fileManager_;
};
CommandLineModuleManagerTestBase::CommandLineModuleManagerTestBase()
impl_->manager_.reset(new gmx::CommandLineModuleManager(
realBinaryName, impl_->programContext_.get()));
impl_->manager_->setQuiet(true);
+ impl_->manager_->setOutputRedirector(&impl_->redirector_);
}
MockModule &
return *impl_->manager_;
}
-void CommandLineModuleManagerTestBase::redirectManagerOutput()
+void CommandLineModuleManagerTestBase::checkRedirectedOutput()
{
- impl_->manager_->setOutputRedirector(&initOutputRedirector(&impl_->fileManager_));
+ impl_->redirector_.checkRedirectedFiles(&checker());
}
} // namespace test
class CommandLine;
class MockHelpTopic;
+class TestFileOutputRedirector;
/*! \internal \brief
* Mock implementation of gmx::CommandLineModuleInterface.
CommandLineModuleManager &manager();
/*! \brief
- * Redirects all manager output to files.
+ * Checks all output from the manager using reference data.
*
- * Can be used to silence tests that would otherwise print out
- * something, and/or checkRedirectedFileContents() from the base class
- * can be used to check the output.
+ * Both output to `stdout` and to files is checked.
*
* The manager is put into quiet mode by default, so the manager will
* only print out information if, e.g., help is explicitly requested.
*/
- void redirectManagerOutput();
+ void checkRedirectedOutput();
private:
class Impl;
#include <gtest/gtest.h>
#include "gromacs/utility/arrayref.h"
-#include "gromacs/utility/file.h"
#include "gromacs/utility/path.h"
#include "gromacs/utility/stringutil.h"
+#include "gromacs/utility/textwriter.h"
#include "testutils/cmdlinetest.h"
#include "testutils/testasserts.h"
FileArgumentType type)
{
std::string filename(tempFiles_.getTemporaryFilePath(extension));
- gmx::File::writeFileFromString(filename, "Dummy file");
+ gmx::TextWriter::writeFileFromString(filename, "Dummy file");
if (name != NULL)
{
args_.append(name);
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
<ReferenceData>
- <String Name="fragments/byname.rst"><![CDATA[
-* :doc:`test </onlinehelp/test>` - molecular dynamics simulation suite
-* :doc:`test help </onlinehelp/test-help>` - Print help information
-* :doc:`test module </onlinehelp/test-module>` - First module
-* :doc:`test other </onlinehelp/test-other>` - Second module
-]]></String>
- <String Name="conf-man.py"><![CDATA[
-man_pages = [
- ('onlinehelp/test-help', 'test-help', "Print help information", '', 1),
- ('onlinehelp/test-module', 'test-module', "First module", '', 1),
- ('onlinehelp/test-other', 'test-other', "Second module", '', 1),
- ('onlinehelp/test', 'test', 'molecular dynamics simulation suite', '', 1)
-]
-]]></String>
<String Name="onlinehelp/test-help.rst"><![CDATA[
.. _test help:
:manpage:`test(1)`
More information about |Gromacs| is available at <http://www.gromacs.org/>.
+]]></String>
+ <String Name="fragments/byname.rst"><![CDATA[
+* :doc:`test </onlinehelp/test>` - molecular dynamics simulation suite
+* :doc:`test help </onlinehelp/test-help>` - Print help information
+* :doc:`test module </onlinehelp/test-module>` - First module
+* :doc:`test other </onlinehelp/test-other>` - Second module
+]]></String>
+ <String Name="conf-man.py"><![CDATA[
+man_pages = [
+ ('onlinehelp/test-help', 'test-help', "Print help information", '', 1),
+ ('onlinehelp/test-module', 'test-module', "First module", '', 1),
+ ('onlinehelp/test-other', 'test-other', "Second module", '', 1),
+ ('onlinehelp/test', 'test', 'molecular dynamics simulation suite', '', 1)
+]
]]></String>
<String Name="fragments/bytopic.rst"><![CDATA[
Group 1
};
enum {
- edlbAUTO, edlbNO, edlbYES, edlbNR
+ edlbsOffForever, /* DLB is off and will never be turned on */
+ edlbsOffCanTurnOn, /* DLB is off and will turn on with imbalance */
+ edlbsOffTemporarilyLocked, /* DLB is off and temporarily can not turn on */
+ edlbsOn, /* DLB is on and will stay on forever */
+ edlbsNR
};
-const char *edlb_names[edlbNR] = { "auto", "no", "yes" };
+/* Allowed DLB state transitions:
+ * edlbsOffCanTurnOn -> edlbsOn
+ * edlbsOffCanTurnOn -> edlbsOffForever
+ * edlbsOffCanTurnOn -> edlbsOffTemporarilyLocked
+ * edlbsOffTemporarilyLocked -> edlbsOffCanTurnOn
+ */
+
+const char *edlbs_names[edlbsNR] = { "off", "auto", "locked", "on" };
typedef struct
{
t_blocka *cglink;
char *bLocalCG;
- /* The DLB option */
- int eDLB;
- /* Is eDLB=edlbAUTO locked such that we currently can't turn it on? */
- gmx_bool bDLB_locked;
- /* With eDLB=edlbAUTO, should we check if to DLB on at the next DD? */
+ /* The DLB state, possible values are defined above */
+ int dlbState;
+ /* With dlbState=edlbsOffCanTurnOn, should we check whether to turn DLB on at the next DD? */
gmx_bool bCheckWhetherToTurnDlbOn;
- /* Are we actually using DLB? */
- gmx_bool bDynLoadBal;
/* Cell sizes for static load balancing, first index cartesian */
real **slb_frac;
return &dd->comm->cgs_gl;
}
+static bool dlbIsOn(const gmx_domdec_comm_t *comm) /* Tells whether dynamic load balancing (DLB) is on */
+{
+ return (comm->dlbState == edlbsOn); /* edlbsOn is the only "on" state; the other edlbs* states are all variants of off */
+}
+
static void vec_rvec_init(vec_rvec_t *v)
{
v->nalloc = 0;
dim = dd->dim[d];
shift0[dim] = zones->izone[izone].shift0[dim];
shift1[dim] = zones->izone[izone].shift1[dim];
- if (dd->comm->tric_dir[dim] || (dd->bGridJump && d > 0))
+ if (dd->comm->tric_dir[dim] || (dlbIsOn(dd->comm) && d > 0))
{
/* A conservative approach, this can be optimized */
shift0[dim] -= 1;
}
}
- if (!comm->bDynLoadBal)
+ if (!dlbIsOn(comm))
{
copy_rvec(cellsize_min, comm->cellsize_min);
}
copy_rvec(comm->cell_x0, comm->old_cell_x0);
copy_rvec(comm->cell_x1, comm->old_cell_x1);
- if (comm->bDynLoadBal)
+ if (dlbIsOn(comm))
{
if (DDMASTER(dd))
{
/* Without PBC we don't have restrictions on the outer cells */
if (!(dim >= ddbox->npbcdim &&
(dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
- comm->bDynLoadBal &&
+ dlbIsOn(comm) &&
(comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
comm->cellsize_min[dim])
{
}
}
- if ((dd->bGridJump && dd->ndim > 1) || ddbox->nboundeddim < DIM)
+ if ((dlbIsOn(dd->comm) && dd->ndim > 1) || ddbox->nboundeddim < DIM)
{
/* Communicate the boundaries and update cell_ns_x0/1 */
dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
- if (dd->bGridJump && dd->ndim > 1)
+ if (dlbIsOn(dd->comm) && dd->ndim > 1)
{
check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
}
/* Check which direction this cg should go */
for (d2 = d+1; (d2 < dd->ndim && mc == -1); d2++)
{
- if (dd->bGridJump)
+ if (dlbIsOn(dd->comm))
{
/* The cell boundaries for dimension d2 are not equal
* for each cell row of the lower dimension(s),
(dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
{
load = &comm->load[d];
- if (dd->bGridJump)
+ if (dlbIsOn(dd->comm))
{
cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
}
{
sbuf[pos++] = dd_force_load(comm);
sbuf[pos++] = sbuf[0];
- if (dd->bGridJump)
+ if (dlbIsOn(dd->comm))
{
sbuf[pos++] = sbuf[0];
sbuf[pos++] = cell_frac;
{
sbuf[pos++] = comm->load[d+1].sum;
sbuf[pos++] = comm->load[d+1].max;
- if (dd->bGridJump)
+ if (dlbIsOn(dd->comm))
{
sbuf[pos++] = comm->load[d+1].sum_m;
sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
if (dd->ci[dim] == dd->master_ci[dim])
{
/* We are the root, process this row */
- if (comm->bDynLoadBal)
+ if (dlbIsOn(comm))
{
root = comm->root[d];
}
load->sum += load->load[pos++];
load->max = std::max(load->max, load->load[pos]);
pos++;
- if (dd->bGridJump)
+ if (dlbIsOn(dd->comm))
{
if (root->bLimited)
{
pos++;
}
}
- if (comm->bDynLoadBal && root->bLimited)
+ if (dlbIsOn(comm) && root->bLimited)
{
load->sum_m *= dd->nc[dim];
load->flags |= (1<<d);
comm->load_step += comm->cycl[ddCyclStep];
comm->load_sum += comm->load[0].sum;
comm->load_max += comm->load[0].max;
- if (comm->bDynLoadBal)
+ if (dlbIsOn(comm))
{
for (d = 0; d < dd->ndim; d++)
{
fprintf(stderr, "%s", buf);
}
bLim = FALSE;
- if (comm->bDynLoadBal)
+ if (dlbIsOn(comm))
{
sprintf(buf, " Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
for (d = 0; d < dd->ndim; d++)
sprintf(buf,
"NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
" in the domain decomposition.\n", lossf*100);
- if (!comm->bDynLoadBal)
+ if (!dlbIsOn(comm))
{
sprintf(buf+strlen(buf), " You might want to use dynamic load balancing (option -dlb.)\n");
}
float dd_pme_f_ratio(gmx_domdec_t *dd)
{
+ /* Should only be called on the DD master rank */
+ assert(DDMASTER(dd));
+
if (dd->comm->load[0].mdf > 0 && dd->comm->cycl_n[ddCyclPME] > 0)
{
return dd->comm->load[0].pme/dd->comm->load[0].mdf;
fprintf(fplog, "\n");
}
fprintf(fplog, "DD step %s", gmx_step_str(step, buf));
- if (dd->comm->bDynLoadBal)
+ if (dlbIsOn(dd->comm))
{
fprintf(fplog, " vol min/aver %5.3f%c",
dd_vol_min(dd), flags ? '!' : ' ');
static void dd_print_load_verbose(gmx_domdec_t *dd)
{
- if (dd->comm->bDynLoadBal)
+ if (dlbIsOn(dd->comm))
{
fprintf(stderr, "vol %4.2f%c ",
dd_vol_min(dd), dd_load_flags(dd) ? '!' : ' ');
if (bPartOfGroup)
{
dd->comm->mpi_comm_load[dim_ind] = c_row;
- if (dd->comm->eDLB != edlbNO)
+ if (dd->comm->dlbState != edlbsOffForever)
{
if (dd->ci[dim] == dd->master_ci[dim])
{
}
}
- if (dd->comm->eDLB != edlbNO)
+ if (dd->comm->dlbState != edlbsOffForever)
{
snew(dd->comm->root, dd->ndim);
}
const char *dlb_opt, gmx_bool bRecordLoad,
unsigned long Flags, t_inputrec *ir)
{
- int eDLB = -1;
+ int dlbState = -1;
char buf[STRLEN];
switch (dlb_opt[0])
{
- case 'a': eDLB = edlbAUTO; break;
- case 'n': eDLB = edlbNO; break;
- case 'y': eDLB = edlbYES; break;
+ case 'a': dlbState = edlbsOffCanTurnOn; break;
+ case 'n': dlbState = edlbsOffForever; break;
+ case 'y': dlbState = edlbsOn; break;
default: gmx_incons("Unknown dlb_opt");
}
if (Flags & MD_RERUN)
{
- return edlbNO;
+ return edlbsOffForever;
}
if (!EI_DYNAMICS(ir->eI))
{
- if (eDLB == edlbYES)
+ if (dlbState == edlbsOn)
{
sprintf(buf, "NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n", EI(ir->eI));
dd_warning(cr, fplog, buf);
}
- return edlbNO;
+ return edlbsOffForever;
}
if (!bRecordLoad)
{
dd_warning(cr, fplog, "NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
- return edlbNO;
+ return edlbsOffForever;
}
if (Flags & MD_REPRODUCIBLE)
{
- switch (eDLB)
+ switch (dlbState)
{
- case edlbNO:
+ case edlbsOffForever:
break;
- case edlbAUTO:
+ case edlbsOffCanTurnOn:
dd_warning(cr, fplog, "NOTE: reproducibility requested, will not use dynamic load balancing\n");
- eDLB = edlbNO;
+ dlbState = edlbsOffForever;
break;
- case edlbYES:
+ case edlbsOn:
dd_warning(cr, fplog, "WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
break;
default:
- gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", eDLB);
+ gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", dlbState);
break;
}
}
- return eDLB;
+ return dlbState;
}
static void set_dd_dim(FILE *fplog, gmx_domdec_t *dd)
/* Initialize to GPU share count to 0, might change later */
comm->nrank_gpu_shared = 0;
- comm->eDLB = check_dlb_support(fplog, cr, dlb_opt, comm->bRecordLoad, Flags, ir);
- comm->bDLB_locked = FALSE;
+ comm->dlbState = check_dlb_support(fplog, cr, dlb_opt, comm->bRecordLoad, Flags, ir);
comm->bCheckWhetherToTurnDlbOn = TRUE;
- comm->bDynLoadBal = (comm->eDLB == edlbYES);
if (fplog)
{
- fprintf(fplog, "Dynamic load balancing: %s\n", edlb_names[comm->eDLB]);
+ fprintf(fplog, "Dynamic load balancing: %s\n",
+ edlbs_names[comm->dlbState]);
}
- dd->bGridJump = comm->bDynLoadBal;
comm->bPMELoadBalDLBLimits = FALSE;
if (comm->nstSortCG)
/* We need to choose the optimal DD grid and possibly PME nodes */
limit = dd_choose_grid(fplog, cr, dd, ir, mtop, box, ddbox,
- comm->eDLB != edlbNO, dlb_scale,
+ comm->dlbState != edlbsOffForever, dlb_scale,
comm->cellsize_limit, comm->cutoff,
comm->bInterCGBondeds);
bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
!bC ? "-rdd" : "-rcon",
- comm->eDLB != edlbNO ? " or -dds" : "",
+ comm->dlbState != edlbsOffForever ? " or -dds" : "",
bC ? " or your LINCS settings" : "");
gmx_fatal_collective(FARGS, cr, NULL,
*npme_y = comm->npmenodes_y;
snew(comm->slb_frac, DIM);
- if (comm->eDLB == edlbNO)
+ if (comm->dlbState == edlbsOffForever)
{
comm->slb_frac[XX] = get_slb_frac(fplog, "x", dd->nc[XX], sizex);
comm->slb_frac[YY] = get_slb_frac(fplog, "y", dd->nc[YY], sizey);
if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
{
- if (comm->bBondComm || comm->eDLB != edlbNO)
+ if (comm->bBondComm || comm->dlbState != edlbsOffForever)
{
/* Set the bonded communication distance to halfway
* the minimum and the maximum,
*/
acs = average_cellsize_min(dd, ddbox);
comm->cutoff_mbody = 0.5*(r_bonded + acs);
- if (comm->eDLB != edlbNO)
+ if (comm->dlbState != edlbsOffForever)
{
/* Check if this does not limit the scaling */
comm->cutoff_mbody = std::min(comm->cutoff_mbody, dlb_scale*acs);
dd_warning(cr, fplog, "NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
/* Change DLB from "auto" to "no". */
- comm->eDLB = edlbNO;
+ comm->dlbState = edlbsOffForever;
return;
}
dd_warning(cr, fplog, "NOTE: Turning on dynamic load balancing\n");
- comm->bDynLoadBal = TRUE;
- dd->bGridJump = TRUE;
+ comm->dlbState = edlbsOn;
set_dlb_limits(dd);
std::max(comm->cutoff, comm->cutoff_mbody));
fprintf(fplog, "%40s %-7s %6.3f nm\n",
"multi-body bonded interactions", "(-rdd)",
- (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : std::min(comm->cutoff, limit));
+ (comm->bBondComm || dlbIsOn(dd->comm)) ? comm->cutoff_mbody : std::min(comm->cutoff, limit));
}
if (dd->vsite_comm)
{
{
comm->cutoff_mbody = std::min(comm->cutoff, comm->cellsize_limit);
}
- if (comm->bDynLoadBal)
+ if (dlbIsOn(comm))
{
set_dlb_limits(dd);
}
{
fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
}
- if (comm->eDLB != edlbNO)
+ if (comm->dlbState != edlbsOffForever)
{
set_cell_limits_dlb(dd, dlb_scale, ir, ddbox);
}
- print_dd_settings(fplog, dd, ir, comm->bDynLoadBal, dlb_scale, ddbox);
- if (comm->eDLB == edlbAUTO)
+ print_dd_settings(fplog, dd, ir, dlbIsOn(comm), dlb_scale, ddbox);
+ if (comm->dlbState == edlbsOffCanTurnOn)
{
if (fplog)
{
np = 1 + (int)(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
- if (dd->comm->eDLB != edlbNO && dim < ddbox.npbcdim &&
+ if (dd->comm->dlbState != edlbsOffForever && dim < ddbox.npbcdim &&
dd->comm->cd[d].np_dlb > 0)
{
if (np > dd->comm->cd[d].np_dlb)
}
}
- if (dd->comm->eDLB != edlbNO)
+ if (dd->comm->dlbState != edlbsOffForever)
{
/* If DLB is not active yet, we don't need to check the grid jumps.
* Actually we shouldn't, because then the grid jump data is not set.
*/
- if (dd->comm->bDynLoadBal &&
+ if (dlbIsOn(dd->comm) &&
check_grid_jump(0, dd, cutoff_req, &ddbox, FALSE))
{
LocallyLimited = 1;
*/
static void dd_dlb_set_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd, gmx_bool bValue)
{
- if (dd->comm->eDLB == edlbAUTO && !dd_dlb_is_locked(dd))
+ if (dd->comm->dlbState == edlbsOffCanTurnOn)
{
dd->comm->bCheckWhetherToTurnDlbOn = bValue;
}
{
const int nddp_chk_dlb = 100;
- if (dd->comm->eDLB != edlbAUTO)
+ if (dd->comm->dlbState != edlbsOffCanTurnOn)
{
return FALSE;
}
gmx_bool dd_dlb_is_on(const gmx_domdec_t *dd)
{
- return dd->comm->bDynLoadBal;
+ return (dd->comm->dlbState == edlbsOn);
}
gmx_bool dd_dlb_is_locked(const gmx_domdec_t *dd)
{
- return dd->comm->bDLB_locked;
+ return (dd->comm->dlbState == edlbsOffTemporarilyLocked); /* state entered in dd_dlb_lock() and left in dd_dlb_unlock() */
}
void dd_dlb_lock(gmx_domdec_t *dd)
{
/* We can only lock the DLB when it is set to auto, otherwise don't do anything */
- if (dd->comm->eDLB == edlbAUTO)
+ if (dd->comm->dlbState == edlbsOffCanTurnOn)
{
- dd->comm->bDLB_locked = TRUE;
+ dd->comm->dlbState = edlbsOffTemporarilyLocked; /* dd_dlb_unlock() moves this back to edlbsOffCanTurnOn */
}
}
void dd_dlb_unlock(gmx_domdec_t *dd)
{
/* Unlocking only applies in auto mode (the only mode in which the DLB can be locked), otherwise don't do anything */
- if (dd->comm->eDLB == edlbAUTO)
+ if (dd->comm->dlbState == edlbsOffTemporarilyLocked)
{
- dd->comm->bDLB_locked = FALSE;
- dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, !dd->comm->bDynLoadBal);
+ dd->comm->dlbState = edlbsOffCanTurnOn;
+ dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE); /* the state is edlbsOffCanTurnOn again, so this sets bCheckWhetherToTurnDlbOn */
}
}
c->c[1][0] = comm->cell_x0[dim1];
/* All rows can see this row */
c->c[1][1] = comm->cell_x0[dim1];
- if (dd->bGridJump)
+ if (dlbIsOn(dd->comm))
{
c->c[1][1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
if (bDistMB)
{
c->c[2][j] = comm->cell_x0[dim2];
}
- if (dd->bGridJump)
+ if (dlbIsOn(dd->comm))
{
/* Use the maximum of the i-cells that see a j-cell */
for (i = 0; i < zones->nizone; i++)
*/
c->cr1[0] = comm->cell_x1[dim1];
c->cr1[3] = comm->cell_x1[dim1];
- if (dd->bGridJump)
+ if (dlbIsOn(dd->comm))
{
c->cr1[0] = std::max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
if (bDistMB)
bBondComm = comm->bBondComm;
/* Do we need to determine extra distances for multi-body bondeds? */
- bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
+ bDistMB = (comm->bInterCGMultiBody && dlbIsOn(dd->comm) && dd->ndim > 1);
/* Do we need to determine extra distances for only two-body bondeds? */
bDist2B = (bBondComm && !bDistMB);
zones = &comm->zones;
/* Do we need to determine extra distances for multi-body bondeds? */
- bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
+ bDistMB = (comm->bInterCGMultiBody && dlbIsOn(dd->comm) && dd->ndim > 1);
for (z = zone_start; z < zone_end; z++)
{
/* With a staggered grid we have different sizes
* for non-shifted dimensions.
*/
- if (dd->bGridJump && zones->shift[z][dim] == 0)
+ if (dlbIsOn(dd->comm) && zones->shift[z][dim] == 0)
{
if (d == 1)
{
if (zones->shift[z][dim] > 0)
{
dim = dd->dim[d];
- if (!dd->bGridJump || d == 0)
+ if (!dlbIsOn(dd->comm) || d == 0)
{
zones->size[z].x0[dim] = comm->cell_x1[dim];
zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
bNStGlobalComm = (step % nstglobalcomm == 0);
- if (!comm->bDynLoadBal)
+ if (!dlbIsOn(comm))
{
bDoDLB = FALSE;
}
set_ddbox(dd, bMasterState, cr, ir, state_local->box,
TRUE, &top_local->cgs, state_local->x, &ddbox);
- bRedist = comm->bDynLoadBal;
+ bRedist = dlbIsOn(comm);
}
else
{
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2006,2007,2008,2009,2010,2012,2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2006,2007,2008,2009,2010,2012,2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
dd->ci[XX], dd->ci[YY], dd->ci[ZZ],
nrecv_local, ireq->n, specat_type,
specat_type, add_err,
- dd->bGridJump ? " or use the -rcon option of mdrun" : "");
+ dd_dlb_is_on(dd) ? " or use the -rcon option of mdrun" : "");
}
spac->at_start = at_start;
{
pme_lb->bBalance = dd_dlb_is_on(cr->dd);
}
- else
+ /* We should ignore the first timing to avoid timing allocation
+ * overhead. And since the PME load balancing is called just
+ * before DD repartitioning, the ratio returned by dd_pme_f_ratio
+ * is not over the last nstlist steps, but the nstlist steps before
+ * that. So the first useful ratio is available at step_rel=3*nstlist.
+ */
+ else if (step_rel >= 3*ir->nstlist)
{
if (DDMASTER(cr->dd))
{
- /* PME node load is too high, start tuning */
+ /* If PME rank load is too high, start tuning */
pme_lb->bBalance =
(dd_pme_f_ratio(cr->dd) >= loadBalanceTriggerFactor);
}
}
if (!pme_lb->bBalance &&
- (!pme_lb->bSepPMERanks || (step_rel <= pme_lb->step_rel_stop)))
+ (!pme_lb->bSepPMERanks || step_rel > pme_lb->step_rel_stop))
{
/* We have just deactivated the balancing and we're not measuring PP/PME
* imbalance during the first steps of the run: deactivate the tuning.
* This file is part of the GROMACS molecular simulation package.
*
* Copyright (c) 1991-2003 Erik Lindahl, David van der Spoel, University of Groningen.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
* files like fft_fftw3.c or fft_mkl.c for that.
*/
-#ifndef GMX_FFT_FFTW3
+#if !GMX_FFT_FFTW3
struct gmx_many_fft {
int howmany;
FILE* debug = 0;
#endif
-#ifdef GMX_FFT_FFTW3
-#include "thread_mpi/mutex.h"
+#if GMX_FFT_FFTW3
#include "gromacs/utility/exceptions.h"
+#include "gromacs/utility/mutex.h"
/* none of the fftw3 calls, except execute(), are thread-safe, so
we need to serialize them with this mutex. */
-static tMPI::mutex big_fftw_mutex;
+static gmx::Mutex big_fftw_mutex;
#define FFTW_LOCK try { big_fftw_mutex.lock(); } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
#define FFTW_UNLOCK try { big_fftw_mutex.unlock(); } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
#endif /* GMX_FFT_FFTW3 */
fprintf(debug, "Running on %d threads\n", nthreads);
}
-#ifdef GMX_FFT_FFTW3
+#if GMX_FFT_FFTW3
/* Don't add more stuff here! We have already had at least one bug because we are reimplementing
* the low-level FFT interface instead of using the Gromacs FFT module. If we need more
* generic functionality it is far better to extend the interface so we can use it for
}
}
-#ifdef GMX_FFT_FFTW3
+#if GMX_FFT_FFTW3
}
#endif
if ((flags&FFT5D_ORDER_YZ)) /*plan->cart is in the order of transposes */
int s = 0, tstart, tend, bParallelDim;
-#ifdef GMX_FFT_FFTW3
+#if GMX_FFT_FFTW3
if (plan->p3d)
{
if (thread == 0)
plan->oNout[s] = 0;
}
}
-#ifdef GMX_FFT_FFTW3
+#if GMX_FFT_FFTW3
FFTW_LOCK;
#ifdef FFT5D_MPI_TRANSPOS
for (s = 0; s < 2; s++)
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2009,2010,2012,2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2009,2010,2012,2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#endif
/*currently only special optimization for FFTE*/
-#ifdef GMX_FFT_FFTW3
+#if GMX_FFT_FFTW3
#include <fftw3.h>
#endif
t_complex *lin;
t_complex *lout, *lout2, *lout3;
gmx_fft_t* p1d[3]; /*1D plans*/
-#ifdef GMX_FFT_FFTW3
+#if GMX_FFT_FFTW3
FFTW(plan) p2d; /*2D plan: used for 1D decomposition if FFT supports transposed output*/
FFTW(plan) p3d; /*3D plan: used for 0D decomposition if FFT supports transposed output*/
FFTW(plan) mpip[2];
* This file is part of the GROMACS molecular simulation package.
*
* Copyright (c) 1991-2003 David van der Spoel, Erik Lindahl, University of Groningen.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include <fftw3.h>
-#include "thread_mpi/mutex.h"
-
#include "gromacs/fft/fft.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/fatalerror.h"
+#include "gromacs/utility/mutex.h"
#ifdef GMX_DOUBLE
#define FFTWPREFIX(name) fftw_ ## name
/* none of the fftw3 calls, except execute(), are thread-safe, so
we need to serialize them with this mutex. */
-static tMPI::mutex big_fftw_mutex;
+static gmx::Mutex big_fftw_mutex;
#define FFTW_LOCK try { big_fftw_mutex.lock(); } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
#define FFTW_UNLOCK try { big_fftw_mutex.unlock(); } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
int i, j, k;
int fftw_flags;
-#ifdef GMX_DISABLE_FFTW_MEASURE
+#if GMX_DISABLE_FFTW_MEASURE
flags |= GMX_FFT_FLAG_CONSERVATIVE;
#endif
int i, j, k;
int fftw_flags;
-#ifdef GMX_DISABLE_FFTW_MEASURE
+#if GMX_DISABLE_FFTW_MEASURE
flags |= GMX_FFT_FLAG_CONSERVATIVE;
#endif
int i, j, k;
int fftw_flags;
-#ifdef GMX_DISABLE_FFTW_MEASURE
+#if GMX_DISABLE_FFTW_MEASURE
flags |= GMX_FFT_FLAG_CONSERVATIVE;
#endif
#include "config.h"
-#ifdef GMX_INTERNAL_XDR
+#if GMX_INTERNAL_XDR
#include "gmx_system_xdr.h"
struct t_fileio
{
FILE *fp; /* the file pointer */
- gmx_bool bOpen, /* the file is open */
- bRead, /* the file is open for reading */
+ gmx_bool bRead, /* the file is open for reading */
bDouble, /* write doubles instead of floats */
- bDebug, /* the file ops should come with debug info */
bReadWrite; /* the file is open for reading and writing */
char *fn; /* the file name */
XDR *xdr; /* the xdr data pointer */
"IVEC", "STRING"
};
+void gmx_fio_setprecision(t_fileio *fio, gmx_bool bDouble) /* Selects the floating-point precision used for this file */
+{
+ gmx_fio_lock(fio); /* the field update is bracketed by gmx_fio_lock()/gmx_fio_unlock() on this fio */
+ fio->bDouble = bDouble; /* TRUE: write doubles instead of floats */
+ gmx_fio_unlock(fio);
+}
+
XDR *gmx_fio_getxdr(t_fileio *fio)
{
XDR *ret = NULL;
default:
gmx_fio_fe(fio, eio, desc, srcfile, line);
}
- if ((res == 0) && (fio->bDebug))
- {
- fprintf(stderr, "Error in xdr I/O %s %s to file %s (source %s, line %d)\n",
- eioNames[eio], desc, fio->fn, srcfile, line);
- }
return (res != 0);
}
struct t_fileio;
+void gmx_fio_setprecision(struct t_fileio *fio, gmx_bool bDouble);
+/* Select the floating point precision for reading and writing files */
+
XDR *gmx_fio_getxdr(struct t_fileio *fio);
/* Return the file pointer itself */
{
rc = fflush(fio->fp);
}
- else if (fio->xdr)
- {
- rc = fflush((FILE *) fio->xdr->x_private);
- }
return rc;
}
/* Check if it should be opened as a binary file */
if (!ftp_is_text(fn2ftp(fn)))
{
- /* Not ascii, add b to file mode */
- if ((strchr(newmode, 'b') == NULL) && (strchr(newmode, 'B') == NULL))
- {
- strcat(newmode, "b");
- }
+ strcat(newmode, "b");
}
snew(fio, 1);
fio->iFTP = fn2ftp(fn);
fio->fn = gmx_strdup(fn);
+ fio->fp = gmx_ffopen(fn, newmode);
/* If this file type is in the list of XDR files, open it like that */
if (ftp_is_xdr(fio->iFTP))
{
- /* First check whether we have to make a backup,
- * only for writing, not for read or append.
- */
- if (newmode[0] == 'w')
- {
-#ifndef GMX_FAHCORE
- /* only make backups for normal gromacs */
- make_backup(fn);
-#endif
- }
- else
- {
- /* Check whether file exists */
- if (!gmx_fexist(fn))
- {
- gmx_open(fn);
- }
- }
- /* Open the file */
- fio->fp = gmx_ffopen(fn, newmode);
-
/* determine the XDR direction */
if (newmode[0] == 'w' || newmode[0] == 'a')
{
{
fio->xdrmode = XDR_DECODE;
}
-
snew(fio->xdr, 1);
xdrstdio_create(fio->xdr, fio->fp, fio->xdrmode);
}
- else
- {
- /* If it is not, open it as a regular file */
- fio->fp = gmx_ffopen(fn, newmode);
- }
/* for appending seek to end of file to make sure ftell gives correct position
* important for checkpointing */
fio->bRead = bRead;
fio->bReadWrite = bReadWrite;
fio->bDouble = (sizeof(real) == sizeof(double));
- fio->bDebug = FALSE;
- fio->bOpen = TRUE;
/* and now insert this file into the list of open files. */
gmx_fio_insert(fio);
{
int rc = 0;
- if (!fio->bOpen)
- {
- gmx_fatal(FARGS, "File %s closed twice!\n", fio->fn);
- }
-
- if (ftp_is_xdr(fio->iFTP))
+ if (fio->xdr != NULL)
{
xdr_destroy(fio->xdr);
sfree(fio->xdr);
rc = gmx_ffclose(fio->fp); /* fclose returns 0 if happy */
}
- fio->bOpen = FALSE;
return rc;
}
/* We don't want two processes operating on the list at the same time */
tMPI_Thread_mutex_lock(&open_file_mutex);
- if (fio->iFTP == efTNG)
- {
- gmx_incons("gmx_fio_close should not be called on a TNG file");
- }
gmx_fio_lock(fio);
/* first remove it from the list */
gmx_fio_remove(fio);
{
int rc = 0;
gmx_fio_lock(fio);
- if (!ftp_is_xdr(fio->iFTP))
+ if (fio->xdr == NULL)
{
rc = gmx_ffclose(fio->fp); /* fclose returns 0 if happy */
fio->fp = NULL;
{
/* Skip the checkpoint files themselves, since they could be open when
we call this routine... */
- if (cur->bOpen && !cur->bRead && cur->iFTP != efCPT)
+ if (!cur->bRead && cur->iFTP != efCPT)
{
/* This is an output file currently open for writing, add it */
if (nfiles == nalloc)
}
-void gmx_fio_setprecision(t_fileio *fio, gmx_bool bDouble)
-{
- gmx_fio_lock(fio);
- fio->bDouble = bDouble;
- gmx_fio_unlock(fio);
-}
-
-void gmx_fio_setdebug(t_fileio *fio, gmx_bool bDebug)
-{
- gmx_fio_lock(fio);
- fio->bDebug = bDebug;
- gmx_fio_unlock(fio);
-}
-
char *gmx_fio_getname(t_fileio *fio)
{
char *ret;
{
rc = gmx_fsync(fio->fp);
}
- else if (fio->xdr) /* this should normally not happen */
- {
- rc = gmx_fsync((FILE*) fio->xdr->x_private);
- /* ^ is this actually OK? */
- }
return rc;
}
cur = gmx_fio_get_first();
while (cur)
{
- if (cur->bOpen && !cur->bRead)
+ if (!cur->bRead)
{
/* if any of them fails, return failure code */
int rc = gmx_fio_int_fsync(cur);
* Change properties of the open file
********************************************************/
-void gmx_fio_setprecision(t_fileio *fio, gmx_bool bDouble);
-/* Select the floating point precision for reading and writing files */
-
char *gmx_fio_getname(t_fileio *fio);
/* Return the filename corresponding to the fio index */
was opened as a specific file type and changing that midway is most
likely an evil hack. */
-void gmx_fio_setdebug(t_fileio *fio, gmx_bool bDebug);
-/* Set the debug mode */
-
gmx_bool gmx_fio_getread(t_fileio *fio);
/* Return whether read mode is on in fio */
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2009,2010,2011,2012,2014, by the GROMACS development team, led by
+ * Copyright (c) 2009,2010,2011,2012,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "config.h"
-#ifdef GMX_INTEGER_BIG_ENDIAN
+#if GMX_INTEGER_BIG_ENDIAN
#define ARCH_IS_BIG_ENDIAN 1
#else
#define ARCH_IS_BIG_ENDIAN 0
int idum = 0;
real rdum = 0;
- gmx_fio_setdebug(fio, bDebugMode());
-
/* XDR binary topology file */
precision = sizeof(real);
if (bRead)
#include "config.h"
-#ifdef GMX_INTERNAL_XDR
+#if GMX_INTERNAL_XDR
#include "gromacs/fileio/gmx_system_xdr.h"
#else
#include <rpc/rpc.h>
#include "gromacs/math/units.h"
#include "gromacs/math/utilities.h"
#include "gromacs/math/vec.h"
+#include "gromacs/topology/index.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/futil.h"
#include "gromacs/utility/smalloc.h"
VACF, MVACF, DOS, DOS_SOLID, DOS_DIFF, DOS_CP, DOS_S, DOS_A, DOS_E, DOS_NR
};
+/*! \brief Counts the molecules covered by an index group.
+ *
+ * Walks \p index and matches it against the molecule blocks in \p mols.
+ * Every molecule that is touched has to be present completely and in
+ * increasing atom order, otherwise a fatal error is issued.
+ *
+ * \param[in] mols    Molecule blocks; molecule m spans atoms
+ *                    mols->index[m] up to, not including, mols->index[m+1]
+ * \param[in] natoms  Upper bound on the number of group entries
+ *                    (presumably the atom count of the topology; the
+ *                    call site is not visible here)
+ * \param[in] index   Atom indices of the group
+ * \param[in] nindex  Number of entries in \p index
+ * \returns The number of whole molecules in the index group
+ */
+static int calcMoleculesInIndexGroup(t_block *mols, int natoms, atom_id *index, int nindex)
+{
+    int i    = 0;
+    int mol  = 0;
+    int nMol = 0;
+    int j;
+
+    while (i < nindex)
+    {
+        /* Advance to the first molecule that starts at or after index[i] */
+        while (index[i] > mols->index[mol])
+        {
+            mol++;
+            if (mol >= mols->nr)
+            {
+                gmx_fatal(FARGS, "Atom index out of range: %d", index[i]+1);
+            }
+        }
+        for (j = mols->index[mol]; j < mols->index[mol+1]; j++)
+        {
+            /* Running out of group entries in the middle of a molecule
+             * also means the group is not made of whole molecules.
+             * The bounds test comes first so that index[] is never
+             * read past its end.
+             */
+            if (i >= nindex || index[i] != j)
+            {
+                gmx_fatal(FARGS, "The index group does not consist of whole molecules");
+            }
+            i++;
+            /* A group that selects every atom legitimately reaches
+             * i == natoms after its last entry, so only exceeding
+             * natoms is an error.
+             */
+            if (i > natoms)
+            {
+                gmx_fatal(FARGS, "Index contains atom numbers larger than the topology");
+            }
+        }
+        nMol++;
+    }
+    return nMol;
+}
+
static double FD(double Delta, double f)
{
return (2*pow(Delta, -4.5)*pow(f, 7.5) -
}
}
-static void dump_fy(output_env_t oenv, real toler)
-{
- FILE *fp;
- double Delta, f, y, DD;
- const char *leg[] = { "f", "fy", "y" };
-
- DD = pow(10.0, 0.125);
- fp = xvgropen("fy.xvg", "Fig. 2, Lin2003a", "Delta", "y or fy", oenv);
- xvgr_legend(fp, asize(leg), leg, oenv);
- if (output_env_get_print_xvgr_codes(oenv))
- {
- fprintf(fp, "@ world 1e-05, 0, 1000, 1\n");
- fprintf(fp, "@ xaxes scale Logarithmic\n");
- }
- for (Delta = 1e-5; (Delta <= 1000); Delta *= DD)
- {
- f = calc_fluidicity(Delta, toler);
- y = calc_y(f, Delta, toler);
- fprintf(fp, "%10g %10g %10g %10g\n", Delta, f, f*y, y);
- }
- xvgrclose(fp);
-}
-
-static void dump_w(output_env_t oenv, real beta)
-{
- FILE *fp;
- double nu;
- const char *leg[] = { "wCv", "wS", "wA", "wE" };
-
- fp = xvgropen("w.xvg", "Fig. 1, Berens1983a", "\\f{12}b\\f{4}h\\f{12}n",
- "w", oenv);
- xvgr_legend(fp, asize(leg), leg, oenv);
- for (nu = 1; (nu < 100); nu += 0.05)
- {
- fprintf(fp, "%10g %10g %10g %10g %10g\n", beta*PLANCK*nu,
- wCsolid(nu, beta), wSsolid(nu, beta),
- wAsolid(nu, beta), wEsolid(nu, beta));
- }
- xvgrclose(fp);
-}
-
int gmx_dos(int argc, char *argv[])
{
const char *desc[] = {
"all vibrations. For flexible systems that would be around a few fs",
"between saving. Properties based on the DoS are printed on the",
"standard output."
+ "Note that the density of states is calculated from the mass-weighted",
+ "autocorrelation, and by default only from the square of the real",
+ "component rather than absolute value. This means the shape can differ",
+ "substantially from the plain vibrational power spectrum you can",
+ "calculate with gmx velacc."
};
const char *bugs[] = {
"This program needs a lot of memory: total usage equals the number of atoms times 3 times number of frames times 4 (or 8 when run in double precision)."
gmx_fft_t fft;
double cP, S, A, E, DiffCoeff, Delta, f, y, z, sigHS, Shs, Sig, DoS0, recip_fac;
double wCdiff, wSdiff, wAdiff, wEdiff;
-
- static gmx_bool bVerbose = TRUE, bAbsolute = FALSE, bNormalize = FALSE;
- static gmx_bool bRecip = FALSE, bDump = FALSE;
+ int grpNatoms;
+ atom_id *index;
+ char *grpname;
+ double invNormalize;
+ gmx_bool normalizeAutocorrelation;
+
+ static gmx_bool bVerbose = TRUE, bAbsolute = FALSE, bNormalizeDos = FALSE;
+ static gmx_bool bRecip = FALSE;
static real Temp = 298.15, toler = 1e-6;
+
t_pargs pa[] = {
{ "-v", FALSE, etBOOL, {&bVerbose},
"Be loud and noisy." },
"Use cm^-1 on X-axis instead of 1/ps for DoS plots." },
{ "-abs", FALSE, etBOOL, {&bAbsolute},
"Use the absolute value of the Fourier transform of the VACF as the Density of States. Default is to use the real component only" },
- { "-normdos", FALSE, etBOOL, {&bNormalize},
- "Normalize the DoS such that it adds up to 3N. This is a hack that should not be necessary." },
+ { "-normdos", FALSE, etBOOL, {&bNormalizeDos},
+ "Normalize the DoS such that it adds up to 3N. This should usually not be necessary." },
{ "-T", FALSE, etREAL, {&Temp},
"Temperature in the simulation" },
{ "-toler", FALSE, etREAL, {&toler},
- "[HIDDEN]Tolerance when computing the fluidicity using bisection algorithm" },
- { "-dump", FALSE, etBOOL, {&bDump},
- "[HIDDEN]Dump the y/fy plot corresponding to Fig. 2 inLin2003a and the and the weighting functions corresponding to Fig. 1 in Berens1983a." }
+ "[HIDDEN]Tolerance when computing the fluidicity using bisection algorithm" }
};
t_filenm fnm[] = {
}
beta = 1/(Temp*BOLTZ);
- if (bDump)
- {
- printf("Dumping reference figures. Thanks for your patience.\n");
- dump_fy(oenv, toler);
- dump_w(oenv, beta);
- exit(0);
- }
fplog = gmx_fio_fopen(ftp2fn(efLOG, NFILE, fnm), "w");
fprintf(fplog, "Doing density of states analysis based on trajectory.\n");
please_cite(fplog, "Pascal2011a");
please_cite(fplog, "Caleman2011b");
- read_tps_conf(ftp2fn(efTPR, NFILE, fnm), title, &top, &ePBC, NULL, NULL, box,
- TRUE);
+ read_tps_conf(ftp2fn(efTPR, NFILE, fnm), title, &top, &ePBC, NULL, NULL, box, TRUE);
+
+ /* Handle index groups */
+ get_index(&top.atoms, ftp2fn_null(efNDX, NFILE, fnm), 1, &grpNatoms, &index, &grpname);
+
V = det(box);
tmass = 0;
- for (i = 0; (i < top.atoms.nr); i++)
+ for (i = 0; i < grpNatoms; i++)
{
- tmass += top.atoms.atom[i].m;
+ tmass += top.atoms.atom[index[i]].m;
}
- Natom = top.atoms.nr;
- Nmol = top.mols.nr;
+ Natom = grpNatoms;
+ Nmol = calcMoleculesInIndexGroup(&top.mols, top.atoms.nr, index, grpNatoms);
gnx = Natom*DIM;
/* Correlation stuff */
}
for (i = 0; i < gnx; i += DIM)
{
- c1[i+XX][nframes] = fr.v[i/DIM][XX];
- c1[i+YY][nframes] = fr.v[i/DIM][YY];
- c1[i+ZZ][nframes] = fr.v[i/DIM][ZZ];
+ c1[i+XX][nframes] = fr.v[index[i/DIM]][XX];
+ c1[i+YY][nframes] = fr.v[index[i/DIM]][YY];
+ c1[i+ZZ][nframes] = fr.v[index[i/DIM]][ZZ];
}
t1 = fr.time;
printf("Going to do %d fourier transforms of length %d. Hang on.\n",
gnx, nframes);
}
+ /* Unfortunately the -normalize program option for the autocorrelation
+ * function calculation is added as a hack with a static variable in the
+ * autocorrelation.c source. That would work if we called the normal
+ * do_autocorr(), but this routine overrides that by directly calling
+ * the low-level functionality. That unfortunately leads to ignoring the
+ * default value for the option (which is to normalize).
+ * Since the absolute value seems to be important for the subsequent
+ * analysis below, we detect the value directly from the option, calculate
+ * the autocorrelation without normalization, and then apply the
+ * normalization just to the autocorrelation output
+ * (or not, if the user asked for a non-normalized autocorrelation).
+ */
+ normalizeAutocorrelation = opt2parg_bool("-normalize", npargs, ppa);
+
+ /* Note that we always disable normalization here, regardless of user settings */
low_do_autocorr(NULL, oenv, NULL, nframes, gnx, nframes, c1, dt, eacNormal, 0, FALSE,
FALSE, FALSE, -1, -1, 0);
snew(dos, DOS_NR);
}
for (i = 0; (i < gnx); i += DIM)
{
- mi = top.atoms.atom[i/DIM].m;
+ mi = top.atoms.atom[index[i/DIM]].m;
for (j = 0; (j < nframes/2); j++)
{
c1j = (c1[i+XX][j] + c1[i+YY][j] + c1[i+ZZ][j]);
dos[MVACF][j] += mi*c1j;
}
}
- fp = xvgropen(opt2fn("-vacf", NFILE, fnm), "Velocity ACF",
+
+ fp = xvgropen(opt2fn("-vacf", NFILE, fnm), "Velocity autocorrelation function",
"Time (ps)", "C(t)", oenv);
snew(tt, nframes/2);
+
+ invNormalize = normalizeAutocorrelation ? 1.0/dos[VACF][0] : 1.0;
+
for (j = 0; (j < nframes/2); j++)
{
tt[j] = j*dt;
- fprintf(fp, "%10g %10g\n", tt[j], dos[VACF][j]);
+ fprintf(fp, "%10g %10g\n", tt[j], dos[VACF][j] * invNormalize);
}
xvgrclose(fp);
- fp = xvgropen(opt2fn("-mvacf", NFILE, fnm), "Mass-weighted velocity ACF",
+
+ fp = xvgropen(opt2fn("-mvacf", NFILE, fnm), "Mass-weighted velocity autocorrelation function",
"Time (ps)", "C(t)", oenv);
+
+ /* Normalize the mass-weighted ACF by its own zero-time value, so that
+ * the normalized curve starts at 1 (not by the plain VACF value). */
+ invNormalize = normalizeAutocorrelation ? 1.0/dos[MVACF][0] : 1.0;
+
for (j = 0; (j < nframes/2); j++)
{
- fprintf(fp, "%10g %10g\n", tt[j], dos[MVACF][j]);
+ fprintf(fp, "%10g %10g\n", tt[j], dos[MVACF][j] * invNormalize);
}
xvgrclose(fp);
}
/* Normalize it */
dostot = evaluate_integral(nframes/4, nu, dos[DOS], NULL, nframes/4, &stddev);
- if (bNormalize)
+ if (bNormalizeDos)
{
for (j = 0; (j < nframes/4); j++)
{
cP = BOLTZ * evaluate_integral(nframes/4, nu, dos[DOS_CP], NULL,
nframes/4, &stddev);
fprintf(fplog, "Heat capacity %g J/mol K\n", 1000*cP/Nmol);
-
- /*
- S = BOLTZ * evaluate_integral(nframes/4,nu,dos[DOS_S],NULL,
- nframes/4,&stddev);
- fprintf(fplog,"Entropy %g J/mol K\n",1000*S/Nmol);
- A = BOLTZ * evaluate_integral(nframes/4,nu,dos[DOS_A],NULL,
- nframes/4,&stddev);
- fprintf(fplog,"Helmholtz energy %g kJ/mol\n",A/Nmol);
- E = BOLTZ * evaluate_integral(nframes/4,nu,dos[DOS_E],NULL,
- nframes/4,&stddev);
- fprintf(fplog,"Internal energy %g kJ/mol\n",E/Nmol);
- */
fprintf(fplog, "\nArrivederci!\n");
gmx_fio_fclose(fplog);
#ifdef GMX_GPU
-const gmx_bool bGPUBinary = TRUE;
+
+// Compiled with GPU support
+static const bool bGPUBinary = true;
+
# ifdef GMX_USE_OPENCL
-const char *gpu_implementation = "OpenCL";
+
+static const char *gpu_implementation = "OpenCL";
/* Our current OpenCL implementation only supports using exactly one
* GPU per PP rank, so sharing is impossible */
-const gmx_bool bGpuSharingSupported = FALSE;
+static const bool bGpuSharingSupported = false;
/* Our current OpenCL implementation is not known to handle
* concurrency correctly (at context creation, JIT compilation, or JIT
* cache-management stages). OpenCL runtimes need not support it
* either; library MPI segfaults when creating OpenCL contexts;
* thread-MPI seems to work but is not yet known to be safe. */
-const gmx_bool bMultiGpuPerNodeSupported = FALSE;
-# else
-const char *gpu_implementation = "CUDA";
-const gmx_bool bGpuSharingSupported = TRUE;
-const gmx_bool bMultiGpuPerNodeSupported = TRUE;
-# endif
-#else
-const gmx_bool bGPUBinary = FALSE;
-const char *gpu_implementation = "non-GPU";
-const gmx_bool bGpuSharingSupported = FALSE;
-const gmx_bool bMultiGpuPerNodeSupported = FALSE;
-#endif
+static const bool bMultiGpuPerNodeSupported = false;
+
+# else /* GMX_USE_OPENCL */
+
+// Our CUDA implementation supports everything
+static const char *gpu_implementation = "CUDA";
+static const bool bGpuSharingSupported = true;
+static const bool bMultiGpuPerNodeSupported = true;
+
+# endif /* GMX_USE_OPENCL */
+
+#else /* GMX_GPU */
+
+// Not compiled with GPU support
+static const bool bGPUBinary = false;
+static const char *gpu_implementation = "non-GPU";
+static const bool bGpuSharingSupported = false;
+static const bool bMultiGpuPerNodeSupported = false;
+
+#endif /* GMX_GPU */
/* Names of the GPU detection/check results (see e_gpu_detect_res_t in hw_info.h). */
const char * const gpu_detect_res_str[egpuNR] =
static int gmx_count_gpu_dev_unique(const gmx_gpu_info_t *gpu_info,
const gmx_gpu_opt_t *gpu_opt);
+/* Returns the value of the file-local bMultiGpuPerNodeSupported constant,
+ * which is selected at compile time by the GMX_GPU / GMX_USE_OPENCL branches. */
+gmx_bool gmx_multiple_gpu_per_node_supported()
+{
+ return bMultiGpuPerNodeSupported;
+}
+
+/* Returns the value of the file-local bGpuSharingSupported constant,
+ * which is selected at compile time by the GMX_GPU / GMX_USE_OPENCL branches. */
+gmx_bool gmx_gpu_sharing_supported()
+{
+ return bGpuSharingSupported;
+}
+
static void sprint_gpus(char *sbuf, const gmx_gpu_info_t *gpu_info)
{
int i, ndev;
}
else
{
- if (ngpu_comp > npppn)
+ /* TODO Should we have a gpu_opt->n_dev_supported field? */
+ if (ngpu_comp > npppn && gmx_multiple_gpu_per_node_supported())
{
md_print_warn(cr, fplog,
"NOTE: potentially sub-optimal launch configuration, %s started with less\n"
*/
if (cr->rank_pp_intranode == 0)
{
+ std::string reasonForLimit;
+ if (ngpu_comp > 1 &&
+ ngpu_use == 1 &&
+ !gmx_multiple_gpu_per_node_supported())
+ {
+ reasonForLimit = "can be used by ";
+ reasonForLimit += gpu_implementation;
+ reasonForLimit += " in GROMACS";
+ }
+ else
+ {
+ reasonForLimit = "was detected";
+ }
gmx_fatal(FARGS,
"Incorrect launch configuration: mismatching number of PP %s%s and GPUs%s.\n"
- "%s was started with %d PP %s%s%s, but only %d GPU%s were detected.",
+ "%s was started with %d PP %s%s%s, but only %d GPU%s %s.",
th_or_proc, btMPI ? "s" : "es", pernode,
ShortProgram(), npppn, th_or_proc,
th_or_proc_plural, pernode,
- ngpu_use, gpu_use_plural);
+ ngpu_use, gpu_use_plural, reasonForLimit.c_str());
}
}
}
{
int device_id;
- device_id = bGpuSharingSupported ? get_gpu_device_id(gpu_info, gpu_opt, i) : i;
+ device_id = gmx_gpu_sharing_supported() ? get_gpu_device_id(gpu_info, gpu_opt, i) : i;
uniq_ids[device_id] = 1;
}
/* Count the devices used. */
parse_digits_from_plain_string(env,
&gpu_opt->n_dev_use,
&gpu_opt->dev_use);
- if (!bMultiGpuPerNodeSupported && 1 < gpu_opt->n_dev_use)
+ if (!gmx_multiple_gpu_per_node_supported() && 1 < gpu_opt->n_dev_use)
{
gmx_fatal(FARGS, "The %s implementation only supports using exactly one PP rank per node", gpu_implementation);
}
- if (!bGpuSharingSupported && anyGpuIdIsRepeated(gpu_opt))
+ if (!gmx_gpu_sharing_supported() && anyGpuIdIsRepeated(gpu_opt))
{
gmx_fatal(FARGS, "The %s implementation only supports using exactly one PP rank per GPU", gpu_implementation);
}
{
if (nrank % gpu_opt->n_dev_compatible == 0)
{
- nshare = bGpuSharingSupported ? nrank/gpu_opt->n_dev_compatible : 1;
+ nshare = gmx_gpu_sharing_supported() ? nrank/gpu_opt->n_dev_compatible : 1;
}
else
{
/* Here we will waste GPUs when nrank < gpu_opt->n_dev_compatible */
gpu_opt->n_dev_use = std::min(gpu_opt->n_dev_compatible*nshare, nrank);
- if (!bMultiGpuPerNodeSupported)
+ if (!gmx_multiple_gpu_per_node_supported())
{
gpu_opt->n_dev_use = std::min(gpu_opt->n_dev_use, 1);
}
for (std::string::size_type i = 0; i < unescaped_ocl_root_path.length(); i++)
{
- if (inputStr[i] == ' ')
+ if (unescaped_ocl_root_path[i] == ' ')
{
ocl_root_path.push_back('\\');
}
#include "gromacs/utility/directoryenumerator.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/fatalerror.h"
-#include "gromacs/utility/file.h"
#include "gromacs/utility/futil.h"
#include "gromacs/utility/path.h"
#include "gromacs/utility/smalloc.h"
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2008, The GROMACS development team.
- * Copyright (c) 2012,2014, by the GROMACS development team, led by
+ * Copyright (c) 2012,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#ifndef GMX_GMXPREPROCESS_GMXCPP_H
#define GMX_GMXPREPROCESS_GMXCPP_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
typedef struct gmx_cpp *gmx_cpp_t;
/* The possible return codes for these functions */
NOT THREAD SAFE
*/
char *cpp_error(gmx_cpp_t *handlep, int status);
+
+#ifdef __cplusplus
+}
+#endif
+
#endif
#include "gromacs/utility/dir_separator.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/fatalerror.h"
-#include "gromacs/utility/file.h"
#include "gromacs/utility/futil.h"
#include "gromacs/utility/path.h"
#include "gromacs/utility/programcontext.h"
int ngroup;
pcrd = &pull->coord[i-1];
- sprintf(buf, "pull-coord%d-groups", i);
- STYPE(buf, groups, "");
sprintf(buf, "pull-coord%d-type", i);
EETYPE(buf, pcrd->eType, epull_names);
sprintf(buf, "pull-coord%d-geometry", i);
EETYPE(buf, pcrd->eGeom, epullg_names);
+ sprintf(buf, "pull-coord%d-groups", i);
+ STYPE(buf, groups, "");
nscan = sscanf(groups, "%d %d %d %d %d", &pcrd->group[0], &pcrd->group[1], &pcrd->group[2], &pcrd->group[3], &idum);
ngroup = (pcrd->eGeom == epullgDIRRELATIVE) ? 4 : 2;
} /* fixes auto-indentation problems */
#endif
+/*! \brief Return whether mdrun can use more than one GPU per node
+ *
+ * The OpenCL implementation cannot use more than one GPU per node,
+ * for example. */
+gmx_bool gmx_multiple_gpu_per_node_supported();
+
+/*! \brief Return whether PP ranks can share a GPU
+ *
+ * The OpenCL implementation cannot share a GPU between ranks, for
+ * example. */
+gmx_bool gmx_gpu_sharing_supported();
+
/* the init and consistency functions depend on commrec that may not be
consistent in cuda because MPI types don't exist there. */
#ifndef __CUDACC__
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
ivec nc;
int ndim;
ivec dim; /* indexed by 0 to ndim */
- gmx_bool bGridJump;
/* PBC from dim 0 to npbcdim */
int npbcdim;
/* Abstract type for PME that is defined only in the routine that use them. */
struct gmx_pme_t;
struct nonbonded_verlet_t;
+struct bonded_threading_t;
/* Structure describing the data in a single table */
typedef struct
/* Forward declaration of type for managing Ewald tables */
struct gmx_ewald_tab_t;
-typedef struct f_thread_t f_thread_t;
+typedef struct ewald_corr_thread_t ewald_corr_thread_t;
typedef struct {
interaction_const_t *ic;
real userreal3;
real userreal4;
- /* Thread local force and energy data */
- /* FIXME move to bonded_thread_data_t */
- int nthreads;
- int red_ashift;
- int red_nblock;
- f_thread_t *f_t;
+ /* Pointer to struct for managing threading of bonded force calculation */
+ struct bonded_threading_t *bonded_threading;
- /* Maximum thread count for uniform distribution of bondeds over threads */
- int bonded_max_nthread_uniform;
-
- /* Exclusion load distribution over the threads */
- int *excl_load;
+ /* Ewald correction thread local virial and energy data */
+ int nthread_ewc;
+ ewald_corr_thread_t *ewc_t;
+ /* Ewald charge correction load distribution over the threads */
+ int *excl_load;
} t_forcerec;
/* Important: Starting with Gromacs-4.6, the values of c6 and c12 in the nbfp array have
#include "gromacs/listed-forces/bonded.h"
#include "gromacs/listed-forces/position-restraints.h"
#include "gromacs/math/vec.h"
-#include "gromacs/mdlib/forcerec-threading.h"
#include "gromacs/pbcutil/ishift.h"
#include "gromacs/pbcutil/pbc.h"
#include "gromacs/simd/simd.h"
#include "gromacs/timing/wallcycle.h"
#include "gromacs/utility/smalloc.h"
+#include "listed-internal.h"
#include "pairs.h"
namespace
t_fcdata *fcd, int *global_atom_index,
int force_flags)
{
- gmx_bool bCalcEnerVir;
- int i;
- real dvdl[efptNR]; /* The dummy array is to have a place to store the dhdl at other values
- of lambda, which will be thrown away in the end*/
- const t_pbc *pbc_null;
- int thread;
+ struct bonded_threading_t *bt;
+ gmx_bool bCalcEnerVir;
+ int i;
+ /* The dummy array is to have a place to store the dhdl at other values
+ of lambda, which will be thrown away in the end */
+ real dvdl[efptNR];
+ const t_pbc *pbc_null;
+ int thread;
+
+ bt = fr->bonded_threading;
- assert(fr->nthreads == idef->nthreads);
+ assert(bt->nthreads == idef->nthreads);
bCalcEnerVir = (force_flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY));
}
wallcycle_sub_start(wcycle, ewcsLISTED);
-#pragma omp parallel for num_threads(fr->nthreads) schedule(static)
- for (thread = 0; thread < fr->nthreads; thread++)
+#pragma omp parallel for num_threads(bt->nthreads) schedule(static)
+ for (thread = 0; thread < bt->nthreads; thread++)
{
int ftype;
real *epot, v;
}
else
{
- zero_thread_forces(&fr->f_t[thread], fr->natoms_force,
- fr->red_nblock, 1<<fr->red_ashift);
-
- ft = fr->f_t[thread].f;
- fshift = fr->f_t[thread].fshift;
- epot = fr->f_t[thread].ener;
- grpp = &fr->f_t[thread].grpp;
- dvdlt = fr->f_t[thread].dvdl;
+ zero_thread_forces(&bt->f_t[thread], fr->natoms_force,
+ bt->red_nblock, 1<<bt->red_ashift);
+
+ ft = bt->f_t[thread].f;
+ fshift = bt->f_t[thread].fshift;
+ epot = bt->f_t[thread].ener;
+ grpp = &bt->f_t[thread].grpp;
+ dvdlt = bt->f_t[thread].dvdl;
}
/* Loop over all bonded force types to calculate the bonded forces */
for (ftype = 0; (ftype < F_NRE); ftype++)
}
wallcycle_sub_stop(wcycle, ewcsLISTED);
- if (fr->nthreads > 1)
+ if (bt->nthreads > 1)
{
wallcycle_sub_start(wcycle, ewcsLISTED_BUF_OPS);
reduce_thread_forces(fr->natoms_force, f, fr->fshift,
enerd->term, &enerd->grpp, dvdl,
- fr->nthreads, fr->f_t,
- fr->red_nblock, 1<<fr->red_ashift,
+ bt->nthreads, bt->f_t,
+ bt->red_nblock, 1<<bt->red_ashift,
bCalcEnerVir,
force_flags & GMX_FORCE_DHDL);
wallcycle_sub_stop(wcycle, ewcsLISTED_BUF_OPS);
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2014, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#ifndef GMX_LISTED_FORCES_LISTED_INTERNAL_H
#define GMX_LISTED_FORCES_LISTED_INTERNAL_H
+#include "gromacs/legacyheaders/types/forcerec.h"
+#include "gromacs/math/vectypes.h"
+#include "gromacs/topology/idef.h"
+#include "gromacs/utility/bitmask.h"
+
+/*! \internal \brief struct with output for bonded forces, used per thread */
+typedef struct
+{
+ rvec *f; /**< Force array */
+ int f_nalloc; /**< Allocation size of f */
+ gmx_bitmask_t red_mask; /**< Mask for marking which parts of f are filled */
+ rvec *fshift; /**< Shift force array, size SHIFTS */
+ real ener[F_NRE]; /**< Energy array */
+ gmx_grppairener_t grpp; /**< Group pair energy data for pairs */
+ real dvdl[efptNR]; /**< Free-energy dV/dl output */
+}
+f_thread_t;
+
+/*! \internal \brief struct containing all data for bonded force threading */
+struct bonded_threading_t
+{
+ /* Thread local force and energy data */
+ int nthreads; /**< Number of threads to be used for bondeds */
+ int red_ashift; /**< Size of force reduction blocks in bits */
+ int red_nblock; /**< The number of force blocks to reduce */
+ f_thread_t *f_t; /**< Force/energy data per thread, size nthreads */
+
+ /* There are two different ways to distribute the bonded force calculation
+ * over the threads. We decide which to use based on the number of threads.
+ */
+ int bonded_max_nthread_uniform; /**< Maximum thread count for uniform distribution of bondeds over threads */
+};
+
+
/*! \brief Returns the global topology atom number belonging to local
* atom index i.
*
#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
#include "gromacs/listed-forces/listed-forces.h"
-#include "gromacs/mdlib/forcerec-threading.h"
#include "gromacs/pbcutil/ishift.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/smalloc.h"
#include "gromacs/utility/stringutil.h"
+#include "listed-internal.h"
+
/*! \brief struct for passing all data required for a function type */
typedef struct {
int ftype; /**< the function type index */
void setup_bonded_threading(t_forcerec *fr, t_idef *idef)
{
- int t;
- int ctot, c, b;
+ bonded_threading_t *bt;
+ int t;
+ int ctot, c, b;
+
+ bt = fr->bonded_threading;
- assert(fr->nthreads >= 1);
+ assert(bt->nthreads >= 1);
/* Divide the bonded interaction over the threads */
divide_bondeds_over_threads(idef,
- fr->nthreads,
- fr->bonded_max_nthread_uniform);
+ bt->nthreads,
+ bt->bonded_max_nthread_uniform);
- if (fr->nthreads == 1)
+ if (bt->nthreads == 1)
{
- fr->red_nblock = 0;
+ bt->red_nblock = 0;
return;
}
- fr->red_ashift = 6;
- while (fr->natoms_force > (int)(maxBlockBits*(1U<<fr->red_ashift)))
+ bt->red_ashift = 6;
+ while (fr->natoms_force > (int)(maxBlockBits*(1U<<bt->red_ashift)))
{
- fr->red_ashift++;
+ bt->red_ashift++;
}
if (debug)
{
fprintf(debug, "bonded force buffer block atom shift %d bits\n",
- fr->red_ashift);
+ bt->red_ashift);
}
/* Determine to which blocks each thread's bonded force calculation
* contributes. Store this is a mask for each thread.
*/
-#pragma omp parallel for num_threads(fr->nthreads) schedule(static)
- for (t = 1; t < fr->nthreads; t++)
+#pragma omp parallel for num_threads(bt->nthreads) schedule(static)
+ for (t = 1; t < bt->nthreads; t++)
{
- calc_bonded_reduction_mask(&fr->f_t[t].red_mask,
- idef, fr->red_ashift, t, fr->nthreads);
+ calc_bonded_reduction_mask(&bt->f_t[t].red_mask,
+ idef, bt->red_ashift, t, bt->nthreads);
}
/* Determine the maximum number of blocks we need to reduce over */
- fr->red_nblock = 0;
+ bt->red_nblock = 0;
ctot = 0;
- for (t = 0; t < fr->nthreads; t++)
+ for (t = 0; t < bt->nthreads; t++)
{
c = 0;
for (b = 0; b < maxBlockBits; b++)
{
- if (bitmask_is_set(fr->f_t[t].red_mask, b))
+ if (bitmask_is_set(bt->f_t[t].red_mask, b))
{
- fr->red_nblock = std::max(fr->red_nblock, b+1);
+ bt->red_nblock = std::max(bt->red_nblock, b+1);
c++;
}
}
if (debug)
{
#if BITMASK_SIZE <= 64 //move into bitmask when it is C++
- std::string flags = gmx::formatString("%x", fr->f_t[t].red_mask);
+ std::string flags = gmx::formatString("%x", bt->f_t[t].red_mask);
#else
- std::string flags = gmx::formatAndJoin(fr->f_t[t].red_mask,
- fr->f_t[t].red_mask+BITMASK_ALEN,
+ std::string flags = gmx::formatAndJoin(bt->f_t[t].red_mask,
+ bt->f_t[t].red_mask+BITMASK_ALEN,
"", gmx::StringFormatter("%x"));
#endif
fprintf(debug, "thread %d flags %s count %d\n",
if (debug)
{
fprintf(debug, "Number of blocks to reduce: %d of size %d\n",
- fr->red_nblock, 1<<fr->red_ashift);
+ bt->red_nblock, 1<<bt->red_ashift);
fprintf(debug, "Reduction density %.2f density/#thread %.2f\n",
- ctot*(1<<fr->red_ashift)/(double)fr->natoms_force,
- ctot*(1<<fr->red_ashift)/(double)(fr->natoms_force*fr->nthreads));
+ ctot*(1<<bt->red_ashift)/(double)fr->natoms_force,
+ ctot*(1<<bt->red_ashift)/(double)(fr->natoms_force*bt->nthreads));
}
}
-void init_bonded_threading(FILE *fplog, t_forcerec *fr, int nenergrp)
+void init_bonded_threading(FILE *fplog, int nenergrp,
+ struct bonded_threading_t **bt_ptr)
{
+ bonded_threading_t *bt;
+
+ snew(bt, 1);
+
/* These thread local data structures are used for bondeds only */
- fr->nthreads = gmx_omp_nthreads_get(emntBonded);
+ bt->nthreads = gmx_omp_nthreads_get(emntBonded);
- if (fr->nthreads > 1)
+ if (bt->nthreads > 1)
{
int t;
- snew(fr->f_t, fr->nthreads);
-#pragma omp parallel for num_threads(fr->nthreads) schedule(static)
- for (t = 0; t < fr->nthreads; t++)
+ snew(bt->f_t, bt->nthreads);
+#pragma omp parallel for num_threads(bt->nthreads) schedule(static)
+ for (t = 0; t < bt->nthreads; t++)
{
/* Thread 0 uses the global force and energy arrays */
if (t > 0)
{
int i;
- fr->f_t[t].f = NULL;
- fr->f_t[t].f_nalloc = 0;
- snew(fr->f_t[t].fshift, SHIFTS);
- fr->f_t[t].grpp.nener = nenergrp*nenergrp;
+ bt->f_t[t].f = NULL;
+ bt->f_t[t].f_nalloc = 0;
+ snew(bt->f_t[t].fshift, SHIFTS);
+ bt->f_t[t].grpp.nener = nenergrp*nenergrp;
for (i = 0; i < egNR; i++)
{
- snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
+ snew(bt->f_t[t].grpp.ener[i], bt->f_t[t].grpp.nener);
}
}
}
if ((ptr = getenv("GMX_BONDED_NTHREAD_UNIFORM")) != NULL)
{
- sscanf(ptr, "%d", &fr->bonded_max_nthread_uniform);
+ sscanf(ptr, "%d", &bt->bonded_max_nthread_uniform);
if (fplog != NULL)
{
fprintf(fplog, "\nMax threads for uniform bonded distribution set to %d by env.var.\n",
- fr->bonded_max_nthread_uniform);
+ bt->bonded_max_nthread_uniform);
}
}
else
{
- fr->bonded_max_nthread_uniform = max_nthread_uniform;
+ bt->bonded_max_nthread_uniform = max_nthread_uniform;
}
}
+
+ *bt_ptr = bt;
}
*/
void setup_bonded_threading(t_forcerec *fr, t_idef *idef);
-/*! \brief Initialize the bonded threading data structures */
-void init_bonded_threading(FILE *fplog, t_forcerec *fr, int nenergrp);
+/*! \brief Initialize the bonded threading data structures
+ *
+ * Allocates and initializes a bonded threading data structure.
+ * A pointer to this struct is returned as \p *bt_ptr.
+ */
+void init_bonded_threading(FILE *fplog, int nenergrp,
+ struct bonded_threading_t **bt_ptr);
#ifdef __cplusplus
}
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
double gmx_erfd(double x)
{
-#ifdef GMX_FLOAT_FORMAT_IEEE754
+#if GMX_FLOAT_FORMAT_IEEE754
gmx_int32_t hx, ix, i;
double R, S, P, Q, s, y, z, r;
conv.d = x;
-#ifdef GMX_IEEE754_BIG_ENDIAN_WORD_ORDER
+#if GMX_IEEE754_BIG_ENDIAN_WORD_ORDER
hx = conv.i[0];
#else
hx = conv.i[1];
conv.d = x;
-#ifdef GMX_IEEE754_BIG_ENDIAN_WORD_ORDER
+#if GMX_IEEE754_BIG_ENDIAN_WORD_ORDER
conv.i[1] = 0;
#else
conv.i[0] = 0;
double gmx_erfcd(double x)
{
-#ifdef GMX_FLOAT_FORMAT_IEEE754
+#if GMX_FLOAT_FORMAT_IEEE754
gmx_int32_t hx, ix;
double R, S, P, Q, s, y, z, r;
conv.d = x;
-#ifdef GMX_IEEE754_BIG_ENDIAN_WORD_ORDER
+#if GMX_IEEE754_BIG_ENDIAN_WORD_ORDER
hx = conv.i[0];
#else
hx = conv.i[1];
conv.d = x;
-#ifdef GMX_IEEE754_BIG_ENDIAN_WORD_ORDER
+#if GMX_IEEE754_BIG_ENDIAN_WORD_ORDER
conv.i[1] = 0;
#else
conv.i[0] = 0;
}
}
-static void reduce_thread_forces(int n, rvec *f,
- tensor vir_q, tensor vir_lj,
- real *Vcorr_q, real *Vcorr_lj,
- real *dvdl_q, real *dvdl_lj,
- int nthreads, f_thread_t *f_t)
+static void reduce_thread_energies(tensor vir_q, tensor vir_lj,
+ real *Vcorr_q, real *Vcorr_lj,
+ real *dvdl_q, real *dvdl_lj,
+ int nthreads,
+ ewald_corr_thread_t *ewc_t)
{
- int t, i;
- int nthreads_loop gmx_unused;
+ int t;
- // cppcheck-suppress unreadVariable
- nthreads_loop = gmx_omp_nthreads_get(emntBonded);
- /* This reduction can run over any number of threads */
-#pragma omp parallel for num_threads(nthreads_loop) private(t) schedule(static)
- for (i = 0; i < n; i++)
- {
- for (t = 1; t < nthreads; t++)
- {
- rvec_inc(f[i], f_t[t].f[i]);
- }
- }
for (t = 1; t < nthreads; t++)
{
- *Vcorr_q += f_t[t].Vcorr_q;
- *Vcorr_lj += f_t[t].Vcorr_lj;
- *dvdl_q += f_t[t].dvdl[efptCOUL];
- *dvdl_lj += f_t[t].dvdl[efptVDW];
- m_add(vir_q, f_t[t].vir_q, vir_q);
- m_add(vir_lj, f_t[t].vir_lj, vir_lj);
+ *Vcorr_q += ewc_t[t].Vcorr_q;
+ *Vcorr_lj += ewc_t[t].Vcorr_lj;
+ *dvdl_q += ewc_t[t].dvdl[efptCOUL];
+ *dvdl_lj += ewc_t[t].dvdl[efptVDW];
+ m_add(vir_q, ewc_t[t].vir_q, vir_q);
+ m_add(vir_lj, ewc_t[t].vir_lj, vir_lj);
}
}
gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions");
}
- nthreads = gmx_omp_nthreads_get(emntBonded);
+ nthreads = fr->nthread_ewc;
#pragma omp parallel for num_threads(nthreads) schedule(static)
for (t = 0; t < nthreads; t++)
{
- int i;
- rvec *fnv;
tensor *vir_q, *vir_lj;
real *Vcorrt_q, *Vcorrt_lj, *dvdlt_q, *dvdlt_lj;
if (t == 0)
{
- fnv = fr->f_novirsum;
vir_q = &fr->vir_el_recip;
vir_lj = &fr->vir_lj_recip;
Vcorrt_q = &Vcorr_q;
}
else
{
- fnv = fr->f_t[t].f;
- vir_q = &fr->f_t[t].vir_q;
- vir_lj = &fr->f_t[t].vir_lj;
- Vcorrt_q = &fr->f_t[t].Vcorr_q;
- Vcorrt_lj = &fr->f_t[t].Vcorr_lj;
- dvdlt_q = &fr->f_t[t].dvdl[efptCOUL];
- dvdlt_lj = &fr->f_t[t].dvdl[efptVDW];
- for (i = 0; i < fr->natoms_force; i++)
- {
- clear_rvec(fnv[i]);
- }
+ vir_q = &fr->ewc_t[t].vir_q;
+ vir_lj = &fr->ewc_t[t].vir_lj;
+ Vcorrt_q = &fr->ewc_t[t].Vcorr_q;
+ Vcorrt_lj = &fr->ewc_t[t].Vcorr_lj;
+ dvdlt_q = &fr->ewc_t[t].dvdl[efptCOUL];
+ dvdlt_lj = &fr->ewc_t[t].dvdl[efptVDW];
clear_mat(*vir_q);
clear_mat(*vir_lj);
}
*dvdlt_q = 0;
*dvdlt_lj = 0;
+ /* Threading is only supported with the Verlet cut-off
+ * scheme and then only single particle forces (no
+ * exclusion forces) are calculated, so we can store
+ * the forces in the normal, single fr->f_novirsum array.
+ */
ewald_LRcorrection(fr->excl_load[t], fr->excl_load[t+1],
cr, t, fr,
md->chargeA, md->chargeB,
excl, x, bSB ? boxs : box, mu_tot,
ir->ewald_geometry,
ir->epsilon_surface,
- fnv, *vir_q, *vir_lj,
+ fr->f_novirsum, *vir_q, *vir_lj,
Vcorrt_q, Vcorrt_lj,
lambda[efptCOUL], lambda[efptVDW],
dvdlt_q, dvdlt_lj);
}
if (nthreads > 1)
{
- reduce_thread_forces(fr->natoms_force, fr->f_novirsum,
- fr->vir_el_recip, fr->vir_lj_recip,
- &Vcorr_q, &Vcorr_lj,
- &dvdl_long_range_correction_q,
- &dvdl_long_range_correction_lj,
- nthreads, fr->f_t);
+ reduce_thread_energies(fr->vir_el_recip, fr->vir_lj_recip,
+ &Vcorr_q, &Vcorr_lj,
+ &dvdl_long_range_correction_q,
+ &dvdl_long_range_correction_lj,
+ nthreads, fr->ewc_t);
}
wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
}
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2014, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#ifndef GMX_MDLIB_FORCEREC_THREADING_H
#define GMX_MDLIB_FORCEREC_THREADING_H
-#include "gromacs/utility/bitmask.h"
-
#ifdef __cplusplus
extern "C" {
#endif
-struct f_thread_t {
- rvec *f;
- int f_nalloc;
- gmx_bitmask_t red_mask; /* Mask for marking which parts of f are filled */
- rvec *fshift;
- real ener[F_NRE];
- gmx_grppairener_t grpp;
+struct ewald_corr_thread_t {
real Vcorr_q;
real Vcorr_lj;
real dvdl[efptNR];
}
/* Initialize the thread working data for bonded interactions */
- init_bonded_threading(fp, fr, mtop->groups.grps[egcENER].nr);
+ init_bonded_threading(fp, mtop->groups.grps[egcENER].nr,
+ &fr->bonded_threading);
- snew(fr->excl_load, fr->nthreads+1);
+ fr->nthread_ewc = gmx_omp_nthreads_get(emntBonded);
+ snew(fr->ewc_t, fr->nthread_ewc);
+ snew(fr->excl_load, fr->nthread_ewc + 1);
/* fr->ic is used both by verlet and group kernels (to some extent) now */
init_interaction_const(fp, &fr->ic, fr);
fr->excl_load[0] = 0;
n = 0;
i = 0;
- for (t = 1; t <= fr->nthreads; t++)
+ for (t = 1; t <= fr->nthread_ewc; t++)
{
- ntarget = (ntot*t)/fr->nthreads;
+ ntarget = (ntot*t)/fr->nthread_ewc;
while (i < top->excls.nr && n < ntarget)
{
for (j = ind[i]; j < ind[i+1]; j++)
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2008, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "gromacs/legacyheaders/genborn.h"
-#include <math.h>
#include <string.h>
+#include <cmath>
+
+#include <algorithm>
+
#include "gromacs/domdec/domdec.h"
#include "gromacs/fileio/pdbio.h"
#include "gromacs/legacyheaders/names.h"
#include "gromacs/legacyheaders/types/commrec.h"
#include "gromacs/math/units.h"
#include "gromacs/math/vec.h"
+#include "gromacs/mdlib/genborn_allvsall.h"
#include "gromacs/pbcutil/ishift.h"
#include "gromacs/pbcutil/mshift.h"
#include "gromacs/pbcutil/pbc.h"
#include "gromacs/utility/gmxmpi.h"
#include "gromacs/utility/smalloc.h"
-#ifdef GMX_SIMD_X86_SSE2_OR_HIGHER
-# ifdef GMX_DOUBLE
-# include "gromacs/mdlib/genborn_allvsall_sse2_double.h"
-# include "gromacs/mdlib/genborn_sse2_double.h"
-# else
-# include "gromacs/mdlib/genborn_allvsall_sse2_single.h"
-# include "gromacs/mdlib/genborn_sse2_single.h"
-# endif /* GMX_DOUBLE */
-#endif /* SSE or AVX present */
-
-#include "gromacs/mdlib/genborn_allvsall.h"
-
-/*#define DISABLE_SSE*/
typedef struct {
int shift;
gmx_genborn_t *born, int natoms)
{
- int i, j, i1, i2, k, m, nbond, nang, ia, ib, ic, id, nb, idx, idx2, at;
- int iam, ibm;
- int at0, at1;
- real length, angle;
- real r, ri, rj, ri2, ri3, rj2, r2, r3, r4, rk, ratio, term, h, doffset;
- real p1, p2, p3, factor, cosine, rab, rbc;
+ int i, j, m, ia, ib;
+ real r, ri, rj, ri2, rj2, r3, r4, ratio, term, h, doffset;
real *vsol;
real *gp;
snew(gp, natoms);
snew(born->gpol_still_work, natoms+3);
- at0 = 0;
- at1 = natoms;
-
doffset = born->gb_doffset;
for (i = 0; i < natoms; i++)
rj = atype->gb_radius[atoms->atom[ib].type];
ri2 = ri*ri;
- ri3 = ri2*ri;
rj2 = rj*rj;
ratio = (rj2-ri2-r*r)/(2*ri*r);
t_forcerec *fr, const t_inputrec *ir,
const gmx_mtop_t *mtop, int gb_algorithm)
{
- int i, j, m, ai, aj, jj, natoms, nalloc;
- real rai, sk, p, doffset;
+ int i, jj, natoms;
+ real rai, sk, doffset;
t_atoms atoms;
gmx_genborn_t *born;
rvec x[], t_nblist *nl,
gmx_genborn_t *born, t_mdatoms *md)
{
- int i, k, n, nj0, nj1, ai, aj, type;
+ int i, k, n, nj0, nj1, ai, aj;
int shift;
real shX, shY, shZ;
- real gpi, dr, dr2, dr4, idr4, rvdw, ratio, ccf, theta, term, rai, raj;
+ real gpi, dr2, idr4, rvdw, ratio, ccf, theta, term, rai, raj;
real ix1, iy1, iz1, jx1, jy1, jz1, dx11, dy11, dz11;
real rinv, idr2, idr6, vaj, dccf, cosq, sinq, prod, gpi2;
real factor;
rvec x[], t_nblist *nl,
gmx_genborn_t *born, t_mdatoms *md)
{
- int i, k, n, ai, aj, nj0, nj1, at0, at1;
+ int i, k, n, ai, aj, nj0, nj1;
int shift;
real shX, shY, shZ;
- real rai, raj, gpi, dr2, dr, sk, sk_ai, sk2, sk2_ai, lij, uij, diff2, tmp, sum_ai;
+ real rai, raj, dr2, dr, sk, sk_ai, sk2, sk2_ai, lij, uij, diff2, tmp, sum_ai;
real rad, min_rad, rinv, rai_inv;
real ix1, iy1, iz1, jx1, jy1, jz1, dx11, dy11, dz11;
real lij2, uij2, lij3, uij3, t1, t2, t3;
- real lij_inv, dlij, duij, sk2_rinv, prod, log_term;
+ real lij_inv, dlij, sk2_rinv, prod, log_term;
real doffset, raj_inv, dadx_val;
real *gb_radius;
/* Keep the compiler happy */
n = 0;
- prod = 0;
for (i = 0; i < nl->nri; i++)
{
sk2_rinv = sk2*rinv;
prod = 0.25*sk2_rinv;
- log_term = log(uij*lij_inv);
+ log_term = std::log(uij*lij_inv);
tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term +
prod*(-diff2);
/* log_term = table_log(uij*lij_inv,born->log_table,
LOG_TABLE_ACCURACY); */
- log_term = log(uij*lij_inv);
+ log_term = std::log(uij*lij_inv);
tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term +
prod*(-diff2);
min_rad = rai + doffset;
rad = 1.0/sum_ai;
- born->bRad[i] = rad > min_rad ? rad : min_rad;
+ born->bRad[i] = std::max(rad, min_rad);
fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
}
}
calc_gb_rad_obc(t_commrec *cr, t_forcerec *fr, gmx_localtop_t *top,
rvec x[], t_nblist *nl, gmx_genborn_t *born, t_mdatoms *md)
{
- int i, k, ai, aj, nj0, nj1, n, at0, at1;
+ int i, k, ai, aj, nj0, nj1, n;
int shift;
real shX, shY, shZ;
- real rai, raj, gpi, dr2, dr, sk, sk2, lij, uij, diff2, tmp, sum_ai;
- real rad, min_rad, sum_ai2, sum_ai3, tsum, tchain, rinv, rai_inv, lij_inv, rai_inv2;
+ real rai, raj, dr2, dr, sk, sk2, lij, uij, diff2, tmp, sum_ai;
+ real sum_ai2, sum_ai3, tsum, tchain, rinv, rai_inv, lij_inv, rai_inv2;
real log_term, prod, sk2_rinv, sk_ai, sk2_ai;
real ix1, iy1, iz1, jx1, jy1, jz1, dx11, dy11, dz11;
- real lij2, uij2, lij3, uij3, dlij, duij, t1, t2, t3;
+ real lij2, uij2, lij3, uij3, dlij, t1, t2, t3;
real doffset, raj_inv, dadx_val;
real *gb_radius;
/* Keep the compiler happy */
n = 0;
- prod = 0;
- raj = 0;
doffset = born->gb_doffset;
gb_radius = born->gb_radius;
sk2_rinv = sk2*rinv;
prod = 0.25*sk2_rinv;
- log_term = log(uij*lij_inv);
+ log_term = std::log(uij*lij_inv);
tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
prod = 0.25 * sk2_rinv;
/* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
- log_term = log(uij*lij_inv);
+ log_term = std::log(uij*lij_inv);
tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
int calc_gb_rad(t_commrec *cr, t_forcerec *fr, t_inputrec *ir, gmx_localtop_t *top,
rvec x[], t_nblist *nl, gmx_genborn_t *born, t_mdatoms *md, t_nrnb *nrnb)
{
- real *p;
int cnt;
int ndadx;
if (ir->gb_algorithm == egbSTILL)
{
-#if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER)
- if (fr->use_simd_kernels)
- {
-# ifdef GMX_DOUBLE
- genborn_allvsall_calc_still_radii_sse2_double(fr, md, born, top, x[0], cr, &fr->AllvsAll_workgb);
-# else
- genborn_allvsall_calc_still_radii_sse2_single(fr, md, born, top, x[0], cr, &fr->AllvsAll_workgb);
-# endif
- }
- else
- {
- genborn_allvsall_calc_still_radii(fr, md, born, top, x[0], cr, &fr->AllvsAll_workgb);
- }
-#else
genborn_allvsall_calc_still_radii(fr, md, born, top, x[0], &fr->AllvsAll_workgb);
-#endif
/* 13 flops in outer loop, 47 flops in inner loop */
inc_nrnb(nrnb, eNR_BORN_AVA_RADII_STILL, md->homenr*13+cnt*47);
}
else if (ir->gb_algorithm == egbHCT || ir->gb_algorithm == egbOBC)
{
-#if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER)
- if (fr->use_simd_kernels)
- {
-# ifdef GMX_DOUBLE
- genborn_allvsall_calc_hct_obc_radii_sse2_double(fr, md, born, ir->gb_algorithm, top, x[0], cr, &fr->AllvsAll_workgb);
-# else
- genborn_allvsall_calc_hct_obc_radii_sse2_single(fr, md, born, ir->gb_algorithm, top, x[0], cr, &fr->AllvsAll_workgb);
-# endif
- }
- else
- {
- genborn_allvsall_calc_hct_obc_radii(fr, md, born, ir->gb_algorithm, top, x[0], cr, &fr->AllvsAll_workgb);
- }
-#else
genborn_allvsall_calc_hct_obc_radii(fr, md, born, ir->gb_algorithm, top, x[0], &fr->AllvsAll_workgb);
-#endif
/* 24 flops in outer loop, 183 in inner */
inc_nrnb(nrnb, eNR_BORN_AVA_RADII_HCT_OBC, md->homenr*24+cnt*183);
}
/* Switch for determining which algorithm to use for Born radii calculation */
#ifdef GMX_DOUBLE
-#if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER)
- /* x86 or x86-64 with GCC inline assembly and/or SSE intrinsics */
- switch (ir->gb_algorithm)
- {
- case egbSTILL:
- if (fr->use_simd_kernels)
- {
- calc_gb_rad_still_sse2_double(cr, fr, born->nr, top, atype, x[0], nl, born);
- }
- else
- {
- calc_gb_rad_still(cr, fr, top, x, nl, born, md);
- }
- break;
- case egbHCT:
- if (fr->use_simd_kernels)
- {
- calc_gb_rad_hct_obc_sse2_double(cr, fr, born->nr, top, atype, x[0], nl, born, md, ir->gb_algorithm);
- }
- else
- {
- calc_gb_rad_hct(cr, fr, top, x, nl, born, md);
- }
- break;
- case egbOBC:
- if (fr->use_simd_kernels)
- {
- calc_gb_rad_hct_obc_sse2_double(cr, fr, born->nr, top, atype, x[0], nl, born, md, ir->gb_algorithm);
- }
- else
- {
- calc_gb_rad_obc(cr, fr, born->nr, top, x, nl, born, md);
- }
- break;
-
- default:
- gmx_fatal(FARGS, "Unknown double precision sse-enabled algorithm for Born radii calculation: %d", ir->gb_algorithm);
- }
-#else
switch (ir->gb_algorithm)
{
case egbSTILL:
gmx_fatal(FARGS, "Unknown double precision algorithm for Born radii calculation: %d", ir->gb_algorithm);
}
-#endif
-
#else
-#if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER)
- /* x86 or x86-64 with GCC inline assembly and/or SSE intrinsics */
- switch (ir->gb_algorithm)
- {
- case egbSTILL:
- if (fr->use_simd_kernels)
- {
- calc_gb_rad_still_sse2_single(cr, fr, born->nr, top, x[0], nl, born);
- }
- else
- {
- calc_gb_rad_still(cr, fr, top, x, nl, born, md);
- }
- break;
- case egbHCT:
- if (fr->use_simd_kernels)
- {
- calc_gb_rad_hct_obc_sse2_single(cr, fr, born->nr, top, x[0], nl, born, md, ir->gb_algorithm);
- }
- else
- {
- calc_gb_rad_hct(cr, fr, top, x, nl, born, md);
- }
- break;
-
- case egbOBC:
- if (fr->use_simd_kernels)
- {
- calc_gb_rad_hct_obc_sse2_single(cr, fr, born->nr, top, x[0], nl, born, md, ir->gb_algorithm);
- }
- else
- {
- calc_gb_rad_obc(cr, fr, born->nr, top, x, nl, born, md);
- }
- break;
-
- default:
- gmx_fatal(FARGS, "Unknown sse-enabled algorithm for Born radii calculation: %d", ir->gb_algorithm);
- }
-
-#else
switch (ir->gb_algorithm)
{
case egbSTILL:
gmx_fatal(FARGS, "Unknown algorithm for Born radii calculation: %d", ir->gb_algorithm);
}
-#endif /* Single precision sse */
-
#endif /* Double or single precision */
if (fr->bAllvsAll == FALSE)
real *invsqrta, real *dvda, real *GBtab, t_idef *idef, real epsilon_r,
real gb_epsilon_solvent, real facel, const t_pbc *pbc, const t_graph *graph)
{
- int i, j, n0, m, nnn, type, ai, aj;
+ int i, j, n0, m, nnn, ai, aj;
int ki;
real isai, isaj;
real r, rsq11;
real rinv11, iq;
real isaprod, qq, gbscale, gbtabscale, Y, F, Geps, Heps2, Fp, VV, FF, rt, eps, eps2;
- real vgb, fgb, vcoul, fijC, dvdatmp, fscal, dvdaj;
+ real vgb, fgb, fijC, dvdatmp, fscal;
real vctot;
rvec dx;
gbscale = isaprod*gbtabscale;
r = rsq11*rinv11;
rt = r*gbscale;
- n0 = rt;
+ n0 = static_cast<int>(rt);
eps = rt-n0;
eps2 = eps*eps;
nnn = 4*n0;
real *dvda, t_mdatoms *md)
{
int ai, i, at0, at1;
- real e, es, rai, rbi, term, probe, tmp, factor;
+ real e, es, rai, term, probe, tmp, factor;
real rbi_inv, rbi_inv2;
- /* To keep the compiler happy */
- factor = 0;
-
if (DOMAINDECOMP(cr))
{
at0 = 0;
/* factor is the surface tension */
factor = born->sa_surface_tension;
- /*
-
- // The surface tension factor is 0.0049 for Still model, 0.0054 for HCT/OBC
- if(gb_algorithm==egbSTILL)
- {
- factor=0.0049*100*CAL2JOULE;
- }
- else
- {
- factor=0.0054*100*CAL2JOULE;
- }
- */
- /* if(gb_algorithm==egbHCT || gb_algorithm==egbOBC) */
es = 0;
probe = 0.14;
int i, k, n, ai, aj, nj0, nj1, n0, n1;
int shift;
real shX, shY, shZ;
- real fgb, fij, rb2, rbi, fix1, fiy1, fiz1;
- real ix1, iy1, iz1, jx1, jy1, jz1, dx11, dy11, dz11, rsq11;
- real rinv11, tx, ty, tz, rbai, rbaj, fgb_ai;
+ real fgb, rbi, fix1, fiy1, fiz1;
+ real ix1, iy1, iz1, jx1, jy1, jz1, dx11, dy11, dz11;
+ real tx, ty, tz, rbai, rbaj, fgb_ai;
real *rb;
- volatile int idx;
n = 0;
rb = born->work;
rvec x[], rvec f[], t_forcerec *fr, t_idef *idef, int gb_algorithm, int sa_algorithm, t_nrnb *nrnb,
const t_pbc *pbc, const t_graph *graph, gmx_enerdata_t *enerd)
{
- real v = 0;
int cnt;
- int i;
/* PBC or not? */
const t_pbc *pbc_null;
if (fr->bAllvsAll)
{
-#if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER)
- if (fr->use_simd_kernels)
- {
-# ifdef GMX_DOUBLE
- genborn_allvsall_calc_chainrule_sse2_double(fr, md, born, x[0], f[0], gb_algorithm, fr->AllvsAll_workgb);
-# else
- genborn_allvsall_calc_chainrule_sse2_single(fr, md, born, x[0], f[0], gb_algorithm, fr->AllvsAll_workgb);
-# endif
- }
- else
- {
- genborn_allvsall_calc_chainrule(fr, md, born, x[0], f[0], gb_algorithm, fr->AllvsAll_workgb);
- }
-#else
genborn_allvsall_calc_chainrule(fr, md, born, x[0], f[0], gb_algorithm, fr->AllvsAll_workgb);
-#endif
cnt = md->homenr*(md->nr/2+1);
/* 9 flops for outer loop, 15 for inner */
inc_nrnb(nrnb, eNR_BORN_AVA_CHAINRULE, md->homenr*9+cnt*15);
return;
}
-#if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER)
- if (fr->use_simd_kernels)
- {
-# ifdef GMX_DOUBLE
- calc_gb_chainrule_sse2_double(fr->natoms_force, &(fr->gblist), fr->dadx, fr->dvda, x[0],
- f[0], fr->fshift[0], fr->shift_vec[0], gb_algorithm, born, md);
-# else
- calc_gb_chainrule_sse2_single(fr->natoms_force, &(fr->gblist), fr->dadx, fr->dvda, x[0],
- f[0], fr->fshift[0], fr->shift_vec[0], gb_algorithm, born, md);
-# endif
- }
- else
- {
- calc_gb_chainrule(fr->natoms_force, &(fr->gblist), fr->dadx, fr->dvda,
- x, f, fr->fshift, fr->shift_vec, gb_algorithm, born, md);
- }
-#else
calc_gb_chainrule(fr->natoms_force, &(fr->gblist), fr->dadx, fr->dvda,
x, f, fr->fshift, fr->shift_vec, gb_algorithm, born);
-#endif
if (!fr->bAllvsAll)
{
gmx_bool bMolPBC, t_pbc *pbc, t_graph *g, rvec *x,
struct gbtmpnbls *nls)
{
- int ind, j, ai, aj, shift, found;
+ int ind, j, ai, aj, found;
rvec dx;
ivec dt;
gbtmpnbl_t *list;
- shift = CENTRAL;
for (ind = 0; ind < il->nr; ind += 3)
{
ai = il->iatoms[ind+1];
aj = il->iatoms[ind+2];
- shift = CENTRAL;
+ int shift = CENTRAL;
if (g != NULL)
{
rvec_sub(x[ai], x[aj], dx);
rvec x[], matrix box,
t_forcerec *fr, t_idef *idef, t_graph *graph, gmx_genborn_t *born)
{
- int i, l, ii, j, k, n, nj0, nj1, ai, aj, at0, at1, found, shift, s;
- int apa;
+ int i, j, k, n, nj0, nj1, ai, shift, s;
t_nblist *nblist;
t_pbc pbc;
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2009, The GROMACS Development Team.
- * Copyright (c) 2010,2014, by the GROMACS development team, led by
+ * Copyright (c) 2010,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "genborn_allvsall.h"
-#include <math.h>
+#include <cmath>
+
+#include <algorithm>
#include "gromacs/legacyheaders/genborn.h"
#include "gromacs/legacyheaders/network.h"
gmx_bool bInclude13,
gmx_bool bInclude14)
{
- int i, j, k, tp;
+ int i, j, k;
int a1, a2;
- int nj0, nj1;
int max_offset;
int max_excl_offset;
- int nj;
/* This routine can appear to be a bit complex, but it is mostly book-keeping.
* To enable the fast all-vs-all kernel we need to be able to stream through all coordinates
}
if (k > 0 && k <= max_offset)
{
- max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
+ max_excl_offset = std::max(k, max_excl_offset);
}
}
}
}
if (k > 0 && k <= max_offset)
{
- max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
+ max_excl_offset = std::max(k, max_excl_offset);
}
}
}
}
if (k > 0 && k <= max_offset)
{
- max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
+ max_excl_offset = std::max(k, max_excl_offset);
}
}
}
- max_excl_offset = (max_offset < max_excl_offset) ? max_offset : max_excl_offset;
+ max_excl_offset = std::min(max_offset, max_excl_offset);
aadata->jindex_gb[3*i+1] = i+1+max_excl_offset;
gmx_bool bInclude13,
gmx_bool bInclude14)
{
- int i, j, idx;
gmx_allvsallgb2_data_t *aadata;
- real *p;
snew(aadata, 1);
*p_aadata = aadata;
ni1 = mdatoms->homenr;
n = 0;
- prod = 0;
- raj = 0;
doffset = born->gb_doffset;
aadata = *((gmx_allvsallgb2_data_t **)work);
sk2_rinv = sk2*rinv;
prod = 0.25*sk2_rinv;
- log_term = log(uij*lij_inv);
+ log_term = std::log(uij*lij_inv);
/* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
prod = 0.25 * sk2_rinv;
/* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
- log_term = log(uij*lij_inv);
+ log_term = std::log(uij*lij_inv);
tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
sk2_rinv = sk2*rinv;
prod = 0.25*sk2_rinv;
- log_term = log(uij*lij_inv);
+ log_term = std::log(uij*lij_inv);
/* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
prod = 0.25 * sk2_rinv;
/* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
- log_term = log(uij*lij_inv);
+ log_term = std::log(uij*lij_inv);
tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
min_rad = rai + born->gb_doffset;
rad = 1.0/sum_ai;
- born->bRad[i] = rad > min_rad ? rad : min_rad;
+ born->bRad[i] = std::max(rad, min_rad);
fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
}
}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team.
- * Copyright (c) 2012,2014, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#include "gmxpre.h"
-
-#include <math.h>
-
-#include "gromacs/legacyheaders/genborn.h"
-#include "gromacs/legacyheaders/network.h"
-#include "gromacs/legacyheaders/types/simple.h"
-#include "gromacs/math/units.h"
-#include "gromacs/math/vec.h"
-#include "gromacs/mdlib/genborn_allvsall.h"
-#include "gromacs/utility/smalloc.h"
-
-
-#if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER)
-
-#include <gmx_sse2_double.h>
-
-
-#define SIMD_WIDTH 2
-#define UNROLLI 2
-#define UNROLLJ 2
-
-
-
-
-
-
-
-
-
-typedef struct
-{
- int * jindex_gb;
- int ** prologue_mask_gb;
- int ** epilogue_mask;
- int * imask;
- double * gb_radius;
- double * workparam;
- double * work;
- double * x_align;
- double * y_align;
- double * z_align;
- double * fx_align;
- double * fy_align;
- double * fz_align;
-}
-gmx_allvsallgb2_data_t;
-
-
-static int
-calc_maxoffset(int i, int natoms)
-{
- int maxoffset;
-
- if ((natoms % 2) == 1)
- {
- /* Odd number of atoms, easy */
- maxoffset = natoms/2;
- }
- else if ((natoms % 4) == 0)
- {
- /* Multiple of four is hard */
- if (i < natoms/2)
- {
- if ((i % 2) == 0)
- {
- maxoffset = natoms/2;
- }
- else
- {
- maxoffset = natoms/2-1;
- }
- }
- else
- {
- if ((i % 2) == 1)
- {
- maxoffset = natoms/2;
- }
- else
- {
- maxoffset = natoms/2-1;
- }
- }
- }
- else
- {
- /* natoms/2 = odd */
- if ((i % 2) == 0)
- {
- maxoffset = natoms/2;
- }
- else
- {
- maxoffset = natoms/2-1;
- }
- }
-
- return maxoffset;
-}
-
-static void
-setup_gb_exclusions_and_indices(gmx_allvsallgb2_data_t * aadata,
- t_ilist * ilist,
- int start,
- int end,
- int natoms,
- gmx_bool bInclude12,
- gmx_bool bInclude13,
- gmx_bool bInclude14)
-{
- int i, j, k, tp;
- int a1, a2;
- int ni0, ni1, nj0, nj1, nj;
- int imin, imax, iexcl;
- int max_offset;
- int max_excl_offset;
- int firstinteraction;
- int ibase;
- int *pi;
-
- /* This routine can appear to be a bit complex, but it is mostly book-keeping.
- * To enable the fast all-vs-all kernel we need to be able to stream through all coordinates
- * whether they should interact or not.
- *
- * To avoid looping over the exclusions, we create a simple mask that is 1 if the interaction
- * should be present, otherwise 0. Since exclusions typically only occur when i & j are close,
- * we create a jindex array with three elements per i atom: the starting point, the point to
- * which we need to check exclusions, and the end point.
- * This way we only have to allocate a short exclusion mask per i atom.
- */
-
- ni0 = (start/UNROLLI)*UNROLLI;
- ni1 = ((end+UNROLLI-1)/UNROLLI)*UNROLLI;
-
- /* Set the interaction mask to only enable the i atoms we want to include */
- snew(pi, 2*(natoms+UNROLLI+2*SIMD_WIDTH));
- aadata->imask = (int *) (((size_t) pi + 16) & (~((size_t) 15)));
- for (i = 0; i < natoms+UNROLLI; i++)
- {
- aadata->imask[2*i] = (i >= start && i < end) ? 0xFFFFFFFF : 0;
- aadata->imask[2*i+1] = (i >= start && i < end) ? 0xFFFFFFFF : 0;
- }
-
- /* Allocate memory for our modified jindex array */
- snew(aadata->jindex_gb, 4*(natoms+UNROLLI));
- for (i = 0; i < 4*(natoms+UNROLLI); i++)
- {
- aadata->jindex_gb[i] = 0;
- }
-
- /* Create the exclusion masks for the prologue part */
- snew(aadata->prologue_mask_gb, natoms+UNROLLI); /* list of pointers */
-
- /* First zero everything to avoid uninitialized data */
- for (i = 0; i < natoms+UNROLLI; i++)
- {
- aadata->prologue_mask_gb[i] = NULL;
- }
-
- /* Calculate the largest exclusion range we need for each UNROLLI-tuplet of i atoms. */
- for (ibase = ni0; ibase < ni1; ibase += UNROLLI)
- {
- max_excl_offset = -1;
-
- /* First find maxoffset for the next 4 atoms (or fewer if we are close to end) */
- imax = ((ibase+UNROLLI) < end) ? (ibase+UNROLLI) : end;
-
- /* Which atom is the first we (might) interact with? */
- imin = natoms; /* Guaranteed to be overwritten by one of 'firstinteraction' */
- for (i = ibase; i < imax; i++)
- {
- /* Before exclusions, which atom is the first we (might) interact with? */
- firstinteraction = i+1;
- max_offset = calc_maxoffset(i, natoms);
-
- if (!bInclude12)
- {
- for (j = 0; j < ilist[F_GB12].nr; j += 3)
- {
- a1 = ilist[F_GB12].iatoms[j+1];
- a2 = ilist[F_GB12].iatoms[j+2];
-
- if (a1 == i)
- {
- k = a2;
- }
- else if (a2 == i)
- {
- k = a1;
- }
- else
- {
- continue;
- }
-
- if (k == firstinteraction)
- {
- firstinteraction++;
- }
- }
- }
- if (!bInclude13)
- {
- for (j = 0; j < ilist[F_GB13].nr; j += 3)
- {
- a1 = ilist[F_GB13].iatoms[j+1];
- a2 = ilist[F_GB13].iatoms[j+2];
-
- if (a1 == i)
- {
- k = a2;
- }
- else if (a2 == i)
- {
- k = a1;
- }
- else
- {
- continue;
- }
-
- if (k == firstinteraction)
- {
- firstinteraction++;
- }
- }
- }
- if (!bInclude14)
- {
- for (j = 0; j < ilist[F_GB14].nr; j += 3)
- {
- a1 = ilist[F_GB14].iatoms[j+1];
- a2 = ilist[F_GB14].iatoms[j+2];
- if (a1 == i)
- {
- k = a2;
- }
- else if (a2 == i)
- {
- k = a1;
- }
- else
- {
- continue;
- }
-
- if (k == firstinteraction)
- {
- firstinteraction++;
- }
- }
- }
- imin = (firstinteraction < imin) ? firstinteraction : imin;
- }
- /* round down to j unrolling factor */
- imin = (imin/UNROLLJ)*UNROLLJ;
-
- for (i = ibase; i < imax; i++)
- {
- max_offset = calc_maxoffset(i, natoms);
-
- if (!bInclude12)
- {
- for (j = 0; j < ilist[F_GB12].nr; j += 3)
- {
- a1 = ilist[F_GB12].iatoms[j+1];
- a2 = ilist[F_GB12].iatoms[j+2];
-
- if (a1 == i)
- {
- k = a2;
- }
- else if (a2 == i)
- {
- k = a1;
- }
- else
- {
- continue;
- }
-
- if (k < imin)
- {
- k += natoms;
- }
-
- if (k > i+max_offset)
- {
- continue;
- }
-
- k = k - imin;
-
- if (k+natoms <= max_offset)
- {
- k += natoms;
- }
- max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
- }
- }
- if (!bInclude13)
- {
- for (j = 0; j < ilist[F_GB13].nr; j += 3)
- {
- a1 = ilist[F_GB13].iatoms[j+1];
- a2 = ilist[F_GB13].iatoms[j+2];
-
- if (a1 == i)
- {
- k = a2;
- }
- else if (a2 == i)
- {
- k = a1;
- }
- else
- {
- continue;
- }
-
- if (k < imin)
- {
- k += natoms;
- }
-
- if (k > i+max_offset)
- {
- continue;
- }
-
- k = k - imin;
-
- if (k+natoms <= max_offset)
- {
- k += natoms;
- }
- max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
- }
- }
- if (!bInclude14)
- {
- for (j = 0; j < ilist[F_GB14].nr; j += 3)
- {
- a1 = ilist[F_GB14].iatoms[j+1];
- a2 = ilist[F_GB14].iatoms[j+2];
-
- if (a1 == i)
- {
- k = a2;
- }
- else if (a2 == i)
- {
- k = a1;
- }
- else
- {
- continue;
- }
-
- if (k < imin)
- {
- k += natoms;
- }
-
- if (k > i+max_offset)
- {
- continue;
- }
-
- k = k - imin;
-
- if (k+natoms <= max_offset)
- {
- k += natoms;
- }
- max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
- }
- }
- }
-
- /* The offset specifies the last atom to be excluded, so add one unit to get an upper loop limit */
- max_excl_offset++;
- /* round up to j unrolling factor */
- max_excl_offset = (max_excl_offset/UNROLLJ+1)*UNROLLJ;
-
- /* Set all the prologue masks length to this value (even for i>end) */
- for (i = ibase; i < ibase+UNROLLI; i++)
- {
- aadata->jindex_gb[4*i] = imin;
- aadata->jindex_gb[4*i+1] = imin+max_excl_offset;
- }
- }
-
- /* Now the hard part, loop over it all again to calculate the actual contents of the prologue masks */
- for (ibase = ni0; ibase < ni1; ibase += UNROLLI)
- {
- for (i = ibase; i < ibase+UNROLLI; i++)
- {
- nj = aadata->jindex_gb[4*i+1] - aadata->jindex_gb[4*i];
- imin = aadata->jindex_gb[4*i];
-
- /* Allocate aligned memory */
- snew(pi, 2*(nj+2*SIMD_WIDTH));
- aadata->prologue_mask_gb[i] = (int *) (((size_t) pi + 16) & (~((size_t) 15)));
-
- max_offset = calc_maxoffset(i, natoms);
-
- /* Include interactions i+1 <= j < i+maxoffset */
- for (k = 0; k < nj; k++)
- {
- j = imin + k;
-
- if ( (j > i) && (j <= i+max_offset) )
- {
- aadata->prologue_mask_gb[i][2*k] = 0xFFFFFFFF;
- aadata->prologue_mask_gb[i][2*k+1] = 0xFFFFFFFF;
- }
- else
- {
- aadata->prologue_mask_gb[i][2*k] = 0;
- aadata->prologue_mask_gb[i][2*k+1] = 0;
- }
- }
-
- /* Clear out the explicit exclusions */
- if (i < end)
- {
- if (!bInclude12)
- {
- for (j = 0; j < ilist[F_GB12].nr; j += 3)
- {
- a1 = ilist[F_GB12].iatoms[j+1];
- a2 = ilist[F_GB12].iatoms[j+2];
-
- if (a1 == i)
- {
- k = a2;
- }
- else if (a2 == i)
- {
- k = a1;
- }
- else
- {
- continue;
- }
-
- if (k > i+max_offset)
- {
- continue;
- }
- k = k-i;
-
- if (k+natoms <= max_offset)
- {
- k += natoms;
- }
-
- k = k+i-imin;
- if (k >= 0)
- {
- aadata->prologue_mask_gb[i][2*k] = 0;
- aadata->prologue_mask_gb[i][2*k+1] = 0;
- }
- }
- }
- if (!bInclude13)
- {
- for (j = 0; j < ilist[F_GB13].nr; j += 3)
- {
- a1 = ilist[F_GB13].iatoms[j+1];
- a2 = ilist[F_GB13].iatoms[j+2];
-
- if (a1 == i)
- {
- k = a2;
- }
- else if (a2 == i)
- {
- k = a1;
- }
- else
- {
- continue;
- }
-
- if (k > i+max_offset)
- {
- continue;
- }
- k = k-i;
-
- if (k+natoms <= max_offset)
- {
- k += natoms;
- }
-
- k = k+i-imin;
- if (k >= 0)
- {
- aadata->prologue_mask_gb[i][2*k] = 0;
- aadata->prologue_mask_gb[i][2*k+1] = 0;
- }
- }
- }
- if (!bInclude14)
- {
- for (j = 0; j < ilist[F_GB14].nr; j += 3)
- {
- a1 = ilist[F_GB14].iatoms[j+1];
- a2 = ilist[F_GB14].iatoms[j+2];
-
- if (a1 == i)
- {
- k = a2;
- }
- else if (a2 == i)
- {
- k = a1;
- }
- else
- {
- continue;
- }
-
- if (k > i+max_offset)
- {
- continue;
- }
- k = k-i;
-
- if (k+natoms <= max_offset)
- {
- k += natoms;
- }
-
- k = k+i-imin;
- if (k >= 0)
- {
- aadata->prologue_mask_gb[i][2*k] = 0;
- aadata->prologue_mask_gb[i][2*k+1] = 0;
- }
- }
- }
- }
- }
- }
-
- /* Construct the epilogue mask - this just contains the check for maxoffset */
- snew(aadata->epilogue_mask, natoms+UNROLLI);
-
- /* First zero everything to avoid uninitialized data */
- for (i = 0; i < natoms+UNROLLI; i++)
- {
- aadata->jindex_gb[4*i+2] = aadata->jindex_gb[4*i+1];
- aadata->jindex_gb[4*i+3] = aadata->jindex_gb[4*i+1];
- aadata->epilogue_mask[i] = NULL;
- }
-
- for (ibase = ni0; ibase < ni1; ibase += UNROLLI)
- {
- /* Find the lowest index for which we need to use the epilogue */
- imin = ibase;
- max_offset = calc_maxoffset(imin, natoms);
-
- imin = imin + 1 + max_offset;
-
- /* Find largest index for which we need to use the epilogue */
- imax = ibase + UNROLLI-1;
- imax = (imax < end) ? imax : end;
-
- max_offset = calc_maxoffset(imax, natoms);
- imax = imax + 1 + max_offset + UNROLLJ - 1;
-
- for (i = ibase; i < ibase+UNROLLI; i++)
- {
- /* Start of epilogue - round down to j tile limit */
- aadata->jindex_gb[4*i+2] = (imin/UNROLLJ)*UNROLLJ;
- /* Make sure we dont overlap - for small systems everything is done in the prologue */
- aadata->jindex_gb[4*i+2] = (aadata->jindex_gb[4*i+1] > aadata->jindex_gb[4*i+2]) ? aadata->jindex_gb[4*i+1] : aadata->jindex_gb[4*i+2];
- /* Round upwards to j tile limit */
- aadata->jindex_gb[4*i+3] = (imax/UNROLLJ)*UNROLLJ;
- /* Make sure we dont have a negative range for the epilogue */
- aadata->jindex_gb[4*i+3] = (aadata->jindex_gb[4*i+2] > aadata->jindex_gb[4*i+3]) ? aadata->jindex_gb[4*i+2] : aadata->jindex_gb[4*i+3];
- }
- }
-
- /* And fill it with data... */
-
- for (ibase = ni0; ibase < ni1; ibase += UNROLLI)
- {
- for (i = ibase; i < ibase+UNROLLI; i++)
- {
-
- nj = aadata->jindex_gb[4*i+3] - aadata->jindex_gb[4*i+2];
-
- /* Allocate aligned memory */
- snew(pi, 2*(nj+2*SIMD_WIDTH));
- aadata->epilogue_mask[i] = (int *) (((size_t) pi + 16) & (~((size_t) 15)));
-
- max_offset = calc_maxoffset(i, natoms);
-
- for (k = 0; k < nj; k++)
- {
- j = aadata->jindex_gb[4*i+2] + k;
- aadata->epilogue_mask[i][2*k] = (j <= i+max_offset) ? 0xFFFFFFFF : 0;
- aadata->epilogue_mask[i][2*k+1] = (j <= i+max_offset) ? 0xFFFFFFFF : 0;
- }
- }
- }
-}
-
-
-static void
-genborn_allvsall_setup(gmx_allvsallgb2_data_t ** p_aadata,
- gmx_localtop_t * top,
- gmx_genborn_t * born,
- t_mdatoms * mdatoms,
- double radius_offset,
- int gb_algorithm,
- gmx_bool bInclude12,
- gmx_bool bInclude13,
- gmx_bool bInclude14)
-{
- int i, j, idx;
- int natoms;
- gmx_allvsallgb2_data_t *aadata;
- double *p;
-
- natoms = mdatoms->nr;
-
- snew(aadata, 1);
- *p_aadata = aadata;
-
- snew(p, 2*natoms+2*SIMD_WIDTH);
- aadata->x_align = (double *) (((size_t) p + 16) & (~((size_t) 15)));
- snew(p, 2*natoms+2*SIMD_WIDTH);
- aadata->y_align = (double *) (((size_t) p + 16) & (~((size_t) 15)));
- snew(p, 2*natoms+2*SIMD_WIDTH);
- aadata->z_align = (double *) (((size_t) p + 16) & (~((size_t) 15)));
- snew(p, 2*natoms+2*SIMD_WIDTH);
- aadata->fx_align = (double *) (((size_t) p + 16) & (~((size_t) 15)));
- snew(p, 2*natoms+2*SIMD_WIDTH);
- aadata->fy_align = (double *) (((size_t) p + 16) & (~((size_t) 15)));
- snew(p, 2*natoms+2*SIMD_WIDTH);
- aadata->fz_align = (double *) (((size_t) p + 16) & (~((size_t) 15)));
-
- snew(p, 2*natoms+UNROLLJ+SIMD_WIDTH);
- aadata->gb_radius = (double *) (((size_t) p + 16) & (~((size_t) 15)));
-
- snew(p, 2*natoms+UNROLLJ+SIMD_WIDTH);
- aadata->workparam = (double *) (((size_t) p + 16) & (~((size_t) 15)));
-
- snew(p, 2*natoms+UNROLLJ+SIMD_WIDTH);
- aadata->work = (double *) (((size_t) p + 16) & (~((size_t) 15)));
-
- for (i = 0; i < mdatoms->nr; i++)
- {
- aadata->gb_radius[i] = top->atomtypes.gb_radius[mdatoms->typeA[i]] - radius_offset;
- if (gb_algorithm == egbSTILL)
- {
- aadata->workparam[i] = born->vsolv[i];
- }
- else if (gb_algorithm == egbOBC)
- {
- aadata->workparam[i] = born->param[i];
- }
- aadata->work[i] = 0.0;
- }
- for (i = 0; i < mdatoms->nr; i++)
- {
- aadata->gb_radius[natoms+i] = aadata->gb_radius[i];
- aadata->workparam[natoms+i] = aadata->workparam[i];
- aadata->work[natoms+i] = aadata->work[i];
- }
-
- for (i = 0; i < 2*natoms+SIMD_WIDTH; i++)
- {
- aadata->x_align[i] = 0.0;
- aadata->y_align[i] = 0.0;
- aadata->z_align[i] = 0.0;
- aadata->fx_align[i] = 0.0;
- aadata->fy_align[i] = 0.0;
- aadata->fz_align[i] = 0.0;
- }
-
- setup_gb_exclusions_and_indices(aadata, top->idef.il, 0, mdatoms->homenr, mdatoms->nr,
- bInclude12, bInclude13, bInclude14);
-}
-
-
-/*
- * This routine apparently hits a compiler bug visual studio has had 'forever'.
- * It is present both in VS2005 and VS2008, and the only way around it is to
- * decrease optimization. We do that with at pragma, and only for MSVC, so it
- * will not hurt any of the well-behaving and supported compilers out there.
- * MS: Fix your compiler, it sucks like a black hole!
- */
-#ifdef _MSC_VER
-#pragma optimize("t",off)
-#endif
-
-int
-genborn_allvsall_calc_still_radii_sse2_double(t_forcerec * fr,
- t_mdatoms * mdatoms,
- gmx_genborn_t * born,
- gmx_localtop_t * top,
- double * x,
- t_commrec * cr,
- void * paadata)
-{
- gmx_allvsallgb2_data_t *aadata;
- int natoms;
- int ni0, ni1;
- int nj0, nj1, nj2, nj3;
- int i, j, k, n;
- int * mask;
- int * pmask0;
- int * pmask1;
- int * emask0;
- int * emask1;
- double ix, iy, iz;
- double jx, jy, jz;
- double dx, dy, dz;
- double rsq, rinv;
- double gpi, rai, vai;
- double prod_ai;
- double irsq, idr4, idr6;
- double raj, rvdw, ratio;
- double vaj, ccf, dccf, theta, cosq;
- double term, prod, icf4, icf6, gpi2, factor, sinq;
- double * gb_radius;
- double * vsolv;
- double * work;
- double tmpsum[2];
- double * x_align;
- double * y_align;
- double * z_align;
- int * jindex;
- double * dadx;
-
- __m128d ix_SSE0, iy_SSE0, iz_SSE0;
- __m128d ix_SSE1, iy_SSE1, iz_SSE1;
- __m128d gpi_SSE0, rai_SSE0, prod_ai_SSE0;
- __m128d gpi_SSE1, rai_SSE1, prod_ai_SSE1;
- __m128d imask_SSE0, jmask_SSE0;
- __m128d imask_SSE1, jmask_SSE1;
- __m128d jx_SSE, jy_SSE, jz_SSE;
- __m128d dx_SSE0, dy_SSE0, dz_SSE0;
- __m128d dx_SSE1, dy_SSE1, dz_SSE1;
- __m128d rsq_SSE0, rinv_SSE0, irsq_SSE0, idr4_SSE0, idr6_SSE0;
- __m128d rsq_SSE1, rinv_SSE1, irsq_SSE1, idr4_SSE1, idr6_SSE1;
- __m128d raj_SSE, vaj_SSE, prod_SSE;
- __m128d rvdw_SSE0, ratio_SSE0;
- __m128d rvdw_SSE1, ratio_SSE1;
- __m128d theta_SSE0, sinq_SSE0, cosq_SSE0, term_SSE0;
- __m128d theta_SSE1, sinq_SSE1, cosq_SSE1, term_SSE1;
- __m128d ccf_SSE0, dccf_SSE0;
- __m128d ccf_SSE1, dccf_SSE1;
- __m128d icf4_SSE0, icf6_SSE0;
- __m128d icf4_SSE1, icf6_SSE1;
- __m128d half_SSE, one_SSE, two_SSE, four_SSE;
- __m128d still_p4_SSE, still_p5inv_SSE, still_pip5_SSE;
-
- natoms = mdatoms->nr;
- ni0 = 0;
- ni1 = mdatoms->homenr;
-
- n = 0;
-
- aadata = *((gmx_allvsallgb2_data_t **)paadata);
-
-
- if (aadata == NULL)
- {
- genborn_allvsall_setup(&aadata, top, born, mdatoms, 0.0,
- egbSTILL, FALSE, FALSE, TRUE);
- *((gmx_allvsallgb2_data_t **)paadata) = aadata;
- }
-
- x_align = aadata->x_align;
- y_align = aadata->y_align;
- z_align = aadata->z_align;
-
- gb_radius = aadata->gb_radius;
- vsolv = aadata->workparam;
- work = aadata->work;
- jindex = aadata->jindex_gb;
- dadx = fr->dadx;
-
- still_p4_SSE = _mm_set1_pd(STILL_P4);
- still_p5inv_SSE = _mm_set1_pd(STILL_P5INV);
- still_pip5_SSE = _mm_set1_pd(STILL_PIP5);
- half_SSE = _mm_set1_pd(0.5);
- one_SSE = _mm_set1_pd(1.0);
- two_SSE = _mm_set1_pd(2.0);
- four_SSE = _mm_set1_pd(4.0);
-
- /* This will be summed, so it has to extend to natoms + buffer */
- for (i = 0; i < natoms+1+natoms/2; i++)
- {
- work[i] = 0;
- }
-
- for (i = ni0; i < ni1+1+natoms/2; i++)
- {
- k = i%natoms;
- x_align[i] = x[3*k];
- y_align[i] = x[3*k+1];
- z_align[i] = x[3*k+2];
- work[i] = 0;
- }
-
- for (i = ni0; i < ni1; i += UNROLLI)
- {
- /* We assume shifts are NOT used for all-vs-all interactions */
- /* Load i atom data */
- ix_SSE0 = _mm_load1_pd(x_align+i);
- iy_SSE0 = _mm_load1_pd(y_align+i);
- iz_SSE0 = _mm_load1_pd(z_align+i);
- ix_SSE1 = _mm_load1_pd(x_align+i+1);
- iy_SSE1 = _mm_load1_pd(y_align+i+1);
- iz_SSE1 = _mm_load1_pd(z_align+i+1);
-
- gpi_SSE0 = _mm_setzero_pd();
- gpi_SSE1 = _mm_setzero_pd();
-
- rai_SSE0 = _mm_load1_pd(gb_radius+i);
- rai_SSE1 = _mm_load1_pd(gb_radius+i+1);
-
- prod_ai_SSE0 = _mm_set1_pd(STILL_P4*vsolv[i]);
- prod_ai_SSE1 = _mm_set1_pd(STILL_P4*vsolv[i+1]);
-
- /* Load limits for loop over neighbors */
- nj0 = jindex[4*i];
- nj1 = jindex[4*i+1];
- nj2 = jindex[4*i+2];
- nj3 = jindex[4*i+3];
-
- pmask0 = aadata->prologue_mask_gb[i];
- pmask1 = aadata->prologue_mask_gb[i+1];
- emask0 = aadata->epilogue_mask[i];
- emask1 = aadata->epilogue_mask[i+1];
-
- imask_SSE0 = _mm_load1_pd((double *)(aadata->imask+2*i));
- imask_SSE1 = _mm_load1_pd((double *)(aadata->imask+2*i+2));
-
- /* Prologue part, including exclusion mask */
- for (j = nj0; j < nj1; j += UNROLLJ)
- {
- jmask_SSE0 = _mm_load_pd((double *)pmask0);
- jmask_SSE1 = _mm_load_pd((double *)pmask1);
- pmask0 += 2*UNROLLJ;
- pmask1 += 2*UNROLLJ;
-
- /* load j atom coordinates */
- jx_SSE = _mm_load_pd(x_align+j);
- jy_SSE = _mm_load_pd(y_align+j);
- jz_SSE = _mm_load_pd(z_align+j);
-
- /* Calculate distance */
- dx_SSE0 = _mm_sub_pd(ix_SSE0, jx_SSE);
- dy_SSE0 = _mm_sub_pd(iy_SSE0, jy_SSE);
- dz_SSE0 = _mm_sub_pd(iz_SSE0, jz_SSE);
- dx_SSE1 = _mm_sub_pd(ix_SSE1, jx_SSE);
- dy_SSE1 = _mm_sub_pd(iy_SSE1, jy_SSE);
- dz_SSE1 = _mm_sub_pd(iz_SSE1, jz_SSE);
-
- /* rsq = dx*dx+dy*dy+dz*dz */
- rsq_SSE0 = gmx_mm_calc_rsq_pd(dx_SSE0, dy_SSE0, dz_SSE0);
- rsq_SSE1 = gmx_mm_calc_rsq_pd(dx_SSE1, dy_SSE1, dz_SSE1);
-
- /* Combine masks */
- jmask_SSE0 = _mm_and_pd(jmask_SSE0, imask_SSE0);
- jmask_SSE1 = _mm_and_pd(jmask_SSE1, imask_SSE1);
-
- /* Calculate 1/r and 1/r2 */
- rinv_SSE0 = gmx_mm_invsqrt_pd(rsq_SSE0);
- rinv_SSE1 = gmx_mm_invsqrt_pd(rsq_SSE1);
-
- /* Apply mask */
- rinv_SSE0 = _mm_and_pd(rinv_SSE0, jmask_SSE0);
- rinv_SSE1 = _mm_and_pd(rinv_SSE1, jmask_SSE1);
-
- irsq_SSE0 = _mm_mul_pd(rinv_SSE0, rinv_SSE0);
- irsq_SSE1 = _mm_mul_pd(rinv_SSE1, rinv_SSE1);
- idr4_SSE0 = _mm_mul_pd(irsq_SSE0, irsq_SSE0);
- idr4_SSE1 = _mm_mul_pd(irsq_SSE1, irsq_SSE1);
- idr6_SSE0 = _mm_mul_pd(idr4_SSE0, irsq_SSE0);
- idr6_SSE1 = _mm_mul_pd(idr4_SSE1, irsq_SSE1);
-
- raj_SSE = _mm_load_pd(gb_radius+j);
- vaj_SSE = _mm_load_pd(vsolv+j);
-
- rvdw_SSE0 = _mm_add_pd(rai_SSE0, raj_SSE);
- rvdw_SSE1 = _mm_add_pd(rai_SSE1, raj_SSE);
-
- ratio_SSE0 = _mm_mul_pd(rsq_SSE0, gmx_mm_inv_pd( _mm_mul_pd(rvdw_SSE0, rvdw_SSE0)));
- ratio_SSE1 = _mm_mul_pd(rsq_SSE1, gmx_mm_inv_pd( _mm_mul_pd(rvdw_SSE1, rvdw_SSE1)));
-
- ratio_SSE0 = _mm_min_pd(ratio_SSE0, still_p5inv_SSE);
- ratio_SSE1 = _mm_min_pd(ratio_SSE1, still_p5inv_SSE);
- theta_SSE0 = _mm_mul_pd(ratio_SSE0, still_pip5_SSE);
- theta_SSE1 = _mm_mul_pd(ratio_SSE1, still_pip5_SSE);
- gmx_mm_sincos_pd(theta_SSE0, &sinq_SSE0, &cosq_SSE0);
- gmx_mm_sincos_pd(theta_SSE1, &sinq_SSE1, &cosq_SSE1);
- term_SSE0 = _mm_mul_pd(half_SSE, _mm_sub_pd(one_SSE, cosq_SSE0));
- term_SSE1 = _mm_mul_pd(half_SSE, _mm_sub_pd(one_SSE, cosq_SSE1));
- ccf_SSE0 = _mm_mul_pd(term_SSE0, term_SSE0);
- ccf_SSE1 = _mm_mul_pd(term_SSE1, term_SSE1);
- dccf_SSE0 = _mm_mul_pd(_mm_mul_pd(two_SSE, term_SSE0),
- _mm_mul_pd(sinq_SSE0, theta_SSE0));
- dccf_SSE1 = _mm_mul_pd(_mm_mul_pd(two_SSE, term_SSE1),
- _mm_mul_pd(sinq_SSE1, theta_SSE1));
-
- prod_SSE = _mm_mul_pd(still_p4_SSE, vaj_SSE);
- icf4_SSE0 = _mm_mul_pd(ccf_SSE0, idr4_SSE0);
- icf4_SSE1 = _mm_mul_pd(ccf_SSE1, idr4_SSE1);
- icf6_SSE0 = _mm_mul_pd( _mm_sub_pd( _mm_mul_pd(four_SSE, ccf_SSE0), dccf_SSE0), idr6_SSE0);
- icf6_SSE1 = _mm_mul_pd( _mm_sub_pd( _mm_mul_pd(four_SSE, ccf_SSE1), dccf_SSE1), idr6_SSE1);
-
- _mm_store_pd(work+j, _mm_add_pd(_mm_load_pd(work+j),
- _mm_add_pd(_mm_mul_pd(prod_ai_SSE0, icf4_SSE0),
- _mm_mul_pd(prod_ai_SSE1, icf4_SSE1))));
-
-
- gpi_SSE0 = _mm_add_pd(gpi_SSE0, _mm_mul_pd(prod_SSE, icf4_SSE0));
- gpi_SSE1 = _mm_add_pd(gpi_SSE1, _mm_mul_pd(prod_SSE, icf4_SSE1));
-
- /* Save ai->aj and aj->ai chain rule terms */
- _mm_store_pd(dadx, _mm_mul_pd(prod_SSE, icf6_SSE0));
- dadx += 2;
- _mm_store_pd(dadx, _mm_mul_pd(prod_SSE, icf6_SSE1));
- dadx += 2;
-
- _mm_store_pd(dadx, _mm_mul_pd(prod_ai_SSE0, icf6_SSE0));
- dadx += 2;
- _mm_store_pd(dadx, _mm_mul_pd(prod_ai_SSE1, icf6_SSE1));
- dadx += 2;
- }
-
- /* Main part, no exclusions */
- for (j = nj1; j < nj2; j += UNROLLJ)
- {
-
- /* load j atom coordinates */
- jx_SSE = _mm_load_pd(x_align+j);
- jy_SSE = _mm_load_pd(y_align+j);
- jz_SSE = _mm_load_pd(z_align+j);
-
- /* Calculate distance */
- dx_SSE0 = _mm_sub_pd(ix_SSE0, jx_SSE);
- dy_SSE0 = _mm_sub_pd(iy_SSE0, jy_SSE);
- dz_SSE0 = _mm_sub_pd(iz_SSE0, jz_SSE);
- dx_SSE1 = _mm_sub_pd(ix_SSE1, jx_SSE);
- dy_SSE1 = _mm_sub_pd(iy_SSE1, jy_SSE);
- dz_SSE1 = _mm_sub_pd(iz_SSE1, jz_SSE);
-
- /* rsq = dx*dx+dy*dy+dz*dz */
- rsq_SSE0 = gmx_mm_calc_rsq_pd(dx_SSE0, dy_SSE0, dz_SSE0);
- rsq_SSE1 = gmx_mm_calc_rsq_pd(dx_SSE1, dy_SSE1, dz_SSE1);
-
- /* Calculate 1/r and 1/r2 */
- rinv_SSE0 = gmx_mm_invsqrt_pd(rsq_SSE0);
- rinv_SSE1 = gmx_mm_invsqrt_pd(rsq_SSE1);
-
- /* Apply mask */
- rinv_SSE0 = _mm_and_pd(rinv_SSE0, imask_SSE0);
- rinv_SSE1 = _mm_and_pd(rinv_SSE1, imask_SSE1);
-
- irsq_SSE0 = _mm_mul_pd(rinv_SSE0, rinv_SSE0);
- irsq_SSE1 = _mm_mul_pd(rinv_SSE1, rinv_SSE1);
- idr4_SSE0 = _mm_mul_pd(irsq_SSE0, irsq_SSE0);
- idr4_SSE1 = _mm_mul_pd(irsq_SSE1, irsq_SSE1);
- idr6_SSE0 = _mm_mul_pd(idr4_SSE0, irsq_SSE0);
- idr6_SSE1 = _mm_mul_pd(idr4_SSE1, irsq_SSE1);
-
- raj_SSE = _mm_load_pd(gb_radius+j);
-
- rvdw_SSE0 = _mm_add_pd(rai_SSE0, raj_SSE);
- rvdw_SSE1 = _mm_add_pd(rai_SSE1, raj_SSE);
- vaj_SSE = _mm_load_pd(vsolv+j);
-
- ratio_SSE0 = _mm_mul_pd(rsq_SSE0, gmx_mm_inv_pd( _mm_mul_pd(rvdw_SSE0, rvdw_SSE0)));
- ratio_SSE1 = _mm_mul_pd(rsq_SSE1, gmx_mm_inv_pd( _mm_mul_pd(rvdw_SSE1, rvdw_SSE1)));
-
- ratio_SSE0 = _mm_min_pd(ratio_SSE0, still_p5inv_SSE);
- ratio_SSE1 = _mm_min_pd(ratio_SSE1, still_p5inv_SSE);
- theta_SSE0 = _mm_mul_pd(ratio_SSE0, still_pip5_SSE);
- theta_SSE1 = _mm_mul_pd(ratio_SSE1, still_pip5_SSE);
- gmx_mm_sincos_pd(theta_SSE0, &sinq_SSE0, &cosq_SSE0);
- gmx_mm_sincos_pd(theta_SSE1, &sinq_SSE1, &cosq_SSE1);
- term_SSE0 = _mm_mul_pd(half_SSE, _mm_sub_pd(one_SSE, cosq_SSE0));
- term_SSE1 = _mm_mul_pd(half_SSE, _mm_sub_pd(one_SSE, cosq_SSE1));
- ccf_SSE0 = _mm_mul_pd(term_SSE0, term_SSE0);
- ccf_SSE1 = _mm_mul_pd(term_SSE1, term_SSE1);
- dccf_SSE0 = _mm_mul_pd(_mm_mul_pd(two_SSE, term_SSE0),
- _mm_mul_pd(sinq_SSE0, theta_SSE0));
- dccf_SSE1 = _mm_mul_pd(_mm_mul_pd(two_SSE, term_SSE1),
- _mm_mul_pd(sinq_SSE1, theta_SSE1));
-
- prod_SSE = _mm_mul_pd(still_p4_SSE, vaj_SSE );
- icf4_SSE0 = _mm_mul_pd(ccf_SSE0, idr4_SSE0);
- icf4_SSE1 = _mm_mul_pd(ccf_SSE1, idr4_SSE1);
- icf6_SSE0 = _mm_mul_pd( _mm_sub_pd( _mm_mul_pd(four_SSE, ccf_SSE0), dccf_SSE0), idr6_SSE0);
- icf6_SSE1 = _mm_mul_pd( _mm_sub_pd( _mm_mul_pd(four_SSE, ccf_SSE1), dccf_SSE1), idr6_SSE1);
-
- _mm_store_pd(work+j, _mm_add_pd(_mm_load_pd(work+j),
- _mm_add_pd(_mm_mul_pd(prod_ai_SSE0, icf4_SSE0),
- _mm_mul_pd(prod_ai_SSE1, icf4_SSE1))));
-
- gpi_SSE0 = _mm_add_pd(gpi_SSE0, _mm_mul_pd(prod_SSE, icf4_SSE0));
- gpi_SSE1 = _mm_add_pd(gpi_SSE1, _mm_mul_pd(prod_SSE, icf4_SSE1));
-
- /* Save ai->aj and aj->ai chain rule terms */
- _mm_store_pd(dadx, _mm_mul_pd(prod_SSE, icf6_SSE0));
- dadx += 2;
- _mm_store_pd(dadx, _mm_mul_pd(prod_SSE, icf6_SSE1));
- dadx += 2;
-
- _mm_store_pd(dadx, _mm_mul_pd(prod_ai_SSE0, icf6_SSE0));
- dadx += 2;
- _mm_store_pd(dadx, _mm_mul_pd(prod_ai_SSE1, icf6_SSE1));
- dadx += 2;
- }
- /* Epilogue part, including exclusion mask */
- for (j = nj2; j < nj3; j += UNROLLJ)
- {
- jmask_SSE0 = _mm_load_pd((double *)emask0);
- jmask_SSE1 = _mm_load_pd((double *)emask1);
- emask0 += 2*UNROLLJ;
- emask1 += 2*UNROLLJ;
-
- /* load j atom coordinates */
- jx_SSE = _mm_load_pd(x_align+j);
- jy_SSE = _mm_load_pd(y_align+j);
- jz_SSE = _mm_load_pd(z_align+j);
-
- /* Calculate distance */
- dx_SSE0 = _mm_sub_pd(ix_SSE0, jx_SSE);
- dy_SSE0 = _mm_sub_pd(iy_SSE0, jy_SSE);
- dz_SSE0 = _mm_sub_pd(iz_SSE0, jz_SSE);
- dx_SSE1 = _mm_sub_pd(ix_SSE1, jx_SSE);
- dy_SSE1 = _mm_sub_pd(iy_SSE1, jy_SSE);
- dz_SSE1 = _mm_sub_pd(iz_SSE1, jz_SSE);
-
- /* rsq = dx*dx+dy*dy+dz*dz */
- rsq_SSE0 = gmx_mm_calc_rsq_pd(dx_SSE0, dy_SSE0, dz_SSE0);
- rsq_SSE1 = gmx_mm_calc_rsq_pd(dx_SSE1, dy_SSE1, dz_SSE1);
-
- /* Combine masks */
- jmask_SSE0 = _mm_and_pd(jmask_SSE0, imask_SSE0);
- jmask_SSE1 = _mm_and_pd(jmask_SSE1, imask_SSE1);
-
- /* Calculate 1/r and 1/r2 */
- rinv_SSE0 = gmx_mm_invsqrt_pd(rsq_SSE0);
- rinv_SSE1 = gmx_mm_invsqrt_pd(rsq_SSE1);
-
- /* Apply mask */
- rinv_SSE0 = _mm_and_pd(rinv_SSE0, jmask_SSE0);
- rinv_SSE1 = _mm_and_pd(rinv_SSE1, jmask_SSE1);
-
- irsq_SSE0 = _mm_mul_pd(rinv_SSE0, rinv_SSE0);
- irsq_SSE1 = _mm_mul_pd(rinv_SSE1, rinv_SSE1);
- idr4_SSE0 = _mm_mul_pd(irsq_SSE0, irsq_SSE0);
- idr4_SSE1 = _mm_mul_pd(irsq_SSE1, irsq_SSE1);
- idr6_SSE0 = _mm_mul_pd(idr4_SSE0, irsq_SSE0);
- idr6_SSE1 = _mm_mul_pd(idr4_SSE1, irsq_SSE1);
-
- raj_SSE = _mm_load_pd(gb_radius+j);
- vaj_SSE = _mm_load_pd(vsolv+j);
-
- rvdw_SSE0 = _mm_add_pd(rai_SSE0, raj_SSE);
- rvdw_SSE1 = _mm_add_pd(rai_SSE1, raj_SSE);
-
- ratio_SSE0 = _mm_mul_pd(rsq_SSE0, gmx_mm_inv_pd( _mm_mul_pd(rvdw_SSE0, rvdw_SSE0)));
- ratio_SSE1 = _mm_mul_pd(rsq_SSE1, gmx_mm_inv_pd( _mm_mul_pd(rvdw_SSE1, rvdw_SSE1)));
-
- ratio_SSE0 = _mm_min_pd(ratio_SSE0, still_p5inv_SSE);
- ratio_SSE1 = _mm_min_pd(ratio_SSE1, still_p5inv_SSE);
- theta_SSE0 = _mm_mul_pd(ratio_SSE0, still_pip5_SSE);
- theta_SSE1 = _mm_mul_pd(ratio_SSE1, still_pip5_SSE);
- gmx_mm_sincos_pd(theta_SSE0, &sinq_SSE0, &cosq_SSE0);
- gmx_mm_sincos_pd(theta_SSE1, &sinq_SSE1, &cosq_SSE1);
- term_SSE0 = _mm_mul_pd(half_SSE, _mm_sub_pd(one_SSE, cosq_SSE0));
- term_SSE1 = _mm_mul_pd(half_SSE, _mm_sub_pd(one_SSE, cosq_SSE1));
- ccf_SSE0 = _mm_mul_pd(term_SSE0, term_SSE0);
- ccf_SSE1 = _mm_mul_pd(term_SSE1, term_SSE1);
- dccf_SSE0 = _mm_mul_pd(_mm_mul_pd(two_SSE, term_SSE0),
- _mm_mul_pd(sinq_SSE0, theta_SSE0));
- dccf_SSE1 = _mm_mul_pd(_mm_mul_pd(two_SSE, term_SSE1),
- _mm_mul_pd(sinq_SSE1, theta_SSE1));
-
- prod_SSE = _mm_mul_pd(still_p4_SSE, vaj_SSE);
- icf4_SSE0 = _mm_mul_pd(ccf_SSE0, idr4_SSE0);
- icf4_SSE1 = _mm_mul_pd(ccf_SSE1, idr4_SSE1);
- icf6_SSE0 = _mm_mul_pd( _mm_sub_pd( _mm_mul_pd(four_SSE, ccf_SSE0), dccf_SSE0), idr6_SSE0);
- icf6_SSE1 = _mm_mul_pd( _mm_sub_pd( _mm_mul_pd(four_SSE, ccf_SSE1), dccf_SSE1), idr6_SSE1);
-
- _mm_store_pd(work+j, _mm_add_pd(_mm_load_pd(work+j),
- _mm_add_pd(_mm_mul_pd(prod_ai_SSE0, icf4_SSE0),
- _mm_mul_pd(prod_ai_SSE1, icf4_SSE1))));
-
- gpi_SSE0 = _mm_add_pd(gpi_SSE0, _mm_mul_pd(prod_SSE, icf4_SSE0));
- gpi_SSE1 = _mm_add_pd(gpi_SSE1, _mm_mul_pd(prod_SSE, icf4_SSE1));
-
- /* Save ai->aj and aj->ai chain rule terms */
- _mm_store_pd(dadx, _mm_mul_pd(prod_SSE, icf6_SSE0));
- dadx += 2;
- _mm_store_pd(dadx, _mm_mul_pd(prod_SSE, icf6_SSE1));
- dadx += 2;
-
- _mm_store_pd(dadx, _mm_mul_pd(prod_ai_SSE0, icf6_SSE0));
- dadx += 2;
- _mm_store_pd(dadx, _mm_mul_pd(prod_ai_SSE1, icf6_SSE1));
- dadx += 2;
- }
- GMX_MM_TRANSPOSE2_PD(gpi_SSE0, gpi_SSE1);
- gpi_SSE0 = _mm_add_pd(gpi_SSE0, gpi_SSE1);
- _mm_store_pd(work+i, _mm_add_pd(gpi_SSE0, _mm_load_pd(work+i)));
- }
-
- /* In case we have written anything beyond natoms, move it back.
- * Never mind that we leave stuff above natoms; that will not
- * be accessed later in the routine.
- * In principle this should be a move rather than sum, but this
- * way we dont have to worry about even/odd offsets...
- */
- for (i = natoms; i < ni1+1+natoms/2; i++)
- {
- work[i-natoms] += work[i];
- }
-
- /* Parallel summations would go here if ever implemented with DD */
-
- factor = 0.5 * ONE_4PI_EPS0;
- /* Calculate the radii - should we do all atoms, or just our local ones? */
- for (i = 0; i < natoms; i++)
- {
- if (born->use[i] != 0)
- {
- gpi = born->gpol[i]+work[i];
- gpi2 = gpi * gpi;
- born->bRad[i] = factor*gmx_invsqrt(gpi2);
- fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
- }
- }
-
- return 0;
-}
-/* Reinstate MSVC optimization */
-#ifdef _MSC_VER
-#pragma optimize("",on)
-#endif
-
-
-int
-genborn_allvsall_calc_hct_obc_radii_sse2_double(t_forcerec * fr,
- t_mdatoms * mdatoms,
- gmx_genborn_t * born,
- int gb_algorithm,
- gmx_localtop_t * top,
- double * x,
- t_commrec * cr,
- void * paadata)
-{
- gmx_allvsallgb2_data_t *aadata;
- int natoms;
- int ni0, ni1;
- int nj0, nj1, nj2, nj3;
- int i, j, k, n;
- int * mask;
- int * pmask0;
- int * pmask1;
- int * emask0;
- int * emask1;
- double * gb_radius;
- double * vsolv;
- double * work;
- double tmpsum[2];
- double * x_align;
- double * y_align;
- double * z_align;
- int * jindex;
- double * dadx;
- double * obc_param;
- double rad, min_rad;
- double rai, rai_inv, rai_inv2, sum_ai, sum_ai2, sum_ai3, tsum, tchain;
-
- __m128d ix_SSE0, iy_SSE0, iz_SSE0;
- __m128d ix_SSE1, iy_SSE1, iz_SSE1;
- __m128d gpi_SSE0, rai_SSE0, prod_ai_SSE0;
- __m128d gpi_SSE1, rai_SSE1, prod_ai_SSE1;
- __m128d imask_SSE0, jmask_SSE0;
- __m128d imask_SSE1, jmask_SSE1;
- __m128d jx_SSE, jy_SSE, jz_SSE;
- __m128d dx_SSE0, dy_SSE0, dz_SSE0;
- __m128d dx_SSE1, dy_SSE1, dz_SSE1;
- __m128d rsq_SSE0, rinv_SSE0, irsq_SSE0, idr4_SSE0, idr6_SSE0;
- __m128d rsq_SSE1, rinv_SSE1, irsq_SSE1, idr4_SSE1, idr6_SSE1;
- __m128d raj_SSE, raj_inv_SSE, sk_aj_SSE, sk2_aj_SSE;
- __m128d ccf_SSE0, dccf_SSE0, prod_SSE0;
- __m128d ccf_SSE1, dccf_SSE1, prod_SSE1;
- __m128d icf4_SSE0, icf6_SSE0;
- __m128d icf4_SSE1, icf6_SSE1;
- __m128d oneeighth_SSE, onefourth_SSE, half_SSE, one_SSE, two_SSE, four_SSE;
- __m128d still_p4_SSE, still_p5inv_SSE, still_pip5_SSE;
- __m128d rai_inv_SSE0;
- __m128d rai_inv_SSE1;
- __m128d sk_ai_SSE0, sk2_ai_SSE0, sum_ai_SSE0;
- __m128d sk_ai_SSE1, sk2_ai_SSE1, sum_ai_SSE1;
- __m128d lij_inv_SSE0, sk2_rinv_SSE0;
- __m128d lij_inv_SSE1, sk2_rinv_SSE1;
- __m128d dr_SSE0;
- __m128d dr_SSE1;
- __m128d t1_SSE0, t2_SSE0, t3_SSE0, t4_SSE0;
- __m128d t1_SSE1, t2_SSE1, t3_SSE1, t4_SSE1;
- __m128d obc_mask1_SSE0, obc_mask2_SSE0, obc_mask3_SSE0;
- __m128d obc_mask1_SSE1, obc_mask2_SSE1, obc_mask3_SSE1;
- __m128d uij_SSE0, uij2_SSE0, uij3_SSE0;
- __m128d uij_SSE1, uij2_SSE1, uij3_SSE1;
- __m128d lij_SSE0, lij2_SSE0, lij3_SSE0;
- __m128d lij_SSE1, lij2_SSE1, lij3_SSE1;
- __m128d dlij_SSE0, diff2_SSE0, logterm_SSE0;
- __m128d dlij_SSE1, diff2_SSE1, logterm_SSE1;
- __m128d doffset_SSE, tmpSSE;
-
- natoms = mdatoms->nr;
- ni0 = 0;
- ni1 = mdatoms->homenr;
-
- n = 0;
-
- aadata = *((gmx_allvsallgb2_data_t **)paadata);
-
-
- if (aadata == NULL)
- {
- genborn_allvsall_setup(&aadata, top, born, mdatoms, born->gb_doffset,
- egbOBC, TRUE, TRUE, TRUE);
- *((gmx_allvsallgb2_data_t **)paadata) = aadata;
- }
-
- x_align = aadata->x_align;
- y_align = aadata->y_align;
- z_align = aadata->z_align;
-
- gb_radius = aadata->gb_radius;
- work = aadata->work;
- jindex = aadata->jindex_gb;
- dadx = fr->dadx;
- obc_param = aadata->workparam;
-
- oneeighth_SSE = _mm_set1_pd(0.125);
- onefourth_SSE = _mm_set1_pd(0.25);
- half_SSE = _mm_set1_pd(0.5);
- one_SSE = _mm_set1_pd(1.0);
- two_SSE = _mm_set1_pd(2.0);
- four_SSE = _mm_set1_pd(4.0);
- doffset_SSE = _mm_set1_pd(born->gb_doffset);
-
- for (i = 0; i < natoms; i++)
- {
- x_align[i] = x[3*i];
- y_align[i] = x[3*i+1];
- z_align[i] = x[3*i+2];
- }
-
- /* Copy again */
- for (i = 0; i < natoms/2+1; i++)
- {
- x_align[natoms+i] = x_align[i];
- y_align[natoms+i] = y_align[i];
- z_align[natoms+i] = z_align[i];
- }
-
- for (i = 0; i < natoms+natoms/2+1; i++)
- {
- work[i] = 0;
- }
-
- for (i = ni0; i < ni1; i += UNROLLI)
- {
- /* We assume shifts are NOT used for all-vs-all interactions */
-
- /* Load i atom data */
- ix_SSE0 = _mm_load1_pd(x_align+i);
- iy_SSE0 = _mm_load1_pd(y_align+i);
- iz_SSE0 = _mm_load1_pd(z_align+i);
- ix_SSE1 = _mm_load1_pd(x_align+i+1);
- iy_SSE1 = _mm_load1_pd(y_align+i+1);
- iz_SSE1 = _mm_load1_pd(z_align+i+1);
-
- rai_SSE0 = _mm_load1_pd(gb_radius+i);
- rai_SSE1 = _mm_load1_pd(gb_radius+i+1);
- rai_inv_SSE0 = gmx_mm_inv_pd(rai_SSE0);
- rai_inv_SSE1 = gmx_mm_inv_pd(rai_SSE1);
-
- sk_ai_SSE0 = _mm_load1_pd(obc_param+i);
- sk_ai_SSE1 = _mm_load1_pd(obc_param+i+1);
- sk2_ai_SSE0 = _mm_mul_pd(sk_ai_SSE0, sk_ai_SSE0);
- sk2_ai_SSE1 = _mm_mul_pd(sk_ai_SSE1, sk_ai_SSE1);
-
- sum_ai_SSE0 = _mm_setzero_pd();
- sum_ai_SSE1 = _mm_setzero_pd();
-
- /* Load limits for loop over neighbors */
- nj0 = jindex[4*i];
- nj1 = jindex[4*i+1];
- nj2 = jindex[4*i+2];
- nj3 = jindex[4*i+3];
-
- pmask0 = aadata->prologue_mask_gb[i];
- pmask1 = aadata->prologue_mask_gb[i+1];
- emask0 = aadata->epilogue_mask[i];
- emask1 = aadata->epilogue_mask[i+1];
-
- imask_SSE0 = _mm_load1_pd((double *)(aadata->imask+2*i));
- imask_SSE1 = _mm_load1_pd((double *)(aadata->imask+2*i+2));
-
- /* Prologue part, including exclusion mask */
- for (j = nj0; j < nj1; j += UNROLLJ)
- {
- jmask_SSE0 = _mm_load_pd((double *)pmask0);
- jmask_SSE1 = _mm_load_pd((double *)pmask1);
- pmask0 += 2*UNROLLJ;
- pmask1 += 2*UNROLLJ;
-
- /* load j atom coordinates */
- jx_SSE = _mm_load_pd(x_align+j);
- jy_SSE = _mm_load_pd(y_align+j);
- jz_SSE = _mm_load_pd(z_align+j);
-
- /* Calculate distance */
- dx_SSE0 = _mm_sub_pd(ix_SSE0, jx_SSE);
- dy_SSE0 = _mm_sub_pd(iy_SSE0, jy_SSE);
- dz_SSE0 = _mm_sub_pd(iz_SSE0, jz_SSE);
- dx_SSE1 = _mm_sub_pd(ix_SSE1, jx_SSE);
- dy_SSE1 = _mm_sub_pd(iy_SSE1, jy_SSE);
- dz_SSE1 = _mm_sub_pd(iz_SSE1, jz_SSE);
-
- /* rsq = dx*dx+dy*dy+dz*dz */
- rsq_SSE0 = gmx_mm_calc_rsq_pd(dx_SSE0, dy_SSE0, dz_SSE0);
- rsq_SSE1 = gmx_mm_calc_rsq_pd(dx_SSE1, dy_SSE1, dz_SSE1);
-
- /* Combine masks */
- jmask_SSE0 = _mm_and_pd(jmask_SSE0, imask_SSE0);
- jmask_SSE1 = _mm_and_pd(jmask_SSE1, imask_SSE1);
-
- /* Calculate 1/r and 1/r2 */
- rinv_SSE0 = gmx_mm_invsqrt_pd(rsq_SSE0);
- rinv_SSE1 = gmx_mm_invsqrt_pd(rsq_SSE1);
-
- /* Apply mask */
- rinv_SSE0 = _mm_and_pd(rinv_SSE0, jmask_SSE0);
- rinv_SSE1 = _mm_and_pd(rinv_SSE1, jmask_SSE1);
-
- dr_SSE0 = _mm_mul_pd(rsq_SSE0, rinv_SSE0);
- dr_SSE1 = _mm_mul_pd(rsq_SSE1, rinv_SSE1);
-
- sk_aj_SSE = _mm_load_pd(obc_param+j);
- raj_SSE = _mm_load_pd(gb_radius+j);
- raj_inv_SSE = gmx_mm_inv_pd(raj_SSE);
-
- /* Evaluate influence of atom aj -> ai */
- t1_SSE0 = _mm_add_pd(dr_SSE0, sk_aj_SSE);
- t1_SSE1 = _mm_add_pd(dr_SSE1, sk_aj_SSE);
- t2_SSE0 = _mm_sub_pd(dr_SSE0, sk_aj_SSE);
- t2_SSE1 = _mm_sub_pd(dr_SSE1, sk_aj_SSE);
- t3_SSE0 = _mm_sub_pd(sk_aj_SSE, dr_SSE0);
- t3_SSE1 = _mm_sub_pd(sk_aj_SSE, dr_SSE1);
-
- obc_mask1_SSE0 = _mm_cmplt_pd(rai_SSE0, t1_SSE0);
- obc_mask1_SSE1 = _mm_cmplt_pd(rai_SSE1, t1_SSE1);
- obc_mask2_SSE0 = _mm_cmplt_pd(rai_SSE0, t2_SSE0);
- obc_mask2_SSE1 = _mm_cmplt_pd(rai_SSE1, t2_SSE1);
- obc_mask3_SSE0 = _mm_cmplt_pd(rai_SSE0, t3_SSE0);
- obc_mask3_SSE1 = _mm_cmplt_pd(rai_SSE1, t3_SSE1);
- obc_mask1_SSE0 = _mm_and_pd(obc_mask1_SSE0, jmask_SSE0);
- obc_mask1_SSE1 = _mm_and_pd(obc_mask1_SSE1, jmask_SSE1);
-
- uij_SSE0 = gmx_mm_inv_pd(t1_SSE0);
- uij_SSE1 = gmx_mm_inv_pd(t1_SSE1);
- lij_SSE0 = _mm_or_pd( _mm_and_pd(obc_mask2_SSE0, gmx_mm_inv_pd(t2_SSE0)),
- _mm_andnot_pd(obc_mask2_SSE0, rai_inv_SSE0));
- lij_SSE1 = _mm_or_pd( _mm_and_pd(obc_mask2_SSE1, gmx_mm_inv_pd(t2_SSE1)),
- _mm_andnot_pd(obc_mask2_SSE1, rai_inv_SSE1));
- dlij_SSE0 = _mm_and_pd(one_SSE, obc_mask2_SSE0);
- dlij_SSE1 = _mm_and_pd(one_SSE, obc_mask2_SSE1);
-
- uij2_SSE0 = _mm_mul_pd(uij_SSE0, uij_SSE0);
- uij2_SSE1 = _mm_mul_pd(uij_SSE1, uij_SSE1);
- uij3_SSE0 = _mm_mul_pd(uij2_SSE0, uij_SSE0);
- uij3_SSE1 = _mm_mul_pd(uij2_SSE1, uij_SSE1);
- lij2_SSE0 = _mm_mul_pd(lij_SSE0, lij_SSE0);
- lij2_SSE1 = _mm_mul_pd(lij_SSE1, lij_SSE1);
- lij3_SSE0 = _mm_mul_pd(lij2_SSE0, lij_SSE0);
- lij3_SSE1 = _mm_mul_pd(lij2_SSE1, lij_SSE1);
-
- diff2_SSE0 = _mm_sub_pd(uij2_SSE0, lij2_SSE0);
- diff2_SSE1 = _mm_sub_pd(uij2_SSE1, lij2_SSE1);
- lij_inv_SSE0 = gmx_mm_invsqrt_pd(lij2_SSE0);
- lij_inv_SSE1 = gmx_mm_invsqrt_pd(lij2_SSE1);
- sk2_aj_SSE = _mm_mul_pd(sk_aj_SSE, sk_aj_SSE);
- sk2_rinv_SSE0 = _mm_mul_pd(sk2_aj_SSE, rinv_SSE0);
- sk2_rinv_SSE1 = _mm_mul_pd(sk2_aj_SSE, rinv_SSE1);
- prod_SSE0 = _mm_mul_pd(onefourth_SSE, sk2_rinv_SSE0);
- prod_SSE1 = _mm_mul_pd(onefourth_SSE, sk2_rinv_SSE1);
-
- logterm_SSE0 = gmx_mm_log_pd(_mm_mul_pd(uij_SSE0, lij_inv_SSE0));
- logterm_SSE1 = gmx_mm_log_pd(_mm_mul_pd(uij_SSE1, lij_inv_SSE1));
-
- t1_SSE0 = _mm_sub_pd(lij_SSE0, uij_SSE0);
- t1_SSE1 = _mm_sub_pd(lij_SSE1, uij_SSE1);
- t2_SSE0 = _mm_mul_pd(diff2_SSE0,
- _mm_sub_pd(_mm_mul_pd(onefourth_SSE, dr_SSE0),
- prod_SSE0));
- t2_SSE1 = _mm_mul_pd(diff2_SSE1,
- _mm_sub_pd(_mm_mul_pd(onefourth_SSE, dr_SSE1),
- prod_SSE1));
-
- t3_SSE0 = _mm_mul_pd(half_SSE, _mm_mul_pd(rinv_SSE0, logterm_SSE0));
- t3_SSE1 = _mm_mul_pd(half_SSE, _mm_mul_pd(rinv_SSE1, logterm_SSE1));
- t1_SSE0 = _mm_add_pd(t1_SSE0, _mm_add_pd(t2_SSE0, t3_SSE0));
- t1_SSE1 = _mm_add_pd(t1_SSE1, _mm_add_pd(t2_SSE1, t3_SSE1));
- t4_SSE0 = _mm_mul_pd(two_SSE, _mm_sub_pd(rai_inv_SSE0, lij_SSE0));
- t4_SSE1 = _mm_mul_pd(two_SSE, _mm_sub_pd(rai_inv_SSE1, lij_SSE1));
- t4_SSE0 = _mm_and_pd(t4_SSE0, obc_mask3_SSE0);
- t4_SSE1 = _mm_and_pd(t4_SSE1, obc_mask3_SSE1);
- t1_SSE0 = _mm_mul_pd(half_SSE, _mm_add_pd(t1_SSE0, t4_SSE0));
- t1_SSE1 = _mm_mul_pd(half_SSE, _mm_add_pd(t1_SSE1, t4_SSE1));
-
- sum_ai_SSE0 = _mm_add_pd(sum_ai_SSE0, _mm_and_pd(t1_SSE0, obc_mask1_SSE0));
- sum_ai_SSE1 = _mm_add_pd(sum_ai_SSE1, _mm_and_pd(t1_SSE1, obc_mask1_SSE1));
-
- t1_SSE0 = _mm_add_pd(_mm_mul_pd(half_SSE, lij2_SSE0),
- _mm_mul_pd(prod_SSE0, lij3_SSE0));
- t1_SSE1 = _mm_add_pd(_mm_mul_pd(half_SSE, lij2_SSE1),
- _mm_mul_pd(prod_SSE1, lij3_SSE1));
- t1_SSE0 = _mm_sub_pd(t1_SSE0,
- _mm_mul_pd(onefourth_SSE,
- _mm_add_pd(_mm_mul_pd(lij_SSE0, rinv_SSE0),
- _mm_mul_pd(lij3_SSE0, dr_SSE0))));
- t1_SSE1 = _mm_sub_pd(t1_SSE1,
- _mm_mul_pd(onefourth_SSE,
- _mm_add_pd(_mm_mul_pd(lij_SSE1, rinv_SSE1),
- _mm_mul_pd(lij3_SSE1, dr_SSE1))));
-
- t2_SSE0 = _mm_mul_pd(onefourth_SSE,
- _mm_add_pd(_mm_mul_pd(uij_SSE0, rinv_SSE0),
- _mm_mul_pd(uij3_SSE0, dr_SSE0)));
- t2_SSE1 = _mm_mul_pd(onefourth_SSE,
- _mm_add_pd(_mm_mul_pd(uij_SSE1, rinv_SSE1),
- _mm_mul_pd(uij3_SSE1, dr_SSE1)));
- t2_SSE0 = _mm_sub_pd(t2_SSE0,
- _mm_add_pd(_mm_mul_pd(half_SSE, uij2_SSE0),
- _mm_mul_pd(prod_SSE0, uij3_SSE0)));
- t2_SSE1 = _mm_sub_pd(t2_SSE1,
- _mm_add_pd(_mm_mul_pd(half_SSE, uij2_SSE1),
- _mm_mul_pd(prod_SSE1, uij3_SSE1)));
- t3_SSE0 = _mm_mul_pd(_mm_mul_pd(onefourth_SSE, logterm_SSE0),
- _mm_mul_pd(rinv_SSE0, rinv_SSE0));
- t3_SSE1 = _mm_mul_pd(_mm_mul_pd(onefourth_SSE, logterm_SSE1),
- _mm_mul_pd(rinv_SSE1, rinv_SSE1));
- t3_SSE0 = _mm_sub_pd(t3_SSE0,
- _mm_mul_pd(_mm_mul_pd(diff2_SSE0, oneeighth_SSE),
- _mm_add_pd(one_SSE,
- _mm_mul_pd(sk2_rinv_SSE0, rinv_SSE0))));
- t3_SSE1 = _mm_sub_pd(t3_SSE1,
- _mm_mul_pd(_mm_mul_pd(diff2_SSE1, oneeighth_SSE),
- _mm_add_pd(one_SSE,
- _mm_mul_pd(sk2_rinv_SSE1, rinv_SSE1))));
-
- t1_SSE0 = _mm_mul_pd(rinv_SSE0,
- _mm_add_pd(_mm_mul_pd(dlij_SSE0, t1_SSE0),
- _mm_add_pd(t2_SSE0, t3_SSE0)));
- t1_SSE1 = _mm_mul_pd(rinv_SSE1,
- _mm_add_pd(_mm_mul_pd(dlij_SSE1, t1_SSE1),
- _mm_add_pd(t2_SSE1, t3_SSE1)));
-
- _mm_store_pd(dadx, _mm_and_pd(t1_SSE0, obc_mask1_SSE0));
- dadx += 2;
- _mm_store_pd(dadx, _mm_and_pd(t1_SSE1, obc_mask1_SSE1));
- dadx += 2;
-
- /* Evaluate influence of atom ai -> aj */
- t1_SSE0 = _mm_add_pd(dr_SSE0, sk_ai_SSE0);
- t1_SSE1 = _mm_add_pd(dr_SSE1, sk_ai_SSE1);
- t2_SSE0 = _mm_sub_pd(dr_SSE0, sk_ai_SSE0);
- t2_SSE1 = _mm_sub_pd(dr_SSE1, sk_ai_SSE1);
- t3_SSE0 = _mm_sub_pd(sk_ai_SSE0, dr_SSE0);
- t3_SSE1 = _mm_sub_pd(sk_ai_SSE1, dr_SSE1);
-
- obc_mask1_SSE0 = _mm_cmplt_pd(raj_SSE, t1_SSE0);
- obc_mask1_SSE1 = _mm_cmplt_pd(raj_SSE, t1_SSE1);
- obc_mask2_SSE0 = _mm_cmplt_pd(raj_SSE, t2_SSE0);
- obc_mask2_SSE1 = _mm_cmplt_pd(raj_SSE, t2_SSE1);
- obc_mask3_SSE0 = _mm_cmplt_pd(raj_SSE, t3_SSE0);
- obc_mask3_SSE1 = _mm_cmplt_pd(raj_SSE, t3_SSE1);
- obc_mask1_SSE0 = _mm_and_pd(obc_mask1_SSE0, jmask_SSE0);
- obc_mask1_SSE1 = _mm_and_pd(obc_mask1_SSE1, jmask_SSE1);
-
- uij_SSE0 = gmx_mm_inv_pd(t1_SSE0);
- uij_SSE1 = gmx_mm_inv_pd(t1_SSE1);
- lij_SSE0 = _mm_or_pd( _mm_and_pd(obc_mask2_SSE0, gmx_mm_inv_pd(t2_SSE0)),
- _mm_andnot_pd(obc_mask2_SSE0, raj_inv_SSE));
- lij_SSE1 = _mm_or_pd( _mm_and_pd(obc_mask2_SSE1, gmx_mm_inv_pd(t2_SSE1)),
- _mm_andnot_pd(obc_mask2_SSE1, raj_inv_SSE));
- dlij_SSE0 = _mm_and_pd(one_SSE, obc_mask2_SSE0);
- dlij_SSE1 = _mm_and_pd(one_SSE, obc_mask2_SSE1);
-
- uij2_SSE0 = _mm_mul_pd(uij_SSE0, uij_SSE0);
- uij2_SSE1 = _mm_mul_pd(uij_SSE1, uij_SSE1);
- uij3_SSE0 = _mm_mul_pd(uij2_SSE0, uij_SSE0);
- uij3_SSE1 = _mm_mul_pd(uij2_SSE1, uij_SSE1);
- lij2_SSE0 = _mm_mul_pd(lij_SSE0, lij_SSE0);
- lij2_SSE1 = _mm_mul_pd(lij_SSE1, lij_SSE1);
- lij3_SSE0 = _mm_mul_pd(lij2_SSE0, lij_SSE0);
- lij3_SSE1 = _mm_mul_pd(lij2_SSE1, lij_SSE1);
-
- diff2_SSE0 = _mm_sub_pd(uij2_SSE0, lij2_SSE0);
- diff2_SSE1 = _mm_sub_pd(uij2_SSE1, lij2_SSE1);
- lij_inv_SSE0 = gmx_mm_invsqrt_pd(lij2_SSE0);
- lij_inv_SSE1 = gmx_mm_invsqrt_pd(lij2_SSE1);
- sk2_rinv_SSE0 = _mm_mul_pd(sk2_ai_SSE0, rinv_SSE0);
- sk2_rinv_SSE1 = _mm_mul_pd(sk2_ai_SSE1, rinv_SSE1);
- prod_SSE0 = _mm_mul_pd(onefourth_SSE, sk2_rinv_SSE0);
- prod_SSE1 = _mm_mul_pd(onefourth_SSE, sk2_rinv_SSE1);
-
- logterm_SSE0 = gmx_mm_log_pd(_mm_mul_pd(uij_SSE0, lij_inv_SSE0));
- logterm_SSE1 = gmx_mm_log_pd(_mm_mul_pd(uij_SSE1, lij_inv_SSE1));
- t1_SSE0 = _mm_sub_pd(lij_SSE0, uij_SSE0);
- t1_SSE1 = _mm_sub_pd(lij_SSE1, uij_SSE1);
- t2_SSE0 = _mm_mul_pd(diff2_SSE0,
- _mm_sub_pd(_mm_mul_pd(onefourth_SSE, dr_SSE0),
- prod_SSE0));
- t2_SSE1 = _mm_mul_pd(diff2_SSE1,
- _mm_sub_pd(_mm_mul_pd(onefourth_SSE, dr_SSE1),
- prod_SSE1));
- t3_SSE0 = _mm_mul_pd(half_SSE, _mm_mul_pd(rinv_SSE0, logterm_SSE0));
- t3_SSE1 = _mm_mul_pd(half_SSE, _mm_mul_pd(rinv_SSE1, logterm_SSE1));
- t1_SSE0 = _mm_add_pd(t1_SSE0, _mm_add_pd(t2_SSE0, t3_SSE0));
- t1_SSE1 = _mm_add_pd(t1_SSE1, _mm_add_pd(t2_SSE1, t3_SSE1));
- t4_SSE0 = _mm_mul_pd(two_SSE, _mm_sub_pd(raj_inv_SSE, lij_SSE0));
- t4_SSE1 = _mm_mul_pd(two_SSE, _mm_sub_pd(raj_inv_SSE, lij_SSE1));
- t4_SSE0 = _mm_and_pd(t4_SSE0, obc_mask3_SSE0);
- t4_SSE1 = _mm_and_pd(t4_SSE1, obc_mask3_SSE1);
- t1_SSE0 = _mm_mul_pd(half_SSE, _mm_add_pd(t1_SSE0, t4_SSE0));
- t1_SSE1 = _mm_mul_pd(half_SSE, _mm_add_pd(t1_SSE1, t4_SSE1));
-
- _mm_store_pd(work+j, _mm_add_pd(_mm_load_pd(work+j),
- _mm_add_pd(_mm_and_pd(t1_SSE0, obc_mask1_SSE0),
- _mm_and_pd(t1_SSE1, obc_mask1_SSE1))));
-
- t1_SSE0 = _mm_add_pd(_mm_mul_pd(half_SSE, lij2_SSE0),
- _mm_mul_pd(prod_SSE0, lij3_SSE0));
- t1_SSE1 = _mm_add_pd(_mm_mul_pd(half_SSE, lij2_SSE1),
- _mm_mul_pd(prod_SSE1, lij3_SSE1));
- t1_SSE0 = _mm_sub_pd(t1_SSE0,
- _mm_mul_pd(onefourth_SSE,
- _mm_add_pd(_mm_mul_pd(lij_SSE0, rinv_SSE0),
- _mm_mul_pd(lij3_SSE0, dr_SSE0))));
- t1_SSE1 = _mm_sub_pd(t1_SSE1,
- _mm_mul_pd(onefourth_SSE,
- _mm_add_pd(_mm_mul_pd(lij_SSE1, rinv_SSE1),
- _mm_mul_pd(lij3_SSE1, dr_SSE1))));
- t2_SSE0 = _mm_mul_pd(onefourth_SSE,
- _mm_add_pd(_mm_mul_pd(uij_SSE0, rinv_SSE0),
- _mm_mul_pd(uij3_SSE0, dr_SSE0)));
- t2_SSE1 = _mm_mul_pd(onefourth_SSE,
- _mm_add_pd(_mm_mul_pd(uij_SSE1, rinv_SSE1),
- _mm_mul_pd(uij3_SSE1, dr_SSE1)));
- t2_SSE0 = _mm_sub_pd(t2_SSE0,
- _mm_add_pd(_mm_mul_pd(half_SSE, uij2_SSE0),
- _mm_mul_pd(prod_SSE0, uij3_SSE0)));
- t2_SSE1 = _mm_sub_pd(t2_SSE1,
- _mm_add_pd(_mm_mul_pd(half_SSE, uij2_SSE1),
- _mm_mul_pd(prod_SSE1, uij3_SSE1)));
-
- t3_SSE0 = _mm_mul_pd(_mm_mul_pd(onefourth_SSE, logterm_SSE0),
- _mm_mul_pd(rinv_SSE0, rinv_SSE0));
- t3_SSE1 = _mm_mul_pd(_mm_mul_pd(onefourth_SSE, logterm_SSE1),
- _mm_mul_pd(rinv_SSE1, rinv_SSE1));
-
- t3_SSE0 = _mm_sub_pd(t3_SSE0,
- _mm_mul_pd(_mm_mul_pd(diff2_SSE0, oneeighth_SSE),
- _mm_add_pd(one_SSE,
- _mm_mul_pd(sk2_rinv_SSE0, rinv_SSE0))));
- t3_SSE1 = _mm_sub_pd(t3_SSE1,
- _mm_mul_pd(_mm_mul_pd(diff2_SSE1, oneeighth_SSE),
- _mm_add_pd(one_SSE,
- _mm_mul_pd(sk2_rinv_SSE1, rinv_SSE1))));
-
-
- t1_SSE0 = _mm_mul_pd(rinv_SSE0,
- _mm_add_pd(_mm_mul_pd(dlij_SSE0, t1_SSE0),
- _mm_add_pd(t2_SSE0, t3_SSE0)));
- t1_SSE1 = _mm_mul_pd(rinv_SSE1,
- _mm_add_pd(_mm_mul_pd(dlij_SSE1, t1_SSE1),
- _mm_add_pd(t2_SSE1, t3_SSE1)));
-
- _mm_store_pd(dadx, _mm_and_pd(t1_SSE0, obc_mask1_SSE0));
- dadx += 2;
- _mm_store_pd(dadx, _mm_and_pd(t1_SSE1, obc_mask1_SSE1));
- dadx += 2;
- }
-
- /* Main part, no exclusions */
- for (j = nj1; j < nj2; j += UNROLLJ)
- {
- /* load j atom coordinates */
- jx_SSE = _mm_load_pd(x_align+j);
- jy_SSE = _mm_load_pd(y_align+j);
- jz_SSE = _mm_load_pd(z_align+j);
-
- /* Calculate distance */
- dx_SSE0 = _mm_sub_pd(ix_SSE0, jx_SSE);
- dy_SSE0 = _mm_sub_pd(iy_SSE0, jy_SSE);
- dz_SSE0 = _mm_sub_pd(iz_SSE0, jz_SSE);
- dx_SSE1 = _mm_sub_pd(ix_SSE1, jx_SSE);
- dy_SSE1 = _mm_sub_pd(iy_SSE1, jy_SSE);
- dz_SSE1 = _mm_sub_pd(iz_SSE1, jz_SSE);
-
- /* rsq = dx*dx+dy*dy+dz*dz */
- rsq_SSE0 = gmx_mm_calc_rsq_pd(dx_SSE0, dy_SSE0, dz_SSE0);
- rsq_SSE1 = gmx_mm_calc_rsq_pd(dx_SSE1, dy_SSE1, dz_SSE1);
-
- /* Calculate 1/r and 1/r2 */
- rinv_SSE0 = gmx_mm_invsqrt_pd(rsq_SSE0);
- rinv_SSE1 = gmx_mm_invsqrt_pd(rsq_SSE1);
-
- /* Apply mask */
- rinv_SSE0 = _mm_and_pd(rinv_SSE0, imask_SSE0);
- rinv_SSE1 = _mm_and_pd(rinv_SSE1, imask_SSE1);
-
- dr_SSE0 = _mm_mul_pd(rsq_SSE0, rinv_SSE0);
- dr_SSE1 = _mm_mul_pd(rsq_SSE1, rinv_SSE1);
-
- sk_aj_SSE = _mm_load_pd(obc_param+j);
- raj_SSE = _mm_load_pd(gb_radius+j);
-
- raj_inv_SSE = gmx_mm_inv_pd(raj_SSE);
-
- /* Evaluate influence of atom aj -> ai */
- t1_SSE0 = _mm_add_pd(dr_SSE0, sk_aj_SSE);
- t1_SSE1 = _mm_add_pd(dr_SSE1, sk_aj_SSE);
- t2_SSE0 = _mm_sub_pd(dr_SSE0, sk_aj_SSE);
- t2_SSE1 = _mm_sub_pd(dr_SSE1, sk_aj_SSE);
- t3_SSE0 = _mm_sub_pd(sk_aj_SSE, dr_SSE0);
- t3_SSE1 = _mm_sub_pd(sk_aj_SSE, dr_SSE1);
-
- obc_mask1_SSE0 = _mm_cmplt_pd(rai_SSE0, t1_SSE0);
- obc_mask1_SSE1 = _mm_cmplt_pd(rai_SSE1, t1_SSE1);
- obc_mask2_SSE0 = _mm_cmplt_pd(rai_SSE0, t2_SSE0);
- obc_mask2_SSE1 = _mm_cmplt_pd(rai_SSE1, t2_SSE1);
- obc_mask3_SSE0 = _mm_cmplt_pd(rai_SSE0, t3_SSE0);
- obc_mask3_SSE1 = _mm_cmplt_pd(rai_SSE1, t3_SSE1);
- obc_mask1_SSE0 = _mm_and_pd(obc_mask1_SSE0, imask_SSE0);
- obc_mask1_SSE1 = _mm_and_pd(obc_mask1_SSE1, imask_SSE1);
-
- uij_SSE0 = gmx_mm_inv_pd(t1_SSE0);
- uij_SSE1 = gmx_mm_inv_pd(t1_SSE1);
- lij_SSE0 = _mm_or_pd( _mm_and_pd(obc_mask2_SSE0, gmx_mm_inv_pd(t2_SSE0)),
- _mm_andnot_pd(obc_mask2_SSE0, rai_inv_SSE0));
- lij_SSE1 = _mm_or_pd( _mm_and_pd(obc_mask2_SSE1, gmx_mm_inv_pd(t2_SSE1)),
- _mm_andnot_pd(obc_mask2_SSE1, rai_inv_SSE1));
- dlij_SSE0 = _mm_and_pd(one_SSE, obc_mask2_SSE0);
- dlij_SSE1 = _mm_and_pd(one_SSE, obc_mask2_SSE1);
-
- uij2_SSE0 = _mm_mul_pd(uij_SSE0, uij_SSE0);
- uij2_SSE1 = _mm_mul_pd(uij_SSE1, uij_SSE1);
- uij3_SSE0 = _mm_mul_pd(uij2_SSE0, uij_SSE0);
- uij3_SSE1 = _mm_mul_pd(uij2_SSE1, uij_SSE1);
- lij2_SSE0 = _mm_mul_pd(lij_SSE0, lij_SSE0);
- lij2_SSE1 = _mm_mul_pd(lij_SSE1, lij_SSE1);
- lij3_SSE0 = _mm_mul_pd(lij2_SSE0, lij_SSE0);
- lij3_SSE1 = _mm_mul_pd(lij2_SSE1, lij_SSE1);
-
- diff2_SSE0 = _mm_sub_pd(uij2_SSE0, lij2_SSE0);
- diff2_SSE1 = _mm_sub_pd(uij2_SSE1, lij2_SSE1);
- lij_inv_SSE0 = gmx_mm_invsqrt_pd(lij2_SSE0);
- lij_inv_SSE1 = gmx_mm_invsqrt_pd(lij2_SSE1);
- sk2_aj_SSE = _mm_mul_pd(sk_aj_SSE, sk_aj_SSE);
- sk2_rinv_SSE0 = _mm_mul_pd(sk2_aj_SSE, rinv_SSE0);
- sk2_rinv_SSE1 = _mm_mul_pd(sk2_aj_SSE, rinv_SSE1);
- prod_SSE0 = _mm_mul_pd(onefourth_SSE, sk2_rinv_SSE0);
- prod_SSE1 = _mm_mul_pd(onefourth_SSE, sk2_rinv_SSE1);
-
- logterm_SSE0 = gmx_mm_log_pd(_mm_mul_pd(uij_SSE0, lij_inv_SSE0));
- logterm_SSE1 = gmx_mm_log_pd(_mm_mul_pd(uij_SSE1, lij_inv_SSE1));
-
- t1_SSE0 = _mm_sub_pd(lij_SSE0, uij_SSE0);
- t1_SSE1 = _mm_sub_pd(lij_SSE1, uij_SSE1);
- t2_SSE0 = _mm_mul_pd(diff2_SSE0,
- _mm_sub_pd(_mm_mul_pd(onefourth_SSE, dr_SSE0),
- prod_SSE0));
- t2_SSE1 = _mm_mul_pd(diff2_SSE1,
- _mm_sub_pd(_mm_mul_pd(onefourth_SSE, dr_SSE1),
- prod_SSE1));
-
- t3_SSE0 = _mm_mul_pd(half_SSE, _mm_mul_pd(rinv_SSE0, logterm_SSE0));
- t3_SSE1 = _mm_mul_pd(half_SSE, _mm_mul_pd(rinv_SSE1, logterm_SSE1));
- t1_SSE0 = _mm_add_pd(t1_SSE0, _mm_add_pd(t2_SSE0, t3_SSE0));
- t1_SSE1 = _mm_add_pd(t1_SSE1, _mm_add_pd(t2_SSE1, t3_SSE1));
- t4_SSE0 = _mm_mul_pd(two_SSE, _mm_sub_pd(rai_inv_SSE0, lij_SSE0));
- t4_SSE1 = _mm_mul_pd(two_SSE, _mm_sub_pd(rai_inv_SSE1, lij_SSE1));
- t4_SSE0 = _mm_and_pd(t4_SSE0, obc_mask3_SSE0);
- t4_SSE1 = _mm_and_pd(t4_SSE1, obc_mask3_SSE1);
- t1_SSE0 = _mm_mul_pd(half_SSE, _mm_add_pd(t1_SSE0, t4_SSE0));
- t1_SSE1 = _mm_mul_pd(half_SSE, _mm_add_pd(t1_SSE1, t4_SSE1));
-
- sum_ai_SSE0 = _mm_add_pd(sum_ai_SSE0, _mm_and_pd(t1_SSE0, obc_mask1_SSE0));
- sum_ai_SSE1 = _mm_add_pd(sum_ai_SSE1, _mm_and_pd(t1_SSE1, obc_mask1_SSE1));
-
- t1_SSE0 = _mm_add_pd(_mm_mul_pd(half_SSE, lij2_SSE0),
- _mm_mul_pd(prod_SSE0, lij3_SSE0));
- t1_SSE1 = _mm_add_pd(_mm_mul_pd(half_SSE, lij2_SSE1),
- _mm_mul_pd(prod_SSE1, lij3_SSE1));
-
- t1_SSE0 = _mm_sub_pd(t1_SSE0,
- _mm_mul_pd(onefourth_SSE,
- _mm_add_pd(_mm_mul_pd(lij_SSE0, rinv_SSE0),
- _mm_mul_pd(lij3_SSE0, dr_SSE0))));
- t1_SSE1 = _mm_sub_pd(t1_SSE1,
- _mm_mul_pd(onefourth_SSE,
- _mm_add_pd(_mm_mul_pd(lij_SSE1, rinv_SSE1),
- _mm_mul_pd(lij3_SSE1, dr_SSE1))));
-
- t2_SSE0 = _mm_mul_pd(onefourth_SSE,
- _mm_add_pd(_mm_mul_pd(uij_SSE0, rinv_SSE0),
- _mm_mul_pd(uij3_SSE0, dr_SSE0)));
- t2_SSE1 = _mm_mul_pd(onefourth_SSE,
- _mm_add_pd(_mm_mul_pd(uij_SSE1, rinv_SSE1),
- _mm_mul_pd(uij3_SSE1, dr_SSE1)));
- t2_SSE0 = _mm_sub_pd(t2_SSE0,
- _mm_add_pd(_mm_mul_pd(half_SSE, uij2_SSE0),
- _mm_mul_pd(prod_SSE0, uij3_SSE0)));
- t2_SSE1 = _mm_sub_pd(t2_SSE1,
- _mm_add_pd(_mm_mul_pd(half_SSE, uij2_SSE1),
- _mm_mul_pd(prod_SSE1, uij3_SSE1)));
- t3_SSE0 = _mm_mul_pd(_mm_mul_pd(onefourth_SSE, logterm_SSE0),
- _mm_mul_pd(rinv_SSE0, rinv_SSE0));
- t3_SSE1 = _mm_mul_pd(_mm_mul_pd(onefourth_SSE, logterm_SSE1),
- _mm_mul_pd(rinv_SSE1, rinv_SSE1));
- t3_SSE0 = _mm_sub_pd(t3_SSE0,
- _mm_mul_pd(_mm_mul_pd(diff2_SSE0, oneeighth_SSE),
- _mm_add_pd(one_SSE,
- _mm_mul_pd(sk2_rinv_SSE0, rinv_SSE0))));
- t3_SSE1 = _mm_sub_pd(t3_SSE1,
- _mm_mul_pd(_mm_mul_pd(diff2_SSE1, oneeighth_SSE),
- _mm_add_pd(one_SSE,
- _mm_mul_pd(sk2_rinv_SSE1, rinv_SSE1))));
-
- t1_SSE0 = _mm_mul_pd(rinv_SSE0,
- _mm_add_pd(_mm_mul_pd(dlij_SSE0, t1_SSE0),
- _mm_add_pd(t2_SSE0, t3_SSE0)));
- t1_SSE1 = _mm_mul_pd(rinv_SSE1,
- _mm_add_pd(_mm_mul_pd(dlij_SSE1, t1_SSE1),
- _mm_add_pd(t2_SSE1, t3_SSE1)));
-
- _mm_store_pd(dadx, _mm_and_pd(t1_SSE0, obc_mask1_SSE0));
- dadx += 2;
- _mm_store_pd(dadx, _mm_and_pd(t1_SSE1, obc_mask1_SSE1));
- dadx += 2;
-
- /* Evaluate influence of atom ai -> aj */
- t1_SSE0 = _mm_add_pd(dr_SSE0, sk_ai_SSE0);
- t1_SSE1 = _mm_add_pd(dr_SSE1, sk_ai_SSE1);
- t2_SSE0 = _mm_sub_pd(dr_SSE0, sk_ai_SSE0);
- t2_SSE1 = _mm_sub_pd(dr_SSE1, sk_ai_SSE1);
- t3_SSE0 = _mm_sub_pd(sk_ai_SSE0, dr_SSE0);
- t3_SSE1 = _mm_sub_pd(sk_ai_SSE1, dr_SSE1);
-
- obc_mask1_SSE0 = _mm_cmplt_pd(raj_SSE, t1_SSE0);
- obc_mask1_SSE1 = _mm_cmplt_pd(raj_SSE, t1_SSE1);
- obc_mask2_SSE0 = _mm_cmplt_pd(raj_SSE, t2_SSE0);
- obc_mask2_SSE1 = _mm_cmplt_pd(raj_SSE, t2_SSE1);
- obc_mask3_SSE0 = _mm_cmplt_pd(raj_SSE, t3_SSE0);
- obc_mask3_SSE1 = _mm_cmplt_pd(raj_SSE, t3_SSE1);
- obc_mask1_SSE0 = _mm_and_pd(obc_mask1_SSE0, imask_SSE0);
- obc_mask1_SSE1 = _mm_and_pd(obc_mask1_SSE1, imask_SSE1);
-
- uij_SSE0 = gmx_mm_inv_pd(t1_SSE0);
- uij_SSE1 = gmx_mm_inv_pd(t1_SSE1);
- lij_SSE0 = _mm_or_pd( _mm_and_pd(obc_mask2_SSE0, gmx_mm_inv_pd(t2_SSE0)),
- _mm_andnot_pd(obc_mask2_SSE0, raj_inv_SSE));
- lij_SSE1 = _mm_or_pd( _mm_and_pd(obc_mask2_SSE1, gmx_mm_inv_pd(t2_SSE1)),
- _mm_andnot_pd(obc_mask2_SSE1, raj_inv_SSE));
- dlij_SSE0 = _mm_and_pd(one_SSE, obc_mask2_SSE0);
- dlij_SSE1 = _mm_and_pd(one_SSE, obc_mask2_SSE1);
-
- uij2_SSE0 = _mm_mul_pd(uij_SSE0, uij_SSE0);
- uij2_SSE1 = _mm_mul_pd(uij_SSE1, uij_SSE1);
- uij3_SSE0 = _mm_mul_pd(uij2_SSE0, uij_SSE0);
- uij3_SSE1 = _mm_mul_pd(uij2_SSE1, uij_SSE1);
- lij2_SSE0 = _mm_mul_pd(lij_SSE0, lij_SSE0);
- lij2_SSE1 = _mm_mul_pd(lij_SSE1, lij_SSE1);
- lij3_SSE0 = _mm_mul_pd(lij2_SSE0, lij_SSE0);
- lij3_SSE1 = _mm_mul_pd(lij2_SSE1, lij_SSE1);
-
- diff2_SSE0 = _mm_sub_pd(uij2_SSE0, lij2_SSE0);
- diff2_SSE1 = _mm_sub_pd(uij2_SSE1, lij2_SSE1);
- lij_inv_SSE0 = gmx_mm_invsqrt_pd(lij2_SSE0);
- lij_inv_SSE1 = gmx_mm_invsqrt_pd(lij2_SSE1);
- sk2_rinv_SSE0 = _mm_mul_pd(sk2_ai_SSE0, rinv_SSE0);
- sk2_rinv_SSE1 = _mm_mul_pd(sk2_ai_SSE1, rinv_SSE1);
- prod_SSE0 = _mm_mul_pd(onefourth_SSE, sk2_rinv_SSE0);
- prod_SSE1 = _mm_mul_pd(onefourth_SSE, sk2_rinv_SSE1);
-
- logterm_SSE0 = gmx_mm_log_pd(_mm_mul_pd(uij_SSE0, lij_inv_SSE0));
- logterm_SSE1 = gmx_mm_log_pd(_mm_mul_pd(uij_SSE1, lij_inv_SSE1));
- t1_SSE0 = _mm_sub_pd(lij_SSE0, uij_SSE0);
- t1_SSE1 = _mm_sub_pd(lij_SSE1, uij_SSE1);
- t2_SSE0 = _mm_mul_pd(diff2_SSE0,
- _mm_sub_pd(_mm_mul_pd(onefourth_SSE, dr_SSE0),
- prod_SSE0));
- t2_SSE1 = _mm_mul_pd(diff2_SSE1,
- _mm_sub_pd(_mm_mul_pd(onefourth_SSE, dr_SSE1),
- prod_SSE1));
- t3_SSE0 = _mm_mul_pd(half_SSE, _mm_mul_pd(rinv_SSE0, logterm_SSE0));
- t3_SSE1 = _mm_mul_pd(half_SSE, _mm_mul_pd(rinv_SSE1, logterm_SSE1));
- t1_SSE0 = _mm_add_pd(t1_SSE0, _mm_add_pd(t2_SSE0, t3_SSE0));
- t1_SSE1 = _mm_add_pd(t1_SSE1, _mm_add_pd(t2_SSE1, t3_SSE1));
- t4_SSE0 = _mm_mul_pd(two_SSE, _mm_sub_pd(raj_inv_SSE, lij_SSE0));
- t4_SSE1 = _mm_mul_pd(two_SSE, _mm_sub_pd(raj_inv_SSE, lij_SSE1));
- t4_SSE0 = _mm_and_pd(t4_SSE0, obc_mask3_SSE0);
- t4_SSE1 = _mm_and_pd(t4_SSE1, obc_mask3_SSE1);
- t1_SSE0 = _mm_mul_pd(half_SSE, _mm_add_pd(t1_SSE0, t4_SSE0));
- t1_SSE1 = _mm_mul_pd(half_SSE, _mm_add_pd(t1_SSE1, t4_SSE1));
-
- _mm_store_pd(work+j, _mm_add_pd(_mm_load_pd(work+j),
- _mm_add_pd(_mm_and_pd(t1_SSE0, obc_mask1_SSE0),
- _mm_and_pd(t1_SSE1, obc_mask1_SSE1))));
-
- t1_SSE0 = _mm_add_pd(_mm_mul_pd(half_SSE, lij2_SSE0),
- _mm_mul_pd(prod_SSE0, lij3_SSE0));
- t1_SSE1 = _mm_add_pd(_mm_mul_pd(half_SSE, lij2_SSE1),
- _mm_mul_pd(prod_SSE1, lij3_SSE1));
- t1_SSE0 = _mm_sub_pd(t1_SSE0,
- _mm_mul_pd(onefourth_SSE,
- _mm_add_pd(_mm_mul_pd(lij_SSE0, rinv_SSE0),
- _mm_mul_pd(lij3_SSE0, dr_SSE0))));
- t1_SSE1 = _mm_sub_pd(t1_SSE1,
- _mm_mul_pd(onefourth_SSE,
- _mm_add_pd(_mm_mul_pd(lij_SSE1, rinv_SSE1),
- _mm_mul_pd(lij3_SSE1, dr_SSE1))));
- t2_SSE0 = _mm_mul_pd(onefourth_SSE,
- _mm_add_pd(_mm_mul_pd(uij_SSE0, rinv_SSE0),
- _mm_mul_pd(uij3_SSE0, dr_SSE0)));
- t2_SSE1 = _mm_mul_pd(onefourth_SSE,
- _mm_add_pd(_mm_mul_pd(uij_SSE1, rinv_SSE1),
- _mm_mul_pd(uij3_SSE1, dr_SSE1)));
- t2_SSE0 = _mm_sub_pd(t2_SSE0,
- _mm_add_pd(_mm_mul_pd(half_SSE, uij2_SSE0),
- _mm_mul_pd(prod_SSE0, uij3_SSE0)));
- t2_SSE1 = _mm_sub_pd(t2_SSE1,
- _mm_add_pd(_mm_mul_pd(half_SSE, uij2_SSE1),
- _mm_mul_pd(prod_SSE1, uij3_SSE1)));
-
- t3_SSE0 = _mm_mul_pd(_mm_mul_pd(onefourth_SSE, logterm_SSE0),
- _mm_mul_pd(rinv_SSE0, rinv_SSE0));
- t3_SSE1 = _mm_mul_pd(_mm_mul_pd(onefourth_SSE, logterm_SSE1),
- _mm_mul_pd(rinv_SSE1, rinv_SSE1));
-
- t3_SSE0 = _mm_sub_pd(t3_SSE0,
- _mm_mul_pd(_mm_mul_pd(diff2_SSE0, oneeighth_SSE),
- _mm_add_pd(one_SSE,
- _mm_mul_pd(sk2_rinv_SSE0, rinv_SSE0))));
- t3_SSE1 = _mm_sub_pd(t3_SSE1,
- _mm_mul_pd(_mm_mul_pd(diff2_SSE1, oneeighth_SSE),
- _mm_add_pd(one_SSE,
- _mm_mul_pd(sk2_rinv_SSE1, rinv_SSE1))));
-
- t1_SSE0 = _mm_mul_pd(rinv_SSE0,
- _mm_add_pd(_mm_mul_pd(dlij_SSE0, t1_SSE0),
- _mm_add_pd(t2_SSE0, t3_SSE0)));
- t1_SSE1 = _mm_mul_pd(rinv_SSE1,
- _mm_add_pd(_mm_mul_pd(dlij_SSE1, t1_SSE1),
- _mm_add_pd(t2_SSE1, t3_SSE1)));
-
- _mm_store_pd(dadx, _mm_and_pd(t1_SSE0, obc_mask1_SSE0));
- dadx += 2;
- _mm_store_pd(dadx, _mm_and_pd(t1_SSE1, obc_mask1_SSE1));
- dadx += 2;
- }
-
- /* Epilogue part, including exclusion mask */
- for (j = nj2; j < nj3; j += UNROLLJ)
- {
- jmask_SSE0 = _mm_load_pd((double *)emask0);
- jmask_SSE1 = _mm_load_pd((double *)emask1);
- emask0 += 2*UNROLLJ;
- emask1 += 2*UNROLLJ;
-
- /* load j atom coordinates */
- jx_SSE = _mm_load_pd(x_align+j);
- jy_SSE = _mm_load_pd(y_align+j);
- jz_SSE = _mm_load_pd(z_align+j);
-
- /* Calculate distance */
- dx_SSE0 = _mm_sub_pd(ix_SSE0, jx_SSE);
- dy_SSE0 = _mm_sub_pd(iy_SSE0, jy_SSE);
- dz_SSE0 = _mm_sub_pd(iz_SSE0, jz_SSE);
- dx_SSE1 = _mm_sub_pd(ix_SSE1, jx_SSE);
- dy_SSE1 = _mm_sub_pd(iy_SSE1, jy_SSE);
- dz_SSE1 = _mm_sub_pd(iz_SSE1, jz_SSE);
-
- /* rsq = dx*dx+dy*dy+dz*dz */
- rsq_SSE0 = gmx_mm_calc_rsq_pd(dx_SSE0, dy_SSE0, dz_SSE0);
- rsq_SSE1 = gmx_mm_calc_rsq_pd(dx_SSE1, dy_SSE1, dz_SSE1);
-
- /* Combine masks */
- jmask_SSE0 = _mm_and_pd(jmask_SSE0, imask_SSE0);
- jmask_SSE1 = _mm_and_pd(jmask_SSE1, imask_SSE1);
-
- /* Calculate 1/r and 1/r2 */
- rinv_SSE0 = gmx_mm_invsqrt_pd(rsq_SSE0);
- rinv_SSE1 = gmx_mm_invsqrt_pd(rsq_SSE1);
-
- /* Apply mask */
- rinv_SSE0 = _mm_and_pd(rinv_SSE0, jmask_SSE0);
- rinv_SSE1 = _mm_and_pd(rinv_SSE1, jmask_SSE1);
-
- dr_SSE0 = _mm_mul_pd(rsq_SSE0, rinv_SSE0);
- dr_SSE1 = _mm_mul_pd(rsq_SSE1, rinv_SSE1);
-
- sk_aj_SSE = _mm_load_pd(obc_param+j);
- raj_SSE = _mm_load_pd(gb_radius+j);
-
- raj_inv_SSE = gmx_mm_inv_pd(raj_SSE);
-
- /* Evaluate influence of atom aj -> ai */
- t1_SSE0 = _mm_add_pd(dr_SSE0, sk_aj_SSE);
- t1_SSE1 = _mm_add_pd(dr_SSE1, sk_aj_SSE);
- t2_SSE0 = _mm_sub_pd(dr_SSE0, sk_aj_SSE);
- t2_SSE1 = _mm_sub_pd(dr_SSE1, sk_aj_SSE);
- t3_SSE0 = _mm_sub_pd(sk_aj_SSE, dr_SSE0);
- t3_SSE1 = _mm_sub_pd(sk_aj_SSE, dr_SSE1);
-
- obc_mask1_SSE0 = _mm_cmplt_pd(rai_SSE0, t1_SSE0);
- obc_mask1_SSE1 = _mm_cmplt_pd(rai_SSE1, t1_SSE1);
- obc_mask2_SSE0 = _mm_cmplt_pd(rai_SSE0, t2_SSE0);
- obc_mask2_SSE1 = _mm_cmplt_pd(rai_SSE1, t2_SSE1);
- obc_mask3_SSE0 = _mm_cmplt_pd(rai_SSE0, t3_SSE0);
- obc_mask3_SSE1 = _mm_cmplt_pd(rai_SSE1, t3_SSE1);
- obc_mask1_SSE0 = _mm_and_pd(obc_mask1_SSE0, jmask_SSE0);
- obc_mask1_SSE1 = _mm_and_pd(obc_mask1_SSE1, jmask_SSE1);
-
- uij_SSE0 = gmx_mm_inv_pd(t1_SSE0);
- uij_SSE1 = gmx_mm_inv_pd(t1_SSE1);
- lij_SSE0 = _mm_or_pd( _mm_and_pd(obc_mask2_SSE0, gmx_mm_inv_pd(t2_SSE0)),
- _mm_andnot_pd(obc_mask2_SSE0, rai_inv_SSE0));
- lij_SSE1 = _mm_or_pd( _mm_and_pd(obc_mask2_SSE1, gmx_mm_inv_pd(t2_SSE1)),
- _mm_andnot_pd(obc_mask2_SSE1, rai_inv_SSE1));
-
- dlij_SSE0 = _mm_and_pd(one_SSE, obc_mask2_SSE0);
- dlij_SSE1 = _mm_and_pd(one_SSE, obc_mask2_SSE1);
-
- uij2_SSE0 = _mm_mul_pd(uij_SSE0, uij_SSE0);
- uij2_SSE1 = _mm_mul_pd(uij_SSE1, uij_SSE1);
- uij3_SSE0 = _mm_mul_pd(uij2_SSE0, uij_SSE0);
- uij3_SSE1 = _mm_mul_pd(uij2_SSE1, uij_SSE1);
- lij2_SSE0 = _mm_mul_pd(lij_SSE0, lij_SSE0);
- lij2_SSE1 = _mm_mul_pd(lij_SSE1, lij_SSE1);
- lij3_SSE0 = _mm_mul_pd(lij2_SSE0, lij_SSE0);
- lij3_SSE1 = _mm_mul_pd(lij2_SSE1, lij_SSE1);
-
- diff2_SSE0 = _mm_sub_pd(uij2_SSE0, lij2_SSE0);
- diff2_SSE1 = _mm_sub_pd(uij2_SSE1, lij2_SSE1);
- lij_inv_SSE0 = gmx_mm_invsqrt_pd(lij2_SSE0);
- lij_inv_SSE1 = gmx_mm_invsqrt_pd(lij2_SSE1);
- sk2_aj_SSE = _mm_mul_pd(sk_aj_SSE, sk_aj_SSE);
- sk2_rinv_SSE0 = _mm_mul_pd(sk2_aj_SSE, rinv_SSE0);
- sk2_rinv_SSE1 = _mm_mul_pd(sk2_aj_SSE, rinv_SSE1);
- prod_SSE0 = _mm_mul_pd(onefourth_SSE, sk2_rinv_SSE0);
- prod_SSE1 = _mm_mul_pd(onefourth_SSE, sk2_rinv_SSE1);
-
- logterm_SSE0 = gmx_mm_log_pd(_mm_mul_pd(uij_SSE0, lij_inv_SSE0));
- logterm_SSE1 = gmx_mm_log_pd(_mm_mul_pd(uij_SSE1, lij_inv_SSE1));
-
- t1_SSE0 = _mm_sub_pd(lij_SSE0, uij_SSE0);
- t1_SSE1 = _mm_sub_pd(lij_SSE1, uij_SSE1);
- t2_SSE0 = _mm_mul_pd(diff2_SSE0,
- _mm_sub_pd(_mm_mul_pd(onefourth_SSE, dr_SSE0),
- prod_SSE0));
- t2_SSE1 = _mm_mul_pd(diff2_SSE1,
- _mm_sub_pd(_mm_mul_pd(onefourth_SSE, dr_SSE1),
- prod_SSE1));
-
- t3_SSE0 = _mm_mul_pd(half_SSE, _mm_mul_pd(rinv_SSE0, logterm_SSE0));
- t3_SSE1 = _mm_mul_pd(half_SSE, _mm_mul_pd(rinv_SSE1, logterm_SSE1));
- t1_SSE0 = _mm_add_pd(t1_SSE0, _mm_add_pd(t2_SSE0, t3_SSE0));
- t1_SSE1 = _mm_add_pd(t1_SSE1, _mm_add_pd(t2_SSE1, t3_SSE1));
- t4_SSE0 = _mm_mul_pd(two_SSE, _mm_sub_pd(rai_inv_SSE0, lij_SSE0));
- t4_SSE1 = _mm_mul_pd(two_SSE, _mm_sub_pd(rai_inv_SSE1, lij_SSE1));
- t4_SSE0 = _mm_and_pd(t4_SSE0, obc_mask3_SSE0);
- t4_SSE1 = _mm_and_pd(t4_SSE1, obc_mask3_SSE1);
- t1_SSE0 = _mm_mul_pd(half_SSE, _mm_add_pd(t1_SSE0, t4_SSE0));
- t1_SSE1 = _mm_mul_pd(half_SSE, _mm_add_pd(t1_SSE1, t4_SSE1));
-
- sum_ai_SSE0 = _mm_add_pd(sum_ai_SSE0, _mm_and_pd(t1_SSE0, obc_mask1_SSE0));
- sum_ai_SSE1 = _mm_add_pd(sum_ai_SSE1, _mm_and_pd(t1_SSE1, obc_mask1_SSE1));
-
- t1_SSE0 = _mm_add_pd(_mm_mul_pd(half_SSE, lij2_SSE0),
- _mm_mul_pd(prod_SSE0, lij3_SSE0));
- t1_SSE1 = _mm_add_pd(_mm_mul_pd(half_SSE, lij2_SSE1),
- _mm_mul_pd(prod_SSE1, lij3_SSE1));
- t1_SSE0 = _mm_sub_pd(t1_SSE0,
- _mm_mul_pd(onefourth_SSE,
- _mm_add_pd(_mm_mul_pd(lij_SSE0, rinv_SSE0),
- _mm_mul_pd(lij3_SSE0, dr_SSE0))));
- t1_SSE1 = _mm_sub_pd(t1_SSE1,
- _mm_mul_pd(onefourth_SSE,
- _mm_add_pd(_mm_mul_pd(lij_SSE1, rinv_SSE1),
- _mm_mul_pd(lij3_SSE1, dr_SSE1))));
-
- t2_SSE0 = _mm_mul_pd(onefourth_SSE,
- _mm_add_pd(_mm_mul_pd(uij_SSE0, rinv_SSE0),
- _mm_mul_pd(uij3_SSE0, dr_SSE0)));
- t2_SSE1 = _mm_mul_pd(onefourth_SSE,
- _mm_add_pd(_mm_mul_pd(uij_SSE1, rinv_SSE1),
- _mm_mul_pd(uij3_SSE1, dr_SSE1)));
- t2_SSE0 = _mm_sub_pd(t2_SSE0,
- _mm_add_pd(_mm_mul_pd(half_SSE, uij2_SSE0),
- _mm_mul_pd(prod_SSE0, uij3_SSE0)));
- t2_SSE1 = _mm_sub_pd(t2_SSE1,
- _mm_add_pd(_mm_mul_pd(half_SSE, uij2_SSE1),
- _mm_mul_pd(prod_SSE1, uij3_SSE1)));
- t3_SSE0 = _mm_mul_pd(_mm_mul_pd(onefourth_SSE, logterm_SSE0),
- _mm_mul_pd(rinv_SSE0, rinv_SSE0));
- t3_SSE1 = _mm_mul_pd(_mm_mul_pd(onefourth_SSE, logterm_SSE1),
- _mm_mul_pd(rinv_SSE1, rinv_SSE1));
- t3_SSE0 = _mm_sub_pd(t3_SSE0,
- _mm_mul_pd(_mm_mul_pd(diff2_SSE0, oneeighth_SSE),
- _mm_add_pd(one_SSE,
- _mm_mul_pd(sk2_rinv_SSE0, rinv_SSE0))));
- t3_SSE1 = _mm_sub_pd(t3_SSE1,
- _mm_mul_pd(_mm_mul_pd(diff2_SSE1, oneeighth_SSE),
- _mm_add_pd(one_SSE,
- _mm_mul_pd(sk2_rinv_SSE1, rinv_SSE1))));
-
- t1_SSE0 = _mm_mul_pd(rinv_SSE0,
- _mm_add_pd(_mm_mul_pd(dlij_SSE0, t1_SSE0),
- _mm_add_pd(t2_SSE0, t3_SSE0)));
- t1_SSE1 = _mm_mul_pd(rinv_SSE1,
- _mm_add_pd(_mm_mul_pd(dlij_SSE1, t1_SSE1),
- _mm_add_pd(t2_SSE1, t3_SSE1)));
-
- _mm_store_pd(dadx, _mm_and_pd(t1_SSE0, obc_mask1_SSE0));
- dadx += 2;
- _mm_store_pd(dadx, _mm_and_pd(t1_SSE1, obc_mask1_SSE1));
- dadx += 2;
-
- /* Evaluate influence of atom ai -> aj */
- t1_SSE0 = _mm_add_pd(dr_SSE0, sk_ai_SSE0);
- t1_SSE1 = _mm_add_pd(dr_SSE1, sk_ai_SSE1);
- t2_SSE0 = _mm_sub_pd(dr_SSE0, sk_ai_SSE0);
- t2_SSE1 = _mm_sub_pd(dr_SSE1, sk_ai_SSE1);
- t3_SSE0 = _mm_sub_pd(sk_ai_SSE0, dr_SSE0);
- t3_SSE1 = _mm_sub_pd(sk_ai_SSE1, dr_SSE1);
-
- obc_mask1_SSE0 = _mm_cmplt_pd(raj_SSE, t1_SSE0);
- obc_mask1_SSE1 = _mm_cmplt_pd(raj_SSE, t1_SSE1);
- obc_mask2_SSE0 = _mm_cmplt_pd(raj_SSE, t2_SSE0);
- obc_mask2_SSE1 = _mm_cmplt_pd(raj_SSE, t2_SSE1);
- obc_mask3_SSE0 = _mm_cmplt_pd(raj_SSE, t3_SSE0);
- obc_mask3_SSE1 = _mm_cmplt_pd(raj_SSE, t3_SSE1);
- obc_mask1_SSE0 = _mm_and_pd(obc_mask1_SSE0, jmask_SSE0);
- obc_mask1_SSE1 = _mm_and_pd(obc_mask1_SSE1, jmask_SSE1);
-
- uij_SSE0 = gmx_mm_inv_pd(t1_SSE0);
- uij_SSE1 = gmx_mm_inv_pd(t1_SSE1);
- lij_SSE0 = _mm_or_pd( _mm_and_pd(obc_mask2_SSE0, gmx_mm_inv_pd(t2_SSE0)),
- _mm_andnot_pd(obc_mask2_SSE0, raj_inv_SSE));
- lij_SSE1 = _mm_or_pd( _mm_and_pd(obc_mask2_SSE1, gmx_mm_inv_pd(t2_SSE1)),
- _mm_andnot_pd(obc_mask2_SSE1, raj_inv_SSE));
-
- dlij_SSE0 = _mm_and_pd(one_SSE, obc_mask2_SSE0);
- dlij_SSE1 = _mm_and_pd(one_SSE, obc_mask2_SSE1);
-
- uij2_SSE0 = _mm_mul_pd(uij_SSE0, uij_SSE0);
- uij2_SSE1 = _mm_mul_pd(uij_SSE1, uij_SSE1);
- uij3_SSE0 = _mm_mul_pd(uij2_SSE0, uij_SSE0);
- uij3_SSE1 = _mm_mul_pd(uij2_SSE1, uij_SSE1);
- lij2_SSE0 = _mm_mul_pd(lij_SSE0, lij_SSE0);
- lij2_SSE1 = _mm_mul_pd(lij_SSE1, lij_SSE1);
- lij3_SSE0 = _mm_mul_pd(lij2_SSE0, lij_SSE0);
- lij3_SSE1 = _mm_mul_pd(lij2_SSE1, lij_SSE1);
-
- diff2_SSE0 = _mm_sub_pd(uij2_SSE0, lij2_SSE0);
- diff2_SSE1 = _mm_sub_pd(uij2_SSE1, lij2_SSE1);
- lij_inv_SSE0 = gmx_mm_invsqrt_pd(lij2_SSE0);
- lij_inv_SSE1 = gmx_mm_invsqrt_pd(lij2_SSE1);
- sk2_rinv_SSE0 = _mm_mul_pd(sk2_ai_SSE0, rinv_SSE0);
- sk2_rinv_SSE1 = _mm_mul_pd(sk2_ai_SSE1, rinv_SSE1);
- prod_SSE0 = _mm_mul_pd(onefourth_SSE, sk2_rinv_SSE0);
- prod_SSE1 = _mm_mul_pd(onefourth_SSE, sk2_rinv_SSE1);
-
- logterm_SSE0 = gmx_mm_log_pd(_mm_mul_pd(uij_SSE0, lij_inv_SSE0));
- logterm_SSE1 = gmx_mm_log_pd(_mm_mul_pd(uij_SSE1, lij_inv_SSE1));
- t1_SSE0 = _mm_sub_pd(lij_SSE0, uij_SSE0);
- t1_SSE1 = _mm_sub_pd(lij_SSE1, uij_SSE1);
- t2_SSE0 = _mm_mul_pd(diff2_SSE0,
- _mm_sub_pd(_mm_mul_pd(onefourth_SSE, dr_SSE0),
- prod_SSE0));
- t2_SSE1 = _mm_mul_pd(diff2_SSE1,
- _mm_sub_pd(_mm_mul_pd(onefourth_SSE, dr_SSE1),
- prod_SSE1));
- t3_SSE0 = _mm_mul_pd(half_SSE, _mm_mul_pd(rinv_SSE0, logterm_SSE0));
- t3_SSE1 = _mm_mul_pd(half_SSE, _mm_mul_pd(rinv_SSE1, logterm_SSE1));
- t1_SSE0 = _mm_add_pd(t1_SSE0, _mm_add_pd(t2_SSE0, t3_SSE0));
- t1_SSE1 = _mm_add_pd(t1_SSE1, _mm_add_pd(t2_SSE1, t3_SSE1));
- t4_SSE0 = _mm_mul_pd(two_SSE, _mm_sub_pd(raj_inv_SSE, lij_SSE0));
- t4_SSE1 = _mm_mul_pd(two_SSE, _mm_sub_pd(raj_inv_SSE, lij_SSE1));
- t4_SSE0 = _mm_and_pd(t4_SSE0, obc_mask3_SSE0);
- t4_SSE1 = _mm_and_pd(t4_SSE1, obc_mask3_SSE1);
- t1_SSE0 = _mm_mul_pd(half_SSE, _mm_add_pd(t1_SSE0, t4_SSE0));
- t1_SSE1 = _mm_mul_pd(half_SSE, _mm_add_pd(t1_SSE1, t4_SSE1));
-
- _mm_store_pd(work+j, _mm_add_pd(_mm_load_pd(work+j),
- _mm_add_pd(_mm_and_pd(t1_SSE0, obc_mask1_SSE0),
- _mm_and_pd(t1_SSE1, obc_mask1_SSE1))));
-
- t1_SSE0 = _mm_add_pd(_mm_mul_pd(half_SSE, lij2_SSE0),
- _mm_mul_pd(prod_SSE0, lij3_SSE0));
- t1_SSE1 = _mm_add_pd(_mm_mul_pd(half_SSE, lij2_SSE1),
- _mm_mul_pd(prod_SSE1, lij3_SSE1));
-
- t1_SSE0 = _mm_sub_pd(t1_SSE0,
- _mm_mul_pd(onefourth_SSE,
- _mm_add_pd(_mm_mul_pd(lij_SSE0, rinv_SSE0),
- _mm_mul_pd(lij3_SSE0, dr_SSE0))));
- t1_SSE1 = _mm_sub_pd(t1_SSE1,
- _mm_mul_pd(onefourth_SSE,
- _mm_add_pd(_mm_mul_pd(lij_SSE1, rinv_SSE1),
- _mm_mul_pd(lij3_SSE1, dr_SSE1))));
- t2_SSE0 = _mm_mul_pd(onefourth_SSE,
- _mm_add_pd(_mm_mul_pd(uij_SSE0, rinv_SSE0),
- _mm_mul_pd(uij3_SSE0, dr_SSE0)));
- t2_SSE1 = _mm_mul_pd(onefourth_SSE,
- _mm_add_pd(_mm_mul_pd(uij_SSE1, rinv_SSE1),
- _mm_mul_pd(uij3_SSE1, dr_SSE1)));
- t2_SSE0 = _mm_sub_pd(t2_SSE0,
- _mm_add_pd(_mm_mul_pd(half_SSE, uij2_SSE0),
- _mm_mul_pd(prod_SSE0, uij3_SSE0)));
- t2_SSE1 = _mm_sub_pd(t2_SSE1,
- _mm_add_pd(_mm_mul_pd(half_SSE, uij2_SSE1),
- _mm_mul_pd(prod_SSE1, uij3_SSE1)));
-
- t3_SSE0 = _mm_mul_pd(_mm_mul_pd(onefourth_SSE, logterm_SSE0),
- _mm_mul_pd(rinv_SSE0, rinv_SSE0));
- t3_SSE1 = _mm_mul_pd(_mm_mul_pd(onefourth_SSE, logterm_SSE1),
- _mm_mul_pd(rinv_SSE1, rinv_SSE1));
-
- t3_SSE0 = _mm_sub_pd(t3_SSE0,
- _mm_mul_pd(_mm_mul_pd(diff2_SSE0, oneeighth_SSE),
- _mm_add_pd(one_SSE,
- _mm_mul_pd(sk2_rinv_SSE0, rinv_SSE0))));
- t3_SSE1 = _mm_sub_pd(t3_SSE1,
- _mm_mul_pd(_mm_mul_pd(diff2_SSE1, oneeighth_SSE),
- _mm_add_pd(one_SSE,
- _mm_mul_pd(sk2_rinv_SSE1, rinv_SSE1))));
-
- t1_SSE0 = _mm_mul_pd(rinv_SSE0,
- _mm_add_pd(_mm_mul_pd(dlij_SSE0, t1_SSE0),
- _mm_add_pd(t2_SSE0, t3_SSE0)));
- t1_SSE1 = _mm_mul_pd(rinv_SSE1,
- _mm_add_pd(_mm_mul_pd(dlij_SSE1, t1_SSE1),
- _mm_add_pd(t2_SSE1, t3_SSE1)));
-
- _mm_store_pd(dadx, _mm_and_pd(t1_SSE0, obc_mask1_SSE0));
- dadx += 2;
- _mm_store_pd(dadx, _mm_and_pd(t1_SSE1, obc_mask1_SSE1));
- dadx += 2;
- }
- GMX_MM_TRANSPOSE2_PD(sum_ai_SSE0, sum_ai_SSE1);
- sum_ai_SSE0 = _mm_add_pd(sum_ai_SSE0, sum_ai_SSE1);
- _mm_store_pd(work+i, _mm_add_pd(sum_ai_SSE0, _mm_load_pd(work+i)));
- }
-
-
- for (i = 0; i < natoms/2+1; i++)
- {
- work[i] += work[natoms+i];
- }
-
- /* Parallel summations would go here if ever implemented in DD */
-
- if (gb_algorithm == egbHCT)
- {
- /* HCT */
- for (i = 0; i < natoms; i++)
- {
- if (born->use[i] != 0)
- {
- rai = top->atomtypes.gb_radius[mdatoms->typeA[i]]-born->gb_doffset;
- sum_ai = 1.0/rai - work[i];
- min_rad = rai + born->gb_doffset;
- rad = 1.0/sum_ai;
-
- born->bRad[i] = rad > min_rad ? rad : min_rad;
- fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
- }
- }
-
- }
- else
- {
- /* OBC */
-
- /* Calculate the radii */
- for (i = 0; i < natoms; i++)
- {
-
- if (born->use[i] != 0)
- {
- rai = top->atomtypes.gb_radius[mdatoms->typeA[i]];
- rai_inv2 = 1.0/rai;
- rai = rai-born->gb_doffset;
- rai_inv = 1.0/rai;
- sum_ai = rai * work[i];
- sum_ai2 = sum_ai * sum_ai;
- sum_ai3 = sum_ai2 * sum_ai;
-
- tsum = tanh(born->obc_alpha*sum_ai-born->obc_beta*sum_ai2+born->obc_gamma*sum_ai3);
- born->bRad[i] = rai_inv - tsum*rai_inv2;
- born->bRad[i] = 1.0 / born->bRad[i];
-
- fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
-
- tchain = rai * (born->obc_alpha-2*born->obc_beta*sum_ai+3*born->obc_gamma*sum_ai2);
- born->drobc[i] = (1.0-tsum*tsum)*tchain*rai_inv2;
- }
- }
- }
-
- return 0;
-}
-
-
-
-
-
-
-
-
-int
-genborn_allvsall_calc_chainrule_sse2_double(t_forcerec * fr,
- t_mdatoms * mdatoms,
- gmx_genborn_t * born,
- double * x,
- double * f,
- int gb_algorithm,
- void * paadata)
-{
- gmx_allvsallgb2_data_t *aadata;
- int natoms;
- int ni0, ni1;
- int nj0, nj1, nj2, nj3;
- int i, j, k, n;
- int idx;
- int * mask;
- int * pmask0;
- int * emask0;
- int * jindex;
-
- double ix, iy, iz;
- double fix, fiy, fiz;
- double jx, jy, jz;
- double dx, dy, dz;
- double tx, ty, tz;
- double rbai, rbaj, fgb, fgb_ai, rbi;
- double * rb;
- double * dadx;
- double * x_align;
- double * y_align;
- double * z_align;
- double * fx_align;
- double * fy_align;
- double * fz_align;
- double tmpsum[2];
-
- __m128d jmask_SSE0, jmask_SSE1;
- __m128d ix_SSE0, iy_SSE0, iz_SSE0;
- __m128d ix_SSE1, iy_SSE1, iz_SSE1;
- __m128d fix_SSE0, fiy_SSE0, fiz_SSE0;
- __m128d fix_SSE1, fiy_SSE1, fiz_SSE1;
- __m128d rbai_SSE0, rbai_SSE1;
- __m128d imask_SSE0, imask_SSE1;
- __m128d jx_SSE, jy_SSE, jz_SSE, rbaj_SSE;
- __m128d dx_SSE0, dy_SSE0, dz_SSE0;
- __m128d dx_SSE1, dy_SSE1, dz_SSE1;
- __m128d fgb_SSE0, fgb_ai_SSE0;
- __m128d fgb_SSE1, fgb_ai_SSE1;
- __m128d tx_SSE0, ty_SSE0, tz_SSE0;
- __m128d tx_SSE1, ty_SSE1, tz_SSE1;
- __m128d t1, t2, tmpSSE;
-
- natoms = mdatoms->nr;
- ni0 = 0;
- ni1 = mdatoms->homenr;
-
- aadata = (gmx_allvsallgb2_data_t *)paadata;
-
- x_align = aadata->x_align;
- y_align = aadata->y_align;
- z_align = aadata->z_align;
- fx_align = aadata->fx_align;
- fy_align = aadata->fy_align;
- fz_align = aadata->fz_align;
-
- jindex = aadata->jindex_gb;
- dadx = fr->dadx;
-
- n = 0;
- rb = aadata->work;
-
- /* Loop to get the proper form for the Born radius term */
- if (gb_algorithm == egbSTILL)
- {
- for (i = 0; i < natoms; i++)
- {
- rbi = born->bRad[i];
- rb[i] = (2 * rbi * rbi * fr->dvda[i])/ONE_4PI_EPS0;
- }
- }
- else if (gb_algorithm == egbHCT)
- {
- for (i = 0; i < natoms; i++)
- {
- rbi = born->bRad[i];
- rb[i] = rbi * rbi * fr->dvda[i];
- }
- }
- else if (gb_algorithm == egbOBC)
- {
- for (idx = 0; idx < natoms; idx++)
- {
- rbi = born->bRad[idx];
- rb[idx] = rbi * rbi * born->drobc[idx] * fr->dvda[idx];
- }
- }
-
- for (i = 0; i < 2*natoms; i++)
- {
- fx_align[i] = 0;
- fy_align[i] = 0;
- fz_align[i] = 0;
- }
-
-
- for (i = 0; i < natoms; i++)
- {
- rb[i+natoms] = rb[i];
- }
-
- for (i = ni0; i < ni1; i += UNROLLI)
- {
- /* We assume shifts are NOT used for all-vs-all interactions */
-
- /* Load i atom data */
- ix_SSE0 = _mm_load1_pd(x_align+i);
- iy_SSE0 = _mm_load1_pd(y_align+i);
- iz_SSE0 = _mm_load1_pd(z_align+i);
- ix_SSE1 = _mm_load1_pd(x_align+i+1);
- iy_SSE1 = _mm_load1_pd(y_align+i+1);
- iz_SSE1 = _mm_load1_pd(z_align+i+1);
-
- fix_SSE0 = _mm_setzero_pd();
- fiy_SSE0 = _mm_setzero_pd();
- fiz_SSE0 = _mm_setzero_pd();
- fix_SSE1 = _mm_setzero_pd();
- fiy_SSE1 = _mm_setzero_pd();
- fiz_SSE1 = _mm_setzero_pd();
-
- rbai_SSE0 = _mm_load1_pd(rb+i);
- rbai_SSE1 = _mm_load1_pd(rb+i+1);
-
- /* Load limits for loop over neighbors */
- nj0 = jindex[4*i];
- nj3 = jindex[4*i+3];
-
- /* No masks necessary, since the stored chain rule derivatives will be zero in those cases! */
- for (j = nj0; j < nj3; j += UNROLLJ)
- {
- /* load j atom coordinates */
- jx_SSE = _mm_load_pd(x_align+j);
- jy_SSE = _mm_load_pd(y_align+j);
- jz_SSE = _mm_load_pd(z_align+j);
-
- /* Calculate distance */
- dx_SSE0 = _mm_sub_pd(ix_SSE0, jx_SSE);
- dy_SSE0 = _mm_sub_pd(iy_SSE0, jy_SSE);
- dz_SSE0 = _mm_sub_pd(iz_SSE0, jz_SSE);
- dx_SSE1 = _mm_sub_pd(ix_SSE1, jx_SSE);
- dy_SSE1 = _mm_sub_pd(iy_SSE1, jy_SSE);
- dz_SSE1 = _mm_sub_pd(iz_SSE1, jz_SSE);
-
- rbaj_SSE = _mm_load_pd(rb+j);
-
- fgb_SSE0 = _mm_mul_pd(rbai_SSE0, _mm_load_pd(dadx));
- dadx += 2;
- fgb_SSE1 = _mm_mul_pd(rbai_SSE1, _mm_load_pd(dadx));
- dadx += 2;
-
- fgb_ai_SSE0 = _mm_mul_pd(rbaj_SSE, _mm_load_pd(dadx));
- dadx += 2;
- fgb_ai_SSE1 = _mm_mul_pd(rbaj_SSE, _mm_load_pd(dadx));
- dadx += 2;
-
- /* Total force between ai and aj is the sum of ai->aj and aj->ai */
- fgb_SSE0 = _mm_add_pd(fgb_SSE0, fgb_ai_SSE0);
- fgb_SSE1 = _mm_add_pd(fgb_SSE1, fgb_ai_SSE1);
-
- /* Calculate temporary vectorial force */
- tx_SSE0 = _mm_mul_pd(fgb_SSE0, dx_SSE0);
- ty_SSE0 = _mm_mul_pd(fgb_SSE0, dy_SSE0);
- tz_SSE0 = _mm_mul_pd(fgb_SSE0, dz_SSE0);
- tx_SSE1 = _mm_mul_pd(fgb_SSE1, dx_SSE1);
- ty_SSE1 = _mm_mul_pd(fgb_SSE1, dy_SSE1);
- tz_SSE1 = _mm_mul_pd(fgb_SSE1, dz_SSE1);
-
- /* Increment i atom force */
- fix_SSE0 = _mm_add_pd(fix_SSE0, tx_SSE0);
- fiy_SSE0 = _mm_add_pd(fiy_SSE0, ty_SSE0);
- fiz_SSE0 = _mm_add_pd(fiz_SSE0, tz_SSE0);
- fix_SSE1 = _mm_add_pd(fix_SSE1, tx_SSE1);
- fiy_SSE1 = _mm_add_pd(fiy_SSE1, ty_SSE1);
- fiz_SSE1 = _mm_add_pd(fiz_SSE1, tz_SSE1);
-
- /* Decrement j atom force */
- _mm_store_pd(fx_align+j,
- _mm_sub_pd( _mm_load_pd(fx_align+j), _mm_add_pd(tx_SSE0, tx_SSE1) ));
- _mm_store_pd(fy_align+j,
- _mm_sub_pd( _mm_load_pd(fy_align+j), _mm_add_pd(ty_SSE0, ty_SSE1) ));
- _mm_store_pd(fz_align+j,
- _mm_sub_pd( _mm_load_pd(fz_align+j), _mm_add_pd(tz_SSE0, tz_SSE1) ));
- }
-
- /* Add i forces to mem */
- GMX_MM_TRANSPOSE2_PD(fix_SSE0, fix_SSE1);
- fix_SSE0 = _mm_add_pd(fix_SSE0, fix_SSE1);
- _mm_store_pd(fx_align+i, _mm_add_pd(fix_SSE0, _mm_load_pd(fx_align+i)));
-
- GMX_MM_TRANSPOSE2_PD(fiy_SSE0, fiy_SSE1);
- fiy_SSE0 = _mm_add_pd(fiy_SSE0, fiy_SSE1);
- _mm_store_pd(fy_align+i, _mm_add_pd(fiy_SSE0, _mm_load_pd(fy_align+i)));
-
- GMX_MM_TRANSPOSE2_PD(fiz_SSE0, fiz_SSE1);
- fiz_SSE0 = _mm_add_pd(fiz_SSE0, fiz_SSE1);
- _mm_store_pd(fz_align+i, _mm_add_pd(fiz_SSE0, _mm_load_pd(fz_align+i)));
- }
-
- for (i = 0; i < natoms; i++)
- {
- f[3*i] += fx_align[i] + fx_align[natoms+i];
- f[3*i+1] += fy_align[i] + fy_align[natoms+i];
- f[3*i+2] += fz_align[i] + fz_align[natoms+i];
- }
-
- return 0;
-}
-
-#else
-/* dummy variable when not using SSE */
-int genborn_allvsall_sse2_double_dummy;
-
-
-#endif
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team.
- * Copyright (c) 2010,2014, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#ifndef _GENBORN_ALLVSALL_SSE2_DOUBLE_H
-#define _GENBORN_ALLVSALL_SSE2_DOUBLE_H
-
-#include "gromacs/legacyheaders/typedefs.h"
-#include "gromacs/legacyheaders/types/simple.h"
-
-int
-genborn_allvsall_calc_still_radii_sse2_double(t_forcerec * fr,
- t_mdatoms * mdatoms,
- gmx_genborn_t * born,
- gmx_localtop_t * top,
- double * x,
- t_commrec * cr,
- void * work);
-
-int
-genborn_allvsall_calc_hct_obc_radii_sse2_double(t_forcerec * fr,
- t_mdatoms * mdatoms,
- gmx_genborn_t * born,
- int gb_algorithm,
- gmx_localtop_t * top,
- double * x,
- t_commrec * cr,
- void * work);
-
-int
-genborn_allvsall_calc_chainrule_sse2_double(t_forcerec * fr,
- t_mdatoms * mdatoms,
- gmx_genborn_t * born,
- double * x,
- double * f,
- int gb_algorithm,
- void * work);
-
-#endif
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team.
- * Copyright (c) 2012,2014, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#include "gmxpre.h"
-
-#include <math.h>
-
-#include "gromacs/legacyheaders/genborn.h"
-#include "gromacs/legacyheaders/network.h"
-#include "gromacs/legacyheaders/types/simple.h"
-#include "gromacs/math/units.h"
-#include "gromacs/math/vec.h"
-#include "gromacs/mdlib/genborn_allvsall.h"
-#include "gromacs/utility/smalloc.h"
-
-#if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER)
-
-#include <gmx_sse2_single.h>
-
-
-#define SIMD_WIDTH 4
-#define UNROLLI 4
-#define UNROLLJ 4
-
-
-
-
-
-
-
-
-
-typedef struct
-{
- int * jindex_gb;
- int ** prologue_mask_gb;
- int ** epilogue_mask;
- int * imask;
- real * gb_radius;
- real * workparam;
- real * work;
- real * x_align;
- real * y_align;
- real * z_align;
- real * fx_align;
- real * fy_align;
- real * fz_align;
-}
-gmx_allvsallgb2_data_t;
-
-
-static int
-calc_maxoffset(int i, int natoms)
-{
- int maxoffset;
-
- if ((natoms % 2) == 1)
- {
- /* Odd number of atoms, easy */
- maxoffset = natoms/2;
- }
- else if ((natoms % 4) == 0)
- {
- /* Multiple of four is hard */
- if (i < natoms/2)
- {
- if ((i % 2) == 0)
- {
- maxoffset = natoms/2;
- }
- else
- {
- maxoffset = natoms/2-1;
- }
- }
- else
- {
- if ((i % 2) == 1)
- {
- maxoffset = natoms/2;
- }
- else
- {
- maxoffset = natoms/2-1;
- }
- }
- }
- else
- {
- /* natoms/2 = odd */
- if ((i % 2) == 0)
- {
- maxoffset = natoms/2;
- }
- else
- {
- maxoffset = natoms/2-1;
- }
- }
-
- return maxoffset;
-}
-
-static void
-setup_gb_exclusions_and_indices(gmx_allvsallgb2_data_t * aadata,
- t_ilist * ilist,
- int start,
- int end,
- int natoms,
- gmx_bool bInclude12,
- gmx_bool bInclude13,
- gmx_bool bInclude14)
-{
- int i, j, k, tp;
- int a1, a2;
- int ni0, ni1, nj0, nj1, nj;
- int imin, imax, iexcl;
- int max_offset;
- int max_excl_offset;
- int firstinteraction;
- int ibase;
- int *pi;
-
- /* This routine can appear to be a bit complex, but it is mostly book-keeping.
- * To enable the fast all-vs-all kernel we need to be able to stream through all coordinates
- * whether they should interact or not.
- *
- * To avoid looping over the exclusions, we create a simple mask that is 1 if the interaction
- * should be present, otherwise 0. Since exclusions typically only occur when i & j are close,
- * we create a jindex array with three elements per i atom: the starting point, the point to
- * which we need to check exclusions, and the end point.
- * This way we only have to allocate a short exclusion mask per i atom.
- */
-
- ni0 = (start/UNROLLI)*UNROLLI;
- ni1 = ((end+UNROLLI-1)/UNROLLI)*UNROLLI;
-
- /* Set the interaction mask to only enable the i atoms we want to include */
- snew(pi, natoms+UNROLLI+2*SIMD_WIDTH);
- aadata->imask = (int *) (((size_t) pi + 16) & (~((size_t) 15)));
- for (i = 0; i < natoms+UNROLLI; i++)
- {
- aadata->imask[i] = (i >= start && i < end) ? 0xFFFFFFFF : 0;
- }
-
- /* Allocate memory for our modified jindex array */
- snew(aadata->jindex_gb, 4*(natoms+UNROLLI));
- for (i = 0; i < 4*(natoms+UNROLLI); i++)
- {
- aadata->jindex_gb[i] = 0;
- }
-
- /* Create the exclusion masks for the prologue part */
- snew(aadata->prologue_mask_gb, natoms+UNROLLI); /* list of pointers */
-
- /* First zero everything to avoid uninitialized data */
- for (i = 0; i < natoms+UNROLLI; i++)
- {
- aadata->prologue_mask_gb[i] = NULL;
- }
-
- /* Calculate the largest exclusion range we need for each UNROLLI-tuplet of i atoms. */
- for (ibase = ni0; ibase < ni1; ibase += UNROLLI)
- {
- max_excl_offset = -1;
-
- /* First find maxoffset for the next 4 atoms (or fewer if we are close to end) */
- imax = ((ibase+UNROLLI) < end) ? (ibase+UNROLLI) : end;
-
- /* Which atom is the first we (might) interact with? */
- imin = natoms; /* Guaranteed to be overwritten by one of 'firstinteraction' */
- for (i = ibase; i < imax; i++)
- {
- /* Before exclusions, which atom is the first we (might) interact with? */
- firstinteraction = i+1;
- max_offset = calc_maxoffset(i, natoms);
-
- if (!bInclude12)
- {
- for (j = 0; j < ilist[F_GB12].nr; j += 3)
- {
- a1 = ilist[F_GB12].iatoms[j+1];
- a2 = ilist[F_GB12].iatoms[j+2];
-
- if (a1 == i)
- {
- k = a2;
- }
- else if (a2 == i)
- {
- k = a1;
- }
- else
- {
- continue;
- }
-
- if (k == firstinteraction)
- {
- firstinteraction++;
- }
- }
- }
- if (!bInclude13)
- {
- for (j = 0; j < ilist[F_GB13].nr; j += 3)
- {
- a1 = ilist[F_GB13].iatoms[j+1];
- a2 = ilist[F_GB13].iatoms[j+2];
-
- if (a1 == i)
- {
- k = a2;
- }
- else if (a2 == i)
- {
- k = a1;
- }
- else
- {
- continue;
- }
-
- if (k == firstinteraction)
- {
- firstinteraction++;
- }
- }
- }
- if (!bInclude14)
- {
- for (j = 0; j < ilist[F_GB14].nr; j += 3)
- {
- a1 = ilist[F_GB14].iatoms[j+1];
- a2 = ilist[F_GB14].iatoms[j+2];
- if (a1 == i)
- {
- k = a2;
- }
- else if (a2 == i)
- {
- k = a1;
- }
- else
- {
- continue;
- }
-
- if (k == firstinteraction)
- {
- firstinteraction++;
- }
- }
- }
- imin = (firstinteraction < imin) ? firstinteraction : imin;
- }
- /* round down to j unrolling factor */
- imin = (imin/UNROLLJ)*UNROLLJ;
-
- for (i = ibase; i < imax; i++)
- {
- max_offset = calc_maxoffset(i, natoms);
-
- if (!bInclude12)
- {
- for (j = 0; j < ilist[F_GB12].nr; j += 3)
- {
- a1 = ilist[F_GB12].iatoms[j+1];
- a2 = ilist[F_GB12].iatoms[j+2];
-
- if (a1 == i)
- {
- k = a2;
- }
- else if (a2 == i)
- {
- k = a1;
- }
- else
- {
- continue;
- }
-
- if (k < imin)
- {
- k += natoms;
- }
-
- if (k > i+max_offset)
- {
- continue;
- }
-
- k = k - imin;
-
- if (k+natoms <= max_offset)
- {
- k += natoms;
- }
- max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
- }
- }
- if (!bInclude13)
- {
- for (j = 0; j < ilist[F_GB13].nr; j += 3)
- {
- a1 = ilist[F_GB13].iatoms[j+1];
- a2 = ilist[F_GB13].iatoms[j+2];
-
- if (a1 == i)
- {
- k = a2;
- }
- else if (a2 == i)
- {
- k = a1;
- }
- else
- {
- continue;
- }
-
- if (k < imin)
- {
- k += natoms;
- }
-
- if (k > i+max_offset)
- {
- continue;
- }
-
- k = k - imin;
-
- if (k+natoms <= max_offset)
- {
- k += natoms;
- }
- max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
- }
- }
- if (!bInclude14)
- {
- for (j = 0; j < ilist[F_GB14].nr; j += 3)
- {
- a1 = ilist[F_GB14].iatoms[j+1];
- a2 = ilist[F_GB14].iatoms[j+2];
-
- if (a1 == i)
- {
- k = a2;
- }
- else if (a2 == i)
- {
- k = a1;
- }
- else
- {
- continue;
- }
-
- if (k < imin)
- {
- k += natoms;
- }
-
- if (k > i+max_offset)
- {
- continue;
- }
-
- k = k - imin;
-
- if (k+natoms <= max_offset)
- {
- k += natoms;
- }
- max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
- }
- }
- }
-
- /* The offset specifies the last atom to be excluded, so add one unit to get an upper loop limit */
- max_excl_offset++;
- /* round up to j unrolling factor */
- max_excl_offset = (max_excl_offset/UNROLLJ+1)*UNROLLJ;
-
- /* Set all the prologue masks length to this value (even for i>end) */
- for (i = ibase; i < ibase+UNROLLI; i++)
- {
- aadata->jindex_gb[4*i] = imin;
- aadata->jindex_gb[4*i+1] = imin+max_excl_offset;
- }
- }
-
- /* Now the hard part, loop over it all again to calculate the actual contents of the prologue masks */
- for (ibase = ni0; ibase < ni1; ibase += UNROLLI)
- {
- for (i = ibase; i < ibase+UNROLLI; i++)
- {
- nj = aadata->jindex_gb[4*i+1] - aadata->jindex_gb[4*i];
- imin = aadata->jindex_gb[4*i];
-
- /* Allocate aligned memory */
- snew(pi, nj+2*SIMD_WIDTH);
- aadata->prologue_mask_gb[i] = (int *) (((size_t) pi + 16) & (~((size_t) 15)));
-
- max_offset = calc_maxoffset(i, natoms);
-
- /* Include interactions i+1 <= j < i+maxoffset */
- for (k = 0; k < nj; k++)
- {
- j = imin + k;
-
- if ( (j > i) && (j <= i+max_offset) )
- {
- aadata->prologue_mask_gb[i][k] = 0xFFFFFFFF;
- }
- else
- {
- aadata->prologue_mask_gb[i][k] = 0;
- }
- }
-
- /* Clear out the explicit exclusions */
- if (i < end)
- {
- if (!bInclude12)
- {
- for (j = 0; j < ilist[F_GB12].nr; j += 3)
- {
- a1 = ilist[F_GB12].iatoms[j+1];
- a2 = ilist[F_GB12].iatoms[j+2];
-
- if (a1 == i)
- {
- k = a2;
- }
- else if (a2 == i)
- {
- k = a1;
- }
- else
- {
- continue;
- }
-
- if (k > i+max_offset)
- {
- continue;
- }
- k = k-i;
-
- if (k+natoms <= max_offset)
- {
- k += natoms;
- }
-
- k = k+i-imin;
- if (k >= 0)
- {
- aadata->prologue_mask_gb[i][k] = 0;
- }
- }
- }
- if (!bInclude13)
- {
- for (j = 0; j < ilist[F_GB13].nr; j += 3)
- {
- a1 = ilist[F_GB13].iatoms[j+1];
- a2 = ilist[F_GB13].iatoms[j+2];
-
- if (a1 == i)
- {
- k = a2;
- }
- else if (a2 == i)
- {
- k = a1;
- }
- else
- {
- continue;
- }
-
- if (k > i+max_offset)
- {
- continue;
- }
- k = k-i;
-
- if (k+natoms <= max_offset)
- {
- k += natoms;
- }
-
- k = k+i-imin;
- if (k >= 0)
- {
- aadata->prologue_mask_gb[i][k] = 0;
- }
- }
- }
- if (!bInclude14)
- {
- for (j = 0; j < ilist[F_GB14].nr; j += 3)
- {
- a1 = ilist[F_GB14].iatoms[j+1];
- a2 = ilist[F_GB14].iatoms[j+2];
-
- if (a1 == i)
- {
- k = a2;
- }
- else if (a2 == i)
- {
- k = a1;
- }
- else
- {
- continue;
- }
-
- if (k > i+max_offset)
- {
- continue;
- }
- k = k-i;
-
- if (k+natoms <= max_offset)
- {
- k += natoms;
- }
-
- k = k+i-imin;
- if (k >= 0)
- {
- aadata->prologue_mask_gb[i][k] = 0;
- }
- }
- }
- }
- }
- }
-
- /* Construct the epilogue mask - this just contains the check for maxoffset */
- snew(aadata->epilogue_mask, natoms+UNROLLI);
-
- /* First zero everything to avoid uninitialized data */
- for (i = 0; i < natoms+UNROLLI; i++)
- {
- aadata->jindex_gb[4*i+2] = aadata->jindex_gb[4*i+1];
- aadata->jindex_gb[4*i+3] = aadata->jindex_gb[4*i+1];
- aadata->epilogue_mask[i] = NULL;
- }
-
- for (ibase = ni0; ibase < ni1; ibase += UNROLLI)
- {
- /* Find the lowest index for which we need to use the epilogue */
- imin = ibase;
- max_offset = calc_maxoffset(imin, natoms);
-
- imin = imin + 1 + max_offset;
-
- /* Find largest index for which we need to use the epilogue */
- imax = ibase + UNROLLI-1;
- imax = (imax < end) ? imax : end;
-
- max_offset = calc_maxoffset(imax, natoms);
- imax = imax + 1 + max_offset + UNROLLJ - 1;
-
- for (i = ibase; i < ibase+UNROLLI; i++)
- {
- /* Start of epilogue - round down to j tile limit */
- aadata->jindex_gb[4*i+2] = (imin/UNROLLJ)*UNROLLJ;
- /* Make sure we dont overlap - for small systems everything is done in the prologue */
- aadata->jindex_gb[4*i+2] = (aadata->jindex_gb[4*i+1] > aadata->jindex_gb[4*i+2]) ? aadata->jindex_gb[4*i+1] : aadata->jindex_gb[4*i+2];
- /* Round upwards to j tile limit */
- aadata->jindex_gb[4*i+3] = (imax/UNROLLJ)*UNROLLJ;
- /* Make sure we dont have a negative range for the epilogue */
- aadata->jindex_gb[4*i+3] = (aadata->jindex_gb[4*i+2] > aadata->jindex_gb[4*i+3]) ? aadata->jindex_gb[4*i+2] : aadata->jindex_gb[4*i+3];
- }
- }
-
- /* And fill it with data... */
-
- for (ibase = ni0; ibase < ni1; ibase += UNROLLI)
- {
- for (i = ibase; i < ibase+UNROLLI; i++)
- {
-
- nj = aadata->jindex_gb[4*i+3] - aadata->jindex_gb[4*i+2];
-
- /* Allocate aligned memory */
- snew(pi, nj+2*SIMD_WIDTH);
- aadata->epilogue_mask[i] = (int *) (((size_t) pi + 16) & (~((size_t) 15)));
-
- max_offset = calc_maxoffset(i, natoms);
-
- for (k = 0; k < nj; k++)
- {
- j = aadata->jindex_gb[4*i+2] + k;
- aadata->epilogue_mask[i][k] = (j <= i+max_offset) ? 0xFFFFFFFF : 0;
- }
- }
- }
-}
-
-
-static void
-genborn_allvsall_setup(gmx_allvsallgb2_data_t ** p_aadata,
- gmx_localtop_t * top,
- gmx_genborn_t * born,
- t_mdatoms * mdatoms,
- real radius_offset,
- int gb_algorithm,
- gmx_bool bInclude12,
- gmx_bool bInclude13,
- gmx_bool bInclude14)
-{
- int i, j, idx;
- int natoms;
- gmx_allvsallgb2_data_t *aadata;
- real *p;
-
- natoms = mdatoms->nr;
-
- snew(aadata, 1);
- *p_aadata = aadata;
-
- snew(p, 2*natoms+2*SIMD_WIDTH);
- aadata->x_align = (real *) (((size_t) p + 16) & (~((size_t) 15)));
- snew(p, 2*natoms+2*SIMD_WIDTH);
- aadata->y_align = (real *) (((size_t) p + 16) & (~((size_t) 15)));
- snew(p, 2*natoms+2*SIMD_WIDTH);
- aadata->z_align = (real *) (((size_t) p + 16) & (~((size_t) 15)));
- snew(p, 2*natoms+2*SIMD_WIDTH);
- aadata->fx_align = (real *) (((size_t) p + 16) & (~((size_t) 15)));
- snew(p, 2*natoms+2*SIMD_WIDTH);
- aadata->fy_align = (real *) (((size_t) p + 16) & (~((size_t) 15)));
- snew(p, 2*natoms+2*SIMD_WIDTH);
- aadata->fz_align = (real *) (((size_t) p + 16) & (~((size_t) 15)));
-
- snew(p, 2*natoms+UNROLLJ+SIMD_WIDTH);
- aadata->gb_radius = (real *) (((size_t) p + 16) & (~((size_t) 15)));
-
- snew(p, 2*natoms+UNROLLJ+SIMD_WIDTH);
- aadata->workparam = (real *) (((size_t) p + 16) & (~((size_t) 15)));
-
- snew(p, 2*natoms+UNROLLJ+SIMD_WIDTH);
- aadata->work = (real *) (((size_t) p + 16) & (~((size_t) 15)));
-
- for (i = 0; i < mdatoms->nr; i++)
- {
- aadata->gb_radius[i] = top->atomtypes.gb_radius[mdatoms->typeA[i]] - radius_offset;
- if (gb_algorithm == egbSTILL)
- {
- aadata->workparam[i] = born->vsolv[i];
- }
- else if (gb_algorithm == egbOBC)
- {
- aadata->workparam[i] = born->param[i];
- }
- aadata->work[i] = 0.0;
- }
- for (i = 0; i < mdatoms->nr; i++)
- {
- aadata->gb_radius[natoms+i] = aadata->gb_radius[i];
- aadata->workparam[natoms+i] = aadata->workparam[i];
- aadata->work[natoms+i] = aadata->work[i];
- }
-
- for (i = 0; i < 2*natoms+SIMD_WIDTH; i++)
- {
- aadata->x_align[i] = 0.0;
- aadata->y_align[i] = 0.0;
- aadata->z_align[i] = 0.0;
- aadata->fx_align[i] = 0.0;
- aadata->fy_align[i] = 0.0;
- aadata->fz_align[i] = 0.0;
- }
-
- setup_gb_exclusions_and_indices(aadata, top->idef.il, 0, mdatoms->homenr, mdatoms->nr,
- bInclude12, bInclude13, bInclude14);
-}
-
-
-int
-genborn_allvsall_calc_still_radii_sse2_single(t_forcerec * fr,
- t_mdatoms * mdatoms,
- gmx_genborn_t * born,
- gmx_localtop_t * top,
- real * x,
- t_commrec * cr,
- void * paadata)
-{
- gmx_allvsallgb2_data_t *aadata;
- int natoms;
- int ni0, ni1;
- int nj0, nj1, nj2, nj3;
- int i, j, k, n;
- int * mask;
- int * pmask0;
- int * pmask1;
- int * pmask2;
- int * pmask3;
- int * emask0;
- int * emask1;
- int * emask2;
- int * emask3;
- real ix, iy, iz;
- real jx, jy, jz;
- real dx, dy, dz;
- real rsq, rinv;
- real gpi, rai, vai;
- real prod_ai;
- real irsq, idr4, idr6;
- real raj, rvdw, ratio;
- real vaj, ccf, dccf, theta, cosq;
- real term, prod, icf4, icf6, gpi2, factor, sinq;
- real * gb_radius;
- real * vsolv;
- real * work;
- real tmpsum[4];
- real * x_align;
- real * y_align;
- real * z_align;
- int * jindex;
- real * dadx;
-
- __m128 ix_SSE0, iy_SSE0, iz_SSE0;
- __m128 ix_SSE1, iy_SSE1, iz_SSE1;
- __m128 ix_SSE2, iy_SSE2, iz_SSE2;
- __m128 ix_SSE3, iy_SSE3, iz_SSE3;
- __m128 gpi_SSE0, rai_SSE0, prod_ai_SSE0;
- __m128 gpi_SSE1, rai_SSE1, prod_ai_SSE1;
- __m128 gpi_SSE2, rai_SSE2, prod_ai_SSE2;
- __m128 gpi_SSE3, rai_SSE3, prod_ai_SSE3;
- __m128 imask_SSE0, jmask_SSE0;
- __m128 imask_SSE1, jmask_SSE1;
- __m128 imask_SSE2, jmask_SSE2;
- __m128 imask_SSE3, jmask_SSE3;
- __m128 jx_SSE, jy_SSE, jz_SSE;
- __m128 dx_SSE0, dy_SSE0, dz_SSE0;
- __m128 dx_SSE1, dy_SSE1, dz_SSE1;
- __m128 dx_SSE2, dy_SSE2, dz_SSE2;
- __m128 dx_SSE3, dy_SSE3, dz_SSE3;
- __m128 rsq_SSE0, rinv_SSE0, irsq_SSE0, idr4_SSE0, idr6_SSE0;
- __m128 rsq_SSE1, rinv_SSE1, irsq_SSE1, idr4_SSE1, idr6_SSE1;
- __m128 rsq_SSE2, rinv_SSE2, irsq_SSE2, idr4_SSE2, idr6_SSE2;
- __m128 rsq_SSE3, rinv_SSE3, irsq_SSE3, idr4_SSE3, idr6_SSE3;
- __m128 raj_SSE, vaj_SSE, prod_SSE;
- __m128 rvdw_SSE0, ratio_SSE0;
- __m128 rvdw_SSE1, ratio_SSE1;
- __m128 rvdw_SSE2, ratio_SSE2;
- __m128 rvdw_SSE3, ratio_SSE3;
- __m128 theta_SSE0, sinq_SSE0, cosq_SSE0, term_SSE0;
- __m128 theta_SSE1, sinq_SSE1, cosq_SSE1, term_SSE1;
- __m128 theta_SSE2, sinq_SSE2, cosq_SSE2, term_SSE2;
- __m128 theta_SSE3, sinq_SSE3, cosq_SSE3, term_SSE3;
- __m128 ccf_SSE0, dccf_SSE0;
- __m128 ccf_SSE1, dccf_SSE1;
- __m128 ccf_SSE2, dccf_SSE2;
- __m128 ccf_SSE3, dccf_SSE3;
- __m128 icf4_SSE0, icf6_SSE0;
- __m128 icf4_SSE1, icf6_SSE1;
- __m128 icf4_SSE2, icf6_SSE2;
- __m128 icf4_SSE3, icf6_SSE3;
- __m128 half_SSE, one_SSE, two_SSE, four_SSE;
- __m128 still_p4_SSE, still_p5inv_SSE, still_pip5_SSE;
-
- natoms = mdatoms->nr;
- ni0 = 0;
- ni1 = mdatoms->homenr;
-
- n = 0;
-
- aadata = *((gmx_allvsallgb2_data_t **)paadata);
-
-
- if (aadata == NULL)
- {
- genborn_allvsall_setup(&aadata, top, born, mdatoms, 0.0,
- egbSTILL, FALSE, FALSE, TRUE);
- *((gmx_allvsallgb2_data_t **)paadata) = aadata;
- }
-
- x_align = aadata->x_align;
- y_align = aadata->y_align;
- z_align = aadata->z_align;
-
- gb_radius = aadata->gb_radius;
- vsolv = aadata->workparam;
- work = aadata->work;
- jindex = aadata->jindex_gb;
- dadx = fr->dadx;
-
- still_p4_SSE = _mm_set1_ps(STILL_P4);
- still_p5inv_SSE = _mm_set1_ps(STILL_P5INV);
- still_pip5_SSE = _mm_set1_ps(STILL_PIP5);
- half_SSE = _mm_set1_ps(0.5);
- one_SSE = _mm_set1_ps(1.0);
- two_SSE = _mm_set1_ps(2.0);
- four_SSE = _mm_set1_ps(4.0);
-
- /* This will be summed, so it has to extend to natoms + buffer */
- for (i = 0; i < natoms+1+natoms/2; i++)
- {
- work[i] = 0;
- }
-
- for (i = ni0; i < ni1+1+natoms/2; i++)
- {
- k = i%natoms;
- x_align[i] = x[3*k];
- y_align[i] = x[3*k+1];
- z_align[i] = x[3*k+2];
- work[i] = 0;
- }
-
-
- for (i = ni0; i < ni1; i += UNROLLI)
- {
- /* We assume shifts are NOT used for all-vs-all interactions */
-
- /* Load i atom data */
- ix_SSE0 = _mm_load1_ps(x_align+i);
- iy_SSE0 = _mm_load1_ps(y_align+i);
- iz_SSE0 = _mm_load1_ps(z_align+i);
- ix_SSE1 = _mm_load1_ps(x_align+i+1);
- iy_SSE1 = _mm_load1_ps(y_align+i+1);
- iz_SSE1 = _mm_load1_ps(z_align+i+1);
- ix_SSE2 = _mm_load1_ps(x_align+i+2);
- iy_SSE2 = _mm_load1_ps(y_align+i+2);
- iz_SSE2 = _mm_load1_ps(z_align+i+2);
- ix_SSE3 = _mm_load1_ps(x_align+i+3);
- iy_SSE3 = _mm_load1_ps(y_align+i+3);
- iz_SSE3 = _mm_load1_ps(z_align+i+3);
-
- gpi_SSE0 = _mm_setzero_ps();
- gpi_SSE1 = _mm_setzero_ps();
- gpi_SSE2 = _mm_setzero_ps();
- gpi_SSE3 = _mm_setzero_ps();
-
- rai_SSE0 = _mm_load1_ps(gb_radius+i);
- rai_SSE1 = _mm_load1_ps(gb_radius+i+1);
- rai_SSE2 = _mm_load1_ps(gb_radius+i+2);
- rai_SSE3 = _mm_load1_ps(gb_radius+i+3);
-
- prod_ai_SSE0 = _mm_set1_ps(STILL_P4*vsolv[i]);
- prod_ai_SSE1 = _mm_set1_ps(STILL_P4*vsolv[i+1]);
- prod_ai_SSE2 = _mm_set1_ps(STILL_P4*vsolv[i+2]);
- prod_ai_SSE3 = _mm_set1_ps(STILL_P4*vsolv[i+3]);
-
- /* Load limits for loop over neighbors */
- nj0 = jindex[4*i];
- nj1 = jindex[4*i+1];
- nj2 = jindex[4*i+2];
- nj3 = jindex[4*i+3];
-
- pmask0 = aadata->prologue_mask_gb[i];
- pmask1 = aadata->prologue_mask_gb[i+1];
- pmask2 = aadata->prologue_mask_gb[i+2];
- pmask3 = aadata->prologue_mask_gb[i+3];
- emask0 = aadata->epilogue_mask[i];
- emask1 = aadata->epilogue_mask[i+1];
- emask2 = aadata->epilogue_mask[i+2];
- emask3 = aadata->epilogue_mask[i+3];
-
- imask_SSE0 = _mm_load1_ps((real *)(aadata->imask+i));
- imask_SSE1 = _mm_load1_ps((real *)(aadata->imask+i+1));
- imask_SSE2 = _mm_load1_ps((real *)(aadata->imask+i+2));
- imask_SSE3 = _mm_load1_ps((real *)(aadata->imask+i+3));
-
- /* Prologue part, including exclusion mask */
- for (j = nj0; j < nj1; j += UNROLLJ)
- {
- jmask_SSE0 = _mm_load_ps((real *)pmask0);
- jmask_SSE1 = _mm_load_ps((real *)pmask1);
- jmask_SSE2 = _mm_load_ps((real *)pmask2);
- jmask_SSE3 = _mm_load_ps((real *)pmask3);
- pmask0 += UNROLLJ;
- pmask1 += UNROLLJ;
- pmask2 += UNROLLJ;
- pmask3 += UNROLLJ;
-
- /* load j atom coordinates */
- jx_SSE = _mm_load_ps(x_align+j);
- jy_SSE = _mm_load_ps(y_align+j);
- jz_SSE = _mm_load_ps(z_align+j);
-
- /* Calculate distance */
- dx_SSE0 = _mm_sub_ps(ix_SSE0, jx_SSE);
- dy_SSE0 = _mm_sub_ps(iy_SSE0, jy_SSE);
- dz_SSE0 = _mm_sub_ps(iz_SSE0, jz_SSE);
- dx_SSE1 = _mm_sub_ps(ix_SSE1, jx_SSE);
- dy_SSE1 = _mm_sub_ps(iy_SSE1, jy_SSE);
- dz_SSE1 = _mm_sub_ps(iz_SSE1, jz_SSE);
- dx_SSE2 = _mm_sub_ps(ix_SSE2, jx_SSE);
- dy_SSE2 = _mm_sub_ps(iy_SSE2, jy_SSE);
- dz_SSE2 = _mm_sub_ps(iz_SSE2, jz_SSE);
- dx_SSE3 = _mm_sub_ps(ix_SSE3, jx_SSE);
- dy_SSE3 = _mm_sub_ps(iy_SSE3, jy_SSE);
- dz_SSE3 = _mm_sub_ps(iz_SSE3, jz_SSE);
-
- /* rsq = dx*dx+dy*dy+dz*dz */
- rsq_SSE0 = gmx_mm_calc_rsq_ps(dx_SSE0, dy_SSE0, dz_SSE0);
- rsq_SSE1 = gmx_mm_calc_rsq_ps(dx_SSE1, dy_SSE1, dz_SSE1);
- rsq_SSE2 = gmx_mm_calc_rsq_ps(dx_SSE2, dy_SSE2, dz_SSE2);
- rsq_SSE3 = gmx_mm_calc_rsq_ps(dx_SSE3, dy_SSE3, dz_SSE3);
-
- /* Combine masks */
- jmask_SSE0 = _mm_and_ps(jmask_SSE0, imask_SSE0);
- jmask_SSE1 = _mm_and_ps(jmask_SSE1, imask_SSE1);
- jmask_SSE2 = _mm_and_ps(jmask_SSE2, imask_SSE2);
- jmask_SSE3 = _mm_and_ps(jmask_SSE3, imask_SSE3);
-
- /* Calculate 1/r and 1/r2 */
- rinv_SSE0 = gmx_mm_invsqrt_ps(rsq_SSE0);
- rinv_SSE1 = gmx_mm_invsqrt_ps(rsq_SSE1);
- rinv_SSE2 = gmx_mm_invsqrt_ps(rsq_SSE2);
- rinv_SSE3 = gmx_mm_invsqrt_ps(rsq_SSE3);
-
- /* Apply mask */
- rinv_SSE0 = _mm_and_ps(rinv_SSE0, jmask_SSE0);
- rinv_SSE1 = _mm_and_ps(rinv_SSE1, jmask_SSE1);
- rinv_SSE2 = _mm_and_ps(rinv_SSE2, jmask_SSE2);
- rinv_SSE3 = _mm_and_ps(rinv_SSE3, jmask_SSE3);
-
- irsq_SSE0 = _mm_mul_ps(rinv_SSE0, rinv_SSE0);
- irsq_SSE1 = _mm_mul_ps(rinv_SSE1, rinv_SSE1);
- irsq_SSE2 = _mm_mul_ps(rinv_SSE2, rinv_SSE2);
- irsq_SSE3 = _mm_mul_ps(rinv_SSE3, rinv_SSE3);
- idr4_SSE0 = _mm_mul_ps(irsq_SSE0, irsq_SSE0);
- idr4_SSE1 = _mm_mul_ps(irsq_SSE1, irsq_SSE1);
- idr4_SSE2 = _mm_mul_ps(irsq_SSE2, irsq_SSE2);
- idr4_SSE3 = _mm_mul_ps(irsq_SSE3, irsq_SSE3);
- idr6_SSE0 = _mm_mul_ps(idr4_SSE0, irsq_SSE0);
- idr6_SSE1 = _mm_mul_ps(idr4_SSE1, irsq_SSE1);
- idr6_SSE2 = _mm_mul_ps(idr4_SSE2, irsq_SSE2);
- idr6_SSE3 = _mm_mul_ps(idr4_SSE3, irsq_SSE3);
-
- raj_SSE = _mm_load_ps(gb_radius+j);
- vaj_SSE = _mm_load_ps(vsolv+j);
-
- rvdw_SSE0 = _mm_add_ps(rai_SSE0, raj_SSE);
- rvdw_SSE1 = _mm_add_ps(rai_SSE1, raj_SSE);
- rvdw_SSE2 = _mm_add_ps(rai_SSE2, raj_SSE);
- rvdw_SSE3 = _mm_add_ps(rai_SSE3, raj_SSE);
-
- ratio_SSE0 = _mm_mul_ps(rsq_SSE0, gmx_mm_inv_ps( _mm_mul_ps(rvdw_SSE0, rvdw_SSE0)));
- ratio_SSE1 = _mm_mul_ps(rsq_SSE1, gmx_mm_inv_ps( _mm_mul_ps(rvdw_SSE1, rvdw_SSE1)));
- ratio_SSE2 = _mm_mul_ps(rsq_SSE2, gmx_mm_inv_ps( _mm_mul_ps(rvdw_SSE2, rvdw_SSE2)));
- ratio_SSE3 = _mm_mul_ps(rsq_SSE3, gmx_mm_inv_ps( _mm_mul_ps(rvdw_SSE3, rvdw_SSE3)));
-
- ratio_SSE0 = _mm_min_ps(ratio_SSE0, still_p5inv_SSE);
- ratio_SSE1 = _mm_min_ps(ratio_SSE1, still_p5inv_SSE);
- ratio_SSE2 = _mm_min_ps(ratio_SSE2, still_p5inv_SSE);
- ratio_SSE3 = _mm_min_ps(ratio_SSE3, still_p5inv_SSE);
- theta_SSE0 = _mm_mul_ps(ratio_SSE0, still_pip5_SSE);
- theta_SSE1 = _mm_mul_ps(ratio_SSE1, still_pip5_SSE);
- theta_SSE2 = _mm_mul_ps(ratio_SSE2, still_pip5_SSE);
- theta_SSE3 = _mm_mul_ps(ratio_SSE3, still_pip5_SSE);
- gmx_mm_sincos_ps(theta_SSE0, &sinq_SSE0, &cosq_SSE0);
- gmx_mm_sincos_ps(theta_SSE1, &sinq_SSE1, &cosq_SSE1);
- gmx_mm_sincos_ps(theta_SSE2, &sinq_SSE2, &cosq_SSE2);
- gmx_mm_sincos_ps(theta_SSE3, &sinq_SSE3, &cosq_SSE3);
- term_SSE0 = _mm_mul_ps(half_SSE, _mm_sub_ps(one_SSE, cosq_SSE0));
- term_SSE1 = _mm_mul_ps(half_SSE, _mm_sub_ps(one_SSE, cosq_SSE1));
- term_SSE2 = _mm_mul_ps(half_SSE, _mm_sub_ps(one_SSE, cosq_SSE2));
- term_SSE3 = _mm_mul_ps(half_SSE, _mm_sub_ps(one_SSE, cosq_SSE3));
- ccf_SSE0 = _mm_mul_ps(term_SSE0, term_SSE0);
- ccf_SSE1 = _mm_mul_ps(term_SSE1, term_SSE1);
- ccf_SSE2 = _mm_mul_ps(term_SSE2, term_SSE2);
- ccf_SSE3 = _mm_mul_ps(term_SSE3, term_SSE3);
- dccf_SSE0 = _mm_mul_ps(_mm_mul_ps(two_SSE, term_SSE0),
- _mm_mul_ps(sinq_SSE0, theta_SSE0));
- dccf_SSE1 = _mm_mul_ps(_mm_mul_ps(two_SSE, term_SSE1),
- _mm_mul_ps(sinq_SSE1, theta_SSE1));
- dccf_SSE2 = _mm_mul_ps(_mm_mul_ps(two_SSE, term_SSE2),
- _mm_mul_ps(sinq_SSE2, theta_SSE2));
- dccf_SSE3 = _mm_mul_ps(_mm_mul_ps(two_SSE, term_SSE3),
- _mm_mul_ps(sinq_SSE3, theta_SSE3));
-
- prod_SSE = _mm_mul_ps(still_p4_SSE, vaj_SSE);
- icf4_SSE0 = _mm_mul_ps(ccf_SSE0, idr4_SSE0);
- icf4_SSE1 = _mm_mul_ps(ccf_SSE1, idr4_SSE1);
- icf4_SSE2 = _mm_mul_ps(ccf_SSE2, idr4_SSE2);
- icf4_SSE3 = _mm_mul_ps(ccf_SSE3, idr4_SSE3);
- icf6_SSE0 = _mm_mul_ps( _mm_sub_ps( _mm_mul_ps(four_SSE, ccf_SSE0), dccf_SSE0), idr6_SSE0);
- icf6_SSE1 = _mm_mul_ps( _mm_sub_ps( _mm_mul_ps(four_SSE, ccf_SSE1), dccf_SSE1), idr6_SSE1);
- icf6_SSE2 = _mm_mul_ps( _mm_sub_ps( _mm_mul_ps(four_SSE, ccf_SSE2), dccf_SSE2), idr6_SSE2);
- icf6_SSE3 = _mm_mul_ps( _mm_sub_ps( _mm_mul_ps(four_SSE, ccf_SSE3), dccf_SSE3), idr6_SSE3);
-
- _mm_store_ps(work+j, _mm_add_ps(_mm_load_ps(work+j),
- gmx_mm_sum4_ps(_mm_mul_ps(prod_ai_SSE0, icf4_SSE0),
- _mm_mul_ps(prod_ai_SSE1, icf4_SSE1),
- _mm_mul_ps(prod_ai_SSE2, icf4_SSE2),
- _mm_mul_ps(prod_ai_SSE3, icf4_SSE3))));
-
- gpi_SSE0 = _mm_add_ps(gpi_SSE0, _mm_mul_ps(prod_SSE, icf4_SSE0));
- gpi_SSE1 = _mm_add_ps(gpi_SSE1, _mm_mul_ps(prod_SSE, icf4_SSE1));
- gpi_SSE2 = _mm_add_ps(gpi_SSE2, _mm_mul_ps(prod_SSE, icf4_SSE2));
- gpi_SSE3 = _mm_add_ps(gpi_SSE3, _mm_mul_ps(prod_SSE, icf4_SSE3));
-
- /* Save ai->aj and aj->ai chain rule terms */
- _mm_store_ps(dadx, _mm_mul_ps(prod_SSE, icf6_SSE0));
- dadx += 4;
- _mm_store_ps(dadx, _mm_mul_ps(prod_SSE, icf6_SSE1));
- dadx += 4;
- _mm_store_ps(dadx, _mm_mul_ps(prod_SSE, icf6_SSE2));
- dadx += 4;
- _mm_store_ps(dadx, _mm_mul_ps(prod_SSE, icf6_SSE3));
- dadx += 4;
-
- _mm_store_ps(dadx, _mm_mul_ps(prod_ai_SSE0, icf6_SSE0));
- dadx += 4;
- _mm_store_ps(dadx, _mm_mul_ps(prod_ai_SSE1, icf6_SSE1));
- dadx += 4;
- _mm_store_ps(dadx, _mm_mul_ps(prod_ai_SSE2, icf6_SSE2));
- dadx += 4;
- _mm_store_ps(dadx, _mm_mul_ps(prod_ai_SSE3, icf6_SSE3));
- dadx += 4;
- }
-
- /* Main part, no exclusions */
- for (j = nj1; j < nj2; j += UNROLLJ)
- {
- /* load j atom coordinates */
- jx_SSE = _mm_load_ps(x_align+j);
- jy_SSE = _mm_load_ps(y_align+j);
- jz_SSE = _mm_load_ps(z_align+j);
-
- /* Calculate distance */
- dx_SSE0 = _mm_sub_ps(ix_SSE0, jx_SSE);
- dy_SSE0 = _mm_sub_ps(iy_SSE0, jy_SSE);
- dz_SSE0 = _mm_sub_ps(iz_SSE0, jz_SSE);
- dx_SSE1 = _mm_sub_ps(ix_SSE1, jx_SSE);
- dy_SSE1 = _mm_sub_ps(iy_SSE1, jy_SSE);
- dz_SSE1 = _mm_sub_ps(iz_SSE1, jz_SSE);
- dx_SSE2 = _mm_sub_ps(ix_SSE2, jx_SSE);
- dy_SSE2 = _mm_sub_ps(iy_SSE2, jy_SSE);
- dz_SSE2 = _mm_sub_ps(iz_SSE2, jz_SSE);
- dx_SSE3 = _mm_sub_ps(ix_SSE3, jx_SSE);
- dy_SSE3 = _mm_sub_ps(iy_SSE3, jy_SSE);
- dz_SSE3 = _mm_sub_ps(iz_SSE3, jz_SSE);
-
- /* rsq = dx*dx+dy*dy+dz*dz */
- rsq_SSE0 = gmx_mm_calc_rsq_ps(dx_SSE0, dy_SSE0, dz_SSE0);
- rsq_SSE1 = gmx_mm_calc_rsq_ps(dx_SSE1, dy_SSE1, dz_SSE1);
- rsq_SSE2 = gmx_mm_calc_rsq_ps(dx_SSE2, dy_SSE2, dz_SSE2);
- rsq_SSE3 = gmx_mm_calc_rsq_ps(dx_SSE3, dy_SSE3, dz_SSE3);
-
- /* Calculate 1/r and 1/r2 */
- rinv_SSE0 = gmx_mm_invsqrt_ps(rsq_SSE0);
- rinv_SSE1 = gmx_mm_invsqrt_ps(rsq_SSE1);
- rinv_SSE2 = gmx_mm_invsqrt_ps(rsq_SSE2);
- rinv_SSE3 = gmx_mm_invsqrt_ps(rsq_SSE3);
-
- /* Apply mask */
- rinv_SSE0 = _mm_and_ps(rinv_SSE0, imask_SSE0);
- rinv_SSE1 = _mm_and_ps(rinv_SSE1, imask_SSE1);
- rinv_SSE2 = _mm_and_ps(rinv_SSE2, imask_SSE2);
- rinv_SSE3 = _mm_and_ps(rinv_SSE3, imask_SSE3);
-
- irsq_SSE0 = _mm_mul_ps(rinv_SSE0, rinv_SSE0);
- irsq_SSE1 = _mm_mul_ps(rinv_SSE1, rinv_SSE1);
- irsq_SSE2 = _mm_mul_ps(rinv_SSE2, rinv_SSE2);
- irsq_SSE3 = _mm_mul_ps(rinv_SSE3, rinv_SSE3);
- idr4_SSE0 = _mm_mul_ps(irsq_SSE0, irsq_SSE0);
- idr4_SSE1 = _mm_mul_ps(irsq_SSE1, irsq_SSE1);
- idr4_SSE2 = _mm_mul_ps(irsq_SSE2, irsq_SSE2);
- idr4_SSE3 = _mm_mul_ps(irsq_SSE3, irsq_SSE3);
- idr6_SSE0 = _mm_mul_ps(idr4_SSE0, irsq_SSE0);
- idr6_SSE1 = _mm_mul_ps(idr4_SSE1, irsq_SSE1);
- idr6_SSE2 = _mm_mul_ps(idr4_SSE2, irsq_SSE2);
- idr6_SSE3 = _mm_mul_ps(idr4_SSE3, irsq_SSE3);
-
- raj_SSE = _mm_load_ps(gb_radius+j);
-
- rvdw_SSE0 = _mm_add_ps(rai_SSE0, raj_SSE);
- rvdw_SSE1 = _mm_add_ps(rai_SSE1, raj_SSE);
- rvdw_SSE2 = _mm_add_ps(rai_SSE2, raj_SSE);
- rvdw_SSE3 = _mm_add_ps(rai_SSE3, raj_SSE);
- vaj_SSE = _mm_load_ps(vsolv+j);
-
- ratio_SSE0 = _mm_mul_ps(rsq_SSE0, gmx_mm_inv_ps( _mm_mul_ps(rvdw_SSE0, rvdw_SSE0)));
- ratio_SSE1 = _mm_mul_ps(rsq_SSE1, gmx_mm_inv_ps( _mm_mul_ps(rvdw_SSE1, rvdw_SSE1)));
- ratio_SSE2 = _mm_mul_ps(rsq_SSE2, gmx_mm_inv_ps( _mm_mul_ps(rvdw_SSE2, rvdw_SSE2)));
- ratio_SSE3 = _mm_mul_ps(rsq_SSE3, gmx_mm_inv_ps( _mm_mul_ps(rvdw_SSE3, rvdw_SSE3)));
-
- ratio_SSE0 = _mm_min_ps(ratio_SSE0, still_p5inv_SSE);
- ratio_SSE1 = _mm_min_ps(ratio_SSE1, still_p5inv_SSE);
- ratio_SSE2 = _mm_min_ps(ratio_SSE2, still_p5inv_SSE);
- ratio_SSE3 = _mm_min_ps(ratio_SSE3, still_p5inv_SSE);
- theta_SSE0 = _mm_mul_ps(ratio_SSE0, still_pip5_SSE);
- theta_SSE1 = _mm_mul_ps(ratio_SSE1, still_pip5_SSE);
- theta_SSE2 = _mm_mul_ps(ratio_SSE2, still_pip5_SSE);
- theta_SSE3 = _mm_mul_ps(ratio_SSE3, still_pip5_SSE);
- gmx_mm_sincos_ps(theta_SSE0, &sinq_SSE0, &cosq_SSE0);
- gmx_mm_sincos_ps(theta_SSE1, &sinq_SSE1, &cosq_SSE1);
- gmx_mm_sincos_ps(theta_SSE2, &sinq_SSE2, &cosq_SSE2);
- gmx_mm_sincos_ps(theta_SSE3, &sinq_SSE3, &cosq_SSE3);
- term_SSE0 = _mm_mul_ps(half_SSE, _mm_sub_ps(one_SSE, cosq_SSE0));
- term_SSE1 = _mm_mul_ps(half_SSE, _mm_sub_ps(one_SSE, cosq_SSE1));
- term_SSE2 = _mm_mul_ps(half_SSE, _mm_sub_ps(one_SSE, cosq_SSE2));
- term_SSE3 = _mm_mul_ps(half_SSE, _mm_sub_ps(one_SSE, cosq_SSE3));
- ccf_SSE0 = _mm_mul_ps(term_SSE0, term_SSE0);
- ccf_SSE1 = _mm_mul_ps(term_SSE1, term_SSE1);
- ccf_SSE2 = _mm_mul_ps(term_SSE2, term_SSE2);
- ccf_SSE3 = _mm_mul_ps(term_SSE3, term_SSE3);
- dccf_SSE0 = _mm_mul_ps(_mm_mul_ps(two_SSE, term_SSE0),
- _mm_mul_ps(sinq_SSE0, theta_SSE0));
- dccf_SSE1 = _mm_mul_ps(_mm_mul_ps(two_SSE, term_SSE1),
- _mm_mul_ps(sinq_SSE1, theta_SSE1));
- dccf_SSE2 = _mm_mul_ps(_mm_mul_ps(two_SSE, term_SSE2),
- _mm_mul_ps(sinq_SSE2, theta_SSE2));
- dccf_SSE3 = _mm_mul_ps(_mm_mul_ps(two_SSE, term_SSE3),
- _mm_mul_ps(sinq_SSE3, theta_SSE3));
-
- prod_SSE = _mm_mul_ps(still_p4_SSE, vaj_SSE );
- icf4_SSE0 = _mm_mul_ps(ccf_SSE0, idr4_SSE0);
- icf4_SSE1 = _mm_mul_ps(ccf_SSE1, idr4_SSE1);
- icf4_SSE2 = _mm_mul_ps(ccf_SSE2, idr4_SSE2);
- icf4_SSE3 = _mm_mul_ps(ccf_SSE3, idr4_SSE3);
- icf6_SSE0 = _mm_mul_ps( _mm_sub_ps( _mm_mul_ps(four_SSE, ccf_SSE0), dccf_SSE0), idr6_SSE0);
- icf6_SSE1 = _mm_mul_ps( _mm_sub_ps( _mm_mul_ps(four_SSE, ccf_SSE1), dccf_SSE1), idr6_SSE1);
- icf6_SSE2 = _mm_mul_ps( _mm_sub_ps( _mm_mul_ps(four_SSE, ccf_SSE2), dccf_SSE2), idr6_SSE2);
- icf6_SSE3 = _mm_mul_ps( _mm_sub_ps( _mm_mul_ps(four_SSE, ccf_SSE3), dccf_SSE3), idr6_SSE3);
-
- _mm_store_ps(work+j, _mm_add_ps(_mm_load_ps(work+j),
- gmx_mm_sum4_ps(_mm_mul_ps(prod_ai_SSE0, icf4_SSE0),
- _mm_mul_ps(prod_ai_SSE1, icf4_SSE1),
- _mm_mul_ps(prod_ai_SSE2, icf4_SSE2),
- _mm_mul_ps(prod_ai_SSE3, icf4_SSE3))));
-
- gpi_SSE0 = _mm_add_ps(gpi_SSE0, _mm_mul_ps(prod_SSE, icf4_SSE0));
- gpi_SSE1 = _mm_add_ps(gpi_SSE1, _mm_mul_ps(prod_SSE, icf4_SSE1));
- gpi_SSE2 = _mm_add_ps(gpi_SSE2, _mm_mul_ps(prod_SSE, icf4_SSE2));
- gpi_SSE3 = _mm_add_ps(gpi_SSE3, _mm_mul_ps(prod_SSE, icf4_SSE3));
-
- /* Save ai->aj and aj->ai chain rule terms */
- _mm_store_ps(dadx, _mm_mul_ps(prod_SSE, icf6_SSE0));
- dadx += 4;
- _mm_store_ps(dadx, _mm_mul_ps(prod_SSE, icf6_SSE1));
- dadx += 4;
- _mm_store_ps(dadx, _mm_mul_ps(prod_SSE, icf6_SSE2));
- dadx += 4;
- _mm_store_ps(dadx, _mm_mul_ps(prod_SSE, icf6_SSE3));
- dadx += 4;
-
- _mm_store_ps(dadx, _mm_mul_ps(prod_ai_SSE0, icf6_SSE0));
- dadx += 4;
- _mm_store_ps(dadx, _mm_mul_ps(prod_ai_SSE1, icf6_SSE1));
- dadx += 4;
- _mm_store_ps(dadx, _mm_mul_ps(prod_ai_SSE2, icf6_SSE2));
- dadx += 4;
- _mm_store_ps(dadx, _mm_mul_ps(prod_ai_SSE3, icf6_SSE3));
- dadx += 4;
- }
- /* Epilogue part, including exclusion mask */
- for (j = nj2; j < nj3; j += UNROLLJ)
- {
- jmask_SSE0 = _mm_load_ps((real *)emask0);
- jmask_SSE1 = _mm_load_ps((real *)emask1);
- jmask_SSE2 = _mm_load_ps((real *)emask2);
- jmask_SSE3 = _mm_load_ps((real *)emask3);
- emask0 += UNROLLJ;
- emask1 += UNROLLJ;
- emask2 += UNROLLJ;
- emask3 += UNROLLJ;
-
- /* load j atom coordinates */
- jx_SSE = _mm_load_ps(x_align+j);
- jy_SSE = _mm_load_ps(y_align+j);
- jz_SSE = _mm_load_ps(z_align+j);
-
- /* Calculate distance */
- dx_SSE0 = _mm_sub_ps(ix_SSE0, jx_SSE);
- dy_SSE0 = _mm_sub_ps(iy_SSE0, jy_SSE);
- dz_SSE0 = _mm_sub_ps(iz_SSE0, jz_SSE);
- dx_SSE1 = _mm_sub_ps(ix_SSE1, jx_SSE);
- dy_SSE1 = _mm_sub_ps(iy_SSE1, jy_SSE);
- dz_SSE1 = _mm_sub_ps(iz_SSE1, jz_SSE);
- dx_SSE2 = _mm_sub_ps(ix_SSE2, jx_SSE);
- dy_SSE2 = _mm_sub_ps(iy_SSE2, jy_SSE);
- dz_SSE2 = _mm_sub_ps(iz_SSE2, jz_SSE);
- dx_SSE3 = _mm_sub_ps(ix_SSE3, jx_SSE);
- dy_SSE3 = _mm_sub_ps(iy_SSE3, jy_SSE);
- dz_SSE3 = _mm_sub_ps(iz_SSE3, jz_SSE);
-
- /* rsq = dx*dx+dy*dy+dz*dz */
- rsq_SSE0 = gmx_mm_calc_rsq_ps(dx_SSE0, dy_SSE0, dz_SSE0);
- rsq_SSE1 = gmx_mm_calc_rsq_ps(dx_SSE1, dy_SSE1, dz_SSE1);
- rsq_SSE2 = gmx_mm_calc_rsq_ps(dx_SSE2, dy_SSE2, dz_SSE2);
- rsq_SSE3 = gmx_mm_calc_rsq_ps(dx_SSE3, dy_SSE3, dz_SSE3);
-
- /* Combine masks */
- jmask_SSE0 = _mm_and_ps(jmask_SSE0, imask_SSE0);
- jmask_SSE1 = _mm_and_ps(jmask_SSE1, imask_SSE1);
- jmask_SSE2 = _mm_and_ps(jmask_SSE2, imask_SSE2);
- jmask_SSE3 = _mm_and_ps(jmask_SSE3, imask_SSE3);
-
- /* Calculate 1/r and 1/r2 */
- rinv_SSE0 = gmx_mm_invsqrt_ps(rsq_SSE0);
- rinv_SSE1 = gmx_mm_invsqrt_ps(rsq_SSE1);
- rinv_SSE2 = gmx_mm_invsqrt_ps(rsq_SSE2);
- rinv_SSE3 = gmx_mm_invsqrt_ps(rsq_SSE3);
-
- /* Apply mask */
- rinv_SSE0 = _mm_and_ps(rinv_SSE0, jmask_SSE0);
- rinv_SSE1 = _mm_and_ps(rinv_SSE1, jmask_SSE1);
- rinv_SSE2 = _mm_and_ps(rinv_SSE2, jmask_SSE2);
- rinv_SSE3 = _mm_and_ps(rinv_SSE3, jmask_SSE3);
-
- irsq_SSE0 = _mm_mul_ps(rinv_SSE0, rinv_SSE0);
- irsq_SSE1 = _mm_mul_ps(rinv_SSE1, rinv_SSE1);
- irsq_SSE2 = _mm_mul_ps(rinv_SSE2, rinv_SSE2);
- irsq_SSE3 = _mm_mul_ps(rinv_SSE3, rinv_SSE3);
- idr4_SSE0 = _mm_mul_ps(irsq_SSE0, irsq_SSE0);
- idr4_SSE1 = _mm_mul_ps(irsq_SSE1, irsq_SSE1);
- idr4_SSE2 = _mm_mul_ps(irsq_SSE2, irsq_SSE2);
- idr4_SSE3 = _mm_mul_ps(irsq_SSE3, irsq_SSE3);
- idr6_SSE0 = _mm_mul_ps(idr4_SSE0, irsq_SSE0);
- idr6_SSE1 = _mm_mul_ps(idr4_SSE1, irsq_SSE1);
- idr6_SSE2 = _mm_mul_ps(idr4_SSE2, irsq_SSE2);
- idr6_SSE3 = _mm_mul_ps(idr4_SSE3, irsq_SSE3);
-
- raj_SSE = _mm_load_ps(gb_radius+j);
- vaj_SSE = _mm_load_ps(vsolv+j);
-
- rvdw_SSE0 = _mm_add_ps(rai_SSE0, raj_SSE);
- rvdw_SSE1 = _mm_add_ps(rai_SSE1, raj_SSE);
- rvdw_SSE2 = _mm_add_ps(rai_SSE2, raj_SSE);
- rvdw_SSE3 = _mm_add_ps(rai_SSE3, raj_SSE);
-
- ratio_SSE0 = _mm_mul_ps(rsq_SSE0, gmx_mm_inv_ps( _mm_mul_ps(rvdw_SSE0, rvdw_SSE0)));
- ratio_SSE1 = _mm_mul_ps(rsq_SSE1, gmx_mm_inv_ps( _mm_mul_ps(rvdw_SSE1, rvdw_SSE1)));
- ratio_SSE2 = _mm_mul_ps(rsq_SSE2, gmx_mm_inv_ps( _mm_mul_ps(rvdw_SSE2, rvdw_SSE2)));
- ratio_SSE3 = _mm_mul_ps(rsq_SSE3, gmx_mm_inv_ps( _mm_mul_ps(rvdw_SSE3, rvdw_SSE3)));
-
- ratio_SSE0 = _mm_min_ps(ratio_SSE0, still_p5inv_SSE);
- ratio_SSE1 = _mm_min_ps(ratio_SSE1, still_p5inv_SSE);
- ratio_SSE2 = _mm_min_ps(ratio_SSE2, still_p5inv_SSE);
- ratio_SSE3 = _mm_min_ps(ratio_SSE3, still_p5inv_SSE);
- theta_SSE0 = _mm_mul_ps(ratio_SSE0, still_pip5_SSE);
- theta_SSE1 = _mm_mul_ps(ratio_SSE1, still_pip5_SSE);
- theta_SSE2 = _mm_mul_ps(ratio_SSE2, still_pip5_SSE);
- theta_SSE3 = _mm_mul_ps(ratio_SSE3, still_pip5_SSE);
- gmx_mm_sincos_ps(theta_SSE0, &sinq_SSE0, &cosq_SSE0);
- gmx_mm_sincos_ps(theta_SSE1, &sinq_SSE1, &cosq_SSE1);
- gmx_mm_sincos_ps(theta_SSE2, &sinq_SSE2, &cosq_SSE2);
- gmx_mm_sincos_ps(theta_SSE3, &sinq_SSE3, &cosq_SSE3);
- term_SSE0 = _mm_mul_ps(half_SSE, _mm_sub_ps(one_SSE, cosq_SSE0));
- term_SSE1 = _mm_mul_ps(half_SSE, _mm_sub_ps(one_SSE, cosq_SSE1));
- term_SSE2 = _mm_mul_ps(half_SSE, _mm_sub_ps(one_SSE, cosq_SSE2));
- term_SSE3 = _mm_mul_ps(half_SSE, _mm_sub_ps(one_SSE, cosq_SSE3));
- ccf_SSE0 = _mm_mul_ps(term_SSE0, term_SSE0);
- ccf_SSE1 = _mm_mul_ps(term_SSE1, term_SSE1);
- ccf_SSE2 = _mm_mul_ps(term_SSE2, term_SSE2);
- ccf_SSE3 = _mm_mul_ps(term_SSE3, term_SSE3);
- dccf_SSE0 = _mm_mul_ps(_mm_mul_ps(two_SSE, term_SSE0),
- _mm_mul_ps(sinq_SSE0, theta_SSE0));
- dccf_SSE1 = _mm_mul_ps(_mm_mul_ps(two_SSE, term_SSE1),
- _mm_mul_ps(sinq_SSE1, theta_SSE1));
- dccf_SSE2 = _mm_mul_ps(_mm_mul_ps(two_SSE, term_SSE2),
- _mm_mul_ps(sinq_SSE2, theta_SSE2));
- dccf_SSE3 = _mm_mul_ps(_mm_mul_ps(two_SSE, term_SSE3),
- _mm_mul_ps(sinq_SSE3, theta_SSE3));
-
- prod_SSE = _mm_mul_ps(still_p4_SSE, vaj_SSE);
- icf4_SSE0 = _mm_mul_ps(ccf_SSE0, idr4_SSE0);
- icf4_SSE1 = _mm_mul_ps(ccf_SSE1, idr4_SSE1);
- icf4_SSE2 = _mm_mul_ps(ccf_SSE2, idr4_SSE2);
- icf4_SSE3 = _mm_mul_ps(ccf_SSE3, idr4_SSE3);
- icf6_SSE0 = _mm_mul_ps( _mm_sub_ps( _mm_mul_ps(four_SSE, ccf_SSE0), dccf_SSE0), idr6_SSE0);
- icf6_SSE1 = _mm_mul_ps( _mm_sub_ps( _mm_mul_ps(four_SSE, ccf_SSE1), dccf_SSE1), idr6_SSE1);
- icf6_SSE2 = _mm_mul_ps( _mm_sub_ps( _mm_mul_ps(four_SSE, ccf_SSE2), dccf_SSE2), idr6_SSE2);
- icf6_SSE3 = _mm_mul_ps( _mm_sub_ps( _mm_mul_ps(four_SSE, ccf_SSE3), dccf_SSE3), idr6_SSE3);
-
- _mm_store_ps(work+j, _mm_add_ps(_mm_load_ps(work+j),
- gmx_mm_sum4_ps(_mm_mul_ps(prod_ai_SSE0, icf4_SSE0),
- _mm_mul_ps(prod_ai_SSE1, icf4_SSE1),
- _mm_mul_ps(prod_ai_SSE2, icf4_SSE2),
- _mm_mul_ps(prod_ai_SSE3, icf4_SSE3))));
-
- gpi_SSE0 = _mm_add_ps(gpi_SSE0, _mm_mul_ps(prod_SSE, icf4_SSE0));
- gpi_SSE1 = _mm_add_ps(gpi_SSE1, _mm_mul_ps(prod_SSE, icf4_SSE1));
- gpi_SSE2 = _mm_add_ps(gpi_SSE2, _mm_mul_ps(prod_SSE, icf4_SSE2));
- gpi_SSE3 = _mm_add_ps(gpi_SSE3, _mm_mul_ps(prod_SSE, icf4_SSE3));
-
- /* Save ai->aj and aj->ai chain rule terms */
- _mm_store_ps(dadx, _mm_mul_ps(prod_SSE, icf6_SSE0));
- dadx += 4;
- _mm_store_ps(dadx, _mm_mul_ps(prod_SSE, icf6_SSE1));
- dadx += 4;
- _mm_store_ps(dadx, _mm_mul_ps(prod_SSE, icf6_SSE2));
- dadx += 4;
- _mm_store_ps(dadx, _mm_mul_ps(prod_SSE, icf6_SSE3));
- dadx += 4;
-
- _mm_store_ps(dadx, _mm_mul_ps(prod_ai_SSE0, icf6_SSE0));
- dadx += 4;
- _mm_store_ps(dadx, _mm_mul_ps(prod_ai_SSE1, icf6_SSE1));
- dadx += 4;
- _mm_store_ps(dadx, _mm_mul_ps(prod_ai_SSE2, icf6_SSE2));
- dadx += 4;
- _mm_store_ps(dadx, _mm_mul_ps(prod_ai_SSE3, icf6_SSE3));
- dadx += 4;
- }
- _MM_TRANSPOSE4_PS(gpi_SSE0, gpi_SSE1, gpi_SSE2, gpi_SSE3);
- gpi_SSE0 = _mm_add_ps(gpi_SSE0, gpi_SSE1);
- gpi_SSE2 = _mm_add_ps(gpi_SSE2, gpi_SSE3);
- gpi_SSE0 = _mm_add_ps(gpi_SSE0, gpi_SSE2);
- _mm_store_ps(work+i, _mm_add_ps(gpi_SSE0, _mm_load_ps(work+i)));
- }
-
- /* In case we have written anything beyond natoms, move it back.
- * Never mind that we leave stuff above natoms; that will not
- * be accessed later in the routine.
- * In principle this should be a move rather than sum, but this
- * way we dont have to worry about even/odd offsets...
- */
- for (i = natoms; i < ni1+1+natoms/2; i++)
- {
- work[i-natoms] += work[i];
- }
-
- /* Parallel summations would go here if ever implemented with DD */
-
- factor = 0.5 * ONE_4PI_EPS0;
- /* Calculate the radii - should we do all atoms, or just our local ones? */
- for (i = 0; i < natoms; i++)
- {
- if (born->use[i] != 0)
- {
- gpi = born->gpol[i]+work[i];
- gpi2 = gpi * gpi;
- born->bRad[i] = factor*gmx_invsqrt(gpi2);
- fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
- }
- }
-
- return 0;
-}
-
-
-
-int
-genborn_allvsall_calc_hct_obc_radii_sse2_single(t_forcerec * fr,
- t_mdatoms * mdatoms,
- gmx_genborn_t * born,
- int gb_algorithm,
- gmx_localtop_t * top,
- real * x,
- t_commrec * cr,
- void * paadata)
-{
- gmx_allvsallgb2_data_t *aadata;
- int natoms;
- int ni0, ni1;
- int nj0, nj1, nj2, nj3;
- int i, j, k, n;
- int * mask;
- int * pmask0;
- int * pmask1;
- int * pmask2;
- int * pmask3;
- int * emask0;
- int * emask1;
- int * emask2;
- int * emask3;
- real * gb_radius;
- real * vsolv;
- real * work;
- real tmpsum[4];
- real * x_align;
- real * y_align;
- real * z_align;
- int * jindex;
- real * dadx;
- real * obc_param;
- real rad, min_rad;
- real rai, rai_inv, rai_inv2, sum_ai, sum_ai2, sum_ai3, tsum, tchain;
-
- __m128 ix_SSE0, iy_SSE0, iz_SSE0;
- __m128 ix_SSE1, iy_SSE1, iz_SSE1;
- __m128 ix_SSE2, iy_SSE2, iz_SSE2;
- __m128 ix_SSE3, iy_SSE3, iz_SSE3;
- __m128 gpi_SSE0, rai_SSE0, prod_ai_SSE0;
- __m128 gpi_SSE1, rai_SSE1, prod_ai_SSE1;
- __m128 gpi_SSE2, rai_SSE2, prod_ai_SSE2;
- __m128 gpi_SSE3, rai_SSE3, prod_ai_SSE3;
- __m128 imask_SSE0, jmask_SSE0;
- __m128 imask_SSE1, jmask_SSE1;
- __m128 imask_SSE2, jmask_SSE2;
- __m128 imask_SSE3, jmask_SSE3;
- __m128 jx_SSE, jy_SSE, jz_SSE;
- __m128 dx_SSE0, dy_SSE0, dz_SSE0;
- __m128 dx_SSE1, dy_SSE1, dz_SSE1;
- __m128 dx_SSE2, dy_SSE2, dz_SSE2;
- __m128 dx_SSE3, dy_SSE3, dz_SSE3;
- __m128 rsq_SSE0, rinv_SSE0, irsq_SSE0, idr4_SSE0, idr6_SSE0;
- __m128 rsq_SSE1, rinv_SSE1, irsq_SSE1, idr4_SSE1, idr6_SSE1;
- __m128 rsq_SSE2, rinv_SSE2, irsq_SSE2, idr4_SSE2, idr6_SSE2;
- __m128 rsq_SSE3, rinv_SSE3, irsq_SSE3, idr4_SSE3, idr6_SSE3;
- __m128 raj_SSE, raj_inv_SSE, sk_aj_SSE, sk2_aj_SSE;
- __m128 ccf_SSE0, dccf_SSE0, prod_SSE0;
- __m128 ccf_SSE1, dccf_SSE1, prod_SSE1;
- __m128 ccf_SSE2, dccf_SSE2, prod_SSE2;
- __m128 ccf_SSE3, dccf_SSE3, prod_SSE3;
- __m128 icf4_SSE0, icf6_SSE0;
- __m128 icf4_SSE1, icf6_SSE1;
- __m128 icf4_SSE2, icf6_SSE2;
- __m128 icf4_SSE3, icf6_SSE3;
- __m128 oneeighth_SSE, onefourth_SSE, half_SSE, one_SSE, two_SSE, four_SSE;
- __m128 still_p4_SSE, still_p5inv_SSE, still_pip5_SSE;
- __m128 rai_inv_SSE0;
- __m128 rai_inv_SSE1;
- __m128 rai_inv_SSE2;
- __m128 rai_inv_SSE3;
- __m128 sk_ai_SSE0, sk2_ai_SSE0, sum_ai_SSE0;
- __m128 sk_ai_SSE1, sk2_ai_SSE1, sum_ai_SSE1;
- __m128 sk_ai_SSE2, sk2_ai_SSE2, sum_ai_SSE2;
- __m128 sk_ai_SSE3, sk2_ai_SSE3, sum_ai_SSE3;
- __m128 lij_inv_SSE0, sk2_rinv_SSE0;
- __m128 lij_inv_SSE1, sk2_rinv_SSE1;
- __m128 lij_inv_SSE2, sk2_rinv_SSE2;
- __m128 lij_inv_SSE3, sk2_rinv_SSE3;
- __m128 dr_SSE0;
- __m128 dr_SSE1;
- __m128 dr_SSE2;
- __m128 dr_SSE3;
- __m128 t1_SSE0, t2_SSE0, t3_SSE0, t4_SSE0;
- __m128 t1_SSE1, t2_SSE1, t3_SSE1, t4_SSE1;
- __m128 t1_SSE2, t2_SSE2, t3_SSE2, t4_SSE2;
- __m128 t1_SSE3, t2_SSE3, t3_SSE3, t4_SSE3;
- __m128 obc_mask1_SSE0, obc_mask2_SSE0, obc_mask3_SSE0;
- __m128 obc_mask1_SSE1, obc_mask2_SSE1, obc_mask3_SSE1;
- __m128 obc_mask1_SSE2, obc_mask2_SSE2, obc_mask3_SSE2;
- __m128 obc_mask1_SSE3, obc_mask2_SSE3, obc_mask3_SSE3;
- __m128 uij_SSE0, uij2_SSE0, uij3_SSE0;
- __m128 uij_SSE1, uij2_SSE1, uij3_SSE1;
- __m128 uij_SSE2, uij2_SSE2, uij3_SSE2;
- __m128 uij_SSE3, uij2_SSE3, uij3_SSE3;
- __m128 lij_SSE0, lij2_SSE0, lij3_SSE0;
- __m128 lij_SSE1, lij2_SSE1, lij3_SSE1;
- __m128 lij_SSE2, lij2_SSE2, lij3_SSE2;
- __m128 lij_SSE3, lij2_SSE3, lij3_SSE3;
- __m128 dlij_SSE0, diff2_SSE0, logterm_SSE0;
- __m128 dlij_SSE1, diff2_SSE1, logterm_SSE1;
- __m128 dlij_SSE2, diff2_SSE2, logterm_SSE2;
- __m128 dlij_SSE3, diff2_SSE3, logterm_SSE3;
- __m128 doffset_SSE;
-
- natoms = mdatoms->nr;
- ni0 = 0;
- ni1 = mdatoms->homenr;
-
- n = 0;
-
- aadata = *((gmx_allvsallgb2_data_t **)paadata);
-
-
- if (aadata == NULL)
- {
- genborn_allvsall_setup(&aadata, top, born, mdatoms, born->gb_doffset,
- egbOBC, TRUE, TRUE, TRUE);
- *((gmx_allvsallgb2_data_t **)paadata) = aadata;
- }
-
- x_align = aadata->x_align;
- y_align = aadata->y_align;
- z_align = aadata->z_align;
-
- gb_radius = aadata->gb_radius;
- work = aadata->work;
- jindex = aadata->jindex_gb;
- dadx = fr->dadx;
- obc_param = aadata->workparam;
-
- oneeighth_SSE = _mm_set1_ps(0.125);
- onefourth_SSE = _mm_set1_ps(0.25);
- half_SSE = _mm_set1_ps(0.5);
- one_SSE = _mm_set1_ps(1.0);
- two_SSE = _mm_set1_ps(2.0);
- four_SSE = _mm_set1_ps(4.0);
- doffset_SSE = _mm_set1_ps(born->gb_doffset);
-
- for (i = 0; i < natoms; i++)
- {
- x_align[i] = x[3*i];
- y_align[i] = x[3*i+1];
- z_align[i] = x[3*i+2];
- }
-
- /* Copy again */
- for (i = 0; i < natoms/2+1; i++)
- {
- x_align[natoms+i] = x_align[i];
- y_align[natoms+i] = y_align[i];
- z_align[natoms+i] = z_align[i];
- }
-
- for (i = 0; i < natoms+natoms/2+1; i++)
- {
- work[i] = 0;
- }
-
- for (i = ni0; i < ni1; i += UNROLLI)
- {
- /* We assume shifts are NOT used for all-vs-all interactions */
-
- /* Load i atom data */
- ix_SSE0 = _mm_load1_ps(x_align+i);
- iy_SSE0 = _mm_load1_ps(y_align+i);
- iz_SSE0 = _mm_load1_ps(z_align+i);
- ix_SSE1 = _mm_load1_ps(x_align+i+1);
- iy_SSE1 = _mm_load1_ps(y_align+i+1);
- iz_SSE1 = _mm_load1_ps(z_align+i+1);
- ix_SSE2 = _mm_load1_ps(x_align+i+2);
- iy_SSE2 = _mm_load1_ps(y_align+i+2);
- iz_SSE2 = _mm_load1_ps(z_align+i+2);
- ix_SSE3 = _mm_load1_ps(x_align+i+3);
- iy_SSE3 = _mm_load1_ps(y_align+i+3);
- iz_SSE3 = _mm_load1_ps(z_align+i+3);
-
- rai_SSE0 = _mm_load1_ps(gb_radius+i);
- rai_SSE1 = _mm_load1_ps(gb_radius+i+1);
- rai_SSE2 = _mm_load1_ps(gb_radius+i+2);
- rai_SSE3 = _mm_load1_ps(gb_radius+i+3);
- rai_inv_SSE0 = gmx_mm_inv_ps(rai_SSE0);
- rai_inv_SSE1 = gmx_mm_inv_ps(rai_SSE1);
- rai_inv_SSE2 = gmx_mm_inv_ps(rai_SSE2);
- rai_inv_SSE3 = gmx_mm_inv_ps(rai_SSE3);
-
- sk_ai_SSE0 = _mm_load1_ps(obc_param+i);
- sk_ai_SSE1 = _mm_load1_ps(obc_param+i+1);
- sk_ai_SSE2 = _mm_load1_ps(obc_param+i+2);
- sk_ai_SSE3 = _mm_load1_ps(obc_param+i+3);
- sk2_ai_SSE0 = _mm_mul_ps(sk_ai_SSE0, sk_ai_SSE0);
- sk2_ai_SSE1 = _mm_mul_ps(sk_ai_SSE1, sk_ai_SSE1);
- sk2_ai_SSE2 = _mm_mul_ps(sk_ai_SSE2, sk_ai_SSE2);
- sk2_ai_SSE3 = _mm_mul_ps(sk_ai_SSE3, sk_ai_SSE3);
-
- sum_ai_SSE0 = _mm_setzero_ps();
- sum_ai_SSE1 = _mm_setzero_ps();
- sum_ai_SSE2 = _mm_setzero_ps();
- sum_ai_SSE3 = _mm_setzero_ps();
-
- /* Load limits for loop over neighbors */
- nj0 = jindex[4*i];
- nj1 = jindex[4*i+1];
- nj2 = jindex[4*i+2];
- nj3 = jindex[4*i+3];
-
- pmask0 = aadata->prologue_mask_gb[i];
- pmask1 = aadata->prologue_mask_gb[i+1];
- pmask2 = aadata->prologue_mask_gb[i+2];
- pmask3 = aadata->prologue_mask_gb[i+3];
- emask0 = aadata->epilogue_mask[i];
- emask1 = aadata->epilogue_mask[i+1];
- emask2 = aadata->epilogue_mask[i+2];
- emask3 = aadata->epilogue_mask[i+3];
-
- imask_SSE0 = _mm_load1_ps((real *)(aadata->imask+i));
- imask_SSE1 = _mm_load1_ps((real *)(aadata->imask+i+1));
- imask_SSE2 = _mm_load1_ps((real *)(aadata->imask+i+2));
- imask_SSE3 = _mm_load1_ps((real *)(aadata->imask+i+3));
-
- /* Prologue part, including exclusion mask */
- for (j = nj0; j < nj1; j += UNROLLJ)
- {
- jmask_SSE0 = _mm_load_ps((real *)pmask0);
- jmask_SSE1 = _mm_load_ps((real *)pmask1);
- jmask_SSE2 = _mm_load_ps((real *)pmask2);
- jmask_SSE3 = _mm_load_ps((real *)pmask3);
- pmask0 += UNROLLJ;
- pmask1 += UNROLLJ;
- pmask2 += UNROLLJ;
- pmask3 += UNROLLJ;
-
- /* load j atom coordinates */
- jx_SSE = _mm_load_ps(x_align+j);
- jy_SSE = _mm_load_ps(y_align+j);
- jz_SSE = _mm_load_ps(z_align+j);
-
- /* Calculate distance */
- dx_SSE0 = _mm_sub_ps(ix_SSE0, jx_SSE);
- dy_SSE0 = _mm_sub_ps(iy_SSE0, jy_SSE);
- dz_SSE0 = _mm_sub_ps(iz_SSE0, jz_SSE);
- dx_SSE1 = _mm_sub_ps(ix_SSE1, jx_SSE);
- dy_SSE1 = _mm_sub_ps(iy_SSE1, jy_SSE);
- dz_SSE1 = _mm_sub_ps(iz_SSE1, jz_SSE);
- dx_SSE2 = _mm_sub_ps(ix_SSE2, jx_SSE);
- dy_SSE2 = _mm_sub_ps(iy_SSE2, jy_SSE);
- dz_SSE2 = _mm_sub_ps(iz_SSE2, jz_SSE);
- dx_SSE3 = _mm_sub_ps(ix_SSE3, jx_SSE);
- dy_SSE3 = _mm_sub_ps(iy_SSE3, jy_SSE);
- dz_SSE3 = _mm_sub_ps(iz_SSE3, jz_SSE);
-
- /* rsq = dx*dx+dy*dy+dz*dz */
- rsq_SSE0 = gmx_mm_calc_rsq_ps(dx_SSE0, dy_SSE0, dz_SSE0);
- rsq_SSE1 = gmx_mm_calc_rsq_ps(dx_SSE1, dy_SSE1, dz_SSE1);
- rsq_SSE2 = gmx_mm_calc_rsq_ps(dx_SSE2, dy_SSE2, dz_SSE2);
- rsq_SSE3 = gmx_mm_calc_rsq_ps(dx_SSE3, dy_SSE3, dz_SSE3);
-
- /* Combine masks */
- jmask_SSE0 = _mm_and_ps(jmask_SSE0, imask_SSE0);
- jmask_SSE1 = _mm_and_ps(jmask_SSE1, imask_SSE1);
- jmask_SSE2 = _mm_and_ps(jmask_SSE2, imask_SSE2);
- jmask_SSE3 = _mm_and_ps(jmask_SSE3, imask_SSE3);
-
- /* Calculate 1/r and 1/r2 */
- rinv_SSE0 = gmx_mm_invsqrt_ps(rsq_SSE0);
- rinv_SSE1 = gmx_mm_invsqrt_ps(rsq_SSE1);
- rinv_SSE2 = gmx_mm_invsqrt_ps(rsq_SSE2);
- rinv_SSE3 = gmx_mm_invsqrt_ps(rsq_SSE3);
-
- /* Apply mask */
- rinv_SSE0 = _mm_and_ps(rinv_SSE0, jmask_SSE0);
- rinv_SSE1 = _mm_and_ps(rinv_SSE1, jmask_SSE1);
- rinv_SSE2 = _mm_and_ps(rinv_SSE2, jmask_SSE2);
- rinv_SSE3 = _mm_and_ps(rinv_SSE3, jmask_SSE3);
-
- dr_SSE0 = _mm_mul_ps(rsq_SSE0, rinv_SSE0);
- dr_SSE1 = _mm_mul_ps(rsq_SSE1, rinv_SSE1);
- dr_SSE2 = _mm_mul_ps(rsq_SSE2, rinv_SSE2);
- dr_SSE3 = _mm_mul_ps(rsq_SSE3, rinv_SSE3);
-
- sk_aj_SSE = _mm_load_ps(obc_param+j);
- raj_SSE = _mm_load_ps(gb_radius+j);
- raj_inv_SSE = gmx_mm_inv_ps(raj_SSE);
-
- /* Evaluate influence of atom aj -> ai */
- t1_SSE0 = _mm_add_ps(dr_SSE0, sk_aj_SSE);
- t1_SSE1 = _mm_add_ps(dr_SSE1, sk_aj_SSE);
- t1_SSE2 = _mm_add_ps(dr_SSE2, sk_aj_SSE);
- t1_SSE3 = _mm_add_ps(dr_SSE3, sk_aj_SSE);
- t2_SSE0 = _mm_sub_ps(dr_SSE0, sk_aj_SSE);
- t2_SSE1 = _mm_sub_ps(dr_SSE1, sk_aj_SSE);
- t2_SSE2 = _mm_sub_ps(dr_SSE2, sk_aj_SSE);
- t2_SSE3 = _mm_sub_ps(dr_SSE3, sk_aj_SSE);
- t3_SSE0 = _mm_sub_ps(sk_aj_SSE, dr_SSE0);
- t3_SSE1 = _mm_sub_ps(sk_aj_SSE, dr_SSE1);
- t3_SSE2 = _mm_sub_ps(sk_aj_SSE, dr_SSE2);
- t3_SSE3 = _mm_sub_ps(sk_aj_SSE, dr_SSE3);
-
- obc_mask1_SSE0 = _mm_cmplt_ps(rai_SSE0, t1_SSE0);
- obc_mask1_SSE1 = _mm_cmplt_ps(rai_SSE1, t1_SSE1);
- obc_mask1_SSE2 = _mm_cmplt_ps(rai_SSE2, t1_SSE2);
- obc_mask1_SSE3 = _mm_cmplt_ps(rai_SSE3, t1_SSE3);
- obc_mask2_SSE0 = _mm_cmplt_ps(rai_SSE0, t2_SSE0);
- obc_mask2_SSE1 = _mm_cmplt_ps(rai_SSE1, t2_SSE1);
- obc_mask2_SSE2 = _mm_cmplt_ps(rai_SSE2, t2_SSE2);
- obc_mask2_SSE3 = _mm_cmplt_ps(rai_SSE3, t2_SSE3);
- obc_mask3_SSE0 = _mm_cmplt_ps(rai_SSE0, t3_SSE0);
- obc_mask3_SSE1 = _mm_cmplt_ps(rai_SSE1, t3_SSE1);
- obc_mask3_SSE2 = _mm_cmplt_ps(rai_SSE2, t3_SSE2);
- obc_mask3_SSE3 = _mm_cmplt_ps(rai_SSE3, t3_SSE3);
- obc_mask1_SSE0 = _mm_and_ps(obc_mask1_SSE0, jmask_SSE0);
- obc_mask1_SSE1 = _mm_and_ps(obc_mask1_SSE1, jmask_SSE1);
- obc_mask1_SSE2 = _mm_and_ps(obc_mask1_SSE2, jmask_SSE2);
- obc_mask1_SSE3 = _mm_and_ps(obc_mask1_SSE3, jmask_SSE3);
-
- uij_SSE0 = gmx_mm_inv_ps(t1_SSE0);
- uij_SSE1 = gmx_mm_inv_ps(t1_SSE1);
- uij_SSE2 = gmx_mm_inv_ps(t1_SSE2);
- uij_SSE3 = gmx_mm_inv_ps(t1_SSE3);
- lij_SSE0 = _mm_or_ps( _mm_and_ps(obc_mask2_SSE0, gmx_mm_inv_ps(t2_SSE0)),
- _mm_andnot_ps(obc_mask2_SSE0, rai_inv_SSE0));
- lij_SSE1 = _mm_or_ps( _mm_and_ps(obc_mask2_SSE1, gmx_mm_inv_ps(t2_SSE1)),
- _mm_andnot_ps(obc_mask2_SSE1, rai_inv_SSE1));
- lij_SSE2 = _mm_or_ps( _mm_and_ps(obc_mask2_SSE2, gmx_mm_inv_ps(t2_SSE2)),
- _mm_andnot_ps(obc_mask2_SSE2, rai_inv_SSE2));
- lij_SSE3 = _mm_or_ps( _mm_and_ps(obc_mask2_SSE3, gmx_mm_inv_ps(t2_SSE3)),
- _mm_andnot_ps(obc_mask2_SSE3, rai_inv_SSE3));
- dlij_SSE0 = _mm_and_ps(one_SSE, obc_mask2_SSE0);
- dlij_SSE1 = _mm_and_ps(one_SSE, obc_mask2_SSE1);
- dlij_SSE2 = _mm_and_ps(one_SSE, obc_mask2_SSE2);
- dlij_SSE3 = _mm_and_ps(one_SSE, obc_mask2_SSE3);
-
- uij2_SSE0 = _mm_mul_ps(uij_SSE0, uij_SSE0);
- uij2_SSE1 = _mm_mul_ps(uij_SSE1, uij_SSE1);
- uij2_SSE2 = _mm_mul_ps(uij_SSE2, uij_SSE2);
- uij2_SSE3 = _mm_mul_ps(uij_SSE3, uij_SSE3);
- uij3_SSE0 = _mm_mul_ps(uij2_SSE0, uij_SSE0);
- uij3_SSE1 = _mm_mul_ps(uij2_SSE1, uij_SSE1);
- uij3_SSE2 = _mm_mul_ps(uij2_SSE2, uij_SSE2);
- uij3_SSE3 = _mm_mul_ps(uij2_SSE3, uij_SSE3);
- lij2_SSE0 = _mm_mul_ps(lij_SSE0, lij_SSE0);
- lij2_SSE1 = _mm_mul_ps(lij_SSE1, lij_SSE1);
- lij2_SSE2 = _mm_mul_ps(lij_SSE2, lij_SSE2);
- lij2_SSE3 = _mm_mul_ps(lij_SSE3, lij_SSE3);
- lij3_SSE0 = _mm_mul_ps(lij2_SSE0, lij_SSE0);
- lij3_SSE1 = _mm_mul_ps(lij2_SSE1, lij_SSE1);
- lij3_SSE2 = _mm_mul_ps(lij2_SSE2, lij_SSE2);
- lij3_SSE3 = _mm_mul_ps(lij2_SSE3, lij_SSE3);
-
- diff2_SSE0 = _mm_sub_ps(uij2_SSE0, lij2_SSE0);
- diff2_SSE1 = _mm_sub_ps(uij2_SSE1, lij2_SSE1);
- diff2_SSE2 = _mm_sub_ps(uij2_SSE2, lij2_SSE2);
- diff2_SSE3 = _mm_sub_ps(uij2_SSE3, lij2_SSE3);
- lij_inv_SSE0 = gmx_mm_invsqrt_ps(lij2_SSE0);
- lij_inv_SSE1 = gmx_mm_invsqrt_ps(lij2_SSE1);
- lij_inv_SSE2 = gmx_mm_invsqrt_ps(lij2_SSE2);
- lij_inv_SSE3 = gmx_mm_invsqrt_ps(lij2_SSE3);
- sk2_aj_SSE = _mm_mul_ps(sk_aj_SSE, sk_aj_SSE);
- sk2_rinv_SSE0 = _mm_mul_ps(sk2_aj_SSE, rinv_SSE0);
- sk2_rinv_SSE1 = _mm_mul_ps(sk2_aj_SSE, rinv_SSE1);
- sk2_rinv_SSE2 = _mm_mul_ps(sk2_aj_SSE, rinv_SSE2);
- sk2_rinv_SSE3 = _mm_mul_ps(sk2_aj_SSE, rinv_SSE3);
- prod_SSE0 = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE0);
- prod_SSE1 = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE1);
- prod_SSE2 = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE2);
- prod_SSE3 = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE3);
-
- logterm_SSE0 = gmx_mm_log_ps(_mm_mul_ps(uij_SSE0, lij_inv_SSE0));
- logterm_SSE1 = gmx_mm_log_ps(_mm_mul_ps(uij_SSE1, lij_inv_SSE1));
- logterm_SSE2 = gmx_mm_log_ps(_mm_mul_ps(uij_SSE2, lij_inv_SSE2));
- logterm_SSE3 = gmx_mm_log_ps(_mm_mul_ps(uij_SSE3, lij_inv_SSE3));
-
- t1_SSE0 = _mm_sub_ps(lij_SSE0, uij_SSE0);
- t1_SSE1 = _mm_sub_ps(lij_SSE1, uij_SSE1);
- t1_SSE2 = _mm_sub_ps(lij_SSE2, uij_SSE2);
- t1_SSE3 = _mm_sub_ps(lij_SSE3, uij_SSE3);
- t2_SSE0 = _mm_mul_ps(diff2_SSE0,
- _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE0),
- prod_SSE0));
- t2_SSE1 = _mm_mul_ps(diff2_SSE1,
- _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE1),
- prod_SSE1));
- t2_SSE2 = _mm_mul_ps(diff2_SSE2,
- _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE2),
- prod_SSE2));
- t2_SSE3 = _mm_mul_ps(diff2_SSE3,
- _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE3),
- prod_SSE3));
-
- t3_SSE0 = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE0, logterm_SSE0));
- t3_SSE1 = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE1, logterm_SSE1));
- t3_SSE2 = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE2, logterm_SSE2));
- t3_SSE3 = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE3, logterm_SSE3));
- t1_SSE0 = _mm_add_ps(t1_SSE0, _mm_add_ps(t2_SSE0, t3_SSE0));
- t1_SSE1 = _mm_add_ps(t1_SSE1, _mm_add_ps(t2_SSE1, t3_SSE1));
- t1_SSE2 = _mm_add_ps(t1_SSE2, _mm_add_ps(t2_SSE2, t3_SSE2));
- t1_SSE3 = _mm_add_ps(t1_SSE3, _mm_add_ps(t2_SSE3, t3_SSE3));
- t4_SSE0 = _mm_mul_ps(two_SSE, _mm_sub_ps(rai_inv_SSE0, lij_SSE0));
- t4_SSE1 = _mm_mul_ps(two_SSE, _mm_sub_ps(rai_inv_SSE1, lij_SSE1));
- t4_SSE2 = _mm_mul_ps(two_SSE, _mm_sub_ps(rai_inv_SSE2, lij_SSE2));
- t4_SSE3 = _mm_mul_ps(two_SSE, _mm_sub_ps(rai_inv_SSE3, lij_SSE3));
- t4_SSE0 = _mm_and_ps(t4_SSE0, obc_mask3_SSE0);
- t4_SSE1 = _mm_and_ps(t4_SSE1, obc_mask3_SSE1);
- t4_SSE2 = _mm_and_ps(t4_SSE2, obc_mask3_SSE2);
- t4_SSE3 = _mm_and_ps(t4_SSE3, obc_mask3_SSE3);
- t1_SSE0 = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE0, t4_SSE0));
- t1_SSE1 = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE1, t4_SSE1));
- t1_SSE2 = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE2, t4_SSE2));
- t1_SSE3 = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE3, t4_SSE3));
-
- sum_ai_SSE0 = _mm_add_ps(sum_ai_SSE0, _mm_and_ps(t1_SSE0, obc_mask1_SSE0));
- sum_ai_SSE1 = _mm_add_ps(sum_ai_SSE1, _mm_and_ps(t1_SSE1, obc_mask1_SSE1));
- sum_ai_SSE2 = _mm_add_ps(sum_ai_SSE2, _mm_and_ps(t1_SSE2, obc_mask1_SSE2));
- sum_ai_SSE3 = _mm_add_ps(sum_ai_SSE3, _mm_and_ps(t1_SSE3, obc_mask1_SSE3));
-
- t1_SSE0 = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE0),
- _mm_mul_ps(prod_SSE0, lij3_SSE0));
- t1_SSE1 = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE1),
- _mm_mul_ps(prod_SSE1, lij3_SSE1));
- t1_SSE2 = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE2),
- _mm_mul_ps(prod_SSE2, lij3_SSE2));
- t1_SSE3 = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE3),
- _mm_mul_ps(prod_SSE3, lij3_SSE3));
- t1_SSE0 = _mm_sub_ps(t1_SSE0,
- _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(lij_SSE0, rinv_SSE0),
- _mm_mul_ps(lij3_SSE0, dr_SSE0))));
- t1_SSE1 = _mm_sub_ps(t1_SSE1,
- _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(lij_SSE1, rinv_SSE1),
- _mm_mul_ps(lij3_SSE1, dr_SSE1))));
- t1_SSE2 = _mm_sub_ps(t1_SSE2,
- _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(lij_SSE2, rinv_SSE2),
- _mm_mul_ps(lij3_SSE2, dr_SSE2))));
- t1_SSE3 = _mm_sub_ps(t1_SSE3,
- _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(lij_SSE3, rinv_SSE3),
- _mm_mul_ps(lij3_SSE3, dr_SSE3))));
-
- t2_SSE0 = _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(uij_SSE0, rinv_SSE0),
- _mm_mul_ps(uij3_SSE0, dr_SSE0)));
- t2_SSE1 = _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(uij_SSE1, rinv_SSE1),
- _mm_mul_ps(uij3_SSE1, dr_SSE1)));
- t2_SSE2 = _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(uij_SSE2, rinv_SSE2),
- _mm_mul_ps(uij3_SSE2, dr_SSE2)));
- t2_SSE3 = _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(uij_SSE3, rinv_SSE3),
- _mm_mul_ps(uij3_SSE3, dr_SSE3)));
- t2_SSE0 = _mm_sub_ps(t2_SSE0,
- _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE0),
- _mm_mul_ps(prod_SSE0, uij3_SSE0)));
- t2_SSE1 = _mm_sub_ps(t2_SSE1,
- _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE1),
- _mm_mul_ps(prod_SSE1, uij3_SSE1)));
- t2_SSE2 = _mm_sub_ps(t2_SSE2,
- _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE2),
- _mm_mul_ps(prod_SSE2, uij3_SSE2)));
- t2_SSE3 = _mm_sub_ps(t2_SSE3,
- _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE3),
- _mm_mul_ps(prod_SSE3, uij3_SSE3)));
- t3_SSE0 = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE0),
- _mm_mul_ps(rinv_SSE0, rinv_SSE0));
- t3_SSE1 = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE1),
- _mm_mul_ps(rinv_SSE1, rinv_SSE1));
- t3_SSE2 = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE2),
- _mm_mul_ps(rinv_SSE2, rinv_SSE2));
- t3_SSE3 = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE3),
- _mm_mul_ps(rinv_SSE3, rinv_SSE3));
- t3_SSE0 = _mm_sub_ps(t3_SSE0,
- _mm_mul_ps(_mm_mul_ps(diff2_SSE0, oneeighth_SSE),
- _mm_add_ps(one_SSE,
- _mm_mul_ps(sk2_rinv_SSE0, rinv_SSE0))));
- t3_SSE1 = _mm_sub_ps(t3_SSE1,
- _mm_mul_ps(_mm_mul_ps(diff2_SSE1, oneeighth_SSE),
- _mm_add_ps(one_SSE,
- _mm_mul_ps(sk2_rinv_SSE1, rinv_SSE1))));
- t3_SSE2 = _mm_sub_ps(t3_SSE2,
- _mm_mul_ps(_mm_mul_ps(diff2_SSE2, oneeighth_SSE),
- _mm_add_ps(one_SSE,
- _mm_mul_ps(sk2_rinv_SSE2, rinv_SSE2))));
- t3_SSE3 = _mm_sub_ps(t3_SSE3,
- _mm_mul_ps(_mm_mul_ps(diff2_SSE3, oneeighth_SSE),
- _mm_add_ps(one_SSE,
- _mm_mul_ps(sk2_rinv_SSE3, rinv_SSE3))));
-
- t1_SSE0 = _mm_mul_ps(rinv_SSE0,
- _mm_add_ps(_mm_mul_ps(dlij_SSE0, t1_SSE0),
- _mm_add_ps(t2_SSE0, t3_SSE0)));
- t1_SSE1 = _mm_mul_ps(rinv_SSE1,
- _mm_add_ps(_mm_mul_ps(dlij_SSE1, t1_SSE1),
- _mm_add_ps(t2_SSE1, t3_SSE1)));
- t1_SSE2 = _mm_mul_ps(rinv_SSE2,
- _mm_add_ps(_mm_mul_ps(dlij_SSE2, t1_SSE2),
- _mm_add_ps(t2_SSE2, t3_SSE2)));
- t1_SSE3 = _mm_mul_ps(rinv_SSE3,
- _mm_add_ps(_mm_mul_ps(dlij_SSE3, t1_SSE3),
- _mm_add_ps(t2_SSE3, t3_SSE3)));
-
- _mm_store_ps(dadx, _mm_and_ps(t1_SSE0, obc_mask1_SSE0));
- dadx += 4;
- _mm_store_ps(dadx, _mm_and_ps(t1_SSE1, obc_mask1_SSE1));
- dadx += 4;
- _mm_store_ps(dadx, _mm_and_ps(t1_SSE2, obc_mask1_SSE2));
- dadx += 4;
- _mm_store_ps(dadx, _mm_and_ps(t1_SSE3, obc_mask1_SSE3));
- dadx += 4;
-
- /* Evaluate influence of atom ai -> aj */
- t1_SSE0 = _mm_add_ps(dr_SSE0, sk_ai_SSE0);
- t1_SSE1 = _mm_add_ps(dr_SSE1, sk_ai_SSE1);
- t1_SSE2 = _mm_add_ps(dr_SSE2, sk_ai_SSE2);
- t1_SSE3 = _mm_add_ps(dr_SSE3, sk_ai_SSE3);
- t2_SSE0 = _mm_sub_ps(dr_SSE0, sk_ai_SSE0);
- t2_SSE1 = _mm_sub_ps(dr_SSE1, sk_ai_SSE1);
- t2_SSE2 = _mm_sub_ps(dr_SSE2, sk_ai_SSE2);
- t2_SSE3 = _mm_sub_ps(dr_SSE3, sk_ai_SSE3);
- t3_SSE0 = _mm_sub_ps(sk_ai_SSE0, dr_SSE0);
- t3_SSE1 = _mm_sub_ps(sk_ai_SSE1, dr_SSE1);
- t3_SSE2 = _mm_sub_ps(sk_ai_SSE2, dr_SSE2);
- t3_SSE3 = _mm_sub_ps(sk_ai_SSE3, dr_SSE3);
-
- obc_mask1_SSE0 = _mm_cmplt_ps(raj_SSE, t1_SSE0);
- obc_mask1_SSE1 = _mm_cmplt_ps(raj_SSE, t1_SSE1);
- obc_mask1_SSE2 = _mm_cmplt_ps(raj_SSE, t1_SSE2);
- obc_mask1_SSE3 = _mm_cmplt_ps(raj_SSE, t1_SSE3);
- obc_mask2_SSE0 = _mm_cmplt_ps(raj_SSE, t2_SSE0);
- obc_mask2_SSE1 = _mm_cmplt_ps(raj_SSE, t2_SSE1);
- obc_mask2_SSE2 = _mm_cmplt_ps(raj_SSE, t2_SSE2);
- obc_mask2_SSE3 = _mm_cmplt_ps(raj_SSE, t2_SSE3);
- obc_mask3_SSE0 = _mm_cmplt_ps(raj_SSE, t3_SSE0);
- obc_mask3_SSE1 = _mm_cmplt_ps(raj_SSE, t3_SSE1);
- obc_mask3_SSE2 = _mm_cmplt_ps(raj_SSE, t3_SSE2);
- obc_mask3_SSE3 = _mm_cmplt_ps(raj_SSE, t3_SSE3);
- obc_mask1_SSE0 = _mm_and_ps(obc_mask1_SSE0, jmask_SSE0);
- obc_mask1_SSE1 = _mm_and_ps(obc_mask1_SSE1, jmask_SSE1);
- obc_mask1_SSE2 = _mm_and_ps(obc_mask1_SSE2, jmask_SSE2);
- obc_mask1_SSE3 = _mm_and_ps(obc_mask1_SSE3, jmask_SSE3);
-
- uij_SSE0 = gmx_mm_inv_ps(t1_SSE0);
- uij_SSE1 = gmx_mm_inv_ps(t1_SSE1);
- uij_SSE2 = gmx_mm_inv_ps(t1_SSE2);
- uij_SSE3 = gmx_mm_inv_ps(t1_SSE3);
- lij_SSE0 = _mm_or_ps( _mm_and_ps(obc_mask2_SSE0, gmx_mm_inv_ps(t2_SSE0)),
- _mm_andnot_ps(obc_mask2_SSE0, raj_inv_SSE));
- lij_SSE1 = _mm_or_ps( _mm_and_ps(obc_mask2_SSE1, gmx_mm_inv_ps(t2_SSE1)),
- _mm_andnot_ps(obc_mask2_SSE1, raj_inv_SSE));
- lij_SSE2 = _mm_or_ps( _mm_and_ps(obc_mask2_SSE2, gmx_mm_inv_ps(t2_SSE2)),
- _mm_andnot_ps(obc_mask2_SSE2, raj_inv_SSE));
- lij_SSE3 = _mm_or_ps( _mm_and_ps(obc_mask2_SSE3, gmx_mm_inv_ps(t2_SSE3)),
- _mm_andnot_ps(obc_mask2_SSE3, raj_inv_SSE));
- dlij_SSE0 = _mm_and_ps(one_SSE, obc_mask2_SSE0);
- dlij_SSE1 = _mm_and_ps(one_SSE, obc_mask2_SSE1);
- dlij_SSE2 = _mm_and_ps(one_SSE, obc_mask2_SSE2);
- dlij_SSE3 = _mm_and_ps(one_SSE, obc_mask2_SSE3);
-
- uij2_SSE0 = _mm_mul_ps(uij_SSE0, uij_SSE0);
- uij2_SSE1 = _mm_mul_ps(uij_SSE1, uij_SSE1);
- uij2_SSE2 = _mm_mul_ps(uij_SSE2, uij_SSE2);
- uij2_SSE3 = _mm_mul_ps(uij_SSE3, uij_SSE3);
- uij3_SSE0 = _mm_mul_ps(uij2_SSE0, uij_SSE0);
- uij3_SSE1 = _mm_mul_ps(uij2_SSE1, uij_SSE1);
- uij3_SSE2 = _mm_mul_ps(uij2_SSE2, uij_SSE2);
- uij3_SSE3 = _mm_mul_ps(uij2_SSE3, uij_SSE3);
- lij2_SSE0 = _mm_mul_ps(lij_SSE0, lij_SSE0);
- lij2_SSE1 = _mm_mul_ps(lij_SSE1, lij_SSE1);
- lij2_SSE2 = _mm_mul_ps(lij_SSE2, lij_SSE2);
- lij2_SSE3 = _mm_mul_ps(lij_SSE3, lij_SSE3);
- lij3_SSE0 = _mm_mul_ps(lij2_SSE0, lij_SSE0);
- lij3_SSE1 = _mm_mul_ps(lij2_SSE1, lij_SSE1);
- lij3_SSE2 = _mm_mul_ps(lij2_SSE2, lij_SSE2);
- lij3_SSE3 = _mm_mul_ps(lij2_SSE3, lij_SSE3);
-
- diff2_SSE0 = _mm_sub_ps(uij2_SSE0, lij2_SSE0);
- diff2_SSE1 = _mm_sub_ps(uij2_SSE1, lij2_SSE1);
- diff2_SSE2 = _mm_sub_ps(uij2_SSE2, lij2_SSE2);
- diff2_SSE3 = _mm_sub_ps(uij2_SSE3, lij2_SSE3);
- lij_inv_SSE0 = gmx_mm_invsqrt_ps(lij2_SSE0);
- lij_inv_SSE1 = gmx_mm_invsqrt_ps(lij2_SSE1);
- lij_inv_SSE2 = gmx_mm_invsqrt_ps(lij2_SSE2);
- lij_inv_SSE3 = gmx_mm_invsqrt_ps(lij2_SSE3);
- sk2_rinv_SSE0 = _mm_mul_ps(sk2_ai_SSE0, rinv_SSE0);
- sk2_rinv_SSE1 = _mm_mul_ps(sk2_ai_SSE1, rinv_SSE1);
- sk2_rinv_SSE2 = _mm_mul_ps(sk2_ai_SSE2, rinv_SSE2);
- sk2_rinv_SSE3 = _mm_mul_ps(sk2_ai_SSE3, rinv_SSE3);
- prod_SSE0 = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE0);
- prod_SSE1 = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE1);
- prod_SSE2 = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE2);
- prod_SSE3 = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE3);
-
- logterm_SSE0 = gmx_mm_log_ps(_mm_mul_ps(uij_SSE0, lij_inv_SSE0));
- logterm_SSE1 = gmx_mm_log_ps(_mm_mul_ps(uij_SSE1, lij_inv_SSE1));
- logterm_SSE2 = gmx_mm_log_ps(_mm_mul_ps(uij_SSE2, lij_inv_SSE2));
- logterm_SSE3 = gmx_mm_log_ps(_mm_mul_ps(uij_SSE3, lij_inv_SSE3));
- t1_SSE0 = _mm_sub_ps(lij_SSE0, uij_SSE0);
- t1_SSE1 = _mm_sub_ps(lij_SSE1, uij_SSE1);
- t1_SSE2 = _mm_sub_ps(lij_SSE2, uij_SSE2);
- t1_SSE3 = _mm_sub_ps(lij_SSE3, uij_SSE3);
- t2_SSE0 = _mm_mul_ps(diff2_SSE0,
- _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE0),
- prod_SSE0));
- t2_SSE1 = _mm_mul_ps(diff2_SSE1,
- _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE1),
- prod_SSE1));
- t2_SSE2 = _mm_mul_ps(diff2_SSE2,
- _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE2),
- prod_SSE2));
- t2_SSE3 = _mm_mul_ps(diff2_SSE3,
- _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE3),
- prod_SSE3));
- t3_SSE0 = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE0, logterm_SSE0));
- t3_SSE1 = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE1, logterm_SSE1));
- t3_SSE2 = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE2, logterm_SSE2));
- t3_SSE3 = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE3, logterm_SSE3));
- t1_SSE0 = _mm_add_ps(t1_SSE0, _mm_add_ps(t2_SSE0, t3_SSE0));
- t1_SSE1 = _mm_add_ps(t1_SSE1, _mm_add_ps(t2_SSE1, t3_SSE1));
- t1_SSE2 = _mm_add_ps(t1_SSE2, _mm_add_ps(t2_SSE2, t3_SSE2));
- t1_SSE3 = _mm_add_ps(t1_SSE3, _mm_add_ps(t2_SSE3, t3_SSE3));
- t4_SSE0 = _mm_mul_ps(two_SSE, _mm_sub_ps(raj_inv_SSE, lij_SSE0));
- t4_SSE1 = _mm_mul_ps(two_SSE, _mm_sub_ps(raj_inv_SSE, lij_SSE1));
- t4_SSE2 = _mm_mul_ps(two_SSE, _mm_sub_ps(raj_inv_SSE, lij_SSE2));
- t4_SSE3 = _mm_mul_ps(two_SSE, _mm_sub_ps(raj_inv_SSE, lij_SSE3));
- t4_SSE0 = _mm_and_ps(t4_SSE0, obc_mask3_SSE0);
- t4_SSE1 = _mm_and_ps(t4_SSE1, obc_mask3_SSE1);
- t4_SSE2 = _mm_and_ps(t4_SSE2, obc_mask3_SSE2);
- t4_SSE3 = _mm_and_ps(t4_SSE3, obc_mask3_SSE3);
- t1_SSE0 = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE0, t4_SSE0));
- t1_SSE1 = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE1, t4_SSE1));
- t1_SSE2 = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE2, t4_SSE2));
- t1_SSE3 = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE3, t4_SSE3));
-
- _mm_store_ps(work+j, _mm_add_ps(_mm_load_ps(work+j),
- gmx_mm_sum4_ps(_mm_and_ps(t1_SSE0, obc_mask1_SSE0),
- _mm_and_ps(t1_SSE1, obc_mask1_SSE1),
- _mm_and_ps(t1_SSE2, obc_mask1_SSE2),
- _mm_and_ps(t1_SSE3, obc_mask1_SSE3))));
-
- t1_SSE0 = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE0),
- _mm_mul_ps(prod_SSE0, lij3_SSE0));
- t1_SSE1 = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE1),
- _mm_mul_ps(prod_SSE1, lij3_SSE1));
- t1_SSE2 = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE2),
- _mm_mul_ps(prod_SSE2, lij3_SSE2));
- t1_SSE3 = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE3),
- _mm_mul_ps(prod_SSE3, lij3_SSE3));
- t1_SSE0 = _mm_sub_ps(t1_SSE0,
- _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(lij_SSE0, rinv_SSE0),
- _mm_mul_ps(lij3_SSE0, dr_SSE0))));
- t1_SSE1 = _mm_sub_ps(t1_SSE1,
- _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(lij_SSE1, rinv_SSE1),
- _mm_mul_ps(lij3_SSE1, dr_SSE1))));
- t1_SSE2 = _mm_sub_ps(t1_SSE2,
- _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(lij_SSE2, rinv_SSE2),
- _mm_mul_ps(lij3_SSE2, dr_SSE2))));
- t1_SSE3 = _mm_sub_ps(t1_SSE3,
- _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(lij_SSE3, rinv_SSE3),
- _mm_mul_ps(lij3_SSE3, dr_SSE3))));
- t2_SSE0 = _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(uij_SSE0, rinv_SSE0),
- _mm_mul_ps(uij3_SSE0, dr_SSE0)));
- t2_SSE1 = _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(uij_SSE1, rinv_SSE1),
- _mm_mul_ps(uij3_SSE1, dr_SSE1)));
- t2_SSE2 = _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(uij_SSE2, rinv_SSE2),
- _mm_mul_ps(uij3_SSE2, dr_SSE2)));
- t2_SSE3 = _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(uij_SSE3, rinv_SSE3),
- _mm_mul_ps(uij3_SSE3, dr_SSE3)));
- t2_SSE0 = _mm_sub_ps(t2_SSE0,
- _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE0),
- _mm_mul_ps(prod_SSE0, uij3_SSE0)));
- t2_SSE1 = _mm_sub_ps(t2_SSE1,
- _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE1),
- _mm_mul_ps(prod_SSE1, uij3_SSE1)));
- t2_SSE2 = _mm_sub_ps(t2_SSE2,
- _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE2),
- _mm_mul_ps(prod_SSE2, uij3_SSE2)));
- t2_SSE3 = _mm_sub_ps(t2_SSE3,
- _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE3),
- _mm_mul_ps(prod_SSE3, uij3_SSE3)));
-
- t3_SSE0 = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE0),
- _mm_mul_ps(rinv_SSE0, rinv_SSE0));
- t3_SSE1 = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE1),
- _mm_mul_ps(rinv_SSE1, rinv_SSE1));
- t3_SSE2 = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE2),
- _mm_mul_ps(rinv_SSE2, rinv_SSE2));
- t3_SSE3 = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE3),
- _mm_mul_ps(rinv_SSE3, rinv_SSE3));
-
- t3_SSE0 = _mm_sub_ps(t3_SSE0,
- _mm_mul_ps(_mm_mul_ps(diff2_SSE0, oneeighth_SSE),
- _mm_add_ps(one_SSE,
- _mm_mul_ps(sk2_rinv_SSE0, rinv_SSE0))));
- t3_SSE1 = _mm_sub_ps(t3_SSE1,
- _mm_mul_ps(_mm_mul_ps(diff2_SSE1, oneeighth_SSE),
- _mm_add_ps(one_SSE,
- _mm_mul_ps(sk2_rinv_SSE1, rinv_SSE1))));
- t3_SSE2 = _mm_sub_ps(t3_SSE2,
- _mm_mul_ps(_mm_mul_ps(diff2_SSE2, oneeighth_SSE),
- _mm_add_ps(one_SSE,
- _mm_mul_ps(sk2_rinv_SSE2, rinv_SSE2))));
- t3_SSE3 = _mm_sub_ps(t3_SSE3,
- _mm_mul_ps(_mm_mul_ps(diff2_SSE3, oneeighth_SSE),
- _mm_add_ps(one_SSE,
- _mm_mul_ps(sk2_rinv_SSE3, rinv_SSE3))));
-
-
- t1_SSE0 = _mm_mul_ps(rinv_SSE0,
- _mm_add_ps(_mm_mul_ps(dlij_SSE0, t1_SSE0),
- _mm_add_ps(t2_SSE0, t3_SSE0)));
- t1_SSE1 = _mm_mul_ps(rinv_SSE1,
- _mm_add_ps(_mm_mul_ps(dlij_SSE1, t1_SSE1),
- _mm_add_ps(t2_SSE1, t3_SSE1)));
- t1_SSE2 = _mm_mul_ps(rinv_SSE2,
- _mm_add_ps(_mm_mul_ps(dlij_SSE2, t1_SSE2),
- _mm_add_ps(t2_SSE2, t3_SSE2)));
- t1_SSE3 = _mm_mul_ps(rinv_SSE3,
- _mm_add_ps(_mm_mul_ps(dlij_SSE3, t1_SSE3),
- _mm_add_ps(t2_SSE3, t3_SSE3)));
-
- _mm_store_ps(dadx, _mm_and_ps(t1_SSE0, obc_mask1_SSE0));
- dadx += 4;
- _mm_store_ps(dadx, _mm_and_ps(t1_SSE1, obc_mask1_SSE1));
- dadx += 4;
- _mm_store_ps(dadx, _mm_and_ps(t1_SSE2, obc_mask1_SSE2));
- dadx += 4;
- _mm_store_ps(dadx, _mm_and_ps(t1_SSE3, obc_mask1_SSE3));
- dadx += 4;
-
- }
-
- /* Main part, no exclusions */
- for (j = nj1; j < nj2; j += UNROLLJ)
- {
- /* load j atom coordinates */
- jx_SSE = _mm_load_ps(x_align+j);
- jy_SSE = _mm_load_ps(y_align+j);
- jz_SSE = _mm_load_ps(z_align+j);
-
- /* Calculate distance */
- dx_SSE0 = _mm_sub_ps(ix_SSE0, jx_SSE);
- dy_SSE0 = _mm_sub_ps(iy_SSE0, jy_SSE);
- dz_SSE0 = _mm_sub_ps(iz_SSE0, jz_SSE);
- dx_SSE1 = _mm_sub_ps(ix_SSE1, jx_SSE);
- dy_SSE1 = _mm_sub_ps(iy_SSE1, jy_SSE);
- dz_SSE1 = _mm_sub_ps(iz_SSE1, jz_SSE);
- dx_SSE2 = _mm_sub_ps(ix_SSE2, jx_SSE);
- dy_SSE2 = _mm_sub_ps(iy_SSE2, jy_SSE);
- dz_SSE2 = _mm_sub_ps(iz_SSE2, jz_SSE);
- dx_SSE3 = _mm_sub_ps(ix_SSE3, jx_SSE);
- dy_SSE3 = _mm_sub_ps(iy_SSE3, jy_SSE);
- dz_SSE3 = _mm_sub_ps(iz_SSE3, jz_SSE);
-
- /* rsq = dx*dx+dy*dy+dz*dz */
- rsq_SSE0 = gmx_mm_calc_rsq_ps(dx_SSE0, dy_SSE0, dz_SSE0);
- rsq_SSE1 = gmx_mm_calc_rsq_ps(dx_SSE1, dy_SSE1, dz_SSE1);
- rsq_SSE2 = gmx_mm_calc_rsq_ps(dx_SSE2, dy_SSE2, dz_SSE2);
- rsq_SSE3 = gmx_mm_calc_rsq_ps(dx_SSE3, dy_SSE3, dz_SSE3);
-
- /* Calculate 1/r and 1/r2 */
- rinv_SSE0 = gmx_mm_invsqrt_ps(rsq_SSE0);
- rinv_SSE1 = gmx_mm_invsqrt_ps(rsq_SSE1);
- rinv_SSE2 = gmx_mm_invsqrt_ps(rsq_SSE2);
- rinv_SSE3 = gmx_mm_invsqrt_ps(rsq_SSE3);
-
- /* Apply mask */
- rinv_SSE0 = _mm_and_ps(rinv_SSE0, imask_SSE0);
- rinv_SSE1 = _mm_and_ps(rinv_SSE1, imask_SSE1);
- rinv_SSE2 = _mm_and_ps(rinv_SSE2, imask_SSE2);
- rinv_SSE3 = _mm_and_ps(rinv_SSE3, imask_SSE3);
-
- dr_SSE0 = _mm_mul_ps(rsq_SSE0, rinv_SSE0);
- dr_SSE1 = _mm_mul_ps(rsq_SSE1, rinv_SSE1);
- dr_SSE2 = _mm_mul_ps(rsq_SSE2, rinv_SSE2);
- dr_SSE3 = _mm_mul_ps(rsq_SSE3, rinv_SSE3);
-
- sk_aj_SSE = _mm_load_ps(obc_param+j);
- raj_SSE = _mm_load_ps(gb_radius+j);
-
- raj_inv_SSE = gmx_mm_inv_ps(raj_SSE);
-
- /* Evaluate influence of atom aj -> ai */
- t1_SSE0 = _mm_add_ps(dr_SSE0, sk_aj_SSE);
- t1_SSE1 = _mm_add_ps(dr_SSE1, sk_aj_SSE);
- t1_SSE2 = _mm_add_ps(dr_SSE2, sk_aj_SSE);
- t1_SSE3 = _mm_add_ps(dr_SSE3, sk_aj_SSE);
- t2_SSE0 = _mm_sub_ps(dr_SSE0, sk_aj_SSE);
- t2_SSE1 = _mm_sub_ps(dr_SSE1, sk_aj_SSE);
- t2_SSE2 = _mm_sub_ps(dr_SSE2, sk_aj_SSE);
- t2_SSE3 = _mm_sub_ps(dr_SSE3, sk_aj_SSE);
- t3_SSE0 = _mm_sub_ps(sk_aj_SSE, dr_SSE0);
- t3_SSE1 = _mm_sub_ps(sk_aj_SSE, dr_SSE1);
- t3_SSE2 = _mm_sub_ps(sk_aj_SSE, dr_SSE2);
- t3_SSE3 = _mm_sub_ps(sk_aj_SSE, dr_SSE3);
-
- obc_mask1_SSE0 = _mm_cmplt_ps(rai_SSE0, t1_SSE0);
- obc_mask1_SSE1 = _mm_cmplt_ps(rai_SSE1, t1_SSE1);
- obc_mask1_SSE2 = _mm_cmplt_ps(rai_SSE2, t1_SSE2);
- obc_mask1_SSE3 = _mm_cmplt_ps(rai_SSE3, t1_SSE3);
- obc_mask2_SSE0 = _mm_cmplt_ps(rai_SSE0, t2_SSE0);
- obc_mask2_SSE1 = _mm_cmplt_ps(rai_SSE1, t2_SSE1);
- obc_mask2_SSE2 = _mm_cmplt_ps(rai_SSE2, t2_SSE2);
- obc_mask2_SSE3 = _mm_cmplt_ps(rai_SSE3, t2_SSE3);
- obc_mask3_SSE0 = _mm_cmplt_ps(rai_SSE0, t3_SSE0);
- obc_mask3_SSE1 = _mm_cmplt_ps(rai_SSE1, t3_SSE1);
- obc_mask3_SSE2 = _mm_cmplt_ps(rai_SSE2, t3_SSE2);
- obc_mask3_SSE3 = _mm_cmplt_ps(rai_SSE3, t3_SSE3);
- obc_mask1_SSE0 = _mm_and_ps(obc_mask1_SSE0, imask_SSE0);
- obc_mask1_SSE1 = _mm_and_ps(obc_mask1_SSE1, imask_SSE1);
- obc_mask1_SSE2 = _mm_and_ps(obc_mask1_SSE2, imask_SSE2);
- obc_mask1_SSE3 = _mm_and_ps(obc_mask1_SSE3, imask_SSE3);
-
- uij_SSE0 = gmx_mm_inv_ps(t1_SSE0);
- uij_SSE1 = gmx_mm_inv_ps(t1_SSE1);
- uij_SSE2 = gmx_mm_inv_ps(t1_SSE2);
- uij_SSE3 = gmx_mm_inv_ps(t1_SSE3);
- lij_SSE0 = _mm_or_ps( _mm_and_ps(obc_mask2_SSE0, gmx_mm_inv_ps(t2_SSE0)),
- _mm_andnot_ps(obc_mask2_SSE0, rai_inv_SSE0));
- lij_SSE1 = _mm_or_ps( _mm_and_ps(obc_mask2_SSE1, gmx_mm_inv_ps(t2_SSE1)),
- _mm_andnot_ps(obc_mask2_SSE1, rai_inv_SSE1));
- lij_SSE2 = _mm_or_ps( _mm_and_ps(obc_mask2_SSE2, gmx_mm_inv_ps(t2_SSE2)),
- _mm_andnot_ps(obc_mask2_SSE2, rai_inv_SSE2));
- lij_SSE3 = _mm_or_ps( _mm_and_ps(obc_mask2_SSE3, gmx_mm_inv_ps(t2_SSE3)),
- _mm_andnot_ps(obc_mask2_SSE3, rai_inv_SSE3));
- dlij_SSE0 = _mm_and_ps(one_SSE, obc_mask2_SSE0);
- dlij_SSE1 = _mm_and_ps(one_SSE, obc_mask2_SSE1);
- dlij_SSE2 = _mm_and_ps(one_SSE, obc_mask2_SSE2);
- dlij_SSE3 = _mm_and_ps(one_SSE, obc_mask2_SSE3);
-
- uij2_SSE0 = _mm_mul_ps(uij_SSE0, uij_SSE0);
- uij2_SSE1 = _mm_mul_ps(uij_SSE1, uij_SSE1);
- uij2_SSE2 = _mm_mul_ps(uij_SSE2, uij_SSE2);
- uij2_SSE3 = _mm_mul_ps(uij_SSE3, uij_SSE3);
- uij3_SSE0 = _mm_mul_ps(uij2_SSE0, uij_SSE0);
- uij3_SSE1 = _mm_mul_ps(uij2_SSE1, uij_SSE1);
- uij3_SSE2 = _mm_mul_ps(uij2_SSE2, uij_SSE2);
- uij3_SSE3 = _mm_mul_ps(uij2_SSE3, uij_SSE3);
- lij2_SSE0 = _mm_mul_ps(lij_SSE0, lij_SSE0);
- lij2_SSE1 = _mm_mul_ps(lij_SSE1, lij_SSE1);
- lij2_SSE2 = _mm_mul_ps(lij_SSE2, lij_SSE2);
- lij2_SSE3 = _mm_mul_ps(lij_SSE3, lij_SSE3);
- lij3_SSE0 = _mm_mul_ps(lij2_SSE0, lij_SSE0);
- lij3_SSE1 = _mm_mul_ps(lij2_SSE1, lij_SSE1);
- lij3_SSE2 = _mm_mul_ps(lij2_SSE2, lij_SSE2);
- lij3_SSE3 = _mm_mul_ps(lij2_SSE3, lij_SSE3);
-
- diff2_SSE0 = _mm_sub_ps(uij2_SSE0, lij2_SSE0);
- diff2_SSE1 = _mm_sub_ps(uij2_SSE1, lij2_SSE1);
- diff2_SSE2 = _mm_sub_ps(uij2_SSE2, lij2_SSE2);
- diff2_SSE3 = _mm_sub_ps(uij2_SSE3, lij2_SSE3);
- lij_inv_SSE0 = gmx_mm_invsqrt_ps(lij2_SSE0);
- lij_inv_SSE1 = gmx_mm_invsqrt_ps(lij2_SSE1);
- lij_inv_SSE2 = gmx_mm_invsqrt_ps(lij2_SSE2);
- lij_inv_SSE3 = gmx_mm_invsqrt_ps(lij2_SSE3);
- sk2_aj_SSE = _mm_mul_ps(sk_aj_SSE, sk_aj_SSE);
- sk2_rinv_SSE0 = _mm_mul_ps(sk2_aj_SSE, rinv_SSE0);
- sk2_rinv_SSE1 = _mm_mul_ps(sk2_aj_SSE, rinv_SSE1);
- sk2_rinv_SSE2 = _mm_mul_ps(sk2_aj_SSE, rinv_SSE2);
- sk2_rinv_SSE3 = _mm_mul_ps(sk2_aj_SSE, rinv_SSE3);
- prod_SSE0 = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE0);
- prod_SSE1 = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE1);
- prod_SSE2 = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE2);
- prod_SSE3 = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE3);
-
- logterm_SSE0 = gmx_mm_log_ps(_mm_mul_ps(uij_SSE0, lij_inv_SSE0));
- logterm_SSE1 = gmx_mm_log_ps(_mm_mul_ps(uij_SSE1, lij_inv_SSE1));
- logterm_SSE2 = gmx_mm_log_ps(_mm_mul_ps(uij_SSE2, lij_inv_SSE2));
- logterm_SSE3 = gmx_mm_log_ps(_mm_mul_ps(uij_SSE3, lij_inv_SSE3));
-
- t1_SSE0 = _mm_sub_ps(lij_SSE0, uij_SSE0);
- t1_SSE1 = _mm_sub_ps(lij_SSE1, uij_SSE1);
- t1_SSE2 = _mm_sub_ps(lij_SSE2, uij_SSE2);
- t1_SSE3 = _mm_sub_ps(lij_SSE3, uij_SSE3);
- t2_SSE0 = _mm_mul_ps(diff2_SSE0,
- _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE0),
- prod_SSE0));
- t2_SSE1 = _mm_mul_ps(diff2_SSE1,
- _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE1),
- prod_SSE1));
- t2_SSE2 = _mm_mul_ps(diff2_SSE2,
- _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE2),
- prod_SSE2));
- t2_SSE3 = _mm_mul_ps(diff2_SSE3,
- _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE3),
- prod_SSE3));
-
- t3_SSE0 = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE0, logterm_SSE0));
- t3_SSE1 = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE1, logterm_SSE1));
- t3_SSE2 = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE2, logterm_SSE2));
- t3_SSE3 = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE3, logterm_SSE3));
- t1_SSE0 = _mm_add_ps(t1_SSE0, _mm_add_ps(t2_SSE0, t3_SSE0));
- t1_SSE1 = _mm_add_ps(t1_SSE1, _mm_add_ps(t2_SSE1, t3_SSE1));
- t1_SSE2 = _mm_add_ps(t1_SSE2, _mm_add_ps(t2_SSE2, t3_SSE2));
- t1_SSE3 = _mm_add_ps(t1_SSE3, _mm_add_ps(t2_SSE3, t3_SSE3));
- t4_SSE0 = _mm_mul_ps(two_SSE, _mm_sub_ps(rai_inv_SSE0, lij_SSE0));
- t4_SSE1 = _mm_mul_ps(two_SSE, _mm_sub_ps(rai_inv_SSE1, lij_SSE1));
- t4_SSE2 = _mm_mul_ps(two_SSE, _mm_sub_ps(rai_inv_SSE2, lij_SSE2));
- t4_SSE3 = _mm_mul_ps(two_SSE, _mm_sub_ps(rai_inv_SSE3, lij_SSE3));
- t4_SSE0 = _mm_and_ps(t4_SSE0, obc_mask3_SSE0);
- t4_SSE1 = _mm_and_ps(t4_SSE1, obc_mask3_SSE1);
- t4_SSE2 = _mm_and_ps(t4_SSE2, obc_mask3_SSE2);
- t4_SSE3 = _mm_and_ps(t4_SSE3, obc_mask3_SSE3);
- t1_SSE0 = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE0, t4_SSE0));
- t1_SSE1 = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE1, t4_SSE1));
- t1_SSE2 = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE2, t4_SSE2));
- t1_SSE3 = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE3, t4_SSE3));
-
- sum_ai_SSE0 = _mm_add_ps(sum_ai_SSE0, _mm_and_ps(t1_SSE0, obc_mask1_SSE0));
- sum_ai_SSE1 = _mm_add_ps(sum_ai_SSE1, _mm_and_ps(t1_SSE1, obc_mask1_SSE1));
- sum_ai_SSE2 = _mm_add_ps(sum_ai_SSE2, _mm_and_ps(t1_SSE2, obc_mask1_SSE2));
- sum_ai_SSE3 = _mm_add_ps(sum_ai_SSE3, _mm_and_ps(t1_SSE3, obc_mask1_SSE3));
-
- t1_SSE0 = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE0),
- _mm_mul_ps(prod_SSE0, lij3_SSE0));
- t1_SSE1 = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE1),
- _mm_mul_ps(prod_SSE1, lij3_SSE1));
- t1_SSE2 = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE2),
- _mm_mul_ps(prod_SSE2, lij3_SSE2));
- t1_SSE3 = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE3),
- _mm_mul_ps(prod_SSE3, lij3_SSE3));
- t1_SSE0 = _mm_sub_ps(t1_SSE0,
- _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(lij_SSE0, rinv_SSE0),
- _mm_mul_ps(lij3_SSE0, dr_SSE0))));
- t1_SSE1 = _mm_sub_ps(t1_SSE1,
- _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(lij_SSE1, rinv_SSE1),
- _mm_mul_ps(lij3_SSE1, dr_SSE1))));
- t1_SSE2 = _mm_sub_ps(t1_SSE2,
- _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(lij_SSE2, rinv_SSE2),
- _mm_mul_ps(lij3_SSE2, dr_SSE2))));
- t1_SSE3 = _mm_sub_ps(t1_SSE3,
- _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(lij_SSE3, rinv_SSE3),
- _mm_mul_ps(lij3_SSE3, dr_SSE3))));
-
- t2_SSE0 = _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(uij_SSE0, rinv_SSE0),
- _mm_mul_ps(uij3_SSE0, dr_SSE0)));
- t2_SSE1 = _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(uij_SSE1, rinv_SSE1),
- _mm_mul_ps(uij3_SSE1, dr_SSE1)));
- t2_SSE2 = _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(uij_SSE2, rinv_SSE2),
- _mm_mul_ps(uij3_SSE2, dr_SSE2)));
- t2_SSE3 = _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(uij_SSE3, rinv_SSE3),
- _mm_mul_ps(uij3_SSE3, dr_SSE3)));
- t2_SSE0 = _mm_sub_ps(t2_SSE0,
- _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE0),
- _mm_mul_ps(prod_SSE0, uij3_SSE0)));
- t2_SSE1 = _mm_sub_ps(t2_SSE1,
- _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE1),
- _mm_mul_ps(prod_SSE1, uij3_SSE1)));
- t2_SSE2 = _mm_sub_ps(t2_SSE2,
- _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE2),
- _mm_mul_ps(prod_SSE2, uij3_SSE2)));
- t2_SSE3 = _mm_sub_ps(t2_SSE3,
- _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE3),
- _mm_mul_ps(prod_SSE3, uij3_SSE3)));
- t3_SSE0 = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE0),
- _mm_mul_ps(rinv_SSE0, rinv_SSE0));
- t3_SSE1 = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE1),
- _mm_mul_ps(rinv_SSE1, rinv_SSE1));
- t3_SSE2 = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE2),
- _mm_mul_ps(rinv_SSE2, rinv_SSE2));
- t3_SSE3 = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE3),
- _mm_mul_ps(rinv_SSE3, rinv_SSE3));
- t3_SSE0 = _mm_sub_ps(t3_SSE0,
- _mm_mul_ps(_mm_mul_ps(diff2_SSE0, oneeighth_SSE),
- _mm_add_ps(one_SSE,
- _mm_mul_ps(sk2_rinv_SSE0, rinv_SSE0))));
- t3_SSE1 = _mm_sub_ps(t3_SSE1,
- _mm_mul_ps(_mm_mul_ps(diff2_SSE1, oneeighth_SSE),
- _mm_add_ps(one_SSE,
- _mm_mul_ps(sk2_rinv_SSE1, rinv_SSE1))));
- t3_SSE2 = _mm_sub_ps(t3_SSE2,
- _mm_mul_ps(_mm_mul_ps(diff2_SSE2, oneeighth_SSE),
- _mm_add_ps(one_SSE,
- _mm_mul_ps(sk2_rinv_SSE2, rinv_SSE2))));
- t3_SSE3 = _mm_sub_ps(t3_SSE3,
- _mm_mul_ps(_mm_mul_ps(diff2_SSE3, oneeighth_SSE),
- _mm_add_ps(one_SSE,
- _mm_mul_ps(sk2_rinv_SSE3, rinv_SSE3))));
-
- t1_SSE0 = _mm_mul_ps(rinv_SSE0,
- _mm_add_ps(_mm_mul_ps(dlij_SSE0, t1_SSE0),
- _mm_add_ps(t2_SSE0, t3_SSE0)));
- t1_SSE1 = _mm_mul_ps(rinv_SSE1,
- _mm_add_ps(_mm_mul_ps(dlij_SSE1, t1_SSE1),
- _mm_add_ps(t2_SSE1, t3_SSE1)));
- t1_SSE2 = _mm_mul_ps(rinv_SSE2,
- _mm_add_ps(_mm_mul_ps(dlij_SSE2, t1_SSE2),
- _mm_add_ps(t2_SSE2, t3_SSE2)));
- t1_SSE3 = _mm_mul_ps(rinv_SSE3,
- _mm_add_ps(_mm_mul_ps(dlij_SSE3, t1_SSE3),
- _mm_add_ps(t2_SSE3, t3_SSE3)));
-
- _mm_store_ps(dadx, _mm_and_ps(t1_SSE0, obc_mask1_SSE0));
- dadx += 4;
- _mm_store_ps(dadx, _mm_and_ps(t1_SSE1, obc_mask1_SSE1));
- dadx += 4;
- _mm_store_ps(dadx, _mm_and_ps(t1_SSE2, obc_mask1_SSE2));
- dadx += 4;
- _mm_store_ps(dadx, _mm_and_ps(t1_SSE3, obc_mask1_SSE3));
- dadx += 4;
-
- /* Evaluate influence of atom ai -> aj */
- t1_SSE0 = _mm_add_ps(dr_SSE0, sk_ai_SSE0);
- t1_SSE1 = _mm_add_ps(dr_SSE1, sk_ai_SSE1);
- t1_SSE2 = _mm_add_ps(dr_SSE2, sk_ai_SSE2);
- t1_SSE3 = _mm_add_ps(dr_SSE3, sk_ai_SSE3);
- t2_SSE0 = _mm_sub_ps(dr_SSE0, sk_ai_SSE0);
- t2_SSE1 = _mm_sub_ps(dr_SSE1, sk_ai_SSE1);
- t2_SSE2 = _mm_sub_ps(dr_SSE2, sk_ai_SSE2);
- t2_SSE3 = _mm_sub_ps(dr_SSE3, sk_ai_SSE3);
- t3_SSE0 = _mm_sub_ps(sk_ai_SSE0, dr_SSE0);
- t3_SSE1 = _mm_sub_ps(sk_ai_SSE1, dr_SSE1);
- t3_SSE2 = _mm_sub_ps(sk_ai_SSE2, dr_SSE2);
- t3_SSE3 = _mm_sub_ps(sk_ai_SSE3, dr_SSE3);
-
- obc_mask1_SSE0 = _mm_cmplt_ps(raj_SSE, t1_SSE0);
- obc_mask1_SSE1 = _mm_cmplt_ps(raj_SSE, t1_SSE1);
- obc_mask1_SSE2 = _mm_cmplt_ps(raj_SSE, t1_SSE2);
- obc_mask1_SSE3 = _mm_cmplt_ps(raj_SSE, t1_SSE3);
- obc_mask2_SSE0 = _mm_cmplt_ps(raj_SSE, t2_SSE0);
- obc_mask2_SSE1 = _mm_cmplt_ps(raj_SSE, t2_SSE1);
- obc_mask2_SSE2 = _mm_cmplt_ps(raj_SSE, t2_SSE2);
- obc_mask2_SSE3 = _mm_cmplt_ps(raj_SSE, t2_SSE3);
- obc_mask3_SSE0 = _mm_cmplt_ps(raj_SSE, t3_SSE0);
- obc_mask3_SSE1 = _mm_cmplt_ps(raj_SSE, t3_SSE1);
- obc_mask3_SSE2 = _mm_cmplt_ps(raj_SSE, t3_SSE2);
- obc_mask3_SSE3 = _mm_cmplt_ps(raj_SSE, t3_SSE3);
- obc_mask1_SSE0 = _mm_and_ps(obc_mask1_SSE0, imask_SSE0);
- obc_mask1_SSE1 = _mm_and_ps(obc_mask1_SSE1, imask_SSE1);
- obc_mask1_SSE2 = _mm_and_ps(obc_mask1_SSE2, imask_SSE2);
- obc_mask1_SSE3 = _mm_and_ps(obc_mask1_SSE3, imask_SSE3);
-
- uij_SSE0 = gmx_mm_inv_ps(t1_SSE0);
- uij_SSE1 = gmx_mm_inv_ps(t1_SSE1);
- uij_SSE2 = gmx_mm_inv_ps(t1_SSE2);
- uij_SSE3 = gmx_mm_inv_ps(t1_SSE3);
- lij_SSE0 = _mm_or_ps( _mm_and_ps(obc_mask2_SSE0, gmx_mm_inv_ps(t2_SSE0)),
- _mm_andnot_ps(obc_mask2_SSE0, raj_inv_SSE));
- lij_SSE1 = _mm_or_ps( _mm_and_ps(obc_mask2_SSE1, gmx_mm_inv_ps(t2_SSE1)),
- _mm_andnot_ps(obc_mask2_SSE1, raj_inv_SSE));
- lij_SSE2 = _mm_or_ps( _mm_and_ps(obc_mask2_SSE2, gmx_mm_inv_ps(t2_SSE2)),
- _mm_andnot_ps(obc_mask2_SSE2, raj_inv_SSE));
- lij_SSE3 = _mm_or_ps( _mm_and_ps(obc_mask2_SSE3, gmx_mm_inv_ps(t2_SSE3)),
- _mm_andnot_ps(obc_mask2_SSE3, raj_inv_SSE));
- dlij_SSE0 = _mm_and_ps(one_SSE, obc_mask2_SSE0);
- dlij_SSE1 = _mm_and_ps(one_SSE, obc_mask2_SSE1);
- dlij_SSE2 = _mm_and_ps(one_SSE, obc_mask2_SSE2);
- dlij_SSE3 = _mm_and_ps(one_SSE, obc_mask2_SSE3);
-
- uij2_SSE0 = _mm_mul_ps(uij_SSE0, uij_SSE0);
- uij2_SSE1 = _mm_mul_ps(uij_SSE1, uij_SSE1);
- uij2_SSE2 = _mm_mul_ps(uij_SSE2, uij_SSE2);
- uij2_SSE3 = _mm_mul_ps(uij_SSE3, uij_SSE3);
- uij3_SSE0 = _mm_mul_ps(uij2_SSE0, uij_SSE0);
- uij3_SSE1 = _mm_mul_ps(uij2_SSE1, uij_SSE1);
- uij3_SSE2 = _mm_mul_ps(uij2_SSE2, uij_SSE2);
- uij3_SSE3 = _mm_mul_ps(uij2_SSE3, uij_SSE3);
- lij2_SSE0 = _mm_mul_ps(lij_SSE0, lij_SSE0);
- lij2_SSE1 = _mm_mul_ps(lij_SSE1, lij_SSE1);
- lij2_SSE2 = _mm_mul_ps(lij_SSE2, lij_SSE2);
- lij2_SSE3 = _mm_mul_ps(lij_SSE3, lij_SSE3);
- lij3_SSE0 = _mm_mul_ps(lij2_SSE0, lij_SSE0);
- lij3_SSE1 = _mm_mul_ps(lij2_SSE1, lij_SSE1);
- lij3_SSE2 = _mm_mul_ps(lij2_SSE2, lij_SSE2);
- lij3_SSE3 = _mm_mul_ps(lij2_SSE3, lij_SSE3);
-
- diff2_SSE0 = _mm_sub_ps(uij2_SSE0, lij2_SSE0);
- diff2_SSE1 = _mm_sub_ps(uij2_SSE1, lij2_SSE1);
- diff2_SSE2 = _mm_sub_ps(uij2_SSE2, lij2_SSE2);
- diff2_SSE3 = _mm_sub_ps(uij2_SSE3, lij2_SSE3);
- lij_inv_SSE0 = gmx_mm_invsqrt_ps(lij2_SSE0);
- lij_inv_SSE1 = gmx_mm_invsqrt_ps(lij2_SSE1);
- lij_inv_SSE2 = gmx_mm_invsqrt_ps(lij2_SSE2);
- lij_inv_SSE3 = gmx_mm_invsqrt_ps(lij2_SSE3);
- sk2_rinv_SSE0 = _mm_mul_ps(sk2_ai_SSE0, rinv_SSE0);
- sk2_rinv_SSE1 = _mm_mul_ps(sk2_ai_SSE1, rinv_SSE1);
- sk2_rinv_SSE2 = _mm_mul_ps(sk2_ai_SSE2, rinv_SSE2);
- sk2_rinv_SSE3 = _mm_mul_ps(sk2_ai_SSE3, rinv_SSE3);
- prod_SSE0 = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE0);
- prod_SSE1 = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE1);
- prod_SSE2 = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE2);
- prod_SSE3 = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE3);
-
- logterm_SSE0 = gmx_mm_log_ps(_mm_mul_ps(uij_SSE0, lij_inv_SSE0));
- logterm_SSE1 = gmx_mm_log_ps(_mm_mul_ps(uij_SSE1, lij_inv_SSE1));
- logterm_SSE2 = gmx_mm_log_ps(_mm_mul_ps(uij_SSE2, lij_inv_SSE2));
- logterm_SSE3 = gmx_mm_log_ps(_mm_mul_ps(uij_SSE3, lij_inv_SSE3));
- t1_SSE0 = _mm_sub_ps(lij_SSE0, uij_SSE0);
- t1_SSE1 = _mm_sub_ps(lij_SSE1, uij_SSE1);
- t1_SSE2 = _mm_sub_ps(lij_SSE2, uij_SSE2);
- t1_SSE3 = _mm_sub_ps(lij_SSE3, uij_SSE3);
- t2_SSE0 = _mm_mul_ps(diff2_SSE0,
- _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE0),
- prod_SSE0));
- t2_SSE1 = _mm_mul_ps(diff2_SSE1,
- _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE1),
- prod_SSE1));
- t2_SSE2 = _mm_mul_ps(diff2_SSE2,
- _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE2),
- prod_SSE2));
- t2_SSE3 = _mm_mul_ps(diff2_SSE3,
- _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE3),
- prod_SSE3));
- t3_SSE0 = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE0, logterm_SSE0));
- t3_SSE1 = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE1, logterm_SSE1));
- t3_SSE2 = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE2, logterm_SSE2));
- t3_SSE3 = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE3, logterm_SSE3));
- t1_SSE0 = _mm_add_ps(t1_SSE0, _mm_add_ps(t2_SSE0, t3_SSE0));
- t1_SSE1 = _mm_add_ps(t1_SSE1, _mm_add_ps(t2_SSE1, t3_SSE1));
- t1_SSE2 = _mm_add_ps(t1_SSE2, _mm_add_ps(t2_SSE2, t3_SSE2));
- t1_SSE3 = _mm_add_ps(t1_SSE3, _mm_add_ps(t2_SSE3, t3_SSE3));
- t4_SSE0 = _mm_mul_ps(two_SSE, _mm_sub_ps(raj_inv_SSE, lij_SSE0));
- t4_SSE1 = _mm_mul_ps(two_SSE, _mm_sub_ps(raj_inv_SSE, lij_SSE1));
- t4_SSE2 = _mm_mul_ps(two_SSE, _mm_sub_ps(raj_inv_SSE, lij_SSE2));
- t4_SSE3 = _mm_mul_ps(two_SSE, _mm_sub_ps(raj_inv_SSE, lij_SSE3));
- t4_SSE0 = _mm_and_ps(t4_SSE0, obc_mask3_SSE0);
- t4_SSE1 = _mm_and_ps(t4_SSE1, obc_mask3_SSE1);
- t4_SSE2 = _mm_and_ps(t4_SSE2, obc_mask3_SSE2);
- t4_SSE3 = _mm_and_ps(t4_SSE3, obc_mask3_SSE3);
- t1_SSE0 = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE0, t4_SSE0));
- t1_SSE1 = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE1, t4_SSE1));
- t1_SSE2 = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE2, t4_SSE2));
- t1_SSE3 = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE3, t4_SSE3));
-
- _mm_store_ps(work+j, _mm_add_ps(_mm_load_ps(work+j),
- gmx_mm_sum4_ps(_mm_and_ps(t1_SSE0, obc_mask1_SSE0),
- _mm_and_ps(t1_SSE1, obc_mask1_SSE1),
- _mm_and_ps(t1_SSE2, obc_mask1_SSE2),
- _mm_and_ps(t1_SSE3, obc_mask1_SSE3))));
-
- t1_SSE0 = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE0),
- _mm_mul_ps(prod_SSE0, lij3_SSE0));
- t1_SSE1 = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE1),
- _mm_mul_ps(prod_SSE1, lij3_SSE1));
- t1_SSE2 = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE2),
- _mm_mul_ps(prod_SSE2, lij3_SSE2));
- t1_SSE3 = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE3),
- _mm_mul_ps(prod_SSE3, lij3_SSE3));
- t1_SSE0 = _mm_sub_ps(t1_SSE0,
- _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(lij_SSE0, rinv_SSE0),
- _mm_mul_ps(lij3_SSE0, dr_SSE0))));
- t1_SSE1 = _mm_sub_ps(t1_SSE1,
- _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(lij_SSE1, rinv_SSE1),
- _mm_mul_ps(lij3_SSE1, dr_SSE1))));
- t1_SSE2 = _mm_sub_ps(t1_SSE2,
- _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(lij_SSE2, rinv_SSE2),
- _mm_mul_ps(lij3_SSE2, dr_SSE2))));
- t1_SSE3 = _mm_sub_ps(t1_SSE3,
- _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(lij_SSE3, rinv_SSE3),
- _mm_mul_ps(lij3_SSE3, dr_SSE3))));
- t2_SSE0 = _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(uij_SSE0, rinv_SSE0),
- _mm_mul_ps(uij3_SSE0, dr_SSE0)));
- t2_SSE1 = _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(uij_SSE1, rinv_SSE1),
- _mm_mul_ps(uij3_SSE1, dr_SSE1)));
- t2_SSE2 = _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(uij_SSE2, rinv_SSE2),
- _mm_mul_ps(uij3_SSE2, dr_SSE2)));
- t2_SSE3 = _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(uij_SSE3, rinv_SSE3),
- _mm_mul_ps(uij3_SSE3, dr_SSE3)));
- t2_SSE0 = _mm_sub_ps(t2_SSE0,
- _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE0),
- _mm_mul_ps(prod_SSE0, uij3_SSE0)));
- t2_SSE1 = _mm_sub_ps(t2_SSE1,
- _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE1),
- _mm_mul_ps(prod_SSE1, uij3_SSE1)));
- t2_SSE2 = _mm_sub_ps(t2_SSE2,
- _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE2),
- _mm_mul_ps(prod_SSE2, uij3_SSE2)));
- t2_SSE3 = _mm_sub_ps(t2_SSE3,
- _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE3),
- _mm_mul_ps(prod_SSE3, uij3_SSE3)));
-
- t3_SSE0 = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE0),
- _mm_mul_ps(rinv_SSE0, rinv_SSE0));
- t3_SSE1 = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE1),
- _mm_mul_ps(rinv_SSE1, rinv_SSE1));
- t3_SSE2 = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE2),
- _mm_mul_ps(rinv_SSE2, rinv_SSE2));
- t3_SSE3 = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE3),
- _mm_mul_ps(rinv_SSE3, rinv_SSE3));
-
- t3_SSE0 = _mm_sub_ps(t3_SSE0,
- _mm_mul_ps(_mm_mul_ps(diff2_SSE0, oneeighth_SSE),
- _mm_add_ps(one_SSE,
- _mm_mul_ps(sk2_rinv_SSE0, rinv_SSE0))));
- t3_SSE1 = _mm_sub_ps(t3_SSE1,
- _mm_mul_ps(_mm_mul_ps(diff2_SSE1, oneeighth_SSE),
- _mm_add_ps(one_SSE,
- _mm_mul_ps(sk2_rinv_SSE1, rinv_SSE1))));
- t3_SSE2 = _mm_sub_ps(t3_SSE2,
- _mm_mul_ps(_mm_mul_ps(diff2_SSE2, oneeighth_SSE),
- _mm_add_ps(one_SSE,
- _mm_mul_ps(sk2_rinv_SSE2, rinv_SSE2))));
- t3_SSE3 = _mm_sub_ps(t3_SSE3,
- _mm_mul_ps(_mm_mul_ps(diff2_SSE3, oneeighth_SSE),
- _mm_add_ps(one_SSE,
- _mm_mul_ps(sk2_rinv_SSE3, rinv_SSE3))));
-
- t1_SSE0 = _mm_mul_ps(rinv_SSE0,
- _mm_add_ps(_mm_mul_ps(dlij_SSE0, t1_SSE0),
- _mm_add_ps(t2_SSE0, t3_SSE0)));
- t1_SSE1 = _mm_mul_ps(rinv_SSE1,
- _mm_add_ps(_mm_mul_ps(dlij_SSE1, t1_SSE1),
- _mm_add_ps(t2_SSE1, t3_SSE1)));
- t1_SSE2 = _mm_mul_ps(rinv_SSE2,
- _mm_add_ps(_mm_mul_ps(dlij_SSE2, t1_SSE2),
- _mm_add_ps(t2_SSE2, t3_SSE2)));
- t1_SSE3 = _mm_mul_ps(rinv_SSE3,
- _mm_add_ps(_mm_mul_ps(dlij_SSE3, t1_SSE3),
- _mm_add_ps(t2_SSE3, t3_SSE3)));
-
- _mm_store_ps(dadx, _mm_and_ps(t1_SSE0, obc_mask1_SSE0));
- dadx += 4;
- _mm_store_ps(dadx, _mm_and_ps(t1_SSE1, obc_mask1_SSE1));
- dadx += 4;
- _mm_store_ps(dadx, _mm_and_ps(t1_SSE2, obc_mask1_SSE2));
- dadx += 4;
- _mm_store_ps(dadx, _mm_and_ps(t1_SSE3, obc_mask1_SSE3));
- dadx += 4;
- }
-
- /* Epilogue part, including exclusion mask */
- for (j = nj2; j < nj3; j += UNROLLJ)
- {
- jmask_SSE0 = _mm_load_ps((real *)emask0);
- jmask_SSE1 = _mm_load_ps((real *)emask1);
- jmask_SSE2 = _mm_load_ps((real *)emask2);
- jmask_SSE3 = _mm_load_ps((real *)emask3);
- emask0 += UNROLLJ;
- emask1 += UNROLLJ;
- emask2 += UNROLLJ;
- emask3 += UNROLLJ;
-
- /* load j atom coordinates */
- jx_SSE = _mm_load_ps(x_align+j);
- jy_SSE = _mm_load_ps(y_align+j);
- jz_SSE = _mm_load_ps(z_align+j);
-
- /* Calculate distance */
- dx_SSE0 = _mm_sub_ps(ix_SSE0, jx_SSE);
- dy_SSE0 = _mm_sub_ps(iy_SSE0, jy_SSE);
- dz_SSE0 = _mm_sub_ps(iz_SSE0, jz_SSE);
- dx_SSE1 = _mm_sub_ps(ix_SSE1, jx_SSE);
- dy_SSE1 = _mm_sub_ps(iy_SSE1, jy_SSE);
- dz_SSE1 = _mm_sub_ps(iz_SSE1, jz_SSE);
- dx_SSE2 = _mm_sub_ps(ix_SSE2, jx_SSE);
- dy_SSE2 = _mm_sub_ps(iy_SSE2, jy_SSE);
- dz_SSE2 = _mm_sub_ps(iz_SSE2, jz_SSE);
- dx_SSE3 = _mm_sub_ps(ix_SSE3, jx_SSE);
- dy_SSE3 = _mm_sub_ps(iy_SSE3, jy_SSE);
- dz_SSE3 = _mm_sub_ps(iz_SSE3, jz_SSE);
-
- /* rsq = dx*dx+dy*dy+dz*dz */
- rsq_SSE0 = gmx_mm_calc_rsq_ps(dx_SSE0, dy_SSE0, dz_SSE0);
- rsq_SSE1 = gmx_mm_calc_rsq_ps(dx_SSE1, dy_SSE1, dz_SSE1);
- rsq_SSE2 = gmx_mm_calc_rsq_ps(dx_SSE2, dy_SSE2, dz_SSE2);
- rsq_SSE3 = gmx_mm_calc_rsq_ps(dx_SSE3, dy_SSE3, dz_SSE3);
-
- /* Combine masks */
- jmask_SSE0 = _mm_and_ps(jmask_SSE0, imask_SSE0);
- jmask_SSE1 = _mm_and_ps(jmask_SSE1, imask_SSE1);
- jmask_SSE2 = _mm_and_ps(jmask_SSE2, imask_SSE2);
- jmask_SSE3 = _mm_and_ps(jmask_SSE3, imask_SSE3);
-
- /* Calculate 1/r and 1/r2 */
- rinv_SSE0 = gmx_mm_invsqrt_ps(rsq_SSE0);
- rinv_SSE1 = gmx_mm_invsqrt_ps(rsq_SSE1);
- rinv_SSE2 = gmx_mm_invsqrt_ps(rsq_SSE2);
- rinv_SSE3 = gmx_mm_invsqrt_ps(rsq_SSE3);
-
- /* Apply mask */
- rinv_SSE0 = _mm_and_ps(rinv_SSE0, jmask_SSE0);
- rinv_SSE1 = _mm_and_ps(rinv_SSE1, jmask_SSE1);
- rinv_SSE2 = _mm_and_ps(rinv_SSE2, jmask_SSE2);
- rinv_SSE3 = _mm_and_ps(rinv_SSE3, jmask_SSE3);
-
- dr_SSE0 = _mm_mul_ps(rsq_SSE0, rinv_SSE0);
- dr_SSE1 = _mm_mul_ps(rsq_SSE1, rinv_SSE1);
- dr_SSE2 = _mm_mul_ps(rsq_SSE2, rinv_SSE2);
- dr_SSE3 = _mm_mul_ps(rsq_SSE3, rinv_SSE3);
-
- sk_aj_SSE = _mm_load_ps(obc_param+j);
- raj_SSE = _mm_load_ps(gb_radius+j);
-
- raj_inv_SSE = gmx_mm_inv_ps(raj_SSE);
-
- /* Evaluate influence of atom aj -> ai */
- t1_SSE0 = _mm_add_ps(dr_SSE0, sk_aj_SSE);
- t1_SSE1 = _mm_add_ps(dr_SSE1, sk_aj_SSE);
- t1_SSE2 = _mm_add_ps(dr_SSE2, sk_aj_SSE);
- t1_SSE3 = _mm_add_ps(dr_SSE3, sk_aj_SSE);
- t2_SSE0 = _mm_sub_ps(dr_SSE0, sk_aj_SSE);
- t2_SSE1 = _mm_sub_ps(dr_SSE1, sk_aj_SSE);
- t2_SSE2 = _mm_sub_ps(dr_SSE2, sk_aj_SSE);
- t2_SSE3 = _mm_sub_ps(dr_SSE3, sk_aj_SSE);
- t3_SSE0 = _mm_sub_ps(sk_aj_SSE, dr_SSE0);
- t3_SSE1 = _mm_sub_ps(sk_aj_SSE, dr_SSE1);
- t3_SSE2 = _mm_sub_ps(sk_aj_SSE, dr_SSE2);
- t3_SSE3 = _mm_sub_ps(sk_aj_SSE, dr_SSE3);
-
- obc_mask1_SSE0 = _mm_cmplt_ps(rai_SSE0, t1_SSE0);
- obc_mask1_SSE1 = _mm_cmplt_ps(rai_SSE1, t1_SSE1);
- obc_mask1_SSE2 = _mm_cmplt_ps(rai_SSE2, t1_SSE2);
- obc_mask1_SSE3 = _mm_cmplt_ps(rai_SSE3, t1_SSE3);
- obc_mask2_SSE0 = _mm_cmplt_ps(rai_SSE0, t2_SSE0);
- obc_mask2_SSE1 = _mm_cmplt_ps(rai_SSE1, t2_SSE1);
- obc_mask2_SSE2 = _mm_cmplt_ps(rai_SSE2, t2_SSE2);
- obc_mask2_SSE3 = _mm_cmplt_ps(rai_SSE3, t2_SSE3);
- obc_mask3_SSE0 = _mm_cmplt_ps(rai_SSE0, t3_SSE0);
- obc_mask3_SSE1 = _mm_cmplt_ps(rai_SSE1, t3_SSE1);
- obc_mask3_SSE2 = _mm_cmplt_ps(rai_SSE2, t3_SSE2);
- obc_mask3_SSE3 = _mm_cmplt_ps(rai_SSE3, t3_SSE3);
- obc_mask1_SSE0 = _mm_and_ps(obc_mask1_SSE0, jmask_SSE0);
- obc_mask1_SSE1 = _mm_and_ps(obc_mask1_SSE1, jmask_SSE1);
- obc_mask1_SSE2 = _mm_and_ps(obc_mask1_SSE2, jmask_SSE2);
- obc_mask1_SSE3 = _mm_and_ps(obc_mask1_SSE3, jmask_SSE3);
-
- uij_SSE0 = gmx_mm_inv_ps(t1_SSE0);
- uij_SSE1 = gmx_mm_inv_ps(t1_SSE1);
- uij_SSE2 = gmx_mm_inv_ps(t1_SSE2);
- uij_SSE3 = gmx_mm_inv_ps(t1_SSE3);
- lij_SSE0 = _mm_or_ps( _mm_and_ps(obc_mask2_SSE0, gmx_mm_inv_ps(t2_SSE0)),
- _mm_andnot_ps(obc_mask2_SSE0, rai_inv_SSE0));
- lij_SSE1 = _mm_or_ps( _mm_and_ps(obc_mask2_SSE1, gmx_mm_inv_ps(t2_SSE1)),
- _mm_andnot_ps(obc_mask2_SSE1, rai_inv_SSE1));
- lij_SSE2 = _mm_or_ps( _mm_and_ps(obc_mask2_SSE2, gmx_mm_inv_ps(t2_SSE2)),
- _mm_andnot_ps(obc_mask2_SSE2, rai_inv_SSE2));
- lij_SSE3 = _mm_or_ps( _mm_and_ps(obc_mask2_SSE3, gmx_mm_inv_ps(t2_SSE3)),
- _mm_andnot_ps(obc_mask2_SSE3, rai_inv_SSE3));
- dlij_SSE0 = _mm_and_ps(one_SSE, obc_mask2_SSE0);
- dlij_SSE1 = _mm_and_ps(one_SSE, obc_mask2_SSE1);
- dlij_SSE2 = _mm_and_ps(one_SSE, obc_mask2_SSE2);
- dlij_SSE3 = _mm_and_ps(one_SSE, obc_mask2_SSE3);
-
- uij2_SSE0 = _mm_mul_ps(uij_SSE0, uij_SSE0);
- uij2_SSE1 = _mm_mul_ps(uij_SSE1, uij_SSE1);
- uij2_SSE2 = _mm_mul_ps(uij_SSE2, uij_SSE2);
- uij2_SSE3 = _mm_mul_ps(uij_SSE3, uij_SSE3);
- uij3_SSE0 = _mm_mul_ps(uij2_SSE0, uij_SSE0);
- uij3_SSE1 = _mm_mul_ps(uij2_SSE1, uij_SSE1);
- uij3_SSE2 = _mm_mul_ps(uij2_SSE2, uij_SSE2);
- uij3_SSE3 = _mm_mul_ps(uij2_SSE3, uij_SSE3);
- lij2_SSE0 = _mm_mul_ps(lij_SSE0, lij_SSE0);
- lij2_SSE1 = _mm_mul_ps(lij_SSE1, lij_SSE1);
- lij2_SSE2 = _mm_mul_ps(lij_SSE2, lij_SSE2);
- lij2_SSE3 = _mm_mul_ps(lij_SSE3, lij_SSE3);
- lij3_SSE0 = _mm_mul_ps(lij2_SSE0, lij_SSE0);
- lij3_SSE1 = _mm_mul_ps(lij2_SSE1, lij_SSE1);
- lij3_SSE2 = _mm_mul_ps(lij2_SSE2, lij_SSE2);
- lij3_SSE3 = _mm_mul_ps(lij2_SSE3, lij_SSE3);
-
- diff2_SSE0 = _mm_sub_ps(uij2_SSE0, lij2_SSE0);
- diff2_SSE1 = _mm_sub_ps(uij2_SSE1, lij2_SSE1);
- diff2_SSE2 = _mm_sub_ps(uij2_SSE2, lij2_SSE2);
- diff2_SSE3 = _mm_sub_ps(uij2_SSE3, lij2_SSE3);
- lij_inv_SSE0 = gmx_mm_invsqrt_ps(lij2_SSE0);
- lij_inv_SSE1 = gmx_mm_invsqrt_ps(lij2_SSE1);
- lij_inv_SSE2 = gmx_mm_invsqrt_ps(lij2_SSE2);
- lij_inv_SSE3 = gmx_mm_invsqrt_ps(lij2_SSE3);
- sk2_aj_SSE = _mm_mul_ps(sk_aj_SSE, sk_aj_SSE);
- sk2_rinv_SSE0 = _mm_mul_ps(sk2_aj_SSE, rinv_SSE0);
- sk2_rinv_SSE1 = _mm_mul_ps(sk2_aj_SSE, rinv_SSE1);
- sk2_rinv_SSE2 = _mm_mul_ps(sk2_aj_SSE, rinv_SSE2);
- sk2_rinv_SSE3 = _mm_mul_ps(sk2_aj_SSE, rinv_SSE3);
- prod_SSE0 = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE0);
- prod_SSE1 = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE1);
- prod_SSE2 = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE2);
- prod_SSE3 = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE3);
-
- logterm_SSE0 = gmx_mm_log_ps(_mm_mul_ps(uij_SSE0, lij_inv_SSE0));
- logterm_SSE1 = gmx_mm_log_ps(_mm_mul_ps(uij_SSE1, lij_inv_SSE1));
- logterm_SSE2 = gmx_mm_log_ps(_mm_mul_ps(uij_SSE2, lij_inv_SSE2));
- logterm_SSE3 = gmx_mm_log_ps(_mm_mul_ps(uij_SSE3, lij_inv_SSE3));
-
- t1_SSE0 = _mm_sub_ps(lij_SSE0, uij_SSE0);
- t1_SSE1 = _mm_sub_ps(lij_SSE1, uij_SSE1);
- t1_SSE2 = _mm_sub_ps(lij_SSE2, uij_SSE2);
- t1_SSE3 = _mm_sub_ps(lij_SSE3, uij_SSE3);
- t2_SSE0 = _mm_mul_ps(diff2_SSE0,
- _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE0),
- prod_SSE0));
- t2_SSE1 = _mm_mul_ps(diff2_SSE1,
- _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE1),
- prod_SSE1));
- t2_SSE2 = _mm_mul_ps(diff2_SSE2,
- _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE2),
- prod_SSE2));
- t2_SSE3 = _mm_mul_ps(diff2_SSE3,
- _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE3),
- prod_SSE3));
-
- t3_SSE0 = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE0, logterm_SSE0));
- t3_SSE1 = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE1, logterm_SSE1));
- t3_SSE2 = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE2, logterm_SSE2));
- t3_SSE3 = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE3, logterm_SSE3));
- t1_SSE0 = _mm_add_ps(t1_SSE0, _mm_add_ps(t2_SSE0, t3_SSE0));
- t1_SSE1 = _mm_add_ps(t1_SSE1, _mm_add_ps(t2_SSE1, t3_SSE1));
- t1_SSE2 = _mm_add_ps(t1_SSE2, _mm_add_ps(t2_SSE2, t3_SSE2));
- t1_SSE3 = _mm_add_ps(t1_SSE3, _mm_add_ps(t2_SSE3, t3_SSE3));
- t4_SSE0 = _mm_mul_ps(two_SSE, _mm_sub_ps(rai_inv_SSE0, lij_SSE0));
- t4_SSE1 = _mm_mul_ps(two_SSE, _mm_sub_ps(rai_inv_SSE1, lij_SSE1));
- t4_SSE2 = _mm_mul_ps(two_SSE, _mm_sub_ps(rai_inv_SSE2, lij_SSE2));
- t4_SSE3 = _mm_mul_ps(two_SSE, _mm_sub_ps(rai_inv_SSE3, lij_SSE3));
- t4_SSE0 = _mm_and_ps(t4_SSE0, obc_mask3_SSE0);
- t4_SSE1 = _mm_and_ps(t4_SSE1, obc_mask3_SSE1);
- t4_SSE2 = _mm_and_ps(t4_SSE2, obc_mask3_SSE2);
- t4_SSE3 = _mm_and_ps(t4_SSE3, obc_mask3_SSE3);
- t1_SSE0 = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE0, t4_SSE0));
- t1_SSE1 = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE1, t4_SSE1));
- t1_SSE2 = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE2, t4_SSE2));
- t1_SSE3 = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE3, t4_SSE3));
-
- sum_ai_SSE0 = _mm_add_ps(sum_ai_SSE0, _mm_and_ps(t1_SSE0, obc_mask1_SSE0));
- sum_ai_SSE1 = _mm_add_ps(sum_ai_SSE1, _mm_and_ps(t1_SSE1, obc_mask1_SSE1));
- sum_ai_SSE2 = _mm_add_ps(sum_ai_SSE2, _mm_and_ps(t1_SSE2, obc_mask1_SSE2));
- sum_ai_SSE3 = _mm_add_ps(sum_ai_SSE3, _mm_and_ps(t1_SSE3, obc_mask1_SSE3));
-
- t1_SSE0 = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE0),
- _mm_mul_ps(prod_SSE0, lij3_SSE0));
- t1_SSE1 = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE1),
- _mm_mul_ps(prod_SSE1, lij3_SSE1));
- t1_SSE2 = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE2),
- _mm_mul_ps(prod_SSE2, lij3_SSE2));
- t1_SSE3 = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE3),
- _mm_mul_ps(prod_SSE3, lij3_SSE3));
- t1_SSE0 = _mm_sub_ps(t1_SSE0,
- _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(lij_SSE0, rinv_SSE0),
- _mm_mul_ps(lij3_SSE0, dr_SSE0))));
- t1_SSE1 = _mm_sub_ps(t1_SSE1,
- _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(lij_SSE1, rinv_SSE1),
- _mm_mul_ps(lij3_SSE1, dr_SSE1))));
- t1_SSE2 = _mm_sub_ps(t1_SSE2,
- _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(lij_SSE2, rinv_SSE2),
- _mm_mul_ps(lij3_SSE2, dr_SSE2))));
- t1_SSE3 = _mm_sub_ps(t1_SSE3,
- _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(lij_SSE3, rinv_SSE3),
- _mm_mul_ps(lij3_SSE3, dr_SSE3))));
-
- t2_SSE0 = _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(uij_SSE0, rinv_SSE0),
- _mm_mul_ps(uij3_SSE0, dr_SSE0)));
- t2_SSE1 = _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(uij_SSE1, rinv_SSE1),
- _mm_mul_ps(uij3_SSE1, dr_SSE1)));
- t2_SSE2 = _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(uij_SSE2, rinv_SSE2),
- _mm_mul_ps(uij3_SSE2, dr_SSE2)));
- t2_SSE3 = _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(uij_SSE3, rinv_SSE3),
- _mm_mul_ps(uij3_SSE3, dr_SSE3)));
- t2_SSE0 = _mm_sub_ps(t2_SSE0,
- _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE0),
- _mm_mul_ps(prod_SSE0, uij3_SSE0)));
- t2_SSE1 = _mm_sub_ps(t2_SSE1,
- _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE1),
- _mm_mul_ps(prod_SSE1, uij3_SSE1)));
- t2_SSE2 = _mm_sub_ps(t2_SSE2,
- _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE2),
- _mm_mul_ps(prod_SSE2, uij3_SSE2)));
- t2_SSE3 = _mm_sub_ps(t2_SSE3,
- _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE3),
- _mm_mul_ps(prod_SSE3, uij3_SSE3)));
- t3_SSE0 = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE0),
- _mm_mul_ps(rinv_SSE0, rinv_SSE0));
- t3_SSE1 = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE1),
- _mm_mul_ps(rinv_SSE1, rinv_SSE1));
- t3_SSE2 = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE2),
- _mm_mul_ps(rinv_SSE2, rinv_SSE2));
- t3_SSE3 = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE3),
- _mm_mul_ps(rinv_SSE3, rinv_SSE3));
- t3_SSE0 = _mm_sub_ps(t3_SSE0,
- _mm_mul_ps(_mm_mul_ps(diff2_SSE0, oneeighth_SSE),
- _mm_add_ps(one_SSE,
- _mm_mul_ps(sk2_rinv_SSE0, rinv_SSE0))));
- t3_SSE1 = _mm_sub_ps(t3_SSE1,
- _mm_mul_ps(_mm_mul_ps(diff2_SSE1, oneeighth_SSE),
- _mm_add_ps(one_SSE,
- _mm_mul_ps(sk2_rinv_SSE1, rinv_SSE1))));
- t3_SSE2 = _mm_sub_ps(t3_SSE2,
- _mm_mul_ps(_mm_mul_ps(diff2_SSE2, oneeighth_SSE),
- _mm_add_ps(one_SSE,
- _mm_mul_ps(sk2_rinv_SSE2, rinv_SSE2))));
- t3_SSE3 = _mm_sub_ps(t3_SSE3,
- _mm_mul_ps(_mm_mul_ps(diff2_SSE3, oneeighth_SSE),
- _mm_add_ps(one_SSE,
- _mm_mul_ps(sk2_rinv_SSE3, rinv_SSE3))));
-
- t1_SSE0 = _mm_mul_ps(rinv_SSE0,
- _mm_add_ps(_mm_mul_ps(dlij_SSE0, t1_SSE0),
- _mm_add_ps(t2_SSE0, t3_SSE0)));
- t1_SSE1 = _mm_mul_ps(rinv_SSE1,
- _mm_add_ps(_mm_mul_ps(dlij_SSE1, t1_SSE1),
- _mm_add_ps(t2_SSE1, t3_SSE1)));
- t1_SSE2 = _mm_mul_ps(rinv_SSE2,
- _mm_add_ps(_mm_mul_ps(dlij_SSE2, t1_SSE2),
- _mm_add_ps(t2_SSE2, t3_SSE2)));
- t1_SSE3 = _mm_mul_ps(rinv_SSE3,
- _mm_add_ps(_mm_mul_ps(dlij_SSE3, t1_SSE3),
- _mm_add_ps(t2_SSE3, t3_SSE3)));
-
- _mm_store_ps(dadx, _mm_and_ps(t1_SSE0, obc_mask1_SSE0));
- dadx += 4;
- _mm_store_ps(dadx, _mm_and_ps(t1_SSE1, obc_mask1_SSE1));
- dadx += 4;
- _mm_store_ps(dadx, _mm_and_ps(t1_SSE2, obc_mask1_SSE2));
- dadx += 4;
- _mm_store_ps(dadx, _mm_and_ps(t1_SSE3, obc_mask1_SSE3));
- dadx += 4;
-
- /* Evaluate influence of atom ai -> aj */
- t1_SSE0 = _mm_add_ps(dr_SSE0, sk_ai_SSE0);
- t1_SSE1 = _mm_add_ps(dr_SSE1, sk_ai_SSE1);
- t1_SSE2 = _mm_add_ps(dr_SSE2, sk_ai_SSE2);
- t1_SSE3 = _mm_add_ps(dr_SSE3, sk_ai_SSE3);
- t2_SSE0 = _mm_sub_ps(dr_SSE0, sk_ai_SSE0);
- t2_SSE1 = _mm_sub_ps(dr_SSE1, sk_ai_SSE1);
- t2_SSE2 = _mm_sub_ps(dr_SSE2, sk_ai_SSE2);
- t2_SSE3 = _mm_sub_ps(dr_SSE3, sk_ai_SSE3);
- t3_SSE0 = _mm_sub_ps(sk_ai_SSE0, dr_SSE0);
- t3_SSE1 = _mm_sub_ps(sk_ai_SSE1, dr_SSE1);
- t3_SSE2 = _mm_sub_ps(sk_ai_SSE2, dr_SSE2);
- t3_SSE3 = _mm_sub_ps(sk_ai_SSE3, dr_SSE3);
-
- obc_mask1_SSE0 = _mm_cmplt_ps(raj_SSE, t1_SSE0);
- obc_mask1_SSE1 = _mm_cmplt_ps(raj_SSE, t1_SSE1);
- obc_mask1_SSE2 = _mm_cmplt_ps(raj_SSE, t1_SSE2);
- obc_mask1_SSE3 = _mm_cmplt_ps(raj_SSE, t1_SSE3);
- obc_mask2_SSE0 = _mm_cmplt_ps(raj_SSE, t2_SSE0);
- obc_mask2_SSE1 = _mm_cmplt_ps(raj_SSE, t2_SSE1);
- obc_mask2_SSE2 = _mm_cmplt_ps(raj_SSE, t2_SSE2);
- obc_mask2_SSE3 = _mm_cmplt_ps(raj_SSE, t2_SSE3);
- obc_mask3_SSE0 = _mm_cmplt_ps(raj_SSE, t3_SSE0);
- obc_mask3_SSE1 = _mm_cmplt_ps(raj_SSE, t3_SSE1);
- obc_mask3_SSE2 = _mm_cmplt_ps(raj_SSE, t3_SSE2);
- obc_mask3_SSE3 = _mm_cmplt_ps(raj_SSE, t3_SSE3);
- obc_mask1_SSE0 = _mm_and_ps(obc_mask1_SSE0, jmask_SSE0);
- obc_mask1_SSE1 = _mm_and_ps(obc_mask1_SSE1, jmask_SSE1);
- obc_mask1_SSE2 = _mm_and_ps(obc_mask1_SSE2, jmask_SSE2);
- obc_mask1_SSE3 = _mm_and_ps(obc_mask1_SSE3, jmask_SSE3);
-
- uij_SSE0 = gmx_mm_inv_ps(t1_SSE0);
- uij_SSE1 = gmx_mm_inv_ps(t1_SSE1);
- uij_SSE2 = gmx_mm_inv_ps(t1_SSE2);
- uij_SSE3 = gmx_mm_inv_ps(t1_SSE3);
- lij_SSE0 = _mm_or_ps( _mm_and_ps(obc_mask2_SSE0, gmx_mm_inv_ps(t2_SSE0)),
- _mm_andnot_ps(obc_mask2_SSE0, raj_inv_SSE));
- lij_SSE1 = _mm_or_ps( _mm_and_ps(obc_mask2_SSE1, gmx_mm_inv_ps(t2_SSE1)),
- _mm_andnot_ps(obc_mask2_SSE1, raj_inv_SSE));
- lij_SSE2 = _mm_or_ps( _mm_and_ps(obc_mask2_SSE2, gmx_mm_inv_ps(t2_SSE2)),
- _mm_andnot_ps(obc_mask2_SSE2, raj_inv_SSE));
- lij_SSE3 = _mm_or_ps( _mm_and_ps(obc_mask2_SSE3, gmx_mm_inv_ps(t2_SSE3)),
- _mm_andnot_ps(obc_mask2_SSE3, raj_inv_SSE));
- dlij_SSE0 = _mm_and_ps(one_SSE, obc_mask2_SSE0);
- dlij_SSE1 = _mm_and_ps(one_SSE, obc_mask2_SSE1);
- dlij_SSE2 = _mm_and_ps(one_SSE, obc_mask2_SSE2);
- dlij_SSE3 = _mm_and_ps(one_SSE, obc_mask2_SSE3);
-
- uij2_SSE0 = _mm_mul_ps(uij_SSE0, uij_SSE0);
- uij2_SSE1 = _mm_mul_ps(uij_SSE1, uij_SSE1);
- uij2_SSE2 = _mm_mul_ps(uij_SSE2, uij_SSE2);
- uij2_SSE3 = _mm_mul_ps(uij_SSE3, uij_SSE3);
- uij3_SSE0 = _mm_mul_ps(uij2_SSE0, uij_SSE0);
- uij3_SSE1 = _mm_mul_ps(uij2_SSE1, uij_SSE1);
- uij3_SSE2 = _mm_mul_ps(uij2_SSE2, uij_SSE2);
- uij3_SSE3 = _mm_mul_ps(uij2_SSE3, uij_SSE3);
- lij2_SSE0 = _mm_mul_ps(lij_SSE0, lij_SSE0);
- lij2_SSE1 = _mm_mul_ps(lij_SSE1, lij_SSE1);
- lij2_SSE2 = _mm_mul_ps(lij_SSE2, lij_SSE2);
- lij2_SSE3 = _mm_mul_ps(lij_SSE3, lij_SSE3);
- lij3_SSE0 = _mm_mul_ps(lij2_SSE0, lij_SSE0);
- lij3_SSE1 = _mm_mul_ps(lij2_SSE1, lij_SSE1);
- lij3_SSE2 = _mm_mul_ps(lij2_SSE2, lij_SSE2);
- lij3_SSE3 = _mm_mul_ps(lij2_SSE3, lij_SSE3);
-
- diff2_SSE0 = _mm_sub_ps(uij2_SSE0, lij2_SSE0);
- diff2_SSE1 = _mm_sub_ps(uij2_SSE1, lij2_SSE1);
- diff2_SSE2 = _mm_sub_ps(uij2_SSE2, lij2_SSE2);
- diff2_SSE3 = _mm_sub_ps(uij2_SSE3, lij2_SSE3);
- lij_inv_SSE0 = gmx_mm_invsqrt_ps(lij2_SSE0);
- lij_inv_SSE1 = gmx_mm_invsqrt_ps(lij2_SSE1);
- lij_inv_SSE2 = gmx_mm_invsqrt_ps(lij2_SSE2);
- lij_inv_SSE3 = gmx_mm_invsqrt_ps(lij2_SSE3);
- sk2_rinv_SSE0 = _mm_mul_ps(sk2_ai_SSE0, rinv_SSE0);
- sk2_rinv_SSE1 = _mm_mul_ps(sk2_ai_SSE1, rinv_SSE1);
- sk2_rinv_SSE2 = _mm_mul_ps(sk2_ai_SSE2, rinv_SSE2);
- sk2_rinv_SSE3 = _mm_mul_ps(sk2_ai_SSE3, rinv_SSE3);
- prod_SSE0 = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE0);
- prod_SSE1 = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE1);
- prod_SSE2 = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE2);
- prod_SSE3 = _mm_mul_ps(onefourth_SSE, sk2_rinv_SSE3);
-
- logterm_SSE0 = gmx_mm_log_ps(_mm_mul_ps(uij_SSE0, lij_inv_SSE0));
- logterm_SSE1 = gmx_mm_log_ps(_mm_mul_ps(uij_SSE1, lij_inv_SSE1));
- logterm_SSE2 = gmx_mm_log_ps(_mm_mul_ps(uij_SSE2, lij_inv_SSE2));
- logterm_SSE3 = gmx_mm_log_ps(_mm_mul_ps(uij_SSE3, lij_inv_SSE3));
- t1_SSE0 = _mm_sub_ps(lij_SSE0, uij_SSE0);
- t1_SSE1 = _mm_sub_ps(lij_SSE1, uij_SSE1);
- t1_SSE2 = _mm_sub_ps(lij_SSE2, uij_SSE2);
- t1_SSE3 = _mm_sub_ps(lij_SSE3, uij_SSE3);
- t2_SSE0 = _mm_mul_ps(diff2_SSE0,
- _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE0),
- prod_SSE0));
- t2_SSE1 = _mm_mul_ps(diff2_SSE1,
- _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE1),
- prod_SSE1));
- t2_SSE2 = _mm_mul_ps(diff2_SSE2,
- _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE2),
- prod_SSE2));
- t2_SSE3 = _mm_mul_ps(diff2_SSE3,
- _mm_sub_ps(_mm_mul_ps(onefourth_SSE, dr_SSE3),
- prod_SSE3));
- t3_SSE0 = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE0, logterm_SSE0));
- t3_SSE1 = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE1, logterm_SSE1));
- t3_SSE2 = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE2, logterm_SSE2));
- t3_SSE3 = _mm_mul_ps(half_SSE, _mm_mul_ps(rinv_SSE3, logterm_SSE3));
- t1_SSE0 = _mm_add_ps(t1_SSE0, _mm_add_ps(t2_SSE0, t3_SSE0));
- t1_SSE1 = _mm_add_ps(t1_SSE1, _mm_add_ps(t2_SSE1, t3_SSE1));
- t1_SSE2 = _mm_add_ps(t1_SSE2, _mm_add_ps(t2_SSE2, t3_SSE2));
- t1_SSE3 = _mm_add_ps(t1_SSE3, _mm_add_ps(t2_SSE3, t3_SSE3));
- t4_SSE0 = _mm_mul_ps(two_SSE, _mm_sub_ps(raj_inv_SSE, lij_SSE0));
- t4_SSE1 = _mm_mul_ps(two_SSE, _mm_sub_ps(raj_inv_SSE, lij_SSE1));
- t4_SSE2 = _mm_mul_ps(two_SSE, _mm_sub_ps(raj_inv_SSE, lij_SSE2));
- t4_SSE3 = _mm_mul_ps(two_SSE, _mm_sub_ps(raj_inv_SSE, lij_SSE3));
- t4_SSE0 = _mm_and_ps(t4_SSE0, obc_mask3_SSE0);
- t4_SSE1 = _mm_and_ps(t4_SSE1, obc_mask3_SSE1);
- t4_SSE2 = _mm_and_ps(t4_SSE2, obc_mask3_SSE2);
- t4_SSE3 = _mm_and_ps(t4_SSE3, obc_mask3_SSE3);
- t1_SSE0 = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE0, t4_SSE0));
- t1_SSE1 = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE1, t4_SSE1));
- t1_SSE2 = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE2, t4_SSE2));
- t1_SSE3 = _mm_mul_ps(half_SSE, _mm_add_ps(t1_SSE3, t4_SSE3));
-
- _mm_store_ps(work+j, _mm_add_ps(_mm_load_ps(work+j),
- gmx_mm_sum4_ps(_mm_and_ps(t1_SSE0, obc_mask1_SSE0),
- _mm_and_ps(t1_SSE1, obc_mask1_SSE1),
- _mm_and_ps(t1_SSE2, obc_mask1_SSE2),
- _mm_and_ps(t1_SSE3, obc_mask1_SSE3))));
-
- t1_SSE0 = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE0),
- _mm_mul_ps(prod_SSE0, lij3_SSE0));
- t1_SSE1 = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE1),
- _mm_mul_ps(prod_SSE1, lij3_SSE1));
- t1_SSE2 = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE2),
- _mm_mul_ps(prod_SSE2, lij3_SSE2));
- t1_SSE3 = _mm_add_ps(_mm_mul_ps(half_SSE, lij2_SSE3),
- _mm_mul_ps(prod_SSE3, lij3_SSE3));
- t1_SSE0 = _mm_sub_ps(t1_SSE0,
- _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(lij_SSE0, rinv_SSE0),
- _mm_mul_ps(lij3_SSE0, dr_SSE0))));
- t1_SSE1 = _mm_sub_ps(t1_SSE1,
- _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(lij_SSE1, rinv_SSE1),
- _mm_mul_ps(lij3_SSE1, dr_SSE1))));
- t1_SSE2 = _mm_sub_ps(t1_SSE2,
- _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(lij_SSE2, rinv_SSE2),
- _mm_mul_ps(lij3_SSE2, dr_SSE2))));
- t1_SSE3 = _mm_sub_ps(t1_SSE3,
- _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(lij_SSE3, rinv_SSE3),
- _mm_mul_ps(lij3_SSE3, dr_SSE3))));
- t2_SSE0 = _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(uij_SSE0, rinv_SSE0),
- _mm_mul_ps(uij3_SSE0, dr_SSE0)));
- t2_SSE1 = _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(uij_SSE1, rinv_SSE1),
- _mm_mul_ps(uij3_SSE1, dr_SSE1)));
- t2_SSE2 = _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(uij_SSE2, rinv_SSE2),
- _mm_mul_ps(uij3_SSE2, dr_SSE2)));
- t2_SSE3 = _mm_mul_ps(onefourth_SSE,
- _mm_add_ps(_mm_mul_ps(uij_SSE3, rinv_SSE3),
- _mm_mul_ps(uij3_SSE3, dr_SSE3)));
- t2_SSE0 = _mm_sub_ps(t2_SSE0,
- _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE0),
- _mm_mul_ps(prod_SSE0, uij3_SSE0)));
- t2_SSE1 = _mm_sub_ps(t2_SSE1,
- _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE1),
- _mm_mul_ps(prod_SSE1, uij3_SSE1)));
- t2_SSE2 = _mm_sub_ps(t2_SSE2,
- _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE2),
- _mm_mul_ps(prod_SSE2, uij3_SSE2)));
- t2_SSE3 = _mm_sub_ps(t2_SSE3,
- _mm_add_ps(_mm_mul_ps(half_SSE, uij2_SSE3),
- _mm_mul_ps(prod_SSE3, uij3_SSE3)));
-
- t3_SSE0 = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE0),
- _mm_mul_ps(rinv_SSE0, rinv_SSE0));
- t3_SSE1 = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE1),
- _mm_mul_ps(rinv_SSE1, rinv_SSE1));
- t3_SSE2 = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE2),
- _mm_mul_ps(rinv_SSE2, rinv_SSE2));
- t3_SSE3 = _mm_mul_ps(_mm_mul_ps(onefourth_SSE, logterm_SSE3),
- _mm_mul_ps(rinv_SSE3, rinv_SSE3));
-
- t3_SSE0 = _mm_sub_ps(t3_SSE0,
- _mm_mul_ps(_mm_mul_ps(diff2_SSE0, oneeighth_SSE),
- _mm_add_ps(one_SSE,
- _mm_mul_ps(sk2_rinv_SSE0, rinv_SSE0))));
- t3_SSE1 = _mm_sub_ps(t3_SSE1,
- _mm_mul_ps(_mm_mul_ps(diff2_SSE1, oneeighth_SSE),
- _mm_add_ps(one_SSE,
- _mm_mul_ps(sk2_rinv_SSE1, rinv_SSE1))));
- t3_SSE2 = _mm_sub_ps(t3_SSE2,
- _mm_mul_ps(_mm_mul_ps(diff2_SSE2, oneeighth_SSE),
- _mm_add_ps(one_SSE,
- _mm_mul_ps(sk2_rinv_SSE2, rinv_SSE2))));
- t3_SSE3 = _mm_sub_ps(t3_SSE3,
- _mm_mul_ps(_mm_mul_ps(diff2_SSE3, oneeighth_SSE),
- _mm_add_ps(one_SSE,
- _mm_mul_ps(sk2_rinv_SSE3, rinv_SSE3))));
-
-
- t1_SSE0 = _mm_mul_ps(rinv_SSE0,
- _mm_add_ps(_mm_mul_ps(dlij_SSE0, t1_SSE0),
- _mm_add_ps(t2_SSE0, t3_SSE0)));
- t1_SSE1 = _mm_mul_ps(rinv_SSE1,
- _mm_add_ps(_mm_mul_ps(dlij_SSE1, t1_SSE1),
- _mm_add_ps(t2_SSE1, t3_SSE1)));
- t1_SSE2 = _mm_mul_ps(rinv_SSE2,
- _mm_add_ps(_mm_mul_ps(dlij_SSE2, t1_SSE2),
- _mm_add_ps(t2_SSE2, t3_SSE2)));
- t1_SSE3 = _mm_mul_ps(rinv_SSE3,
- _mm_add_ps(_mm_mul_ps(dlij_SSE3, t1_SSE3),
- _mm_add_ps(t2_SSE3, t3_SSE3)));
-
- _mm_store_ps(dadx, _mm_and_ps(t1_SSE0, obc_mask1_SSE0));
- dadx += 4;
- _mm_store_ps(dadx, _mm_and_ps(t1_SSE1, obc_mask1_SSE1));
- dadx += 4;
- _mm_store_ps(dadx, _mm_and_ps(t1_SSE2, obc_mask1_SSE2));
- dadx += 4;
- _mm_store_ps(dadx, _mm_and_ps(t1_SSE3, obc_mask1_SSE3));
- dadx += 4;
- }
- _MM_TRANSPOSE4_PS(sum_ai_SSE0, sum_ai_SSE1, sum_ai_SSE2, sum_ai_SSE3);
- sum_ai_SSE0 = _mm_add_ps(sum_ai_SSE0, sum_ai_SSE1);
- sum_ai_SSE2 = _mm_add_ps(sum_ai_SSE2, sum_ai_SSE3);
- sum_ai_SSE0 = _mm_add_ps(sum_ai_SSE0, sum_ai_SSE2);
- _mm_store_ps(work+i, _mm_add_ps(sum_ai_SSE0, _mm_load_ps(work+i)));
- }
-
-
- for (i = 0; i < natoms/2+1; i++)
- {
- work[i] += work[natoms+i];
- }
-
- /* Parallel summations would go here if ever implemented with DD */
-
- if (gb_algorithm == egbHCT)
- {
- /* HCT */
- for (i = 0; i < natoms; i++)
- {
- if (born->use[i] != 0)
- {
- rai = top->atomtypes.gb_radius[mdatoms->typeA[i]]-born->gb_doffset;
- sum_ai = 1.0/rai - work[i];
- min_rad = rai + born->gb_doffset;
- rad = 1.0/sum_ai;
-
- born->bRad[i] = rad > min_rad ? rad : min_rad;
- fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
- }
- }
-
- }
- else
- {
- /* OBC */
-
- /* Calculate the radii */
- for (i = 0; i < natoms; i++)
- {
-
- if (born->use[i] != 0)
- {
- rai = top->atomtypes.gb_radius[mdatoms->typeA[i]];
- rai_inv2 = 1.0/rai;
- rai = rai-born->gb_doffset;
- rai_inv = 1.0/rai;
- sum_ai = rai * work[i];
- sum_ai2 = sum_ai * sum_ai;
- sum_ai3 = sum_ai2 * sum_ai;
-
- tsum = tanh(born->obc_alpha*sum_ai-born->obc_beta*sum_ai2+born->obc_gamma*sum_ai3);
- born->bRad[i] = rai_inv - tsum*rai_inv2;
- born->bRad[i] = 1.0 / born->bRad[i];
-
- fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
-
- tchain = rai * (born->obc_alpha-2*born->obc_beta*sum_ai+3*born->obc_gamma*sum_ai2);
- born->drobc[i] = (1.0-tsum*tsum)*tchain*rai_inv2;
- }
- }
- }
-
- return 0;
-}
-
-
-
-
-
-
-
-
-int
-genborn_allvsall_calc_chainrule_sse2_single(t_forcerec * fr,
- t_mdatoms * mdatoms,
- gmx_genborn_t * born,
- real * x,
- real * f,
- int gb_algorithm,
- void * paadata)
-{
- gmx_allvsallgb2_data_t *aadata;
- int natoms;
- int ni0, ni1;
- int nj0, nj1, nj2, nj3;
- int i, j, k, n;
- int idx;
- int * mask;
- int * pmask0;
- int * emask0;
- int * jindex;
-
- real ix, iy, iz;
- real fix, fiy, fiz;
- real jx, jy, jz;
- real dx, dy, dz;
- real tx, ty, tz;
- real rbai, rbaj, fgb, fgb_ai, rbi;
- real * rb;
- real * dadx;
- real * x_align;
- real * y_align;
- real * z_align;
- real * fx_align;
- real * fy_align;
- real * fz_align;
- real tmpsum[4];
-
- __m128 jmask_SSE0, jmask_SSE1, jmask_SSE2, jmask_SSE3;
- __m128 ix_SSE0, iy_SSE0, iz_SSE0;
- __m128 ix_SSE1, iy_SSE1, iz_SSE1;
- __m128 ix_SSE2, iy_SSE2, iz_SSE2;
- __m128 ix_SSE3, iy_SSE3, iz_SSE3;
- __m128 fix_SSE0, fiy_SSE0, fiz_SSE0;
- __m128 fix_SSE1, fiy_SSE1, fiz_SSE1;
- __m128 fix_SSE2, fiy_SSE2, fiz_SSE2;
- __m128 fix_SSE3, fiy_SSE3, fiz_SSE3;
- __m128 rbai_SSE0, rbai_SSE1, rbai_SSE2, rbai_SSE3;
- __m128 imask_SSE0, imask_SSE1, imask_SSE2, imask_SSE3;
- __m128 jx_SSE, jy_SSE, jz_SSE, rbaj_SSE;
- __m128 dx_SSE0, dy_SSE0, dz_SSE0;
- __m128 dx_SSE1, dy_SSE1, dz_SSE1;
- __m128 dx_SSE2, dy_SSE2, dz_SSE2;
- __m128 dx_SSE3, dy_SSE3, dz_SSE3;
- __m128 fgb_SSE0, fgb_ai_SSE0;
- __m128 fgb_SSE1, fgb_ai_SSE1;
- __m128 fgb_SSE2, fgb_ai_SSE2;
- __m128 fgb_SSE3, fgb_ai_SSE3;
- __m128 tx_SSE0, ty_SSE0, tz_SSE0;
- __m128 tx_SSE1, ty_SSE1, tz_SSE1;
- __m128 tx_SSE2, ty_SSE2, tz_SSE2;
- __m128 tx_SSE3, ty_SSE3, tz_SSE3;
- __m128 t1, t2;
-
- natoms = mdatoms->nr;
- ni0 = 0;
- ni1 = mdatoms->homenr;
- dadx = fr->dadx;
-
- aadata = (gmx_allvsallgb2_data_t *)paadata;
-
- x_align = aadata->x_align;
- y_align = aadata->y_align;
- z_align = aadata->z_align;
- fx_align = aadata->fx_align;
- fy_align = aadata->fy_align;
- fz_align = aadata->fz_align;
-
- jindex = aadata->jindex_gb;
- dadx = fr->dadx;
-
- n = 0;
- rb = aadata->work;
-
- /* Loop to get the proper form for the Born radius term */
- if (gb_algorithm == egbSTILL)
- {
- for (i = 0; i < natoms; i++)
- {
- rbi = born->bRad[i];
- rb[i] = (2 * rbi * rbi * fr->dvda[i])/ONE_4PI_EPS0;
- }
- }
- else if (gb_algorithm == egbHCT)
- {
- for (i = 0; i < natoms; i++)
- {
- rbi = born->bRad[i];
- rb[i] = rbi * rbi * fr->dvda[i];
- }
- }
- else if (gb_algorithm == egbOBC)
- {
- for (idx = 0; idx < natoms; idx++)
- {
- rbi = born->bRad[idx];
- rb[idx] = rbi * rbi * born->drobc[idx] * fr->dvda[idx];
- }
- }
-
- for (i = 0; i < 2*natoms; i++)
- {
- fx_align[i] = 0;
- fy_align[i] = 0;
- fz_align[i] = 0;
- }
-
-
- for (i = 0; i < natoms; i++)
- {
- rb[i+natoms] = rb[i];
- }
-
- for (i = ni0; i < ni1; i += UNROLLI)
- {
- /* We assume shifts are NOT used for all-vs-all interactions */
-
- /* Load i atom data */
- ix_SSE0 = _mm_load1_ps(x_align+i);
- iy_SSE0 = _mm_load1_ps(y_align+i);
- iz_SSE0 = _mm_load1_ps(z_align+i);
- ix_SSE1 = _mm_load1_ps(x_align+i+1);
- iy_SSE1 = _mm_load1_ps(y_align+i+1);
- iz_SSE1 = _mm_load1_ps(z_align+i+1);
- ix_SSE2 = _mm_load1_ps(x_align+i+2);
- iy_SSE2 = _mm_load1_ps(y_align+i+2);
- iz_SSE2 = _mm_load1_ps(z_align+i+2);
- ix_SSE3 = _mm_load1_ps(x_align+i+3);
- iy_SSE3 = _mm_load1_ps(y_align+i+3);
- iz_SSE3 = _mm_load1_ps(z_align+i+3);
-
- fix_SSE0 = _mm_setzero_ps();
- fiy_SSE0 = _mm_setzero_ps();
- fiz_SSE0 = _mm_setzero_ps();
- fix_SSE1 = _mm_setzero_ps();
- fiy_SSE1 = _mm_setzero_ps();
- fiz_SSE1 = _mm_setzero_ps();
- fix_SSE2 = _mm_setzero_ps();
- fiy_SSE2 = _mm_setzero_ps();
- fiz_SSE2 = _mm_setzero_ps();
- fix_SSE3 = _mm_setzero_ps();
- fiy_SSE3 = _mm_setzero_ps();
- fiz_SSE3 = _mm_setzero_ps();
-
- rbai_SSE0 = _mm_load1_ps(rb+i);
- rbai_SSE1 = _mm_load1_ps(rb+i+1);
- rbai_SSE2 = _mm_load1_ps(rb+i+2);
- rbai_SSE3 = _mm_load1_ps(rb+i+3);
-
- /* Load limits for loop over neighbors */
- nj0 = jindex[4*i];
- nj3 = jindex[4*i+3];
-
- /* No masks necessary, since the stored chain rule derivatives will be zero in those cases! */
- for (j = nj0; j < nj3; j += UNROLLJ)
- {
- /* load j atom coordinates */
- jx_SSE = _mm_load_ps(x_align+j);
- jy_SSE = _mm_load_ps(y_align+j);
- jz_SSE = _mm_load_ps(z_align+j);
-
- /* Calculate distance */
- dx_SSE0 = _mm_sub_ps(ix_SSE0, jx_SSE);
- dy_SSE0 = _mm_sub_ps(iy_SSE0, jy_SSE);
- dz_SSE0 = _mm_sub_ps(iz_SSE0, jz_SSE);
- dx_SSE1 = _mm_sub_ps(ix_SSE1, jx_SSE);
- dy_SSE1 = _mm_sub_ps(iy_SSE1, jy_SSE);
- dz_SSE1 = _mm_sub_ps(iz_SSE1, jz_SSE);
- dx_SSE2 = _mm_sub_ps(ix_SSE2, jx_SSE);
- dy_SSE2 = _mm_sub_ps(iy_SSE2, jy_SSE);
- dz_SSE2 = _mm_sub_ps(iz_SSE2, jz_SSE);
- dx_SSE3 = _mm_sub_ps(ix_SSE3, jx_SSE);
- dy_SSE3 = _mm_sub_ps(iy_SSE3, jy_SSE);
- dz_SSE3 = _mm_sub_ps(iz_SSE3, jz_SSE);
-
- rbaj_SSE = _mm_load_ps(rb+j);
-
- fgb_SSE0 = _mm_mul_ps(rbai_SSE0, _mm_load_ps(dadx));
- dadx += 4;
- fgb_SSE1 = _mm_mul_ps(rbai_SSE1, _mm_load_ps(dadx));
- dadx += 4;
- fgb_SSE2 = _mm_mul_ps(rbai_SSE2, _mm_load_ps(dadx));
- dadx += 4;
- fgb_SSE3 = _mm_mul_ps(rbai_SSE3, _mm_load_ps(dadx));
- dadx += 4;
-
- fgb_ai_SSE0 = _mm_mul_ps(rbaj_SSE, _mm_load_ps(dadx));
- dadx += 4;
- fgb_ai_SSE1 = _mm_mul_ps(rbaj_SSE, _mm_load_ps(dadx));
- dadx += 4;
- fgb_ai_SSE2 = _mm_mul_ps(rbaj_SSE, _mm_load_ps(dadx));
- dadx += 4;
- fgb_ai_SSE3 = _mm_mul_ps(rbaj_SSE, _mm_load_ps(dadx));
- dadx += 4;
-
- /* Total force between ai and aj is the sum of ai->aj and aj->ai */
- fgb_SSE0 = _mm_add_ps(fgb_SSE0, fgb_ai_SSE0);
- fgb_SSE1 = _mm_add_ps(fgb_SSE1, fgb_ai_SSE1);
- fgb_SSE2 = _mm_add_ps(fgb_SSE2, fgb_ai_SSE2);
- fgb_SSE3 = _mm_add_ps(fgb_SSE3, fgb_ai_SSE3);
-
- /* Calculate temporary vectorial force */
- tx_SSE0 = _mm_mul_ps(fgb_SSE0, dx_SSE0);
- ty_SSE0 = _mm_mul_ps(fgb_SSE0, dy_SSE0);
- tz_SSE0 = _mm_mul_ps(fgb_SSE0, dz_SSE0);
- tx_SSE1 = _mm_mul_ps(fgb_SSE1, dx_SSE1);
- ty_SSE1 = _mm_mul_ps(fgb_SSE1, dy_SSE1);
- tz_SSE1 = _mm_mul_ps(fgb_SSE1, dz_SSE1);
- tx_SSE2 = _mm_mul_ps(fgb_SSE2, dx_SSE2);
- ty_SSE2 = _mm_mul_ps(fgb_SSE2, dy_SSE2);
- tz_SSE2 = _mm_mul_ps(fgb_SSE2, dz_SSE2);
- tx_SSE3 = _mm_mul_ps(fgb_SSE3, dx_SSE3);
- ty_SSE3 = _mm_mul_ps(fgb_SSE3, dy_SSE3);
- tz_SSE3 = _mm_mul_ps(fgb_SSE3, dz_SSE3);
-
- /* Increment i atom force */
- fix_SSE0 = _mm_add_ps(fix_SSE0, tx_SSE0);
- fiy_SSE0 = _mm_add_ps(fiy_SSE0, ty_SSE0);
- fiz_SSE0 = _mm_add_ps(fiz_SSE0, tz_SSE0);
- fix_SSE1 = _mm_add_ps(fix_SSE1, tx_SSE1);
- fiy_SSE1 = _mm_add_ps(fiy_SSE1, ty_SSE1);
- fiz_SSE1 = _mm_add_ps(fiz_SSE1, tz_SSE1);
- fix_SSE2 = _mm_add_ps(fix_SSE2, tx_SSE2);
- fiy_SSE2 = _mm_add_ps(fiy_SSE2, ty_SSE2);
- fiz_SSE2 = _mm_add_ps(fiz_SSE2, tz_SSE2);
- fix_SSE3 = _mm_add_ps(fix_SSE3, tx_SSE3);
- fiy_SSE3 = _mm_add_ps(fiy_SSE3, ty_SSE3);
- fiz_SSE3 = _mm_add_ps(fiz_SSE3, tz_SSE3);
-
- /* Decrement j atom force */
- _mm_store_ps(fx_align+j,
- _mm_sub_ps( _mm_load_ps(fx_align+j), gmx_mm_sum4_ps(tx_SSE0, tx_SSE1, tx_SSE2, tx_SSE3) ));
- _mm_store_ps(fy_align+j,
- _mm_sub_ps( _mm_load_ps(fy_align+j), gmx_mm_sum4_ps(ty_SSE0, ty_SSE1, ty_SSE2, ty_SSE3) ));
- _mm_store_ps(fz_align+j,
- _mm_sub_ps( _mm_load_ps(fz_align+j), gmx_mm_sum4_ps(tz_SSE0, tz_SSE1, tz_SSE2, tz_SSE3) ));
- }
- /* Add i forces to mem and shifted force list */
- _MM_TRANSPOSE4_PS(fix_SSE0, fix_SSE1, fix_SSE2, fix_SSE3);
- fix_SSE0 = _mm_add_ps(fix_SSE0, fix_SSE1);
- fix_SSE2 = _mm_add_ps(fix_SSE2, fix_SSE3);
- fix_SSE0 = _mm_add_ps(fix_SSE0, fix_SSE2);
- _mm_store_ps(fx_align+i, _mm_add_ps(fix_SSE0, _mm_load_ps(fx_align+i)));
-
- _MM_TRANSPOSE4_PS(fiy_SSE0, fiy_SSE1, fiy_SSE2, fiy_SSE3);
- fiy_SSE0 = _mm_add_ps(fiy_SSE0, fiy_SSE1);
- fiy_SSE2 = _mm_add_ps(fiy_SSE2, fiy_SSE3);
- fiy_SSE0 = _mm_add_ps(fiy_SSE0, fiy_SSE2);
- _mm_store_ps(fy_align+i, _mm_add_ps(fiy_SSE0, _mm_load_ps(fy_align+i)));
-
- _MM_TRANSPOSE4_PS(fiz_SSE0, fiz_SSE1, fiz_SSE2, fiz_SSE3);
- fiz_SSE0 = _mm_add_ps(fiz_SSE0, fiz_SSE1);
- fiz_SSE2 = _mm_add_ps(fiz_SSE2, fiz_SSE3);
- fiz_SSE0 = _mm_add_ps(fiz_SSE0, fiz_SSE2);
- _mm_store_ps(fz_align+i, _mm_add_ps(fiz_SSE0, _mm_load_ps(fz_align+i)));
- }
-
- for (i = 0; i < natoms; i++)
- {
- f[3*i] += fx_align[i] + fx_align[natoms+i];
- f[3*i+1] += fy_align[i] + fy_align[natoms+i];
- f[3*i+2] += fz_align[i] + fz_align[natoms+i];
- }
-
- return 0;
-}
-
-#else
-/* dummy variable when not using SSE */
-int genborn_allvsall_sse2_single_dummy;
-
-
-#endif
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team.
- * Copyright (c) 2010,2014, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#ifndef _GENBORN_ALLVSALL_SSE2_SINGLE_H
-#define _GENBORN_ALLVSALL_SSE2_SINGLE_H
-
-#include "gromacs/legacyheaders/typedefs.h"
-#include "gromacs/legacyheaders/types/simple.h"
-
-int
-genborn_allvsall_calc_still_radii_sse2_single(t_forcerec * fr,
- t_mdatoms * mdatoms,
- gmx_genborn_t * born,
- gmx_localtop_t * top,
- real * x,
- t_commrec * cr,
- void * work);
-
-int
-genborn_allvsall_calc_hct_obc_radii_sse2_single(t_forcerec * fr,
- t_mdatoms * mdatoms,
- gmx_genborn_t * born,
- int gb_algorithm,
- gmx_localtop_t * top,
- real * x,
- t_commrec * cr,
- void * work);
-
-int
-genborn_allvsall_calc_chainrule_sse2_single(t_forcerec * fr,
- t_mdatoms * mdatoms,
- gmx_genborn_t * born,
- real * x,
- real * f,
- int gb_algorithm,
- void * work);
-
-#endif
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2008, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#include "gmxpre.h"
-
-#include <math.h>
-#include <string.h>
-
-#include "gromacs/domdec/domdec.h"
-#include "gromacs/fileio/pdbio.h"
-#include "gromacs/legacyheaders/genborn.h"
-#include "gromacs/legacyheaders/names.h"
-#include "gromacs/legacyheaders/network.h"
-#include "gromacs/legacyheaders/typedefs.h"
-#include "gromacs/math/units.h"
-#include "gromacs/math/vec.h"
-#include "gromacs/utility/fatalerror.h"
-#include "gromacs/utility/gmxmpi.h"
-#include "gromacs/utility/smalloc.h"
-
-/* Only compile this file if SSE2 intrinsics are available */
-#if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER)
-#include "genborn_sse2_double.h"
-
-#include <emmintrin.h>
-#include <gmx_sse2_double.h>
-
-int
-calc_gb_rad_still_sse2_double(t_commrec *cr, t_forcerec *fr,
- int natoms, gmx_localtop_t *top,
- double *x, t_nblist *nl,
- gmx_genborn_t *born)
-{
- int i, k, n, ii, is3, ii3, nj0, nj1, offset;
- int jnrA, jnrB, j3A, j3B;
- int *mdtype;
- double shX, shY, shZ;
- int *jjnr;
- double *shiftvec;
-
- double gpi_ai, gpi2;
- double factor;
- double *gb_radius;
- double *vsolv;
- double *work;
- double *dadx;
-
- __m128d ix, iy, iz;
- __m128d jx, jy, jz;
- __m128d dx, dy, dz;
- __m128d tx, ty, tz;
- __m128d rsq, rinv, rinv2, rinv4, rinv6;
- __m128d ratio, gpi, rai, raj, vai, vaj, rvdw;
- __m128d ccf, dccf, theta, cosq, term, sinq, res, prod, prod_ai, tmp;
- __m128d mask, icf4, icf6, mask_cmp;
-
- const __m128d half = _mm_set1_pd(0.5);
- const __m128d three = _mm_set1_pd(3.0);
- const __m128d one = _mm_set1_pd(1.0);
- const __m128d two = _mm_set1_pd(2.0);
- const __m128d zero = _mm_set1_pd(0.0);
- const __m128d four = _mm_set1_pd(4.0);
-
- const __m128d still_p5inv = _mm_set1_pd(STILL_P5INV);
- const __m128d still_pip5 = _mm_set1_pd(STILL_PIP5);
- const __m128d still_p4 = _mm_set1_pd(STILL_P4);
-
- factor = 0.5 * ONE_4PI_EPS0;
-
- gb_radius = born->gb_radius;
- vsolv = born->vsolv;
- work = born->gpol_still_work;
- jjnr = nl->jjnr;
- shiftvec = fr->shift_vec[0];
- dadx = fr->dadx;
-
- jnrA = jnrB = 0;
- jx = _mm_setzero_pd();
- jy = _mm_setzero_pd();
- jz = _mm_setzero_pd();
-
- n = 0;
-
- for (i = 0; i < natoms; i++)
- {
- work[i] = 0;
- }
-
- for (i = 0; i < nl->nri; i++)
- {
- ii = nl->iinr[i];
- ii3 = ii*3;
- is3 = 3*nl->shift[i];
- shX = shiftvec[is3];
- shY = shiftvec[is3+1];
- shZ = shiftvec[is3+2];
- nj0 = nl->jindex[i];
- nj1 = nl->jindex[i+1];
-
- ix = _mm_set1_pd(shX+x[ii3+0]);
- iy = _mm_set1_pd(shY+x[ii3+1]);
- iz = _mm_set1_pd(shZ+x[ii3+2]);
-
-
- /* Polarization energy for atom ai */
- gpi = _mm_setzero_pd();
-
- rai = _mm_load1_pd(gb_radius+ii);
- prod_ai = _mm_set1_pd(STILL_P4*vsolv[ii]);
-
- for (k = nj0; k < nj1-1; k += 2)
- {
- jnrA = jjnr[k];
- jnrB = jjnr[k+1];
-
- j3A = 3*jnrA;
- j3B = 3*jnrB;
-
- GMX_MM_LOAD_1RVEC_2POINTERS_PD(x+j3A, x+j3B, jx, jy, jz);
-
- GMX_MM_LOAD_2VALUES_PD(gb_radius+jnrA, gb_radius+jnrB, raj);
- GMX_MM_LOAD_2VALUES_PD(vsolv+jnrA, vsolv+jnrB, vaj);
-
- dx = _mm_sub_pd(ix, jx);
- dy = _mm_sub_pd(iy, jy);
- dz = _mm_sub_pd(iz, jz);
-
- rsq = gmx_mm_calc_rsq_pd(dx, dy, dz);
- rinv = gmx_mm_invsqrt_pd(rsq);
- rinv2 = _mm_mul_pd(rinv, rinv);
- rinv4 = _mm_mul_pd(rinv2, rinv2);
- rinv6 = _mm_mul_pd(rinv4, rinv2);
-
- rvdw = _mm_add_pd(rai, raj);
- ratio = _mm_mul_pd(rsq, gmx_mm_inv_pd( _mm_mul_pd(rvdw, rvdw)));
-
- mask_cmp = _mm_cmple_pd(ratio, still_p5inv);
-
- /* gmx_mm_sincos_pd() is quite expensive, so avoid calculating it if we can! */
- if (0 == _mm_movemask_pd(mask_cmp) )
- {
- /* if ratio>still_p5inv for ALL elements */
- ccf = one;
- dccf = _mm_setzero_pd();
- }
- else
- {
- ratio = _mm_min_pd(ratio, still_p5inv);
- theta = _mm_mul_pd(ratio, still_pip5);
- gmx_mm_sincos_pd(theta, &sinq, &cosq);
- term = _mm_mul_pd(half, _mm_sub_pd(one, cosq));
- ccf = _mm_mul_pd(term, term);
- dccf = _mm_mul_pd(_mm_mul_pd(two, term),
- _mm_mul_pd(sinq, theta));
- }
-
- prod = _mm_mul_pd(still_p4, vaj);
- icf4 = _mm_mul_pd(ccf, rinv4);
- icf6 = _mm_mul_pd( _mm_sub_pd( _mm_mul_pd(four, ccf), dccf), rinv6);
-
- GMX_MM_INCREMENT_2VALUES_PD(work+jnrA, work+jnrB, _mm_mul_pd(prod_ai, icf4));
-
- gpi = _mm_add_pd(gpi, _mm_mul_pd(prod, icf4) );
-
- _mm_store_pd(dadx, _mm_mul_pd(prod, icf6));
- dadx += 2;
- _mm_store_pd(dadx, _mm_mul_pd(prod_ai, icf6));
- dadx += 2;
- }
-
- if (k < nj1)
- {
- jnrA = jjnr[k];
-
- j3A = 3*jnrA;
-
- GMX_MM_LOAD_1RVEC_1POINTER_PD(x+j3A, jx, jy, jz);
-
- GMX_MM_LOAD_1VALUE_PD(gb_radius+jnrA, raj);
- GMX_MM_LOAD_1VALUE_PD(vsolv+jnrA, vaj);
-
- dx = _mm_sub_sd(ix, jx);
- dy = _mm_sub_sd(iy, jy);
- dz = _mm_sub_sd(iz, jz);
-
- rsq = gmx_mm_calc_rsq_pd(dx, dy, dz);
- rinv = gmx_mm_invsqrt_pd(rsq);
- rinv2 = _mm_mul_sd(rinv, rinv);
- rinv4 = _mm_mul_sd(rinv2, rinv2);
- rinv6 = _mm_mul_sd(rinv4, rinv2);
-
- rvdw = _mm_add_sd(rai, raj);
- ratio = _mm_mul_sd(rsq, gmx_mm_inv_pd( _mm_mul_pd(rvdw, rvdw)));
-
- mask_cmp = _mm_cmple_sd(ratio, still_p5inv);
-
- /* gmx_mm_sincos_pd() is quite expensive, so avoid calculating it if we can! */
- if (0 == _mm_movemask_pd(mask_cmp) )
- {
- /* if ratio>still_p5inv for ALL elements */
- ccf = one;
- dccf = _mm_setzero_pd();
- }
- else
- {
- ratio = _mm_min_sd(ratio, still_p5inv);
- theta = _mm_mul_sd(ratio, still_pip5);
- gmx_mm_sincos_pd(theta, &sinq, &cosq);
- term = _mm_mul_sd(half, _mm_sub_sd(one, cosq));
- ccf = _mm_mul_sd(term, term);
- dccf = _mm_mul_sd(_mm_mul_sd(two, term),
- _mm_mul_sd(sinq, theta));
- }
-
- prod = _mm_mul_sd(still_p4, vaj);
- icf4 = _mm_mul_sd(ccf, rinv4);
- icf6 = _mm_mul_sd( _mm_sub_sd( _mm_mul_sd(four, ccf), dccf), rinv6);
-
- GMX_MM_INCREMENT_1VALUE_PD(work+jnrA, _mm_mul_sd(prod_ai, icf4));
-
- gpi = _mm_add_sd(gpi, _mm_mul_sd(prod, icf4) );
-
- _mm_store_pd(dadx, _mm_mul_pd(prod, icf6));
- dadx += 2;
- _mm_store_pd(dadx, _mm_mul_pd(prod_ai, icf6));
- dadx += 2;
- }
- gmx_mm_update_1pot_pd(gpi, work+ii);
- }
-
- /* Sum up the polarization energy from other nodes */
- if (DOMAINDECOMP(cr))
- {
- dd_atom_sum_real(cr->dd, work);
- }
-
- /* Compute the radii */
- for (i = 0; i < fr->natoms_force; i++) /* PELA born->nr */
- {
- if (born->use[i] != 0)
- {
- gpi_ai = born->gpol[i] + work[i]; /* add gpi to the initial pol energy gpi_ai*/
- gpi2 = gpi_ai * gpi_ai;
- born->bRad[i] = factor*gmx_invsqrt(gpi2);
- fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
- }
- }
-
- /* Extra (local) communication required for DD */
- if (DOMAINDECOMP(cr))
- {
- dd_atom_spread_real(cr->dd, born->bRad);
- dd_atom_spread_real(cr->dd, fr->invsqrta);
- }
-
- return 0;
-}
-
-
-int
-calc_gb_rad_hct_obc_sse2_double(t_commrec *cr, t_forcerec * fr, int natoms, gmx_localtop_t *top,
- double *x, t_nblist *nl, gmx_genborn_t *born, t_mdatoms *md, int gb_algorithm)
-{
- int i, ai, k, n, ii, ii3, is3, nj0, nj1, at0, at1, offset;
- int jnrA, jnrB;
- int j3A, j3B;
- double shX, shY, shZ;
- double rr, rr_inv, rr_inv2, sum_tmp, sum, sum2, sum3, gbr;
- double sum_ai2, sum_ai3, tsum, tchain, doffset;
- double *obc_param;
- double *gb_radius;
- double *work;
- int * jjnr;
- double *dadx;
- double *shiftvec;
- double min_rad, rad;
-
- __m128d ix, iy, iz, jx, jy, jz;
- __m128d dx, dy, dz, t1, t2, t3, t4;
- __m128d rsq, rinv, r;
- __m128d rai, rai_inv, raj, raj_inv, rai_inv2, sk, sk2, lij, dlij, duij;
- __m128d uij, lij2, uij2, lij3, uij3, diff2;
- __m128d lij_inv, sk2_inv, prod, log_term, tmp, tmp_sum;
- __m128d sum_ai, tmp_ai, sk_ai, sk_aj, sk2_ai, sk2_aj, sk2_rinv;
- __m128d dadx1, dadx2;
- __m128d logterm;
- __m128d mask;
- __m128d obc_mask1, obc_mask2, obc_mask3;
-
- __m128d oneeighth = _mm_set1_pd(0.125);
- __m128d onefourth = _mm_set1_pd(0.25);
-
- const __m128d half = _mm_set1_pd(0.5);
- const __m128d three = _mm_set1_pd(3.0);
- const __m128d one = _mm_set1_pd(1.0);
- const __m128d two = _mm_set1_pd(2.0);
- const __m128d zero = _mm_set1_pd(0.0);
- const __m128d neg = _mm_set1_pd(-1.0);
-
- /* Set the dielectric offset */
- doffset = born->gb_doffset;
- gb_radius = born->gb_radius;
- obc_param = born->param;
- work = born->gpol_hct_work;
- jjnr = nl->jjnr;
- dadx = fr->dadx;
- shiftvec = fr->shift_vec[0];
-
- jx = _mm_setzero_pd();
- jy = _mm_setzero_pd();
- jz = _mm_setzero_pd();
-
- jnrA = jnrB = 0;
-
- for (i = 0; i < born->nr; i++)
- {
- work[i] = 0;
- }
-
- for (i = 0; i < nl->nri; i++)
- {
- ii = nl->iinr[i];
- ii3 = ii*3;
- is3 = 3*nl->shift[i];
- shX = shiftvec[is3];
- shY = shiftvec[is3+1];
- shZ = shiftvec[is3+2];
- nj0 = nl->jindex[i];
- nj1 = nl->jindex[i+1];
-
- ix = _mm_set1_pd(shX+x[ii3+0]);
- iy = _mm_set1_pd(shY+x[ii3+1]);
- iz = _mm_set1_pd(shZ+x[ii3+2]);
-
- rai = _mm_load1_pd(gb_radius+ii);
- rai_inv = gmx_mm_inv_pd(rai);
-
- sum_ai = _mm_setzero_pd();
-
- sk_ai = _mm_load1_pd(born->param+ii);
- sk2_ai = _mm_mul_pd(sk_ai, sk_ai);
-
- for (k = nj0; k < nj1-1; k += 2)
- {
- jnrA = jjnr[k];
- jnrB = jjnr[k+1];
-
- j3A = 3*jnrA;
- j3B = 3*jnrB;
-
- GMX_MM_LOAD_1RVEC_2POINTERS_PD(x+j3A, x+j3B, jx, jy, jz);
- GMX_MM_LOAD_2VALUES_PD(gb_radius+jnrA, gb_radius+jnrB, raj);
- GMX_MM_LOAD_2VALUES_PD(obc_param+jnrA, obc_param+jnrB, sk_aj);
-
- dx = _mm_sub_pd(ix, jx);
- dy = _mm_sub_pd(iy, jy);
- dz = _mm_sub_pd(iz, jz);
-
- rsq = gmx_mm_calc_rsq_pd(dx, dy, dz);
-
- rinv = gmx_mm_invsqrt_pd(rsq);
- r = _mm_mul_pd(rsq, rinv);
-
- /* Compute raj_inv aj1-4 */
- raj_inv = gmx_mm_inv_pd(raj);
-
- /* Evaluate influence of atom aj -> ai */
- t1 = _mm_add_pd(r, sk_aj);
- t2 = _mm_sub_pd(r, sk_aj);
- t3 = _mm_sub_pd(sk_aj, r);
- obc_mask1 = _mm_cmplt_pd(rai, t1);
- obc_mask2 = _mm_cmplt_pd(rai, t2);
- obc_mask3 = _mm_cmplt_pd(rai, t3);
-
- uij = gmx_mm_inv_pd(t1);
- lij = _mm_or_pd( _mm_and_pd(obc_mask2, gmx_mm_inv_pd(t2)),
- _mm_andnot_pd(obc_mask2, rai_inv));
- dlij = _mm_and_pd(one, obc_mask2);
- uij2 = _mm_mul_pd(uij, uij);
- uij3 = _mm_mul_pd(uij2, uij);
- lij2 = _mm_mul_pd(lij, lij);
- lij3 = _mm_mul_pd(lij2, lij);
-
- diff2 = _mm_sub_pd(uij2, lij2);
- lij_inv = gmx_mm_invsqrt_pd(lij2);
- sk2_aj = _mm_mul_pd(sk_aj, sk_aj);
- sk2_rinv = _mm_mul_pd(sk2_aj, rinv);
- prod = _mm_mul_pd(onefourth, sk2_rinv);
-
- logterm = gmx_mm_log_pd(_mm_mul_pd(uij, lij_inv));
-
- t1 = _mm_sub_pd(lij, uij);
- t2 = _mm_mul_pd(diff2,
- _mm_sub_pd(_mm_mul_pd(onefourth, r),
- prod));
- t3 = _mm_mul_pd(half, _mm_mul_pd(rinv, logterm));
- t1 = _mm_add_pd(t1, _mm_add_pd(t2, t3));
- t4 = _mm_mul_pd(two, _mm_sub_pd(rai_inv, lij));
- t4 = _mm_and_pd(t4, obc_mask3);
- t1 = _mm_mul_pd(half, _mm_add_pd(t1, t4));
-
- sum_ai = _mm_add_pd(sum_ai, _mm_and_pd(t1, obc_mask1) );
-
- t1 = _mm_add_pd(_mm_mul_pd(half, lij2),
- _mm_mul_pd(prod, lij3));
- t1 = _mm_sub_pd(t1,
- _mm_mul_pd(onefourth,
- _mm_add_pd(_mm_mul_pd(lij, rinv),
- _mm_mul_pd(lij3, r))));
- t2 = _mm_mul_pd(onefourth,
- _mm_add_pd(_mm_mul_pd(uij, rinv),
- _mm_mul_pd(uij3, r)));
- t2 = _mm_sub_pd(t2,
- _mm_add_pd(_mm_mul_pd(half, uij2),
- _mm_mul_pd(prod, uij3)));
- t3 = _mm_mul_pd(_mm_mul_pd(onefourth, logterm),
- _mm_mul_pd(rinv, rinv));
- t3 = _mm_sub_pd(t3,
- _mm_mul_pd(_mm_mul_pd(diff2, oneeighth),
- _mm_add_pd(one,
- _mm_mul_pd(sk2_rinv, rinv))));
- t1 = _mm_mul_pd(rinv,
- _mm_add_pd(_mm_mul_pd(dlij, t1),
- _mm_add_pd(t2, t3)));
-
- dadx1 = _mm_and_pd(t1, obc_mask1);
-
- /* Evaluate influence of atom ai -> aj */
- t1 = _mm_add_pd(r, sk_ai);
- t2 = _mm_sub_pd(r, sk_ai);
- t3 = _mm_sub_pd(sk_ai, r);
- obc_mask1 = _mm_cmplt_pd(raj, t1);
- obc_mask2 = _mm_cmplt_pd(raj, t2);
- obc_mask3 = _mm_cmplt_pd(raj, t3);
-
- uij = gmx_mm_inv_pd(t1);
- lij = _mm_or_pd( _mm_and_pd(obc_mask2, gmx_mm_inv_pd(t2)),
- _mm_andnot_pd(obc_mask2, raj_inv));
- dlij = _mm_and_pd(one, obc_mask2);
- uij2 = _mm_mul_pd(uij, uij);
- uij3 = _mm_mul_pd(uij2, uij);
- lij2 = _mm_mul_pd(lij, lij);
- lij3 = _mm_mul_pd(lij2, lij);
-
- diff2 = _mm_sub_pd(uij2, lij2);
- lij_inv = gmx_mm_invsqrt_pd(lij2);
- sk2_rinv = _mm_mul_pd(sk2_ai, rinv);
- prod = _mm_mul_pd(onefourth, sk2_rinv);
-
- logterm = gmx_mm_log_pd(_mm_mul_pd(uij, lij_inv));
-
- t1 = _mm_sub_pd(lij, uij);
- t2 = _mm_mul_pd(diff2,
- _mm_sub_pd(_mm_mul_pd(onefourth, r),
- prod));
- t3 = _mm_mul_pd(half, _mm_mul_pd(rinv, logterm));
- t1 = _mm_add_pd(t1, _mm_add_pd(t2, t3));
- t4 = _mm_mul_pd(two, _mm_sub_pd(raj_inv, lij));
- t4 = _mm_and_pd(t4, obc_mask3);
- t1 = _mm_mul_pd(half, _mm_add_pd(t1, t4));
-
- GMX_MM_INCREMENT_2VALUES_PD(work+jnrA, work+jnrB, _mm_and_pd(t1, obc_mask1));
-
- t1 = _mm_add_pd(_mm_mul_pd(half, lij2),
- _mm_mul_pd(prod, lij3));
- t1 = _mm_sub_pd(t1,
- _mm_mul_pd(onefourth,
- _mm_add_pd(_mm_mul_pd(lij, rinv),
- _mm_mul_pd(lij3, r))));
- t2 = _mm_mul_pd(onefourth,
- _mm_add_pd(_mm_mul_pd(uij, rinv),
- _mm_mul_pd(uij3, r)));
- t2 = _mm_sub_pd(t2,
- _mm_add_pd(_mm_mul_pd(half, uij2),
- _mm_mul_pd(prod, uij3)));
- t3 = _mm_mul_pd(_mm_mul_pd(onefourth, logterm),
- _mm_mul_pd(rinv, rinv));
- t3 = _mm_sub_pd(t3,
- _mm_mul_pd(_mm_mul_pd(diff2, oneeighth),
- _mm_add_pd(one,
- _mm_mul_pd(sk2_rinv, rinv))));
- t1 = _mm_mul_pd(rinv,
- _mm_add_pd(_mm_mul_pd(dlij, t1),
- _mm_add_pd(t2, t3)));
-
- dadx2 = _mm_and_pd(t1, obc_mask1);
-
- _mm_store_pd(dadx, dadx1);
- dadx += 2;
- _mm_store_pd(dadx, dadx2);
- dadx += 2;
- } /* end normal inner loop */
-
- if (k < nj1)
- {
- jnrA = jjnr[k];
-
- j3A = 3*jnrA;
-
- GMX_MM_LOAD_1RVEC_1POINTER_PD(x+j3A, jx, jy, jz);
- GMX_MM_LOAD_1VALUE_PD(gb_radius+jnrA, raj);
- GMX_MM_LOAD_1VALUE_PD(obc_param+jnrA, sk_aj);
-
- dx = _mm_sub_sd(ix, jx);
- dy = _mm_sub_sd(iy, jy);
- dz = _mm_sub_sd(iz, jz);
-
- rsq = gmx_mm_calc_rsq_pd(dx, dy, dz);
-
- rinv = gmx_mm_invsqrt_pd(rsq);
- r = _mm_mul_sd(rsq, rinv);
-
- /* Compute raj_inv aj1-4 */
- raj_inv = gmx_mm_inv_pd(raj);
-
- /* Evaluate influence of atom aj -> ai */
- t1 = _mm_add_sd(r, sk_aj);
- t2 = _mm_sub_sd(r, sk_aj);
- t3 = _mm_sub_sd(sk_aj, r);
- obc_mask1 = _mm_cmplt_sd(rai, t1);
- obc_mask2 = _mm_cmplt_sd(rai, t2);
- obc_mask3 = _mm_cmplt_sd(rai, t3);
-
- uij = gmx_mm_inv_pd(t1);
- lij = _mm_or_pd(_mm_and_pd(obc_mask2, gmx_mm_inv_pd(t2)),
- _mm_andnot_pd(obc_mask2, rai_inv));
- dlij = _mm_and_pd(one, obc_mask2);
- uij2 = _mm_mul_sd(uij, uij);
- uij3 = _mm_mul_sd(uij2, uij);
- lij2 = _mm_mul_sd(lij, lij);
- lij3 = _mm_mul_sd(lij2, lij);
-
- diff2 = _mm_sub_sd(uij2, lij2);
- lij_inv = gmx_mm_invsqrt_pd(lij2);
- sk2_aj = _mm_mul_sd(sk_aj, sk_aj);
- sk2_rinv = _mm_mul_sd(sk2_aj, rinv);
- prod = _mm_mul_sd(onefourth, sk2_rinv);
-
- logterm = gmx_mm_log_pd(_mm_mul_sd(uij, lij_inv));
-
- t1 = _mm_sub_sd(lij, uij);
- t2 = _mm_mul_sd(diff2,
- _mm_sub_sd(_mm_mul_pd(onefourth, r),
- prod));
- t3 = _mm_mul_sd(half, _mm_mul_sd(rinv, logterm));
- t1 = _mm_add_sd(t1, _mm_add_sd(t2, t3));
- t4 = _mm_mul_sd(two, _mm_sub_sd(rai_inv, lij));
- t4 = _mm_and_pd(t4, obc_mask3);
- t1 = _mm_mul_sd(half, _mm_add_sd(t1, t4));
-
- sum_ai = _mm_add_sd(sum_ai, _mm_and_pd(t1, obc_mask1) );
-
- t1 = _mm_add_sd(_mm_mul_sd(half, lij2),
- _mm_mul_sd(prod, lij3));
- t1 = _mm_sub_sd(t1,
- _mm_mul_sd(onefourth,
- _mm_add_sd(_mm_mul_sd(lij, rinv),
- _mm_mul_sd(lij3, r))));
- t2 = _mm_mul_sd(onefourth,
- _mm_add_sd(_mm_mul_sd(uij, rinv),
- _mm_mul_sd(uij3, r)));
- t2 = _mm_sub_sd(t2,
- _mm_add_sd(_mm_mul_sd(half, uij2),
- _mm_mul_sd(prod, uij3)));
- t3 = _mm_mul_sd(_mm_mul_sd(onefourth, logterm),
- _mm_mul_sd(rinv, rinv));
- t3 = _mm_sub_sd(t3,
- _mm_mul_sd(_mm_mul_sd(diff2, oneeighth),
- _mm_add_sd(one,
- _mm_mul_sd(sk2_rinv, rinv))));
- t1 = _mm_mul_sd(rinv,
- _mm_add_sd(_mm_mul_sd(dlij, t1),
- _mm_add_pd(t2, t3)));
-
- dadx1 = _mm_and_pd(t1, obc_mask1);
-
- /* Evaluate influence of atom ai -> aj */
- t1 = _mm_add_sd(r, sk_ai);
- t2 = _mm_sub_sd(r, sk_ai);
- t3 = _mm_sub_sd(sk_ai, r);
- obc_mask1 = _mm_cmplt_sd(raj, t1);
- obc_mask2 = _mm_cmplt_sd(raj, t2);
- obc_mask3 = _mm_cmplt_sd(raj, t3);
-
- uij = gmx_mm_inv_pd(t1);
- lij = _mm_or_pd( _mm_and_pd(obc_mask2, gmx_mm_inv_pd(t2)),
- _mm_andnot_pd(obc_mask2, raj_inv));
- dlij = _mm_and_pd(one, obc_mask2);
- uij2 = _mm_mul_sd(uij, uij);
- uij3 = _mm_mul_sd(uij2, uij);
- lij2 = _mm_mul_sd(lij, lij);
- lij3 = _mm_mul_sd(lij2, lij);
-
- diff2 = _mm_sub_sd(uij2, lij2);
- lij_inv = gmx_mm_invsqrt_pd(lij2);
- sk2_rinv = _mm_mul_sd(sk2_ai, rinv);
- prod = _mm_mul_sd(onefourth, sk2_rinv);
-
- logterm = gmx_mm_log_pd(_mm_mul_sd(uij, lij_inv));
-
- t1 = _mm_sub_sd(lij, uij);
- t2 = _mm_mul_sd(diff2,
- _mm_sub_sd(_mm_mul_sd(onefourth, r),
- prod));
- t3 = _mm_mul_sd(half, _mm_mul_sd(rinv, logterm));
- t1 = _mm_add_sd(t1, _mm_add_sd(t2, t3));
- t4 = _mm_mul_sd(two, _mm_sub_sd(raj_inv, lij));
- t4 = _mm_and_pd(t4, obc_mask3);
- t1 = _mm_mul_sd(half, _mm_add_sd(t1, t4));
-
- GMX_MM_INCREMENT_1VALUE_PD(work+jnrA, _mm_and_pd(t1, obc_mask1));
-
- t1 = _mm_add_sd(_mm_mul_sd(half, lij2),
- _mm_mul_sd(prod, lij3));
- t1 = _mm_sub_sd(t1,
- _mm_mul_sd(onefourth,
- _mm_add_sd(_mm_mul_sd(lij, rinv),
- _mm_mul_sd(lij3, r))));
- t2 = _mm_mul_sd(onefourth,
- _mm_add_sd(_mm_mul_sd(uij, rinv),
- _mm_mul_sd(uij3, r)));
- t2 = _mm_sub_sd(t2,
- _mm_add_sd(_mm_mul_sd(half, uij2),
- _mm_mul_sd(prod, uij3)));
- t3 = _mm_mul_sd(_mm_mul_sd(onefourth, logterm),
- _mm_mul_sd(rinv, rinv));
- t3 = _mm_sub_sd(t3,
- _mm_mul_sd(_mm_mul_sd(diff2, oneeighth),
- _mm_add_sd(one,
- _mm_mul_sd(sk2_rinv, rinv))));
- t1 = _mm_mul_sd(rinv,
- _mm_add_sd(_mm_mul_sd(dlij, t1),
- _mm_add_sd(t2, t3)));
-
- dadx2 = _mm_and_pd(t1, obc_mask1);
-
- _mm_store_pd(dadx, dadx1);
- dadx += 2;
- _mm_store_pd(dadx, dadx2);
- dadx += 2;
- }
- gmx_mm_update_1pot_pd(sum_ai, work+ii);
-
- }
-
- /* Parallel summations */
- if (DOMAINDECOMP(cr))
- {
- dd_atom_sum_real(cr->dd, work);
- }
-
- if (gb_algorithm == egbHCT)
- {
- /* HCT */
- for (i = 0; i < fr->natoms_force; i++) /* PELA born->nr */
- {
- if (born->use[i] != 0)
- {
- rr = top->atomtypes.gb_radius[md->typeA[i]]-doffset;
- sum = 1.0/rr - work[i];
- min_rad = rr + doffset;
- rad = 1.0/sum;
-
- born->bRad[i] = rad > min_rad ? rad : min_rad;
- fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
- }
- }
-
- /* Extra communication required for DD */
- if (DOMAINDECOMP(cr))
- {
- dd_atom_spread_real(cr->dd, born->bRad);
- dd_atom_spread_real(cr->dd, fr->invsqrta);
- }
- }
- else
- {
- /* OBC */
- for (i = 0; i < fr->natoms_force; i++) /* PELA born->nr */
- {
- if (born->use[i] != 0)
- {
- rr = top->atomtypes.gb_radius[md->typeA[i]];
- rr_inv2 = 1.0/rr;
- rr = rr-doffset;
- rr_inv = 1.0/rr;
- sum = rr * work[i];
- sum2 = sum * sum;
- sum3 = sum2 * sum;
-
- tsum = tanh(born->obc_alpha*sum-born->obc_beta*sum2+born->obc_gamma*sum3);
- born->bRad[i] = rr_inv - tsum*rr_inv2;
- born->bRad[i] = 1.0 / born->bRad[i];
-
- fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
-
- tchain = rr * (born->obc_alpha-2*born->obc_beta*sum+3*born->obc_gamma*sum2);
- born->drobc[i] = (1.0-tsum*tsum)*tchain*rr_inv2;
- }
- }
- /* Extra (local) communication required for DD */
- if (DOMAINDECOMP(cr))
- {
- dd_atom_spread_real(cr->dd, born->bRad);
- dd_atom_spread_real(cr->dd, fr->invsqrta);
- dd_atom_spread_real(cr->dd, born->drobc);
- }
- }
-
-
-
- return 0;
-}
-
-
-int
-calc_gb_chainrule_sse2_double(int natoms, t_nblist *nl, double *dadx, double *dvda,
- double *x, double *f, double *fshift, double *shiftvec,
- int gb_algorithm, gmx_genborn_t *born, t_mdatoms *md)
-{
- int i, k, n, ii, jnr, ii3, is3, nj0, nj1, n0, n1;
- int jnrA, jnrB;
- int j3A, j3B;
- int * jjnr;
-
- double rbi, shX, shY, shZ;
- double *rb;
-
- __m128d ix, iy, iz;
- __m128d jx, jy, jz;
- __m128d fix, fiy, fiz;
- __m128d dx, dy, dz;
- __m128d tx, ty, tz;
-
- __m128d rbai, rbaj, f_gb, f_gb_ai;
- __m128d xmm1, xmm2, xmm3;
-
- const __m128d two = _mm_set1_pd(2.0);
-
- rb = born->work;
-
- jjnr = nl->jjnr;
-
- /* Loop to get the proper form for the Born radius term, sse style */
- n0 = 0;
- n1 = natoms;
-
- if (gb_algorithm == egbSTILL)
- {
- for (i = n0; i < n1; i++)
- {
- rbi = born->bRad[i];
- rb[i] = (2 * rbi * rbi * dvda[i])/ONE_4PI_EPS0;
- }
- }
- else if (gb_algorithm == egbHCT)
- {
- for (i = n0; i < n1; i++)
- {
- rbi = born->bRad[i];
- rb[i] = rbi * rbi * dvda[i];
- }
- }
- else if (gb_algorithm == egbOBC)
- {
- for (i = n0; i < n1; i++)
- {
- rbi = born->bRad[i];
- rb[i] = rbi * rbi * born->drobc[i] * dvda[i];
- }
- }
-
- jz = _mm_setzero_pd();
-
- n = j3A = j3B = 0;
-
- for (i = 0; i < nl->nri; i++)
- {
- ii = nl->iinr[i];
- ii3 = ii*3;
- is3 = 3*nl->shift[i];
- shX = shiftvec[is3];
- shY = shiftvec[is3+1];
- shZ = shiftvec[is3+2];
- nj0 = nl->jindex[i];
- nj1 = nl->jindex[i+1];
-
- ix = _mm_set1_pd(shX+x[ii3+0]);
- iy = _mm_set1_pd(shY+x[ii3+1]);
- iz = _mm_set1_pd(shZ+x[ii3+2]);
-
- rbai = _mm_load1_pd(rb+ii);
- fix = _mm_setzero_pd();
- fiy = _mm_setzero_pd();
- fiz = _mm_setzero_pd();
-
-
- for (k = nj0; k < nj1-1; k += 2)
- {
- jnrA = jjnr[k];
- jnrB = jjnr[k+1];
-
- j3A = 3*jnrA;
- j3B = 3*jnrB;
-
- GMX_MM_LOAD_1RVEC_2POINTERS_PD(x+j3A, x+j3B, jx, jy, jz);
-
- dx = _mm_sub_pd(ix, jx);
- dy = _mm_sub_pd(iy, jy);
- dz = _mm_sub_pd(iz, jz);
-
- GMX_MM_LOAD_2VALUES_PD(rb+jnrA, rb+jnrB, rbaj);
-
- /* load chain rule terms for j1-4 */
- f_gb = _mm_load_pd(dadx);
- dadx += 2;
- f_gb_ai = _mm_load_pd(dadx);
- dadx += 2;
-
- /* calculate scalar force */
- f_gb = _mm_mul_pd(f_gb, rbai);
- f_gb_ai = _mm_mul_pd(f_gb_ai, rbaj);
- f_gb = _mm_add_pd(f_gb, f_gb_ai);
-
- tx = _mm_mul_pd(f_gb, dx);
- ty = _mm_mul_pd(f_gb, dy);
- tz = _mm_mul_pd(f_gb, dz);
-
- fix = _mm_add_pd(fix, tx);
- fiy = _mm_add_pd(fiy, ty);
- fiz = _mm_add_pd(fiz, tz);
-
- GMX_MM_DECREMENT_1RVEC_2POINTERS_PD(f+j3A, f+j3B, tx, ty, tz);
- }
-
- /*deal with odd elements */
- if (k < nj1)
- {
- jnrA = jjnr[k];
- j3A = 3*jnrA;
-
- GMX_MM_LOAD_1RVEC_1POINTER_PD(x+j3A, jx, jy, jz);
-
- dx = _mm_sub_sd(ix, jx);
- dy = _mm_sub_sd(iy, jy);
- dz = _mm_sub_sd(iz, jz);
-
- GMX_MM_LOAD_1VALUE_PD(rb+jnrA, rbaj);
-
- /* load chain rule terms */
- f_gb = _mm_load_pd(dadx);
- dadx += 2;
- f_gb_ai = _mm_load_pd(dadx);
- dadx += 2;
-
- /* calculate scalar force */
- f_gb = _mm_mul_sd(f_gb, rbai);
- f_gb_ai = _mm_mul_sd(f_gb_ai, rbaj);
- f_gb = _mm_add_sd(f_gb, f_gb_ai);
-
- tx = _mm_mul_sd(f_gb, dx);
- ty = _mm_mul_sd(f_gb, dy);
- tz = _mm_mul_sd(f_gb, dz);
-
- fix = _mm_add_sd(fix, tx);
- fiy = _mm_add_sd(fiy, ty);
- fiz = _mm_add_sd(fiz, tz);
-
- GMX_MM_DECREMENT_1RVEC_1POINTER_PD(f+j3A, tx, ty, tz);
- }
-
- /* fix/fiy/fiz now contain four partial force terms, that all should be
- * added to the i particle forces and shift forces.
- */
- gmx_mm_update_iforce_1atom_pd(&fix, &fiy, &fiz, f+ii3, fshift+is3);
- }
-
- return 0;
-}
-
-#else
-/* keep compiler happy */
-int genborn_sse2_dummy;
-
-#endif /* SSE2 intrinsics available */
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2008, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#include "gmxpre.h"
-
-#include <math.h>
-#include <string.h>
-
-#include "gromacs/domdec/domdec.h"
-#include "gromacs/fileio/pdbio.h"
-#include "gromacs/legacyheaders/genborn.h"
-#include "gromacs/legacyheaders/names.h"
-#include "gromacs/legacyheaders/network.h"
-#include "gromacs/legacyheaders/typedefs.h"
-#include "gromacs/math/units.h"
-#include "gromacs/math/vec.h"
-#include "gromacs/utility/fatalerror.h"
-#include "gromacs/utility/gmxmpi.h"
-#include "gromacs/utility/smalloc.h"
-
-
-/* Only compile this file if SSE intrinsics are available */
-#if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER)
-
-#include "genborn_sse2_single.h"
-
-#include <emmintrin.h>
-#include <gmx_sse2_single.h>
-
-
-int
-calc_gb_rad_still_sse2_single(t_commrec *cr, t_forcerec *fr,
- int natoms, gmx_localtop_t *top,
- float *x, t_nblist *nl,
- gmx_genborn_t *born)
-{
- int i, k, n, ii, is3, ii3, nj0, nj1, offset;
- int jnrA, jnrB, jnrC, jnrD, j3A, j3B, j3C, j3D;
- int jnrE, jnrF, jnrG, jnrH, j3E, j3F, j3G, j3H;
- int shift;
- int *mdtype;
- real shX, shY, shZ;
- int *jjnr;
- real *shiftvec;
-
- float gpi_ai, gpi2;
- float factor;
- float *gb_radius;
- float *vsolv;
- float *work;
- float *dadx;
-
- __m128 ix, iy, iz;
- __m128 jx, jy, jz;
- __m128 dx, dy, dz;
- __m128 tx, ty, tz;
- __m128 jxB, jyB, jzB;
- __m128 dxB, dyB, dzB;
- __m128 txB, tyB, tzB;
- __m128 rsq, rinv, rinv2, rinv4, rinv6;
- __m128 rsqB, rinvB, rinv2B, rinv4B, rinv6B;
- __m128 ratio, gpi, rai, raj, vai, vaj, rvdw;
- __m128 ratioB, rajB, vajB, rvdwB;
- __m128 ccf, dccf, theta, cosq, term, sinq, res, prod, prod_ai, tmp;
- __m128 ccfB, dccfB, thetaB, cosqB, termB, sinqB, resB, prodB;
- __m128 mask, icf4, icf6, mask_cmp;
- __m128 icf4B, icf6B, mask_cmpB;
-
- __m128 mask1 = gmx_mm_castsi128_ps( _mm_set_epi32(0, 0, 0, 0xffffffff) );
- __m128 mask2 = gmx_mm_castsi128_ps( _mm_set_epi32(0, 0, 0xffffffff, 0xffffffff) );
- __m128 mask3 = gmx_mm_castsi128_ps( _mm_set_epi32(0, 0xffffffff, 0xffffffff, 0xffffffff) );
-
- const __m128 half = _mm_set1_ps(0.5f);
- const __m128 three = _mm_set1_ps(3.0f);
- const __m128 one = _mm_set1_ps(1.0f);
- const __m128 two = _mm_set1_ps(2.0f);
- const __m128 zero = _mm_set1_ps(0.0f);
- const __m128 four = _mm_set1_ps(4.0f);
-
- const __m128 still_p5inv = _mm_set1_ps(STILL_P5INV);
- const __m128 still_pip5 = _mm_set1_ps(STILL_PIP5);
- const __m128 still_p4 = _mm_set1_ps(STILL_P4);
-
- factor = 0.5 * ONE_4PI_EPS0;
-
- gb_radius = born->gb_radius;
- vsolv = born->vsolv;
- work = born->gpol_still_work;
- jjnr = nl->jjnr;
- shiftvec = fr->shift_vec[0];
- dadx = fr->dadx;
-
- jnrA = jnrB = jnrC = jnrD = 0;
- jx = _mm_setzero_ps();
- jy = _mm_setzero_ps();
- jz = _mm_setzero_ps();
-
- n = 0;
-
- for (i = 0; i < natoms; i++)
- {
- work[i] = 0;
- }
-
- for (i = 0; i < nl->nri; i++)
- {
- ii = nl->iinr[i];
- ii3 = ii*3;
- is3 = 3*nl->shift[i];
- shX = shiftvec[is3];
- shY = shiftvec[is3+1];
- shZ = shiftvec[is3+2];
- nj0 = nl->jindex[i];
- nj1 = nl->jindex[i+1];
-
- ix = _mm_set1_ps(shX+x[ii3+0]);
- iy = _mm_set1_ps(shY+x[ii3+1]);
- iz = _mm_set1_ps(shZ+x[ii3+2]);
-
- offset = (nj1-nj0)%4;
-
- /* Polarization energy for atom ai */
- gpi = _mm_setzero_ps();
-
- rai = _mm_load1_ps(gb_radius+ii);
- prod_ai = _mm_set1_ps(STILL_P4*vsolv[ii]);
-
- for (k = nj0; k < nj1-4-offset; k += 8)
- {
- jnrA = jjnr[k];
- jnrB = jjnr[k+1];
- jnrC = jjnr[k+2];
- jnrD = jjnr[k+3];
- jnrE = jjnr[k+4];
- jnrF = jjnr[k+5];
- jnrG = jjnr[k+6];
- jnrH = jjnr[k+7];
-
- j3A = 3*jnrA;
- j3B = 3*jnrB;
- j3C = 3*jnrC;
- j3D = 3*jnrD;
- j3E = 3*jnrE;
- j3F = 3*jnrF;
- j3G = 3*jnrG;
- j3H = 3*jnrH;
-
- GMX_MM_LOAD_1RVEC_4POINTERS_PS(x+j3A, x+j3B, x+j3C, x+j3D, jx, jy, jz);
- GMX_MM_LOAD_1RVEC_4POINTERS_PS(x+j3E, x+j3F, x+j3G, x+j3H, jxB, jyB, jzB);
-
- GMX_MM_LOAD_4VALUES_PS(gb_radius+jnrA, gb_radius+jnrB, gb_radius+jnrC, gb_radius+jnrD, raj);
- GMX_MM_LOAD_4VALUES_PS(gb_radius+jnrE, gb_radius+jnrF, gb_radius+jnrG, gb_radius+jnrH, rajB);
- GMX_MM_LOAD_4VALUES_PS(vsolv+jnrA, vsolv+jnrB, vsolv+jnrC, vsolv+jnrD, vaj);
- GMX_MM_LOAD_4VALUES_PS(vsolv+jnrE, vsolv+jnrF, vsolv+jnrG, vsolv+jnrH, vajB);
-
- dx = _mm_sub_ps(ix, jx);
- dy = _mm_sub_ps(iy, jy);
- dz = _mm_sub_ps(iz, jz);
- dxB = _mm_sub_ps(ix, jxB);
- dyB = _mm_sub_ps(iy, jyB);
- dzB = _mm_sub_ps(iz, jzB);
-
- rsq = gmx_mm_calc_rsq_ps(dx, dy, dz);
- rsqB = gmx_mm_calc_rsq_ps(dxB, dyB, dzB);
- rinv = gmx_mm_invsqrt_ps(rsq);
- rinvB = gmx_mm_invsqrt_ps(rsqB);
- rinv2 = _mm_mul_ps(rinv, rinv);
- rinv2B = _mm_mul_ps(rinvB, rinvB);
- rinv4 = _mm_mul_ps(rinv2, rinv2);
- rinv4B = _mm_mul_ps(rinv2B, rinv2B);
- rinv6 = _mm_mul_ps(rinv4, rinv2);
- rinv6B = _mm_mul_ps(rinv4B, rinv2B);
-
- rvdw = _mm_add_ps(rai, raj);
- rvdwB = _mm_add_ps(rai, rajB);
- ratio = _mm_mul_ps(rsq, gmx_mm_inv_ps( _mm_mul_ps(rvdw, rvdw)));
- ratioB = _mm_mul_ps(rsqB, gmx_mm_inv_ps( _mm_mul_ps(rvdwB, rvdwB)));
-
- mask_cmp = _mm_cmple_ps(ratio, still_p5inv);
- mask_cmpB = _mm_cmple_ps(ratioB, still_p5inv);
-
- /* gmx_mm_sincos_ps() is quite expensive, so avoid calculating it if we can! */
- if (0 == _mm_movemask_ps(mask_cmp) )
- {
- /* if ratio>still_p5inv for ALL elements */
- ccf = one;
- dccf = _mm_setzero_ps();
- }
- else
- {
- ratio = _mm_min_ps(ratio, still_p5inv);
- theta = _mm_mul_ps(ratio, still_pip5);
- gmx_mm_sincos_ps(theta, &sinq, &cosq);
- term = _mm_mul_ps(half, _mm_sub_ps(one, cosq));
- ccf = _mm_mul_ps(term, term);
- dccf = _mm_mul_ps(_mm_mul_ps(two, term),
- _mm_mul_ps(sinq, theta));
- }
- if (0 == _mm_movemask_ps(mask_cmpB) )
- {
- /* if ratio>still_p5inv for ALL elements */
- ccfB = one;
- dccfB = _mm_setzero_ps();
- }
- else
- {
- ratioB = _mm_min_ps(ratioB, still_p5inv);
- thetaB = _mm_mul_ps(ratioB, still_pip5);
- gmx_mm_sincos_ps(thetaB, &sinqB, &cosqB);
- termB = _mm_mul_ps(half, _mm_sub_ps(one, cosqB));
- ccfB = _mm_mul_ps(termB, termB);
- dccfB = _mm_mul_ps(_mm_mul_ps(two, termB),
- _mm_mul_ps(sinqB, thetaB));
- }
-
- prod = _mm_mul_ps(still_p4, vaj);
- prodB = _mm_mul_ps(still_p4, vajB);
- icf4 = _mm_mul_ps(ccf, rinv4);
- icf4B = _mm_mul_ps(ccfB, rinv4B);
- icf6 = _mm_mul_ps( _mm_sub_ps( _mm_mul_ps(four, ccf), dccf), rinv6);
- icf6B = _mm_mul_ps( _mm_sub_ps( _mm_mul_ps(four, ccfB), dccfB), rinv6B);
-
- GMX_MM_INCREMENT_4VALUES_PS(work+jnrA, work+jnrB, work+jnrC, work+jnrD, _mm_mul_ps(prod_ai, icf4));
- GMX_MM_INCREMENT_4VALUES_PS(work+jnrE, work+jnrF, work+jnrG, work+jnrH, _mm_mul_ps(prod_ai, icf4B));
-
- gpi = _mm_add_ps(gpi, _mm_add_ps( _mm_mul_ps(prod, icf4), _mm_mul_ps(prodB, icf4B) ) );
-
- _mm_store_ps(dadx, _mm_mul_ps(prod, icf6));
- dadx += 4;
- _mm_store_ps(dadx, _mm_mul_ps(prod_ai, icf6));
- dadx += 4;
- _mm_store_ps(dadx, _mm_mul_ps(prodB, icf6B));
- dadx += 4;
- _mm_store_ps(dadx, _mm_mul_ps(prod_ai, icf6B));
- dadx += 4;
- }
-
- for (; k < nj1-offset; k += 4)
- {
- jnrA = jjnr[k];
- jnrB = jjnr[k+1];
- jnrC = jjnr[k+2];
- jnrD = jjnr[k+3];
-
- j3A = 3*jnrA;
- j3B = 3*jnrB;
- j3C = 3*jnrC;
- j3D = 3*jnrD;
-
- GMX_MM_LOAD_1RVEC_4POINTERS_PS(x+j3A, x+j3B, x+j3C, x+j3D, jx, jy, jz);
-
- GMX_MM_LOAD_4VALUES_PS(gb_radius+jnrA, gb_radius+jnrB, gb_radius+jnrC, gb_radius+jnrD, raj);
- GMX_MM_LOAD_4VALUES_PS(vsolv+jnrA, vsolv+jnrB, vsolv+jnrC, vsolv+jnrD, vaj);
-
- dx = _mm_sub_ps(ix, jx);
- dy = _mm_sub_ps(iy, jy);
- dz = _mm_sub_ps(iz, jz);
-
- rsq = gmx_mm_calc_rsq_ps(dx, dy, dz);
- rinv = gmx_mm_invsqrt_ps(rsq);
- rinv2 = _mm_mul_ps(rinv, rinv);
- rinv4 = _mm_mul_ps(rinv2, rinv2);
- rinv6 = _mm_mul_ps(rinv4, rinv2);
-
- rvdw = _mm_add_ps(rai, raj);
- ratio = _mm_mul_ps(rsq, gmx_mm_inv_ps( _mm_mul_ps(rvdw, rvdw)));
-
- mask_cmp = _mm_cmple_ps(ratio, still_p5inv);
-
- /* gmx_mm_sincos_ps() is quite expensive, so avoid calculating it if we can! */
- if (0 == _mm_movemask_ps(mask_cmp))
- {
- /* if ratio>still_p5inv for ALL elements */
- ccf = one;
- dccf = _mm_setzero_ps();
- }
- else
- {
- ratio = _mm_min_ps(ratio, still_p5inv);
- theta = _mm_mul_ps(ratio, still_pip5);
- gmx_mm_sincos_ps(theta, &sinq, &cosq);
- term = _mm_mul_ps(half, _mm_sub_ps(one, cosq));
- ccf = _mm_mul_ps(term, term);
- dccf = _mm_mul_ps(_mm_mul_ps(two, term),
- _mm_mul_ps(sinq, theta));
- }
-
- prod = _mm_mul_ps(still_p4, vaj);
- icf4 = _mm_mul_ps(ccf, rinv4);
- icf6 = _mm_mul_ps( _mm_sub_ps( _mm_mul_ps(four, ccf), dccf), rinv6);
-
- GMX_MM_INCREMENT_4VALUES_PS(work+jnrA, work+jnrB, work+jnrC, work+jnrD, _mm_mul_ps(prod_ai, icf4));
-
- gpi = _mm_add_ps(gpi, _mm_mul_ps(prod, icf4));
-
- _mm_store_ps(dadx, _mm_mul_ps(prod, icf6));
- dadx += 4;
- _mm_store_ps(dadx, _mm_mul_ps(prod_ai, icf6));
- dadx += 4;
- }
-
- if (offset != 0)
- {
- if (offset == 1)
- {
- jnrA = jjnr[k];
- j3A = 3*jnrA;
- GMX_MM_LOAD_1RVEC_1POINTER_PS(x+j3A, jx, jy, jz);
- GMX_MM_LOAD_1VALUE_PS(gb_radius+jnrA, raj);
- GMX_MM_LOAD_1VALUE_PS(vsolv+jnrA, vaj);
- mask = mask1;
- }
- else if (offset == 2)
- {
- jnrA = jjnr[k];
- jnrB = jjnr[k+1];
- j3A = 3*jnrA;
- j3B = 3*jnrB;
- GMX_MM_LOAD_1RVEC_2POINTERS_PS(x+j3A, x+j3B, jx, jy, jz);
- GMX_MM_LOAD_2VALUES_PS(gb_radius+jnrA, gb_radius+jnrB, raj);
- GMX_MM_LOAD_2VALUES_PS(vsolv+jnrA, vsolv+jnrB, vaj);
- mask = mask2;
- }
- else
- {
- /* offset must be 3 */
- jnrA = jjnr[k];
- jnrB = jjnr[k+1];
- jnrC = jjnr[k+2];
- j3A = 3*jnrA;
- j3B = 3*jnrB;
- j3C = 3*jnrC;
- GMX_MM_LOAD_1RVEC_3POINTERS_PS(x+j3A, x+j3B, x+j3C, jx, jy, jz);
- GMX_MM_LOAD_3VALUES_PS(gb_radius+jnrA, gb_radius+jnrB, gb_radius+jnrC, raj);
- GMX_MM_LOAD_3VALUES_PS(vsolv+jnrA, vsolv+jnrB, vsolv+jnrC, vaj);
- mask = mask3;
- }
-
- dx = _mm_sub_ps(ix, jx);
- dy = _mm_sub_ps(iy, jy);
- dz = _mm_sub_ps(iz, jz);
-
- rsq = gmx_mm_calc_rsq_ps(dx, dy, dz);
- rinv = gmx_mm_invsqrt_ps(rsq);
- rinv2 = _mm_mul_ps(rinv, rinv);
- rinv4 = _mm_mul_ps(rinv2, rinv2);
- rinv6 = _mm_mul_ps(rinv4, rinv2);
-
- rvdw = _mm_add_ps(rai, raj);
- ratio = _mm_mul_ps(rsq, gmx_mm_inv_ps( _mm_mul_ps(rvdw, rvdw)));
-
- mask_cmp = _mm_cmple_ps(ratio, still_p5inv);
-
- if (0 == _mm_movemask_ps(mask_cmp))
- {
- /* if ratio>still_p5inv for ALL elements */
- ccf = one;
- dccf = _mm_setzero_ps();
- }
- else
- {
- ratio = _mm_min_ps(ratio, still_p5inv);
- theta = _mm_mul_ps(ratio, still_pip5);
- gmx_mm_sincos_ps(theta, &sinq, &cosq);
- term = _mm_mul_ps(half, _mm_sub_ps(one, cosq));
- ccf = _mm_mul_ps(term, term);
- dccf = _mm_mul_ps(_mm_mul_ps(two, term),
- _mm_mul_ps(sinq, theta));
- }
-
- prod = _mm_mul_ps(still_p4, vaj);
- icf4 = _mm_mul_ps(ccf, rinv4);
- icf6 = _mm_mul_ps( _mm_sub_ps( _mm_mul_ps(four, ccf), dccf), rinv6);
-
- gpi = _mm_add_ps(gpi, _mm_mul_ps(prod, icf4));
-
- _mm_store_ps(dadx, _mm_mul_ps(prod, icf6));
- dadx += 4;
- _mm_store_ps(dadx, _mm_mul_ps(prod_ai, icf6));
- dadx += 4;
-
- tmp = _mm_mul_ps(prod_ai, icf4);
-
- if (offset == 1)
- {
- GMX_MM_INCREMENT_1VALUE_PS(work+jnrA, tmp);
- }
- else if (offset == 2)
- {
- GMX_MM_INCREMENT_2VALUES_PS(work+jnrA, work+jnrB, tmp);
- }
- else
- {
- /* offset must be 3 */
- GMX_MM_INCREMENT_3VALUES_PS(work+jnrA, work+jnrB, work+jnrC, tmp);
- }
- }
- GMX_MM_UPDATE_1POT_PS(gpi, work+ii);
- }
-
- /* Sum up the polarization energy from other nodes */
- if (DOMAINDECOMP(cr))
- {
- dd_atom_sum_real(cr->dd, work);
- }
-
- /* Compute the radii */
- for (i = 0; i < fr->natoms_force; i++) /* PELA born->nr */
- {
- if (born->use[i] != 0)
- {
- gpi_ai = born->gpol[i] + work[i]; /* add gpi to the initial pol energy gpi_ai*/
- gpi2 = gpi_ai * gpi_ai;
- born->bRad[i] = factor*gmx_invsqrt(gpi2);
- fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
- }
- }
-
- /* Extra (local) communication required for DD */
- if (DOMAINDECOMP(cr))
- {
- dd_atom_spread_real(cr->dd, born->bRad);
- dd_atom_spread_real(cr->dd, fr->invsqrta);
- }
-
- return 0;
-}
-
-
-int
-calc_gb_rad_hct_obc_sse2_single(t_commrec *cr, t_forcerec * fr, int natoms, gmx_localtop_t *top,
- float *x, t_nblist *nl, gmx_genborn_t *born, t_mdatoms *md, int gb_algorithm)
-{
- int i, ai, k, n, ii, ii3, is3, nj0, nj1, at0, at1, offset;
- int jnrA, jnrB, jnrC, jnrD;
- int j3A, j3B, j3C, j3D;
- int jnrE, jnrF, jnrG, jnrH;
- int j3E, j3F, j3G, j3H;
- float shX, shY, shZ;
- float rr, rr_inv, rr_inv2, sum_tmp, sum, sum2, sum3, gbr;
- float sum_ai2, sum_ai3, tsum, tchain, doffset;
- float *obc_param;
- float *gb_radius;
- float *work;
- int * jjnr;
- float *dadx;
- float *shiftvec;
- float min_rad, rad;
-
- __m128 ix, iy, iz, jx, jy, jz;
- __m128 dx, dy, dz, t1, t2, t3, t4;
- __m128 rsq, rinv, r;
- __m128 rai, rai_inv, raj, raj_inv, rai_inv2, sk, sk2, lij, dlij, duij;
- __m128 uij, lij2, uij2, lij3, uij3, diff2;
- __m128 lij_inv, sk2_inv, prod, log_term, tmp, tmp_sum;
- __m128 sum_ai, tmp_ai, sk_ai, sk_aj, sk2_ai, sk2_aj, sk2_rinv;
- __m128 dadx1, dadx2;
- __m128 logterm;
- __m128 mask;
- __m128 obc_mask1, obc_mask2, obc_mask3;
- __m128 jxB, jyB, jzB, t1B, t2B, t3B, t4B;
- __m128 dxB, dyB, dzB, rsqB, rinvB, rB;
- __m128 rajB, raj_invB, rai_inv2B, sk2B, lijB, dlijB, duijB;
- __m128 uijB, lij2B, uij2B, lij3B, uij3B, diff2B;
- __m128 lij_invB, sk2_invB, prodB;
- __m128 sk_ajB, sk2_ajB, sk2_rinvB;
- __m128 dadx1B, dadx2B;
- __m128 logtermB;
- __m128 obc_mask1B, obc_mask2B, obc_mask3B;
-
- __m128 mask1 = gmx_mm_castsi128_ps( _mm_set_epi32(0, 0, 0, 0xffffffff) );
- __m128 mask2 = gmx_mm_castsi128_ps( _mm_set_epi32(0, 0, 0xffffffff, 0xffffffff) );
- __m128 mask3 = gmx_mm_castsi128_ps( _mm_set_epi32(0, 0xffffffff, 0xffffffff, 0xffffffff) );
-
- __m128 oneeighth = _mm_set1_ps(0.125);
- __m128 onefourth = _mm_set1_ps(0.25);
-
- const __m128 half = _mm_set1_ps(0.5f);
- const __m128 three = _mm_set1_ps(3.0f);
- const __m128 one = _mm_set1_ps(1.0f);
- const __m128 two = _mm_set1_ps(2.0f);
- const __m128 zero = _mm_set1_ps(0.0f);
- const __m128 neg = _mm_set1_ps(-1.0f);
-
- /* Set the dielectric offset */
- doffset = born->gb_doffset;
- gb_radius = born->gb_radius;
- obc_param = born->param;
- work = born->gpol_hct_work;
- jjnr = nl->jjnr;
- dadx = fr->dadx;
- shiftvec = fr->shift_vec[0];
-
- jx = _mm_setzero_ps();
- jy = _mm_setzero_ps();
- jz = _mm_setzero_ps();
-
- jnrA = jnrB = jnrC = jnrD = 0;
-
- for (i = 0; i < born->nr; i++)
- {
- work[i] = 0;
- }
-
- for (i = 0; i < nl->nri; i++)
- {
- ii = nl->iinr[i];
- ii3 = ii*3;
- is3 = 3*nl->shift[i];
- shX = shiftvec[is3];
- shY = shiftvec[is3+1];
- shZ = shiftvec[is3+2];
- nj0 = nl->jindex[i];
- nj1 = nl->jindex[i+1];
-
- ix = _mm_set1_ps(shX+x[ii3+0]);
- iy = _mm_set1_ps(shY+x[ii3+1]);
- iz = _mm_set1_ps(shZ+x[ii3+2]);
-
- offset = (nj1-nj0)%4;
-
- rai = _mm_load1_ps(gb_radius+ii);
- rai_inv = gmx_mm_inv_ps(rai);
-
- sum_ai = _mm_setzero_ps();
-
- sk_ai = _mm_load1_ps(born->param+ii);
- sk2_ai = _mm_mul_ps(sk_ai, sk_ai);
-
- for (k = nj0; k < nj1-4-offset; k += 8)
- {
- jnrA = jjnr[k];
- jnrB = jjnr[k+1];
- jnrC = jjnr[k+2];
- jnrD = jjnr[k+3];
- jnrE = jjnr[k+4];
- jnrF = jjnr[k+5];
- jnrG = jjnr[k+6];
- jnrH = jjnr[k+7];
-
- j3A = 3*jnrA;
- j3B = 3*jnrB;
- j3C = 3*jnrC;
- j3D = 3*jnrD;
- j3E = 3*jnrE;
- j3F = 3*jnrF;
- j3G = 3*jnrG;
- j3H = 3*jnrH;
-
- GMX_MM_LOAD_1RVEC_4POINTERS_PS(x+j3A, x+j3B, x+j3C, x+j3D, jx, jy, jz);
- GMX_MM_LOAD_1RVEC_4POINTERS_PS(x+j3E, x+j3F, x+j3G, x+j3H, jxB, jyB, jzB);
- GMX_MM_LOAD_4VALUES_PS(gb_radius+jnrA, gb_radius+jnrB, gb_radius+jnrC, gb_radius+jnrD, raj);
- GMX_MM_LOAD_4VALUES_PS(gb_radius+jnrE, gb_radius+jnrF, gb_radius+jnrG, gb_radius+jnrH, rajB);
- GMX_MM_LOAD_4VALUES_PS(obc_param+jnrA, obc_param+jnrB, obc_param+jnrC, obc_param+jnrD, sk_aj);
- GMX_MM_LOAD_4VALUES_PS(obc_param+jnrE, obc_param+jnrF, obc_param+jnrG, obc_param+jnrH, sk_ajB);
-
- dx = _mm_sub_ps(ix, jx);
- dy = _mm_sub_ps(iy, jy);
- dz = _mm_sub_ps(iz, jz);
- dxB = _mm_sub_ps(ix, jxB);
- dyB = _mm_sub_ps(iy, jyB);
- dzB = _mm_sub_ps(iz, jzB);
-
- rsq = gmx_mm_calc_rsq_ps(dx, dy, dz);
- rsqB = gmx_mm_calc_rsq_ps(dxB, dyB, dzB);
-
- rinv = gmx_mm_invsqrt_ps(rsq);
- r = _mm_mul_ps(rsq, rinv);
- rinvB = gmx_mm_invsqrt_ps(rsqB);
- rB = _mm_mul_ps(rsqB, rinvB);
-
- /* Compute raj_inv aj1-4 */
- raj_inv = gmx_mm_inv_ps(raj);
- raj_invB = gmx_mm_inv_ps(rajB);
-
- /* Evaluate influence of atom aj -> ai */
- t1 = _mm_add_ps(r, sk_aj);
- t2 = _mm_sub_ps(r, sk_aj);
- t3 = _mm_sub_ps(sk_aj, r);
- t1B = _mm_add_ps(rB, sk_ajB);
- t2B = _mm_sub_ps(rB, sk_ajB);
- t3B = _mm_sub_ps(sk_ajB, rB);
- obc_mask1 = _mm_cmplt_ps(rai, t1);
- obc_mask2 = _mm_cmplt_ps(rai, t2);
- obc_mask3 = _mm_cmplt_ps(rai, t3);
- obc_mask1B = _mm_cmplt_ps(rai, t1B);
- obc_mask2B = _mm_cmplt_ps(rai, t2B);
- obc_mask3B = _mm_cmplt_ps(rai, t3B);
-
- uij = gmx_mm_inv_ps(t1);
- lij = _mm_or_ps( _mm_and_ps(obc_mask2, gmx_mm_inv_ps(t2)),
- _mm_andnot_ps(obc_mask2, rai_inv));
- dlij = _mm_and_ps(one, obc_mask2);
- uij2 = _mm_mul_ps(uij, uij);
- uij3 = _mm_mul_ps(uij2, uij);
- lij2 = _mm_mul_ps(lij, lij);
- lij3 = _mm_mul_ps(lij2, lij);
-
- uijB = gmx_mm_inv_ps(t1B);
- lijB = _mm_or_ps( _mm_and_ps(obc_mask2B, gmx_mm_inv_ps(t2B)),
- _mm_andnot_ps(obc_mask2B, rai_inv));
- dlijB = _mm_and_ps(one, obc_mask2B);
- uij2B = _mm_mul_ps(uijB, uijB);
- uij3B = _mm_mul_ps(uij2B, uijB);
- lij2B = _mm_mul_ps(lijB, lijB);
- lij3B = _mm_mul_ps(lij2B, lijB);
-
- diff2 = _mm_sub_ps(uij2, lij2);
- lij_inv = gmx_mm_invsqrt_ps(lij2);
- sk2_aj = _mm_mul_ps(sk_aj, sk_aj);
- sk2_rinv = _mm_mul_ps(sk2_aj, rinv);
- prod = _mm_mul_ps(onefourth, sk2_rinv);
-
- diff2B = _mm_sub_ps(uij2B, lij2B);
- lij_invB = gmx_mm_invsqrt_ps(lij2B);
- sk2_ajB = _mm_mul_ps(sk_ajB, sk_ajB);
- sk2_rinvB = _mm_mul_ps(sk2_ajB, rinvB);
- prodB = _mm_mul_ps(onefourth, sk2_rinvB);
-
- logterm = gmx_mm_log_ps(_mm_mul_ps(uij, lij_inv));
- logtermB = gmx_mm_log_ps(_mm_mul_ps(uijB, lij_invB));
-
- t1 = _mm_sub_ps(lij, uij);
- t2 = _mm_mul_ps(diff2,
- _mm_sub_ps(_mm_mul_ps(onefourth, r),
- prod));
- t3 = _mm_mul_ps(half, _mm_mul_ps(rinv, logterm));
- t1 = _mm_add_ps(t1, _mm_add_ps(t2, t3));
- t4 = _mm_mul_ps(two, _mm_sub_ps(rai_inv, lij));
- t4 = _mm_and_ps(t4, obc_mask3);
- t1 = _mm_mul_ps(half, _mm_add_ps(t1, t4));
-
- t1B = _mm_sub_ps(lijB, uijB);
- t2B = _mm_mul_ps(diff2B,
- _mm_sub_ps(_mm_mul_ps(onefourth, rB),
- prodB));
- t3B = _mm_mul_ps(half, _mm_mul_ps(rinvB, logtermB));
- t1B = _mm_add_ps(t1B, _mm_add_ps(t2B, t3B));
- t4B = _mm_mul_ps(two, _mm_sub_ps(rai_inv, lijB));
- t4B = _mm_and_ps(t4B, obc_mask3B);
- t1B = _mm_mul_ps(half, _mm_add_ps(t1B, t4B));
-
- sum_ai = _mm_add_ps(sum_ai, _mm_add_ps( _mm_and_ps(t1, obc_mask1), _mm_and_ps(t1B, obc_mask1B) ));
-
- t1 = _mm_add_ps(_mm_mul_ps(half, lij2),
- _mm_mul_ps(prod, lij3));
- t1 = _mm_sub_ps(t1,
- _mm_mul_ps(onefourth,
- _mm_add_ps(_mm_mul_ps(lij, rinv),
- _mm_mul_ps(lij3, r))));
- t2 = _mm_mul_ps(onefourth,
- _mm_add_ps(_mm_mul_ps(uij, rinv),
- _mm_mul_ps(uij3, r)));
- t2 = _mm_sub_ps(t2,
- _mm_add_ps(_mm_mul_ps(half, uij2),
- _mm_mul_ps(prod, uij3)));
- t3 = _mm_mul_ps(_mm_mul_ps(onefourth, logterm),
- _mm_mul_ps(rinv, rinv));
- t3 = _mm_sub_ps(t3,
- _mm_mul_ps(_mm_mul_ps(diff2, oneeighth),
- _mm_add_ps(one,
- _mm_mul_ps(sk2_rinv, rinv))));
- t1 = _mm_mul_ps(rinv,
- _mm_add_ps(_mm_mul_ps(dlij, t1),
- _mm_add_ps(t2, t3)));
-
-
-
- t1B = _mm_add_ps(_mm_mul_ps(half, lij2B),
- _mm_mul_ps(prodB, lij3B));
- t1B = _mm_sub_ps(t1B,
- _mm_mul_ps(onefourth,
- _mm_add_ps(_mm_mul_ps(lijB, rinvB),
- _mm_mul_ps(lij3B, rB))));
- t2B = _mm_mul_ps(onefourth,
- _mm_add_ps(_mm_mul_ps(uijB, rinvB),
- _mm_mul_ps(uij3B, rB)));
- t2B = _mm_sub_ps(t2B,
- _mm_add_ps(_mm_mul_ps(half, uij2B),
- _mm_mul_ps(prodB, uij3B)));
- t3B = _mm_mul_ps(_mm_mul_ps(onefourth, logtermB),
- _mm_mul_ps(rinvB, rinvB));
- t3B = _mm_sub_ps(t3B,
- _mm_mul_ps(_mm_mul_ps(diff2B, oneeighth),
- _mm_add_ps(one,
- _mm_mul_ps(sk2_rinvB, rinvB))));
- t1B = _mm_mul_ps(rinvB,
- _mm_add_ps(_mm_mul_ps(dlijB, t1B),
- _mm_add_ps(t2B, t3B)));
-
- dadx1 = _mm_and_ps(t1, obc_mask1);
- dadx1B = _mm_and_ps(t1B, obc_mask1B);
-
-
- /* Evaluate influence of atom ai -> aj */
- t1 = _mm_add_ps(r, sk_ai);
- t2 = _mm_sub_ps(r, sk_ai);
- t3 = _mm_sub_ps(sk_ai, r);
- t1B = _mm_add_ps(rB, sk_ai);
- t2B = _mm_sub_ps(rB, sk_ai);
- t3B = _mm_sub_ps(sk_ai, rB);
- obc_mask1 = _mm_cmplt_ps(raj, t1);
- obc_mask2 = _mm_cmplt_ps(raj, t2);
- obc_mask3 = _mm_cmplt_ps(raj, t3);
- obc_mask1B = _mm_cmplt_ps(rajB, t1B);
- obc_mask2B = _mm_cmplt_ps(rajB, t2B);
- obc_mask3B = _mm_cmplt_ps(rajB, t3B);
-
- uij = gmx_mm_inv_ps(t1);
- lij = _mm_or_ps( _mm_and_ps(obc_mask2, gmx_mm_inv_ps(t2)),
- _mm_andnot_ps(obc_mask2, raj_inv));
- dlij = _mm_and_ps(one, obc_mask2);
- uij2 = _mm_mul_ps(uij, uij);
- uij3 = _mm_mul_ps(uij2, uij);
- lij2 = _mm_mul_ps(lij, lij);
- lij3 = _mm_mul_ps(lij2, lij);
-
- uijB = gmx_mm_inv_ps(t1B);
- lijB = _mm_or_ps( _mm_and_ps(obc_mask2B, gmx_mm_inv_ps(t2B)),
- _mm_andnot_ps(obc_mask2B, raj_invB));
- dlijB = _mm_and_ps(one, obc_mask2B);
- uij2B = _mm_mul_ps(uijB, uijB);
- uij3B = _mm_mul_ps(uij2B, uijB);
- lij2B = _mm_mul_ps(lijB, lijB);
- lij3B = _mm_mul_ps(lij2B, lijB);
-
- diff2 = _mm_sub_ps(uij2, lij2);
- lij_inv = gmx_mm_invsqrt_ps(lij2);
- sk2_rinv = _mm_mul_ps(sk2_ai, rinv);
- prod = _mm_mul_ps(onefourth, sk2_rinv);
-
- diff2B = _mm_sub_ps(uij2B, lij2B);
- lij_invB = gmx_mm_invsqrt_ps(lij2B);
- sk2_rinvB = _mm_mul_ps(sk2_ai, rinvB);
- prodB = _mm_mul_ps(onefourth, sk2_rinvB);
-
- logterm = gmx_mm_log_ps(_mm_mul_ps(uij, lij_inv));
- logtermB = gmx_mm_log_ps(_mm_mul_ps(uijB, lij_invB));
-
- t1 = _mm_sub_ps(lij, uij);
- t2 = _mm_mul_ps(diff2,
- _mm_sub_ps(_mm_mul_ps(onefourth, r),
- prod));
- t3 = _mm_mul_ps(half, _mm_mul_ps(rinv, logterm));
- t1 = _mm_add_ps(t1, _mm_add_ps(t2, t3));
- t4 = _mm_mul_ps(two, _mm_sub_ps(raj_inv, lij));
- t4 = _mm_and_ps(t4, obc_mask3);
- t1 = _mm_mul_ps(half, _mm_add_ps(t1, t4));
-
- t1B = _mm_sub_ps(lijB, uijB);
- t2B = _mm_mul_ps(diff2B,
- _mm_sub_ps(_mm_mul_ps(onefourth, rB),
- prodB));
- t3B = _mm_mul_ps(half, _mm_mul_ps(rinvB, logtermB));
- t1B = _mm_add_ps(t1B, _mm_add_ps(t2B, t3B));
- t4B = _mm_mul_ps(two, _mm_sub_ps(raj_invB, lijB));
- t4B = _mm_and_ps(t4B, obc_mask3B);
- t1B = _mm_mul_ps(half, _mm_add_ps(t1B, t4B));
-
- GMX_MM_INCREMENT_4VALUES_PS(work+jnrA, work+jnrB, work+jnrC, work+jnrD, _mm_and_ps(t1, obc_mask1));
- GMX_MM_INCREMENT_4VALUES_PS(work+jnrE, work+jnrF, work+jnrG, work+jnrH, _mm_and_ps(t1B, obc_mask1B));
-
- t1 = _mm_add_ps(_mm_mul_ps(half, lij2),
- _mm_mul_ps(prod, lij3));
- t1 = _mm_sub_ps(t1,
- _mm_mul_ps(onefourth,
- _mm_add_ps(_mm_mul_ps(lij, rinv),
- _mm_mul_ps(lij3, r))));
- t2 = _mm_mul_ps(onefourth,
- _mm_add_ps(_mm_mul_ps(uij, rinv),
- _mm_mul_ps(uij3, r)));
- t2 = _mm_sub_ps(t2,
- _mm_add_ps(_mm_mul_ps(half, uij2),
- _mm_mul_ps(prod, uij3)));
- t3 = _mm_mul_ps(_mm_mul_ps(onefourth, logterm),
- _mm_mul_ps(rinv, rinv));
- t3 = _mm_sub_ps(t3,
- _mm_mul_ps(_mm_mul_ps(diff2, oneeighth),
- _mm_add_ps(one,
- _mm_mul_ps(sk2_rinv, rinv))));
- t1 = _mm_mul_ps(rinv,
- _mm_add_ps(_mm_mul_ps(dlij, t1),
- _mm_add_ps(t2, t3)));
-
-
- t1B = _mm_add_ps(_mm_mul_ps(half, lij2B),
- _mm_mul_ps(prodB, lij3B));
- t1B = _mm_sub_ps(t1B,
- _mm_mul_ps(onefourth,
- _mm_add_ps(_mm_mul_ps(lijB, rinvB),
- _mm_mul_ps(lij3B, rB))));
- t2B = _mm_mul_ps(onefourth,
- _mm_add_ps(_mm_mul_ps(uijB, rinvB),
- _mm_mul_ps(uij3B, rB)));
- t2B = _mm_sub_ps(t2B,
- _mm_add_ps(_mm_mul_ps(half, uij2B),
- _mm_mul_ps(prodB, uij3B)));
- t3B = _mm_mul_ps(_mm_mul_ps(onefourth, logtermB),
- _mm_mul_ps(rinvB, rinvB));
- t3B = _mm_sub_ps(t3B,
- _mm_mul_ps(_mm_mul_ps(diff2B, oneeighth),
- _mm_add_ps(one,
- _mm_mul_ps(sk2_rinvB, rinvB))));
- t1B = _mm_mul_ps(rinvB,
- _mm_add_ps(_mm_mul_ps(dlijB, t1B),
- _mm_add_ps(t2B, t3B)));
-
-
- dadx2 = _mm_and_ps(t1, obc_mask1);
- dadx2B = _mm_and_ps(t1B, obc_mask1B);
-
- _mm_store_ps(dadx, dadx1);
- dadx += 4;
- _mm_store_ps(dadx, dadx2);
- dadx += 4;
- _mm_store_ps(dadx, dadx1B);
- dadx += 4;
- _mm_store_ps(dadx, dadx2B);
- dadx += 4;
-
- } /* end normal inner loop */
-
- for (; k < nj1-offset; k += 4)
- {
- jnrA = jjnr[k];
- jnrB = jjnr[k+1];
- jnrC = jjnr[k+2];
- jnrD = jjnr[k+3];
-
- j3A = 3*jnrA;
- j3B = 3*jnrB;
- j3C = 3*jnrC;
- j3D = 3*jnrD;
-
- GMX_MM_LOAD_1RVEC_4POINTERS_PS(x+j3A, x+j3B, x+j3C, x+j3D, jx, jy, jz);
- GMX_MM_LOAD_4VALUES_PS(gb_radius+jnrA, gb_radius+jnrB, gb_radius+jnrC, gb_radius+jnrD, raj);
- GMX_MM_LOAD_4VALUES_PS(obc_param+jnrA, obc_param+jnrB, obc_param+jnrC, obc_param+jnrD, sk_aj);
-
- dx = _mm_sub_ps(ix, jx);
- dy = _mm_sub_ps(iy, jy);
- dz = _mm_sub_ps(iz, jz);
-
- rsq = gmx_mm_calc_rsq_ps(dx, dy, dz);
-
- rinv = gmx_mm_invsqrt_ps(rsq);
- r = _mm_mul_ps(rsq, rinv);
-
- /* Compute raj_inv aj1-4 */
- raj_inv = gmx_mm_inv_ps(raj);
-
- /* Evaluate influence of atom aj -> ai */
- t1 = _mm_add_ps(r, sk_aj);
- obc_mask1 = _mm_cmplt_ps(rai, t1);
-
- if (_mm_movemask_ps(obc_mask1))
- {
- /* If any of the elements has rai<dr+sk, this is executed */
- t2 = _mm_sub_ps(r, sk_aj);
- t3 = _mm_sub_ps(sk_aj, r);
-
- obc_mask2 = _mm_cmplt_ps(rai, t2);
- obc_mask3 = _mm_cmplt_ps(rai, t3);
-
- uij = gmx_mm_inv_ps(t1);
- lij = _mm_or_ps( _mm_and_ps(obc_mask2, gmx_mm_inv_ps(t2)),
- _mm_andnot_ps(obc_mask2, rai_inv));
- dlij = _mm_and_ps(one, obc_mask2);
- uij2 = _mm_mul_ps(uij, uij);
- uij3 = _mm_mul_ps(uij2, uij);
- lij2 = _mm_mul_ps(lij, lij);
- lij3 = _mm_mul_ps(lij2, lij);
- diff2 = _mm_sub_ps(uij2, lij2);
- lij_inv = gmx_mm_invsqrt_ps(lij2);
- sk2_aj = _mm_mul_ps(sk_aj, sk_aj);
- sk2_rinv = _mm_mul_ps(sk2_aj, rinv);
- prod = _mm_mul_ps(onefourth, sk2_rinv);
- logterm = gmx_mm_log_ps(_mm_mul_ps(uij, lij_inv));
- t1 = _mm_sub_ps(lij, uij);
- t2 = _mm_mul_ps(diff2,
- _mm_sub_ps(_mm_mul_ps(onefourth, r),
- prod));
- t3 = _mm_mul_ps(half, _mm_mul_ps(rinv, logterm));
- t1 = _mm_add_ps(t1, _mm_add_ps(t2, t3));
- t4 = _mm_mul_ps(two, _mm_sub_ps(rai_inv, lij));
- t4 = _mm_and_ps(t4, obc_mask3);
- t1 = _mm_mul_ps(half, _mm_add_ps(t1, t4));
- sum_ai = _mm_add_ps(sum_ai, _mm_and_ps(t1, obc_mask1));
- t1 = _mm_add_ps(_mm_mul_ps(half, lij2),
- _mm_mul_ps(prod, lij3));
- t1 = _mm_sub_ps(t1,
- _mm_mul_ps(onefourth,
- _mm_add_ps(_mm_mul_ps(lij, rinv),
- _mm_mul_ps(lij3, r))));
- t2 = _mm_mul_ps(onefourth,
- _mm_add_ps(_mm_mul_ps(uij, rinv),
- _mm_mul_ps(uij3, r)));
- t2 = _mm_sub_ps(t2,
- _mm_add_ps(_mm_mul_ps(half, uij2),
- _mm_mul_ps(prod, uij3)));
- t3 = _mm_mul_ps(_mm_mul_ps(onefourth, logterm),
- _mm_mul_ps(rinv, rinv));
- t3 = _mm_sub_ps(t3,
- _mm_mul_ps(_mm_mul_ps(diff2, oneeighth),
- _mm_add_ps(one,
- _mm_mul_ps(sk2_rinv, rinv))));
- t1 = _mm_mul_ps(rinv,
- _mm_add_ps(_mm_mul_ps(dlij, t1),
- _mm_add_ps(t2, t3)));
-
- dadx1 = _mm_and_ps(t1, obc_mask1);
- }
- else
- {
- dadx1 = _mm_setzero_ps();
- }
-
- /* Evaluate influence of atom ai -> aj */
- t1 = _mm_add_ps(r, sk_ai);
- obc_mask1 = _mm_cmplt_ps(raj, t1);
-
- if (_mm_movemask_ps(obc_mask1))
- {
- t2 = _mm_sub_ps(r, sk_ai);
- t3 = _mm_sub_ps(sk_ai, r);
- obc_mask2 = _mm_cmplt_ps(raj, t2);
- obc_mask3 = _mm_cmplt_ps(raj, t3);
-
- uij = gmx_mm_inv_ps(t1);
- lij = _mm_or_ps( _mm_and_ps(obc_mask2, gmx_mm_inv_ps(t2)),
- _mm_andnot_ps(obc_mask2, raj_inv));
- dlij = _mm_and_ps(one, obc_mask2);
- uij2 = _mm_mul_ps(uij, uij);
- uij3 = _mm_mul_ps(uij2, uij);
- lij2 = _mm_mul_ps(lij, lij);
- lij3 = _mm_mul_ps(lij2, lij);
- diff2 = _mm_sub_ps(uij2, lij2);
- lij_inv = gmx_mm_invsqrt_ps(lij2);
- sk2_rinv = _mm_mul_ps(sk2_ai, rinv);
- prod = _mm_mul_ps(onefourth, sk2_rinv);
- logterm = gmx_mm_log_ps(_mm_mul_ps(uij, lij_inv));
- t1 = _mm_sub_ps(lij, uij);
- t2 = _mm_mul_ps(diff2,
- _mm_sub_ps(_mm_mul_ps(onefourth, r),
- prod));
- t3 = _mm_mul_ps(half, _mm_mul_ps(rinv, logterm));
- t1 = _mm_add_ps(t1, _mm_add_ps(t2, t3));
- t4 = _mm_mul_ps(two, _mm_sub_ps(raj_inv, lij));
- t4 = _mm_and_ps(t4, obc_mask3);
- t1 = _mm_mul_ps(half, _mm_add_ps(t1, t4));
-
- GMX_MM_INCREMENT_4VALUES_PS(work+jnrA, work+jnrB, work+jnrC, work+jnrD, _mm_and_ps(t1, obc_mask1));
-
- t1 = _mm_add_ps(_mm_mul_ps(half, lij2),
- _mm_mul_ps(prod, lij3));
- t1 = _mm_sub_ps(t1,
- _mm_mul_ps(onefourth,
- _mm_add_ps(_mm_mul_ps(lij, rinv),
- _mm_mul_ps(lij3, r))));
- t2 = _mm_mul_ps(onefourth,
- _mm_add_ps(_mm_mul_ps(uij, rinv),
- _mm_mul_ps(uij3, r)));
- t2 = _mm_sub_ps(t2,
- _mm_add_ps(_mm_mul_ps(half, uij2),
- _mm_mul_ps(prod, uij3)));
- t3 = _mm_mul_ps(_mm_mul_ps(onefourth, logterm),
- _mm_mul_ps(rinv, rinv));
- t3 = _mm_sub_ps(t3,
- _mm_mul_ps(_mm_mul_ps(diff2, oneeighth),
- _mm_add_ps(one,
- _mm_mul_ps(sk2_rinv, rinv))));
- t1 = _mm_mul_ps(rinv,
- _mm_add_ps(_mm_mul_ps(dlij, t1),
- _mm_add_ps(t2, t3)));
- dadx2 = _mm_and_ps(t1, obc_mask1);
- }
- else
- {
- dadx2 = _mm_setzero_ps();
- }
-
- _mm_store_ps(dadx, dadx1);
- dadx += 4;
- _mm_store_ps(dadx, dadx2);
- dadx += 4;
- } /* end normal inner loop */
-
- if (offset != 0)
- {
- if (offset == 1)
- {
- jnrA = jjnr[k];
- j3A = 3*jnrA;
- GMX_MM_LOAD_1RVEC_1POINTER_PS(x+j3A, jx, jy, jz);
- GMX_MM_LOAD_1VALUE_PS(gb_radius+jnrA, raj);
- GMX_MM_LOAD_1VALUE_PS(obc_param+jnrA, sk_aj);
- mask = mask1;
- }
- else if (offset == 2)
- {
- jnrA = jjnr[k];
- jnrB = jjnr[k+1];
- j3A = 3*jnrA;
- j3B = 3*jnrB;
- GMX_MM_LOAD_1RVEC_2POINTERS_PS(x+j3A, x+j3B, jx, jy, jz);
- GMX_MM_LOAD_2VALUES_PS(gb_radius+jnrA, gb_radius+jnrB, raj);
- GMX_MM_LOAD_2VALUES_PS(obc_param+jnrA, obc_param+jnrB, sk_aj);
- mask = mask2;
- }
- else
- {
- /* offset must be 3 */
- jnrA = jjnr[k];
- jnrB = jjnr[k+1];
- jnrC = jjnr[k+2];
- j3A = 3*jnrA;
- j3B = 3*jnrB;
- j3C = 3*jnrC;
- GMX_MM_LOAD_1RVEC_3POINTERS_PS(x+j3A, x+j3B, x+j3C, jx, jy, jz);
- GMX_MM_LOAD_3VALUES_PS(gb_radius+jnrA, gb_radius+jnrB, gb_radius+jnrC, raj);
- GMX_MM_LOAD_3VALUES_PS(obc_param+jnrA, obc_param+jnrB, obc_param+jnrC, sk_aj);
- mask = mask3;
- }
-
- dx = _mm_sub_ps(ix, jx);
- dy = _mm_sub_ps(iy, jy);
- dz = _mm_sub_ps(iz, jz);
-
- rsq = gmx_mm_calc_rsq_ps(dx, dy, dz);
-
- rinv = gmx_mm_invsqrt_ps(rsq);
- r = _mm_mul_ps(rsq, rinv);
-
- /* Compute raj_inv aj1-4 */
- raj_inv = gmx_mm_inv_ps(raj);
-
- /* Evaluate influence of atom aj -> ai */
- t1 = _mm_add_ps(r, sk_aj);
- obc_mask1 = _mm_cmplt_ps(rai, t1);
- obc_mask1 = _mm_and_ps(obc_mask1, mask);
-
- if (_mm_movemask_ps(obc_mask1))
- {
- t2 = _mm_sub_ps(r, sk_aj);
- t3 = _mm_sub_ps(sk_aj, r);
- obc_mask2 = _mm_cmplt_ps(rai, t2);
- obc_mask3 = _mm_cmplt_ps(rai, t3);
-
- uij = gmx_mm_inv_ps(t1);
- lij = _mm_or_ps( _mm_and_ps(obc_mask2, gmx_mm_inv_ps(t2)),
- _mm_andnot_ps(obc_mask2, rai_inv));
- dlij = _mm_and_ps(one, obc_mask2);
- uij2 = _mm_mul_ps(uij, uij);
- uij3 = _mm_mul_ps(uij2, uij);
- lij2 = _mm_mul_ps(lij, lij);
- lij3 = _mm_mul_ps(lij2, lij);
- diff2 = _mm_sub_ps(uij2, lij2);
- lij_inv = gmx_mm_invsqrt_ps(lij2);
- sk2_aj = _mm_mul_ps(sk_aj, sk_aj);
- sk2_rinv = _mm_mul_ps(sk2_aj, rinv);
- prod = _mm_mul_ps(onefourth, sk2_rinv);
- logterm = gmx_mm_log_ps(_mm_mul_ps(uij, lij_inv));
- t1 = _mm_sub_ps(lij, uij);
- t2 = _mm_mul_ps(diff2,
- _mm_sub_ps(_mm_mul_ps(onefourth, r),
- prod));
- t3 = _mm_mul_ps(half, _mm_mul_ps(rinv, logterm));
- t1 = _mm_add_ps(t1, _mm_add_ps(t2, t3));
- t4 = _mm_mul_ps(two, _mm_sub_ps(rai_inv, lij));
- t4 = _mm_and_ps(t4, obc_mask3);
- t1 = _mm_mul_ps(half, _mm_add_ps(t1, t4));
- sum_ai = _mm_add_ps(sum_ai, _mm_and_ps(t1, obc_mask1));
- t1 = _mm_add_ps(_mm_mul_ps(half, lij2),
- _mm_mul_ps(prod, lij3));
- t1 = _mm_sub_ps(t1,
- _mm_mul_ps(onefourth,
- _mm_add_ps(_mm_mul_ps(lij, rinv),
- _mm_mul_ps(lij3, r))));
- t2 = _mm_mul_ps(onefourth,
- _mm_add_ps(_mm_mul_ps(uij, rinv),
- _mm_mul_ps(uij3, r)));
- t2 = _mm_sub_ps(t2,
- _mm_add_ps(_mm_mul_ps(half, uij2),
- _mm_mul_ps(prod, uij3)));
- t3 = _mm_mul_ps(_mm_mul_ps(onefourth, logterm),
- _mm_mul_ps(rinv, rinv));
- t3 = _mm_sub_ps(t3,
- _mm_mul_ps(_mm_mul_ps(diff2, oneeighth),
- _mm_add_ps(one,
- _mm_mul_ps(sk2_rinv, rinv))));
- t1 = _mm_mul_ps(rinv,
- _mm_add_ps(_mm_mul_ps(dlij, t1),
- _mm_add_ps(t2, t3)));
- dadx1 = _mm_and_ps(t1, obc_mask1);
- }
- else
- {
- dadx1 = _mm_setzero_ps();
- }
-
- /* Evaluate influence of atom ai -> aj */
- t1 = _mm_add_ps(r, sk_ai);
- obc_mask1 = _mm_cmplt_ps(raj, t1);
- obc_mask1 = _mm_and_ps(obc_mask1, mask);
-
- if (_mm_movemask_ps(obc_mask1))
- {
- t2 = _mm_sub_ps(r, sk_ai);
- t3 = _mm_sub_ps(sk_ai, r);
- obc_mask2 = _mm_cmplt_ps(raj, t2);
- obc_mask3 = _mm_cmplt_ps(raj, t3);
-
- uij = gmx_mm_inv_ps(t1);
- lij = _mm_or_ps(_mm_and_ps(obc_mask2, gmx_mm_inv_ps(t2)),
- _mm_andnot_ps(obc_mask2, raj_inv));
- dlij = _mm_and_ps(one, obc_mask2);
- uij2 = _mm_mul_ps(uij, uij);
- uij3 = _mm_mul_ps(uij2, uij);
- lij2 = _mm_mul_ps(lij, lij);
- lij3 = _mm_mul_ps(lij2, lij);
- diff2 = _mm_sub_ps(uij2, lij2);
- lij_inv = gmx_mm_invsqrt_ps(lij2);
- sk2_rinv = _mm_mul_ps(sk2_ai, rinv);
- prod = _mm_mul_ps(onefourth, sk2_rinv);
- logterm = gmx_mm_log_ps(_mm_mul_ps(uij, lij_inv));
- t1 = _mm_sub_ps(lij, uij);
- t2 = _mm_mul_ps(diff2,
- _mm_sub_ps(_mm_mul_ps(onefourth, r),
- prod));
- t3 = _mm_mul_ps(half, _mm_mul_ps(rinv, logterm));
- t1 = _mm_add_ps(t1, _mm_add_ps(t2, t3));
- t4 = _mm_mul_ps(two, _mm_sub_ps(raj_inv, lij));
- t4 = _mm_and_ps(t4, obc_mask3);
- t1 = _mm_mul_ps(half, _mm_add_ps(t1, t4));
-
- tmp = _mm_and_ps(t1, obc_mask1);
-
- t1 = _mm_add_ps(_mm_mul_ps(half, lij2),
- _mm_mul_ps(prod, lij3));
- t1 = _mm_sub_ps(t1,
- _mm_mul_ps(onefourth,
- _mm_add_ps(_mm_mul_ps(lij, rinv),
- _mm_mul_ps(lij3, r))));
- t2 = _mm_mul_ps(onefourth,
- _mm_add_ps(_mm_mul_ps(uij, rinv),
- _mm_mul_ps(uij3, r)));
- t2 = _mm_sub_ps(t2,
- _mm_add_ps(_mm_mul_ps(half, uij2),
- _mm_mul_ps(prod, uij3)));
- t3 = _mm_mul_ps(_mm_mul_ps(onefourth, logterm),
- _mm_mul_ps(rinv, rinv));
- t3 = _mm_sub_ps(t3,
- _mm_mul_ps(_mm_mul_ps(diff2, oneeighth),
- _mm_add_ps(one,
- _mm_mul_ps(sk2_rinv, rinv))));
- t1 = _mm_mul_ps(rinv,
- _mm_add_ps(_mm_mul_ps(dlij, t1),
- _mm_add_ps(t2, t3)));
- dadx2 = _mm_and_ps(t1, obc_mask1);
- }
- else
- {
- dadx2 = _mm_setzero_ps();
- tmp = _mm_setzero_ps();
- }
-
- _mm_store_ps(dadx, dadx1);
- dadx += 4;
- _mm_store_ps(dadx, dadx2);
- dadx += 4;
-
- if (offset == 1)
- {
- GMX_MM_INCREMENT_1VALUE_PS(work+jnrA, tmp);
- }
- else if (offset == 2)
- {
- GMX_MM_INCREMENT_2VALUES_PS(work+jnrA, work+jnrB, tmp);
- }
- else
- {
- /* offset must be 3 */
- GMX_MM_INCREMENT_3VALUES_PS(work+jnrA, work+jnrB, work+jnrC, tmp);
- }
-
- }
- GMX_MM_UPDATE_1POT_PS(sum_ai, work+ii);
-
- }
-
- /* Parallel summations */
- if (DOMAINDECOMP(cr))
- {
- dd_atom_sum_real(cr->dd, work);
- }
-
- if (gb_algorithm == egbHCT)
- {
- /* HCT */
- for (i = 0; i < fr->natoms_force; i++) /* PELA born->nr */
- {
- if (born->use[i] != 0)
- {
- rr = top->atomtypes.gb_radius[md->typeA[i]]-doffset;
- sum = 1.0/rr - work[i];
- min_rad = rr + doffset;
- rad = 1.0/sum;
-
- born->bRad[i] = rad > min_rad ? rad : min_rad;
- fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
- }
- }
-
- /* Extra communication required for DD */
- if (DOMAINDECOMP(cr))
- {
- dd_atom_spread_real(cr->dd, born->bRad);
- dd_atom_spread_real(cr->dd, fr->invsqrta);
- }
- }
- else
- {
- /* OBC */
- for (i = 0; i < fr->natoms_force; i++) /* PELA born->nr */
- {
- if (born->use[i] != 0)
- {
- rr = top->atomtypes.gb_radius[md->typeA[i]];
- rr_inv2 = 1.0/rr;
- rr = rr-doffset;
- rr_inv = 1.0/rr;
- sum = rr * work[i];
- sum2 = sum * sum;
- sum3 = sum2 * sum;
-
- tsum = tanh(born->obc_alpha*sum-born->obc_beta*sum2+born->obc_gamma*sum3);
- born->bRad[i] = rr_inv - tsum*rr_inv2;
- born->bRad[i] = 1.0 / born->bRad[i];
-
- fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
-
- tchain = rr * (born->obc_alpha-2*born->obc_beta*sum+3*born->obc_gamma*sum2);
- born->drobc[i] = (1.0-tsum*tsum)*tchain*rr_inv2;
- }
- }
- /* Extra (local) communication required for DD */
- if (DOMAINDECOMP(cr))
- {
- dd_atom_spread_real(cr->dd, born->bRad);
- dd_atom_spread_real(cr->dd, fr->invsqrta);
- dd_atom_spread_real(cr->dd, born->drobc);
- }
- }
-
-
-
- return 0;
-}
-
-
-
-float calc_gb_chainrule_sse2_single(int natoms, t_nblist *nl, float *dadx, float *dvda,
- float *x, float *f, float *fshift, float *shiftvec,
- int gb_algorithm, gmx_genborn_t *born, t_mdatoms *md)
-{
- int i, k, n, ii, jnr, ii3, is3, nj0, nj1, offset, n0, n1;
- int jnrA, jnrB, jnrC, jnrD;
- int j3A, j3B, j3C, j3D;
- int jnrE, jnrF, jnrG, jnrH;
- int j3E, j3F, j3G, j3H;
- int * jjnr;
-
- float rbi, shX, shY, shZ;
- float *rb;
-
- __m128 ix, iy, iz;
- __m128 jx, jy, jz;
- __m128 jxB, jyB, jzB;
- __m128 fix, fiy, fiz;
- __m128 dx, dy, dz;
- __m128 tx, ty, tz;
- __m128 dxB, dyB, dzB;
- __m128 txB, tyB, tzB;
-
- __m128 rbai, rbaj, rbajB, f_gb, f_gb_ai, f_gbB, f_gb_aiB;
- __m128 xmm1, xmm2, xmm3;
-
- const __m128 two = _mm_set1_ps(2.0f);
-
- rb = born->work;
-
- jjnr = nl->jjnr;
-
- /* Loop to get the proper form for the Born radius term, sse style */
- offset = natoms%4;
-
- n0 = 0;
- n1 = natoms;
-
- if (gb_algorithm == egbSTILL)
- {
- for (i = n0; i < n1; i++)
- {
- rbi = born->bRad[i];
- rb[i] = (2 * rbi * rbi * dvda[i])/ONE_4PI_EPS0;
- }
- }
- else if (gb_algorithm == egbHCT)
- {
- for (i = n0; i < n1; i++)
- {
- rbi = born->bRad[i];
- rb[i] = rbi * rbi * dvda[i];
- }
- }
- else if (gb_algorithm == egbOBC)
- {
- for (i = n0; i < n1; i++)
- {
- rbi = born->bRad[i];
- rb[i] = rbi * rbi * born->drobc[i] * dvda[i];
- }
- }
-
- jz = _mm_setzero_ps();
-
- n = j3A = j3B = j3C = j3D = 0;
-
- for (i = 0; i < nl->nri; i++)
- {
- ii = nl->iinr[i];
- ii3 = ii*3;
- is3 = 3*nl->shift[i];
- shX = shiftvec[is3];
- shY = shiftvec[is3+1];
- shZ = shiftvec[is3+2];
- nj0 = nl->jindex[i];
- nj1 = nl->jindex[i+1];
-
- ix = _mm_set1_ps(shX+x[ii3+0]);
- iy = _mm_set1_ps(shY+x[ii3+1]);
- iz = _mm_set1_ps(shZ+x[ii3+2]);
-
- offset = (nj1-nj0)%4;
-
- rbai = _mm_load1_ps(rb+ii);
- fix = _mm_setzero_ps();
- fiy = _mm_setzero_ps();
- fiz = _mm_setzero_ps();
-
-
- for (k = nj0; k < nj1-offset; k += 4)
- {
- jnrA = jjnr[k];
- jnrB = jjnr[k+1];
- jnrC = jjnr[k+2];
- jnrD = jjnr[k+3];
-
- j3A = 3*jnrA;
- j3B = 3*jnrB;
- j3C = 3*jnrC;
- j3D = 3*jnrD;
-
- GMX_MM_LOAD_1RVEC_4POINTERS_PS(x+j3A, x+j3B, x+j3C, x+j3D, jx, jy, jz);
-
- dx = _mm_sub_ps(ix, jx);
- dy = _mm_sub_ps(iy, jy);
- dz = _mm_sub_ps(iz, jz);
-
- GMX_MM_LOAD_4VALUES_PS(rb+jnrA, rb+jnrB, rb+jnrC, rb+jnrD, rbaj);
-
- /* load chain rule terms for j1-4 */
- f_gb = _mm_load_ps(dadx);
- dadx += 4;
- f_gb_ai = _mm_load_ps(dadx);
- dadx += 4;
-
- /* calculate scalar force */
- f_gb = _mm_mul_ps(f_gb, rbai);
- f_gb_ai = _mm_mul_ps(f_gb_ai, rbaj);
- f_gb = _mm_add_ps(f_gb, f_gb_ai);
-
- tx = _mm_mul_ps(f_gb, dx);
- ty = _mm_mul_ps(f_gb, dy);
- tz = _mm_mul_ps(f_gb, dz);
-
- fix = _mm_add_ps(fix, tx);
- fiy = _mm_add_ps(fiy, ty);
- fiz = _mm_add_ps(fiz, tz);
-
- GMX_MM_DECREMENT_1RVEC_4POINTERS_PS(f+j3A, f+j3B, f+j3C, f+j3D, tx, ty, tz);
- }
-
- /*deal with odd elements */
- if (offset != 0)
- {
- if (offset == 1)
- {
- jnrA = jjnr[k];
- j3A = 3*jnrA;
- GMX_MM_LOAD_1RVEC_1POINTER_PS(x+j3A, jx, jy, jz);
- GMX_MM_LOAD_1VALUE_PS(rb+jnrA, rbaj);
- }
- else if (offset == 2)
- {
- jnrA = jjnr[k];
- jnrB = jjnr[k+1];
- j3A = 3*jnrA;
- j3B = 3*jnrB;
- GMX_MM_LOAD_1RVEC_2POINTERS_PS(x+j3A, x+j3B, jx, jy, jz);
- GMX_MM_LOAD_2VALUES_PS(rb+jnrA, rb+jnrB, rbaj);
- }
- else
- {
- /* offset must be 3 */
- jnrA = jjnr[k];
- jnrB = jjnr[k+1];
- jnrC = jjnr[k+2];
- j3A = 3*jnrA;
- j3B = 3*jnrB;
- j3C = 3*jnrC;
- GMX_MM_LOAD_1RVEC_3POINTERS_PS(x+j3A, x+j3B, x+j3C, jx, jy, jz);
- GMX_MM_LOAD_3VALUES_PS(rb+jnrA, rb+jnrB, rb+jnrC, rbaj);
- }
-
- dx = _mm_sub_ps(ix, jx);
- dy = _mm_sub_ps(iy, jy);
- dz = _mm_sub_ps(iz, jz);
-
- /* load chain rule terms for j1-4 */
- f_gb = _mm_load_ps(dadx);
- dadx += 4;
- f_gb_ai = _mm_load_ps(dadx);
- dadx += 4;
-
- /* calculate scalar force */
- f_gb = _mm_mul_ps(f_gb, rbai);
- f_gb_ai = _mm_mul_ps(f_gb_ai, rbaj);
- f_gb = _mm_add_ps(f_gb, f_gb_ai);
-
- tx = _mm_mul_ps(f_gb, dx);
- ty = _mm_mul_ps(f_gb, dy);
- tz = _mm_mul_ps(f_gb, dz);
-
- fix = _mm_add_ps(fix, tx);
- fiy = _mm_add_ps(fiy, ty);
- fiz = _mm_add_ps(fiz, tz);
-
- if (offset == 1)
- {
- GMX_MM_DECREMENT_1RVEC_1POINTER_PS(f+j3A, tx, ty, tz);
- }
- else if (offset == 2)
- {
- GMX_MM_DECREMENT_1RVEC_2POINTERS_PS(f+j3A, f+j3B, tx, ty, tz);
- }
- else
- {
- /* offset must be 3 */
- GMX_MM_DECREMENT_1RVEC_3POINTERS_PS(f+j3A, f+j3B, f+j3C, tx, ty, tz);
- }
- }
-
- /* fix/fiy/fiz now contain four partial force terms, that all should be
- * added to the i particle forces and shift forces.
- */
- gmx_mm_update_iforce_1atom_ps(&fix, &fiy, &fiz, f+ii3, fshift+is3);
- }
-
- return 0;
-}
-
-
-#else
-/* keep compiler happy */
-int genborn_sse_dummy;
-
-#endif /* SSE intrinsics available */
cl_command_queue stream[2]; /**< local and non-local GPU queues */
/** events used for synchronization */
- cl_event nonlocal_done; /**< event triggered when the non-local non-bonded kernel
- is done (and the local transfer can proceed) */
- cl_event isc_ops_and_local_H2D_done; /**< event triggered when the tasks issued in
- the local stream that need to precede the
- non-local force calculations are done
- (e.g. f buffer 0-ing, local x/q H2D) */
-
- cl_bool bDoTime; /**< True if event-based timing is enabled. */
- cl_timers_t *timers; /**< OpenCL event-based timers. */
- struct gmx_wallclock_gpu_t *timings; /**< Timing data. */
+ cl_event nonlocal_done; /**< event triggered when the non-local non-bonded kernel
+ is done (and the local transfer can proceed) */
+ cl_event misc_ops_and_local_H2D_done; /**< event triggered when the tasks issued in
+ the local stream that need to precede the
+ non-local force calculations are done
+ (e.g. f buffer 0-ing, local x/q H2D) */
+
+ cl_bool bDoTime; /**< True if event-based timing is enabled. */
+ cl_timers_t *timers; /**< OpenCL event-based timers. */
+ struct gmx_wallclock_gpu_t *timings; /**< Timing data. */
};
#ifdef __cplusplus
nsp_cj4 += (nbl->cj4[cj4].imei[0].imask >> p) & 1;
}
- if (nsp_cj4 > 0 && nsp + nsp_cj4 > nsp_max)
+ /* Check if we should split at this cj4 to get a list of size nsp */
+ if (nsp > 0 && nsp + nsp_cj4 > nsp_max)
{
/* Split the list at cj4 */
nbl->sci[sci].cj4_ind_end = cj4;
ls[XX] = (grid->c1[XX] - grid->c0[XX])/(grid->ncx*GPU_NSUBCELL_X);
ls[YY] = (grid->c1[YY] - grid->c0[YY])/(grid->ncy*GPU_NSUBCELL_Y);
- ls[ZZ] = (grid->c1[ZZ] - grid->c0[ZZ])*grid->ncx*grid->ncy/(grid->nc*GPU_NSUBCELL_Z);
+ ls[ZZ] = grid->na_c/(grid->atom_density*ls[XX]*ls[YY]);
/* The average squared length of the diagonal of a sub cell */
xy_diag2 = ls[XX]*ls[XX] + ls[YY]*ls[YY] + ls[ZZ]*ls[ZZ];
/* 4 octants of a sphere */
vol_est += 0.5*4.0/3.0*M_PI*pow(r_eff_sup, 3);
+ /* Estimate the number of cluster pairs as the local number of
+ * clusters times the volume they interact with times the density.
+ */
nsp_est = grid->nsubc_tot*vol_est*grid->atom_density/grid->na_c;
/* Subtract the non-local pair count */
nsp_est -= nsp_est_nl;
+ /* For small cut-offs nsp_est will be an underestimate.
+ * With DD nsp_est_nl is an overestimate so nsp_est can get negative.
+ * So to avoid too small or negative nsp_est we set a minimum of
+ * all cells interacting with all 3^3 direct neighbors (3^3-1)/2+1=14.
+ * This might be a slight overestimate for small non-periodic groups of
+ * atoms as will occur for a local domain with DD, but for small
+ * groups of atoms we'll anyhow be limited by nsubpair_target_min,
+ * so this overestimation will not matter.
+ */
+ nsp_est = max(nsp_est, grid->nsubc_tot*14.0);
+
if (debug)
{
fprintf(debug, "nsp_est local %5.1f non-local %5.1f\n",
#include <stdlib.h>
#include <string.h>
+#include <cmath>
+
+#include <algorithm>
+
#include "gromacs/domdec/domdec.h"
#include "gromacs/legacyheaders/force.h"
#include "gromacs/legacyheaders/macros.h"
static int
round_up_to_simd_width(int length, int simd_width)
{
- int offset, newlength;
+ int offset;
offset = (simd_width > 0) ? length % simd_width : 0;
{
t_nblist *nl;
int homenr;
- int i, nn;
+ int i;
for (i = 0; (i < 2); i++)
{
*/
int maxsr, maxsr_wat, maxlr, maxlr_wat;
int ielec, ivdw, ielecmod, ivdwmod, type;
- int solvent;
int igeometry_def, igeometry_w, igeometry_ww;
int i;
gmx_bool bElecAndVdwSwitchDiffers;
* all the nlist arrays many times in a row.
* The numbers seem very accurate, but they are uncritical.
*/
- maxsr_wat = min(fr->nWatMol, (homenr+2)/3);
+ maxsr_wat = std::min(fr->nWatMol, (homenr+2)/3);
if (fr->bTwinRange)
{
maxlr = 50;
- maxlr_wat = min(maxsr_wat, maxlr);
+ maxlr_wat = std::min(maxsr_wat, maxlr);
}
else
{
static gmx_inline void new_i_nblist(t_nblist *nlist, atom_id i_atom, int shift, int gid)
{
- int i, k, nri, nshift;
-
- nri = nlist->nri;
+ int nri = nlist->nri;
/* Check whether we have to increase the i counter */
if ((nri == -1) ||
t_nblist * vdwc_ww = NULL;
t_nblist * coul_ww = NULL;
- int i, j, jcg, igid, gid, nbl_ind, ind_ij;
+ int i, j, jcg, igid, gid, nbl_ind;
atom_id jj, jj0, jj1, i_atom;
- int i0, nicg, len;
+ int i0, nicg;
int *cginfo;
int *type, *typeB;
real *charge, *chargeB;
- real qi, qiB, qq, rlj;
+ real qi, qiB;
gmx_bool bFreeEnergy, bFree, bFreeJ, bNotEx, *bPert;
gmx_bool bDoVdW_i, bDoCoul_i, bDoCoul_i_sol;
int iwater, jwater;
gmx_bool bLR,
gmx_bool bDoVdW,
gmx_bool bDoCoul,
- int solvent_opt)
+ int gmx_unused solvent_opt)
{
/* The a[] index has been removed,
* to put it back in i_atom should be a[i0] and jj should be a[jj].
t_nblist * vdwc_adress = NULL;
t_nblist * vdw_adress = NULL;
t_nblist * coul_adress = NULL;
- t_nblist * vdwc_ww = NULL;
- t_nblist * coul_ww = NULL;
int i, j, jcg, igid, gid, nbl_ind, nbl_ind_adress;
atom_id jj, jj0, jj1, i_atom;
- int i0, nicg, len;
+ int i0, nicg;
int *cginfo;
- int *type, *typeB;
- real *charge, *chargeB;
+ int *type;
+ real *charge;
real *wf;
- real qi, qiB, qq, rlj;
- gmx_bool bFreeEnergy, bFree, bFreeJ, bNotEx, *bPert;
- gmx_bool bDoVdW_i, bDoCoul_i, bDoCoul_i_sol;
+ real qi;
+ gmx_bool bNotEx;
+ gmx_bool bDoVdW_i, bDoCoul_i;
gmx_bool b_hybrid;
- gmx_bool j_all_atom;
- int iwater, jwater;
t_nblist *nlist, *nlist_adress;
gmx_bool bEnergyGroupCG;
/* Copy some pointers */
cginfo = fr->cginfo;
charge = md->chargeA;
- chargeB = md->chargeB;
type = md->typeA;
- typeB = md->typeB;
- bPert = md->bPerturbed;
wf = md->wf;
/* Get atom range */
/* Get the i charge group info */
igid = GET_CGINFO_GID(cginfo[icg]);
- iwater = (solvent_opt != esolNO) ? GET_CGINFO_SOLOPT(cginfo[icg]) : esolNO;
-
if (md->nPerturbed)
{
gmx_fatal(FARGS, "AdResS does not support free energy pertubation\n");
/* Perform NINT operation, using trunc operation, therefore
* we first add 2.5 then subtract 2 again
*/
- tz = dz*b_inv[ZZ] + h25;
+ tz = static_cast<int>(dz*b_inv[ZZ] + h25);
tz -= 2;
dz -= tz*box[ZZ][ZZ];
dy -= tz*box[ZZ][YY];
dx -= tz*box[ZZ][XX];
- ty = dy*b_inv[YY] + h25;
+ ty = static_cast<int>(dy*b_inv[YY] + h25);
ty -= 2;
dy -= ty*box[YY][YY];
dx -= ty*box[YY][XX];
- tx = dx*b_inv[XX]+h25;
+ tx = static_cast<int>(dx*b_inv[XX]+h25);
tx -= 2;
dx -= tx*box[XX][XX];
/* Perform NINT operation, using trunc operation, therefore
* we first add 1.5 then subtract 1 again
*/
- tx = dx*b_inv[XX] + h15;
- ty = dy*b_inv[YY] + h15;
- tz = dz*b_inv[ZZ] + h15;
+ tx = static_cast<int>(dx*b_inv[XX] + h15);
+ ty = static_cast<int>(dy*b_inv[YY] + h15);
+ tz = static_cast<int>(dz*b_inv[ZZ] + h15);
tx--;
ty--;
tz--;
return r2;
}
-static void add_simple(t_ns_buf *nsbuf, int nrj, atom_id cg_j,
+static void add_simple(t_ns_buf * nsbuf, int nrj, atom_id cg_j,
gmx_bool bHaveVdW[], int ngid, t_mdatoms *md,
int icg, int jgid, t_block *cgs, t_excl bexcl[],
int shift, t_forcerec *fr, put_in_list_t *put_in_list)
int j, nrj, jgid;
int *cginfo = fr->cginfo;
atom_id cg_j, *cgindex;
- t_ns_buf *nsbuf;
cgindex = cgs->index;
shift = CENTRAL;
int j, nrj, jgid;
int *cginfo = fr->cginfo;
atom_id cg_j, *cgindex;
- t_ns_buf *nsbuf;
cgindex = cgs->index;
if (bBox)
{
int naaj, k;
real rlist2;
- int nsearch, icg, jcg, igid, i0, nri, nn;
+ int nsearch, icg, igid, nn;
int *cginfo;
t_ns_buf *nsbuf;
/* atom_id *i_atoms; */
*rvdw2 = *rs2;
*rcoul2 = *rs2;
}
- *rm2 = min(*rvdw2, *rcoul2);
- *rl2 = max(*rvdw2, *rcoul2);
+ *rm2 = std::min(*rvdw2, *rcoul2);
+ *rl2 = std::max(*rvdw2, *rcoul2);
}
static void init_nsgrid_lists(t_forcerec *fr, int ngid, gmx_ns_t *ns)
#endif
int dx0, dx1, dy0, dy1, dz0, dz1;
int Nx, Ny, Nz, shift = -1, j, nrj, nns, nn = -1;
- real gridx, gridy, gridz, grid_x, grid_y, grid_z;
+ real gridx, gridy, gridz, grid_x, grid_y;
real *dcx2, *dcy2, *dcz2;
int zgi, ygi, xgi;
- int cg0, cg1, icg = -1, cgsnr, i0, igid, nri, naaj, max_jcg;
+ int cg0, cg1, icg = -1, cgsnr, i0, igid, naaj, max_jcg;
int jcg0, jcg1, jjcg, cgj0, jgid;
int *grida, *gridnra, *gridind;
gmx_bool rvdw_lt_rcoul, rcoul_lt_rvdw;
- rvec xi, *cgcm, grid_offset;
- real r2, rs2, rvdw2, rcoul2, rm2, rl2, XI, YI, ZI, dcx, dcy, dcz, tmp1, tmp2;
+ rvec *cgcm, grid_offset;
+ real r2, rs2, rvdw2, rcoul2, rm2, rl2, XI, YI, ZI, tmp1, tmp2;
int *i_egp_flags;
gmx_bool bDomDec, bTriclinicX, bTriclinicY;
ivec ncpddc;
gridz = grid->cell_size[ZZ];
grid_x = 1/gridx;
grid_y = 1/gridy;
- grid_z = 1/gridz;
copy_rvec(grid->cell_offset, grid_offset);
copy_ivec(grid->ncpddc, ncpddc);
dcx2 = grid->dcx2;
else
{
if (d == XX &&
- box[XX][XX] - fabs(box[YY][XX]) - fabs(box[ZZ][XX]) < sqrt(rl2))
+ box[XX][XX] - fabs(box[YY][XX]) - fabs(box[ZZ][XX]) < std::sqrt(rl2))
{
shp[d] = 2;
}
}
}
}
- /* setexcl(nri,i_atoms,&top->atoms.excl,FALSE,bexcl); */
setexcl(cgs->index[icg], cgs->index[icg+1], &top->excls, FALSE, bexcl);
}
/* No need to perform any left-over force calculations anymore (as we used to do here)
{
int mt, icg, nr_in_cg, maxcg, i, j, jcg, ngid, ncg;
t_block *cgs;
- char *ptr;
/* Compute largest charge groups size (# atoms) */
nr_in_cg = 1;
cgs = &mtop->moltype[mt].cgs;
for (icg = 0; (icg < cgs->nr); icg++)
{
- nr_in_cg = max(nr_in_cg, (int)(cgs->index[icg+1]-cgs->index[icg]));
+ nr_in_cg = std::max(nr_in_cg, (int)(cgs->index[icg+1]-cgs->index[icg]));
}
}
{
t_block *cgs = &(top->cgs);
rvec box_size, grid_x0, grid_x1;
- int i, j, m, ngid;
+ int m, ngid;
real min_size, grid_dens;
int nsearch;
gmx_bool bGrid;
- char *ptr;
- gmx_bool *i_egp_flags;
- int cg_start, cg_end, start, end;
+ int start, end;
gmx_ns_t *ns;
t_grid *grid;
gmx_domdec_zones_t *dd_zones;
}
if (!bGrid)
{
- min_size = min(box_size[XX], min(box_size[YY], box_size[ZZ]));
+ min_size = std::min(box_size[XX], std::min(box_size[YY], box_size[ZZ]));
if (2*fr->rlistlong >= min_size)
{
gmx_fatal(FARGS, "One of the box diagonal elements has become smaller than twice the cut-off length.");
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include <stdio.h>
#include <stdlib.h>
+#include <cmath>
+
+#include <algorithm>
+
#include "gromacs/domdec/domdec.h"
#include "gromacs/fileio/pdbio.h"
#include "gromacs/legacyheaders/macros.h"
for (d = 0; d < DIM; d++)
{
av[d] = s1[d];
- stddev[d] = sqrt(s2[d] - s1[d]*s1[d]);
+ stddev[d] = std::sqrt(s2[d] - s1[d]*s1[d]);
}
}
{
int i, j;
gmx_bool bDD, bDDRect;
- rvec av, stddev;
rvec izones_size;
real inv_r_ideal, size, add_tric, radd;
}
/* Use the ideal number of cg's per cell to set the ideal cell size */
- inv_r_ideal = pow(grid_density/grid->ncg_ideal, 1.0/3.0);
+ inv_r_ideal = std::pow((real)(grid_density/grid->ncg_ideal), (real)(1.0/3.0));
if (rlist > 0 && inv_r_ideal*rlist < 1)
{
inv_r_ideal = 1/rlist;
* direction has uniform DD cell boundaries.
*/
bDDRect = !(ddbox->tric_dir[i] ||
- (dd->bGridJump && i != dd->dim[0]));
+ (dd_dlb_is_on(dd) && i != dd->dim[0]));
radd = rlist;
if (i >= ddbox->npbcdim &&
/* Check if the cell boundary in this direction is
* perpendicular to the Cartesian axis.
+ * Since grid->npbcdim is an integer that in principle can take
+ * any value, we help the compiler avoid warnings and potentially
+ * optimize by ensuring that j < DIM here.
*/
- for (j = i+1; j < grid->npbcdim; j++)
+ for (j = i+1; j < grid->npbcdim && j < DIM; j++)
{
if (box[j][i] != 0)
{
t_grid *init_grid(FILE *fplog, t_forcerec *fr)
{
- int d, m;
char *ptr;
t_grid *grid;
real rlistlong, real grid_density)
{
int i, m;
- ivec cx;
set_grid_sizes(box, izones_x0, izones_x1, rlistlong, dd, ddbox, grid, grid_density);
}
}
- m = max(grid->n[XX], max(grid->n[YY], grid->n[ZZ]));
+ m = std::max(grid->n[XX], std::max(grid->n[YY], grid->n[ZZ]));
if (m > grid->dc_nalloc)
{
/* Allocate with double the initial size for box scaling */
int cg0, int cg1, rvec cg_cm[])
{
int *cell_index;
- int nrx, nry, nrz;
+ int nry, nrz;
rvec n_box, offset;
int zone, ccg0, ccg1, cg, d, not_used;
ivec shift0, useall, b0, b1, ind;
cell_index = grid->cell_index;
/* Initiate cell borders */
- nrx = grid->n[XX];
nry = grid->n[YY];
nrz = grid->n[ZZ];
for (d = 0; d < DIM; d++)
{
for (d = 0; d < DIM; d++)
{
- ind[d] = (cg_cm[cg][d] - offset[d])*n_box[d];
+ ind[d] = static_cast<int>((cg_cm[cg][d] - offset[d])*n_box[d]);
/* With pbc we should be done here.
* Without pbc cg's outside the grid
* should be assigned to the closest grid cell.
bUse = TRUE;
for (d = 0; d < DIM; d++)
{
- ind[d] = (cg_cm[cg][d] - offset[d])*n_box[d];
+ ind[d] = static_cast<int>((cg_cm[cg][d] - offset[d])*n_box[d]);
/* Here we have to correct for rounding problems,
* as this cg_cm to cell index operation is not necessarily
* binary identical to the operation for the DD zone assignment
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "gromacs/legacyheaders/qmmm.h"
#include "gromacs/legacyheaders/txtdump.h"
#include "gromacs/legacyheaders/typedefs.h"
+#include "gromacs/legacyheaders/types/commrec.h"
#include "gromacs/math/units.h"
#include "gromacs/math/vec.h"
#include "gromacs/utility/fatalerror.h"
void
-F77_FUNC(inigms, IMIGMS) (void);
+ F77_FUNC(inigms, IMIGMS) (void);
void
-F77_FUNC(endgms, ENDGMS) (void);
+ F77_FUNC(endgms, ENDGMS) (void);
void
-F77_FUNC(grads, GRADS) (int *nrqmat, real *qmcrd, int *nrmmat, real *mmchrg,
- real *mmcrd, real *qmgrad, real *mmgrad, real *energy);
+ F77_FUNC(grads, GRADS) (int *nrqmat, real *qmcrd, int *nrmmat, real *mmchrg,
+ real *mmcrd, real *qmgrad, real *mmgrad, real *energy);
* dynamics simulations. 7-6-2002 (London)
*/
int
- i, j, rank;
+ i, j;
FILE
*out;
char
}
}
-real call_gamess(t_commrec *cr, t_forcerec *fr, t_QMrec *qm, t_MMrec *mm,
+real call_gamess(t_forcerec *fr, t_QMrec *qm, t_MMrec *mm,
rvec f[], rvec fshift[])
{
/* do the actual QMMM calculation using GAMESS-UK. In this
* gradient routines linked directly
*/
int
- i, j, rank;
+ i, j;
real
QMener = 0.0, *qmgrad, *mmgrad, *mmcrd, *qmcrd, energy;
t_QMMMrec
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "gromacs/legacyheaders/typedefs.h"
#include "gromacs/math/units.h"
#include "gromacs/math/vec.h"
+#include "gromacs/utility/cstringutil.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/smalloc.h"
-
/* TODO: this should be made thread-safe */
/* Gaussian interface routines */
-void init_gaussian(t_commrec *cr, t_QMrec *qm, t_MMrec *mm)
+void init_gaussian(t_QMrec *qm)
{
- FILE
- *rffile = NULL, *out = NULL;
+ FILE *out = NULL;
ivec
- basissets[eQMbasisNR] = {{0, 3, 0},
+ basissets[eQMbasisNR] = {{0, 3, 0},
{0, 3, 0}, /*added for double sto-3g entry in names.c*/
{5, 0, 0},
{5, 0, 1},
{1, 6, 11},
{4, 6, 0}};
char
- *buf = NULL;
+ *buf = NULL;
int
- i;
+ i;
/* using the ivec above to convert the basis read form the mdp file
* in a human readable format into some numbers for the gaussian
} /* write_gaussian_input */
-real read_gaussian_output(rvec QMgrad[], rvec MMgrad[], int step,
- t_QMrec *qm, t_MMrec *mm)
+real read_gaussian_output(rvec QMgrad[], rvec MMgrad[], t_QMrec *qm, t_MMrec *mm)
{
int
i, j, atnum;
return(QMener);
}
-real read_gaussian_SH_output(rvec QMgrad[], rvec MMgrad[], int step,
- gmx_bool swapped, t_QMrec *qm, t_MMrec *mm)
+real read_gaussian_SH_output(rvec QMgrad[], rvec MMgrad[], int step, t_QMrec *qm, t_MMrec *mm)
{
int
i;
}
}
-real call_gaussian(t_commrec *cr, t_forcerec *fr,
- t_QMrec *qm, t_MMrec *mm, rvec f[], rvec fshift[])
+real call_gaussian(t_forcerec *fr, t_QMrec *qm, t_MMrec *mm, rvec f[], rvec fshift[])
{
/* normal gaussian jobs */
static int
write_gaussian_input(step, fr, qm, mm);
do_gaussian(step, exe);
- QMener = read_gaussian_output(QMgrad, MMgrad, step, qm, mm);
+ QMener = read_gaussian_output(QMgrad, MMgrad, qm, mm);
/* put the QMMM forces in the force array and to the fshift
*/
for (i = 0; i < qm->nrQMatoms; i++)
} /* call_gaussian */
-real call_gaussian_SH(t_commrec *cr, t_forcerec *fr, t_QMrec *qm, t_MMrec *mm,
- rvec f[], rvec fshift[])
+real call_gaussian_SH(t_forcerec *fr, t_QMrec *qm, t_MMrec *mm, rvec f[], rvec fshift[])
{
/* a gaussian call routine intended for doing diabatic surface
* "sliding". See the manual for the theoretical background of this
write_gaussian_SH_input(step, swapped, fr, qm, mm);
do_gaussian(step, exe);
- QMener = read_gaussian_SH_output(QMgrad, MMgrad, step, swapped, qm, mm);
+ QMener = read_gaussian_SH_output(QMgrad, MMgrad, step, qm, mm);
/* check for a surface hop. Only possible if we were already state
* averaging.
{
write_gaussian_SH_input(step, swapped, fr, qm, mm);
do_gaussian(step, exe);
- QMener = read_gaussian_SH_output(QMgrad, MMgrad, step, swapped, qm, mm);
+ QMener = read_gaussian_SH_output(QMgrad, MMgrad, step, qm, mm);
}
}
/* add the QMMM forces to the gmx force array and fshift
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
/* mopac interface routines */
void
-F77_FUNC(domldt, DOMLDT) (int *nrqmat, int labels[], char keywords[]);
+ F77_FUNC(domldt, DOMLDT) (int *nrqmat, int labels[], char keywords[]);
void
-F77_FUNC(domop, DOMOP) (int *nrqmat, double qmcrd[], int *nrmmat,
- double mmchrg[], double mmcrd[], double qmgrad[],
- double mmgrad[], double *energy, double qmcharges[]);
+ F77_FUNC(domop, DOMOP) (int *nrqmat, double qmcrd[], int *nrmmat,
+ double mmchrg[], double mmcrd[], double qmgrad[],
+ double mmgrad[], double *energy, double qmcharges[]);
-void init_mopac(t_commrec *cr, t_QMrec *qm, t_MMrec *mm)
+void init_mopac(t_QMrec *qm)
{
/* initializes the mopac routines ans sets up the semiempirical
* computation by calling moldat(). The inline mopac routines can
} /* init_mopac */
-real call_mopac(t_commrec *cr, t_forcerec *fr, t_QMrec *qm, t_MMrec *mm,
- rvec f[], rvec fshift[])
+real call_mopac(t_QMrec *qm, t_MMrec *mm, rvec f[], rvec fshift[])
{
/* do the actual QMMM calculation using directly linked mopac subroutines
*/
return (QMener);
}
-real call_mopac_SH(t_commrec *cr, t_forcerec *fr, t_QMrec *qm, t_MMrec *mm,
- rvec f[], rvec fshift[])
+real call_mopac_SH(t_QMrec *qm, t_MMrec *mm, rvec f[], rvec fshift[])
{
/* do the actual SH QMMM calculation using directly linked mopac
subroutines */
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2008, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
int
i, j, atnum;
char
- buf[300], tmp[300], orca_xyzFilename[300], orca_pcgradFilename[300], orca_engradFilename[300];
+ buf[300], orca_xyzFilename[300], orca_pcgradFilename[300], orca_engradFilename[300];
real
QMener;
FILE
gmx_fatal(FARGS, "Unexpected end of ORCA output");
}
#ifdef GMX_DOUBLE
- sscanf(buf, "%s%lf%lf%lf\n",
- tmp,
+ sscanf(buf, "%d%lf%lf%lf\n",
+ &atnum,
&qm->xQM[i][XX],
&qm->xQM[i][YY],
&qm->xQM[i][ZZ]);
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include <stdlib.h>
#include <string.h>
+#include <cmath>
+
+#include <algorithm>
+
#include "gromacs/fileio/confio.h"
#include "gromacs/legacyheaders/force.h"
#include "gromacs/legacyheaders/macros.h"
init_gamess(t_commrec *cr, t_QMrec *qm, t_MMrec *mm);
real
-call_gamess(t_commrec *cr, t_forcerec *fr,
+call_gamess(t_forcerec *fr,
t_QMrec *qm, t_MMrec *mm, rvec f[], rvec fshift[]);
#elif defined GMX_QMMM_MOPAC
/* MOPAC interface */
void
-init_mopac(t_commrec *cr, t_QMrec *qm, t_MMrec *mm);
+init_mopac(t_QMrec *qm);
real
-call_mopac(t_commrec *cr, t_forcerec *fr, t_QMrec *qm,
- t_MMrec *mm, rvec f[], rvec fshift[]);
+call_mopac(t_QMrec *qm, t_MMrec *mm, rvec f[], rvec fshift[]);
real
-call_mopac_SH(t_commrec *cr, t_forcerec *fr, t_QMrec *qm,
- t_MMrec *mm, rvec f[], rvec fshift[]);
+call_mopac_SH(t_QMrec *qm, t_MMrec *mm, rvec f[], rvec fshift[]);
#elif defined GMX_QMMM_GAUSSIAN
/* GAUSSIAN interface */
void
-init_gaussian(t_commrec *cr, t_QMrec *qm, t_MMrec *mm);
+init_gaussian(t_QMrec *qm);
real
-call_gaussian_SH(t_commrec *cr, t_forcerec *fr, t_QMrec *qm,
- t_MMrec *mm, rvec f[], rvec fshift[]);
+call_gaussian_SH(t_forcerec *fr, t_QMrec *qm, t_MMrec *mm, rvec f[], rvec fshift[]);
real
-call_gaussian(t_commrec *cr, t_forcerec *fr, t_QMrec *qm,
- t_MMrec *mm, rvec f[], rvec fshift[]);
+call_gaussian(t_forcerec *fr, t_QMrec *qm, t_MMrec *mm, rvec f[], rvec fshift[]);
#elif defined GMX_QMMM_ORCA
/* ORCA interface */
#ifdef GMX_QMMM_MOPAC
if (qm->bSH)
{
- QMener = call_mopac_SH(cr, fr, qm, mm, f, fshift);
+ QMener = call_mopac_SH(qm, mm, f, fshift);
}
else
{
- QMener = call_mopac(cr, fr, qm, mm, f, fshift);
+ QMener = call_mopac(qm, mm, f, fshift);
}
#else
gmx_fatal(FARGS, "Semi-empirical QM only supported with Mopac.");
if (qm->bSH && qm->QMmethod == eQMmethodCASSCF)
{
#ifdef GMX_QMMM_GAUSSIAN
- QMener = call_gaussian_SH(cr, fr, qm, mm, f, fshift);
+ QMener = call_gaussian_SH(fr, qm, mm, f, fshift);
#else
gmx_fatal(FARGS, "Ab-initio Surface-hopping only supported with Gaussian.");
#endif
else
{
#ifdef GMX_QMMM_GAMESS
- QMener = call_gamess(cr, fr, qm, mm, f, fshift);
+ QMener = call_gamess(fr, qm, mm, f, fshift);
#elif defined GMX_QMMM_GAUSSIAN
- QMener = call_gaussian(cr, fr, qm, mm, f, fshift);
+ QMener = call_gaussian(fr, qm, mm, f, fshift);
#elif defined GMX_QMMM_ORCA
QMener = call_orca(fr, qm, mm, f, fshift);
#else
{
#ifdef GMX_QMMM_MOPAC
/* do a semi-empiprical calculation */
- init_mopac(cr, qm, mm);
+ init_mopac(qm);
#else
gmx_fatal(FARGS, "Semi-empirical QM only supported with Mopac.");
#endif
#ifdef GMX_QMMM_GAMESS
init_gamess(cr, qm, mm);
#elif defined GMX_QMMM_GAUSSIAN
- init_gaussian(cr, qm, mm);
+ init_gaussian(qm);
#elif defined GMX_QMMM_ORCA
init_orca(qm);
#else
FILE
*out = NULL;
int
- i, j, k, nrexcl = 0, *excluded = NULL, max = 0;
+ i, j, k, nrexcl = 0, *excluded = NULL, max_excl = 0;
out = fopen("QMMMexcl.dat", "w");
{
if (mm->indexMM[k] == excls->a[j]) /* the excluded MM atom */
{
- if (nrexcl >= max)
+ if (nrexcl >= max_excl)
{
- max += 1000;
- srenew(excluded, max);
+ max_excl += 1000;
+ srenew(excluded, max_excl);
}
excluded[nrexcl++] = k;
continue;
t_ilist *ilist_mol;
gmx_mtop_atomlookup_t alook;
- c6au = (HARTREE2KJ*AVOGADRO*pow(BOHR2NM, 6));
- c12au = (HARTREE2KJ*AVOGADRO*pow(BOHR2NM, 12));
+ c6au = (HARTREE2KJ*AVOGADRO*std::pow(BOHR2NM, 6));
+ c12au = (HARTREE2KJ*AVOGADRO*std::pow(BOHR2NM, 12));
/* issue a fatal if the user wants to run with more than one node */
if (PAR(cr))
{
{
#ifdef GMX_QMMM_MOPAC
/* semi-empiprical 1-layer ONIOM calculation requested (mopac93) */
- init_mopac(cr, qr->qm[0], qr->mm);
+ init_mopac(qr->qm[0]);
#else
gmx_fatal(FARGS, "Semi-empirical QM only supported with Mopac.");
#endif
#ifdef GMX_QMMM_GAMESS
init_gamess(cr, qr->qm[0], qr->mm);
#elif defined GMX_QMMM_GAUSSIAN
- init_gaussian(cr, qr->qm[0], qr->mm);
+ init_gaussian(qr->qm[0]);
#elif defined GMX_QMMM_ORCA
init_orca(qr->qm[0]);
#else
QMMMlist;
rvec
dx, crd;
- int
- *MMatoms;
t_QMrec
*qm;
t_MMrec
real
c12au, c6au;
- c6au = (HARTREE2KJ*AVOGADRO*pow(BOHR2NM, 6));
- c12au = (HARTREE2KJ*AVOGADRO*pow(BOHR2NM, 12));
+ c6au = (HARTREE2KJ*AVOGADRO*std::pow(BOHR2NM, 6));
+ c12au = (HARTREE2KJ*AVOGADRO*std::pow(BOHR2NM, 12));
/* every cpu has this array. On every processor we fill this array
* with 1's and 0's. 1's indicate the atoms is a QM atom on the
crd[0] = IS2X(QMMMlist.shift[i]) + IS2X(qm_i_particles[i].shift);
crd[1] = IS2Y(QMMMlist.shift[i]) + IS2Y(qm_i_particles[i].shift);
crd[2] = IS2Z(QMMMlist.shift[i]) + IS2Z(qm_i_particles[i].shift);
- is = XYZ2IS(crd[0], crd[1], crd[2]);
+ is = static_cast<int>(XYZ2IS(crd[0], crd[1], crd[2]));
for (j = QMMMlist.jindex[i];
j < QMMMlist.jindex[i+1];
j++)
qsort(qm_i_particles, QMMMlist.nri,
(size_t)sizeof(qm_i_particles[0]),
struct_comp);
- qsort(mm_j_particles, mm_nr,
- (size_t)sizeof(mm_j_particles[0]),
- struct_comp);
+ /* The mm_j_particles argument to qsort is not allowed to be NULL */
+ if (mm_nr > 0)
+ {
+ qsort(mm_j_particles, mm_nr,
+ (size_t)sizeof(mm_j_particles[0]),
+ struct_comp);
+ }
/* remove multiples in the QM shift array, since in init_QMMM() we
* went through the atom numbers from 0 to md.nr, the order sorted
* here matches the one of QMindex already.
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
namespace gmx
{
-class File;
class HelpWriterContext;
/*! \libinternal \brief
#include "gromacs/onlinehelp/helpformat.h"
#include "gromacs/onlinehelp/helpwritercontext.h"
#include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/file.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/stringutil.h"
+#include "gromacs/utility/textwriter.h"
namespace gmx
{
{
return false;
}
- File &file = context.outputFile();
+ TextWriter &file = context.outputFile();
TextTableFormatter formatter;
formatter.addColumn(NULL, maxNameLength + 1, false);
formatter.addColumn(NULL, 72 - maxNameLength, true);
#include "gromacs/onlinehelp/helpformat.h"
#include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/file.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/programcontext.h"
#include "gromacs/utility/stringutil.h"
+#include "gromacs/utility/textwriter.h"
#include "rstparser.h"
{
public:
//! Initializes the state with the given parameters.
- SharedState(File *file, HelpOutputFormat format,
+ SharedState(TextOutputStream *stream, HelpOutputFormat format,
const HelpLinks *links)
- : file_(*file), format_(format), links_(links)
+ : file_(stream), format_(format), links_(links)
{
}
return *consoleOptionsFormatter_;
}
- //! Output file to which the help is written.
- File &file_;
+ //! Writer for writing the help.
+ TextWriter file_;
//! Output format for the help output.
HelpOutputFormat format_;
//! Links to use.
* HelpWriterContext
*/
-HelpWriterContext::HelpWriterContext(File *file, HelpOutputFormat format)
- : impl_(new Impl(Impl::StatePointer(new Impl::SharedState(file, format, NULL)), 0))
+HelpWriterContext::HelpWriterContext(TextOutputStream *stream, HelpOutputFormat format)
+ : impl_(new Impl(Impl::StatePointer(new Impl::SharedState(stream, format, NULL)), 0))
{
}
-HelpWriterContext::HelpWriterContext(File *file, HelpOutputFormat format,
+HelpWriterContext::HelpWriterContext(TextOutputStream *stream, HelpOutputFormat format,
const HelpLinks *links)
- : impl_(new Impl(Impl::StatePointer(new Impl::SharedState(file, format, links)), 0))
+ : impl_(new Impl(Impl::StatePointer(new Impl::SharedState(stream, format, links)), 0))
{
if (links != NULL)
{
return impl_->state_->format_;
}
-File &HelpWriterContext::outputFile() const
+TextWriter &HelpWriterContext::outputFile() const
{
- return impl_->state_->file_;
+ // TODO: Consider how to deal with the const/non-const difference better.
+ return const_cast<TextWriter &>(impl_->state_->file_);
}
void HelpWriterContext::enterSubSection(const std::string &title)
{
return;
}
- File &file = outputFile();
+ TextWriter &file = outputFile();
switch (outputFormat())
{
case eHelpOutputFormat_Console:
const std::string &info,
const std::string &description) const
{
- File &file = outputFile();
+ TextWriter &file = outputFile();
switch (outputFormat())
{
case eHelpOutputFormat_Console:
namespace gmx
{
-class File;
class TextLineWrapperSettings;
+class TextOutputStream;
+class TextWriter;
/*! \cond libapi */
//! \libinternal Output format for help writing.
{
public:
/*! \brief
- * Initializes a context with the given output file and format.
+ * Initializes a context with the given output stream and format.
*
* \throws std::bad_alloc if out of memory.
*/
- HelpWriterContext(File *file, HelpOutputFormat format);
+ HelpWriterContext(TextOutputStream *stream, HelpOutputFormat format);
/*! \brief
- * Initializes a context with the given output file, format and links.
+ * Initializes a context with the given output stream, format and links.
*
* \throws std::bad_alloc if out of memory.
*
* is destructed. The caller is responsible for ensuring that the
* links object remains valid long enough.
*/
- HelpWriterContext(File *file, HelpOutputFormat format,
+ HelpWriterContext(TextOutputStream *stream, HelpOutputFormat format,
const HelpLinks *links);
//! Creates a copy of the context.
HelpWriterContext(const HelpWriterContext &other);
*/
HelpOutputFormat outputFormat() const;
/*! \brief
- * Returns the raw output file for writing the help.
+ * Returns the raw writer for writing the help.
*
- * Using this file directly should be avoided, as it requires one to
+ * Using this writer directly should be avoided, as it requires one to
* have different code for each output format.
* Using other methods in this class should be preferred.
*
* Does not throw.
*/
- File &outputFile() const;
+ TextWriter &outputFile() const;
/*! \brief
* Creates a subsection in the output help.
#include "gromacs/onlinehelp/helptopic.h"
#include "gromacs/onlinehelp/helpwritercontext.h"
#include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/file.h"
+#include "gromacs/utility/stringstream.h"
#include "gromacs/onlinehelp/tests/mock_helptopic.h"
#include "testutils/stringtest.h"
#include "testutils/testasserts.h"
-#include "testutils/testfilemanager.h"
namespace
{
public:
HelpTestBase();
- gmx::test::TestFileManager tempFiles_;
MockHelpTopic rootTopic_;
- std::string filename_;
- gmx::File helpFile_;
+ gmx::StringOutputStream helpFile_;
gmx::HelpWriterContext context_;
gmx::HelpManager manager_;
};
HelpTestBase::HelpTestBase()
: rootTopic_("", NULL, "Root topic text"),
- filename_(tempFiles_.getTemporaryFilePath("helptext.txt")),
- helpFile_(filename_, "w"),
context_(&helpFile_, gmx::eHelpOutputFormat_Console),
manager_(rootTopic_, context_)
{
ASSERT_NO_THROW_GMX(manager_.writeCurrentTopic());
helpFile_.close();
- checkFileContents(filename_, "HelpText");
+ checkText(helpFile_.toString(), "HelpText");
}
TEST_F(HelpTopicFormattingTest, FormatsSimpleTopic)
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/smalloc.h"
+#include "gromacs/utility/stringutil.h"
+#include "gromacs/utility/textwriter.h"
/********************************************************************
* gmx_ana_indexgrps_t functions
}
/*!
- * \param[in] fp Where to print the output.
+ * \param[in] writer Writer to use for output.
* \param[in] g Index groups to print.
* \param[in] maxn Maximum number of indices to print
* (-1 = print all, 0 = print only names).
*/
void
-gmx_ana_indexgrps_print(FILE *fp, gmx_ana_indexgrps_t *g, int maxn)
+gmx_ana_indexgrps_print(gmx::TextWriter *writer, gmx_ana_indexgrps_t *g, int maxn)
{
for (int i = 0; i < g->nr; ++i)
{
- fprintf(fp, " Group %2d \"%s\" ", i, g->names[i].c_str());
- gmx_ana_index_dump(fp, &g->g[i], maxn);
+ writer->writeString(gmx::formatString(" Group %2d \"%s\" ",
+ i, g->names[i].c_str()));
+ gmx_ana_index_dump(writer, &g->g[i], maxn);
}
}
}
/*!
- * \param[in] fp Where to print the output.
+ * \param[in] writer Writer to use for output.
* \param[in] g Index group to print.
* \param[in] maxn Maximum number of indices to print (-1 = print all).
*/
void
-gmx_ana_index_dump(FILE *fp, gmx_ana_index_t *g, int maxn)
+gmx_ana_index_dump(gmx::TextWriter *writer, gmx_ana_index_t *g, int maxn)
{
- int j, n;
-
- fprintf(fp, "(%d atoms)", g->isize);
+ writer->writeString(gmx::formatString("(%d atoms)", g->isize));
if (maxn != 0)
{
- fprintf(fp, ":");
- n = g->isize;
+ writer->writeString(":");
+ int n = g->isize;
if (maxn >= 0 && n > maxn)
{
n = maxn;
}
- for (j = 0; j < n; ++j)
+ for (int j = 0; j < n; ++j)
{
- fprintf(fp, " %d", g->index[j]+1);
+ writer->writeString(gmx::formatString(" %d", g->index[j]+1));
}
if (n < g->isize)
{
- fprintf(fp, " ...");
+ writer->writeString(" ...");
}
}
- fprintf(fp, "\n");
+ writer->writeLine();
}
int
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2009,2010,2011,2012,2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2009,2010,2011,2012,2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "gromacs/legacyheaders/types/simple.h"
#include "gromacs/topology/block.h"
+namespace gmx
+{
+class TextWriter;
+}
+
struct t_topology;
/** Stores a set of index groups. */
/** Writes out a list of index groups. */
void
-gmx_ana_indexgrps_print(FILE *fp, gmx_ana_indexgrps_t *g, int maxn);
+gmx_ana_indexgrps_print(gmx::TextWriter *writer, gmx_ana_indexgrps_t *g, int maxn);
/*@}*/
/*! \name Functions for handling gmx_ana_index_t
/** Writes out the contents of a index group. */
void
-gmx_ana_index_dump(FILE *fp, gmx_ana_index_t *g, int maxn);
+gmx_ana_index_dump(gmx::TextWriter *writer, gmx_ana_index_t *g, int maxn);
/*! \brief
* Returns maximum atom index that appears in an index group.
#include <algorithm>
#include <vector>
-#include "thread_mpi/mutex.h"
-
#include "gromacs/legacyheaders/names.h"
#include "gromacs/math/vec.h"
#include "gromacs/pbcutil/pbc.h"
#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/gmxassert.h"
+#include "gromacs/utility/mutex.h"
#include "gromacs/utility/stringutil.h"
namespace gmx
//! Data structure to hold the grid cell contents.
CellList cells_;
- tMPI::mutex createPairSearchMutex_;
+ Mutex createPairSearchMutex_;
PairSearchList pairSearchList_;
friend class AnalysisNeighborhoodPairSearchImpl;
AnalysisNeighborhoodSearchImpl::PairSearchImplPointer
AnalysisNeighborhoodSearchImpl::getPairSearch()
{
- tMPI::lock_guard<tMPI::mutex> lock(createPairSearchMutex_);
+ lock_guard<Mutex> lock(createPairSearchMutex_);
// TODO: Consider whether this needs to/can be faster, e.g., by keeping a
// separate pool of unused search objects.
PairSearchList::const_iterator i;
SearchImplPointer getSearch();
- tMPI::mutex createSearchMutex_;
+ Mutex createSearchMutex_;
SearchList searchList_;
real cutoff_;
const t_blocka *excls_;
AnalysisNeighborhood::Impl::SearchImplPointer
AnalysisNeighborhood::Impl::getSearch()
{
- tMPI::lock_guard<tMPI::mutex> lock(createSearchMutex_);
+ lock_guard<Mutex> lock(createSearchMutex_);
// TODO: Consider whether this needs to/can be faster, e.g., by keeping a
// separate pool of unused search objects.
SearchList::const_iterator i;
* methods and initializes the children of the method element.
* - selectioncollection.h, selectioncollection.cpp:
* These files define the high-level public interface to the parser
- * through SelectionCollection::parseFromStdin(),
- * SelectionCollection::parseFromFile() and
+ * through SelectionCollection::parseInteractive(),
+ * SelectionCollection::parseFromStdin(),
+ * SelectionCollection::parseFromFile(), and
* SelectionCollection::parseFromString().
*
* The basic control flow in the parser is as follows: when a parser function
#include "gromacs/selection/selection.h"
#include "gromacs/utility/cstringutil.h"
#include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/file.h"
#include "gromacs/utility/smalloc.h"
#include "gromacs/utility/stringutil.h"
+#include "gromacs/utility/textwriter.h"
#include "keywords.h"
#include "poscalc.h"
catch (gmx::UserInputError &ex)
{
ex.prependContext(context);
- if (_gmx_sel_is_lexer_interactive(scanner))
+ gmx::TextWriter *statusWriter
+ = _gmx_sel_lexer_get_status_writer(scanner);
+ if (statusWriter != NULL)
{
- gmx::formatExceptionMessageToFile(stderr, ex);
+ gmx::formatExceptionMessageToWriter(statusWriter, ex);
return true;
}
throw;
root->fillNameIfMissing(_gmx_sel_lexer_pselstr(scanner));
/* Print out some information if the parser is interactive */
- if (_gmx_sel_is_lexer_interactive(scanner))
+ gmx::TextWriter *statusWriter = _gmx_sel_lexer_get_status_writer(scanner);
+ if (statusWriter != NULL)
{
- fprintf(stderr, "Selection '%s' parsed\n",
- _gmx_sel_lexer_pselstr(scanner));
+ const std::string message
+ = gmx::formatString("Selection '%s' parsed",
+ _gmx_sel_lexer_pselstr(scanner));
+ statusWriter->writeLine(message);
}
return root;
srenew(sc->varstrs, sc->nvars + 1);
sc->varstrs[sc->nvars] = gmx_strdup(pselstr);
++sc->nvars;
- if (_gmx_sel_is_lexer_interactive(scanner))
+ gmx::TextWriter *statusWriter = _gmx_sel_lexer_get_status_writer(scanner);
+ if (statusWriter != NULL)
{
- fprintf(stderr, "Variable '%s' parsed\n", pselstr);
+ const std::string message
+ = gmx::formatString("Variable '%s' parsed", pselstr);
+ statusWriter->writeLine(message);
}
return root;
}
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2009,2010,2011,2012,2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2009,2010,2011,2012,2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
YY_RULE_SETUP
#line 137 "scanner.l"
{
- if (yytext[0] == ';' || state->bInteractive)
+ if (yytext[0] == ';' || state->statusWriter != NULL)
{
rtrim(state->pselstr);
state->bCmdStart = true;
#include "parser.h"
+namespace gmx
+{
+class TextWriter;
+}
+
struct gmx_ana_indexgrps_t;
struct gmx_ana_selcollection_t;
/** Initializes the selection scanner. */
void
_gmx_sel_init_lexer(yyscan_t *scannerp, struct gmx_ana_selcollection_t *sc,
- bool bInteractive, int maxnr, bool bGroups,
+ gmx::TextWriter *statusWriter, int maxnr, bool bGroups,
struct gmx_ana_indexgrps_t *grps);
/** Frees memory allocated for the selection scanner. */
void
void
_gmx_sel_lexer_rethrow_exception_if_occurred(yyscan_t scanner);
-/** Returns true if the scanner is interactive. */
-bool
-_gmx_sel_is_lexer_interactive(yyscan_t scanner);
+/** Returns writer for status output (if not NULL, the scanner is interactive). */
+gmx::TextWriter *
+_gmx_sel_lexer_get_status_writer(yyscan_t scanner);
/** Returns the selection collection for the scanner. */
struct gmx_ana_selcollection_t *
_gmx_sel_lexer_selcollection(yyscan_t scanner);
\\\n { _gmx_sel_lexer_add_token(yylloc, " ", 1, state); break; }
";"|\n {
- if (yytext[0] == ';' || state->bInteractive)
+ if (yytext[0] == ';' || state->statusWriter != NULL)
{
rtrim(state->pselstr);
state->bCmdStart = true;
void
_gmx_sel_init_lexer(yyscan_t *scannerp, struct gmx_ana_selcollection_t *sc,
- bool bInteractive, int maxnr, bool bGroups,
- struct gmx_ana_indexgrps_t *grps)
+ gmx::TextWriter *statusWriter, int maxnr,
+ bool bGroups, struct gmx_ana_indexgrps_t *grps)
{
int rc = _gmx_sel_yylex_init(scannerp);
if (rc != 0)
state->grps = grps;
state->nexpsel = (maxnr > 0 ? static_cast<int>(sc->sel.size()) + maxnr : -1);
- state->bInteractive = bInteractive;
+ state->statusWriter = statusWriter;
snew(state->pselstr, STRSTORE_ALLOCSTEP);
state->pselstr[0] = 0;
}
}
-bool
-_gmx_sel_is_lexer_interactive(yyscan_t scanner)
+gmx::TextWriter *
+_gmx_sel_lexer_get_status_writer(yyscan_t scanner)
{
gmx_sel_lexer_t *state = _gmx_sel_yyget_extra(scanner);
- return state->bInteractive;
+ return state->statusWriter;
}
struct gmx_ana_selcollection_t *
namespace gmx
{
class SelectionParserSymbol;
+class TextWriter;
}
/* These need to be defined before including scanner_flex.h, because it
//! Number of selections at which the parser should stop.
int nexpsel;
- //! Whether the parser is interactive.
- bool bInteractive;
+ //! Writer to use for status output (if not NULL, parser is interactive).
+ gmx::TextWriter *statusWriter;
//! Pretty-printed version of the string parsed since last clear.
char *pselstr;
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/stringutil.h"
+#include "gromacs/utility/textwriter.h"
#include "selelem.h"
#include "selvalue.h"
fprintf(fp, " Group ");
gmx_ana_index_t g;
gmx_ana_index_set(&g, p.m.mapb.nra, p.m.mapb.a, 0);
- gmx_ana_index_dump(fp, &g, nmaxind);
+ TextWriter writer(fp);
+ gmx_ana_index_dump(&writer, &g, nmaxind);
fprintf(fp, " Block (size=%d):", p.m.mapb.nr);
if (!p.m.mapb.index)
#include <string>
#include <vector>
+#include <boost/scoped_ptr.hpp>
#include <boost/shared_ptr.hpp>
#include "gromacs/fileio/trx.h"
#include "gromacs/selection/selhelp.h"
#include "gromacs/topology/topology.h"
#include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/file.h"
+#include "gromacs/utility/filestream.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/smalloc.h"
#include "gromacs/utility/stringutil.h"
+#include "gromacs/utility/textwriter.h"
#include "compiler.h"
#include "mempool.h"
/*! \brief
* Reads a single selection line from the given input stream.
*
- * \param[in] infile File to read from (typically File::standardInput()).
- * \param[in] bInteractive Whether to print interactive prompts.
+ * \param[in] inputStream Stream to read from (typically the StandardInputStream).
+ * \param[in] statusWriter Writer to print prompts to (if NULL, no output is done).
* \param[out] line The read line is stored here.
* \returns true if something was read, false if at end of input.
*
* Handles line continuation, reading also the continuing line(s) in one call.
*/
-bool promptLine(File *infile, bool bInteractive, std::string *line)
+bool promptLine(TextInputStream *inputStream, TextWriter *statusWriter,
+ std::string *line)
{
- if (bInteractive)
+ if (statusWriter != NULL)
{
- fprintf(stderr, "> ");
+ statusWriter->writeString("> ");
}
- if (!infile->readLineWithTrailingSpace(line))
+ if (!inputStream->readLine(line))
{
return false;
}
while (endsWith(*line, "\\\n"))
{
line->resize(line->length() - 2);
- if (bInteractive)
+ if (statusWriter != NULL)
{
- fprintf(stderr, "... ");
+ statusWriter->writeString("... ");
}
std::string buffer;
// Return value ignored, buffer remains empty and works correctly
// if there is nothing to read.
- infile->readLineWithTrailingSpace(&buffer);
+ inputStream->readLine(&buffer);
line->append(buffer);
}
if (endsWith(*line, "\n"))
{
line->resize(line->length() - 1);
}
- else if (bInteractive)
+ else if (statusWriter != NULL)
{
- fprintf(stderr, "\n");
+ statusWriter->writeLine();
}
return true;
}
bool bInteractive)
{
int status = YYPUSH_MORE;
- int prevToken = 0;
do
{
YYSTYPE value;
YYLTYPE location;
int token = _gmx_sel_yylex(&value, &location, scanner);
- if (bInteractive)
+ if (bInteractive && token == 0)
{
- if (token == 0)
- {
- break;
- }
- // Empty commands cause the interactive parser to print out
- // status information. This avoids producing those unnecessarily,
- // e.g., from "resname RA;;".
- if (prevToken == CMD_SEP && token == CMD_SEP)
- {
- continue;
- }
- prevToken = token;
+ break;
}
status = _gmx_sel_yypush_parse(parserState, token, &value, &location, scanner);
}
/*! \brief
* Print current status in response to empty line in interactive input.
*
+ * \param[in] writer Writer to use for the output.
* \param[in] sc Selection collection data structure.
* \param[in] grps Available index groups.
* \param[in] firstSelection Index of first selection from this interactive
*
* Prints the available index groups and currently provided selections.
*/
-void printCurrentStatus(gmx_ana_selcollection_t *sc, gmx_ana_indexgrps_t *grps,
- size_t firstSelection, int maxCount,
- const std::string &context, bool bFirst)
+void printCurrentStatus(TextWriter *writer, gmx_ana_selcollection_t *sc,
+ gmx_ana_indexgrps_t *grps, size_t firstSelection,
+ int maxCount, const std::string &context, bool bFirst)
{
if (grps != NULL)
{
- std::fprintf(stderr, "Available static index groups:\n");
- gmx_ana_indexgrps_print(stderr, grps, 0);
+ writer->writeLine("Available static index groups:");
+ gmx_ana_indexgrps_print(writer, grps, 0);
}
- std::fprintf(stderr, "Specify ");
+ writer->writeString("Specify ");
if (maxCount < 0)
{
- std::fprintf(stderr, "any number of selections");
+ writer->writeString("any number of selections");
}
else if (maxCount == 1)
{
- std::fprintf(stderr, "a selection");
+ writer->writeString("a selection");
}
else
{
- std::fprintf(stderr, "%d selections", maxCount);
+ writer->writeString(formatString("%d selections", maxCount));
}
- std::fprintf(stderr, "%s%s:\n",
- context.empty() ? "" : " ", context.c_str());
- std::fprintf(stderr,
- "(one per line, <enter> for status/groups, 'help' for help%s)\n",
- maxCount < 0 ? ", Ctrl-D to end" : "");
+ writer->writeString(formatString("%s%s:\n",
+ context.empty() ? "" : " ", context.c_str()));
+ writer->writeString(formatString(
+ "(one per line, <enter> for status/groups, 'help' for help%s)\n",
+ maxCount < 0 ? ", Ctrl-D to end" : ""));
if (!bFirst && (sc->nvars > 0 || sc->sel.size() > firstSelection))
{
- std::fprintf(stderr, "Currently provided selections:\n");
+ writer->writeLine("Currently provided selections:");
for (int i = 0; i < sc->nvars; ++i)
{
- std::fprintf(stderr, " %s\n", sc->varstrs[i]);
+ writer->writeString(formatString(" %s\n", sc->varstrs[i]));
}
for (size_t i = firstSelection; i < sc->sel.size(); ++i)
{
- std::fprintf(stderr, " %2d. %s\n",
- static_cast<int>(i - firstSelection + 1),
- sc->sel[i]->selectionText());
+ writer->writeString(formatString(
+ " %2d. %s\n",
+ static_cast<int>(i - firstSelection + 1),
+ sc->sel[i]->selectionText()));
}
if (maxCount > 0)
{
const int remaining
= maxCount - static_cast<int>(sc->sel.size() - firstSelection);
- std::fprintf(stderr, "(%d more selection%s required)\n",
- remaining, remaining > 1 ? "s" : "");
+ writer->writeString(formatString(
+ "(%d more selection%s required)\n",
+ remaining, remaining > 1 ? "s" : ""));
}
}
}
/*! \brief
* Prints selection help in interactive selection input.
*
+ * \param[in] writer Writer to use for the output.
* \param[in] sc Selection collection data structure.
* \param[in] line Line of user input requesting help (starting with `help`).
*
* Initializes the selection help if not yet initialized, and finds the help
* topic based on words on the input line.
*/
-void printHelp(gmx_ana_selcollection_t *sc, const std::string &line)
+void printHelp(TextWriter *writer, gmx_ana_selcollection_t *sc,
+ const std::string &line)
{
if (sc->rootHelp.get() == NULL)
{
sc->rootHelp = createSelectionHelpTopic();
}
- HelpWriterContext context(&File::standardError(),
- eHelpOutputFormat_Console);
+ HelpWriterContext context(&writer->stream(), eHelpOutputFormat_Console);
HelpManager manager(*sc->rootHelp, context);
try
{
}
catch (const InvalidInputError &ex)
{
- fprintf(stderr, "%s\n", ex.what());
+ writer->writeLine(ex.what());
return;
}
manager.writeCurrentTopic();
* Helper function that runs the parser once the tokenizer has been
* initialized.
*
- * \param[in,out] scanner Scanner data structure.
- * \param[in] bStdIn Whether to use a line-based reading
+ * \param[in,out] scanner Scanner data structure.
+ * \param[in] inputStream Stream to use for input (currently only with
+ * `bInteractive==true`).
+ * \param[in] bInteractive Whether to use a line-based reading
* algorithm designed for interactive input.
* \param[in] maxnr Maximum number of selections to parse
* (if -1, parse as many as provided by the user).
* \throws std::bad_alloc if out of memory.
* \throws InvalidInputError if there is a parsing error.
*
- * Used internally to implement parseFromStdin(), parseFromFile() and
+ * Used internally to implement parseInteractive(), parseFromFile() and
* parseFromString().
*/
-SelectionList runParser(yyscan_t scanner, bool bStdIn, int maxnr,
- const std::string &context)
+SelectionList runParser(yyscan_t scanner, TextInputStream *inputStream,
+ bool bInteractive, int maxnr, const std::string &context)
{
boost::shared_ptr<void> scannerGuard(scanner, &_gmx_sel_free_lexer);
gmx_ana_selcollection_t *sc = _gmx_sel_lexer_selcollection(scanner);
{
boost::shared_ptr<_gmx_sel_yypstate> parserState(
_gmx_sel_yypstate_new(), &_gmx_sel_yypstate_delete);
- if (bStdIn)
+ if (bInteractive)
{
- File &stdinFile(File::standardInput());
- const bool bInteractive = _gmx_sel_is_lexer_interactive(scanner);
- if (bInteractive)
+ TextWriter *statusWriter = _gmx_sel_lexer_get_status_writer(scanner);
+ if (statusWriter != NULL)
{
- printCurrentStatus(sc, grps, oldCount, maxnr, context, true);
+ printCurrentStatus(statusWriter, sc, grps, oldCount, maxnr, context, true);
}
std::string line;
int status;
- while (promptLine(&stdinFile, bInteractive, &line))
+ while (promptLine(inputStream, statusWriter, &line))
{
- if (bInteractive)
+ if (statusWriter != NULL)
{
line = stripString(line);
if (line.empty())
{
- printCurrentStatus(sc, grps, oldCount, maxnr, context, false);
+ printCurrentStatus(statusWriter, sc, grps, oldCount, maxnr, context, false);
continue;
}
if (startsWith(line, "help")
&& (line[4] == 0 || std::isspace(line[4])))
{
- printHelp(sc, line);
+ printHelp(statusWriter, sc, line);
continue;
}
}
return false;
}
-
SelectionList
-SelectionCollection::parseFromStdin(int nr, bool bInteractive,
+SelectionCollection::parseFromStdin(int count, bool bInteractive,
const std::string &context)
+{
+ return parseInteractive(count, &StandardInputStream::instance(),
+ bInteractive ? &TextOutputFile::standardError() : NULL,
+ context);
+}
+
+SelectionList
+SelectionCollection::parseInteractive(int count,
+ TextInputStream *inputStream,
+ TextOutputStream *statusStream,
+ const std::string &context)
{
yyscan_t scanner;
- _gmx_sel_init_lexer(&scanner, &impl_->sc_, bInteractive, nr,
- impl_->bExternalGroupsSet_,
- impl_->grps_);
- return runParser(scanner, true, nr, context);
+ boost::scoped_ptr<TextWriter> statusWriter;
+ if (statusStream != NULL)
+ {
+ statusWriter.reset(new TextWriter(statusStream));
+ statusWriter->wrapperSettings().setLineLength(78);
+ }
+ _gmx_sel_init_lexer(&scanner, &impl_->sc_, statusWriter.get(),
+ count, impl_->bExternalGroupsSet_, impl_->grps_);
+ return runParser(scanner, inputStream, true, count, context);
}
try
{
- yyscan_t scanner;
- File file(filename, "r");
+ yyscan_t scanner;
+ TextInputFile file(filename);
// TODO: Exception-safe way of using the lexer.
- _gmx_sel_init_lexer(&scanner, &impl_->sc_, false, -1,
+ _gmx_sel_init_lexer(&scanner, &impl_->sc_, NULL, -1,
impl_->bExternalGroupsSet_,
impl_->grps_);
_gmx_sel_set_lex_input_file(scanner, file.handle());
- return runParser(scanner, false, -1, std::string());
+ return runParser(scanner, NULL, false, -1, std::string());
}
catch (GromacsException &ex)
{
{
yyscan_t scanner;
- _gmx_sel_init_lexer(&scanner, &impl_->sc_, false, -1,
+ _gmx_sel_init_lexer(&scanner, &impl_->sc_, NULL, -1,
impl_->bExternalGroupsSet_,
impl_->grps_);
_gmx_sel_set_lex_input_str(scanner, str.c_str());
- return runParser(scanner, false, -1, std::string());
+ return runParser(scanner, NULL, false, -1, std::string());
}
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2010,2011,2012,2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2010,2011,2012,2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
class Options;
class SelectionCompiler;
class SelectionEvaluator;
+class TextInputStream;
+class TextOutputStream;
/*! \brief
* Collection of selections.
* initialization options.
*
* After setting the default values, one or more selections can be parsed with
- * one or more calls to parseFromStdin(), parseFromFile(), and/or
+ * one or more calls to parseInteractive(), parseFromStdin(), parseFromFile(), and/or
* parseFromString(). After all selections are parsed, the topology must be
* set with setTopology() unless requiresTopology() returns false (the topology
* can also be set earlier).
*/
SelectionList parseFromStdin(int count, bool bInteractive,
const std::string &context);
+ /*! \brief
+ * Parses selection(s) interactively using provided streams.
+ *
+ * \param[in] count Number of selections to parse
+ * (if -1, parse as many as provided by the user).
+ * \param[in] inputStream Stream to use for input.
+ * \param[in] outputStream Stream to use for output
+ * (if NULL, the parser runs non-interactively and does not
+ * produce any status messages).
+ * \param[in] context Context to print for interactive input.
+ * \returns Vector of parsed selections.
+ * \throws std::bad_alloc if out of memory.
+ * \throws InvalidInputError if there is a parsing error
+ * (an interactive parser only throws this if too few selections
+ * are provided and the user forced the end of input).
+ *
+ * Works the same as parseFromStdin(), except that the caller can
+ * provide streams to use instead of `stdin` and `stderr`.
+ *
+ * Mainly usable for unit testing interactive input.
+ */
+ SelectionList parseInteractive(int count,
+ TextInputStream *inputStream,
+ TextOutputStream *outputStream,
+ const std::string &context);
/*! \brief
* Parses selection(s) from a file.
*
#include "gromacs/onlinehelp/helptopic.h"
#include "gromacs/onlinehelp/helpwritercontext.h"
#include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/file.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/stringutil.h"
+#include "gromacs/utility/textwriter.h"
#include "selmethod.h"
#include "symrec.h"
e_selvalue_t type,
bool bModifiers) const
{
- File &file = context.outputFile();
+ TextWriter &file = context.outputFile();
MethodList::const_iterator iter;
for (iter = methods_.begin(); iter != methods_.end(); ++iter)
{
--- /dev/null
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+ <InteractiveSession Name="Interactive">
+ <String Name="Output0"><![CDATA[
+Specify any number of selections for test context:
+(one per line, <enter> for status/groups, 'help' for help, Ctrl-D to end)
+> ]]></String>
+ <String Name="Input1"><![CDATA[
+foo = resname RA
+]]></String>
+ <String Name="Output1"><![CDATA[
+Variable 'foo = resname RA' parsed
+> ]]></String>
+ <String Name="Input2"><![CDATA[
+resname RB
+]]></String>
+ <String Name="Output2"><![CDATA[
+Selection 'resname RB' parsed
+> ]]></String>
+ <String Name="Input3"><![CDATA[
+"Name" resname RC
+]]></String>
+ <String Name="Output3"><![CDATA[
+Selection '"Name" resname RC' parsed
+> ]]></String>
+ <String Name="Input4"><![CDATA[
+]]></String>
+ </InteractiveSession>
+</ReferenceData>
--- /dev/null
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+ <InteractiveSession Name="Interactive">
+ <String Name="Output0"><![CDATA[
+Specify any number of selections for test context:
+(one per line, <enter> for status/groups, 'help' for help, Ctrl-D to end)
+> ]]></String>
+ <String Name="Input1"><![CDATA[
+resname RB and \
+]]></String>
+ <String Name="Output1"><![CDATA[
+... ]]></String>
+ <String Name="Input2"><![CDATA[
+resname RC
+]]></String>
+ <String Name="Output2"><![CDATA[
+Selection 'resname RB and resname RC' parsed
+> ]]></String>
+ <String Name="Input3"><![CDATA[
+]]></String>
+ </InteractiveSession>
+</ReferenceData>
--- /dev/null
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+ <InteractiveSession Name="Interactive">
+ <String Name="Output0"><![CDATA[
+Specify any number of selections for test context:
+(one per line, <enter> for status/groups, 'help' for help, Ctrl-D to end)
+> ]]></String>
+ <String Name="Input1"><![CDATA[
+resname RA;
+]]></String>
+ <String Name="Output1"><![CDATA[
+Selection 'resname RA' parsed
+> ]]></String>
+ <String Name="Input2"><![CDATA[
+; resname RB;;
+]]></String>
+ <String Name="Output2"><![CDATA[
+Selection 'resname RB' parsed
+> ]]></String>
+ <String Name="Input3"><![CDATA[
+
+]]></String>
+ <String Name="Output3"><![CDATA[
+Specify any number of selections for test context:
+(one per line, <enter> for status/groups, 'help' for help, Ctrl-D to end)
+Currently provided selections:
+ 1. resname RA
+ 2. resname RB
+> ]]></String>
+ <String Name="Input4"><![CDATA[
+;
+]]></String>
+ <String Name="Output4"><![CDATA[
+> ]]></String>
+ <String Name="Input5"><![CDATA[
+]]></String>
+ </InteractiveSession>
+</ReferenceData>
--- /dev/null
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+ <InteractiveSession Name="Interactive">
+ <String Name="Output0"><![CDATA[
+Specify any number of selections for test context:
+(one per line, <enter> for status/groups, 'help' for help, Ctrl-D to end)
+> ]]></String>
+ <String Name="Input1"><![CDATA[
+"Sel" resname RA
+]]></String>
+ <String Name="Output1"><![CDATA[
+Selection '"Sel" resname RA' parsed
+> ]]></String>
+ <String Name="Input2"><![CDATA[
+"Sel2" resname RB
+]]></String>
+ <String Name="Output2"><![CDATA[
+Selection '"Sel2" resname RB' parsed
+> ]]></String>
+ <String Name="Input3"><![CDATA[
+
+]]></String>
+ <String Name="Output3"><![CDATA[
+Specify any number of selections for test context:
+(one per line, <enter> for status/groups, 'help' for help, Ctrl-D to end)
+Currently provided selections:
+ 1. "Sel" resname RA
+ 2. "Sel2" resname RB
+> ]]></String>
+ <String Name="Input4"><![CDATA[
+]]></String>
+ </InteractiveSession>
+</ReferenceData>
--- /dev/null
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+ <InteractiveSession Name="Interactive">
+ <String Name="Output0"><![CDATA[
+Specify 2 selections for test context:
+(one per line, <enter> for status/groups, 'help' for help)
+> ]]></String>
+ <String Name="Input1"><![CDATA[
+resname RA; resname RB and \
+]]></String>
+ <String Name="Output1"><![CDATA[
+... ]]></String>
+ <String Name="Input2"><![CDATA[
+resname RC
+]]></String>
+ <String Name="Output2"><![CDATA[
+Selection 'resname RA' parsed
+Selection 'resname RB and resname RC' parsed
+]]></String>
+ </InteractiveSession>
+</ReferenceData>
--- /dev/null
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+ <InteractiveSession Name="Interactive">
+ <String Name="Output0"><![CDATA[
+Specify any number of selections for test context:
+(one per line, <enter> for status/groups, 'help' for help, Ctrl-D to end)
+> ]]></String>
+ <String Name="Input1"><![CDATA[
+resname RA]]></String>
+ <String Name="Output1"><![CDATA[
+
+Selection 'resname RA' parsed
+> ]]></String>
+ <String Name="Input2"><![CDATA[
+]]></String>
+ </InteractiveSession>
+</ReferenceData>
--- /dev/null
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+ <InteractiveSession Name="Interactive">
+ <String Name="Input1"><![CDATA[
+foo = resname RA
+]]></String>
+ <String Name="Input2"><![CDATA[
+resname RB
+]]></String>
+ <String Name="Input3"><![CDATA[
+"Name" resname RC
+]]></String>
+ <String Name="Input4"><![CDATA[
+]]></String>
+ </InteractiveSession>
+</ReferenceData>
--- /dev/null
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+ <InteractiveSession Name="Interactive">
+ <String Name="Output0"><![CDATA[
+Specify a selection for test context:
+(one per line, <enter> for status/groups, 'help' for help)
+> ]]></String>
+ <String Name="Input1"><![CDATA[
+foo = resname RA
+]]></String>
+ <String Name="Output1"><![CDATA[
+Variable 'foo = resname RA' parsed
+> ]]></String>
+ <String Name="Input2"><![CDATA[
+resname RA
+]]></String>
+ <String Name="Output2"><![CDATA[
+Selection 'resname RA' parsed
+]]></String>
+ </InteractiveSession>
+</ReferenceData>
--- /dev/null
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+ <InteractiveSession Name="Interactive">
+ <String Name="Input1"><![CDATA[
+foo = resname RA
+]]></String>
+ <String Name="Input2"><![CDATA[
+resname RA
+]]></String>
+ </InteractiveSession>
+</ReferenceData>
--- /dev/null
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+ <InteractiveSession Name="Interactive">
+ <String Name="Output0"><![CDATA[
+Specify a selection for test context:
+(one per line, <enter> for status/groups, 'help' for help)
+> ]]></String>
+ <String Name="Input1"><![CDATA[
+foo = resname RA
+]]></String>
+ <String Name="Output1"><![CDATA[
+Variable 'foo = resname RA' parsed
+> ]]></String>
+ <String Name="Input2"><![CDATA[
+
+]]></String>
+ <String Name="Output2"><![CDATA[
+Specify a selection for test context:
+(one per line, <enter> for status/groups, 'help' for help)
+Currently provided selections:
+ foo = resname RA
+(1 more selection required)
+> ]]></String>
+ <String Name="Input3"><![CDATA[
+resname RB
+]]></String>
+ <String Name="Output3"><![CDATA[
+Selection 'resname RB' parsed
+]]></String>
+ </InteractiveSession>
+</ReferenceData>
--- /dev/null
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+ <InteractiveSession Name="Interactive">
+ <String Name="Output0"><![CDATA[
+Specify any number of selections for test context:
+(one per line, <enter> for status/groups, 'help' for help, Ctrl-D to end)
+> ]]></String>
+ <String Name="Input1"><![CDATA[
+
+]]></String>
+ <String Name="Output1"><![CDATA[
+Specify any number of selections for test context:
+(one per line, <enter> for status/groups, 'help' for help, Ctrl-D to end)
+Currently provided selections:
+ foo = resname RA
+> ]]></String>
+ <String Name="Input2"><![CDATA[
+bar = resname RC
+]]></String>
+ <String Name="Output2"><![CDATA[
+Variable 'bar = resname RC' parsed
+> ]]></String>
+ <String Name="Input3"><![CDATA[
+resname RA
+]]></String>
+ <String Name="Output3"><![CDATA[
+Selection 'resname RA' parsed
+> ]]></String>
+ <String Name="Input4"><![CDATA[
+
+]]></String>
+ <String Name="Output4"><![CDATA[
+Specify any number of selections for test context:
+(one per line, <enter> for status/groups, 'help' for help, Ctrl-D to end)
+Currently provided selections:
+ foo = resname RA
+ bar = resname RC
+ 1. resname RA
+> ]]></String>
+ <String Name="Input5"><![CDATA[
+]]></String>
+ </InteractiveSession>
+</ReferenceData>
--- /dev/null
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+ <InteractiveSession Name="Interactive">
+ <String Name="Output0"><![CDATA[
+Available static index groups:
+ Group 0 "GrpA" (5 atoms)
+ Group 1 "GrpB" (5 atoms)
+ Group 2 "GrpUnsorted" (8 atoms)
+Specify any number of selections for test context:
+(one per line, <enter> for status/groups, 'help' for help, Ctrl-D to end)
+> ]]></String>
+ <String Name="Input1"><![CDATA[
+resname RA
+]]></String>
+ <String Name="Output1"><![CDATA[
+Selection 'resname RA' parsed
+> ]]></String>
+ <String Name="Input2"><![CDATA[
+
+]]></String>
+ <String Name="Output2"><![CDATA[
+Available static index groups:
+ Group 0 "GrpA" (5 atoms)
+ Group 1 "GrpB" (5 atoms)
+ Group 2 "GrpUnsorted" (8 atoms)
+Specify any number of selections for test context:
+(one per line, <enter> for status/groups, 'help' for help, Ctrl-D to end)
+Currently provided selections:
+ 1. resname RA
+> ]]></String>
+ <String Name="Input3"><![CDATA[
+]]></String>
+ </InteractiveSession>
+</ReferenceData>
--- /dev/null
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+ <InteractiveSession Name="Interactive">
+ <String Name="Output0"><![CDATA[
+Specify 2 selections for test context:
+(one per line, <enter> for status/groups, 'help' for help)
+> ]]></String>
+ <String Name="Input1"><![CDATA[
+resname RA
+]]></String>
+ <String Name="Output1"><![CDATA[
+Selection 'resname RA' parsed
+> ]]></String>
+ <String Name="Input2"><![CDATA[
+resname RB
+]]></String>
+ <String Name="Output2"><![CDATA[
+Selection 'resname RB' parsed
+]]></String>
+ </InteractiveSession>
+</ReferenceData>
--- /dev/null
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+ <InteractiveSession Name="Interactive">
+ <String Name="Output0"><![CDATA[
+Specify 2 selections for test context:
+(one per line, <enter> for status/groups, 'help' for help)
+> ]]></String>
+ <String Name="Input1"><![CDATA[
+"Sel" resname RA
+]]></String>
+ <String Name="Output1"><![CDATA[
+Selection '"Sel" resname RA' parsed
+> ]]></String>
+ <String Name="Input2"><![CDATA[
+
+]]></String>
+ <String Name="Output2"><![CDATA[
+Specify 2 selections for test context:
+(one per line, <enter> for status/groups, 'help' for help)
+Currently provided selections:
+ 1. "Sel" resname RA
+(1 more selection required)
+> ]]></String>
+ <String Name="Input3"><![CDATA[
+resname RB
+]]></String>
+ <String Name="Output3"><![CDATA[
+Selection 'resname RB' parsed
+]]></String>
+ </InteractiveSession>
+</ReferenceData>
<xsl:value-of select="."/>
</xsl:template>
+<xsl:template match="InteractiveSession">
+ <pre>
+ <xsl:for-each select="*">
+ <xsl:choose>
+ <xsl:when test="starts-with(@Name, 'Output')">
+ <xsl:value-of select="substring(.,2)"/>
+ </xsl:when>
+ <xsl:when test="string-length(.)=1">
+ <xsl:text>►</xsl:text>
+ <xsl:text>¶</xsl:text>
+ </xsl:when>
+ <xsl:when test="contains(substring(.,2), '&#10;')">
+ <xsl:text>►</xsl:text>
+ <xsl:value-of select="translate(substring(.,2), '&#10;', '⏎')"/>
+ <xsl:text>&#10;</xsl:text>
+ </xsl:when>
+ <xsl:otherwise>
+ <xsl:text>►</xsl:text>
+ <xsl:value-of select="substring(.,2)"/>
+ <xsl:text>¶</xsl:text>
+ </xsl:otherwise>
+ </xsl:choose>
+ </xsl:for-each>
+ <xsl:text>[EOF]</xsl:text>
+ </pre>
+</xsl:template>
+
</xsl:stylesheet>
<xsl:key name="SelectionName" match="ParsedSelections/ParsedSelection" use="@Name"/>
+<xsl:template match="InteractiveSession">
+ <h2>Interactive Session</h2>
+ <xsl:apply-imports />
+</xsl:template>
+
<xsl:template match="ParsedSelections">
<h2>Parsed Selections</h2>
<table border="1">
#include "gromacs/utility/gmxregex.h"
#include "gromacs/utility/stringutil.h"
+#include "testutils/interactivetest.h"
#include "testutils/refdata.h"
#include "testutils/testasserts.h"
#include "testutils/testfilemanager.h"
}
+/********************************************************************
+ * Test fixture for interactive SelectionCollection tests
+ */
+
+// Fixture that drives SelectionCollection::parseInteractive() through an
+// InteractiveTestHelper, which supplies the input/output streams and is
+// bound to the root checker of this fixture's reference data.
+class SelectionCollectionInteractiveTest : public SelectionCollectionTest
+{
+ public:
+ // helper_ is constructed from data_.rootChecker(). Members are
+ // initialized in declaration order, so data_ must remain declared
+ // before helper_ below.
+ SelectionCollectionInteractiveTest()
+ : helper_(data_.rootChecker())
+ {
+ }
+
+ // Runs one parsing session over the given input lines.
+ //   count        - forwarded unchanged to parseInteractive()
+ //   bInteractive - when false, NULL is passed instead of the helper's
+ //                  output stream
+ //   input        - lines handed to the helper as the session input
+ void runTest(int count, bool bInteractive,
+ const gmx::ConstArrayRef<const char *> &input);
+
+ gmx::test::TestReferenceData data_;
+ gmx::test::InteractiveTestHelper helper_;
+};
+
+// Hands inputLines to the helper, runs parseInteractive() on sc_ with the
+// helper's input stream, and finally asks the helper to check the session.
+// The helper's output stream is passed only when bInteractive is true;
+// otherwise NULL is passed in its place.
+void SelectionCollectionInteractiveTest::runTest(
+ int count, bool bInteractive,
+ const gmx::ConstArrayRef<const char *> &inputLines)
+{
+ helper_.setInputLines(inputLines);
+ // TODO: Check something about the returned selections as well.
+ // NOTE(review): if ASSERT_NO_THROW_GMX is a fatal assertion (returns from
+ // this function on failure), checkSession() below is skipped when parsing
+ // throws - confirm against the macro definition.
+ ASSERT_NO_THROW_GMX(sc_.parseInteractive(
+ count, &helper_.inputStream(),
+ bInteractive ? &helper_.outputStream() : NULL,
+ "for test context"));
+ helper_.checkSession();
+}
+
+
/********************************************************************
* Test fixture for selection testing with reference data
*/
// TODO: Tests for more evaluation errors
+/********************************************************************
+ * Tests for interactive selection input
+ */
+
+// Interactive session with count -1 and three input lines: a "foo = ..."
+// line, a bare resname selection, and a selection prefixed with a quoted name.
+TEST_F(SelectionCollectionInteractiveTest, HandlesBasicInput)
+{
+ const char *const input[] = {
+ "foo = resname RA",
+ "resname RB",
+ "\"Name\" resname RC"
+ };
+ runTest(-1, true, input);
+}
+
+// Interactive session where the first input line ends in a backslash and the
+// selection text continues on the second line.
+TEST_F(SelectionCollectionInteractiveTest, HandlesContinuation)
+{
+ const char *const input[] = {
+ "resname RB and \\",
+ "resname RC"
+ };
+ runTest(-1, true, input);
+}
+
+// Interactive session with count 1: a "foo = ..." line followed by one
+// bare selection.
+TEST_F(SelectionCollectionInteractiveTest, HandlesSingleSelectionInput)
+{
+ const char *const input[] = {
+ "foo = resname RA",
+ "resname RA"
+ };
+ runTest(1, true, input);
+}
+
+// Interactive session with count 2 and exactly two selection lines.
+TEST_F(SelectionCollectionInteractiveTest, HandlesTwoSelectionInput)
+{
+ const char *const input[] = {
+ "resname RA",
+ "resname RB"
+ };
+ runTest(2, true, input);
+}
+
+// Loads index groups from simple.ndx before the session, then enters one
+// selection followed by an empty line (the prompt text in the reference data
+// describes <enter> as the request for status/groups).
+TEST_F(SelectionCollectionInteractiveTest, HandlesStatusWithGroups)
+{
+ const char *const input[] = {
+ "resname RA",
+ ""
+ };
+ loadIndexGroups("simple.ndx");
+ runTest(-1, true, input);
+}
+
+// Parses two strings through parseFromString() before the interactive
+// session starts; the session itself begins and ends with an empty line,
+// with a "bar = ..." line and a bare selection in between.
+TEST_F(SelectionCollectionInteractiveTest, HandlesStatusWithExistingSelections)
+{
+ const char *const input[] = {
+ "",
+ "bar = resname RC",
+ "resname RA",
+ ""
+ };
+ ASSERT_NO_THROW_GMX(sc_.parseFromString("foo = resname RA"));
+ ASSERT_NO_THROW_GMX(sc_.parseFromString("resname RB"));
+ runTest(-1, true, input);
+}
+
+// Interactive session with count 1 where an empty line is entered between
+// the "foo = ..." line and the selection.
+TEST_F(SelectionCollectionInteractiveTest, HandlesSingleSelectionInputStatus)
+{
+ const char *const input[] = {
+ "foo = resname RA",
+ "",
+ "resname RB"
+ };
+ runTest(1, true, input);
+}
+
+// Interactive session with count 2 where an empty line is entered after the
+// first (quoted-name) selection and before the second one.
+TEST_F(SelectionCollectionInteractiveTest, HandlesTwoSelectionInputStatus)
+{
+ const char *const input[] = {
+ "\"Sel\" resname RA",
+ "",
+ "resname RB"
+ };
+ runTest(2, true, input);
+}
+
+// Interactive session with count -1: two selections with quoted names,
+// followed by an empty line.
+TEST_F(SelectionCollectionInteractiveTest, HandlesMultiSelectionInputStatus)
+{
+ const char *const input[] = {
+ "\"Sel\" resname RA",
+ "\"Sel2\" resname RB",
+ ""
+ };
+ runTest(-1, true, input);
+}
+
+// Single-line session run after setLastNewline(false) has been called on the
+// helper, i.e. with the helper told not to end the input with a newline.
+TEST_F(SelectionCollectionInteractiveTest, HandlesNoFinalNewline)
+{
+ // TODO: There is an extra prompt printed after the input is finished; it
+ // would be cleaner not to have it, but it's only a cosmetic issue.
+ const char *const input[] = {
+ "resname RA"
+ };
+ helper_.setLastNewline(false);
+ runTest(-1, true, input);
+}
+
+// Interactive session whose lines contain leading, trailing and repeated
+// semicolons, a whitespace-only line, and a line holding only a semicolon.
+TEST_F(SelectionCollectionInteractiveTest, HandlesEmptySelections)
+{
+ const char *const input[] = {
+ "resname RA;",
+ "; resname RB;;",
+ " ",
+ ";"
+ };
+ runTest(-1, true, input);
+}
+
+// Interactive session with count 2 where the first line holds two
+// semicolon-separated selections and ends in a backslash, so the second
+// selection continues on the next line.
+TEST_F(SelectionCollectionInteractiveTest, HandlesMultipleSelectionsOnLine)
+{
+ const char *const input[] = {
+ "resname RA; resname RB and \\",
+ "resname RC"
+ };
+ runTest(2, true, input);
+}
+
+// Same three input lines as HandlesBasicInput, but with bInteractive false,
+// so runTest() passes NULL instead of an output stream.
+TEST_F(SelectionCollectionInteractiveTest, HandlesNoninteractiveInput)
+{
+ const char *const input[] = {
+ "foo = resname RA",
+ "resname RB",
+ "\"Name\" resname RC"
+ };
+ runTest(-1, false, input);
+}
+
+// Same input as HandlesSingleSelectionInput (count 1), but with bInteractive
+// false, so runTest() passes NULL instead of an output stream.
+TEST_F(SelectionCollectionInteractiveTest, HandlesSingleSelectionInputNoninteractively)
+{
+ const char *const input[] = {
+ "foo = resname RA",
+ "resname RA"
+ };
+ runTest(1, false, input);
+}
+
/********************************************************************
* Tests for selection keywords
#
# This file is part of the GROMACS molecular simulation package.
#
-# Copyright (c) 2014, by the GROMACS development team, led by
+# Copyright (c) 2014,2015, by the GROMACS development team, led by
# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
# and including many others, as listed in the AUTHORS file in the
# top-level source directory and at http://www.gromacs.org.
# To help us fund GROMACS development, we humbly ask that you cite
# the research papers on the package. Check out http://www.gromacs.org.
-file(GLOB TOOLS_SOURCES *.cpp *.c)
+file(GLOB TOOLS_SOURCES *.cpp)
set(LIBGROMACS_SOURCES ${LIBGROMACS_SOURCES} ${TOOLS_SOURCES} PARENT_SCOPE)
if (BUILD_TESTING)
*/
#include "gmxpre.h"
-#include <math.h>
-#include <stdio.h>
-#include <string.h>
+#include <cmath>
+#include <cstdio>
+#include <cstring>
#include "gromacs/commandline/pargs.h"
#include "gromacs/fileio/confio.h"
static void tpx2system(FILE *fp, gmx_mtop_t *mtop)
{
- int i, nmol, nvsite = 0;
+ int nmol, nvsite = 0;
gmx_mtop_atomloop_block_t aloop;
t_atom *atom;
static void tpx2methods(const char *tpx, const char *tex)
{
FILE *fp;
- t_tpxheader sh;
t_inputrec ir;
t_state state;
gmx_mtop_t mtop;
static void chk_bonds(t_idef *idef, int ePBC, rvec *x, matrix box, real tol)
{
- int ftype, i, k, ai, aj, type;
- real b0, blen, deviation, devtot;
+ int ftype, k, ai, aj, type;
+ real b0, blen, deviation;
t_pbc pbc;
rvec dx;
- devtot = 0;
set_pbc(&pbc, ePBC, box);
for (ftype = 0; (ftype < F_NRE); ftype++)
{
b0 = idef->iparams[type].harmonic.rA;
break;
case F_G96BONDS:
- b0 = sqrt(idef->iparams[type].harmonic.rA);
+ b0 = std::sqrt(idef->iparams[type].harmonic.rA);
break;
case F_MORSE:
b0 = idef->iparams[type].morse.b0A;
pbc_dx(&pbc, x[ai], x[aj], dx);
blen = norm(dx);
deviation = sqr(blen-b0);
- if (sqrt(deviation/sqr(b0) > tol))
+ if (std::sqrt(deviation/sqr(b0)) > tol)
{
fprintf(stderr, "Distance between atoms %d and %d is %.3f, should be %.3f\n", ai+1, aj+1, blen, b0);
}
t_count count;
t_fr_time first, last;
int j = -1, new_natoms, natoms;
- real rdum, tt, old_t1, old_t2, prec;
- gmx_bool bShowTimestep = TRUE, bOK, newline = FALSE;
+ real old_t1, old_t2;
+ gmx_bool bShowTimestep = TRUE, newline = FALSE;
t_trxstatus *status;
gmx_mtop_t mtop;
gmx_localtop_t *top = NULL;
*(atoms->resinfo[atoms->atom[j].resind].name),
atoms->resinfo[atoms->atom[j].resind].nr,
atom_vdw[j],
- sqrt(r2) );
+ std::sqrt(r2) );
}
}
}
{
t_blocka *grps;
char **grpname;
- int i, j;
+ int i;
grps = init_index(fn, &grpname);
if (debug)
void chk_enx(const char *fn)
{
- int nre, fnr, ndr;
+ int nre, fnr;
ener_file_t in;
gmx_enxnm_t *enm = NULL;
t_enxframe *fr;
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2014, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#ifndef GMX_TOOLS_CHECK_H
#define GMX_TOOLS_CHECK_H
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
/*! \brief Implements gmx check
*
* \param[in] argc argc value passed to main().
*/
int gmx_check(int argc, char *argv[]);
-#ifdef __cplusplus
-}
-#endif
-
#endif
* the research papers on the package. Check out http://www.gromacs.org.
*/
/* This file is completely threadsafe - keep it that way! */
+
#include "gmxpre.h"
-#include <math.h>
-#include <stdio.h>
-#include <string.h>
+#include <cmath>
+#include <cstdio>
+#include <cstring>
+
+#include <algorithm>
#include "gromacs/fileio/enxio.h"
#include "gromacs/fileio/tpxio.h"
if (i1 != i2)
{
fprintf(fp, "%s (", s);
- fprintf(fp, "%"GMX_PRId64, i1);
+ fprintf(fp, "%" GMX_PRId64, i1);
fprintf(fp, " - ");
- fprintf(fp, "%"GMX_PRId64, i2);
+ fprintf(fp, "%" GMX_PRId64, i2);
fprintf(fp, ")\n");
}
}
static void cmp_str(FILE *fp, const char *s, int index,
const char *s1, const char *s2)
{
- if (strcmp(s1, s2) != 0)
+ if (std::strcmp(s1, s2) != 0)
{
if (index != -1)
{
{
cmp_int(fp, "idef->ntypes", -1, id1->ntypes, id2->ntypes);
cmp_int(fp, "idef->atnr", -1, id1->atnr, id2->atnr);
- for (i = 0; (i < min(id1->ntypes, id2->ntypes)); i++)
+ for (i = 0; (i < std::min(id1->ntypes, id2->ntypes)); i++)
{
sprintf(buf1, "idef->functype[%d]", i);
sprintf(buf2, "idef->iparam[%d]", i);
static void cmp_block(FILE *fp, t_block *b1, t_block *b2, const char *s)
{
- int i, j, k;
char buf[32];
fprintf(fp, "comparing block %s\n", s);
static void cmp_blocka(FILE *fp, t_blocka *b1, t_blocka *b2, const char *s)
{
- int i, j, k;
char buf[32];
fprintf(fp, "comparing blocka %s\n", s);
static void cmp_atom(FILE *fp, int index, t_atom *a1, t_atom *a2, real ftol, real abstol)
{
- int i;
- char buf[256];
-
if (a2)
{
cmp_us(fp, "atom.type", index, a1->type, a2->type);
static void cmp_top(FILE *fp, t_topology *t1, t_topology *t2, real ftol, real abstol)
{
- int i;
-
fprintf(fp, "comparing top\n");
if (t2)
{
static void cmp_groups(FILE *fp, gmx_groups_t *g0, gmx_groups_t *g1,
int natoms0, int natoms1)
{
- int i, j, ndiff;
+ int i, j;
char buf[32];
fprintf(fp, "comparing groups\n");
ssd += d*d;
}
}
- fprintf(fp, "%s RMSD %g\n", title, sqrt(ssd/n));
+ fprintf(fp, "%s RMSD %g\n", title, std::sqrt(ssd/n));
}
else
{
rms_x1 += d*d;
}
}
- rms_x1 = sqrt(rms_x1/(DIM*n));
+ rms_x1 = std::sqrt(rms_x1/(DIM*n));
/* And now do the actual comparision with a hopefully realistic abstol. */
for (i = 0; (i < n); i++)
{
cmp_int(fp, "inputrec->grpopts.ngacc", -1, opt1->ngacc, opt2->ngacc);
cmp_int(fp, "inputrec->grpopts.ngfrz", -1, opt1->ngfrz, opt2->ngfrz);
cmp_int(fp, "inputrec->grpopts.ngener", -1, opt1->ngener, opt2->ngener);
- for (i = 0; (i < min(opt1->ngtc, opt2->ngtc)); i++)
+ for (i = 0; (i < std::min(opt1->ngtc, opt2->ngtc)); i++)
{
cmp_real(fp, "inputrec->grpopts.nrdf", i, opt1->nrdf[i], opt2->nrdf[i], ftol, abstol);
cmp_real(fp, "inputrec->grpopts.ref_t", i, opt1->ref_t[i], opt2->ref_t[i], ftol, abstol);
}
}
}
- for (i = 0; (i < min(opt1->ngacc, opt2->ngacc)); i++)
+ for (i = 0; (i < std::min(opt1->ngacc, opt2->ngacc)); i++)
{
cmp_rvec(fp, "inputrec->grpopts.acc", i, opt1->acc[i], opt2->acc[i], ftol, abstol);
}
- for (i = 0; (i < min(opt1->ngfrz, opt2->ngfrz)); i++)
+ for (i = 0; (i < std::min(opt1->ngfrz, opt2->ngfrz)); i++)
{
cmp_ivec(fp, "inputrec->grpopts.nFreeze", i, opt1->nFreeze[i], opt2->nFreeze[i]);
}
{
sprintf(buf, "inputrec->%s[%d]", s, m);
cmp_int(fp, buf, 0, c1->n, c2->n);
- for (i = 0; (i < min(c1->n, c2->n)); i++)
+ for (i = 0; (i < std::min(c1->n, c2->n)); i++)
{
cmp_real(fp, buf, i, c1->a[i], c2->a[i], ftol, abstol);
cmp_real(fp, buf, i, c1->phi[i], c2->phi[i], ftol, abstol);
cmp_int(fp, "inputrec->fepvals->n_lambda", -1, fep1->n_lambda, fep2->n_lambda);
for (i = 0; i < efptNR; i++)
{
- for (j = 0; j < min(fep1->n_lambda, fep2->n_lambda); j++)
+ for (j = 0; j < std::min(fep1->n_lambda, fep2->n_lambda); j++)
{
cmp_double(fp, "inputrec->fepvals->all_lambda", -1, fep1->all_lambda[i][j], fep2->all_lambda[i][j], ftol, abstol);
}
cmp_int(fp, "inputrec->bSimTemp", -1, ir1->bSimTemp, ir2->bSimTemp);
if ((ir1->bSimTemp == ir2->bSimTemp) && (ir1->bSimTemp))
{
- cmp_simtempvals(fp, ir1->simtempvals, ir2->simtempvals, min(ir1->fepvals->n_lambda, ir2->fepvals->n_lambda), ftol, abstol);
+ cmp_simtempvals(fp, ir1->simtempvals, ir2->simtempvals, std::min(ir1->fepvals->n_lambda, ir2->fepvals->n_lambda), ftol, abstol);
}
cmp_int(fp, "inputrec->bExpanded", -1, ir1->bExpanded, ir2->bExpanded);
if ((ir1->bExpanded == ir2->bExpanded) && (ir1->bExpanded))
{
- cmp_expandedvals(fp, ir1->expandedvals, ir2->expandedvals, min(ir1->fepvals->n_lambda, ir2->fepvals->n_lambda), ftol, abstol);
+ cmp_expandedvals(fp, ir1->expandedvals, ir2->expandedvals, std::min(ir1->fepvals->n_lambda, ir2->fepvals->n_lambda), ftol, abstol);
}
cmp_int(fp, "inputrec->nwall", -1, ir1->nwall, ir2->nwall);
cmp_int(fp, "inputrec->wall_type", -1, ir1->wall_type, ir2->wall_type);
gmx_bool bRMSD, real ftol, real abstol)
{
const char *ff[2];
- t_tpxheader sh[2];
t_inputrec ir[2];
t_state state[2];
gmx_mtop_t mtop[2];
}
if (cmp_bool(fp, "bX", -1, fr1->bX, fr2->bX))
{
- cmp_rvecs(fp, "x", min(fr1->natoms, fr2->natoms), fr1->x, fr2->x, bRMSD, ftol, abstol);
+ cmp_rvecs(fp, "x", std::min(fr1->natoms, fr2->natoms), fr1->x, fr2->x, bRMSD, ftol, abstol);
}
if (cmp_bool(fp, "bV", -1, fr1->bV, fr2->bV))
{
- cmp_rvecs(fp, "v", min(fr1->natoms, fr2->natoms), fr1->v, fr2->v, bRMSD, ftol, abstol);
+ cmp_rvecs(fp, "v", std::min(fr1->natoms, fr2->natoms), fr1->v, fr2->v, bRMSD, ftol, abstol);
}
if (cmp_bool(fp, "bF", -1, fr1->bF, fr2->bF))
{
if (bRMSD)
{
- cmp_rvecs(fp, "f", min(fr1->natoms, fr2->natoms), fr1->f, fr2->f, bRMSD, ftol, abstol);
+ cmp_rvecs(fp, "f", std::min(fr1->natoms, fr2->natoms), fr1->f, fr2->f, bRMSD, ftol, abstol);
}
else
{
- cmp_rvecs_rmstol(fp, "f", min(fr1->natoms, fr2->natoms), fr1->f, fr2->f, ftol, abstol);
+ cmp_rvecs_rmstol(fp, "f", std::min(fr1->natoms, fr2->natoms), fr1->f, fr2->f, ftol, abstol);
}
}
if (cmp_bool(fp, "bBox", -1, fr1->bBox, fr2->bBox))
int *tensi, int i,
t_energy e1[], t_energy e2[])
{
- int d1, d2;
- int len;
- int j;
- real prod1, prod2;
- int nfound;
+ int d1, d2;
+ int j;
+ real prod1, prod2;
+ int nfound;
+ size_t len;
d1 = tensi[i]/DIM;
d2 = tensi[i] - d1*DIM;
/* Find the diagonal elements d1 and d2 */
- len = strlen(enm1[ind1[i]].name);
+ len = std::strlen(enm1[ind1[i]].name);
prod1 = 1;
prod2 = 1;
nfound = 0;
for (j = 0; j < n; j++)
{
if (tensi[j] >= 0 &&
- strlen(enm1[ind1[j]].name) == len &&
- strncmp(enm1[ind1[i]].name, enm1[ind1[j]].name, len-2) == 0 &&
+ std::strlen(enm1[ind1[j]].name) == len &&
+ std::strncmp(enm1[ind1[i]].name, enm1[ind1[j]].name, len-2) == 0 &&
(tensi[j] == d1*DIM+d1 || tensi[j] == d2*DIM+d2))
{
prod1 *= fabs(e1[ind1[j]].e);
if (nfound == 2)
{
- return 0.5*(sqrt(prod1) + sqrt(prod2));
+ return 0.5*(std::sqrt(prod1) + std::sqrt(prod2));
}
else
{
{
int len1, len2;
- len1 = strlen(nm1);
- len2 = strlen(nm2);
+ len1 = std::strlen(nm1);
+ len2 = std::strlen(nm2);
/* Remove " (bar)" at the end of a name */
- if (len1 > 6 && strcmp(nm1+len1-6, " (bar)") == 0)
+ if (len1 > 6 && std::strcmp(nm1+len1-6, " (bar)") == 0)
{
len1 -= 6;
}
- if (len2 > 6 && strcmp(nm2+len2-6, " (bar)") == 0)
+ if (len2 > 6 && std::strcmp(nm2+len2-6, " (bar)") == 0)
{
len2 -= 6;
}
{
ii = ind1[i];
tensi[i] = -1;
- len = strlen(enm1[ii].name);
+ len = std::strlen(enm1[ii].name);
if (len > 3 && enm1[ii].name[len-3] == '-')
{
d1 = enm1[ii].name[len-2] - 'X';
if (abstol_i > 0)
{
/* We found a diagonal, we need to check with the minimum tolerance */
- abstol_i = min(abstol_i, abstol);
+ abstol_i = std::min(abstol_i, abstol);
}
else
{
void comp_enx(const char *fn1, const char *fn2, real ftol, real abstol, const char *lastener)
{
- int nre, nre1, nre2, block;
+ int nre, nre1, nre2;
ener_file_t in1, in2;
int i, j, maxener, *ind1, *ind2, *have;
- char buf[256];
gmx_enxnm_t *enm1 = NULL, *enm2 = NULL;
t_enxframe *fr1, *fr2;
gmx_bool b1, b2;
maxener = nre;
for (i = 0; i < nre; i++)
{
- if ((lastener != NULL) && (strstr(enm1[i].name, lastener) != NULL))
+ if ((lastener != NULL) && (std::strstr(enm1[i].name, lastener) != NULL))
{
maxener = i+1;
break;
*/
#include "gmxpre.h"
-#include <math.h>
+#include <cmath>
#include "gromacs/commandline/pargs.h"
#include "gromacs/fileio/enxio.h"
gmx_bool bFrame, bUse, bSel, bNeedEner, bReadEner, bScanEner, bFepState;
gmx_mtop_t mtop;
t_atoms atoms;
- t_inputrec *ir, *irnew = NULL;
- t_gromppopts *gopts;
+ t_inputrec *ir;
t_state state;
rvec *newx = NULL, *newv = NULL, *tmpx, *tmpv;
matrix newbox;
{ "-init_fep_state", FALSE, etINT, {&init_fep_state},
"fep state to initialize from" },
};
- int nerror = 0;
/* Parse the command line */
if (!parse_common_args(&argc, argv, 0, NFILE, fnm, asize(pa), pa,
if (EI_SD(ir->eI) || ir->eI == eiBD)
{
- fprintf(stderr, "\nChanging ld-seed from %"GMX_PRId64 " ", ir->ld_seed);
+ fprintf(stderr, "\nChanging ld-seed from %" GMX_PRId64 " ", ir->ld_seed);
ir->ld_seed = (gmx_int64_t)gmx_rng_make_seed();
- fprintf(stderr, "to %"GMX_PRId64 "\n\n", ir->ld_seed);
+ fprintf(stderr, "to %" GMX_PRId64 "\n\n", ir->ld_seed);
}
frame_fn = ftp2fn(efTRN, NFILE, fnm);
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2014, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#ifndef GMX_TOOLS_CONVERT_TPR_H
#define GMX_TOOLS_CONVERT_TPR_H
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
/*! \brief Implements gmx convert-tpr
*
* \param[in] argc argc value passed to main().
*/
int gmx_convert_tpr(int argc, char *argv[]);
-#ifdef __cplusplus
-}
-#endif
-
#endif
#include "config.h"
-#include <assert.h>
-#include <math.h>
-#include <stdio.h>
-#include <string.h>
+#include <cassert>
+#include <cmath>
+#include <cstdio>
+#include <cstring>
#include "gromacs/commandline/pargs.h"
#include "gromacs/fileio/enxio.h"
gmx_bool bSysTop)
{
FILE *gp;
- int fp, indent, i, j, **gcount, atot;
+ int indent, i, j, **gcount, atot;
t_state state;
rvec *f = NULL;
t_inputrec ir;
if (available(stdout, &tpx, 0, fn))
{
indent = 0;
- indent = pr_title(stdout, indent, fn);
+ pr_title(stdout, indent, fn);
pr_inputrec(stdout, 0, "inputrec", tpx.bIr ? &(ir) : NULL, FALSE);
- indent = 0;
pr_header(stdout, indent, "header", &(tpx));
if (!bSysTop)
void list_ene(const char *fn)
{
- int ndr;
ener_file_t in;
gmx_bool bCont;
gmx_enxnm_t *enm = NULL;
t_enxframe *fr;
int i, j, nre, b;
- real rav, minthird;
char buf[22];
printf("gmx dump: %s\n", fn);
printf("%5d %-24s (%s)\n", i, enm[i].name, enm[i].unit);
}
- minthird = -1.0/3.0;
snew(fr, 1);
do
{
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2014, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#ifndef GMX_TOOLS_DUMP_H
#define GMX_TOOLS_DUMP_H
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
/*! \brief Implements gmx dump
*
* \param[in] argc argc value passed to main().
*/
int gmx_dump(int argc, char *argv[]);
-#ifdef __cplusplus
-}
-#endif
-
#endif
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2010,2011,2012,2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2010,2011,2012,2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "gromacs/trajectoryanalysis/analysismodule.h"
#include "gromacs/trajectoryanalysis/analysissettings.h"
#include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/file.h"
+#include "gromacs/utility/filestream.h"
#include "gromacs/utility/gmxassert.h"
#include "runnercommon.h"
common->initIndexGroups(selections, bUseDefaultGroups_);
- const bool bInteractive = File::standardInput().isInteractive();
+ const bool bInteractive = StandardInputStream::instance().isInteractive();
seloptManager.parseRequestedFromStdin(bInteractive);
common->doneIndexGroups(selections);
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2010,2011,2012,2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2010,2011,2012,2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
{
AnalysisDataPlotModulePointer plotm(
new AnalysisDataPlotModule(settings.plotSettings()));
- plotm->setFileName(fnAll_);
+ plotm->setFileName(fnXYZ_);
plotm->setTitle("Distance");
plotm->setXAxisIsTime();
plotm->setYLabel("Distance (nm)");
<xsl:value-of select="."/>
</xsl:template>
+<!--
+  Renders an InteractiveSession element as a preformatted transcript.
+
+  Each child element is handled in document order:
+    * Name starts with 'Output': the text is copied without its first
+      character.
+    * Input of length 1: shown as the input marker followed by a pilcrow.
+    * Input containing a line feed after its first character: shown as the
+      input marker, with every line feed replaced by a return symbol, and
+      then a real line feed so the transcript continues on a new line.
+    * Any other input: shown as the input marker, the text without its
+      first character, and a pilcrow.
+  A literal [EOF] marker closes the transcript.
+
+  The line feed is written as the character reference &#10; on purpose: a
+  literal line break or space inside the attribute value would make the
+  test match (and translate) ordinary spaces within a selection instead of
+  line ends.
+-->
+<xsl:template match="InteractiveSession">
+    <pre>
+        <xsl:for-each select="*">
+            <xsl:choose>
+                <xsl:when test="starts-with(@Name, 'Output')">
+                    <xsl:value-of select="substring(.,2)"/>
+                </xsl:when>
+                <xsl:when test="string-length(.)=1">
+                    <xsl:text>►</xsl:text>
+                    <xsl:text>¶</xsl:text>
+                </xsl:when>
+                <xsl:when test="contains(substring(.,2), '&#10;')">
+                    <xsl:text>►</xsl:text>
+                    <xsl:value-of select="translate(substring(.,2), '&#10;', '⏎')"/>
+                    <xsl:text>&#10;</xsl:text>
+                </xsl:when>
+                <xsl:otherwise>
+                    <xsl:text>►</xsl:text>
+                    <xsl:value-of select="substring(.,2)"/>
+                    <xsl:text>¶</xsl:text>
+                </xsl:otherwise>
+            </xsl:choose>
+        </xsl:for-each>
+        <xsl:text>[EOF]</xsl:text>
+    </pre>
+</xsl:template>
+
</xsl:stylesheet>
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2010,2011,2012,2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2010,2011,2012,2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
* containers to simplify implementation of other code. Contents of the module
* are discussed in more detail under the different headings below.
* Some of the code in installed headers in the module is intended for use
- * directly from code outside the Gromacs library, but a significant portion is
- * exposed only because other public headers depend on it.
+ * directly from code outside the \Gromacs library, but a significant portion
+ * is exposed only because other public headers depend on it.
*
* Since this module implements error handling, it should be at the lowest
* level: it should not depend on other modules. Any functionality needed by
* the error handling code should also be kept in this module.
*
- * <H3>Error Handling</H3>
+ * <H3>Error handling</H3>
*
* Exception classes used in the library are declared in the exceptions.h header
- * file. Most Gromacs-specific exceptions derive from gmx::GromacsException.
+ * file. Most \Gromacs-specific exceptions derive from gmx::GromacsException.
*
* This header also declares a ::GMX_THROW macro that should be used for
* throwing exceptions. ::GMX_THROW_WITH_ERRNO is also provided for reporting
* \endif
*
*
- * <H3>Basic %File Handling</H3>
+ * \if libapi
+ *
+ * <H3>Basic file handling and streams</H3>
*
- * The header file.h declares a gmx::File class for basic I/O support.
+ * The header textstream.h declares interfaces for simple text format streams.
+ * Headers filestream.h and stringstream.h provide implementations for these
+ * streams for reading/writing files and for writing to in-memory strings.
*
- * The header path.h declares helpers for manipulating paths and for managing
- * directories.
+ * The header fileredirector.h provides interfaces for redirecting file input
+ * and/or output to alternative streams, for use in testing, as well as default
+ * implementations for these interfaces that just use the file system.
*
- * The fate of these headers depends on what is decided in Redmine issue #950.
+ * The header textwriter.h provides gmx::TextWriter for more formatting support
+ * when writing to a text stream. Similarly, textreader.h provides more
+ * formatting support when reading from a text stream.
*
+ * The header path.h declares helpers for manipulating paths as strings and for
+ * managing directories and files.
+ * The fate of this header depends on what is decided in Redmine issue #950.
+ *
+ * \endif
*
- * <H3>Implementation Helpers</H3>
+ * <H3>Implementation helpers</H3>
*
* The header basedefinitions.h contains common definitions and macros used
* throughout \Gromacs. It includes fixed-width integer types (`gmx_int64_t`
* safety when using bit flag fields.
*
*
- * <H3>Other Functionality</H3>
+ * <H3>Other functionality</H3>
*
* The header init.h declares gmx::init() and gmx::finalize() for initializing
* and deinitializing the \Gromacs library.
errorcodes.h
exceptions.h
fatalerror.h
- file.h
flags.h
futil.h
gmxassert.h
#include "buildinfo.h"
#include "gromacs/utility/directoryenumerator.h"
#include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/file.h"
+#include "gromacs/utility/filestream.h"
#include "gromacs/utility/path.h"
#include "gromacs/utility/programcontext.h"
#include "gromacs/utility/stringutil.h"
fprintf(debug, "Opening library file %s\n", fn);
}
#endif
- return File::openRawHandle(filename, "r");
+ return TextInputFile::openRawHandle(filename);
}
std::string DataFileFinder::findFile(const DataFileOptions &options) const
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2010,2011,2012,2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2010,2011,2012,2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include <cstdlib>
-#include "thread_mpi/mutex.h"
+#include "gromacs/utility/mutex.h"
#include "errorformat.h"
//! Global error handler set with setFatalErrorHandler().
ErrorHandlerFunc g_errorHandler = standardErrorHandler;
//! Mutex for protecting access to ::g_errorHandler.
-tMPI::mutex handler_mutex;
+Mutex handler_mutex;
//! \}
ErrorHandlerFunc setFatalErrorHandler(ErrorHandlerFunc handler)
{
- tMPI::lock_guard<tMPI::mutex> lock(handler_mutex);
- ErrorHandlerFunc oldHandler = g_errorHandler;
+ lock_guard<Mutex> lock(handler_mutex);
+ ErrorHandlerFunc oldHandler = g_errorHandler;
g_errorHandler = handler;
return oldHandler;
}
{
ErrorHandlerFunc handler = NULL;
{
- tMPI::lock_guard<tMPI::mutex> lock(handler_mutex);
+ lock_guard<Mutex> lock(handler_mutex);
handler = g_errorHandler;
}
if (handler != NULL)
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2011,2012,2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2011,2012,2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "gromacs/utility/errorcodes.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/stringutil.h"
+#include "gromacs/utility/textwriter.h"
#include "errorformat.h"
FILE *fp_;
};
+/*! \brief
+ * Exception information writer that emits every line through a TextWriter.
+ */
+class MessageWriterTextWriter : public MessageWriterInterface
+{
+    public:
+        //! Creates a writer that forwards all output to \p writer.
+        explicit MessageWriterTextWriter(TextWriter *writer) : writer_(writer)
+        {
+        }
+
+        virtual void writeLine(const char *text, int indent)
+        {
+            applyIndent(indent);
+            writer_->writeLine(text);
+        }
+        virtual void writeErrNoInfo(int errorNumber, const char *funcName,
+                                    int indent)
+        {
+            applyIndent(indent);
+            const std::string reason
+                = formatString("Reason: %s", std::strerror(errorNumber));
+            writer_->writeLine(reason);
+            // The originating call is reported only when its name is known.
+            if (funcName == NULL)
+            {
+                return;
+            }
+            const std::string callInfo
+                = formatString("(call to %s() returned error code %d)",
+                               funcName, errorNumber);
+            writer_->writeLine(callInfo);
+        }
+
+    private:
+        //! Sets the indent in the wrapped writer's wrapper settings.
+        void applyIndent(int indent)
+        {
+            writer_->wrapperSettings().setIndent(indent);
+        }
+
+        TextWriter *writer_;
+};
+
/*! \brief
* Exception information writer to format into an std::string.
*/
formatExceptionMessageInternal(&writer, ex, 0);
}
+// Wraps the given TextWriter in a MessageWriterTextWriter and passes it,
+// together with the exception, to formatExceptionMessageInternal().
+// NOTE(review): the trailing 0 is presumably the initial indentation level;
+// confirm against formatExceptionMessageInternal().
+void formatExceptionMessageToWriter(TextWriter *writer,
+ const std::exception &ex)
+{
+ MessageWriterTextWriter messageWriter(writer);
+ formatExceptionMessageInternal(&messageWriter, ex, 0);
+}
+
int processExceptionAtExit(const std::exception & /*ex*/)
{
int returnCode = 1;
namespace gmx
{
+class TextWriter;
+
namespace internal
{
//! Internal container type for storing a list of nested exceptions.
* \throws std::bad_alloc if out of memory.
*/
void formatExceptionMessageToFile(FILE *fp, const std::exception &ex);
+/*! \brief
+ * Formats an error message for reporting an exception.
+ *
+ * \param writer Writer to use for writing the message.
+ * \param[in] ex Exception to format.
+ * \throws std::bad_alloc if out of memory.
+ *
+ * Counterpart of formatExceptionMessageToFile() that takes a TextWriter
+ * instead of a `FILE` handle.
+ * NOTE(review): errors raised by \p writer while writing are presumably
+ * propagated to the caller as well; confirm and document.
+ */
+void formatExceptionMessageToWriter(TextWriter *writer,
+ const std::exception &ex);
/*! \brief
* Handles an exception that is causing the program to terminate.
*
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*! \internal \file
- * \brief
- * Implements gmx::File.
- *
- * \author Teemu Murtola <teemu.murtola@gmail.com>
- * \ingroup module_utility
- */
-#include "gmxpre.h"
-
-#include "file.h"
-
-#include "config.h"
-
-#include <cerrno>
-#include <cstdio>
-#include <cstring>
-
-#include <algorithm>
-#include <string>
-#include <vector>
-
-#include <sys/stat.h>
-
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-
-#include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/gmxassert.h"
-#include "gromacs/utility/stringutil.h"
-
-namespace gmx
-{
-
-/*! \internal \brief
- * Private implementation class for File.
- *
- * \ingroup module_utility
- */
-class File::Impl
-{
- public:
- /*! \brief
- * Initialize a file object with the given handle.
- *
- * \param[in] fp %File handle to use (may be NULL).
- * \param[in] bClose Whether this object should close its file handle.
- */
- Impl(FILE *fp, bool bClose);
- ~Impl();
-
- //! File handle for this object (may be NULL).
- FILE *fp_;
- /*! \brief
- * Whether \p fp_ should be closed by this object.
- *
- * Can be true if \p fp_ is NULL.
- */
- bool bClose_;
-};
-
-File::Impl::Impl(FILE *fp, bool bClose)
- : fp_(fp), bClose_(bClose)
-{
-}
-
-File::Impl::~Impl()
-{
- if (fp_ != NULL && bClose_)
- {
- if (fclose(fp_) != 0)
- {
- // TODO: Log the error somewhere
- }
- }
-}
-
-// static
-FILE *File::openRawHandle(const char *filename, const char *mode)
-{
- FILE *fp = fopen(filename, mode);
- if (fp == NULL)
- {
- GMX_THROW_WITH_ERRNO(
- FileIOError(formatString("Could not open file '%s'", filename)),
- "fopen", errno);
- }
- return fp;
-}
-
-// static
-FILE *File::openRawHandle(const std::string &filename, const char *mode)
-{
- return openRawHandle(filename.c_str(), mode);
-}
-
-File::File(const char *filename, const char *mode)
- : impl_(new Impl(NULL, true))
-{
- open(filename, mode);
-}
-
-File::File(const std::string &filename, const char *mode)
- : impl_(new Impl(NULL, true))
-{
- open(filename, mode);
-}
-
-File::File(const FileInitializer &initializer)
- : impl_(new Impl(NULL, true))
-{
- open(initializer.filename_, initializer.mode_);
-}
-
-File::File(FILE *fp, bool bClose)
- : impl_(new Impl(fp, bClose))
-{
-}
-
-File::~File()
-{
-}
-
-void File::open(const char *filename, const char *mode)
-{
- GMX_RELEASE_ASSERT(impl_->fp_ == NULL,
- "Attempted to open the same file object twice");
- // TODO: Port all necessary functionality from gmx_ffopen() here.
- impl_->fp_ = openRawHandle(filename, mode);
-}
-
-void File::open(const std::string &filename, const char *mode)
-{
- open(filename.c_str(), mode);
-}
-
-void File::close()
-{
- GMX_RELEASE_ASSERT(impl_->fp_ != NULL,
- "Attempted to close a file object that is not open");
- GMX_RELEASE_ASSERT(impl_->bClose_,
- "Attempted to close a file object that should not be");
- bool bOk = (fclose(impl_->fp_) == 0);
- impl_->fp_ = NULL;
- if (!bOk)
- {
- GMX_THROW_WITH_ERRNO(
- FileIOError("Error while closing file"), "fclose", errno);
- }
-}
-
-bool File::isInteractive() const
-{
- GMX_RELEASE_ASSERT(impl_->fp_ != NULL,
- "Attempted to access a file object that is not open");
-#ifdef HAVE_UNISTD_H
- return isatty(fileno(impl_->fp_));
-#else
- return true;
-#endif
-}
-
-FILE *File::handle()
-{
- GMX_RELEASE_ASSERT(impl_->fp_ != NULL,
- "Attempted to access a file object that is not open");
- return impl_->fp_;
-}
-
-void File::readBytes(void *buffer, size_t bytes)
-{
- errno = 0;
- FILE *fp = handle();
- // TODO: Retry based on errno or something else?
- size_t bytesRead = std::fread(buffer, 1, bytes, fp);
- if (bytesRead != bytes)
- {
- if (feof(fp))
- {
- GMX_THROW(FileIOError(
- formatString("Premature end of file\n"
- "Attempted to read: %d bytes\n"
- "Successfully read: %d bytes",
- static_cast<int>(bytes),
- static_cast<int>(bytesRead))));
- }
- else
- {
- GMX_THROW_WITH_ERRNO(FileIOError("Error while reading file"),
- "fread", errno);
- }
- }
-}
-
-bool File::readLine(std::string *line)
-{
- if (!readLineWithTrailingSpace(line))
- {
- return false;
- }
- size_t endPos = line->find_last_not_of(" \t\r\n");
- if (endPos != std::string::npos)
- {
- line->resize(endPos + 1);
- }
- return true;
-}
-
-bool File::readLineWithTrailingSpace(std::string *line)
-{
- line->clear();
- const size_t bufsize = 256;
- std::string result;
- char buf[bufsize];
- buf[0] = '\0';
- FILE *fp = handle();
- while (fgets(buf, bufsize, fp) != NULL)
- {
- size_t length = std::strlen(buf);
- result.append(buf, length);
- if (length < bufsize - 1 || buf[length - 1] == '\n')
- {
- break;
- }
- }
- if (ferror(fp))
- {
- GMX_THROW_WITH_ERRNO(FileIOError("Error while reading file"),
- "fgets", errno);
- }
- *line = result;
- return !result.empty() || !feof(fp);
-}
-
-void File::writeString(const char *str)
-{
- if (fprintf(handle(), "%s", str) < 0)
- {
- GMX_THROW_WITH_ERRNO(FileIOError("Writing to file failed"),
- "fprintf", errno);
- }
-}
-
-void File::writeLine(const char *line)
-{
- size_t length = std::strlen(line);
-
- writeString(line);
- if (length == 0 || line[length-1] != '\n')
- {
- writeString("\n");
- }
-}
-
-void File::writeLine()
-{
- writeString("\n");
-}
-
-// static
-bool File::exists(const char *filename)
-{
- if (filename == NULL)
- {
- return false;
- }
- FILE *test = fopen(filename, "r");
- if (test == NULL)
- {
- return false;
- }
- else
- {
- fclose(test);
- // Windows doesn't allow fopen of directory, so we don't need to check
- // this separately.
-#ifndef GMX_NATIVE_WINDOWS
- struct stat st_buf;
- int status = stat(filename, &st_buf);
- if (status != 0 || !S_ISREG(st_buf.st_mode))
- {
- return false;
- }
-#endif
- return true;
- }
-}
-
-// static
-bool File::exists(const std::string &filename)
-{
- return exists(filename.c_str());
-}
-
-// static
-File &File::standardInput()
-{
- static File stdinObject(stdin, false);
- return stdinObject;
-}
-
-// static
-File &File::standardOutput()
-{
- static File stdoutObject(stdout, false);
- return stdoutObject;
-}
-
-// static
-File &File::standardError()
-{
- static File stderrObject(stderr, false);
- return stderrObject;
-}
-
-// static
-std::string File::readToString(const char *filename)
-{
- // Binary mode is required on Windows to be able to determine a size
- // that can be passed to fread().
- File file(filename, "rb");
- FILE *fp = file.handle();
-
- if (std::fseek(fp, 0L, SEEK_END) != 0)
- {
- GMX_THROW_WITH_ERRNO(FileIOError("Seeking to end of file failed"),
- "fseek", errno);
- }
- long len = std::ftell(fp);
- if (len == -1)
- {
- GMX_THROW_WITH_ERRNO(FileIOError("Reading file length failed"),
- "ftell", errno);
- }
- if (std::fseek(fp, 0L, SEEK_SET) != 0)
- {
- GMX_THROW_WITH_ERRNO(FileIOError("Seeking to start of file failed"),
- "fseek", errno);
- }
-
- std::vector<char> data(len);
- file.readBytes(&data[0], len);
- file.close();
-
- std::string result(&data[0], len);
- // The below is necessary on Windows to make newlines stay as '\n' on a
- // roundtrip.
- result = replaceAll(result, "\r\n", "\n");
-
- return result;
-}
-
-// static
-std::string File::readToString(const std::string &filename)
-{
- return readToString(filename.c_str());
-}
-
-// static
-void File::writeFileFromString(const std::string &filename,
- const std::string &text)
-{
- File file(filename, "w");
- file.writeString(text);
- file.close();
-}
-
-} // namespace gmx
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*! \file
- * \brief
- * Declares gmx::File.
- *
- * \author Teemu Murtola <teemu.murtola@gmail.com>
- * \inpublicapi
- * \ingroup module_utility
- */
-#ifndef GMX_UTILITY_FILE_H
-#define GMX_UTILITY_FILE_H
-
-#include <cstdio>
-
-#include <string>
-
-#include "gromacs/utility/classhelpers.h"
-
-namespace gmx
-{
-
-class File;
-
-/*! \brief
- * Parameters for creating a File object.
- *
- * This class (mostly) replaces the ability to return a File object from a
- * function (since File is not copyable): returning a FileInitializer instead
- * allows the caller to construct the File object.
- *
- * \inpublicapi
- * \ingroup module_utility
- */
-class FileInitializer
-{
- public:
- /*! \brief
- * Creates the initializer with given parameters.
- *
- * The passed strings must remain valid until the initializer is used
- * to construct a File object.
- */
- FileInitializer(const char *filename, const char *mode)
- : filename_(filename), mode_(mode)
- {
- }
-
- private:
- const char *filename_;
- const char *mode_;
-
- /*! \brief
- * Needed to allow access to the parameters without otherwise
- * unnecessary accessors.
- */
- friend class File;
-};
-
-/*! \brief
- * Basic file object.
- *
- * This class provides basic file I/O functionality and uses exceptions
- * (FileIOError) for error reporting.
- *
- * \inpublicapi
- * \ingroup module_utility
- */
-class File
-{
- public:
- /*! \brief
- * Opens a file and returns a `FILE` handle.
- *
- * \param[in] filename Path of the file to open.
- * \param[in] mode Mode to open the file in (for fopen()).
- * \throws FileIOError on any I/O error.
- *
- * Instead of returning `NULL` on errors, throws an exception with
- * additional details (including the file name and `errno`).
- */
- static FILE *openRawHandle(const char *filename, const char *mode);
- //! \copydoc openRawHandle(const char *, const char *)
- static FILE *openRawHandle(const std::string &filename, const char *mode);
- /*! \brief
- * Creates a file object and opens a file.
- *
- * \param[in] filename Path of the file to open.
- * \param[in] mode Mode to open the file in (for fopen()).
- * \throws std::bad_alloc if out of memory.
- * \throws FileIOError on any I/O error.
- *
- * \see open(const char *, const char *)
- */
- File(const char *filename, const char *mode);
- //! \copydoc File(const char *, const char *)
- File(const std::string &filename, const char *mode);
- /*! \brief
- * Creates a file object and opens a file.
- *
- * \param[in] initializer Parameters to open the file.
- * \throws std::bad_alloc if out of memory.
- * \throws FileIOError on any I/O error.
- */
- File(const FileInitializer &initializer);
- /*! \brief
- * Destroys the file object.
- *
- * If the file is still open, it is closed.
- * Any error conditions will be ignored.
- */
- ~File();
-
- /*! \brief
- * Opens a file.
- *
- * \param[in] filename Path of the file to open.
- * \param[in] mode Mode to open the file in (for fopen()).
- * \throws FileIOError on any I/O error.
- *
- * The file object must not be open.
- */
- void open(const char *filename, const char *mode);
- //! \copydoc open(const char *, const char *)
- void open(const std::string &filename, const char *mode);
- /*! \brief
- * Closes the file object.
- *
- * \throws FileIOError on any I/O error.
- *
- * The file must be open.
- */
- void close();
-
- /*! \brief
- * Returns whether the file is an interactive terminal.
- *
- * Only works on Unix, otherwise always returns true.
- * It only makes sense to call this for File::standardInput() and
- * friends.
- *
- * Thie file must be open.
- * Does not throw.
- */
- bool isInteractive() const;
- /*! \brief
- * Returns a file handle for interfacing with C functions.
- *
- * The file must be open.
- * Does not throw.
- */
- FILE *handle();
-
- /*! \brief
- * Reads given number of bytes from the file.
- *
- * \param[out] buffer Pointer to buffer that receives the bytes.
- * \param[in] bytes Number of bytes to read.
- * \throws FileIOError on any I/O error.
- *
- * The file must be open.
- */
- void readBytes(void *buffer, size_t bytes);
- /*! \brief
- * Reads a single line from the file.
- *
- * \param[out] line String to receive the line.
- * \returns false if nothing was read because the file ended.
- * \throws std::bad_alloc if out of memory.
- * \throws FileIOError on any I/O error.
- *
- * On error or when false is returned, \p line will be empty.
- * Trailing space will be removed from the line.
- * To loop over all lines in the file, use:
- * \code
- std::string line;
- while (file.readLine(&line))
- {
- // ...
- }
- \endcode
- */
- bool readLine(std::string *line);
- /*! \brief
- * Reads a single line from the file.
- *
- * \param[out] line String to receive the line.
- * \returns false if nothing was read because the file ended.
- * \throws std::bad_alloc if out of memory.
- * \throws FileIOError on any I/O error.
- *
- * On error or when false is returned, \p line will be empty.
- * Works as readLine(), except that terminating newline will be present
- * in \p line if it was present in the file.
- *
- * \see readLine()
- */
- bool readLineWithTrailingSpace(std::string *line);
-
- /*! \brief
- * Writes a string to the file.
- *
- * \param[in] str String to write.
- * \throws FileIOError on any I/O error.
- *
- * The file must be open.
- */
- void writeString(const char *str);
- //! \copydoc writeString(const char *)
- void writeString(const std::string &str) { writeString(str.c_str()); }
- /*! \brief
- * Writes a line to the file.
- *
- * \param[in] line Line to write.
- * \throws FileIOError on any I/O error.
- *
- * If \p line does not end in a newline, one newline is appended.
- * Otherwise, works as writeString().
- *
- * The file must be open.
- */
- void writeLine(const char *line);
- //! \copydoc writeLine(const char *)
- void writeLine(const std::string &line) { writeLine(line.c_str()); }
- /*! \brief
- * Writes a newline to the file.
- *
- * \throws FileIOError on any I/O error.
- */
- void writeLine();
-
- /*! \brief
- * Checks whether a file exists and is a regular file.
- *
- * \param[in] filename Path to the file to check.
- * \returns true if \p filename exists and is accessible.
- *
- * Does not throw.
- */
- static bool exists(const char *filename);
- //! \copydoc exists(const char *)
- static bool exists(const std::string &filename);
-
- /*! \brief
- * Returns a File object for accessing stdin.
- *
- * \throws std::bad_alloc if out of memory (only on first call).
- */
- static File &standardInput();
- /*! \brief
- * Returns a File object for accessing stdout.
- *
- * \throws std::bad_alloc if out of memory (only on first call).
- */
- static File &standardOutput();
- /*! \brief
- * Returns a File object for accessing stderr.
- *
- * \throws std::bad_alloc if out of memory (only on first call).
- */
- static File &standardError();
-
- /*! \brief
- * Reads contents of a file to a std::string.
- *
- * \param[in] filename Name of the file to read.
- * \returns The contents of \p filename.
- * \throws std::bad_alloc if out of memory.
- * \throws FileIOError on any I/O error.
- */
- static std::string readToString(const char *filename);
- //! \copydoc readToString(const char *)
- static std::string readToString(const std::string &filename);
- /*! \brief
- * Convenience method for writing a file from a string in a single call.
- *
- * \param[in] filename Name of the file to read.
- * \param[in] text String to write to \p filename.
- * \throws FileIOError on any I/O error.
- *
- * If \p filename exists, it is overwritten.
- */
- static void writeFileFromString(const std::string &filename,
- const std::string &text);
-
- private:
- /*! \brief
- * Initialize file object from an existing file handle.
- *
- * \param[in] fp %File handle to use (may be NULL).
- * \param[in] bClose Whether this object should close its file handle.
- * \throws std::bad_alloc if out of memory.
- *
- * Used internally to implement standardOutput() and standardError().
- */
- File(FILE *fp, bool bClose);
-
- class Impl;
-
- PrivateImplPointer<Impl> impl_;
-};
-
-} // namespace gmx
-
-#endif
#include "fileredirector.h"
-#include "gromacs/utility/file.h"
+#include "gromacs/utility/filestream.h"
+#include "gromacs/utility/path.h"
namespace gmx
{
class DefaultOutputRedirector : public FileOutputRedirectorInterface
{
public:
- virtual File &standardOutput()
+ virtual TextOutputStream &standardOutput()
{
- return File::standardOutput();
+ return TextOutputFile::standardOutput();
}
- virtual FileInitializer openFileForWriting(const char *filename)
+ virtual TextOutputStreamPointer openTextOutputFile(const char *filename)
{
- return FileInitializer(filename, "w");
+ return TextOutputStreamPointer(new TextOutputFile(filename));
}
};
#include <string>
-#include "gromacs/utility/file.h"
+#include "gromacs/utility/textstream.h"
namespace gmx
{
*
* The calling code should take in this interface and use the methods in it
* all file system operations that need to support this redirection.
- * By default, the code can then use defaultFileInputRedirector() in case no
- * redirection is needed.
*
* This allows tests to override the file existence checks without actually
- * using the file system.
+ * using the file system. See FileOutputRedirectorInterface for notes on
+ * a typical usage pattern.
*
* With some further refactoring of the File class, this could also support
* redirecting input files from in-memory buffers as well, but for now the
/*! \libinternal \brief
* Allows capturing `stdout` and file output from code that supports it.
*
- * The calling code should take in this interface and use the File objects
+ * The calling code should take in this interface and use the stream objects
* it returns for all output that needs to support this redirection.
- * By default, the code can then use defaultFileOutputRedirector() in case no
- * redirection is needed.
*
- * This allows tests to capture the file output without duplicating the
- * knowledge of which files are actually produced. With some further
- * refactoring of the File class, this could support capturing the output into
- * in-memory buffers as well, but for now the current capabilities are
- * sufficient.
+ * Currently, nearly the only purpose of this interface is to let unit tests
+ * capture the file output without duplicating the knowledge of which files are
+ * actually produced. The tests can also replace actual files with in-memory
+ * streams (e.g., a StringOutputStream), and test the output without actually
+ * accessing the file system and managing actual files.
+ *
+ * As the main users of non-default implementations of this interface are
+ * tests, code using this interface generally follows a pattern where the
+ * redirector is initialized to defaultFileOutputRedirector(), and a separate
+ * setter is provided for tests to change the default. This allows code outside
+ * the tests (and outside the code actually calling the redirector) to be
+ * written as if this interface did not exist (i.e., it does not need to pass
+ * the default instance).
+ *
+ * Also, the interface only supports text files, but can be generalized if/when
+ * there is a need for binary streams (see also TextOutputStream).
*
* \inlibraryapi
* \ingroup module_utility
virtual ~FileOutputRedirectorInterface();
/*! \brief
- * Returns a File object to use for `stdout` output.
+ * Returns a stream to use for `stdout` output.
*/
- virtual File &standardOutput() = 0;
+ virtual TextOutputStream &standardOutput() = 0;
/*! \brief
- * Returns a File object to use for output to a given file.
+ * Returns a stream to use for output to a file at a given path.
*
* \param[in] filename Requested file name.
*/
- virtual FileInitializer openFileForWriting(const char *filename) = 0;
+ virtual TextOutputStreamPointer openTextOutputFile(const char *filename) = 0;
- //! Convenience method to open a file using an std::string path.
- FileInitializer openFileForWriting(const std::string &filename)
+ //! Convenience method to open a stream using an std::string path.
+ TextOutputStreamPointer openTextOutputFile(const std::string &filename)
{
- return openFileForWriting(filename.c_str());
+ return openTextOutputFile(filename.c_str());
}
};
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2015, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ * \brief
+ * Implements classes from filestream.h.
+ *
+ * \author Teemu Murtola <teemu.murtola@gmail.com>
+ * \ingroup module_utility
+ */
+#include "gmxpre.h"
+
+#include "filestream.h"
+
+#include "config.h"
+
+#include <cerrno>
+#include <cstdio>
+#include <cstring>
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#include "gromacs/utility/exceptions.h"
+#include "gromacs/utility/gmxassert.h"
+#include "gromacs/utility/stringutil.h"
+
+namespace gmx
+{
+
+namespace
+{
+
+/*! \brief
+ * Helper function for implementing readLine() for input streams.
+ *
+ * \param[in]  fp    Handle to read from.
+ * \param[out] line  Receives the line, including any terminating newline;
+ *     it is left empty if nothing is read or if an exception is thrown.
+ * \returns false if nothing was read and the end of the file was reached.
+ * \throws  FileIOError if the error indicator of \p fp is set after reading.
+ */
+bool readLineImpl(FILE *fp, std::string *line)
+{
+    line->clear();
+    const size_t chunkSize = 256;
+    char         chunk[chunkSize];
+    chunk[0] = '\0';
+    std::string  text;
+    bool         bLineComplete = false;
+    while (!bLineComplete && std::fgets(chunk, chunkSize, fp) != NULL)
+    {
+        const size_t count = std::strlen(chunk);
+        text.append(chunk, count);
+        // A chunk that does not fill the buffer, or that ends in a newline,
+        // finishes the line; otherwise the line continues in the next chunk.
+        bLineComplete = (count < chunkSize - 1 || chunk[count - 1] == '\n');
+    }
+    if (std::ferror(fp))
+    {
+        GMX_THROW_WITH_ERRNO(FileIOError("Error while reading file"),
+                             "fgets", errno);
+    }
+    // Only hand the text over once reading has succeeded.
+    *line = text;
+    return !(text.empty() && std::feof(fp));
+}
+
+} // namespace
+
+namespace internal
+{
+
+/********************************************************************
+ * FileStreamImpl
+ */
+
+/*! \internal \brief
+ * Holds the `FILE` handle used by the file-based stream classes.
+ *
+ * The object either wraps a handle provided by the caller, which it never
+ * closes, or opens a file itself and closes it in close() or, at the latest,
+ * in the destructor.
+ */
+class FileStreamImpl
+{
+    public:
+        //! Wraps the existing handle \p fp; the handle is not closed by this object.
+        explicit FileStreamImpl(FILE *fp)
+            : fp_(fp), bClose_(false)
+        {
+        }
+        /*! \brief
+         * Opens \p filename with std::fopen() using \p mode.
+         *
+         * \throws FileIOError if the file cannot be opened.
+         */
+        FileStreamImpl(const char *filename, const char *mode)
+            : fp_(NULL), bClose_(true)
+        {
+            fp_ = std::fopen(filename, mode);
+            if (fp_ == NULL)
+            {
+                GMX_THROW_WITH_ERRNO(
+                        FileIOError(formatString("Could not open file '%s'", filename)),
+                        "fopen", errno);
+            }
+        }
+        //! Closes the handle if this object opened it and it is still open.
+        ~FileStreamImpl()
+        {
+            if (fp_ != NULL && bClose_)
+            {
+                if (std::fclose(fp_) != 0)
+                {
+                    // TODO: Log the error somewhere
+                }
+            }
+        }
+
+        //! Returns the file handle; asserts that the stream is still open.
+        FILE *handle()
+        {
+            GMX_RELEASE_ASSERT(fp_ != NULL,
+                               "Attempted to access a file object that is not open");
+            return fp_;
+        }
+
+        /*! \brief
+         * Closes the file handle.
+         *
+         * Asserts that the handle is open and that it was opened by this
+         * object.
+         *
+         * \throws FileIOError if std::fclose() reports an error.
+         */
+        void close()
+        {
+            GMX_RELEASE_ASSERT(fp_ != NULL,
+                               "Attempted to close a file object that is not open");
+            GMX_RELEASE_ASSERT(bClose_,
+                               "Attempted to close a file object that should not be");
+            const bool bOk = (std::fclose(fp_) == 0);
+            // The handle is unusable after fclose() even if closing failed.
+            fp_ = NULL;
+            if (!bOk)
+            {
+                GMX_THROW_WITH_ERRNO(
+                        FileIOError("Error while closing file"), "fclose", errno);
+            }
+        }
+
+    private:
+        // This class may own the raw handle, so a copy would close the same
+        // handle a second time.  The copy operations are therefore declared
+        // private and intentionally left undefined.
+        FileStreamImpl(const FileStreamImpl &);
+        FileStreamImpl &operator=(const FileStreamImpl &);
+
+        //! File handle for this object (NULL if the stream has been closed).
+        FILE *fp_;
+        //! Whether \p fp_ should be closed by this object.
+        bool  bClose_;
+};
+
+} // namespace internal
+
+using internal::FileStreamImpl;
+
+/********************************************************************
+ * StandardInputStream
+ */
+
+bool StandardInputStream::isInteractive() const
+{
+    // Without unistd.h there is no isatty() to ask, and the answer stays true.
+    bool bInteractive = true;
+#ifdef HAVE_UNISTD_H
+    bInteractive = (isatty(fileno(stdin)) != 0);
+#endif
+    return bInteractive;
+}
+
+bool StandardInputStream::readLine(std::string *line)
+{
+    // Reads directly from the process-wide stdin handle.
+    const bool bLineRead = readLineImpl(stdin, line);
+    return bLineRead;
+}
+
+// static
+StandardInputStream &StandardInputStream::instance()
+{
+    // Function-local static: every call returns the same object.
+    static StandardInputStream sharedStdin;
+    return sharedStdin;
+}
+
+/********************************************************************
+ * TextInputFile
+ */
+
+// static
+FILE *TextInputFile::openRawHandle(const char *filename)
+{
+    // Opens the file for reading ("r").  The std::-qualified name is used
+    // because <cstdio> only guarantees the declaration in namespace std, and
+    // the rest of this file uses the qualified C library names as well.
+    FILE *fp = std::fopen(filename, "r");
+    if (fp == NULL)
+    {
+        // Report the failure with the file name and errno instead of
+        // returning NULL to the caller.
+        GMX_THROW_WITH_ERRNO(
+                FileIOError(formatString("Could not open file '%s'", filename)),
+                "fopen", errno);
+    }
+    return fp;
+}
+
+// static
+FILE *TextInputFile::openRawHandle(const std::string &filename)
+{
+    // Convenience overload that forwards to the const char * variant.
+    const char *path = filename.c_str();
+    return openRawHandle(path);
+}
+
+// Opens filename for reading ("r"); the FileStreamImpl constructor throws
+// FileIOError if the file cannot be opened.
+TextInputFile::TextInputFile(const std::string &filename)
+    : impl_(new FileStreamImpl(filename.c_str(), "r"))
+{
+}
+
+// Wraps an existing handle; FileStreamImpl does not close handles that are
+// passed in this way.
+TextInputFile::TextInputFile(FILE *fp)
+    : impl_(new FileStreamImpl(fp))
+{
+}
+
+TextInputFile::~TextInputFile()
+{
+    // Empty body: no cleanup is performed here explicitly.
+}
+
+FILE *TextInputFile::handle()
+{
+    // FileStreamImpl::handle() asserts that the file is still open.
+    FILE *fp = impl_->handle();
+    return fp;
+}
+
+bool TextInputFile::readLine(std::string *line)
+{
+    FILE *fp = impl_->handle();
+    return readLineImpl(fp, line);
+}
+
+void TextInputFile::close()
+{
+    // All checks and error reporting are done by FileStreamImpl::close().
+    impl_->close();
+}
+
+/********************************************************************
+ * TextOutputFile
+ */
+
+// Opens filename for writing ("w"); the FileStreamImpl constructor throws
+// FileIOError if the file cannot be opened.
+TextOutputFile::TextOutputFile(const std::string &filename)
+    : impl_(new FileStreamImpl(filename.c_str(), "w"))
+{
+}
+
+// Wraps an existing handle; FileStreamImpl does not close handles that are
+// passed in this way.
+TextOutputFile::TextOutputFile(FILE *fp)
+    : impl_(new FileStreamImpl(fp))
+{
+}
+
+TextOutputFile::~TextOutputFile()
+{
+    // Empty body: no cleanup is performed here explicitly.
+}
+
+void TextOutputFile::write(const char *str)
+{
+    FILE      *fp      = impl_->handle();
+    // fprintf() returns a negative value when the output fails.
+    const int  written = std::fprintf(fp, "%s", str);
+    if (written < 0)
+    {
+        GMX_THROW_WITH_ERRNO(FileIOError("Writing to file failed"),
+                             "fprintf", errno);
+    }
+}
+
+void TextOutputFile::close()
+{
+    // All checks and error reporting are done by FileStreamImpl::close().
+    impl_->close();
+}
+
+// static
+TextOutputFile &TextOutputFile::standardOutput()
+{
+    // Function-local static wrapping stdout: every call returns the same object.
+    static TextOutputFile sharedStdout(stdout);
+    return sharedStdout;
+}
+
+// static
+TextOutputFile &TextOutputFile::standardError()
+{
+    // Function-local static wrapping stderr: every call returns the same object.
+    static TextOutputFile sharedStderr(stderr);
+    return sharedStderr;
+}
+
+} // namespace gmx
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2015, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \libinternal \file
+ * \brief
+ * Declares implementations for textstream.h interfaces for file input/output.
+ *
+ * \author Teemu Murtola <teemu.murtola@gmail.com>
+ * \inlibraryapi
+ * \ingroup module_utility
+ */
+#ifndef GMX_UTILITY_FILESTREAM_H
+#define GMX_UTILITY_FILESTREAM_H
+
+#include <cstdio>
+
+#include <string>
+
+#include "gromacs/utility/classhelpers.h"
+#include "gromacs/utility/textstream.h"
+
+namespace gmx
+{
+
+namespace internal
+{
+class FileStreamImpl;
+}
+
+/*! \libinternal \brief
+ * Text input stream implementation for reading from `stdin`.
+ *
+ * Implementations for the TextInputStream methods throw FileIOError on any
+ * I/O error.
+ *
+ * \inlibraryapi
+ * \ingroup module_utility
+ */
+class StandardInputStream : public TextInputStream
+{
+ public:
+ /*! \brief
+ * Returns whether `stdin` is an interactive terminal.
+ *
+ * Only works on Unix, otherwise always returns true.
+ *
+ * Does not throw.
+ */
+ bool isInteractive() const;
+
+ // From TextInputStream
+ virtual bool readLine(std::string *line);
+ // close() has an empty body here: calling it does nothing for this stream.
+ virtual void close() {}
+
+ /*! \brief
+ * Returns a stream for accessing `stdin`.
+ *
+ * Does not throw.
+ */
+ static StandardInputStream &instance();
+};
+
+/*! \libinternal \brief
+ * Text input stream implementation for reading from a file.
+ *
+ * Implementations for the TextInputStream methods throw FileIOError on any
+ * I/O error.
+ *
+ * \inlibraryapi
+ * \ingroup module_utility
+ */
+class TextInputFile : public TextInputStream
+{
+ public:
+ /*! \brief
+ * Opens a file and returns a `FILE` handle.
+ *
+ * \param[in] filename Path of the file to open.
+ * \throws FileIOError on any I/O error.
+ *
+ * Instead of returning `NULL` on errors, throws an exception with
+ * additional details (including the file name and `errno`).
+ */
+ static FILE *openRawHandle(const char *filename);
+ //! \copydoc openRawHandle(const char *)
+ static FILE *openRawHandle(const std::string &filename);
+
+ /*! \brief
+ * Opens a text file as a stream.
+ *
+ * \param[in] filename Path to the file to open.
+ * \throws std::bad_alloc if out of memory.
+ * \throws FileIOError on any I/O error.
+ */
+ explicit TextInputFile(const std::string &filename);
+ /*! \brief
+ * Initializes file object from an existing file handle.
+ *
+ * \param[in] fp File handle to use.
+ * \throws std::bad_alloc if out of memory.
+ *
+ * The caller is responsible for closing the file; close() does nothing
+ * for an object constructed this way.
+ */
+ explicit TextInputFile(FILE *fp);
+ virtual ~TextInputFile();
+
+ /*! \brief
+ * Returns a raw handle to the input file.
+ *
+ * This is provided for interoperability with older C-like code.
+ */
+ FILE *handle();
+
+ // From TextInputStream
+ virtual bool readLine(std::string *line);
+ virtual void close();
+
+ private:
+ // Implementation object of the forward-declared internal::FileStreamImpl type.
+ PrivateImplPointer<internal::FileStreamImpl> impl_;
+};
+
+/*! \libinternal \brief
+ * Text output stream implementation for writing to a file.
+ *
+ * Implementations for the TextOutputStream methods throw FileIOError on any
+ * I/O error.
+ *
+ * \inlibraryapi
+ * \ingroup module_utility
+ */
+class TextOutputFile : public TextOutputStream
+{
+ public:
+ // The constructor documentation is shared with the matching
+ // TextInputFile constructors through \copydoc.
+ //! \copydoc TextInputFile::TextInputFile(const std::string &)
+ explicit TextOutputFile(const std::string &filename);
+ //! \copydoc TextInputFile::TextInputFile(FILE *)
+ explicit TextOutputFile(FILE *fp);
+ virtual ~TextOutputFile();
+
+ // From TextOutputStream
+ virtual void write(const char *text);
+ virtual void close();
+
+ /*! \brief
+ * Returns a stream for accessing `stdout`.
+ *
+ * \throws std::bad_alloc if out of memory (only on first call).
+ */
+ static TextOutputFile &standardOutput();
+ /*! \brief
+ * Returns a stream for accessing `stderr`.
+ *
+ * \throws std::bad_alloc if out of memory (only on first call).
+ */
+ static TextOutputFile &standardError();
+
+ private:
+ // Implementation object of the forward-declared internal::FileStreamImpl type.
+ PrivateImplPointer<internal::FileStreamImpl> impl_;
+};
+
+} // namespace gmx
+
+#endif
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2008, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
-#ifndef _genborn_sse_h
-#define _genborn_sse_h
+/*! \libinternal \file
+ * \brief
+ * Declares C++11-style basic threading primitives
+ * (gmx::Mutex, gmx::lock_guard).
+ *
+ * For now, the implementation is imported from thread-MPI.
+ *
+ * \author Teemu Murtola <teemu.murtola@gmail.com>
+ * \inlibraryapi
+ * \ingroup module_utility
+ */
+#ifndef GMX_THREADING_MUTEX_H
+#define GMX_THREADING_MUTEX_H
-#include "gromacs/legacyheaders/typedefs.h"
+#include "thread_mpi/mutex.h"
-float
-calc_gb_chainrule_sse2_single(int natoms, t_nblist *nl, float *dadx, float *dvda,
- float *xd, float *f, float *fshift, float *shift_vec,
- int gb_algorithm, gmx_genborn_t *born, t_mdatoms *md);
+namespace gmx
+{
-int
-calc_gb_rad_still_sse2_single(t_commrec *cr, t_forcerec *fr, int natoms, gmx_localtop_t *top,
- float *x, t_nblist *nl, gmx_genborn_t *born);
+//! \cond libapi
+/*! \libinternal \brief
+ * C++11-compatible basic mutex.
+ */
+typedef tMPI::mutex Mutex;
+//! \endcond
+// Makes tMPI::lock_guard available as gmx::lock_guard (imported from
+// thread-MPI, like Mutex above).
+using tMPI::lock_guard;
-int
-calc_gb_rad_hct_obc_sse2_single(t_commrec *cr, t_forcerec * fr, int natoms, gmx_localtop_t *top,
- float *x, t_nblist *nl, gmx_genborn_t *born, t_mdatoms *md, int gb_algorithm);
+} // namespace gmx
-#endif /* _genborn_sse_h */
+#endif
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2015, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \libinternal \file
+ * \brief
+ * Declares no_delete deleter for boost::shared_ptr.
+ *
+ * \author Teemu Murtola <teemu.murtola@gmail.com>
+ * \inlibraryapi
+ * \ingroup module_utility
+ */
+#ifndef GMX_UTILITY_NODELETE_H
+#define GMX_UTILITY_NODELETE_H
+
+namespace gmx
+{
+
+/*! \libinternal \brief
+ * Deleter for boost::shared_ptr that does nothing.
+ *
+ * This is useful for cases where a class needs to keep a reference to another
+ * class, and optionally also manage the lifetime of that other class.
+ * The simplest construct (that does not force all callers to use heap
+ * allocation and boost::shared_ptr for the referenced class) is to use a
+ * single boost::shared_ptr to hold that reference, and use no_delete as the
+ * deleter if the lifetime is managed externally.
+ *
+ * \inlibraryapi
+ * \ingroup module_utility
+ */
+template <class T>
+struct no_delete
+{
+    /*! \brief
+     * Deleter that does nothing.
+     *
+     * The pointer is intentionally ignored.  The operator is `const`
+     * because the deleter has no state, which also allows it to be invoked
+     * through a const object or reference.
+     */
+    void operator()(T *) const {}
+};
+
+} // namespace gmx
+
+#endif
#include <cctype>
#include <cerrno>
+#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <algorithm>
+#include <string>
#include <sys/stat.h>
return result;
}
+/********************************************************************
+ * File
+ */
+
+// static
+bool File::exists(const char *filename)
+{
+    // A NULL path never names an existing file.
+    if (filename == NULL)
+    {
+        return false;
+    }
+    // Probe the path by opening it for reading; failure means "not there"
+    // (or not accessible).
+    FILE *probe = std::fopen(filename, "r");
+    if (probe == NULL)
+    {
+        return false;
+    }
+    std::fclose(probe);
+    // Windows doesn't allow fopen of directory, so we don't need to check
+    // this separately.
+#ifndef GMX_NATIVE_WINDOWS
+    struct stat fileInfo;
+    const bool  bStatOk = (stat(filename, &fileInfo) == 0);
+    // Only regular files count; directories and other entries are rejected.
+    if (!(bStatOk && S_ISREG(fileInfo.st_mode)))
+    {
+        return false;
+    }
+#endif
+    return true;
+}
+
+// static
+bool File::exists(const std::string &filename)
+{
+    // Forward to the C-string overload, which performs the actual checks.
+    const char *rawPath = filename.c_str();
+    return exists(rawPath);
+}
/********************************************************************
* Directory
Path();
};
+// Collection of static helper queries on files; the private constructor
+// below prevents creating objects of this class.
+class File
+{
+ public:
+ /*! \brief
+ * Checks whether a file exists and is a regular file.
+ *
+ * \param[in] filename Path to the file to check (may be NULL).
+ * \returns `true` if \p filename can be opened for reading and, on
+ * platforms other than native Windows, refers to a regular file;
+ * `false` otherwise, including when \p filename is NULL.
+ *
+ * Does not throw.
+ */
+ static bool exists(const char *filename);
+ //! \copydoc exists(const char *)
+ static bool exists(const std::string &filename);
+
+ private:
+ // Disallow instantiation.
+ File();
+};
class Directory
{
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2008, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
-#ifndef _genborn_sse2_double_h
-#define _genborn_sse2_double_h
+/*! \internal \file
+ * \brief
+ * Implements classes from stringstream.h.
+ *
+ * \author Teemu Murtola <teemu.murtola@gmail.com>
+ * \ingroup module_utility
+ */
+#include "gmxpre.h"
+
+#include "stringstream.h"
-#include "gromacs/legacyheaders/typedefs.h"
+#include <string>
-int
-calc_gb_rad_still_sse2_double(t_commrec *cr, t_forcerec *fr, int natoms, gmx_localtop_t *top,
- double *x, t_nblist *nl, gmx_genborn_t *born);
+namespace gmx
+{
-int
-calc_gb_chainrule_sse2_double(int natoms, t_nblist *nl, double *dadx, double *dvda, double *xd, double *f,
- double *fshift, double *shift_vec, int gb_algorithm,
- gmx_genborn_t *born, t_mdatoms *md);
+void StringOutputStream::write(const char *str)
+{
+ // Appends the NUL-terminated text to the accumulated string; str is
+ // passed to std::string::append() unchecked, so it must not be NULL.
+ str_.append(str);
+}
-int
-calc_gb_rad_hct_obc_sse2_double(t_commrec *cr, t_forcerec *fr, int natoms, gmx_localtop_t *top,
- double *x, t_nblist *nl, gmx_genborn_t *born, t_mdatoms *md, int gb_algorithm);
+void StringOutputStream::close()
+{
+ // Intentionally empty: closing does not alter the stored string.
+}
-#endif /* _genborn_sse2_double_h */
+} // namespace gmx
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2015, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \libinternal \file
+ * \brief
+ * Declares implementations for textstream.h interfaces for input/output to
+ * in-memory strings.
+ *
+ * \author Teemu Murtola <teemu.murtola@gmail.com>
+ * \inlibraryapi
+ * \ingroup module_utility
+ */
+#ifndef GMX_UTILITY_STRINGSTREAM_H
+#define GMX_UTILITY_STRINGSTREAM_H
+
+#include <string>
+
+#include "gromacs/utility/classhelpers.h"
+#include "gromacs/utility/textstream.h"
+
+namespace gmx
+{
+
+/*! \libinternal \brief
+ * Text output stream implementation for writing to an in-memory string.
+ *
+ * Implementations for the TextOutputStream methods throw std::bad_alloc if
+ * reallocation of the string fails.
+ *
+ * \inlibraryapi
+ * \ingroup module_utility
+ */
+class StringOutputStream : public TextOutputStream
+{
+ public:
+ //! Returns the text written to the stream so far.
+ const std::string &toString() const { return str_; }
+
+ // From TextOutputStream
+ virtual void write(const char *text);
+ virtual void close();
+
+ private:
+ //! Text accumulated by write() calls; exposed read-only via toString().
+ std::string str_;
+};
+
+} // namespace gmx
+
+#endif
namespace gmx
{
-bool endsWith(const std::string &str, const char *suffix)
+// Returns true if str ends with suffix.
+// A NULL or empty suffix always matches; a NULL str only matches such a
+// suffix (it is never passed to the C string functions).
+bool endsWith(const char *str, const char *suffix)
{
- if (suffix == NULL || suffix[0] == '\0')
+ if (isNullOrEmpty(suffix))
{
return true;
}
- size_t length = std::strlen(suffix);
- return (str.length() >= length
- && str.compare(str.length() - length, length, suffix) == 0);
+ // A NULL string cannot end with a non-empty suffix; guard before strlen().
+ if (str == NULL)
+ {
+ return false;
+ }
+ const size_t strLength = std::strlen(str);
+ const size_t suffixLength = std::strlen(suffix);
+ // Compare the tail of str (same length as suffix) against suffix.
+ return (strLength >= suffixLength
+ && std::strcmp(&str[strLength - suffixLength], suffix) == 0);
}
std::string stripSuffixIfPresent(const std::string &str, const char *suffix)
TextLineWrapperSettings::TextLineWrapperSettings()
: maxLength_(0), indent_(0), firstLineIndent_(-1),
- bStripLeadingWhitespace_(false), continuationChar_('\0')
+ bKeepFinalSpaces_(false), continuationChar_('\0')
{
+ // Defaults: no line length limit, no indentation, a first-line indent of
+ // -1 (meaning indent_ is used), final spaces not kept, and no
+ // continuation character.
}
* TextLineWrapper
*/
+bool TextLineWrapper::isTrivial() const
+{
+    // A line length limit or a general indent makes the wrapper non-trivial.
+    if (settings_.lineLength() != 0)
+    {
+        return false;
+    }
+    if (settings_.indent() != 0)
+    {
+        return false;
+    }
+    // With no general indent, only a positive first-line indent remains
+    // as a way to alter the text.
+    return settings_.firstLineIndent_ <= 0;
+}
+
size_t
TextLineWrapper::findNextLine(const char *input, size_t lineStart) const
{
size_t inputLength = std::strlen(input);
bool bFirstLine = (lineStart == 0 || input[lineStart - 1] == '\n');
// Ignore leading whitespace if necessary.
- if (!bFirstLine || settings_.bStripLeadingWhitespace_)
+ if (!bFirstLine)
{
lineStart += std::strspn(input + lineStart, " ");
if (lineStart >= inputLength)
size_t inputLength = input.length();
bool bFirstLine = (lineStart == 0 || input[lineStart - 1] == '\n');
// Strip leading whitespace if necessary.
- if (!bFirstLine || settings_.bStripLeadingWhitespace_)
+ if (!bFirstLine)
{
lineStart = input.find_first_not_of(' ', lineStart);
if (lineStart >= inputLength)
int indent = (bFirstLine ? settings_.firstLineIndent() : settings_.indent());
bool bContinuation = (lineEnd < inputLength && input[lineEnd - 1] != '\n');
// Strip trailing whitespace.
- while (lineEnd > lineStart && std::isspace(input[lineEnd - 1]))
+ if (!settings_.bKeepFinalSpaces_ || lineEnd < inputLength || input[inputLength - 1] == '\n')
{
- --lineEnd;
+ while (lineEnd > lineStart && std::isspace(input[lineEnd - 1]))
+ {
+ --lineEnd;
+ }
}
const size_t lineLength = lineEnd - lineStart;
*
* Does not throw.
*/
-bool inline isNullOrEmpty(const char *str)
+static inline bool isNullOrEmpty(const char *str)
{
+ // The NULL test comes first, so str[0] is only read for non-NULL input.
return str == NULL || str[0] == '\0';
}
* Returns true if \p prefix is empty.
* Does not throw.
*/
-bool inline startsWith(const std::string &str, const std::string &prefix)
+static inline bool startsWith(const std::string &str, const std::string &prefix)
{
+ // Compares at most prefix.length() leading characters of str with prefix.
return str.compare(0, prefix.length(), prefix) == 0;
}
//! \copydoc startsWith(const std::string &, const std::string &)
-bool inline startsWith(const char *str, const char *prefix)
+static inline bool startsWith(const char *str, const char *prefix)
{
+ // Both pointers go straight to the C library without a NULL check.
return std::strncmp(str, prefix, std::strlen(prefix)) == 0;
}
* Returns true if \p suffix is NULL or empty.
* Does not throw.
*/
-bool endsWith(const std::string &str, const char *suffix);
+bool endsWith(const char *str, const char *suffix);
+//! \copydoc endsWith(const char *, const char *)
+static inline bool endsWith(const std::string &str, const char *suffix)
+{
+ // Thin convenience overload: forwards to the C-string implementation.
+ return endsWith(str.c_str(), suffix);
+}
/*! \brief
* Removes a suffix from a string.
* - No maximum line width (only explicit line breaks).
* - No indentation.
* - No continuation characters.
- * - Ignore whitespace after an explicit newline.
+ * - Do not keep final spaces in input strings.
*/
TextLineWrapperSettings();
*/
void setFirstLineIndent(int indent) { firstLineIndent_ = indent; }
/*! \brief
- * Sets whether to remove spaces after an explicit newline.
+ * Sets whether final spaces in input should be kept.
*
- * \param[in] bStrip If true, spaces after newline are ignored.
+ * \param[in] bKeep Whether to keep spaces at the end of the input.
*
- * If not removed, the space is added to the indentation set with
- * setIndent().
- * The default is to not strip such whitespace.
+ * This means that wrapping a string that ends in spaces also keeps
+ * those spaces in the output. This allows using the wrapper for
+ * partial lines where the initial part of the line may end in a space.
+ * By default, all trailing whitespace is removed. Note that this
+ * option does not affect spaces before an explicit newline: those are
+ * always removed.
*/
- void setStripLeadingWhitespace(bool bStrip)
- {
- bStripLeadingWhitespace_ = bStrip;
- }
+ void setKeepFinalSpaces(bool bKeep) { bKeepFinalSpaces_ = bKeep; }
/*! \brief
* Sets a continuation marker for wrapped lines.
*
* If -1, \a indent_ is used.
*/
int firstLineIndent_;
- //! Whether to ignore or preserve space after a newline.
- bool bStripLeadingWhitespace_;
+ //! Whether to keep spaces at end of input.
+ bool bKeepFinalSpaces_;
//! If not \c '\0', mark each wrapping point with this character.
char continuationChar_;
*/
TextLineWrapperSettings &settings() { return settings_; }
+ //! Returns true if the wrapper would not modify the input string.
+ bool isTrivial() const;
+
/*! \brief
* Finds the next line to be wrapped.
*
gmx_add_unit_test(UtilityUnitTests utility-test
arrayref.cpp
bitmask32.cpp bitmask64.cpp bitmask128.cpp
- stringutil.cpp)
+ stringutil.cpp
+ textwriter.cpp
+ )
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
<ReferenceData>
- <String Name="WrappedAt14StripLeading"><![CDATA[
-A quick brown
-fox jumps
-over the lazy
-dog]]></String>
- <String Name="WrappedAt14PreserveLeading"><![CDATA[
+ <String Name="WrappedAt14"><![CDATA[
A quick brown
fox jumps
over the lazy
--- /dev/null
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+ <String Name="Output"><![CDATA[
+Explicit newline
+Implicit newline
+Explicit newline
+Implicit newline
+
+]]></String>
+</ReferenceData>
--- /dev/null
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+ <String Name="Output"><![CDATA[
+Partial spaced line
+Partial spaced line
+]]></String>
+</ReferenceData>
--- /dev/null
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+ <String Name="Output"><![CDATA[
+Partial spaced line
+Partial spaced line
+]]></String>
+</ReferenceData>
--- /dev/null
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+ <String Name="Output"><![CDATA[
+ Wrapped and
+ indented text
+ Wrapped and
+ indented text
+
+]]></String>
+</ReferenceData>
EXPECT_EQ("", wrapper.wrapToString(""));
EXPECT_EQ("", wrapper.wrapToString(" "));
EXPECT_TRUE(wrapper.wrapToVector("").empty());
- EXPECT_TRUE(wrapper.wrapToString(" ").empty());
+ {
+ std::vector<std::string> wrapped(wrapper.wrapToVector(" "));
+ ASSERT_EQ(1U, wrapped.size());
+ EXPECT_EQ("", wrapped[0]);
+ }
+}
+
+// Checks stripping of trailing spaces with and without setKeepFinalSpaces().
+TEST_F(TextLineWrapperTest, HandlesTrailingWhitespace)
+{
+ gmx::TextLineWrapper wrapper;
+
+ // Default settings: trailing spaces are removed in both cases.
+ EXPECT_EQ("line", wrapper.wrapToString("line "));
+ EXPECT_EQ("line\n", wrapper.wrapToString("line \n"));
+
+ // Keeping final spaces only affects spaces at the very end of the input;
+ // spaces before an explicit newline are still removed.
+ wrapper.settings().setKeepFinalSpaces(true);
+ EXPECT_EQ("line ", wrapper.wrapToString("line "));
+ EXPECT_EQ("line\n", wrapper.wrapToString("line \n"));
}
TEST_F(TextLineWrapperTest, HandlesTrailingNewlines)
TEST_F(TextLineWrapperTest, WrapsCorrectlyWithExtraWhitespace)
{
gmx::TextLineWrapper wrapper;
-
wrapper.settings().setLineLength(14);
- wrapper.settings().setStripLeadingWhitespace(true);
- checkText(wrapper.wrapToString(g_wrapTextWhitespace),
- "WrappedAt14StripLeading");
- wrapper.settings().setStripLeadingWhitespace(false);
+
checkText(wrapper.wrapToString(g_wrapTextWhitespace),
- "WrappedAt14PreserveLeading");
+ "WrappedAt14");
}
} // namespace
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2015, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ * \brief
+ * Tests for gmx::TextWriter.
+ *
+ * \author Teemu Murtola <teemu.murtola@gmail.com>
+ * \ingroup module_utility
+ */
+#include "gmxpre.h"
+
+#include "gromacs/utility/textwriter.h"
+
+#include <string>
+
+#include <gtest/gtest.h>
+
+#include "gromacs/utility/stringstream.h"
+#include "gromacs/utility/stringutil.h"
+
+#include "testutils/stringtest.h"
+
+namespace
+{
+
+// Fixture that lets a gmx::TextWriter write into an in-memory string stream
+// and compares the collected text against the "Output" reference value.
+class TextWriterTest : public gmx::test::StringTestBase
+{
+ public:
+ // The writer is constructed with the address of the member stream.
+ TextWriterTest() : writer_(&stream_)
+ {
+ }
+
+ // Checks everything written so far against the reference data.
+ void checkOutput()
+ {
+ checkText(stream_.toString(), "Output");
+ }
+
+ // stream_ is declared before writer_, which is constructed from its address.
+ gmx::StringOutputStream stream_;
+ gmx::TextWriter writer_;
+};
+
+// Exercises writeLine() with and without a trailing newline in the input,
+// for both const char * and std::string arguments, plus the no-argument form.
+TEST_F(TextWriterTest, WritesLines)
+{
+ writer_.writeLine("Explicit newline\n");
+ writer_.writeLine("Implicit newline");
+ writer_.writeLine(std::string("Explicit newline\n"));
+ writer_.writeLine(std::string("Implicit newline"));
+ writer_.writeLine();
+ checkOutput();
+}
+
+// Builds each line from several writeString() calls (some parts end or start
+// with a space) and terminates it with writeLine().
+TEST_F(TextWriterTest, WritesLinesInParts)
+{
+ writer_.writeString("Partial ");
+ writer_.writeString("spaced");
+ writer_.writeString(" line");
+ writer_.writeLine();
+ writer_.writeString(std::string("Partial "));
+ writer_.writeString(std::string("spaced"));
+ writer_.writeString(std::string(" line"));
+ writer_.writeLine();
+ checkOutput();
+}
+
+// Exercises writeLine() with wrapper settings of indent 2 and line length 15.
+TEST_F(TextWriterTest, WritesWrappedLines)
+{
+ writer_.wrapperSettings().setIndent(2);
+ writer_.wrapperSettings().setLineLength(15);
+ writer_.writeLine("Wrapped and indented text");
+ writer_.writeLine(std::string("Wrapped and indented text"));
+ writer_.writeLine();
+ checkOutput();
+}
+
+// Same sequence as WritesLinesInParts, but with a line length of 50 set on
+// the wrapper settings before writing.
+TEST_F(TextWriterTest, WritesLinesInPartsWithWrapper)
+{
+ writer_.wrapperSettings().setLineLength(50);
+ writer_.writeString("Partial ");
+ writer_.writeString("spaced");
+ writer_.writeString(" line");
+ writer_.writeLine();
+ writer_.writeString(std::string("Partial "));
+ writer_.writeString(std::string("spaced"));
+ writer_.writeString(std::string(" line"));
+ writer_.writeLine();
+ checkOutput();
+}
+
+} // namespace
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2015, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ * \brief
+ * Implements gmx::TextReader.
+ *
+ * \author Teemu Murtola <teemu.murtola@gmail.com>
+ * \ingroup module_utility
+ */
+#include "gmxpre.h"
+
+#include "textreader.h"
+
+#include "gromacs/utility/filestream.h"
+#include "gromacs/utility/nodelete.h"
+#include "gromacs/utility/textstream.h"
+
+namespace gmx
+{
+
+// static
+std::string TextReader::readFileToString(const char *filename)
+{
+    std::string contents;
+    TextReader  fileReader(filename);
+    // Each line returned by readLine() is appended unchanged, one after
+    // another, until the reader reports that nothing more was read.
+    for (std::string currentLine; fileReader.readLine(&currentLine); )
+    {
+        contents += currentLine;
+    }
+    fileReader.close();
+    return contents;
+}
+
+// static
+std::string TextReader::readFileToString(const std::string &filename)
+{
+    // Delegate to the C-string overload, which does the actual reading.
+    const char *rawPath = filename.c_str();
+    return readFileToString(rawPath);
+}
+
+// Private implementation class for TextReader; it only stores the stream
+// pointer that the reader forwards its calls to.
+class TextReader::Impl
+{
+ public:
+ // Stores a copy of the given stream pointer.
+ explicit Impl(const TextInputStreamPointer &stream)
+ : stream_(stream)
+ {
+ }
+
+ // Stream that all read and close operations are forwarded to.
+ TextInputStreamPointer stream_;
+};
+
+// Creates a new TextInputFile for filename and hands it to a
+// TextInputStreamPointer stored in the implementation object.
+TextReader::TextReader(const std::string &filename)
+ : impl_(new Impl(TextInputStreamPointer(new TextInputFile(filename))))
+{
+}
+
+// Wraps a caller-provided stream: the no_delete deleter means the pointer
+// object never deletes the stream.
+TextReader::TextReader(TextInputStream *stream)
+ : impl_(new Impl(TextInputStreamPointer(stream, no_delete<TextInputStream>())))
+{
+}
+
+// Stores a copy of the given stream pointer.
+TextReader::TextReader(const TextInputStreamPointer &stream)
+ : impl_(new Impl(stream))
+{
+}
+
+// Empty body, defined in this source file where Impl is a complete type.
+TextReader::~TextReader()
+{
+}
+
+bool TextReader::readLine(std::string *line)
+{
+ // Forwards directly to the underlying stream's readLine().
+ return impl_->stream_->readLine(line);
+}
+
+// Reads one line with readLine() and removes trailing whitespace (spaces,
+// tabs, carriage returns and newlines) from it.
+//
+// Returns false if readLine() returned false; otherwise true, with the
+// trimmed text stored in *line.  A line that is empty or consists only of
+// whitespace (e.g. a bare "\n") results in an empty string.
+bool TextReader::readLineTrimmed(std::string *line)
+{
+    if (!readLine(line))
+    {
+        return false;
+    }
+    const size_t endPos = line->find_last_not_of(" \t\r\n");
+    if (endPos == std::string::npos)
+    {
+        // No non-whitespace character at all: everything is trailing
+        // whitespace, so nothing is kept.
+        line->clear();
+    }
+    else
+    {
+        // Keep everything up to and including the last non-whitespace
+        // character.
+        line->resize(endPos + 1);
+    }
+    return true;
+}
+
+void TextReader::close()
+{
+ // Forwards directly to the underlying stream's close().
+ impl_->stream_->close();
+}
+
+} // namespace gmx
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2015, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \libinternal \file
+ * \brief
+ * Declares gmx::TextReader.
+ *
+ * \author Teemu Murtola <teemu.murtola@gmail.com>
+ * \inlibraryapi
+ * \ingroup module_utility
+ */
+#ifndef GMX_UTILITY_TEXTREADER_H
+#define GMX_UTILITY_TEXTREADER_H
+
+#include <string>
+
+#include "gromacs/utility/classhelpers.h"
+#include "gromacs/utility/textstream.h"
+
+namespace gmx
+{
+
+/*! \libinternal \brief
+ * Reads text from a TextInputStream.
+ *
+ * This class provides more formatted reading capabilities than reading raw
+ * lines from the stream (and a natural place to implement more such
+ * capabilities).
+ *
+ * All methods that read from the stream can throw any exceptions that the
+ * underlying stream throws.
+ *
+ * \inlibraryapi
+ * \ingroup module_utility
+ */
+class TextReader
+{
+ public:
+ /*! \brief
+ * Reads contents of a file to a std::string.
+ *
+ * \param[in] filename Name of the file to read.
+ * \returns The contents of \p filename.
+ * \throws std::bad_alloc if out of memory.
+ * \throws FileIOError on any I/O error.
+ */
+ static std::string readFileToString(const char *filename);
+ //! \copydoc readFileToString(const char *)
+ static std::string readFileToString(const std::string &filename);
+
+ /*! \brief
+ * Creates a reader that reads from specified file.
+ *
+ * \param[in] filename Path to the file to open.
+ * \throws std::bad_alloc if out of memory.
+ * \throws FileIOError on any I/O error.
+ *
+ * This constructor is provided for convenience for reading directly
+ * from a file, without the need to construct multiple objects.
+ */
+ explicit TextReader(const std::string &filename);
+ /*! \brief
+ * Creates a reader that reads from specified stream.
+ *
+ * \param[in] stream Stream to read from.
+ * \throws std::bad_alloc if out of memory.
+ *
+ * The caller is responsible for the lifetime of the stream (it should
+ * remain in existence as long as the reader exists).
+ *
+ * This constructor is provided for convenience for cases where the
+ * stream is not allocated with `new` and/or not managed by a
+ * boost::shared_ptr (e.g., if the stream is an object on the stack).
+ */
+ explicit TextReader(TextInputStream *stream);
+ /*! \brief
+ * Creates a reader that reads from specified stream.
+ *
+ * \param[in] stream Stream to read from.
+ * \throws std::bad_alloc if out of memory.
+ *
+ * The reader keeps a reference to the stream, so the caller can pass
+ * in a temporary if necessary.
+ */
+ explicit TextReader(const TextInputStreamPointer &stream);
+ ~TextReader();
+
+ /*! \brief
+ * Reads a single line (including newline) from the stream.
+ *
+ * \param[out] line String to receive the line.
+ * \returns `false` if nothing was read because the file ended.
+ *
+ * On error or when false is returned, \p line will be empty.
+ * The newline is returned as part of \p line if it was present in
+ * the stream.
+ * To loop over all lines in the stream, use:
+ * \code
+ std::string line;
+ while (reader.readLine(&line))
+ {
+ // ...
+ }
+ \endcode
+ */
+ bool readLine(std::string *line);
+ /*! \brief
+ * Reads a single line from the stream.
+ *
+ * \param[out] line String to receive the line.
+ * \returns false if nothing was read because the file ended.
+ *
+ * On error or when false is returned, \p line will be empty.
+ * Works as readLine(), except that trailing whitespace will be removed
+ * from \p line.
+ *
+ * \see readLine()
+ */
+ bool readLineTrimmed(std::string *line);
+
+ /*! \brief
+ * Closes the underlying stream.
+ */
+ void close();
+
+ private:
+ class Impl;
+
+ PrivateImplPointer<Impl> impl_;
+};
+
+} // namespace gmx
+
+#endif
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2015, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \libinternal \file
+ * \brief
+ * Declares interfaces for simple input/output streams.
+ *
+ * \author Teemu Murtola <teemu.murtola@gmail.com>
+ * \inlibraryapi
+ * \ingroup module_utility
+ */
+#ifndef GMX_UTILITY_TEXTSTREAM_H
+#define GMX_UTILITY_TEXTSTREAM_H
+
+#include <string>
+
+#include <boost/shared_ptr.hpp>
+
+namespace gmx
+{
+
+/*! \libinternal \brief
+ * Interface for reading text.
+ *
+ * Concrete implementations can read the text from, e.g., a file or an in-memory
+ * string. The main use is to allow unit tests to inject in-memory buffers
+ * instead of writing files to be read by the code under test, but there are
+ * also use cases outside the tests where it is useful to abstract out whether
+ * the input is from a real file or something else.
+ *
+ * To use more advanced formatting than reading raw lines, use TextReader.
+ *
+ * Both methods in the interface can throw std::bad_alloc or other exceptions
+ * that indicate failures to read from the stream.
+ *
+ * \inlibraryapi
+ * \ingroup module_utility
+ */
+class TextInputStream
+{
+ public:
+ virtual ~TextInputStream() {}
+
+ /*! \brief
+ * Reads a line (with newline included) from the stream.
+ *
+ * \param[out] line String to receive the line.
+ * \returns `false` if nothing was read because the stream ended.
+ *
+ * On error or when `false` is returned, \p line will be empty.
+ */
+ virtual bool readLine(std::string *line) = 0;
+ /*! \brief
+ * Closes the stream.
+ *
+ * It is not allowed to read from a stream after it has been closed.
+ * See TextOutputStream::close() for rationale for a close() method
+ * separate from the destructor. For input, failures during close
+ * should be rare, but it is clearer to keep the interface symmetric.
+ */
+ virtual void close() = 0;
+};
+
+/*! \libinternal \brief
+ * Interface for writing text.
+ *
+ * Concrete implementations can write the text to, e.g., a file or an in-memory
+ * string. The main use is to allow unit tests to inject in-memory buffers
+ * instead of reading in files produced by the code under test, but there are
+ * also use cases outside the tests where it is useful to abstract out whether
+ * the output is into a real file or something else.
+ *
+ * To use more advanced formatting than writing plain strings, use TextWriter.
+ *
+ * The current implementation assumes text-only output in several places, but
+ * this interface could possibly be generalized also for binary files.
+ * However, since all binary files currently written by \Gromacs are either
+ * XDR- or TNG-based, they may require a different approach. Also, it is worth
+ * keeping the distinction between text and binary files clear, since Windows
+ * does transparent `LF`-`CRLF` newline translation for text files, so mixing
+ * modes when reading and/or writing the same file can cause subtle issues.
+ *
+ * Both methods in the interface can throw std::bad_alloc or other exceptions
+ * that indicate failures to write to the stream.
+ *
+ * \inlibraryapi
+ * \ingroup module_utility
+ */
+class TextOutputStream
+{
+ public:
+ virtual ~TextOutputStream() {}
+
+ /*! \brief
+ * Writes a given string to the stream.
+ */
+ virtual void write(const char *text) = 0;
+ /*! \brief
+ * Closes the stream.
+ *
+ * It is not allowed to write to a stream after it has been closed.
+ * A method separate from the destructor is provided such that errors
+ * that occur while closing the stream (e.g., when closing the file)
+ * can be handled using exceptions.
+ * The destructor is not allowed to throw, so code that wants to
+ * observe such errors needs to call close() after it has finished
+ * writing to the stream.
+ */
+ virtual void close() = 0;
+};
+
+//! Shorthand for a smart pointer to a TextInputStream.
+typedef boost::shared_ptr<TextInputStream> TextInputStreamPointer;
+//! Shorthand for a smart pointer to a TextOutputStream.
+typedef boost::shared_ptr<TextOutputStream> TextOutputStreamPointer;
+
+} // namespace gmx
+
+#endif
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2015, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ * \brief
+ * Implements gmx::TextWriter.
+ *
+ * \author Teemu Murtola <teemu.murtola@gmail.com>
+ * \ingroup module_utility
+ */
+#include "gmxpre.h"
+
+#include "textwriter.h"
+
+#include <cstring>
+
+#include "gromacs/utility/filestream.h"
+#include "gromacs/utility/nodelete.h"
+#include "gromacs/utility/stringutil.h"
+#include "gromacs/utility/textstream.h"
+
+namespace gmx
+{
+
+/*! \internal \brief
+ * Private implementation class for TextWriter.
+ *
+ * Bundles the output stream with the line wrapper that is applied to text
+ * before it is handed to the stream.
+ */
+class TextWriter::Impl
+{
+    public:
+        //! Stores \p stream and configures the wrapper to keep final spaces.
+        explicit Impl(const TextOutputStreamPointer &stream)
+            : stream_(stream)
+        {
+            TextLineWrapperSettings &settings = wrapper_.settings();
+            settings.setKeepFinalSpaces(true);
+        }
+
+        //! Passes \p str through the wrapper and writes the result to the stream.
+        void writeWrappedString(const std::string &str)
+        {
+            const std::string wrapped(wrapper_.wrapToString(str));
+            stream_->write(wrapped.c_str());
+        }
+
+        //! Stream that receives all output.
+        TextOutputStreamPointer stream_;
+        //! Wrapper used by writeWrappedString().
+        TextLineWrapper         wrapper_;
+};
+
+// static
+void TextWriter::writeFileFromString(const std::string &filename,
+                                     const std::string &text)
+{
+    // close() is called explicitly rather than left to the destructor:
+    // TextOutputStream documents close() as the way to observe errors that
+    // occur while closing, since destructors are not allowed to throw.
+    TextWriter writer(filename);
+    writer.writeString(text);
+    writer.close();
+}
+
+// Creates a TextOutputFile for \p filename and hands it to the implementation
+// wrapped in a TextOutputStreamPointer.
+TextWriter::TextWriter(const std::string &filename)
+ : impl_(new Impl(TextOutputStreamPointer(new TextOutputFile(filename))))
+{
+}
+
+// Wraps an already-open C file handle in a TextOutputFile. The header
+// documentation of this constructor states that the caller still closes \p fp.
+TextWriter::TextWriter(FILE *fp)
+ : impl_(new Impl(TextOutputStreamPointer(new TextOutputFile(fp))))
+{
+}
+
+// Wraps a caller-owned stream in a smart pointer that uses the no_delete
+// deleter (presumably a no-op deleter from nodelete.h), matching the header
+// contract that the caller manages the lifetime of \p stream.
+TextWriter::TextWriter(TextOutputStream *stream)
+ : impl_(new Impl(TextOutputStreamPointer(stream, no_delete<TextOutputStream>())))
+{
+}
+
+// Shares ownership of \p stream: Impl stores its own copy of the smart pointer.
+TextWriter::TextWriter(const TextOutputStreamPointer &stream)
+ : impl_(new Impl(stream))
+{
+}
+
+// Empty destructor. NOTE(review): presumably defined in this file so that
+// Impl is a complete type when impl_ is destroyed -- confirm against
+// PrivateImplPointer.
+TextWriter::~TextWriter()
+{
+}
+
+// Returns a reference to the stream held by the implementation object.
+TextOutputStream &TextWriter::stream()
+{
+ return *impl_->stream_;
+}
+
+// Exposes the settings of the internal line wrapper so callers can adjust them.
+TextLineWrapperSettings &TextWriter::wrapperSettings()
+{
+ return impl_->wrapper_.settings();
+}
+
+// Writes \p str to the stream: through the line wrapper when the wrapper is
+// not trivial, and directly to the stream otherwise.
+void TextWriter::writeString(const char *str)
+{
+    if (!impl_->wrapper_.isTrivial())
+    {
+        impl_->writeWrappedString(str);
+        return;
+    }
+    // Trivial wrapper: skip the wrapping step and write the text as given.
+    impl_->stream_->write(str);
+}
+
+// Writes \p str through the line wrapper. Unlike the const char * overload,
+// this one does not check isTrivial() and always takes the wrapper path.
+void TextWriter::writeString(const std::string &str)
+{
+ impl_->writeWrappedString(str);
+}
+
+// Writes \p line and makes sure the output ends with a newline: one is
+// appended only when \p line does not already end in "\n".
+void TextWriter::writeLine(const char *line)
+{
+    writeString(line);
+    if (endsWith(line, "\n"))
+    {
+        // The text already terminated the line; nothing to add.
+        return;
+    }
+    writeLine();
+}
+
+// std::string variant of writeLine(): writes \p line and appends a newline
+// only when \p line does not already end in "\n".
+void TextWriter::writeLine(const std::string &line)
+{
+    writeString(line);
+    if (endsWith(line, "\n"))
+    {
+        // The text already terminated the line; nothing to add.
+        return;
+    }
+    writeLine();
+}
+
+// Writes a single newline. It goes through writeString(), so it follows the
+// same wrapper/direct path selection as any other text.
+void TextWriter::writeLine()
+{
+ writeString("\n");
+}
+
+// Forwards to close() on the underlying stream; any exception the stream
+// throws while closing propagates to the caller.
+void TextWriter::close()
+{
+ impl_->stream_->close();
+}
+
+} // namespace gmx
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2015, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \libinternal \file
+ * \brief
+ * Declares gmx::TextWriter.
+ *
+ * \author Teemu Murtola <teemu.murtola@gmail.com>
+ * \inlibraryapi
+ * \ingroup module_utility
+ */
+#ifndef GMX_UTILITY_TEXTWRITER_H
+#define GMX_UTILITY_TEXTWRITER_H
+
+#include <cstdio>
+
+#include <string>
+
+#include "gromacs/utility/classhelpers.h"
+#include "gromacs/utility/textstream.h"
+
+namespace gmx
+{
+
+class TextLineWrapperSettings;
+
+/*! \libinternal \brief
+ * Writes text into a TextOutputStream.
+ *
+ * This class provides more formatting and line-oriented writing capabilities
+ * than writing raw strings into the stream.
+ *
+ * All methods that write to the stream can throw any exceptions that the
+ * underlying stream throws.
+ *
+ * \inlibraryapi
+ * \ingroup module_utility
+ */
+class TextWriter
+{
+ public:
+ /*! \brief
+ * Convenience method for writing a file from a string in a single call.
+ *
+ * \param[in] filename Name of the file to write.
+ * \param[in] text String to write to \p filename.
+ * \throws std::bad_alloc if out of memory.
+ * \throws FileIOError on any I/O error.
+ *
+ * If \p filename exists, it is overwritten.
+ */
+ static void writeFileFromString(const std::string &filename,
+ const std::string &text);
+
+ /*! \brief
+ * Creates a writer that writes to specified file.
+ *
+ * \param[in] filename Path to the file to open.
+ * \throws std::bad_alloc if out of memory.
+ * \throws FileIOError on any I/O error.
+ *
+ * This constructor is provided for convenience for writing directly to
+ * a file, without the need to construct multiple objects.
+ */
+ explicit TextWriter(const std::string &filename);
+ /*! \brief
+ * Creates a writer that writes to specified file.
+ *
+ * \param[in] fp File handle to write to.
+ * \throws std::bad_alloc if out of memory.
+ * \throws FileIOError on any I/O error.
+ *
+ * This constructor is provided for interoperability with C-like code
+ * for writing directly to an already opened file, without the need to
+ * construct multiple objects.
+ *
+ * The caller is responsible for closing \p fp; it is not allowed to
+ * call close() on the writer.
+ */
+ explicit TextWriter(FILE *fp);
+ /*! \brief
+ * Creates a writer that writes to specified stream.
+ *
+ * \param[in] stream Stream to write to.
+ * \throws std::bad_alloc if out of memory.
+ *
+ * The caller is responsible for the lifetime of the stream (it should
+ * remain in existence as long as the writer exists).
+ *
+ * This constructor is provided for convenience for cases where the
+ * stream is not allocated with `new` and/or not managed by a
+ * boost::shared_ptr (e.g., if the stream is an object on the stack).
+ */
+ explicit TextWriter(TextOutputStream *stream);
+ /*! \brief
+ * Creates a writer that writes to specified stream.
+ *
+ * \param[in] stream Stream to write to.
+ * \throws std::bad_alloc if out of memory.
+ *
+ * The writer keeps a reference to the stream, so the caller can pass
+ * in a temporary if necessary.
+ */
+ explicit TextWriter(const TextOutputStreamPointer &stream);
+ ~TextWriter();
+
+ //! Returns the underlying stream for this writer.
+ TextOutputStream &stream();
+
+ /*! \brief
+ * Allows adjusting wrapping settings for the writer.
+ *
+ * \todo
+ * Wrapping is not currently implemented for code that writes partial
+ * lines with writeString().
+ */
+ TextLineWrapperSettings &wrapperSettings();
+
+ /*! \brief
+ * Writes a string to the stream.
+ *
+ * \param[in] str String to write.
+ */
+ void writeString(const char *str);
+ //! \copydoc writeString(const char *)
+ void writeString(const std::string &str);
+ /*! \brief
+ * Writes a line to the stream.
+ *
+ * \param[in] line Line to write.
+ *
+ * If \p line does not end in a newline, one newline is appended.
+ * Otherwise, works as writeString().
+ */
+ void writeLine(const char *line);
+ //! \copydoc writeLine(const char *)
+ void writeLine(const std::string &line);
+ //! Writes a newline to the stream.
+ void writeLine();
+
+ /*! \brief
+ * Closes the underlying stream.
+ */
+ void close();
+
+ private:
+ class Impl;
+
+ PrivateImplPointer<Impl> impl_;
+};
+
+} // namespace gmx
+
+#endif
#include "gromacs/utility/fatalerror.h"
+/* DISCLAIMER: All the atom count and thread numbers below are heuristic.
+ * The real switching points will depend on the system simulation,
+ * the algorithms used and the hardware it's running on, as well as if there
+ * are other jobs running on the same machine. We try to take into account
+ * factors that have a large influence, such as recent Intel CPUs being
+ * much better at wide multi-threading. The remaining factors should
+ * (hopefully) have a small influence, such that the performance just before
+ * and after a switch point doesn't change too much.
+ */
+
+#ifdef GMX_OPENMP
+static const bool bOMP = true;
+#else
+static const bool bOMP = false;
+#endif
+
#ifdef GMX_THREAD_MPI
/* The minimum number of atoms per tMPI thread. With fewer atoms than this,
* the number of threads will get lowered.
/* TODO choose nthreads_omp based on hardware topology
when we have a hardware topology detection library */
/* First we consider the case of no MPI (1 MPI rank).
- * In general, when running up to 4 threads, OpenMP should be faster.
+ * In general, when running up to 8 threads, OpenMP should be faster.
* Note: on AMD Bulldozer we should avoid running OpenMP over two dies.
* On Intel>=Nehalem running OpenMP on a single CPU is always faster,
* even on two CPUs it's usually faster (but with many OpenMP threads
* Sandy/Ivy Bridge, Has/Broadwell. By checking for AVX instead of
* model numbers we ensure also future Intel CPUs are covered.
*/
-const int nthreads_omp_always_faster_default = 6;
-const int nthreads_omp_always_faster_Nehalem = 12;
-const int nthreads_omp_always_faster_Intel_AVX = 16;
+const int nthreads_omp_faster_default = 8;
+const int nthreads_omp_faster_Nehalem = 12;
+const int nthreads_omp_faster_Intel_AVX = 16;
/* For CPU only runs the fastest options are usually MPI or OpenMP only.
* With one GPU, using MPI only is almost never optimal, so we need to
* compare running pure OpenMP with combined MPI+OpenMP. This means higher
* OpenMP threads counts can still be ok. Multiplying the numbers above
* by a factor of 2 seems to be a good estimate.
*/
-const int nthreads_omp_always_faster_gpu_fac = 2;
+const int nthreads_omp_faster_gpu_fac = 2;
/* This is the case with MPI (2 or more MPI PP ranks).
* By default we will terminate with a fatal error when more than 8
const int nthreads_omp_mpi_target_max = 6;
-#ifdef GMX_USE_OPENCL
-static const bool bGpuSharingSupported = false;
-#else
-static const bool bGpuSharingSupported = true;
-#endif
-
-
-static int nthreads_omp_always_faster(gmx_cpuid_t cpuid_info, gmx_bool bUseGPU)
+/* Returns the maximum OpenMP thread count for which using a single MPI rank
+ * should be faster than using multiple ranks with the same total thread count.
+ */
+static int nthreads_omp_faster(gmx_cpuid_t cpuid_info, gmx_bool bUseGPU)
{
int nth;
if (gmx_cpuid_vendor(cpuid_info) == GMX_CPUID_VENDOR_INTEL &&
gmx_cpuid_feature(cpuid_info, GMX_CPUID_FEATURE_X86_AVX))
{
- nth = nthreads_omp_always_faster_Intel_AVX;
+ nth = nthreads_omp_faster_Intel_AVX;
}
else if (gmx_cpuid_is_intel_nehalem(cpuid_info))
{
- nth = nthreads_omp_always_faster_Nehalem;
+ nth = nthreads_omp_faster_Nehalem;
}
else
{
- nth = nthreads_omp_always_faster_default;
+ nth = nthreads_omp_faster_default;
}
if (bUseGPU)
{
- nth *= nthreads_omp_always_faster_gpu_fac;
+ nth *= nthreads_omp_faster_gpu_fac;
}
nth = std::min(nth, GMX_OPENMP_MAX_THREADS);
return nth;
}
+/* Returns the maximum OpenMP thread count that passes the efficiency check.
+ *
+ * When compiled with both OpenMP and MPI and nrank > 1, this is
+ * nthreads_omp_mpi_ok_max; in all other cases it is the single-rank limit
+ * returned by nthreads_omp_faster() for the given CPU and GPU usage.
+ * nrank is marked gmx_unused because it is only read inside the #if branch.
+ */
+static int nthreads_omp_efficient_max(int gmx_unused nrank,
+ gmx_cpuid_t cpuid_info,
+ gmx_bool bUseGPU)
+{
+#if defined GMX_OPENMP && defined GMX_MPI
+ if (nrank > 1)
+ {
+ return nthreads_omp_mpi_ok_max;
+ }
+ else
+#endif
+ /* This block is the else-branch when the #if above is active, and the
+ * whole function body otherwise.
+ */
+ {
+ return nthreads_omp_faster(cpuid_info, bUseGPU);
+ }
+}
+
+/* Return the number of thread-MPI ranks to use.
+ * This is chosen such that we can always obey our own efficiency checks.
+ */
static int get_tmpi_omp_thread_division(const gmx_hw_info_t *hwinfo,
const gmx_hw_opt_t *hw_opt,
int nthreads_tot,
/* #thread < #gpu is very unlikely, but if so: waste gpu(s) */
nrank = nthreads_tot;
}
- else if (bGpuSharingSupported &&
- (nthreads_tot > nthreads_omp_always_faster(hwinfo->cpuid_info,
- ngpu > 0) ||
+ else if (gmx_gpu_sharing_supported() &&
+ (nthreads_tot > nthreads_omp_faster(hwinfo->cpuid_info,
+ ngpu > 0) ||
(ngpu > 1 && nthreads_tot/ngpu > nthreads_omp_mpi_target_max)))
{
/* The high OpenMP thread count will likely result in sub-optimal
}
else
{
- if (nthreads_tot <= nthreads_omp_always_faster(hwinfo->cpuid_info,
- ngpu > 0))
+ if (nthreads_tot <= nthreads_omp_faster(hwinfo->cpuid_info, ngpu > 0))
{
/* Use pure OpenMP parallelization */
nrank = 1;
}
+/* Returns the number of GPUs that can be used: 0 when the cut-off scheme is
+ * not Verlet or no compatible device was detected, all compatible devices
+ * when multiple GPUs per node are supported, and otherwise 1 (with a warning
+ * when more than one compatible GPU is available).
+ */
+static int getMaxGpuUsable(FILE *fplog, const t_commrec *cr, const gmx_hw_info_t *hwinfo, int cutoff_scheme)
+{
+    /* This code relies on the fact that GPUs are not detected when GPU
+     * acceleration was disabled at run time by the user.
+     */
+    if (cutoff_scheme != ecutsVERLET ||
+        hwinfo->gpu_info.n_dev_compatible <= 0)
+    {
+        return 0;
+    }
+
+    if (gmx_multiple_gpu_per_node_supported())
+    {
+        return hwinfo->gpu_info.n_dev_compatible;
+    }
+
+    /* Only a single GPU can be used; tell the user when more are present */
+    if (hwinfo->gpu_info.n_dev_compatible > 1)
+    {
+        md_print_warn(cr, fplog, "More than one compatible GPU is available, but GROMACS can only use one of them. Using a single thread-MPI rank.\n");
+    }
+    return 1;
+}
+
+
#ifdef GMX_THREAD_MPI
/* Get the number of MPI ranks to use for thread-MPI based on how many
* were requested, which algorithms we're using,
* with the hardware, except that ntmpi could be larger than #GPU.
*/
int get_nthreads_mpi(const gmx_hw_info_t *hwinfo,
- const gmx_hw_opt_t *hw_opt,
+ gmx_hw_opt_t *hw_opt,
const t_inputrec *inputrec,
const gmx_mtop_t *mtop,
const t_commrec *cr,
{
int nthreads_hw, nthreads_tot_max, nrank, ngpu;
int min_atoms_per_mpi_rank;
- gmx_bool bCanUseGPU;
/* Check if an algorithm does not support parallel simulation. */
if (inputrec->eI == eiLBFGS ||
inputrec->coulombtype == eelEWALD)
{
- md_print_warn(cr, fplog, "The integration or electrostatics algorithm doesn't support parallel runs. Using a single thread-MPI thread.\n");
+ md_print_warn(cr, fplog, "The integration or electrostatics algorithm doesn't support parallel runs. Using a single thread-MPI rank.\n");
if (hw_opt->nthreads_tmpi > 1)
{
- gmx_fatal(FARGS, "You asked for more than 1 thread-MPI thread, but an algorithm doesn't support that");
+ gmx_fatal(FARGS, "You asked for more than 1 thread-MPI rank, but an algorithm doesn't support that");
}
return 1;
nthreads_tot_max = nthreads_hw;
}
- bCanUseGPU = (inputrec->cutoff_scheme == ecutsVERLET &&
- hwinfo->gpu_info.n_dev_compatible > 0);
- if (bCanUseGPU)
- {
- ngpu = hwinfo->gpu_info.n_dev_compatible;
- }
- else
- {
- ngpu = 0;
- }
+ ngpu = getMaxGpuUsable(fplog, cr, hwinfo, inputrec->cutoff_scheme);
if (inputrec->cutoff_scheme == ecutsGROUP)
{
}
else
{
- if (bCanUseGPU)
+ if (ngpu >= 1)
{
min_atoms_per_mpi_rank = min_atoms_per_gpu;
}
nrank = nrank_new;
+ /* We reduced the number of tMPI ranks, which means we might violate
+ * our own efficiency checks if we simply use all hardware threads.
+ */
+ if (bOMP && hw_opt->nthreads_omp <= 0 && hw_opt->nthreads_tot <= 0)
+ {
+ /* The user set neither the total nor the OpenMP thread count,
+ * we should use all hardware threads, unless we will violate
+ * our own efficiency limitation on the thread count.
+ */
+ int nt_omp_max;
+
+ nt_omp_max = nthreads_omp_efficient_max(nrank, hwinfo->cpuid_info, ngpu >= 1);
+
+ if (nrank*nt_omp_max < hwinfo->nthreads_hw_avail)
+ {
+ /* Limit the number of OpenMP threads to start */
+ hw_opt->nthreads_omp = nt_omp_max;
+ }
+ }
+
fprintf(stderr, "\n");
fprintf(stderr, "NOTE: Parallelization is limited by the small number of atoms,\n");
fprintf(stderr, " only starting %d thread-MPI ranks.\n", nrank);
void check_resource_division_efficiency(const gmx_hw_info_t *hwinfo,
const gmx_hw_opt_t *hw_opt,
- gmx_bool bNTOptSet,
+ gmx_bool bNtOmpOptionSet,
t_commrec *cr,
FILE *fplog)
{
* OpenMP have been initialized. Check that here.
*/
#ifdef GMX_THREAD_MPI
+ assert(nthreads_omp_faster_default >= nthreads_omp_mpi_ok_max);
assert(hw_opt->nthreads_tmpi >= 1);
#endif
assert(gmx_omp_nthreads_get(emntDefault) >= 1);
if (DOMAINDECOMP(cr) && cr->nnodes > 1)
{
if (nth_omp_max < nthreads_omp_mpi_ok_min ||
- (!(ngpu > 0 && !bGpuSharingSupported) &&
+ (!(ngpu > 0 && !gmx_gpu_sharing_supported()) &&
nth_omp_max > nthreads_omp_mpi_ok_max))
{
/* Note that we print target_max here, not ok_max */
nthreads_omp_mpi_ok_min,
nthreads_omp_mpi_target_max);
- if (bNTOptSet)
+ if (bNtOmpOptionSet)
{
md_print_warn(cr, fplog, "NOTE: %s\n", buf);
}
else
{
/* No domain decomposition (or only one domain) */
- if (!(ngpu > 0 && !bGpuSharingSupported) &&
- nth_omp_max > nthreads_omp_always_faster(hwinfo->cpuid_info, ngpu > 0))
+ if (!(ngpu > 0 && !gmx_gpu_sharing_supported()) &&
+ nth_omp_max > nthreads_omp_faster(hwinfo->cpuid_info, ngpu > 0))
{
/* To arrive here, the user/system set #ranks and/or #OMPthreads */
gmx_bool bEnvSet;
bEnvSet = (getenv("OMP_NUM_THREADS") != NULL);
- if (bNTOptSet || bEnvSet)
+ if (bNtOmpOptionSet || bEnvSet)
{
sprintf(buf2, "You requested %d OpenMP threads", nth_omp_max);
}
* with different values per rank or node, since in that case
* the user can not set -ntomp to override the error.
*/
- if (bNTOptSet || (bEnvSet && nth_omp_min != nth_omp_max))
+ if (bNtOmpOptionSet || (bEnvSet && nth_omp_min != nth_omp_max))
{
md_print_warn(cr, fplog, "NOTE: %s\n", buf);
}
#else /* GMX_OPENMP && GMX_MPI */
/* No OpenMP and/or MPI: it doesn't make much sense to check */
GMX_UNUSED_VALUE(hw_opt);
- GMX_UNUSED_VALUE(bNTOptSet);
+ GMX_UNUSED_VALUE(bNtOmpOptionSet);
/* Check if we have more than 1 physical core, if detected,
* or more than 1 hardware thread if physical cores were not detected.
*/
}
if (hw_opt->nthreads_tmpi > 0)
{
- gmx_fatal(FARGS, "Setting the number of thread-MPI threads is only supported with thread-MPI and GROMACS was compiled without thread-MPI");
+ gmx_fatal(FARGS, "Setting the number of thread-MPI ranks is only supported with thread-MPI and GROMACS was compiled without thread-MPI");
}
#endif
-#ifndef GMX_OPENMP
- if (hw_opt->nthreads_omp > 1)
+ if (!bOMP)
{
- gmx_fatal(FARGS, "More than 1 OpenMP thread requested, but GROMACS was compiled without OpenMP support");
+ if (hw_opt->nthreads_omp > 1)
+ {
+ gmx_fatal(FARGS, "More than 1 OpenMP thread requested, but GROMACS was compiled without OpenMP support");
+ }
+ hw_opt->nthreads_omp = 1;
}
- hw_opt->nthreads_omp = 1;
-#endif
if (hw_opt->nthreads_tot > 0 && hw_opt->nthreads_omp_pme <= 0)
{
hw_opt->nthreads_omp > 0 &&
hw_opt->nthreads_tot != hw_opt->nthreads_tmpi*hw_opt->nthreads_omp)
{
- gmx_fatal(FARGS, "The total number of threads requested (%d) does not match the thread-MPI threads (%d) times the OpenMP threads (%d) requested",
+ gmx_fatal(FARGS, "The total number of threads requested (%d) does not match the thread-MPI ranks (%d) times the OpenMP threads (%d) requested",
hw_opt->nthreads_tot, hw_opt->nthreads_tmpi, hw_opt->nthreads_omp);
}
if (hw_opt->nthreads_tmpi > 0 &&
hw_opt->nthreads_tot % hw_opt->nthreads_tmpi != 0)
{
- gmx_fatal(FARGS, "The total number of threads requested (%d) is not divisible by the number of thread-MPI threads requested (%d)",
+ gmx_fatal(FARGS, "The total number of threads requested (%d) is not divisible by the number of thread-MPI ranks requested (%d)",
hw_opt->nthreads_tot, hw_opt->nthreads_tmpi);
}
}
}
-#ifndef GMX_OPENMP
- if (hw_opt->nthreads_omp > 1)
+ if (!bOMP && hw_opt->nthreads_omp > 1)
{
gmx_fatal(FARGS, "OpenMP threads are requested, but GROMACS was compiled without OpenMP support");
}
-#endif
if (hw_opt->nthreads_omp_pme > 0 && hw_opt->nthreads_omp <= 0)
{
* At the point we have already called check_and_update_hw_opt.
* Thus all options should be internally consistent and consistent
* with the hardware, except that ntmpi could be larger than #GPU.
+ * If necessary, this function will modify hw_opt->nthreads_omp.
*/
int get_nthreads_mpi(const gmx_hw_info_t *hwinfo,
- const gmx_hw_opt_t *hw_opt,
+ gmx_hw_opt_t *hw_opt,
const t_inputrec *inputrec,
const gmx_mtop_t *mtop,
const t_commrec *cr,
* intended to catch cases where the user starts 1 MPI rank per hardware
* thread or 1 rank per physical node.
* With a sub-optimal setup a note is printed to fplog and stderr when
- * bNtOptSet==TRUE; with bNtOptSet==FALSE a fatal error is issued.
+ * bNtOmpOptionSet==TRUE; with bNtOmpOptionSet==FALSE a fatal error is issued.
* This function should be called after thread-MPI and OpenMP are set up.
*/
void check_resource_division_efficiency(const gmx_hw_info_t *hwinfo,
const gmx_hw_opt_t *hw_opt,
- gmx_bool bNTOptSet,
+ gmx_bool bNtOmpOptionSet,
t_commrec *cr,
FILE *fplog);
hw_opt,
inputrec, mtop,
cr, fplog);
- if (hw_opt->nthreads_tot > 0 && hw_opt->nthreads_omp <= 0)
- {
- hw_opt->nthreads_omp = hw_opt->nthreads_tot/hw_opt->nthreads_tmpi;
- }
if (hw_opt->nthreads_tmpi > 1)
{
#include "gromacs/options/options.h"
#include "gromacs/utility/basedefinitions.h"
#include "gromacs/utility/basenetwork.h"
-#include "gromacs/utility/file.h"
#include "gromacs/utility/gmxmpi.h"
+#include "gromacs/utility/textwriter.h"
#include "programs/mdrun/mdrun_main.h"
#include "testutils/cmdlinetest.h"
void
SimulationRunner::useStringAsMdpFile(const std::string &mdpString)
{
- gmx::File::writeFileFromString(mdpInputFileName_, mdpString);
+ gmx::TextWriter::writeFileFromString(mdpInputFileName_, mdpString);
}
void
SimulationRunner::useStringAsNdxFile(const char *ndxString)
{
- gmx::File::writeFileFromString(ndxFileName_, ndxString);
+ gmx::TextWriter::writeFileFromString(ndxFileName_, ndxString);
}
void
set(TESTUTILS_SOURCES
cmdlinetest.cpp
integrationtests.cpp
+ interactivetest.cpp
mpi-printer.cpp
refdata.cpp
stringtest.cpp
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "gromacs/commandline/cmdlineoptionsmodule.h"
#include "gromacs/commandline/cmdlineprogramcontext.h"
#include "gromacs/utility/arrayref.h"
-#include "gromacs/utility/file.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/stringutil.h"
+#include "gromacs/utility/textreader.h"
+#include "gromacs/utility/textwriter.h"
#include "testutils/refdata.h"
#include "testutils/testfilemanager.h"
GMX_ASSERT(extension[0] != '.', "Extension should not contain a dot");
std::string fullFilename = impl_->fileManager_.getTemporaryFilePath(
formatString("%d.%s", args->argc(), extension));
- File::writeFileFromString(fullFilename, contents);
+ TextWriter::writeFileFromString(fullFilename, contents);
args->addOption(option, fullFilename);
}
GMX_ASSERT(extension[0] != '.', "Extension should not contain a dot");
std::string fullFilename = impl_->fileManager_.getTemporaryFilePath(
formatString("%d.%s", args->argc(), extension));
- File file(fullFilename, "w");
+ TextWriter file(fullFilename);
ConstArrayRef<const char *>::const_iterator i;
for (i = contents.begin(); i != contents.end(); ++i)
{
outfile != impl_->outputFiles_.end();
++outfile)
{
- std::string output = File::readToString(outfile->path);
+ std::string output = TextReader::readFileToString(outfile->path);
outputChecker.checkStringBlock(output, outfile->option.c_str());
}
}
<xsl:value-of select="."/>
</xsl:template>
+<!--
+    Renders an InteractiveSession compound as a transcript inside <pre>.
+    Items whose Name starts with 'Output' are printed verbatim; every other
+    item is treated as an input line and prefixed with a marker.  The first
+    character of each item value is skipped with substring(., 2)
+    (presumably the leading newline added when string blocks are written
+    to the reference data; confirm against the reference data writer).
+    Newlines are written as the character reference &#10; because a literal
+    whitespace character cannot represent a newline in an attribute value.
+-->
+<xsl:template match="InteractiveSession">
+    <pre>
+        <xsl:for-each select="*">
+            <xsl:choose>
+                <!-- Program output: print as-is. -->
+                <xsl:when test="starts-with(@Name, 'Output')">
+                    <xsl:value-of select="substring(.,2)"/>
+                </xsl:when>
+                <!-- Input item with nothing after the first character: an empty read. -->
+                <xsl:when test="string-length(.)=1">
+                    <xsl:text>►</xsl:text>
+                    <xsl:text>¶</xsl:text>
+                </xsl:when>
+                <!-- Input line containing a newline: show a return symbol, then break the line. -->
+                <xsl:when test="contains(substring(.,2), '&#10;')">
+                    <xsl:text>►</xsl:text>
+                    <xsl:value-of select="translate(substring(.,2), '&#10;', '⏎')"/>
+                    <xsl:text>&#10;</xsl:text>
+                </xsl:when>
+                <!-- Input line without a newline: mark where the input ended. -->
+                <xsl:otherwise>
+                    <xsl:text>►</xsl:text>
+                    <xsl:value-of select="substring(.,2)"/>
+                    <xsl:text>¶</xsl:text>
+                </xsl:otherwise>
+            </xsl:choose>
+        </xsl:for-each>
+        <xsl:text>[EOF]</xsl:text>
+    </pre>
+</xsl:template>
+
</xsl:stylesheet>
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include <stdio.h>
#include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/file.h"
+#include "gromacs/utility/textwriter.h"
namespace gmx
{
IntegrationTestFixture::redirectStringToStdin(const char* theString)
{
std::string fakeStdin("fake-stdin");
- gmx::File::writeFileFromString(fakeStdin, theString);
+ gmx::TextWriter::writeFileFromString(fakeStdin, theString);
if (NULL == std::freopen(fakeStdin.c_str(), "r", stdin))
{
GMX_THROW_WITH_ERRNO(FileIOError("Failed to redirect a string to stdin"),
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2015, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ * \brief
+ * Implements classes from interactivetest.h.
+ *
+ * \author Teemu Murtola <teemu.murtola@gmail.com>
+ * \ingroup module_testutils
+ */
+#include "gmxpre.h"
+
+#include "interactivetest.h"
+
+#include <string>
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+#include "gromacs/utility/arrayref.h"
+#include "gromacs/utility/stringutil.h"
+#include "gromacs/utility/textstream.h"
+
+#include "testutils/refdata.h"
+#include "testutils/stringtest.h"
+
+namespace gmx
+{
+namespace test
+{
+
+// These two classes cannot be in an unnamed namespace (easily), since
+// then their use as members below would trigger warnings.
+// But if anyone needs these outside this file, they can easily be moved to a
+// separate header.
+
+//! gmock-based TextInputStream; readLine() and close() are mocked methods.
+class MockTextInputStream : public TextInputStream
+{
+ public:
+ MOCK_METHOD1(readLine, bool(std::string *));
+ MOCK_METHOD0(close, void());
+};
+
+//! gmock-based TextOutputStream; write() and close() are mocked methods.
+class MockTextOutputStream : public TextOutputStream
+{
+ public:
+ MOCK_METHOD1(write, void(const char *));
+ MOCK_METHOD0(close, void());
+};
+
+/*! \internal \brief
+ * Private implementation class for InteractiveTestHelper.
+ *
+ * Owns the two mock streams and forwards their readLine() / write() calls to
+ * readInputLine() and addOutput().  Each line handed to the reader is checked
+ * as reference item "InputN" (N counts reads, starting from 1); output
+ * written since the previous check is checked as "OutputN", where N is the
+ * number of reads done so far.
+ */
+class InteractiveTestHelper::Impl
+{
+    public:
+        //! Stores \p checker and routes calls on the mock streams to this object.
+        explicit Impl(TestReferenceChecker checker)
+            : checker_(checker), bLastNewline_(true),
+              currentLine_(0), bHasOutput_(false)
+        {
+            EXPECT_CALL(inputStream_, readLine(::testing::_))
+                .WillRepeatedly(::testing::Invoke(this, &Impl::readInputLine));
+            // Times(0): any close() call on the streams is unexpected.
+            EXPECT_CALL(inputStream_, close()).Times(0);
+            EXPECT_CALL(outputStream_, write(::testing::_))
+                .WillRepeatedly(::testing::Invoke(this, &Impl::addOutput));
+            EXPECT_CALL(outputStream_, close()).Times(0);
+        }
+
+        /*! \brief
+         * Serves one readLine() call.
+         *
+         * Checks the output collected so far, then stores the next input line
+         * in \p line (empty if the input is exhausted) and checks it against
+         * the reference data.
+         *
+         * \returns true if an input line was available.
+         */
+        bool readInputLine(std::string *line)
+        {
+            // Output written before this read belongs to the previous prompt.
+            checkOutput();
+            const size_t lineIndex = currentLine_;
+            const size_t lineCount = inputLines_.size();
+            const bool   bHaveLine = (lineIndex < lineCount);
+            line->clear();
+            if (bHaveLine)
+            {
+                const bool bIsLastLine = !(lineIndex + 1 < lineCount);
+                line->assign(inputLines_[lineIndex]);
+                // Only the last line can be left without a newline.
+                if (bLastNewline_ || !bIsLastLine)
+                {
+                    line->append("\n");
+                }
+            }
+            // The counter advances even at end-of-input, so the id stays unique.
+            ++currentLine_;
+            const std::string inputId
+                = formatString("Input%d", static_cast<int>(currentLine_));
+            StringTestBase::checkText(&checker_, *line, inputId.c_str());
+            return bHaveLine;
+        }
+        //! Serves one write() call by appending \p str to the pending output.
+        void addOutput(const char *str)
+        {
+            currentOutput_.append(str);
+            bHasOutput_ = true;
+        }
+
+        /*! \brief
+         * Checks the output collected since the previous check and resets it.
+         *
+         * The text is compared only if checkPresent() returns true for the
+         * current output id.
+         */
+        void checkOutput()
+        {
+            const std::string outputId
+                = formatString("Output%d", static_cast<int>(currentLine_));
+            const bool        bCompareText
+                = checker_.checkPresent(bHasOutput_, outputId.c_str());
+            if (bCompareText)
+            {
+                StringTestBase::checkText(&checker_, currentOutput_, outputId.c_str());
+            }
+            currentOutput_.clear();
+            bHasOutput_ = false;
+        }
+        //! Tells the checker that the input item after the last read is not present.
+        void checkPendingInput()
+        {
+            const std::string nextInputId
+                = formatString("Input%d", static_cast<int>(currentLine_ + 1));
+            checker_.checkPresent(false, nextInputId.c_str());
+        }
+
+        //! Checker for the compound item of this session.
+        TestReferenceChecker             checker_;
+        //! Input lines set with setInputLines().
+        ConstArrayRef<const char *>      inputLines_;
+        //! Whether the last input line gets a newline appended.
+        bool                             bLastNewline_;
+        //! Number of readLine() calls served so far.
+        size_t                           currentLine_;
+        //! Whether write() has been called since the last output check.
+        bool                             bHasOutput_;
+        //! Output collected since the last output check.
+        std::string                      currentOutput_;
+        MockTextInputStream              inputStream_;
+        MockTextOutputStream             outputStream_;
+};
+
+// The session is checked under a compound item of type "InteractiveSession"
+// with id "Interactive", created from the caller's checker.
+InteractiveTestHelper::InteractiveTestHelper(TestReferenceChecker checker)
+ : impl_(new Impl(checker.checkCompound("InteractiveSession", "Interactive")))
+{
+}
+
+// Defined here, where Impl is a complete type.
+InteractiveTestHelper::~InteractiveTestHelper()
+{
+}
+
+void InteractiveTestHelper::setLastNewline(bool bInclude)
+{
+ impl_->bLastNewline_ = bInclude;
+}
+
+// Stores a reference to the lines (no copy is made here) and restarts
+// reading from the first line.
+void InteractiveTestHelper::setInputLines(
+ const ConstArrayRef<const char *> &inputLines)
+{
+ impl_->inputLines_ = inputLines;
+ impl_->currentLine_ = 0;
+}
+
+TextInputStream &InteractiveTestHelper::inputStream()
+{
+ return impl_->inputStream_;
+}
+
+TextOutputStream &InteractiveTestHelper::outputStream()
+{
+ return impl_->outputStream_;
+}
+
+// Checks the output written after the last read, then checks that no
+// further input item is present for the session.
+void InteractiveTestHelper::checkSession()
+{
+ impl_->checkOutput();
+ impl_->checkPendingInput();
+}
+
+} // namespace test
+} // namespace gmx
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2015, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \libinternal \file
+ * \brief
+ * Provides helper classes for testing interactive prompts.
+ *
+ * \author Teemu Murtola <teemu.murtola@gmail.com>
+ * \inlibraryapi
+ * \ingroup module_testutils
+ */
+#ifndef GMX_TESTUTILS_INTERACTIVETEST_H
+#define GMX_TESTUTILS_INTERACTIVETEST_H
+
+#include "gromacs/utility/arrayref.h"
+#include "gromacs/utility/classhelpers.h"
+
+namespace gmx
+{
+
+class TextInputStream;
+class TextOutputStream;
+
+namespace test
+{
+
+class TestReferenceChecker;
+
+/*! \libinternal \brief
+ * Helper class for testing interactive sessions.
+ *
+ * The calling test can set the user input using setInputLines() (and possibly
+ * setLastNewline()), pass the streams from inputStream() and outputStream() to
+ * the code that executes the interactive session, and then call checkSession()
+ * after the session is finished.
+ * The input is provided from the array set with setInputLines(), and all
+ * output is checked using the reference data framework.
+ * The reference XML data can be viewed with the XSLT stylesheet to show
+ * exactly how the session went.
+ *
+ * \inlibraryapi
+ * \ingroup module_testutils
+ */
+class InteractiveTestHelper
+{
+ public:
+ /*! \brief
+ * Initializes the helper.
+ *
+ * \param[in] checker Parent reference checker to use.
+ *
+ * The helper creates a compound item under \p checker for the
+ * interactive session it tests.
+ */
+ explicit InteractiveTestHelper(gmx::test::TestReferenceChecker checker);
+ ~InteractiveTestHelper();
+
+ //! Sets whether the last input line contains a newline (by default, it does).
+ void setLastNewline(bool bInclude);
+ /*! \brief
+ * Sets the input lines for the interactive session.
+ *
+ * Calls to TextInputStream::readLine() will return strings from this
+ * array in sequence.
+ * Newlines are added at the end automatically (except for the last
+ * line if `setLastNewline(false)` has been called).
+ * If there are more `readLine()` calls than there are input lines,
+ * the remaining calls return end-of-input.
+ *
+ * Calling this method restarts reading from the first line.
+ * The helper keeps a reference to \p inputLines rather than a copy, so
+ * the array presumably must stay valid while the session runs
+ * (TODO confirm against ConstArrayRef semantics).
+ */
+ void setInputLines(const ConstArrayRef<const char *> &inputLines);
+
+ //! Returns the input stream for the session.
+ TextInputStream &inputStream();
+ //! Returns the output stream for the session.
+ TextOutputStream &outputStream();
+
+ /*! \brief
+ * Finalizes the checking for the session.
+ *
+ * This must be called after all input and output from a session has
+ * occurred, as the helper will not otherwise know when output after
+ * the last input has finished. This method also checks that the
+ * required number of input lines were read in the session.
+ */
+ void checkSession();
+
+ private:
+ class Impl;
+
+ PrivateImplPointer<Impl> impl_;
+};
+} // namespace test
+} // namespace gmx
+
+#endif
#include "stringtest.h"
-#include <algorithm>
#include <string>
-#include <utility>
-#include <vector>
#include <boost/scoped_ptr.hpp>
#include "gromacs/options/basicoptions.h"
#include "gromacs/options/options.h"
-#include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/file.h"
-#include "gromacs/utility/fileredirector.h"
+#include "gromacs/utility/textreader.h"
#include "testutils/refdata.h"
-#include "testutils/testexceptions.h"
-#include "testutils/testfilemanager.h"
#include "testutils/testoptions.h"
namespace gmx
{
//! Stores the -stdout flag value to print out values instead of checking them.
bool g_bWriteToStdOut = false;
-
-/*! \brief
- * Helper for checking a block of text, e.g., implementing the `-stdout`
- * option.
- *
- * \ingroup module_testutils
- */
-void checkTextImpl(TestReferenceChecker *checker, const std::string &text,
- const char *id)
-{
- if (g_bWriteToStdOut)
- {
- printf("%s:\n", id);
- printf("%s[END]\n", text.c_str());
- }
- else
- {
- checker->checkStringBlock(text, id);
- }
-}
-
}
// TODO: Only add this option to those test binaries that actually need it
}
//! \endcond
-/********************************************************************
- * TestFileOutputRedirector
- */
-
-/*! \internal
- * \brief
- * Implementation of FileOutputRedirectorInterface for tests.
- *
- * This class redirects all output files to temporary files managed by a
- * TestFileManager, and supports checking the contents of these files using the
- * reference data framework.
- *
- * \ingroup module_testutils
- */
-class TestFileOutputRedirector : public FileOutputRedirectorInterface
-{
- public:
- //! Initializes the redirector with the given file manager.
- explicit TestFileOutputRedirector(TestFileManager *fileManager)
- : fileManager_(*fileManager)
- {
- }
-
- virtual File &standardOutput()
- {
- if (!stdoutFile_)
- {
- const std::string path = fileManager_.getTemporaryFilePath("stdout.txt");
- stdoutFile_.reset(new File(path, "w"));
- fileList_.push_back(FileListEntry("<stdout>", path));
- }
- return *stdoutFile_;
- }
- virtual FileInitializer openFileForWriting(const char *filename)
- {
- std::string suffix = filename;
- std::replace(suffix.begin(), suffix.end(), '/', '_');
- const std::string path = fileManager_.getTemporaryFilePath(suffix);
- fileList_.push_back(FileListEntry(filename, path));
- return FileInitializer(fileList_.back().second.c_str(), "w");
- }
-
- /*! \brief
- * Checks the contents of all redirected files.
- */
- void checkRedirectedFiles(TestReferenceChecker *checker)
- {
- if (stdoutFile_)
- {
- stdoutFile_->close();
- stdoutFile_.reset();
- }
- std::vector<FileListEntry>::const_iterator i;
- for (i = fileList_.begin(); i != fileList_.end(); ++i)
- {
- const std::string text = File::readToString(i->second);
- checkTextImpl(checker, text, i->first.c_str());
- }
- }
-
- private:
- typedef std::pair<std::string, std::string> FileListEntry;
-
- TestFileManager &fileManager_;
- boost::scoped_ptr<File> stdoutFile_;
- std::vector<FileListEntry> fileList_;
-};
-
/********************************************************************
* StringTestBase::Impl
*/
public:
TestReferenceData data_;
boost::scoped_ptr<TestReferenceChecker> checker_;
- boost::scoped_ptr<TestFileOutputRedirector> redirector_;
};
/********************************************************************
* StringTestBase
*/
-StringTestBase::StringTestBase()
- : impl_(new Impl)
+// static
+void StringTestBase::checkText(TestReferenceChecker *checker,
+ const std::string &text, const char *id)
{
+ if (g_bWriteToStdOut)
+ {
+ printf("%s:\n", id);
+ printf("%s[END]\n", text.c_str());
+ }
+ else
+ {
+ checker->checkStringBlock(text, id);
+ }
}
-StringTestBase::~StringTestBase()
+StringTestBase::StringTestBase()
+ : impl_(new Impl)
{
}
-FileOutputRedirectorInterface &
-StringTestBase::initOutputRedirector(TestFileManager *fileManager)
+StringTestBase::~StringTestBase()
{
- if (impl_->redirector_)
- {
- GMX_THROW(TestException("initOutputRedirector() called more than once"));
- }
- impl_->redirector_.reset(new TestFileOutputRedirector(fileManager));
- return *impl_->redirector_;
}
TestReferenceChecker &
void
StringTestBase::checkText(const std::string &text, const char *id)
{
- checkTextImpl(&checker(), text, id);
+ checkText(&checker(), text, id);
}
void
StringTestBase::checkFileContents(const std::string &filename, const char *id)
{
- const std::string text = File::readToString(filename);
+ const std::string text = TextReader::readFileToString(filename);
checkText(text, id);
}
-void
-StringTestBase::checkRedirectedOutputFiles()
-{
- if (!impl_->redirector_)
- {
- GMX_THROW(TestException("initOutputRedirector() not called"));
- }
- impl_->redirector_->checkRedirectedFiles(&checker());
-}
-
} // namespace test
} // namespace gmx
namespace gmx
{
-class FileOutputRedirectorInterface;
-
namespace test
{
-class TestFileManager;
class TestReferenceChecker;
/*! \libinternal \brief
class StringTestBase : public ::testing::Test
{
public:
- StringTestBase();
- ~StringTestBase();
-
/*! \brief
- * Creates a redirector that directs all output to temporary files.
+ * Checks a block of text.
*
- * \param[in] fileManager File manager to use for temporary files.
- *
- * Can only be called once in a test.
- *
- * \see checkRedirectedOutputFiles()
+ * This static method is provided for code that does not derive from
+ * StringTestBase to use the same functionality, e.g., implementing the
+ * `-stdout` option.
*/
- FileOutputRedirectorInterface &
- initOutputRedirector(TestFileManager *fileManager);
+ static void checkText(TestReferenceChecker *checker,
+ const std::string &text, const char *id);
+
+ StringTestBase();
+ ~StringTestBase();
/*! \brief
* Returns the root checker for this test's reference data.
* single string and calls checkText().
*/
void checkFileContents(const std::string &filename, const char *id);
- /*! \brief
- * Checks contents of all files redirected with initOutputRedirector().
- *
- * Uses the same logic as checkFileContents() to check each file
- * (including `stdout`) that has been created using the redirector
- * returned by initOutputRedirector().
- *
- * initOutputRedirector() must have been called.
- * This method should not be called if the redirector will still be
- * used for further output in the test. Behavior is not designed for
- * checking in the middle of the test, although that could potentially
- * be changed if necessary.
- */
- void checkRedirectedOutputFiles();
private:
class Impl;
#include <set>
#include <string>
+#include <utility>
+#include <vector>
+
+#include <boost/shared_ptr.hpp>
+
+#include "gromacs/utility/stringstream.h"
+
+#include "testutils/stringtest.h"
namespace gmx
{
namespace test
{
+/********************************************************************
+ * TestFileInputRedirector
+ */
+
TestFileInputRedirector::TestFileInputRedirector()
{
}
return existingFiles_.count(filename) > 0;
}
+/********************************************************************
+ * TestFileOutputRedirector::Impl
+ */
+
+// Private data for TestFileOutputRedirector: the in-memory streams that
+// stand in for redirected files.
+class TestFileOutputRedirector::Impl
+{
+ public:
+ // Shared pointer to an in-memory output stream.
+ typedef boost::shared_ptr<StringOutputStream> StringStreamPointer;
+ // Pairs the name a stream was opened with and the stream itself.
+ typedef std::pair<std::string, StringStreamPointer> FileListEntry;
+
+ // Stream for standard output; stays null until standardOutput() is first called.
+ StringStreamPointer stdoutStream_;
+ // All redirected streams, in the order they were created.
+ std::vector<FileListEntry> fileList_;
+};
+
+/********************************************************************
+ * TestFileOutputRedirector
+ */
+
+// Starts with an empty file list and no stdout stream.
+TestFileOutputRedirector::TestFileOutputRedirector()
+ : impl_(new Impl)
+{
+}
+
+// Defined here, where Impl is a complete type.
+TestFileOutputRedirector::~TestFileOutputRedirector()
+{
+}
+
+// Returns the in-memory stream used for standard output.  The stream is
+// created on the first call and registered in the file list as "<stdout>";
+// later calls return the same stream.
+TextOutputStream &TestFileOutputRedirector::standardOutput()
+{
+    Impl::StringStreamPointer &stdoutStream = impl_->stdoutStream_;
+    if (stdoutStream)
+    {
+        return *stdoutStream;
+    }
+    stdoutStream.reset(new StringOutputStream);
+    impl_->fileList_.push_back(Impl::FileListEntry("<stdout>", stdoutStream));
+    return *stdoutStream;
+}
+
+// Creates a new in-memory stream for \p filename, records it in the file
+// list under that name, and returns it.  Every call creates a new entry,
+// also when the same name is used again.
+TextOutputStreamPointer
+TestFileOutputRedirector::openTextOutputFile(const char *filename)
+{
+    Impl::StringStreamPointer newStream(new StringOutputStream);
+    const Impl::FileListEntry entry(filename, newStream);
+    impl_->fileList_.push_back(entry);
+    return newStream;
+}
+
+// Checks the text collected in every redirected stream, in creation order,
+// using the name the stream was registered with as the reference id.
+void TestFileOutputRedirector::checkRedirectedFiles(TestReferenceChecker *checker)
+{
+    typedef std::vector<Impl::FileListEntry>::const_iterator EntryIterator;
+    const EntryIterator lastEntry = impl_->fileList_.end();
+    for (EntryIterator entry = impl_->fileList_.begin(); entry != lastEntry; ++entry)
+    {
+        const std::string &name     = entry->first;
+        const std::string  contents = entry->second->toString();
+        StringTestBase::checkText(checker, contents, name.c_str());
+    }
+}
+
} // namespace test
} // namespace gmx
namespace test
{
+class TestReferenceChecker;
+
/*! \libinternal \brief
* In-memory implementation for FileInputRedirectorInterface for tests.
*
GMX_DISALLOW_COPY_AND_ASSIGN(TestFileInputRedirector);
};
+/*! \libinternal \brief
+ * In-memory implementation of FileOutputRedirectorInterface for tests.
+ *
+ * This class redirects all output files to in-memory buffers, and supports
+ * checking the contents of these files using the reference data framework.
+ *
+ * \ingroup module_testutils
+ */
+class TestFileOutputRedirector : public FileOutputRedirectorInterface
+{
+ public:
+ //! Creates a redirector with no redirected files.
+ TestFileOutputRedirector();
+ virtual ~TestFileOutputRedirector();
+
+ /*! \brief
+ * Checks contents of all redirected files (including stdout).
+ *
+ * \param[in] checker Reference checker used for each file; the id is
+ * the name the file was opened with, or `<stdout>` for standard
+ * output.
+ *
+ * This method should not be called if the redirector will still be
+ * used for further output in the test. Behavior is not designed for
+ * checking in the middle of the test, although that could potentially
+ * be changed if necessary.
+ */
+ void checkRedirectedFiles(TestReferenceChecker *checker);
+
+ // From FileOutputRedirectorInterface
+ virtual TextOutputStream &standardOutput();
+ virtual TextOutputStreamPointer openTextOutputFile(const char *filename);
+
+ private:
+ class Impl;
+
+ PrivateImplPointer<Impl> impl_;
+};
+
} // namespace test
} // namespace gmx
#include "gromacs/options/options.h"
#include "gromacs/utility/errorcodes.h"
#include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/file.h"
+#include "gromacs/utility/filestream.h"
#include "gromacs/utility/futil.h"
#include "gromacs/utility/path.h"
#include "gromacs/utility/programcontext.h"
std::fprintf(stderr,
"\nYou can use the following GROMACS-specific command-line flags\n"
"to control the behavior of the tests:\n\n");
- CommandLineHelpContext context(&File::standardError(),
+ CommandLineHelpContext context(&TextOutputFile::standardError(),
eHelpOutputFormat_Console, NULL, program);
context.setModuleDisplayName(program);
CommandLineHelpWriter(options).writeHelp(context);
#include <list>
-#include "thread_mpi/mutex.h"
-
#include "gromacs/utility/classhelpers.h"
+#include "gromacs/utility/mutex.h"
namespace gmx
{
//! Adds a provider into the registry.
void add(const char * /*name*/, TestOptionsProvider *provider)
{
- tMPI::lock_guard<tMPI::mutex> lock(listMutex_);
+ lock_guard<Mutex> lock(listMutex_);
providerList_.push_back(provider);
}
typedef std::list<TestOptionsProvider *> ProviderList;
- tMPI::mutex listMutex_;
+ Mutex listMutex_;
ProviderList providerList_;
GMX_DISALLOW_COPY_AND_ASSIGN(TestOptionsRegistry);
{
// TODO: Have some deterministic order for the options; now it depends on
// the order in which the global initializers are run.
- tMPI::lock_guard<tMPI::mutex> lock(listMutex_);
+ lock_guard<Mutex> lock(listMutex_);
ProviderList::const_iterator i;
for (i = providerList_.begin(); i != providerList_.end(); ++i)
{
#
# This file is part of the GROMACS molecular simulation package.
#
-# Copyright (c) 2011,2012,2014, by the GROMACS development team, led by
+# Copyright (c) 2011,2012,2014,2015, by the GROMACS development team, led by
# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
# and including many others, as listed in the AUTHORS file in the
# top-level source directory and at http://www.gromacs.org.
# the research papers on the package. Check out http://www.gromacs.org.
gmx_add_unit_test(TestUtilsUnitTests testutils-test
+ interactivetest.cpp
refdata_tests.cpp
testasserts_tests.cpp)
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2015, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ * \brief
+ * Self-tests for interactive test helpers.
+ *
+ * \author Teemu Murtola <teemu.murtola@gmail.com>
+ * \ingroup module_testutils
+ */
+#include "gmxpre.h"
+
+#include "testutils/interactivetest.h"
+
+#include <vector>
+
+#include <gtest/gtest.h>
+#include <gtest/gtest-spi.h>
+
+#include "gromacs/utility/textstream.h"
+
+#include "testutils/refdata.h"
+
+namespace
+{
+
+/*! \brief
+ * Scripted interactive session for exercising InteractiveTestHelper.
+ *
+ * A test queues output writes and input reads with the add*() methods and
+ * then calls run(), which replays the events through the helper's streams,
+ * verifies what readLine() returns, and finishes with checkSession().
+ */
+class InteractiveSession
+{
+    public:
+        //! Creates a session whose reference data is opened in the given mode.
+        explicit InteractiveSession(gmx::test::ReferenceDataMode mode)
+            : data_(mode), helper_(data_.rootChecker()), nextInputLine_(0)
+        {
+        }
+
+        //! Queues a write of \p output to the output stream.
+        void addOutput(const char *output)
+        {
+            events_.push_back(Event(WriteOutput, output));
+        }
+        //! Adds a line of available input without queueing a read.
+        void addInputLine(const char *inputLine)
+        {
+            inputLines_.push_back(inputLine);
+        }
+        //! Queues a read without adding input (reads past the end if none is left).
+        void addReadInput()
+        {
+            events_.push_back(Event(ReadInput, ""));
+        }
+        //! Adds a line of input and queues the read that consumes it.
+        void addInput(const char *inputLine)
+        {
+            addInputLine(inputLine);
+            addReadInput();
+        }
+        /*! \brief
+         * Adds a line of input that is expected to be read without a
+         * trailing newline, and queues the read.
+         *
+         * Also switches the helper to omit the newline from its last line.
+         */
+        void addInputNoNewline(const char *inputLine)
+        {
+            addInputLine(inputLine);
+            helper_.setLastNewline(false);
+            events_.push_back(Event(ReadInputNoNewline, ""));
+        }
+
+        /*! \brief
+         * Replays the queued events and checks the session.
+         *
+         * For every read, the return value and the line returned by the
+         * helper's input stream are compared against what this class expects.
+         */
+        void run()
+        {
+            gmx::TextInputStream  &input  = helper_.inputStream();
+            gmx::TextOutputStream &output = helper_.outputStream();
+            helper_.setInputLines(inputLines_);
+            std::vector<Event>::const_iterator event;
+            for (event = events_.begin(); event != events_.end(); ++event)
+            {
+                if (event->first == WriteOutput)
+                {
+                    output.write(event->second);
+                }
+                else
+                {
+                    std::string expectedLine;
+                    const bool  bInputRemaining = (nextInputLine_ < inputLines_.size());
+                    if (bInputRemaining)
+                    {
+                        expectedLine = inputLines_[nextInputLine_];
+                        if (event->first != ReadInputNoNewline)
+                        {
+                            expectedLine.append("\n");
+                        }
+                    }
+                    // Count the read even at end-of-input, like the helper does.
+                    ++nextInputLine_;
+                    std::string line;
+                    EXPECT_EQ(bInputRemaining, input.readLine(&line));
+                    EXPECT_EQ(expectedLine, line);
+                }
+            }
+            helper_.checkSession();
+        }
+
+    private:
+        enum EventType
+        {
+            ReadInput,
+            ReadInputNoNewline,
+            WriteOutput
+        };
+        // The latter is the output string.
+        typedef std::pair<EventType, const char *> Event;
+
+        gmx::test::TestReferenceData         data_;
+        gmx::test::InteractiveTestHelper     helper_;
+        std::vector<const char *>            inputLines_;
+        size_t                               nextInputLine_;
+        std::vector<Event>                   events_;
+};
+
+// Records a session in update mode, then replays the identical session in
+// compare mode; no failure is expected.  The second read has no input line
+// left, so it reads past the end of the input.
+TEST(InteractiveTestHelperTest, ChecksSimpleSession)
+{
+ {
+ InteractiveSession session(gmx::test::erefdataUpdateAll);
+ session.addOutput("First line\n");
+ session.addOutput("> ");
+ session.addInput("input");
+ session.addOutput("Second line\n");
+ session.addOutput("> ");
+ session.addReadInput();
+ session.addOutput("\n");
+ session.addOutput(".\n");
+ session.run();
+ }
+ {
+ InteractiveSession session(gmx::test::erefdataCompare);
+ session.addOutput("First line\n");
+ session.addOutput("> ");
+ session.addInput("input");
+ session.addOutput("Second line\n");
+ session.addOutput("> ");
+ session.addReadInput();
+ session.addOutput("\n");
+ session.addOutput(".\n");
+ session.run();
+ }
+}
+
+// Same record-then-compare pattern, but the last input line is read without
+// a trailing newline.
+TEST(InteractiveTestHelperTest, ChecksSessionWithoutLastNewline)
+{
+ {
+ InteractiveSession session(gmx::test::erefdataUpdateAll);
+ session.addOutput("First line\n");
+ session.addOutput("> ");
+ session.addInput("input");
+ session.addOutput("Second line\n");
+ session.addOutput("> ");
+ session.addInputNoNewline("input2");
+ session.addOutput("\n");
+ session.addOutput(".\n");
+ session.run();
+ }
+ {
+ InteractiveSession session(gmx::test::erefdataCompare);
+ session.addOutput("First line\n");
+ session.addOutput("> ");
+ session.addInput("input");
+ session.addOutput("Second line\n");
+ session.addOutput("> ");
+ session.addInputNoNewline("input2");
+ session.addOutput("\n");
+ session.addOutput(".\n");
+ session.run();
+ }
+}
+
+// Two reads follow each other with no output written in between; the same
+// session is recorded and compared, so no failure is expected.
+TEST(InteractiveTestHelperTest, ChecksSessionWithMissingOutput)
+{
+ {
+ InteractiveSession session(gmx::test::erefdataUpdateAll);
+ session.addOutput("First line\n> ");
+ session.addInput("input");
+ session.addInput("input2");
+ session.addOutput("Second line\n> ");
+ session.addReadInput();
+ session.addOutput("\n.\n");
+ session.run();
+ }
+ {
+ InteractiveSession session(gmx::test::erefdataCompare);
+ session.addOutput("First line\n> ");
+ session.addInput("input");
+ session.addInput("input2");
+ session.addOutput("Second line\n> ");
+ session.addReadInput();
+ session.addOutput("\n.\n");
+ session.run();
+ }
+}
+
+// The recorded and compared sessions write the same text, but split it
+// differently across write() calls; no failure is expected.
+TEST(InteractiveTestHelperTest, ChecksSessionWithEquivalentOutput)
+{
+ {
+ InteractiveSession session(gmx::test::erefdataUpdateAll);
+ session.addOutput("First line\n");
+ session.addOutput("> ");
+ session.addInput("input");
+ session.addOutput("Second line\n> ");
+ session.addReadInput();
+ session.addOutput("\n");
+ session.addOutput(".\n");
+ session.run();
+ }
+ {
+ InteractiveSession session(gmx::test::erefdataCompare);
+ session.addOutput("First line\n> ");
+ session.addInput("input");
+ session.addOutput("Second line\n");
+ session.addOutput("> ");
+ session.addReadInput();
+ session.addOutput("\n.\n");
+ session.run();
+ }
+}
+
+// The compared session writes different text after the first input than the
+// recorded one; run() is expected to report a non-fatal failure.
+TEST(InteractiveTestHelperTest, DetectsIncorrectOutput)
+{
+ {
+ InteractiveSession session(gmx::test::erefdataUpdateAll);
+ session.addOutput("First line\n> ");
+ session.addInput("input");
+ session.addOutput("Second line\n> ");
+ session.addReadInput();
+ session.addOutput("\n.\n");
+ session.run();
+ }
+ {
+ InteractiveSession session(gmx::test::erefdataCompare);
+ session.addOutput("First line\n> ");
+ session.addInput("input");
+ session.addOutput("Incorrect line\n> ");
+ session.addReadInput();
+ session.addOutput("\n.\n");
+ EXPECT_NONFATAL_FAILURE(session.run(), "");
+ }
+}
+
+TEST(InteractiveTestHelperTest, DetectsMissingOutput) // an output dropped from the middle of the session must be reported
+{
+ { // pass 1: erefdataUpdateAll -- presumably (re)writes the reference data; confirm against ReferenceDataMode
+ InteractiveSession session(gmx::test::erefdataUpdateAll);
+ session.addOutput("First line\n> ");
+ session.addInput("input");
+ session.addOutput("Second line\n> "); // this output is left out of pass 2
+ session.addInput("input2");
+ session.addOutput("Third line\n> ");
+ session.addReadInput();
+ session.addOutput("\n.\n");
+ session.run();
+ }
+ { // pass 2: erefdataCompare without the "Second line" output
+ InteractiveSession session(gmx::test::erefdataCompare);
+ session.addOutput("First line\n> ");
+ session.addInput("input");
+ session.addInput("input2"); // follows "input" directly; pass 1 had an addOutput() in between
+ session.addOutput("Third line\n> ");
+ session.addReadInput();
+ session.addOutput("\n.\n");
+ EXPECT_NONFATAL_FAILURE(session.run(), ""); // gtest-spi: run() must raise exactly one non-fatal failure; "" matches any message
+ }
+}
+
+TEST(InteractiveTestHelperTest, DetectsMissingFinalOutput) // an output dropped from the very end of the session must be reported
+{
+ { // pass 1: erefdataUpdateAll -- presumably (re)writes the reference data; confirm against ReferenceDataMode
+ InteractiveSession session(gmx::test::erefdataUpdateAll);
+ session.addOutput("First line\n> ");
+ session.addInput("input");
+ session.addOutput("Second line\n> ");
+ session.addReadInput();
+ session.addOutput("\n.\n"); // trailing output that pass 2 leaves out
+ session.run();
+ }
+ { // pass 2: erefdataCompare; identical except that the trailing addOutput("\n.\n") is absent
+ InteractiveSession session(gmx::test::erefdataCompare);
+ session.addOutput("First line\n> ");
+ session.addInput("input");
+ session.addOutput("Second line\n> ");
+ session.addReadInput(); // last event of this pass
+ EXPECT_NONFATAL_FAILURE(session.run(), ""); // gtest-spi: run() must raise exactly one non-fatal failure; "" matches any message
+ }
+}
+
+TEST(InteractiveTestHelperTest, DetectsExtraOutput) // an output present only in the compare pass must be reported
+{
+ { // pass 1: erefdataUpdateAll -- presumably (re)writes the reference data; confirm against ReferenceDataMode
+ InteractiveSession session(gmx::test::erefdataUpdateAll);
+ session.addOutput("First line\n> ");
+ session.addInput("input");
+ session.addInput("input2"); // no addOutput() between the two inputs in this pass
+ session.addOutput("More output\n> ");
+ session.addReadInput();
+ session.addOutput("\n.\n");
+ session.run();
+ }
+ { // pass 2: erefdataCompare with one additional output between the inputs
+ InteractiveSession session(gmx::test::erefdataCompare);
+ session.addOutput("First line\n> ");
+ session.addInput("input");
+ session.addOutput("Extra output\n> "); // has no counterpart in pass 1
+ session.addInput("input2");
+ session.addOutput("More output\n> ");
+ session.addReadInput();
+ session.addOutput("\n.\n");
+ EXPECT_NONFATAL_FAILURE(session.run(), ""); // gtest-spi: run() must raise exactly one non-fatal failure; "" matches any message
+ }
+}
+
+TEST(InteractiveTestHelperTest, DetectsMissingInput) // compare pass issues fewer addReadInput() events than the first pass; must be reported
+{
+ { // pass 1: erefdataUpdateAll -- presumably (re)writes the reference data; confirm against ReferenceDataMode
+ InteractiveSession session(gmx::test::erefdataUpdateAll);
+ session.addInput("input"); // NOTE(review): presumably addInput() == addInputLine() + addReadInput(); confirm in InteractiveSession
+ session.addInput("input2");
+ session.addReadInput(); // one explicit read on top of the two addInput() calls
+ session.run();
+ }
+ { // pass 2: erefdataCompare; same two input lines, but only two explicit reads
+ InteractiveSession session(gmx::test::erefdataCompare);
+ session.addInputLine("input");
+ session.addInputLine("input2");
+ session.addReadInput();
+ session.addReadInput(); // NOTE(review): one read fewer than pass 1 if the addInput() assumption above holds
+ EXPECT_NONFATAL_FAILURE(session.run(), ""); // gtest-spi: run() must raise exactly one non-fatal failure; "" matches any message
+ }
+}
+
+TEST(InteractiveTestHelperTest, DetectsExtraInput) // compare pass issues more addReadInput() events than the first pass; must be reported
+{
+ { // pass 1: erefdataUpdateAll -- presumably (re)writes the reference data; confirm against ReferenceDataMode
+ InteractiveSession session(gmx::test::erefdataUpdateAll);
+ session.addInput("input"); // NOTE(review): presumably addInput() == addInputLine() + addReadInput(); confirm in InteractiveSession
+ session.addInput("input2");
+ session.addReadInput(); // one explicit read on top of the two addInput() calls
+ session.run();
+ }
+ { // pass 2: erefdataCompare; same two input lines, but four explicit reads
+ InteractiveSession session(gmx::test::erefdataCompare);
+ session.addInputLine("input");
+ session.addInputLine("input2");
+ session.addReadInput();
+ session.addReadInput();
+ session.addReadInput();
+ session.addReadInput(); // NOTE(review): one read more than pass 1 if the addInput() assumption above holds
+ EXPECT_NONFATAL_FAILURE(session.run(), ""); // gtest-spi: run() must raise exactly one non-fatal failure; "" matches any message
+ }
+}
+
+} // namespace
* - gmx::test::TestFileInputRedirector (in testfileredirector.h) provides
* functionality for capturing file existence checks in code that uses
* gmx::FileInputRedirectorInterface.
+ * - gmx::test::TestFileOutputRedirector (in testfileredirector.h) provides
+ * functionality for capturing file output (including `stdout`) from code
+ * that uses gmx::FileOutputRedirectorInterface, and checking that output
+ * against reference data.
+ * - gmx::test::InteractiveTestHelper (in interactivetest.h) is a helper
+ * class for testing an interactive session that uses
+ * gmx::TextInputStream and gmx::TextOutputStream for prompting for input
+ * and printing status messages.
* - #GMX_TEST_OPTIONS macro provides facilities for adding custom command
* line options for the test binary.
* - testasserts.h provides several custom test assertions for better