# To help us fund GROMACS development, we humbly ask that you cite
# the research papers on the package. Check out http://www.gromacs.org.
-cmake_minimum_required(VERSION 2.8.8)
-# When we require cmake >= 2.8.12, it will provide
-# CMAKE_MINIMUM_REQUIRED_VERSION automatically, but in the meantime we
-# need to set a variable, and it must have a different name.
-set(GMX_CMAKE_MINIMUM_REQUIRED_VERSION "2.8.8")
+cmake_minimum_required(VERSION 3.4.3)
# CMake modules/macros are in a subdirectory to keep this file cleaner
# This needs to be set before project() in order to pick up toolchain files
include(gmxBuildTypeReleaseWithAssert)
if(NOT CMAKE_BUILD_TYPE)
- set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel Reference RelWithAssert Profile." FORCE)
- # There's no need to offer a user the choice of ThreadSanitizer
+ set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel Reference RelWithAssert Profile TSAN ASAN MSAN." FORCE)
# Set the possible values of build type for cmake-gui
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release"
- "MinSizeRel" "RelWithDebInfo" "Reference" "RelWithAssert" "Profile")
+ "MinSizeRel" "RelWithDebInfo" "Reference" "RelWithAssert" "Profile" "TSAN" "ASAN" "MSAN")
endif()
if(CMAKE_CONFIGURATION_TYPES)
# Add appropriate GROMACS-specific build types for the Visual
set_property(GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS ON)
-# Set a default valgrind suppression file.
-# This unfortunately needs to duplicate information from CTest to work as
-# expected...
-set(MEMORYCHECK_SUPPRESSIONS_FILE
- "${CMAKE_SOURCE_DIR}/cmake/legacy_and_external.supp"
- CACHE FILEPATH
- "File that contains suppressions for the memory checker")
-include(CTest)
+include(gmxCTestUtilities)
+gmx_ctest_init()
include(gmxCPackUtilities)
gmx_cpack_init()
set(INSTALLED_HEADER_INCLUDE_DIRS "")
set(INSTALLED_HEADER_DEFINITIONS "")
+########################################################################
+# Global non-cache variables for implementing the build system
+########################################################################
+
+# These variables collect libraries that GROMACS requires for
+# linking. They should be appended to with list(APPEND ${name}
+# new-library) calls. They are:
+# - Libraries that are required for libgromacs (only)
+set(GMX_EXTRA_LIBRARIES "")
+# - Libraries that are required for all code in the repository
+set(GMX_COMMON_LIBRARIES "")
+# - Libraries that all code linked against libgromacs needs
+# (i.e., something that is exposed in installed headers).
+set(GMX_PUBLIC_LIBRARIES "")
+
########################################################################
# Check and warn if cache generated on a different host is being reused
########################################################################
option(GMX_USE_OPENCL "Enable OpenCL acceleration" OFF)
-# Decide on GPU settings based on user-settings and GPU/CUDA detection.
-# GCC 4.6 requires CUDA 5.0 and VS2015 requires CUDA 8.0
+# Decide on GPU settings based on user-settings and GPU/CUDA
+# detection. GCC 4.8 requires CUDA 6.0 (but we choose 6.5 for the
+# preliminary C++11 support), icc 15 requires CUDA 7.0, and VS2015
+# requires CUDA 8.0
if(MSVC)
set(REQUIRED_CUDA_VERSION 8.0)
+elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
+ set(REQUIRED_CUDA_VERSION 7.0)
else()
- set(REQUIRED_CUDA_VERSION 5.0)
+ set(REQUIRED_CUDA_VERSION 6.5)
endif()
set(REQUIRED_CUDA_COMPUTE_CAPABILITY 2.0)
endif()
endif()
-include(gmxDetectSimd)
-gmx_detect_simd(GMX_SUGGESTED_SIMD)
-
gmx_option_multichoice(
GMX_SIMD
"SIMD instruction set for CPU kernels and compiler optimization"
- "${GMX_SUGGESTED_SIMD}"
- None SSE2 SSE4.1 AVX_128_FMA AVX_256 AVX2_256 AVX_512 AVX_512_KNL MIC ARM_NEON ARM_NEON_ASIMD IBM_QPX IBM_VMX IBM_VSX Sparc64_HPC_ACE Reference)
+ "AUTO"
+ AUTO None SSE2 SSE4.1 AVX_128_FMA AVX_256 AVX2_256 AVX2_128 AVX_512 AVX_512_KNL MIC ARM_NEON ARM_NEON_ASIMD IBM_QPX IBM_VMX IBM_VSX Sparc64_HPC_ACE Reference)
if(GMX_TARGET_MIC)
set(GMX_FFT_LIBRARY_DEFAULT "mkl")
# This variable should be used for additional compiler flags which are not
# generated in gmxCFlags nor are SIMD or MPI related.
+ #
+ # TODO These variables should be consolidated into
+ # EXTRA_COMPILER_FLAGS so that we don't perpetrate bugs where
+ # things that work in C compilation (e.g. merging from old branches)
+ # do not also work for C++ compilation.
set(EXTRA_C_FLAGS "")
set(EXTRA_CXX_FLAGS "")
# Run through a number of tests for buggy compilers and other issues
include(gmxTestCompilerProblems)
gmx_test_compiler_problems()
-# GMX_SIMD will not be set automatically until the second
-# pass (which is not strictly guaranteed to occur), so putting this
-# check here among logically-related tests is inefficient, but the
-# potential loss is likely zero.
-if(GMX_SIMD STREQUAL "AVX_256"
- AND CMAKE_COMPILER_IS_GNUCC
- AND (C_COMPILER_VERSION VERSION_EQUAL "4.6.1"
- OR CXX_COMPILER_VERSION VERSION_EQUAL "4.6.1"))
- message(FATAL_ERROR "gcc 4.6.1 has buggy support for AVX, and GROMACS mdrun will not work. If you want simulation performance, use a more recent compiler. Otherwise, use GMX_SIMD=SSE4.1")
- # See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49002
-endif()
# Implement double-precision option. This is complicated because we
# need installed headers to use the precision mode of the build that
# Find external packages #
########################################################################
-# TNG wants zlib if it is available. And static libxml2 might have a dependency
-find_package(ZLIB QUIET)
-include(gmxTestZLib)
-gmx_test_zlib(HAVE_ZLIB)
-
# Unconditionally find the package, as it is also required for unit
# tests. This exports LIBXML2_FOUND, which we should not use because
# it does not tell us that linking will succeed. Instead, we test that
option(GMX_HWLOC "Add support for hwloc Portable Hardware locality library" ${GMX_HWLOC_DEFAULT})
if(GMX_HWLOC)
if(HWLOC_FOUND)
- include_directories(${HWLOC_INCLUDE_DIRS})
+ include_directories(SYSTEM ${HWLOC_INCLUDE_DIRS})
list(APPEND GMX_EXTRA_LIBRARIES ${HWLOC_LIBRARIES})
else()
message(FATAL_ERROR "Hwloc package support requested, but not found.")
add_definitions(-DTMPI_ATOMICS_DISABLED)
endif()
-# Note this relies on zlib detection having already run
include(gmxManageTNG)
include(gmxManageLmfit)
if(GMX_GPU)
# now that we have detected the dependencies, do the second configure pass
gmx_gpu_setup()
-else()
- mark_as_advanced(CUDA_HOST_COMPILER)
endif()
if(CYGWIN)
add_definitions( -DHAVE_CONFIG_H )
include_directories(BEFORE ${CMAKE_SOURCE_DIR}/src)
-include_directories(BEFORE ${CMAKE_SOURCE_DIR}/src/external/thread_mpi/include)
+# TODO required at high level because both libgromacs and progs/mdrun
+# require it, both for thread-MPI and its atomics and mutexes.
+include_directories(BEFORE SYSTEM ${CMAKE_SOURCE_DIR}/src/external/thread_mpi/include)
# Required for config.h, maybe should only be set in src/CMakeLists.txt
include_directories(BEFORE ${CMAKE_BINARY_DIR}/src)
include(gmxSetBuildInformation)
gmx_set_build_information()
-# Turn on RDTSCP if:
-# - the build system's CPU supports it
-# - the acceleration is set to AVX as all AVX-capable CPUs support AVX (which
-# at this point means that the user set it).
-# Note: it's better to not use the later set value of GMX_SIMD because
-# it reflects the system's capability of both compiling and running AVX code.
-# TODO: After merge with 5.0 one could implement a cache variable dependency
-# such that GMX_USE_RDTSCP can change if GMX_SIMD is changed to AVX
-# after the first cmake pass.
-if (BUILD_CPU_FEATURES MATCHES "rdtscp" OR GMX_SIMD MATCHES "AVX")
- set(GMX_USE_RDTSCP_DEFAULT_VALUE ON)
-else()
- set(GMX_USE_RDTSCP_DEFAULT_VALUE OFF)
-endif()
-option(GMX_USE_RDTSCP "Use RDTSCP for better CPU-based timers (available on recent x86 CPUs; might need to be off when compiling for heterogeneous environments)" ${GMX_USE_RDTSCP_DEFAULT_VALUE})
+
+gmx_option_multichoice(
+ GMX_USE_RDTSCP
  "Use low-latency RDTSCP instruction for CPU-based timers for mdrun execution; might need to be off when compiling for heterogeneous environments"
+ "AUTO"
+ OFF ON AUTO DETECT)
mark_as_advanced(GMX_USE_RDTSCP)
-if(GMX_USE_RDTSCP)
+
+macro(gmx_check_rdtscp)
+ if (CPU_DETECTION_FEATURES MATCHES "rdtscp")
+ set(HAVE_RDTSCP 1)
+ set(RDTSCP_DETECTION_MESSAGE " - detected on the build host")
+ else()
+ set(RDTSCP_DETECTION_MESSAGE " - not detected on the build host")
+ endif()
+endmacro()
+
+set(HAVE_RDTSCP 0)
+if (GMX_USE_RDTSCP STREQUAL "ON")
set(HAVE_RDTSCP 1)
+elseif(GMX_USE_RDTSCP STREQUAL "DETECT")
+ gmx_check_rdtscp()
+elseif(GMX_USE_RDTSCP STREQUAL "AUTO")
+ # If the user specified automated SIMD selection, then the choice
+ # is made based on detection on the build host, and so RDTSCP
+ # should be chosen the same way.
+ #
+ # If the user specified an AVX SIMD level (e.g. when
+ # cross-compiling GROMACS) then they will get our best guess, i.e.
+ # that in practice AVX mostly correlates with rdtscp (and anyway
+ # is only relevant in rather old x86 hardware).
+ if (GMX_SIMD STREQUAL "AUTO")
+ gmx_check_rdtscp()
+ elseif (GMX_SIMD MATCHES "AVX")
+ set(HAVE_RDTSCP 1)
+ endif()
+endif()
+gmx_check_if_changed(HAVE_RDTSCP_CHANGED HAVE_RDTSCP)
+if (HAVE_RDTSCP_CHANGED)
+ if (HAVE_RDTSCP)
+ message(STATUS "Enabling RDTSCP support${RDTSCP_DETECTION_MESSAGE}")
+ else()
+ message(STATUS "Disabling RDTSCP support${RDTSCP_DETECTION_MESSAGE}")
+ endif()
endif()
include(gmxTestLargeFiles)
endif()
# Math and thread libraries must often come after all others when linking...
-if(HAVE_LIBM)
- list(APPEND GMX_EXTRA_LIBRARIES m)
+if (HAVE_LIBM)
+ list(APPEND GMX_PUBLIC_LIBRARIES m)
endif()
option(GMX_NACL "Configure for Native Client builds" OFF)
message("CMAKE_CXX_FLAGS_RELEASE: ${GMXC_CXXFLAGS_RELEASE}")
message("CMAKE_CXX_FLAGS_DEBUG: ${GMXC_CXXFLAGS_DEBUG}")
message("CMAKE_EXE_LINKER_FLAGS: ${FFT_LINKER_FLAGS} ${MPI_LINKER_FLAGS}")
- message("CMAKE_SHARED_LINKER_FLAGS: ${MPI_LINKER_FLAGS}")
+ message("CMAKE_SHARED_LINKER_FLAGS: ${FFT_LINKER_FLAGS} ${MPI_LINKER_FLAGS}")
endif()
if(NOT GMX_OPENMP)
################################################################
# Shared library load path settings
################################################################
-# CMake supports RPATH on OS X only from 2.8.12 upwards.
-# CMAKE_SYSTEM_VERSION > 8.0 matches OS X 10.5 and above, where RPATH support
-# was added.
-
if(NOT GMX_BUILD_SHARED_EXE)
# No rpath
set(CMAKE_SKIP_RPATH TRUE)
set(CMAKE_EXE_LINK_DYNAMIC_C_FLAGS) # remove -Wl,-Bdynamic
set(CMAKE_EXE_LINK_DYNAMIC_CXX_FLAGS)
-elseif((NOT CMAKE_SYSTEM_NAME STREQUAL "Darwin") OR
- ((CMAKE_SYSTEM_VERSION VERSION_GREATER 8.0) AND (NOT CMAKE_VERSION VERSION_LESS 2.8.12)))
+else()
# The build folder always has bin/ and lib/; if we are also going to
# install to lib/, then the installation RPATH works also in the build
# tree. This makes installation slightly faster (no need to rewrite the
endif()
# Set the RPATH as relative to the executable location to make the
# binaries relocatable.
- if(NOT CMAKE_SYSTEM_NAME STREQUAL "Darwin")
+ if(NOT CMAKE_SYSTEM_NAME STREQUAL "Darwin") #Assume OS X >=10.5
set(CMAKE_INSTALL_RPATH "\$ORIGIN/../${GMX_LIB_INSTALL_DIR}")
else()
set(CMAKE_INSTALL_RPATH "@executable_path/../${GMX_LIB_INSTALL_DIR}")
endif()
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
set(CMAKE_MACOSX_RPATH 1)
-else()
- # We are on Darwin/OSX, and CMake cannot handle RPATHs automatically.
- if(CMAKE_SYSTEM_VERSION VERSION_GREATER 8.0)
- # Set the RPATH options manually.
- set(CMAKE_INSTALL_NAME_DIR "@rpath")
- set(GMX_EXE_LINKER_FLAGS ${GMX_EXE_LINKER_FLAGS} "-Wl,-rpath,@executable_path/../${GMX_LIB_INSTALL_DIR}")
- else()
- # Use the old INSTALL_NAME_DIR mechanism if RPATH is not supported.
- set(CMAKE_INSTALL_NAME_DIR "${CMAKE_INSTALL_PREFIX}/${LIB_INSTALL_DIR}")
- endif()
endif()
#COPYING file: Only necessary for binary distributions.
endif()
if (BUILD_TESTING)
- # "tests" target builds all the separate test binaries.
- add_custom_target(tests)
- # "run-ctest" is an internal target that actually runs the tests.
- # This is necessary to be able to add separate targets that execute as part
- # of 'make check', but are ensured to be executed after the actual tests.
- add_custom_target(run-ctest
- COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure
- COMMENT "Running all tests"
- VERBATIM)
- add_dependencies(run-ctest tests)
- # "check" target builds and runs all tests.
- add_custom_target(check DEPENDS run-ctest)
+ include(tests/CheckTarget.cmake)
endif()
if (NOT GMX_BUILD_MDRUN_ONLY)
set(PKG_FFT "${${FFTW}_PKG}")
include_directories(SYSTEM ${${FFTW}_INCLUDE_DIRS})
- if ((${GMX_SIMD} MATCHES "SSE" OR ${GMX_SIMD} MATCHES "AVX") AND NOT ${FFTW}_HAVE_SIMD)
+ if ((${GMX_SIMD_ACTIVE} MATCHES "SSE" OR ${GMX_SIMD_ACTIVE} MATCHES "AVX") AND NOT ${FFTW}_HAVE_SIMD)
message(WARNING "The fftw library found is compiled without SIMD support, which makes it slow. Consider recompiling it or contact your admin")
else()
- if(${GMX_SIMD} MATCHES "AVX" AND NOT (${FFTW}_HAVE_SSE OR ${FFTW}_HAVE_SSE2))
+ if(${GMX_SIMD_ACTIVE} MATCHES "AVX" AND NOT (${FFTW}_HAVE_SSE OR ${FFTW}_HAVE_SSE2))
# If we end up here we have an AVX Gromacs build, and
# FFTW with SIMD.
message(WARNING "The FFTW library was compiled with neither --enable-sse nor --enable-sse2; those would have enabled SSE(2) SIMD instructions. This will give suboptimal performance. You should (re)compile the FFTW library with --enable-sse2 and --enable-avx (and --enable-avx2 or --enable-avx512 if supported).")
# Some versions of icc require this in order that mkl.h can be
# found at compile time.
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} ${FFT_LINKER_FLAGS}")
+ set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} ${FFT_LINKER_FLAGS}")
set(MKL_ERROR_MESSAGE "Make sure you have configured your compiler so that ${FFT_LINKER_FLAGS} will work.")
else()
Quick and dirty installation
----------------------------
1. Get the latest version of your C and C++ compilers.
-2. Check that you have CMake version |GMX_CMAKE_MINIMUM_REQUIRED_VERSION| or later.
+2. Check that you have CMake version |CMAKE_MINIMUM_REQUIRED_VERSION| or later.
3. Get and unpack the latest version of the |Gromacs| tarball.
4. Make a separate build directory and change to it.
5. Run ``cmake`` with the path to the source as an argument
frequently provides the best performance.
You should strive to use the most recent version of your
-compiler. Minimum supported compiler versions are
+compiler. Since we require full C++11 support, the minimum supported
+compiler versions are
-* GNU (gcc) 4.6
-* Intel (icc) 14
-* LLVM (clang) 3.4
+* GNU (gcc) 4.8.1
+* Intel (icc) 15.0
+* LLVM (clang) 3.3
* Microsoft (MSVC) 2015
Other compilers may work (Cray, Pathscale, older clang) but do
On Linux, both the Intel and clang compiler use the libstdc++ which
comes with gcc as the default C++ library. For |Gromacs|, we require
-the compiler to support libstc++ version 4.6.1 or higher. To select a
+the compiler to support libstdc++ version 4.8.1 or higher. To select a
particular libstdc++ library, use:
* For Intel: ``-DGMX_STDLIB_CXX_FLAGS=-gcc-name=/path/to/gcc/binary``
the vendor's default or recommended compiler, and check for
specialized information below.
+For updated versions of gcc to add to your Linux OS, see
+
+* Ubuntu: `Ubuntu toolchain ppa page`_
+* RHEL/CentOS: `EPEL page`_ or the RedHat Developer Toolset
+
Compiling with parallelization options
--------------------------------------
GPU support
^^^^^^^^^^^
|Gromacs| has excellent support for NVIDIA GPUs supported via CUDA.
-NVIDIA's CUDA_ version |REQUIRED_CUDA_VERSION| software development kit is required,
-and the latest version is strongly encouraged. NVIDIA GPUs with at
+On Linux with gcc, NVIDIA's CUDA_ version |REQUIRED_CUDA_VERSION|
+software development kit is required, and the latest
+version is strongly encouraged. Using Intel or Microsoft compilers
+requires versions 7.0 and 8.0, respectively. NVIDIA GPUs with at
least NVIDIA compute capability |REQUIRED_CUDA_COMPUTE_CAPABILITY| are
required, e.g. Fermi, Kepler, Maxwell or Pascal cards. You are strongly recommended to
get the latest CUDA version and driver supported by your hardware, but
-----
|Gromacs| builds with the CMake build system, requiring at least
-version |GMX_CMAKE_MINIMUM_REQUIRED_VERSION|. You can check whether
+version |CMAKE_MINIMUM_REQUIRED_VERSION|. You can check whether
CMake is installed, and what version it is, with ``cmake
--version``. If you need to install CMake, then first check whether
your platform's package management system provides a suitable version,
``-DGMX_BUILD_SHARED_EXE=off``, and you may need to set the F77
environmental variable to ``ftn`` when compiling FFTW.
+ Building on Solaris
+ -------------------
+
+ The built-in |Gromacs| processor detection does not work on Solaris,
+ so it is strongly recommended that you build |Gromacs| with
+ ``-DGMX_HWLOC=on`` and ensure that the ``CMAKE_PREFIX_PATH`` includes
+ the path where the hwloc headers and libraries can be found. At least
+ version 1.11.8 of hwloc is recommended.
+
+ Oracle Developer Studio is not a currently supported compiler (and
+ does not currently compile |Gromacs| correctly, perhaps because the
+ thread-MPI atomics are incorrectly implemented in |Gromacs|).
+
Building on BlueGene
--------------------
it works because we have tested it. We do test on Linux, Windows, and
Mac with a range of compilers and libraries for a range of our
configuration options. Every commit in our git source code repository
-is currently tested on x86 with gcc versions ranging from 4.6 through
-5.2, and versions 16 of the Intel compiler as well as Clang
-version 3.4 through 3.8. For this, we use a variety of GNU/Linux
+is currently tested on x86 with a number of gcc versions ranging from 4.8.1
+through 6.1, versions 16 of the Intel compiler, and Clang
+versions 3.4 through 3.8. For this, we use a variety of GNU/Linux
flavors and versions as well as recent versions of Windows. Under
Windows, we test both MSVC 2015 and version 16 of the Intel compiler.
For details, you can
:ref:`pdb` file, which allows to check whether multimeric channels have
the correct PBC representation.
+ ``GMX_TRAJECTORY_IO_VERBOSITY``
+ Defaults to 1, which prints frame count e.g. when reading trajectory
+ files. Set to 0 for quiet operation.
+
Debugging
---------
``GMX_PRINT_DEBUG_LINES``
``GMX_EMULATE_GPU``
emulate GPU runs by using algorithmically equivalent CPU reference code instead of
GPU-accelerated functions. As the CPU code is slow, it is intended to be used only for debugging purposes.
- The behavior is automatically triggered if non-bonded calculations are turned off using ``GMX_NO_NONBONDED``
- case in which the non-bonded calculations will not be called, but the CPU-GPU transfer will also be skipped.
``GMX_ENX_NO_FATAL``
disable exiting upon encountering a corrupted frame in an :ref:`edr`
skip non-bonded calculations; can be used to estimate the possible
performance gain from adding a GPU accelerator to the current hardware setup -- assuming that this is
fast enough to complete the non-bonded calculations while the CPU does bonded force and PME computation.
+ Freezing the particles will be required to stop the system blowing up.
``GMX_NO_PULLVIR``
when set, do not add virial contribution to COM pull forces.
``MDRUN``
the :ref:`gmx mdrun` command used by :ref:`gmx tune_pme`.
-``GMX_NSTLIST``
- sets the default value for :mdp:`nstlist`, preventing it from being tuned during
- :ref:`gmx mdrun` startup when using the Verlet cutoff scheme.
+``GMX_DISABLE_DYNAMICPRUNING``
+ disables dynamic pair-list pruning. Note that :ref:`gmx mdrun` will
+ still tune nstlist to the optimal value picked assuming dynamic pruning. Thus
+ for good performance the -nstlist option should be used.
+
+``GMX_NSTLIST_DYNAMICPRUNING``
+ overrides the dynamic pair-list pruning interval chosen heuristically
+ by mdrun. Values should be between the pruning frequency value
+ (1 for CPU and 2 for GPU) and :mdp:`nstlist - 1`.
``GMX_USE_TREEREDUCE``
use tree reduction for nbnxn force reduction. Potentially faster for large number of
override |Gromacs| default behavior, or if you want to test
your own kernels.
+``GMX_OCL_DISABLE_COMPATIBILITY_CHECK``
+ Disables the hardware compatibility check. Useful for developers
+ and allows testing the OpenCL kernels on non-supported platforms
+ (like Intel iGPUs) without source code modification.
+
Analysis and Core Functions
---------------------------
``GMX_QM_ACCURACY``
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/programcontext.h"
#include "gromacs/utility/smalloc.h"
+#include "gromacs/utility/stringutil.h"
struct gmx_output_env_t
{
explicit gmx_output_env_t(const gmx::IProgramContext &context)
- : programContext(context)
- {
- time_unit = time_ps;
- view = FALSE;
- xvg_format = exvgNONE;
- verbosity = 0;
- trajectory_io_verbosity = 1;
- }
+ : programContext(context),
+ time_unit(time_ps),
+ view(FALSE),
+ xvg_format(exvgNONE),
- verbosity(0) {}
++ verbosity(0),
++ trajectory_io_verbosity(0) {}
++
const gmx::IProgramContext &programContext;
xvg_format_t xvg_format;
/* The level of verbosity for this program */
int verbosity;
+ /* The level of verbosity during trajectory I/O. Default=1, quiet=0. */
+ int trajectory_io_verbosity;
};
/* The source code in this file should be thread-safe.
static const real timefactors[] = { real(0), real(1e3), real(1), real(1e-3), real(1e-6), real(1e-9), real(1e-12), real(0) };
static const real timeinvfactors[] = { real(0), real(1e-3), real(1), real(1e3), real(1e6), real(1e9), real(1e12), real(0) };
static const char *time_units_str[] = {
- NULL, "fs", "ps", "ns", "us",
+ nullptr, "fs", "ps", "ns", "us",
"\\mus", "ms", "s"
};
static const char *time_units_xvgr[] = {
- NULL, "fs", "ps", "ns",
- "ms", "s", NULL
+ nullptr, "fs", "ps", "ns",
+ "ms", "s", nullptr
};
oenv->view = view;
oenv->xvg_format = xvg_format;
oenv->verbosity = verbosity;
+ const char *env = getenv("GMX_TRAJECTORY_IO_VERBOSITY");
+ oenv->trajectory_io_verbosity = (env != nullptr ? strtol(env, NULL, 10) : 1);
+
}
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
}
return oenv->verbosity;
}
-const char *output_env_get_time_unit(const gmx_output_env_t *oenv)
+ int output_env_get_trajectory_io_verbosity(const gmx_output_env_t *oenv)
+ {
+ return oenv->trajectory_io_verbosity;
+ }
+
+std::string output_env_get_time_unit(const gmx_output_env_t *oenv)
{
return time_units_str[oenv->time_unit];
}
-const char *output_env_get_time_label(const gmx_output_env_t *oenv)
+std::string output_env_get_time_label(const gmx_output_env_t *oenv)
{
- char *label;
- snew(label, 20);
-
- sprintf(label, "Time (%s)", time_units_str[oenv->time_unit] ?
- time_units_str[oenv->time_unit] : "ps");
-
- return label;
+ return gmx::formatString("Time (%s)", time_units_str[oenv->time_unit] ?
+ time_units_str[oenv->time_unit] : "ps");
}
-const char *output_env_get_xvgr_tlabel(const gmx_output_env_t *oenv)
+std::string output_env_get_xvgr_tlabel(const gmx_output_env_t *oenv)
{
- char *label;
- snew(label, 20);
-
- sprintf(label, "Time (%s)", time_units_xvgr[oenv->time_unit] ?
- time_units_xvgr[oenv->time_unit] : "ps");
-
- return label;
+ return gmx::formatString("Time (%s)", time_units_xvgr[oenv->time_unit] ?
+ time_units_xvgr[oenv->time_unit] : "ps");
}
real output_env_get_time_factor(const gmx_output_env_t *oenv)
const char *output_env_get_program_display_name(const gmx_output_env_t *oenv)
{
- const char *displayName = NULL;
+ const char *displayName = nullptr;
try
{
#ifndef GMX_FILEIO_OENV_H
#define GMX_FILEIO_OENV_H
+#include <string>
+
#include "gromacs/utility/basedefinitions.h"
#include "gromacs/utility/real.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
-
struct gmx_output_env_t;
/* output_env member functions */
- /* The output_env structure holds information about program name, cmd line,
- default times, etc.
+ /* The output_env structure holds information about program name, cmd
+ line, default times, etc. along with verbosity levels for code
+ components that use this structure for regulating output.
There are still legacy functions for the program name, and the command
line, but the output_env versions are now preferred.*/
int output_env_get_verbosity(const gmx_output_env_t *oenv);
/* return the verbosity */
-const char *output_env_get_time_unit(const gmx_output_env_t *oenv);
+ int output_env_get_trajectory_io_verbosity(const gmx_output_env_t *oenv);
+ /* return the verbosity for trajectory IO handling */
+
+std::string output_env_get_time_unit(const gmx_output_env_t *oenv);
/* return time unit (e.g. ps or ns) */
-const char *output_env_get_time_label(const gmx_output_env_t *oenv);
+std::string output_env_get_time_label(const gmx_output_env_t *oenv);
/* return time unit label (e.g. "Time (ps)") */
-const char *output_env_get_xvgr_tlabel(const gmx_output_env_t *oenv);
-/* retrun x-axis time label for xmgr */
+std::string output_env_get_xvgr_tlabel(const gmx_output_env_t *oenv);
+/* return x-axis time label for xmgr */
real output_env_get_time_factor(const gmx_output_env_t *oenv);
/* return time conversion factor from ps (i.e. 1e-3 for ps->ns) */
*/
const char *output_env_get_program_display_name(const gmx_output_env_t *oenv);
-#ifdef __cplusplus
-}
-
namespace gmx
{
class IProgramContext;
output_env_get_program_context(const gmx_output_env_t *oenv);
#endif
-
-#endif
#include "gromacs/fileio/pdbio.h"
#include "gromacs/fileio/timecontrol.h"
#include "gromacs/fileio/tngio.h"
-#include "gromacs/fileio/tngio_for_tools.h"
#include "gromacs/fileio/tpxio.h"
#include "gromacs/fileio/trrio.h"
#include "gromacs/fileio/xdrf.h"
* for skipping frames with -dt */
real tf; /* internal frame time */
t_trxframe *xframe;
- int nxframe;
t_fileio *fio;
tng_trajectory_t tng;
int natoms;
static void status_init(t_trxstatus *status)
{
status->flags = 0;
- status->nxframe = 0;
- status->xframe = NULL;
- status->fio = NULL;
+ status->xframe = nullptr;
+ status->fio = nullptr;
status->__frame = -1;
status->t0 = 0;
status->tf = 0;
- status->persistent_line = NULL;
- status->tng = NULL;
+ status->persistent_line = nullptr;
+ status->tng = nullptr;
}
{
if ((status->__frame < 2*SKIP1 || status->__frame % SKIP1 == 0) &&
(status->__frame < 2*SKIP2 || status->__frame % SKIP2 == 0) &&
- (status->__frame < 2*SKIP3 || status->__frame % SKIP3 == 0))
+ (status->__frame < 2*SKIP3 || status->__frame % SKIP3 == 0) &&
+ output_env_get_trajectory_io_verbosity(oenv) != 0)
{
fprintf(stderr, "\r%-14s %6d time %8.3f ", l, status->__frame,
output_env_conv_time(oenv, t));
void clear_trxframe(t_trxframe *fr, gmx_bool bFirst)
{
fr->not_ok = 0;
- fr->bTitle = FALSE;
fr->bStep = FALSE;
fr->bTime = FALSE;
fr->bLambda = FALSE;
{
fr->bDouble = FALSE;
fr->natoms = -1;
- fr->title = NULL;
fr->step = 0;
fr->time = 0;
fr->lambda = 0;
fr->fep_state = 0;
- fr->atoms = NULL;
+ fr->atoms = nullptr;
fr->prec = 0;
- fr->x = NULL;
- fr->v = NULL;
- fr->f = NULL;
+ fr->x = nullptr;
+ fr->v = nullptr;
+ fr->f = nullptr;
clear_mat(fr->box);
fr->bPBC = FALSE;
fr->ePBC = -1;
const int *ind, gmx_conect gc)
{
char title[STRLEN];
- rvec *xout = NULL, *vout = NULL, *fout = NULL;
+ rvec *xout = nullptr, *vout = nullptr, *fout = nullptr;
int i, ftp = -1;
real prec;
copy_rvec(fr->f[ind[i]], fout[i]);
}
}
- /* no break */
+ // fallthrough
case efXTC:
if (fr->bX)
{
if (ftp == efGRO)
{
write_hconf_indexed_p(gmx_fio_getfp(status->fio), title, fr->atoms, nind, ind,
- fr->x, fr->bV ? fr->v : NULL, fr->box);
+ fr->x, fr->bV ? fr->v : nullptr, fr->box);
}
else
{
}
break;
case efG96:
- write_g96_conf(gmx_fio_getfp(status->fio), fr, nind, ind);
+ sprintf(title, "frame t= %.3f", fr->time);
+ write_g96_conf(gmx_fio_getfp(status->fio), title, fr, nind, ind);
break;
default:
gmx_fatal(FARGS, "Sorry, write_trxframe_indexed can not write %s",
{
sfree(fout);
}
- /* no break */
+ // fallthrough
case efXTC:
sfree(xout);
break;
gmx_incons("Sorry, can only prepare for TNG output.");
}
- if (*out == NULL)
+ if (*out == nullptr)
{
snew((*out), 1);
}
status_init(*out);
- if (in != NULL)
+ if (in != nullptr)
{
gmx_prepare_tng_writing(filename,
filemode,
break;
case efTRR:
gmx_trr_write_frame(status->fio, fr->step, fr->time, fr->lambda, fr->box, fr->natoms,
- fr->bX ? fr->x : NULL, fr->bV ? fr->v : NULL, fr->bF ? fr->f : NULL);
+ fr->bX ? fr->x : nullptr, fr->bV ? fr->v : nullptr, fr->bF ? fr->f : nullptr);
break;
case efGRO:
case efPDB:
if (gmx_fio_getftp(status->fio) == efGRO)
{
write_hconf_p(gmx_fio_getfp(status->fio), title, fr->atoms,
- fr->x, fr->bV ? fr->v : NULL, fr->box);
+ fr->x, fr->bV ? fr->v : nullptr, fr->box);
}
else
{
}
break;
case efG96:
- write_g96_conf(gmx_fio_getfp(status->fio), fr, -1, NULL);
+ write_g96_conf(gmx_fio_getfp(status->fio), title, fr, -1, nullptr);
break;
default:
gmx_fatal(FARGS, "Sorry, write_trxframe can not write %s",
fr.step = step;
fr.bTime = TRUE;
fr.time = time;
- fr.bAtoms = atoms != NULL;
+ fr.bAtoms = atoms != nullptr;
fr.atoms = const_cast<t_atoms *>(atoms);
fr.bX = TRUE;
fr.x = x;
- fr.bV = v != NULL;
+ fr.bV = v != nullptr;
fr.v = v;
fr.bBox = TRUE;
copy_mat(box, fr.box);
{
gmx_fio_close(status->fio);
}
+ sfree(status->persistent_line);
+#if GMX_USE_PLUGINS
+ sfree(status->vmdplugin);
+#endif
+ /* The memory in status->xframe is lost here,
+ * but the read_first_x/read_next_x functions are deprecated anyhow.
+ * read_first_frame/read_next_frame and close_trx should be used.
+ */
sfree(status);
}
fr->bBox = sh.box_size > 0;
if (status->flags & (TRX_READ_X | TRX_NEED_X))
{
- if (fr->x == NULL)
+ if (fr->x == nullptr)
{
snew(fr->x, sh.natoms);
}
}
if (status->flags & (TRX_READ_V | TRX_NEED_V))
{
- if (fr->v == NULL)
+ if (fr->v == nullptr)
{
snew(fr->v, sh.natoms);
}
}
if (status->flags & (TRX_READ_F | TRX_NEED_F))
{
- if (fr->f == NULL)
+ if (fr->f == nullptr)
{
snew(fr->f, sh.natoms);
}
double dbl;
atoms.nr = fr->natoms;
- atoms.atom = NULL;
- atoms.pdbinfo = NULL;
+ atoms.atom = nullptr;
+ atoms.pdbinfo = nullptr;
/* the other pointers in atoms should not be accessed if these are NULL */
snew(symtab, 1);
open_symtab(symtab);
- na = read_pdbfile(fp, title, &model_nr, &atoms, symtab, fr->x, &ePBC, boxpdb, TRUE, NULL);
+ na = read_pdbfile(fp, title, &model_nr, &atoms, symtab, fr->x, &ePBC, boxpdb, TRUE, nullptr);
free_symtab(symtab);
sfree(symtab);
set_trxframe_ePBC(fr, ePBC);
break;
case efG96:
{
- t_symtab *symtab;
- snew(symtab, 1);
- open_symtab(symtab);
- read_g96_conf(gmx_fio_getfp(status->fio), NULL, fr,
+ t_symtab *symtab = nullptr;
+ read_g96_conf(gmx_fio_getfp(status->fio), nullptr, nullptr, fr,
symtab, status->persistent_line);
- free_symtab(symtab);
bRet = (fr->natoms > 0);
break;
}
}
break;
case efTNG:
- bRet = gmx_read_next_tng_frame(status->tng, fr, NULL, 0);
+ bRet = gmx_read_next_tng_frame(status->tng, fr, nullptr, 0);
break;
case efPDB:
bRet = pdb_next_x(status, gmx_fio_getfp(status->fio), fr);
int read_first_frame(const gmx_output_env_t *oenv, t_trxstatus **status,
const char *fn, t_trxframe *fr, int flags)
{
- t_fileio *fio;
+ t_fileio *fio = nullptr;
gmx_bool bFirst, bOK;
int ftp = fn2ftp(fn);
snew((*status), 1);
status_init( *status );
- (*status)->nxframe = 1;
initcount(*status);
(*status)->flags = flags;
/* allocate the persistent line */
snew((*status)->persistent_line, STRLEN+1);
}
- t_symtab *symtab;
- snew(symtab, 1);
- open_symtab(symtab);
- read_g96_conf(gmx_fio_getfp(fio), fn, fr, symtab, (*status)->persistent_line);
- free_symtab(symtab);
+ t_symtab *symtab = nullptr;
+ read_g96_conf(gmx_fio_getfp(fio), fn, nullptr, fr, symtab, (*status)->persistent_line);
gmx_fio_close(fio);
clear_trxframe(fr, FALSE);
if (flags & (TRX_READ_X | TRX_NEED_X))
break;
case efTNG:
fr->step = -1;
- if (!gmx_read_next_tng_frame((*status)->tng, fr, NULL, 0))
+ if (!gmx_read_next_tng_frame((*status)->tng, fr, nullptr, 0))
{
fr->not_ok = DATA_NOT_OK;
fr->natoms = 0;
read_first_frame(oenv, status, fn, &fr, TRX_NEED_X);
snew((*status)->xframe, 1);
- (*status)->nxframe = 1;
(*(*status)->xframe) = fr;
*t = (*status)->xframe->time;
*x = (*status)->xframe->x;
return bRet;
}
-void close_trj(t_trxstatus *status)
-{
- if (status == nullptr)
- {
- return;
- }
- gmx_tng_close(&status->tng);
- if (status->fio)
- {
- gmx_fio_close(status->fio);
- }
-
- /* The memory in status->xframe is lost here,
- * but the read_first_x/read_next_x functions are deprecated anyhow.
- * read_first_frame/read_next_frame and close_trx should be used.
- */
- sfree(status);
-}
-
void rewind_trj(t_trxstatus *status)
{
initcount(status);
t_topology *top;
snew(top, 1);
- epbc = read_tpx_top(fn, NULL, NULL, &natoms, NULL, NULL, top);
+ epbc = read_tpx_top(fn, nullptr, nullptr, &natoms, nullptr, nullptr, top);
if (ePBC)
{
*ePBC = epbc;
# Sources that should always be built
file(GLOB NONBONDED_SOURCES *.cpp nb_kernel_c/*.c)
-if("${GMX_SIMD}" STREQUAL "SSE2" AND NOT GMX_DOUBLE)
+if("${GMX_SIMD_ACTIVE}" STREQUAL "SSE2" AND NOT GMX_DOUBLE)
file(GLOB NONBONDED_SSE2_SINGLE_SOURCES nb_kernel_sse2_single/*.c)
endif()
-if("${GMX_SIMD}" STREQUAL "SSE4.1" AND NOT GMX_DOUBLE)
+if("${GMX_SIMD_ACTIVE}" STREQUAL "SSE4.1" AND NOT GMX_DOUBLE)
file(GLOB NONBONDED_SSE4_1_SINGLE_SOURCES nb_kernel_sse4_1_single/*.c)
endif()
-if("${GMX_SIMD}" STREQUAL "AVX_128_FMA" AND NOT GMX_DOUBLE)
+if("${GMX_SIMD_ACTIVE}" STREQUAL "AVX_128_FMA" AND NOT GMX_DOUBLE)
file(GLOB NONBONDED_AVX_128_FMA_SINGLE_SOURCES nb_kernel_avx_128_fma_single/*.c)
endif()
- if((("${GMX_SIMD_ACTIVE}" STREQUAL "AVX_256") OR ("${GMX_SIMD_ACTIVE}" STREQUAL "AVX2_256")) AND NOT GMX_DOUBLE)
-if((("${GMX_SIMD}" STREQUAL "AVX_256")
- OR ("${GMX_SIMD}" STREQUAL "AVX2_256")
- OR ("${GMX_SIMD}" STREQUAL "AVX_512")
- OR ("${GMX_SIMD}" STREQUAL "AVX_512_KNL"))
++if((("${GMX_SIMD_ACTIVE}" STREQUAL "AVX_256")
++ OR ("${GMX_SIMD_ACTIVE}" STREQUAL "AVX2_256")
++ OR ("${GMX_SIMD_ACTIVE}" STREQUAL "AVX_512")
++ OR ("${GMX_SIMD_ACTIVE}" STREQUAL "AVX_512_KNL"))
+ AND NOT GMX_DOUBLE)
file(GLOB NONBONDED_AVX_256_SINGLE_SOURCES nb_kernel_avx_256_single/*.c)
endif()
-if("${GMX_SIMD}" STREQUAL "SSE2" AND GMX_DOUBLE)
+if("${GMX_SIMD_ACTIVE}" STREQUAL "SSE2" AND GMX_DOUBLE)
file(GLOB NONBONDED_SSE2_DOUBLE_SOURCES nb_kernel_sse2_double/*.c)
endif()
-if("${GMX_SIMD}" STREQUAL "SSE4.1" AND GMX_DOUBLE)
+if("${GMX_SIMD_ACTIVE}" STREQUAL "SSE4.1" AND GMX_DOUBLE)
file(GLOB NONBONDED_SSE4_1_DOUBLE_SOURCES nb_kernel_sse4_1_double/*.c)
endif()
-if("${GMX_SIMD}" STREQUAL "AVX_128_FMA" AND GMX_DOUBLE)
+if("${GMX_SIMD_ACTIVE}" STREQUAL "AVX_128_FMA" AND GMX_DOUBLE)
file(GLOB NONBONDED_AVX_128_FMA_DOUBLE_SOURCES nb_kernel_avx_128_fma_double/*.c)
endif()
- if((("${GMX_SIMD_ACTIVE}" STREQUAL "AVX_256") OR ("${GMX_SIMD_ACTIVE}" STREQUAL "AVX2_256")) AND GMX_DOUBLE)
-if((("${GMX_SIMD}" STREQUAL "AVX_256")
- OR ("${GMX_SIMD}" STREQUAL "AVX2_256")
- OR ("${GMX_SIMD}" STREQUAL "AVX_512")
- OR ("${GMX_SIMD}" STREQUAL "AVX_512_KNL"))
++if((("${GMX_SIMD_ACTIVE}" STREQUAL "AVX_256")
++ OR ("${GMX_SIMD_ACTIVE}" STREQUAL "AVX2_256")
++ OR ("${GMX_SIMD_ACTIVE}" STREQUAL "AVX_512")
++ OR ("${GMX_SIMD_ACTIVE}" STREQUAL "AVX_512_KNL"))
+ AND GMX_DOUBLE)
file(GLOB NONBONDED_AVX_256_DOUBLE_SOURCES nb_kernel_avx_256_double/*.c)
endif()
-if("${GMX_SIMD}" STREQUAL "SPARC64_HPC_ACE" AND GMX_DOUBLE)
+if("${GMX_SIMD_ACTIVE}" STREQUAL "SPARC64_HPC_ACE" AND GMX_DOUBLE)
file(GLOB NONBONDED_SPARC64_HPC_ACE_DOUBLE_SOURCES nb_kernel_sparc64_hpc_ace_double/*.c)
endif()
# include "gromacs/utility/basedefinitions.h"
#endif
+#include "architecture.h"
+
namespace gmx
{
unsigned int * ecx,
unsigned int * edx)
{
-#if defined __i386__ || defined __i386 || defined _X86_ || defined _M_IX86 || \
- defined __x86_64__ || defined __amd64__ || defined _M_X64 || defined _M_AMD64
-
-# if defined __GNUC__ || GMX_X86_GCC_INLINE_ASM
-
- // any compiler that understands gcc inline assembly
- *eax = level;
- *ecx = ecxval;
- *ebx = 0;
- *edx = 0;
-
-# if (defined __i386__ || defined __i386 || defined _X86_ || defined _M_IX86) && defined(__PIC__)
- // Avoid clobbering the global offset table in 32-bit pic code (ebx register)
- __asm__ __volatile__ ("xchgl %%ebx, %1 \n\t"
- "cpuid \n\t"
- "xchgl %%ebx, %1 \n\t"
- : "+a" (*eax), "+r" (*ebx), "+c" (*ecx), "+d" (*edx));
-# else
- // i386 without PIC, or x86-64. Things are easy and we can clobber any reg we want
- __asm__ __volatile__ ("cpuid \n\t"
- : "+a" (*eax), "+b" (*ebx), "+c" (*ecx), "+d" (*edx));
-# endif
- return 0;
-
-# elif defined _MSC_VER
-
- // MSVC (and icc on windows) on ia32 or x86-64
- int cpuInfo[4];
- __cpuidex(cpuInfo, level, ecxval);
- *eax = static_cast<unsigned int>(cpuInfo[0]);
- *ebx = static_cast<unsigned int>(cpuInfo[1]);
- *ecx = static_cast<unsigned int>(cpuInfo[2]);
- *edx = static_cast<unsigned int>(cpuInfo[3]);
- return 0;
-
+ if (c_architecture == Architecture::X86)
+ {
+#if defined __GNUC__ || GMX_X86_GCC_INLINE_ASM
+
+ // any compiler that understands gcc inline assembly
+ *eax = level;
+ *ecx = ecxval;
+ *ebx = 0;
+ *edx = 0;
+
+# if (defined __i386__ || defined __i386 || defined _X86_ || defined _M_IX86) && defined(__PIC__)
+ // Avoid clobbering the global offset table in 32-bit pic code (ebx register)
+ __asm__ __volatile__ ("xchgl %%ebx, %1 \n\t"
+ "cpuid \n\t"
+ "xchgl %%ebx, %1 \n\t"
+ : "+a" (*eax), "+r" (*ebx), "+c" (*ecx), "+d" (*edx));
# else
+ // i386 without PIC, or x86-64. Things are easy and we can clobber any reg we want
+ __asm__ __volatile__ ("cpuid \n\t"
+ : "+a" (*eax), "+b" (*ebx), "+c" (*ecx), "+d" (*edx));
+# endif
+ return 0;
- // We are on x86, but without compiler support for cpuid if we get here
- *eax = 0;
- *ebx = 0;
- *ecx = 0;
- *edx = 0;
- return 1;
+#elif defined _MSC_VER
-# endif // check for inline asm on x86
+ // MSVC (and icc on windows) on ia32 or x86-64
+ int cpuInfo[4];
+ __cpuidex(cpuInfo, level, ecxval);
+ *eax = static_cast<unsigned int>(cpuInfo[0]);
+ *ebx = static_cast<unsigned int>(cpuInfo[1]);
+ *ecx = static_cast<unsigned int>(cpuInfo[2]);
+ *edx = static_cast<unsigned int>(cpuInfo[3]);
+ return 0;
#else
- // We are not on x86
- *eax = 0;
- *ebx = 0;
- *ecx = 0;
- *edx = 0;
- return 1;
+ // We are on x86, but without compiler support for cpuid if we get here
+ *eax = 0;
+ *ebx = 0;
+ *ecx = 0;
+ *edx = 0;
+ return 1;
-#endif // x86
+#endif // check for inline asm on x86
+ }
+ else
+ {
+ // We are not on x86
+ *eax = 0;
+ *ebx = 0;
+ *ecx = 0;
+ *edx = 0;
+ return 1;
+ }
}
{ "AArch64", CpuInfo::Vendor::Arm },
{ "Fujitsu", CpuInfo::Vendor::Fujitsu },
{ "IBM", CpuInfo::Vendor::Ibm },
- { "POWER", CpuInfo::Vendor::Ibm }
+ { "POWER", CpuInfo::Vendor::Ibm },
+ { "Oracle", CpuInfo::Vendor::Oracle },
};
// For each label in /proc/cpuinfo, compare the value to the name in the
}
if (cpuInfo.count("CPU architecture"))
{
- *family = std::strtol(cpuInfo.at("CPU architecture").c_str(), NULL, 10);
+ *family = std::strtol(cpuInfo.at("CPU architecture").c_str(), nullptr, 10);
// For some 64-bit CPUs it appears to say 'AArch64' instead
if (*family == 0 && cpuInfo.at("CPU architecture").find("AArch64") != std::string::npos)
{
}
if (cpuInfo.count("CPU variant"))
{
- *model = std::strtol(cpuInfo.at("CPU variant").c_str(), NULL, 16);
+ *model = std::strtol(cpuInfo.at("CPU variant").c_str(), nullptr, 16);
}
if (cpuInfo.count("CPU revision"))
{
- *stepping = std::strtol(cpuInfo.at("CPU revision").c_str(), NULL, 10);
+ *stepping = std::strtol(cpuInfo.at("CPU revision").c_str(), nullptr, 10);
}
if (cpuInfo.count("Features"))
{
CpuInfo result;
-#if defined __i386__ || defined __i386 || defined _X86_ || defined _M_IX86 || \
- defined __x86_64__ || defined __amd64__ || defined _M_X64 || defined _M_AMD64
-
- result.vendor_ = detectX86Vendor();
- detectX86Features(&result.brandString_, &result.family_, &result.model_,
- &result.stepping_, &result.features_);
- result.logicalProcessors_ = detectX86LogicalProcessors();
-#else // not x86
-
-# if defined __arm__ || defined __arm || defined _M_ARM || defined __aarch64__
- result.vendor_ = CpuInfo::Vendor::Arm;
-# elif defined __powerpc__ || defined __ppc__ || defined __PPC__
- result.vendor_ = CpuInfo::Vendor::Ibm;
-# endif
+ if (c_architecture == Architecture::X86)
+ {
+ result.vendor_ = detectX86Vendor();
-# if defined __aarch64__ || ( defined _M_ARM && _M_ARM >= 8 )
- result.features_.insert(Feature::Arm_Neon); // ARMv8 always has Neon
- result.features_.insert(Feature::Arm_NeonAsimd); // ARMv8 always has Neon-asimd
-# endif
+ if (result.vendor_ == CpuInfo::Vendor::Intel)
+ {
+ result.features_.insert(CpuInfo::Feature::X86_Intel);
+ }
+ else if (result.vendor_ == CpuInfo::Vendor::Amd)
+ {
+ result.features_.insert(CpuInfo::Feature::X86_Amd);
+ }
+ detectX86Features(&result.brandString_, &result.family_, &result.model_,
+ &result.stepping_, &result.features_);
+ result.logicalProcessors_ = detectX86LogicalProcessors();
+ }
+ else
+ {
+ // Not x86
+ if (c_architecture == Architecture::Arm)
+ {
+ result.vendor_ = CpuInfo::Vendor::Arm;
+ }
+ else if (c_architecture == Architecture::PowerPC)
+ {
+ result.vendor_ = CpuInfo::Vendor::Ibm;
+ }
-# if defined sun
- result.vendor_ = CpuInfo::Vendor::Oracle;
-# endif
+#if defined __aarch64__ || ( defined _M_ARM && _M_ARM >= 8 )
+ result.features_.insert(Feature::Arm_Neon); // ARMv8 always has Neon
+ result.features_.insert(Feature::Arm_NeonAsimd); // ARMv8 always has Neon-asimd
+#endif
- // On Linux we might be able to find information in /proc/cpuinfo. If vendor or brand
- // is set to a known value this routine will not overwrite it.
- detectProcCpuInfo(&result.vendor_, &result.brandString_, &result.family_,
- &result.model_, &result.stepping_, &result.features_);
++#if defined sun
++ result.vendor_ = CpuInfo::Vendor::Oracle;
++#endif
+
-#endif // x86 or not
+ // On Linux we might be able to find information in /proc/cpuinfo. If vendor or brand
+ // is set to a known value this routine will not overwrite it.
+ detectProcCpuInfo(&result.vendor_, &result.brandString_, &result.family_,
+ &result.model_, &result.stepping_, &result.features_);
+ }
if (!result.logicalProcessors_.empty())
{
{ CpuInfo::Vendor::Amd, "AMD" },
{ CpuInfo::Vendor::Fujitsu, "Fujitsu" },
{ CpuInfo::Vendor::Ibm, "IBM" },
- { CpuInfo::Vendor::Arm, "ARM" }
+ { CpuInfo::Vendor::Arm, "ARM" },
+ { CpuInfo::Vendor::Oracle, "Oracle" },
};
CpuInfo::s_featureStrings_ =
{
{ CpuInfo::Feature::X86_Aes, "aes" },
+ { CpuInfo::Feature::X86_Amd, "amd" },
{ CpuInfo::Feature::X86_Apic, "apic" },
{ CpuInfo::Feature::X86_Avx, "avx" },
{ CpuInfo::Feature::X86_Avx2, "avx2" },
{ CpuInfo::Feature::X86_Fma4, "fma4" },
{ CpuInfo::Feature::X86_Hle, "hle" },
{ CpuInfo::Feature::X86_Htt, "htt" },
+ { CpuInfo::Feature::X86_Intel, "intel" },
{ CpuInfo::Feature::X86_Lahf, "lahf" },
{ CpuInfo::Feature::X86_MisalignSse, "misalignsse" },
{ CpuInfo::Feature::X86_Mmx, "mmx" },
}
else if (arg == "-features")
{
+ // Separate the feature strings with spaces. Note that in the
+ // GROMACS cmake code, surrounding whitespace is first
+ // stripped by the CPU detection routine, and then added back
+ // in the code for making the SIMD suggestion.
for (auto &f : cpuInfo.featureSet() )
{
- printf(" %s", cpuInfo.featureString(f).c_str());
+ printf("%s ", cpuInfo.featureString(f).c_str());
}
- printf(" \n"); // extra space so we can grep output for " <feature> " in CMake
+ printf("\n");
}
else if (arg == "-topology")
{
Fujitsu, //!< Only works on Linux (parsed from /proc/cpuinfo)
Ibm, //!< Only works on Linux (parsed from /proc/cpuinfo)
Arm, //!< Only works on Linux (parsed from /proc/cpuinfo)
+ Oracle, //!< Cannot detect anything else yet (no /proc/cpuinfo available)
};
/*! \brief List of CPU features
enum class Feature
{
X86_Aes, //!< x86 advanced encryption standard accel.
+ X86_Amd, //!< This is an AMD x86 processor
X86_Apic, //!< APIC support
X86_Avx, //!< Advanced vector extensions
X86_Avx2, //!< AVX2 including gather support (not used yet)
X86_Fma4, //!< 4-operand FMA, only on AMD for now
X86_Hle, //!< Hardware lock elision
X86_Htt, //!< Hyper-Threading supported (but maybe not enabled)
+ X86_Intel, //!< This is an Intel x86 processor
X86_Lahf, //!< LAHF/SAHF support in 64 bits
X86_MisalignSse, //!< Support for misaligned SSE data instructions
X86_Mmx, //!< MMX registers and instructions
#include <algorithm>
#include "gromacs/domdec/domdec.h"
-#include "gromacs/gmxlib/md_logging.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/gmxlib/nrnb.h"
#include "gromacs/math/vec.h"
#include "gromacs/utility/cstringutil.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxassert.h"
+#include "gromacs/utility/logger.h"
#include "gromacs/utility/smalloc.h"
#include "gromacs/utility/snprintf.h"
return nmin;
}
-void copy_coupling_state(t_state *statea, t_state *stateb,
- gmx_ekindata_t *ekinda, gmx_ekindata_t *ekindb, t_grpopts* opts)
-{
-
- /* MRS note -- might be able to get rid of some of the arguments. Look over it when it's all debugged */
-
- int i, j, nc;
-
- /* Make sure we have enough space for x and v */
- if (statea->nalloc > stateb->nalloc)
- {
- stateb->nalloc = statea->nalloc;
- /* We need to allocate one element extra, since we might use
- * (unaligned) 4-wide SIMD loads to access rvec entries.
- */
- srenew(stateb->x, stateb->nalloc + 1);
- srenew(stateb->v, stateb->nalloc + 1);
- }
-
- stateb->natoms = statea->natoms;
- stateb->ngtc = statea->ngtc;
- stateb->nnhpres = statea->nnhpres;
- stateb->veta = statea->veta;
- if (ekinda)
- {
- copy_mat(ekinda->ekin, ekindb->ekin);
- for (i = 0; i < stateb->ngtc; i++)
- {
- ekindb->tcstat[i].T = ekinda->tcstat[i].T;
- ekindb->tcstat[i].Th = ekinda->tcstat[i].Th;
- copy_mat(ekinda->tcstat[i].ekinh, ekindb->tcstat[i].ekinh);
- copy_mat(ekinda->tcstat[i].ekinf, ekindb->tcstat[i].ekinf);
- ekindb->tcstat[i].ekinscalef_nhc = ekinda->tcstat[i].ekinscalef_nhc;
- ekindb->tcstat[i].ekinscaleh_nhc = ekinda->tcstat[i].ekinscaleh_nhc;
- ekindb->tcstat[i].vscale_nhc = ekinda->tcstat[i].vscale_nhc;
- }
- }
- copy_rvecn(statea->x, stateb->x, 0, stateb->natoms);
- copy_rvecn(statea->v, stateb->v, 0, stateb->natoms);
- copy_mat(statea->box, stateb->box);
- copy_mat(statea->box_rel, stateb->box_rel);
- copy_mat(statea->boxv, stateb->boxv);
-
- for (i = 0; i < stateb->ngtc; i++)
- {
- nc = i*opts->nhchainlength;
- for (j = 0; j < opts->nhchainlength; j++)
- {
- stateb->nosehoover_xi[nc+j] = statea->nosehoover_xi[nc+j];
- stateb->nosehoover_vxi[nc+j] = statea->nosehoover_vxi[nc+j];
- }
- }
- if (stateb->nhpres_xi != NULL)
- {
- for (i = 0; i < stateb->nnhpres; i++)
- {
- nc = i*opts->nhchainlength;
- for (j = 0; j < opts->nhchainlength; j++)
- {
- stateb->nhpres_xi[nc+j] = statea->nhpres_xi[nc+j];
- stateb->nhpres_vxi[nc+j] = statea->nhpres_vxi[nc+j];
- }
- }
- }
-}
-
-real compute_conserved_from_auxiliary(t_inputrec *ir, t_state *state, t_extmass *MassQ)
-{
- real quantity = 0;
- switch (ir->etc)
- {
- case etcNO:
- break;
- case etcBERENDSEN:
- break;
- case etcNOSEHOOVER:
- quantity = NPT_energy(ir, state, MassQ);
- break;
- case etcVRESCALE:
- quantity = vrescale_energy(&(ir->opts), state->therm_integral);
- break;
- default:
- break;
- }
- return quantity;
-}
-
/* TODO Specialize this routine into init-time and loop-time versions?
e.g. bReadEkin is only true when restoring from checkpoint */
void compute_globals(FILE *fplog, gmx_global_stat *gstat, t_commrec *cr, t_inputrec *ir,
t_state *state, t_mdatoms *mdatoms,
t_nrnb *nrnb, t_vcm *vcm, gmx_wallcycle_t wcycle,
gmx_enerdata_t *enerd, tensor force_vir, tensor shake_vir, tensor total_vir,
- tensor pres, rvec mu_tot, gmx_constr_t constr,
+ tensor pres, rvec mu_tot, struct gmx_constr *constr,
gmx::SimulationSignaller *signalCoordinator,
matrix box, int *totalNumberOfBondedInteractions,
gmx_bool *bSumEkinhOld, int flags)
if (bStopCM)
{
calc_vcm_grp(0, mdatoms->homenr, mdatoms,
- state->x, state->v, vcm);
+ as_rvec_array(state->x.data()), as_rvec_array(state->v.data()), vcm);
}
if (bTemp || bStopCM || bPres || bEner || bConstrain)
{
wallcycle_start(wcycle, ewcMoveE);
global_stat(gstat, cr, enerd, force_vir, shake_vir, mu_tot,
- ir, ekind, constr, bStopCM ? vcm : NULL,
+ ir, ekind, constr, bStopCM ? vcm : nullptr,
signalBuffer.size(), signalBuffer.data(),
totalNumberOfBondedInteractions,
*bSumEkinhOld, flags);
{
correct_ekin(debug,
0, mdatoms->homenr,
- state->v, vcm->group_p[0],
+ as_rvec_array(state->v.data()), vcm->group_p[0],
mdatoms->massT, mdatoms->tmass, ekind->ekin);
}
{
check_cm_grp(fplog, vcm, ir, 1);
do_stopcm_grp(0, mdatoms->homenr, mdatoms->cVCM,
- state->x, state->v, vcm);
+ as_rvec_array(state->x.data()), as_rvec_array(state->v.data()), vcm);
inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
}
}
}
-void check_nst_param(FILE *fplog, t_commrec *cr,
- const char *desc_nst, int nst,
- const char *desc_p, int *p)
+/* check whether an 'nst'-style parameter p is a multiple of nst, and
+ set it to be one if not, with a warning. */
+static void check_nst_param(const gmx::MDLogger &mdlog,
+ const char *desc_nst, int nst,
+ const char *desc_p, int *p)
{
if (*p > 0 && *p % nst != 0)
{
/* Round up to the next multiple of nst */
*p = ((*p)/nst + 1)*nst;
- md_print_warn(cr, fplog,
- "NOTE: %s changes %s to %d\n", desc_nst, desc_p, *p);
+ GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
+ "NOTE: %s changes %s to %d", desc_nst, desc_p, *p);
}
}
/* find the current lambdas. If rerunning, we either read in a state, or a lambda value,
requiring different logic. */
{
- real frac;
- int i, fep_state = 0;
+ int i;
if (bRerunMD)
{
if (rerun_fr->bLambda)
else
{
/* find out between which two value of lambda we should be */
- frac = (step*fepvals->delta_lambda);
- fep_state = static_cast<int>(floor(frac*fepvals->n_lambda));
+ real frac = (step*fepvals->delta_lambda);
+ int fep_state = static_cast<int>(floor(frac*fepvals->n_lambda));
/* interpolate between this state and the next */
/* this assumes that the initial lambda corresponds to lambda==0, which is verified in grompp */
frac = (frac*fepvals->n_lambda)-fep_state;
state_global->fep_state = rerun_fr->fep_state;
for (i = 0; i < efptNR; i++)
{
- state_global->lambda[i] = fepvals->all_lambda[i][fep_state];
+ state_global->lambda[i] = fepvals->all_lambda[i][state_global->fep_state];
}
}
}
if (fepvals->delta_lambda != 0)
{
/* find out between which two value of lambda we should be */
- frac = (step*fepvals->delta_lambda);
+ real frac = (step*fepvals->delta_lambda);
if (fepvals->n_lambda > 0)
{
- fep_state = static_cast<int>(floor(frac*fepvals->n_lambda));
+ int fep_state = static_cast<int>(floor(frac*fepvals->n_lambda));
/* interpolate between this state and the next */
/* this assumes that the initial lambda corresponds to lambda==0, which is verified in grompp */
frac = (frac*fepvals->n_lambda)-fep_state;
return nst;
}
-int check_nstglobalcomm(FILE *fplog, t_commrec *cr,
- int nstglobalcomm, t_inputrec *ir)
+int check_nstglobalcomm(const gmx::MDLogger &mdlog, int nstglobalcomm, t_inputrec *ir)
{
if (!EI_DYNAMICS(ir->eI))
{
nstglobalcomm > ir->nstlist && nstglobalcomm % ir->nstlist != 0)
{
nstglobalcomm = (nstglobalcomm / ir->nstlist)*ir->nstlist;
- md_print_warn(cr, fplog, "WARNING: nstglobalcomm is larger than nstlist, but not a multiple, setting it to %d\n", nstglobalcomm);
+ GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
+ "WARNING: nstglobalcomm is larger than nstlist, but not a multiple, setting it to %d",
+ nstglobalcomm);
}
if (ir->nstcalcenergy > 0)
{
- check_nst_param(fplog, cr, "-gcom", nstglobalcomm,
+ check_nst_param(mdlog, "-gcom", nstglobalcomm,
"nstcalcenergy", &ir->nstcalcenergy);
}
if (ir->etc != etcNO && ir->nsttcouple > 0)
{
- check_nst_param(fplog, cr, "-gcom", nstglobalcomm,
+ check_nst_param(mdlog, "-gcom", nstglobalcomm,
"nsttcouple", &ir->nsttcouple);
}
if (ir->epc != epcNO && ir->nstpcouple > 0)
{
- check_nst_param(fplog, cr, "-gcom", nstglobalcomm,
+ check_nst_param(mdlog, "-gcom", nstglobalcomm,
"nstpcouple", &ir->nstpcouple);
}
- check_nst_param(fplog, cr, "-gcom", nstglobalcomm,
+ check_nst_param(mdlog, "-gcom", nstglobalcomm,
"nstenergy", &ir->nstenergy);
- check_nst_param(fplog, cr, "-gcom", nstglobalcomm,
+ check_nst_param(mdlog, "-gcom", nstglobalcomm,
"nstlog", &ir->nstlog);
}
if (ir->comm_mode != ecmNO && ir->nstcomm < nstglobalcomm)
{
- md_print_warn(cr, fplog, "WARNING: Changing nstcomm from %d to %d\n",
- ir->nstcomm, nstglobalcomm);
+ GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
+ "WARNING: Changing nstcomm from %d to %d",
+ ir->nstcomm, nstglobalcomm);
ir->nstcomm = nstglobalcomm;
}
- if (fplog)
- {
- fprintf(fplog, "Intra-simulation communication will occur every %d steps.\n", nstglobalcomm);
- }
+ GMX_LOG(mdlog.info).appendTextFormatted(
+ "Intra-simulation communication will occur every %d steps.\n", nstglobalcomm);
return nstglobalcomm;
}
}
+// TODO Most of this logic seems to belong in the respective modules
void set_state_entries(t_state *state, const t_inputrec *ir)
{
/* The entries in the state in the tpx file might not correspond
state->flags |= (1<<estFEPSTATE);
}
state->flags |= (1<<estX);
- if (state->lambda == NULL)
- {
- snew(state->lambda, efptNR);
- }
- if (state->x == NULL)
- {
- /* We need to allocate one element extra, since we might use
- * (unaligned) 4-wide SIMD loads to access rvec entries.
- */
- snew(state->x, state->nalloc + 1);
- }
+ GMX_RELEASE_ASSERT(state->x.size() >= static_cast<unsigned int>(state->natoms), "We should start a run with an initialized state->x");
if (EI_DYNAMICS(ir->eI))
{
state->flags |= (1<<estV);
- if (state->v == NULL)
- {
- snew(state->v, state->nalloc + 1);
- }
- }
- if (ir->eI == eiCG)
- {
- state->flags |= (1<<estCGP);
- if (state->cg_p == NULL)
- {
- /* cg_p is not stored in the tpx file, so we need to allocate it */
- snew(state->cg_p, state->nalloc + 1);
- }
}
state->nnhpres = 0;
if ((ir->epc == epcPARRINELLORAHMAN) || (ir->epc == epcMTTK))
{
state->flags |= (1<<estBOXV);
+ state->flags |= (1<<estPRES_PREV);
}
- if (ir->epc != epcNO)
+ if (inputrecNptTrotter(ir) || (inputrecNphTrotter(ir)))
{
- if (inputrecNptTrotter(ir) || (inputrecNphTrotter(ir)))
- {
- state->nnhpres = 1;
- state->flags |= (1<<estNHPRES_XI);
- state->flags |= (1<<estNHPRES_VXI);
- state->flags |= (1<<estSVIR_PREV);
- state->flags |= (1<<estFVIR_PREV);
- state->flags |= (1<<estVETA);
- state->flags |= (1<<estVOL0);
- }
- else
- {
- state->flags |= (1<<estPRES_PREV);
- }
+ state->nnhpres = 1;
+ state->flags |= (1<<estNHPRES_XI);
+ state->flags |= (1<<estNHPRES_VXI);
+ state->flags |= (1<<estSVIR_PREV);
+ state->flags |= (1<<estFVIR_PREV);
+ state->flags |= (1<<estVETA);
+ state->flags |= (1<<estVOL0);
+ }
+ if (ir->epc == epcBERENDSEN)
+ {
+ state->flags |= (1<<estBAROS_INT);
}
}
state->flags |= (1<<estNH_VXI);
}
- if (ir->etc == etcVRESCALE)
+ if (ir->etc == etcVRESCALE || ir->etc == etcBERENDSEN)
{
- state->flags |= (1<<estTC_INT);
+ state->flags |= (1<<estTHERM_INT);
}
init_gtc_state(state, state->ngtc, state->nnhpres, ir->opts.nhchainlength); /* allocate the space for nose-hoover chains */
init_ekinstate(&state->ekinstate, ir);
- snew(state->enerhist, 1);
- init_energyhistory(state->enerhist);
- init_df_history(&state->dfhist, ir->fepvals->n_lambda);
- state->swapstate.eSwapCoords = ir->eSwapCoords;
+
+ if (ir->bExpanded)
+ {
+ snew(state->dfhist, 1);
+ init_df_history(state->dfhist, ir->fepvals->n_lambda);
+ }
}
*/
#include "gromacs/gpu_utils/cuda_arch_utils.cuh"
+#include "gromacs/gpu_utils/cuda_kernel_utils.cuh"
#include "gromacs/math/utilities.h"
#include "gromacs/pbcutil/ishift.h"
/* Note that floating-point constants in CUDA code should be suffixed
* code that is in double precision.
*/
-#if GMX_PTX_ARCH < 300
-#error "nbnxn_cuda_kernel.cuh included with GMX_PTX_ARCH < 300"
+#if GMX_PTX_ARCH < 300 && GMX_PTX_ARCH != 0
+#error "nbnxn_cuda_kernel.cuh included with GMX_PTX_ARCH < 300 or host pass"
#endif
#if defined EL_EWALD_ANA || defined EL_EWALD_TAB
Kernel launch parameters:
- #blocks = #pair lists, blockId = pair list Id
- #threads = NTHREAD_Z * c_clSize^2
- - shmem = see nbnxn_cuda.cu:calc_shmem_required()
+ - shmem = see nbnxn_cuda.cu:calc_shmem_required_nonbonded()
Each thread calculates an i force-component taking one pair of i-j atoms.
*/
* shuffle-based reduction, hence CC >= 3.0.
*
*
- * NOTEs / TODO on Volta / CUDA 9 support extensions:
- * - the current way of computing active mask using ballot_sync() should be
- * reconsidered: we can compute all masks with bitwise ops iso ballot and
- * secondly, all conditionals are warp-uniform, so the sync is not needed;
- * - reconsider the use of __syncwarp(): its only role is currently to prevent
+ * NOTEs on Volta / CUDA 9 extensions:
+ *
+ * - While active thread masks are required for the warp collectives
+ * (we use any and shfl), the kernel is designed such that all conditions
+ * (other than the inner-most distance check) including loop trip counts
+ * are warp-synchronous. Therefore, we don't need ballot to compute the
+ * active masks as these are all full-warp masks.
+ *
+ * - TODO: reconsider the use of __syncwarp(): its only role is currently to prevent
* WAR hazard due to the cj preload; we should try to replace it with direct
* loads (which may be faster given the improved L1 on Volta).
*/
#ifdef EL_RF
float two_k_rf = nbparam.two_k_rf;
#endif
-#ifdef EL_EWALD_TAB
- float coulomb_tab_scale = nbparam.coulomb_tab_scale;
-#endif
#ifdef EL_EWALD_ANA
float beta2 = nbparam.ewald_beta*nbparam.ewald_beta;
float beta3 = nbparam.ewald_beta*nbparam.ewald_beta*nbparam.ewald_beta;
#endif
#ifdef PRUNE_NBL
- float rlist_sq = nbparam.rlist_sq;
+ float rlist_sq = nbparam.rlistOuter_sq;
#endif
#ifdef CALC_ENERGIES
ci = sci * c_numClPerSupercl + tidxj;
ai = ci * c_clSize + tidxi;
- xqbuf = xq[ai] + shift_vec[nb_sci.shift];
+ float *shiftptr = (float *)&shift_vec[nb_sci.shift];
+ xqbuf = xq[ai] + make_float4(LDG(shiftptr), LDG(shiftptr + 1), LDG(shiftptr + 2), 0.0f);
xqbuf.w *= nbparam.epsfac;
xqib[tidxj * c_clSize + tidxi] = xqbuf;
#endif
#ifdef LJ_EWALD
+ #if DISABLE_CUDA_TEXTURES
+ E_lj += LDG(&nbparam.nbfp[atom_types[(sci*c_numClPerSupercl + i)*c_clSize + tidxi]*(ntypes + 1)*2]);
+ #else
E_lj += tex1Dfetch<float>(nbparam.nbfp_texobj, atom_types[(sci*c_numClPerSupercl + i)*c_clSize + tidxi]*(ntypes + 1)*2);
+ #endif
#endif
}
#endif /* CALC_ENERGIES */
- int j4LoopStart = cij4_start + tidxz;
- unsigned int j4LoopThreadMask = gmx_ballot_sync(c_fullWarpMask, j4LoopStart < cij4_end);
+#ifdef EXCLUSION_FORCES
+ const int nonSelfInteraction = !(nb_sci.shift == CENTRAL & tidxj <= tidxi);
+#endif
+
+ int j4LoopStart = cij4_start + tidxz;
/* loop over the j clusters = seen by any of the atoms in the current super-cluster */
for (j4 = j4LoopStart; j4 < cij4_end; j4 += NTHREAD_Z)
{
imask = pl_cj4[j4].imei[widx].imask;
wexcl = excl[wexcl_idx].pair[(tidx) & (warp_size - 1)];
- unsigned int imaskSkipConditionThreadMask = j4LoopThreadMask;
#ifndef PRUNE_NBL
- imaskSkipConditionThreadMask = gmx_ballot_sync(j4LoopThreadMask, imask);
if (imask)
#endif
{
/* Pre-load cj into shared memory on both warps separately */
- if ((tidxj == 0 || tidxj == 4) && tidxi < c_nbnxnGpuJgroupSize)
+ if ((tidxj == 0 | tidxj == 4) & (tidxi < c_nbnxnGpuJgroupSize))
{
cjs[tidxi + tidxj * c_nbnxnGpuJgroupSize/c_splitClSize] = pl_cj4[j4].cj[tidxi];
}
- gmx_syncwarp(imaskSkipConditionThreadMask);
+ gmx_syncwarp(c_fullWarpMask);
/* Unrolling this loop
- with pruning leads to register spilling;
Tested with up to nvcc 7.5 */
for (jm = 0; jm < c_nbnxnGpuJgroupSize; jm++)
{
- const unsigned int jmSkipCondition = imask & (superClInteractionMask << (jm * c_numClPerSupercl));
- const unsigned int jmSkipConditionThreadMask = gmx_ballot_sync(imaskSkipConditionThreadMask, jmSkipCondition);
+ const unsigned int jmSkipCondition = imask & (superClInteractionMask << (jm * c_numClPerSupercl));
if (jmSkipCondition)
{
mask_ji = (1U << (jm * c_numClPerSupercl));
#endif
for (i = 0; i < c_numClPerSupercl; i++)
{
- const unsigned int iInnerSkipCondition = imask & mask_ji;
- const unsigned int iInnerSkipConditionThreadMask = gmx_ballot_sync(jmSkipConditionThreadMask, iInnerSkipCondition);
+ const unsigned int iInnerSkipCondition = imask & mask_ji;
if (iInnerSkipCondition)
{
ci = sci * c_numClPerSupercl + i; /* i cluster index */
/* If _none_ of the atoms pairs are in cutoff range,
the bit corresponding to the current
cluster-pair in imask gets set to 0. */
- if (!gmx_any_sync(iInnerSkipConditionThreadMask, r2 < rlist_sq))
+ if (!gmx_any_sync(c_fullWarpMask, r2 < rlist_sq))
{
imask &= ~mask_ji;
}
/* cutoff & exclusion check */
#ifdef EXCLUSION_FORCES
- if (r2 < rcoulomb_sq *
- (nb_sci.shift != CENTRAL || ci != cj || tidxj > tidxi))
+ if ((r2 < rcoulomb_sq) * (nonSelfInteraction | (ci != cj)))
#else
- if (r2 < rcoulomb_sq * int_bit)
+ if ((r2 < rcoulomb_sq) * int_bit)
#endif
{
/* load the rest of the i-atom parameters */
#ifndef LJ_COMB
/* LJ 6*C6 and 12*C12 */
typei = atib[i * c_clSize + tidxi];
- c6 = tex1Dfetch<float>(nbparam.nbfp_texobj, 2 * (ntypes * typei + typej));
- c12 = tex1Dfetch<float>(nbparam.nbfp_texobj, 2 * (ntypes * typei + typej) + 1);
+ fetch_nbfp_c6_c12(c6, c12, nbparam, ntypes * typei + typej);
#else
ljcp_i = ljcpib[i * c_clSize + tidxi];
#ifdef LJ_COMB_GEOM
#ifdef LJ_POT_SWITCH
#ifdef CALC_ENERGIES
- calculate_potential_switch_F_E(nbparam, c6, c12, inv_r, r2, &F_invr, &E_lj_p);
+ calculate_potential_switch_F_E(nbparam, inv_r, r2, &F_invr, &E_lj_p);
#else
- calculate_potential_switch_F(nbparam, c6, c12, inv_r, r2, &F_invr, &E_lj_p);
+ calculate_potential_switch_F(nbparam, inv_r, r2, &F_invr, &E_lj_p);
#endif /* CALC_ENERGIES */
#endif /* LJ_POT_SWITCH */
F_invr += qi * qj_f * (int_bit*inv_r2*inv_r + pmecorrF(beta2*r2)*beta3);
#elif defined EL_EWALD_TAB
F_invr += qi * qj_f * (int_bit*inv_r2 -
- interpolate_coulomb_force_r(nbparam.coulomb_tab_texobj, r2 * inv_r, coulomb_tab_scale)) * inv_r;
+ interpolate_coulomb_force_r(nbparam, r2 * inv_r)) * inv_r;
#endif /* EL_EWALD_ANA/TAB */
#ifdef CALC_ENERGIES
}
/* reduce j forces */
- reduce_force_j_warp_shfl(fcj_buf, f, tidxi, aj, jmSkipConditionThreadMask);
+ reduce_force_j_warp_shfl(fcj_buf, f, tidxi, aj, c_fullWarpMask);
}
}
#ifdef PRUNE_NBL
#endif
}
// avoid shared memory WAR hazards between loop iterations
- gmx_syncwarp(j4LoopThreadMask);
- // update thread mask for next loop iteration
- j4LoopThreadMask = gmx_ballot_sync(j4LoopThreadMask, (j4 + NTHREAD_Z) < cij4_end);
+ gmx_syncwarp(c_fullWarpMask);
}
/* skip central shifts when summing shift forces */
/*! \brief Calculate 1/sqrt(x) for SIMD float.
*
- * \param x Argument that must be >0. This routine does not check arguments.
+ * \param x Argument that must be larger than GMX_FLOAT_MIN and smaller than
+ * GMX_FLOAT_MAX, i.e. within the range of single precision.
+ * For the single precision implementation this is obviously always
+ * true for positive values, but for double precision it adds an
+ * extra restriction since the first lookup step might have to be
+ * performed in single precision on some architectures. Note that the
+ * responsibility for checking falls on you - this routine does not
+ * check arguments.
+ *
* \return 1/sqrt(x). Result is undefined if your argument was invalid.
*/
static inline SimdFloat gmx_simdcall
/*! \brief Calculate 1/sqrt(x) for two SIMD floats.
*
- * \param x0 First set of arguments, x0 must be positive - no argument checking.
- * \param x1 Second set of arguments, x1 must be positive - no argument checking.
+ * \param x0 First set of arguments, x0 must be in single range (see below).
+ * \param x1 Second set of arguments, x1 must be in single range (see below).
* \param[out] out0 Result 1/sqrt(x0)
* \param[out] out1 Result 1/sqrt(x1)
*
* In particular for double precision we can sometimes calculate square root
* pairs slightly faster by using single precision until the very last step.
+ *
+ * \note Both arguments must be larger than GMX_FLOAT_MIN and smaller than
+ * GMX_FLOAT_MAX, i.e. within the range of single precision.
+ * For the single precision implementation this is obviously always
+ * true for positive values, but for double precision it adds an
+ * extra restriction since the first lookup step might have to be
+ * performed in single precision on some architectures. Note that the
+ * responsibility for checking falls on you - this routine does not
+ * check arguments.
*/
static inline void gmx_simdcall
invsqrtPair(SimdFloat x0, SimdFloat x1,
/*! \brief Calculate 1/x for SIMD float.
*
- * \param x Argument that must be nonzero. This routine does not check arguments.
+ * \param x Argument with magnitude larger than GMX_FLOAT_MIN and smaller than
+ * GMX_FLOAT_MAX, i.e. within the range of single precision.
+ * For the single precision implementation this is obviously always
+ * true for positive values, but for double precision it adds an
+ * extra restriction since the first lookup step might have to be
+ * performed in single precision on some architectures. Note that the
+ * responsibility for checking falls on you - this routine does not
+ * check arguments.
+ *
* \return 1/x. Result is undefined if your argument was invalid.
*/
static inline SimdFloat gmx_simdcall
/*! \brief Division for SIMD floats
*
 * \param    nom      Numerator
- * \param denom Denominator
+ * \param denom Denominator, with magnitude in range (GMX_FLOAT_MIN,GMX_FLOAT_MAX).
+ * For single precision this is equivalent to a nonzero argument,
+ * but in double precision it adds an extra restriction since
+ * the first lookup step might have to be performed in single
+ * precision on some architectures. Note that the responsibility
+ * for checking falls on you - this routine does not check arguments.
*
* \return nom/denom
*
* Illegal values in the masked-out elements will not lead to
* floating-point exceptions.
*
- * \param x Argument that must be >0 for masked-in entries
+ * \param x Argument that must be larger than GMX_FLOAT_MIN and smaller than
+ * GMX_FLOAT_MAX for masked-in entries.
+ * See \ref invsqrt for the discussion about argument restrictions.
* \param m Mask
* \return 1/sqrt(x). Result is undefined if your argument was invalid or
* entry was not masked, and 0.0 for masked-out entries.
/*! \brief Calculate 1/x for SIMD float, masked version.
*
- * \param x Argument that must be nonzero for non-masked entries.
+ * \param x Argument with magnitude larger than GMX_FLOAT_MIN and smaller than
+ * GMX_FLOAT_MAX for masked-in entries.
+ * See \ref invsqrt for the discussion about argument restrictions.
* \param m Mask
* \return 1/x for elements where m is true, or 0.0 for masked-out entries.
*/
return lu;
}
-/*! \brief Calculate sqrt(x) correctly for SIMD floats, including argument 0.0.
+/*! \brief Calculate sqrt(x) for SIMD floats
+ *
+ * \tparam opt By default, this function checks if the input value is 0.0
+ * and masks this to return the correct result. If you are certain
+ * your argument will never be zero, and you know you need to save
+ * every single cycle you can, you can alternatively call the
+ * function as sqrt<MathOptimization::Unsafe>(x).
+ *
+ * \param x Argument that must be in range 0 <=x <= GMX_FLOAT_MAX, since the
+ * lookup step often has to be implemented in single precision.
+ * Arguments smaller than GMX_FLOAT_MIN will always lead to a zero
+ * result, even in double precision. If you are using the unsafe
+ * math optimization parameter, the argument must be in the range
+ * GMX_FLOAT_MIN <= x <= GMX_FLOAT_MAX.
*
- * \param x Argument that must be >=0.
- * \return sqrt(x). If x=0, the result will correctly be set to 0.
- * The result is undefined if the input value is negative.
+ * \return sqrt(x). The result is undefined if the input value does not fall
+ * in the allowed range specified for the argument.
*/
+template <MathOptimization opt = MathOptimization::Safe>
static inline SimdFloat gmx_simdcall
sqrt(SimdFloat x)
{
- SimdFloat res = maskzInvsqrt(x, setZero() < x);
- return res*x;
+ if (opt == MathOptimization::Safe)
+ {
+ SimdFloat res = maskzInvsqrt(x, setZero() < x);
+ return res*x;
+ }
+ else
+ {
+ return x * invsqrt(x);
+ }
}
#if !GMX_SIMD_HAVE_NATIVE_LOG_FLOAT
#endif
#if !GMX_SIMD_HAVE_NATIVE_EXP2_FLOAT
-/*! \brief SIMD float 2^x.
- *
- * \param x Argument.
- * \result 2^x. Undefined if input argument caused overflow.
+/*! \brief SIMD float 2^x
+ *
+ * \tparam opt If this is changed from the default (safe) into the unsafe
+ * option, input values that would otherwise lead to zero-clamped
+ * results are not allowed and will lead to undefined results.
+ *
+ * \param x Argument. For the default (safe) function version this can be
+ * arbitrarily small value, but the routine might clamp the result to
+ * zero for arguments that would produce subnormal IEEE754-2008 results.
+ * This corresponds to inputs below -126 in single or -1022 in double,
+ * and it might overflow for arguments reaching 127 (single) or
+ * 1023 (double). If you enable the unsafe math optimization,
+ * very small arguments will not necessarily be zero-clamped, but
+ * can produce undefined results.
+ *
+ * \result 2^x. The result is undefined for very large arguments that cause
+ * internal floating-point overflow. If unsafe optimizations are enabled,
+ * this is also true for very small values.
+ *
+ * \note The definition range of this function is just-so-slightly smaller
+ * than the allowed IEEE exponents for many architectures. This is due
+ * to the implementation, which will hopefully improve in the future.
+ *
+ * \warning You cannot rely on this implementation returning inf for arguments
+ * that cause overflow. If you have some very large
+ * values and need to rely on getting a valid numerical output,
+ * take the minimum of your variable and the largest valid argument
+ * before calling this routine.
*/
+template <MathOptimization opt = MathOptimization::Safe>
static inline SimdFloat gmx_simdcall
exp2(SimdFloat x)
{
- // Lower bound: Disallow numbers that would lead to an IEEE fp exponent reaching +-127.
- const SimdFloat arglimit(126.0f);
+ // Lower bound: Clamp args that would lead to an IEEE fp exponent below -127.
+ // Note that we use a slightly wider range inside this *implementation* compared
+ // to that guaranteed by the API documentation in the comment above (which has
+ // to cover all architectures).
+ const SimdFloat smallArgLimit(-127.0f);
const SimdFloat CC6(0.0001534581200287996416911311f);
const SimdFloat CC5(0.001339993121934088894618990f);
const SimdFloat CC4(0.009618488957115180159497841f);
SimdFloat intpart;
SimdFloat fexppart;
SimdFloat p;
- SimdFBool m;
+
+ // Large negative values are valid arguments to exp2(), so there are two
+ // things we need to account for:
+ // 1. When the exponents reaches -127, the (biased) exponent field will be
+ // zero and we can no longer multiply with it. There are special IEEE
+ // formats to handle this range, but for now we have to accept that
+ // we cannot handle those arguments. If input value becomes even more
+ // negative, it will start to loop and we would end up with invalid
+ // exponents. Thus, we need to limit or mask this.
+ // 2. For VERY large negative values, we will have problems that the
+ // subtraction to get the fractional part loses accuracy, and then we
+ // can end up with overflows in the polynomial.
+ //
+ // For now, we handle this by clamping smaller arguments to -127. At this
+ // point we will already return zero, so we don't need to do anything
+ // extra for the exponent.
+
+ if (opt == MathOptimization::Safe)
+ {
+ x = max(x, smallArgLimit);
+ }
fexppart = ldexp(one, cvtR2I(x));
intpart = round(x);
- m = abs(x) <= arglimit;
- fexppart = selectByMask(fexppart, m);
x = x - intpart;
p = fma(CC6, x, CC5);
* In addition to scaling the argument for 2^x this routine correctly does
* extended precision arithmetics to improve accuracy.
*
- * \param x Argument.
- * \result exp(x). Undefined if input argument caused overflow,
- * which can happen if abs(x) \> 7e13.
+ * \tparam opt If this is changed from the default (safe) into the unsafe
+ * option, input values that would otherwise lead to zero-clamped
+ * results are not allowed and will lead to undefined results.
+ *
+ * \param x Argument. For the default (safe) function version this can be
+ * arbitrarily small value, but the routine might clamp the result to
+ * zero for arguments that would produce subnormal IEEE754-2008 results.
+ * This corresponds to input arguments reaching
+ * -126*ln(2)=-87.3 in single, or -1022*ln(2)=-708.4 (double).
+ * Similarly, it might overflow for arguments reaching
+ * 127*ln(2)=88.0 (single) or 1023*ln(2)=709.1 (double). If the
+ * unsafe math optimizations are enabled, small input values that would
+ * result in zero-clamped output are not allowed.
+ *
+ * \result exp(x). Overflowing arguments are likely to either return 0 or inf,
+ * depending on the underlying implementation. If unsafe optimizations
+ * are enabled, this is also true for very small values.
+ *
+ * \note The definition range of this function is just-so-slightly smaller
+ * than the allowed IEEE exponents for many architectures. This is due
+ * to the implementation, which will hopefully improve in the future.
+ *
+ * \warning You cannot rely on this implementation returning inf for arguments
+ * that cause overflow. If you have some very large
+ * values and need to rely on getting a valid numerical output,
+ * take the minimum of your variable and the largest valid argument
+ * before calling this routine.
*/
+template <MathOptimization opt = MathOptimization::Safe>
static inline SimdFloat gmx_simdcall
exp(SimdFloat x)
{
const SimdFloat argscale(1.44269504088896341f);
- // Lower bound: Disallow numbers that would lead to an IEEE fp exponent reaching +-127.
- const SimdFloat arglimit(126.0f);
+ // Lower bound: Clamp args that would lead to an IEEE fp exponent below -127.
+ // Note that we use a slightly wider range inside this *implementation* compared
+ // to that guaranteed by the API documentation in the comment above (which has
+ // to cover all architectures).
+ const SimdFloat smallArgLimit(-88.0296919311f);
const SimdFloat invargscale0(-0.693145751953125f);
const SimdFloat invargscale1(-1.428606765330187045e-06f);
const SimdFloat CC4(0.00136324646882712841033936f);
SimdFloat fexppart;
SimdFloat intpart;
SimdFloat y, p;
- SimdFBool m;
+
+    // Large negative values are valid arguments to exp(), so there are two
+ // things we need to account for:
+ // 1. When the IEEE exponents reaches -127, the (biased) exponent field will be
+ // zero and we can no longer multiply with it. There are special IEEE
+ // formats to handle this range, but for now we have to accept that
+ // we cannot handle those arguments. If input value becomes even more
+ // negative, it will start to loop and we would end up with invalid
+ // exponents. Thus, we need to limit or mask this.
+ // 2. For VERY large negative values, we will have problems that the
+ // subtraction to get the fractional part loses accuracy, and then we
+ // can end up with overflows in the polynomial.
+ //
+ // For now, we handle this by clamping smaller arguments to -127*ln(2). At this
+ // point we will already return zero, so we don't need to do anything
+ // extra for the exponent.
+
+ if (opt == MathOptimization::Safe)
+ {
+ x = max(x, smallArgLimit);
+ }
y = x * argscale;
fexppart = ldexp(one, cvtR2I(y));
intpart = round(y);
- m = (abs(y) <= arglimit);
- fexppart = selectByMask(fexppart, m);
// Extended precision arithmetics
x = fma(invargscale0, intpart, x);
/*! \brief Calculate 1/sqrt(x) for SIMD double.
*
- * \param x Argument that must be >0. This routine does not check arguments.
+ * \param x Argument that must be larger than GMX_FLOAT_MIN and smaller than
+ * GMX_FLOAT_MAX, i.e. within the range of single precision.
+ * For the single precision implementation this is obviously always
+ * true for positive values, but for double precision it adds an
+ * extra restriction since the first lookup step might have to be
+ * performed in single precision on some architectures. Note that the
+ * responsibility for checking falls on you - this routine does not
+ * check arguments.
+ *
* \return 1/sqrt(x). Result is undefined if your argument was invalid.
*/
static inline SimdDouble gmx_simdcall
/*! \brief Calculate 1/sqrt(x) for two SIMD doubles.
*
- * \param x0 First set of arguments, x0 must be positive - no argument checking.
- * \param x1 Second set of arguments, x1 must be positive - no argument checking.
+ * \param x0 First set of arguments, x0 must be in single range (see below).
+ * \param x1 Second set of arguments, x1 must be in single range (see below).
* \param[out] out0 Result 1/sqrt(x0)
* \param[out] out1 Result 1/sqrt(x1)
*
* In particular for double precision we can sometimes calculate square root
* pairs slightly faster by using single precision until the very last step.
+ *
+ * \note Both arguments must be larger than GMX_FLOAT_MIN and smaller than
+ * GMX_FLOAT_MAX, i.e. within the range of single precision.
+ * For the single precision implementation this is obviously always
+ * true for positive values, but for double precision it adds an
+ * extra restriction since the first lookup step might have to be
+ * performed in single precision on some architectures. Note that the
+ * responsibility for checking falls on you - this routine does not
+ * check arguments.
*/
static inline void gmx_simdcall
invsqrtPair(SimdDouble x0, SimdDouble x1,
/*! \brief Calculate 1/x for SIMD double.
*
- * \param x Argument that must be nonzero. This routine does not check arguments.
+ * \param x Argument with magnitude larger than GMX_FLOAT_MIN and smaller than
+ * GMX_FLOAT_MAX, i.e. within the range of single precision.
+ * For the single precision implementation this is obviously always
+ * true for positive values, but for double precision it adds an
+ * extra restriction since the first lookup step might have to be
+ * performed in single precision on some architectures. Note that the
+ * responsibility for checking falls on you - this routine does not
+ * check arguments.
+ *
* \return 1/x. Result is undefined if your argument was invalid.
*/
static inline SimdDouble gmx_simdcall
/*! \brief Division for SIMD doubles
*
 * \param    nom      Numerator
- * \param denom Denominator
+ * \param denom Denominator, with magnitude in range (GMX_FLOAT_MIN,GMX_FLOAT_MAX).
+ * For single precision this is equivalent to a nonzero argument,
+ * but in double precision it adds an extra restriction since
+ * the first lookup step might have to be performed in single
+ * precision on some architectures. Note that the responsibility
+ * for checking falls on you - this routine does not check arguments.
*
* \return nom/denom
*
* Illegal values in the masked-out elements will not lead to
* floating-point exceptions.
*
- * \param x Argument that must be >0 for masked-in entries
+ * \param x Argument that must be larger than GMX_FLOAT_MIN and smaller than
+ * GMX_FLOAT_MAX for masked-in entries.
+ * See \ref invsqrt for the discussion about argument restrictions.
* \param m Mask
* \return 1/sqrt(x). Result is undefined if your argument was invalid or
* entry was not masked, and 0.0 for masked-out entries.
/*! \brief Calculate 1/x for SIMD double, masked version.
*
- * \param x Argument that must be nonzero for non-masked entries.
+ * \param x Argument with magnitude larger than GMX_FLOAT_MIN and smaller than
+ * GMX_FLOAT_MAX for masked-in entries.
+ * See \ref invsqrt for the discussion about argument restrictions.
* \param m Mask
* \return 1/x for elements where m is true, or 0.0 for masked-out entries.
*/
}
-/*! \brief Calculate sqrt(x) correctly for SIMD doubles, including argument 0.0.
+/*! \brief Calculate sqrt(x) for SIMD doubles.
*
- * \param x Argument that must be >=0.
- * \return sqrt(x). If x=0, the result will correctly be set to 0.
- * if x>0 && x<float_min, the result will incorrectly be set to 0.
- * The result is undefined if the input value is negative.
+ * \copydetails sqrt(SimdFloat)
*/
+template <MathOptimization opt = MathOptimization::Safe>
static inline SimdDouble gmx_simdcall
sqrt(SimdDouble x)
{
- // As we might use a float version of rsqrt, we mask out small values
- return x * maskzInvsqrt(x, SimdDouble(GMX_FLOAT_MIN) <= x);
+ if (opt == MathOptimization::Safe)
+ {
+        // As we might use a float version of rsqrt, we mask out small values
+ SimdDouble res = maskzInvsqrt(x, SimdDouble(GMX_FLOAT_MIN) < x);
+ return res*x;
+ }
+ else
+ {
+ return x * invsqrt(x);
+ }
}
#if !GMX_SIMD_HAVE_NATIVE_LOG_DOUBLE
#if !GMX_SIMD_HAVE_NATIVE_EXP2_DOUBLE
/*! \brief SIMD double 2^x.
*
- * \param x Argument.
- * \result 2^x. Undefined if input argument caused overflow.
+ * \copydetails exp2(SimdFloat)
*/
+template <MathOptimization opt = MathOptimization::Safe>
static inline SimdDouble gmx_simdcall
exp2(SimdDouble x)
{
- const SimdDouble arglimit(1022.0);
+ // Lower bound: Clamp args that would lead to an IEEE fp exponent below -1023.
+ const SimdDouble smallArgLimit(-1023.0);
const SimdDouble CE11(4.435280790452730022081181e-10);
const SimdDouble CE10(7.074105630863314448024247e-09);
const SimdDouble CE9(1.017819803432096698472621e-07);
SimdDouble intpart;
SimdDouble fexppart;
SimdDouble p;
- SimdDBool m;
+
+ // Large negative values are valid arguments to exp2(), so there are two
+ // things we need to account for:
+ // 1. When the exponents reaches -1023, the (biased) exponent field will be
+ // zero and we can no longer multiply with it. There are special IEEE
+ // formats to handle this range, but for now we have to accept that
+ // we cannot handle those arguments. If input value becomes even more
+ // negative, it will start to loop and we would end up with invalid
+ // exponents. Thus, we need to limit or mask this.
+ // 2. For VERY large negative values, we will have problems that the
+ // subtraction to get the fractional part loses accuracy, and then we
+ // can end up with overflows in the polynomial.
+ //
+ // For now, we handle this by clamping smaller arguments to -1023. At this
+ // point we will already return zero, so we don't need to do anything
+ // extra for the exponent.
+
+ if (opt == MathOptimization::Safe)
+ {
+ x = max(x, smallArgLimit);
+ }
fexppart = ldexp(one, cvtR2I(x));
intpart = round(x);
- m = abs(x) <= arglimit;
- fexppart = selectByMask(fexppart, m);
x = x - intpart;
p = fma(CE11, x, CE10);
#if !GMX_SIMD_HAVE_NATIVE_EXP_DOUBLE
/*! \brief SIMD double exp(x).
*
- * In addition to scaling the argument for 2^x this routine correctly does
- * extended precision arithmetics to improve accuracy.
- *
- * \param x Argument.
- * \result exp(x). Undefined if input argument caused overflow,
- * which can happen if abs(x) \> 7e13.
+ * \copydetails exp(SimdFloat)
*/
+template <MathOptimization opt = MathOptimization::Safe>
static inline SimdDouble gmx_simdcall
exp(SimdDouble x)
{
const SimdDouble argscale(1.44269504088896340735992468100);
- const SimdDouble arglimit(1022.0);
+ // Lower bound: Clamp args that would lead to an IEEE fp exponent below -1023.
+ const SimdDouble smallArgLimit(-709.0895657128);
const SimdDouble invargscale0(-0.69314718055966295651160180568695068359375);
const SimdDouble invargscale1(-2.8235290563031577122588448175013436025525412068e-13);
const SimdDouble CE12(2.078375306791423699350304e-09);
SimdDouble fexppart;
SimdDouble intpart;
SimdDouble y, p;
- SimdDBool m;
+
+    // Large negative values are valid arguments to exp(), so there are two
+ // things we need to account for:
+ // 1. When the exponents reaches -1023, the (biased) exponent field will be
+ // zero and we can no longer multiply with it. There are special IEEE
+ // formats to handle this range, but for now we have to accept that
+ // we cannot handle those arguments. If input value becomes even more
+ // negative, it will start to loop and we would end up with invalid
+ // exponents. Thus, we need to limit or mask this.
+ // 2. For VERY large negative values, we will have problems that the
+ // subtraction to get the fractional part loses accuracy, and then we
+ // can end up with overflows in the polynomial.
+ //
+ // For now, we handle this by clamping smaller arguments to -1023*ln(2). At this
+ // point we will already return zero, so we don't need to do anything
+ // extra for the exponent.
+
+ if (opt == MathOptimization::Safe)
+ {
+ x = max(x, smallArgLimit);
+ }
y = x * argscale;
fexppart = ldexp(one, cvtR2I(y));
intpart = round(y);
- m = (abs(y) <= arglimit);
- fexppart = selectByMask(fexppart, m);
// Extended precision arithmetics
x = fma(invargscale0, intpart, x);
/*! \brief Calculate 1/sqrt(x) for SIMD double, but in single accuracy.
*
- * \param x Argument that must be >0. This routine does not check arguments.
+ * \param x Argument that must be larger than GMX_FLOAT_MIN and smaller than
+ * GMX_FLOAT_MAX, i.e. within the range of single precision.
+ * For the single precision implementation this is obviously always
+ * true for positive values, but for double precision it adds an
+ * extra restriction since the first lookup step might have to be
+ * performed in single precision on some architectures. Note that the
+ * responsibility for checking falls on you - this routine does not
+ * check arguments.
+ *
* \return 1/sqrt(x). Result is undefined if your argument was invalid.
*/
static inline SimdDouble gmx_simdcall
* Illegal values in the masked-out elements will not lead to
* floating-point exceptions.
*
- * \param x Argument that must be >0 for masked-in entries
+ * \param x Argument that must be larger than GMX_FLOAT_MIN and smaller than
+ * GMX_FLOAT_MAX, i.e. within the range of single precision.
+ * For the single precision implementation this is obviously always
+ * true for positive values, but for double precision it adds an
+ * extra restriction since the first lookup step might have to be
+ * performed in single precision on some architectures. Note that the
+ * responsibility for checking falls on you - this routine does not
+ * check arguments.
+ *
* \param m Mask
* \return 1/sqrt(x). Result is undefined if your argument was invalid or
* entry was not masked, and 0.0 for masked-out entries.
/*! \brief Calculate 1/sqrt(x) for two SIMD doubles, but single accuracy.
*
- * \param x0 First set of arguments, x0 must be positive - no argument checking.
- * \param x1 Second set of arguments, x1 must be positive - no argument checking.
+ * \param x0 First set of arguments, x0 must be in single range (see below).
+ * \param x1 Second set of arguments, x1 must be in single range (see below).
* \param[out] out0 Result 1/sqrt(x0)
* \param[out] out1 Result 1/sqrt(x1)
*
* In particular for double precision we can sometimes calculate square root
* pairs slightly faster by using single precision until the very last step.
+ *
+ * \note Both arguments must be larger than GMX_FLOAT_MIN and smaller than
+ * GMX_FLOAT_MAX, i.e. within the range of single precision.
+ * For the single precision implementation this is obviously always
+ * true for positive values, but for double precision it adds an
+ * extra restriction since the first lookup step might have to be
+ * performed in single precision on some architectures. Note that the
+ * responsibility for checking falls on you - this routine does not
+ * check arguments.
*/
static inline void gmx_simdcall
invsqrtPairSingleAccuracy(SimdDouble x0, SimdDouble x1,
/*! \brief Calculate 1/x for SIMD double, but in single accuracy.
*
- * \param x Argument that must be nonzero. This routine does not check arguments.
+ * \param x Argument with magnitude larger than GMX_FLOAT_MIN and smaller than
+ * GMX_FLOAT_MAX, i.e. within the range of single precision.
+ * For the single precision implementation this is obviously always
+ * true for positive values, but for double precision it adds an
+ * extra restriction since the first lookup step might have to be
+ * performed in single precision on some architectures. Note that the
+ * responsibility for checking falls on you - this routine does not
+ * check arguments.
+ *
* \return 1/x. Result is undefined if your argument was invalid.
*/
static inline SimdDouble gmx_simdcall
/*! \brief 1/x for masked entries of SIMD double, single accuracy.
*
- * \param x Argument that must be nonzero for non-masked entries.
+ * \param x Argument with magnitude larger than GMX_FLOAT_MIN and smaller than
+ * GMX_FLOAT_MAX, i.e. within the range of single precision.
+ * For the single precision implementation this is obviously always
+ * true for positive values, but for double precision it adds an
+ * extra restriction since the first lookup step might have to be
+ * performed in single precision on some architectures. Note that the
+ * responsibility for checking falls on you - this routine does not
+ * check arguments.
+ *
* \param m Mask
* \return 1/x for elements where m is true, or 0.0 for masked-out entries.
*/
}
-/*! \brief Calculate sqrt(x) (correct for 0.0) for SIMD double, single accuracy.
+/*! \brief Calculate sqrt(x) (correct for 0.0) for SIMD double, with single accuracy.
*
- * \param x Argument that must be >=0.
- * \return sqrt(x). If x<float_min, the result will correctly be set to 0.
- * The result is undefined if the input value is negative.
+ * \copydetails sqrt(SimdFloat)
*/
+template <MathOptimization opt = MathOptimization::Safe>
static inline SimdDouble gmx_simdcall
sqrtSingleAccuracy(SimdDouble x)
{
- return x * maskzInvsqrtSingleAccuracy(x, SimdDouble(GMX_FLOAT_MIN) <= x);
+ if (opt == MathOptimization::Safe)
+ {
+        SimdDouble res = maskzInvsqrtSingleAccuracy(x, SimdDouble(GMX_FLOAT_MIN) < x);
+ return res*x;
+ }
+ else
+ {
+ return x * invsqrtSingleAccuracy(x);
+ }
}
+
/*! \brief SIMD log(x). Double precision SIMD data, single accuracy.
*
* \param x Argument, should be >0.
return p;
}
-/*! \brief SIMD 2^x. Double precision SIMD data, single accuracy.
+/*! \brief SIMD 2^x. Double precision SIMD, single accuracy.
*
- * \param x Argument.
- * \result 2^x. Undefined if input argument caused overflow.
+ * \copydetails exp2(SimdFloat)
*/
+template <MathOptimization opt = MathOptimization::Safe>
static inline SimdDouble gmx_simdcall
exp2SingleAccuracy(SimdDouble x)
{
- // Lower bound: Disallow numbers that would lead to an IEEE fp exponent reaching +-127.
- const SimdDouble arglimit = SimdDouble(126.0);
+ // Lower bound: Clamp args that would lead to an IEEE fp exponent below -1023.
+ const SimdDouble smallArgLimit(-1023.0);
const SimdDouble CC6(0.0001534581200287996416911311);
const SimdDouble CC5(0.001339993121934088894618990);
const SimdDouble CC4(0.009618488957115180159497841);
SimdDouble intpart;
SimdDouble p;
- SimdDBool valuemask;
SimdDInt32 ix;
+ // Large negative values are valid arguments to exp2(), so there are two
+ // things we need to account for:
+ // 1. When the exponents reaches -1023, the (biased) exponent field will be
+ // zero and we can no longer multiply with it. There are special IEEE
+ // formats to handle this range, but for now we have to accept that
+ // we cannot handle those arguments. If input value becomes even more
+ // negative, it will start to loop and we would end up with invalid
+ // exponents. Thus, we need to limit or mask this.
+ // 2. For VERY large negative values, we will have problems that the
+ // subtraction to get the fractional part loses accuracy, and then we
+ // can end up with overflows in the polynomial.
+ //
+ // For now, we handle this by clamping smaller arguments to -1023. At this
+ // point we will already return zero, so we don't need to do anything
+ // extra for the exponent.
+
+ if (opt == MathOptimization::Safe)
+ {
+ x = max(x, smallArgLimit);
+ }
+
ix = cvtR2I(x);
intpart = round(x);
- valuemask = (abs(x) <= arglimit);
x = x - intpart;
p = fma(CC6, x, CC5);
p = fma(p, x, CC1);
p = fma(p, x, one);
x = ldexp(p, ix);
- x = selectByMask(x, valuemask);
return x;
}
-/*! \brief SIMD exp(x). Double precision SIMD data, single accuracy.
+
+
+/*! \brief SIMD exp(x). Double precision SIMD, single accuracy.
*
- * \param x Argument.
- * \result exp(x). Undefined if input argument caused overflow.
+ * \copydetails exp(SimdFloat)
*/
+template <MathOptimization opt = MathOptimization::Safe>
static inline SimdDouble gmx_simdcall
expSingleAccuracy(SimdDouble x)
{
const SimdDouble argscale(1.44269504088896341);
- // Lower bound: Disallow numbers that would lead to an IEEE fp exponent reaching +-127
- const SimdDouble arglimit(126.0);
+ // Lower bound: Clamp args that would lead to an IEEE fp exponent below -1023.
+ const SimdDouble smallArgLimit(-709.0895657128);
const SimdDouble invargscale(-0.69314718055994528623);
const SimdDouble CC4(0.00136324646882712841033936);
const SimdDouble CC3(0.00836596917361021041870117);
const SimdDouble one(1.0);
SimdDouble intpart;
SimdDouble y, p;
- SimdDBool valuemask;
SimdDInt32 iy;
+    // Large negative values are valid arguments to exp(), so there are two
+ // things we need to account for:
+    // 1. When the exponent reaches -1023, the (biased) exponent field will be
+ // zero and we can no longer multiply with it. There are special IEEE
+ // formats to handle this range, but for now we have to accept that
+ // we cannot handle those arguments. If input value becomes even more
+ // negative, it will start to loop and we would end up with invalid
+ // exponents. Thus, we need to limit or mask this.
+ // 2. For VERY large negative values, we will have problems that the
+ // subtraction to get the fractional part loses accuracy, and then we
+ // can end up with overflows in the polynomial.
+ //
+    // For now, we handle this by clamping smaller arguments to -1023*ln(2). At this
+ // point we will already return zero, so we don't need to do anything
+ // extra for the exponent.
+
+ if (opt == MathOptimization::Safe)
+ {
+ x = max(x, smallArgLimit);
+ }
+
y = x * argscale;
iy = cvtR2I(y);
intpart = round(y); // use same rounding algorithm here
- valuemask = (abs(y) <= arglimit);
// Extended precision arithmetics not needed since
// we have double precision and only need single accuracy.
p = fma(x*x, p, x);
p = p + one;
x = ldexp(p, iy);
- x = selectByMask(x, valuemask);
return x;
}
+
/*! \brief SIMD erf(x). Double precision SIMD data, single accuracy.
*
* \param x The value to calculate erf(x) for.
* added to \f$1/r\f$ the error will be insignificant.
*
*/
-static SimdDouble gmx_simdcall
+static inline SimdDouble gmx_simdcall
pmeForceCorrectionSingleAccuracy(SimdDouble z2)
{
const SimdDouble FN6(-1.7357322914161492954e-8);
* This approximation achieves an accuracy slightly lower than 1e-6; when
* added to \f$1/r\f$ the error will be insignificant.
*/
-static SimdDouble gmx_simdcall
+static inline SimdDouble gmx_simdcall
pmePotentialCorrectionSingleAccuracy(SimdDouble z2)
{
const SimdDouble VN6(1.9296833005951166339e-8);
/*! \brief Calculate 1/sqrt(x) for SIMD4 float.
*
- * \param x Argument that must be >0. This routine does not check arguments.
- * \return 1/sqrt(x). Result is undefined if your argument was invalid.
+ * \param x Argument that must be larger than GMX_FLOAT_MIN and smaller than
+ * GMX_FLOAT_MAX, i.e. within the range of single precision.
+ * For the single precision implementation this is obviously always
+ * true for positive values, but for double precision it adds an
+ * extra restriction since the first lookup step might have to be
+ * performed in single precision on some architectures. Note that the
+ * responsibility for checking falls on you - this routine does not
+ * check arguments.
+ * \return 1/sqrt(x). Result is undefined if your argument was invalid.
*/
static inline Simd4Float gmx_simdcall
invsqrt(Simd4Float x)
/*! \brief Calculate 1/sqrt(x) for SIMD4 double.
*
- * \param x Argument that must be >0. This routine does not check arguments.
- * \return 1/sqrt(x). Result is undefined if your argument was invalid.
+ * \param x Argument that must be larger than GMX_FLOAT_MIN and smaller than
+ * GMX_FLOAT_MAX, i.e. within the range of single precision.
+ * For the single precision implementation this is obviously always
+ * true for positive values, but for double precision it adds an
+ * extra restriction since the first lookup step might have to be
+ * performed in single precision on some architectures. Note that the
+ * responsibility for checking falls on you - this routine does not
+ * check arguments.
+ * \return 1/sqrt(x). Result is undefined if your argument was invalid.
*/
static inline Simd4Double gmx_simdcall
invsqrt(Simd4Double x)
/*! \brief Calculate 1/sqrt(x) for SIMD4 double, but in single accuracy.
*
- * \param x Argument that must be >0. This routine does not check arguments.
- * \return 1/sqrt(x). Result is undefined if your argument was invalid.
+ * \param x Argument that must be larger than GMX_FLOAT_MIN and smaller than
+ * GMX_FLOAT_MAX, i.e. within the range of single precision.
+ * For the single precision implementation this is obviously always
+ * true for positive values, but for double precision it adds an
+ * extra restriction since the first lookup step might have to be
+ * performed in single precision on some architectures. Note that the
+ * responsibility for checking falls on you - this routine does not
+ * check arguments.
+ * \return 1/sqrt(x). Result is undefined if your argument was invalid.
*/
static inline Simd4Double gmx_simdcall
invsqrtSingleAccuracy(Simd4Double x)
#if GMX_SIMD_HAVE_FLOAT
/*! \brief Calculate 1/sqrt(x) for SIMD float, only targeting single accuracy.
*
- * \param x Argument that must be >0. This routine does not check arguments.
- * \return 1/sqrt(x). Result is undefined if your argument was invalid.
+ * \param x Argument that must be larger than GMX_FLOAT_MIN and smaller than
+ * GMX_FLOAT_MAX, i.e. within the range of single precision.
+ * For the single precision implementation this is obviously always
+ * true for positive values, but for double precision it adds an
+ * extra restriction since the first lookup step might have to be
+ * performed in single precision on some architectures. Note that the
+ * responsibility for checking falls on you - this routine does not
+ * check arguments.
+ * \return 1/sqrt(x). Result is undefined if your argument was invalid.
*/
static inline SimdFloat gmx_simdcall
invsqrtSingleAccuracy(SimdFloat x)
* Illegal values in the masked-out elements will not lead to
* floating-point exceptions.
*
- * \param x Argument that must be >0 for masked-in entries
+ * \param x Argument that must be larger than GMX_FLOAT_MIN and smaller than
+ * GMX_FLOAT_MAX, i.e. within the range of single precision.
+ * For the single precision implementation this is obviously always
+ * true for positive values, but for double precision it adds an
+ * extra restriction since the first lookup step might have to be
+ * performed in single precision on some architectures. Note that the
+ * responsibility for checking falls on you - this routine does not
+ * check arguments.
* \param m Mask
- * \return 1/sqrt(x). Result is undefined if your argument was invalid or
- * entry was not masked, and 0.0 for masked-out entries.
+ * \return 1/sqrt(x). Result is undefined if your argument was invalid or
+ * entry was not masked, and 0.0 for masked-out entries.
*/
static inline SimdFloat
maskzInvsqrtSingleAccuracy(SimdFloat x, SimdFBool m)
/*! \brief Calculate 1/sqrt(x) for two SIMD floats, only targeting single accuracy.
*
- * \param x0 First set of arguments, x0 must be positive - no argument checking.
- * \param x1 Second set of arguments, x1 must be positive - no argument checking.
+ * \param x0 First set of arguments, x0 must be in single range (see below).
+ * \param x1 Second set of arguments, x1 must be in single range (see below).
* \param[out] out0 Result 1/sqrt(x0)
* \param[out] out1 Result 1/sqrt(x1)
*
* In particular for double precision we can sometimes calculate square root
* pairs slightly faster by using single precision until the very last step.
+ *
+ * \note Both arguments must be larger than GMX_FLOAT_MIN and smaller than
+ * GMX_FLOAT_MAX, i.e. within the range of single precision.
+ * For the single precision implementation this is obviously always
+ * true for positive values, but for double precision it adds an
+ * extra restriction since the first lookup step might have to be
+ * performed in single precision on some architectures. Note that the
+ * responsibility for checking falls on you - this routine does not
+ * check arguments.
*/
static inline void gmx_simdcall
invsqrtPairSingleAccuracy(SimdFloat x0, SimdFloat x1,
/*! \brief Calculate 1/x for SIMD float, only targeting single accuracy.
*
- * \param x Argument that must be nonzero. This routine does not check arguments.
- * \return 1/x. Result is undefined if your argument was invalid.
+ * \param x Argument with magnitude larger than GMX_FLOAT_MIN and smaller than
+ * GMX_FLOAT_MAX, i.e. within the range of single precision.
+ * For the single precision implementation this is obviously always
+ * true for positive values, but for double precision it adds an
+ * extra restriction since the first lookup step might have to be
+ * performed in single precision on some architectures. Note that the
+ * responsibility for checking falls on you - this routine does not
+ * check arguments.
+ * \return 1/x. Result is undefined if your argument was invalid.
*/
static inline SimdFloat gmx_simdcall
invSingleAccuracy(SimdFloat x)
/*! \brief Calculate 1/x for masked SIMD floats, only targeting single accuracy.
*
- * \param x Argument that must be nonzero for non-masked entries.
+ * \param x Argument with magnitude larger than GMX_FLOAT_MIN and smaller than
+ * GMX_FLOAT_MAX, i.e. within the range of single precision.
+ * For the single precision implementation this is obviously always
+ * true for positive values, but for double precision it adds an
+ * extra restriction since the first lookup step might have to be
+ * performed in single precision on some architectures. Note that the
+ * responsibility for checking falls on you - this routine does not
+ * check arguments.
* \param m Mask
- * \return 1/x for elements where m is true, or 0.0 for masked-out entries.
+ * \return 1/x for elements where m is true, or 0.0 for masked-out entries.
*/
static inline SimdFloat
maskzInvSingleAccuracy(SimdFloat x, SimdFBool m)
return maskzInv(x, m);
}
-/*! \brief Calculate sqrt(x) for SIMD float, only targeting single accuracy.
+/*! \brief Calculate sqrt(x) for SIMD float, always targeting single accuracy.
*
- * \param x Argument that must be >=0.
- * \return sqrt(x). If x=0, the result will correctly be set to 0.
- * The result is undefined if the input value is negative.
+ * \copydetails sqrt(SimdFloat)
*/
+template <MathOptimization opt = MathOptimization::Safe>
static inline SimdFloat gmx_simdcall
sqrtSingleAccuracy(SimdFloat x)
{
- return sqrt(x);
+ return sqrt<opt>(x);
}
/*! \brief SIMD float log(x), only targeting single accuracy. This is the natural logarithm.
/*! \brief SIMD float 2^x, only targeting single accuracy.
*
- * \param x Argument.
- * \result 2^x. Undefined if input argument caused overflow.
+ * \copydetails exp2(SimdFloat)
*/
+template <MathOptimization opt = MathOptimization::Safe>
static inline SimdFloat gmx_simdcall
exp2SingleAccuracy(SimdFloat x)
{
- return exp2(x);
+ return exp2<opt>(x);
}
/*! \brief SIMD float e^x, only targeting single accuracy.
*
- * \param x Argument.
- * \result exp(x). Undefined if input argument caused overflow.
+ * \copydetails exp(SimdFloat)
*/
+template <MathOptimization opt = MathOptimization::Safe>
static inline SimdFloat gmx_simdcall
expSingleAccuracy(SimdFloat x)
{
- return exp(x);
+ return exp<opt>(x);
}
+
/*! \brief SIMD float erf(x), only targeting single accuracy.
*
* \param x The value to calculate erf(x) for.
#if GMX_SIMD4_HAVE_FLOAT
/*! \brief Calculate 1/sqrt(x) for SIMD4 float, only targeting single accuracy.
*
- * \param x Argument that must be >0. This routine does not check arguments.
+ * \param x Argument that must be larger than GMX_FLOAT_MIN and smaller than
+ * GMX_FLOAT_MAX, i.e. within the range of single precision.
+ * For the single precision implementation this is obviously always
+ * true for positive values, but for double precision it adds an
+ * extra restriction since the first lookup step might have to be
+ * performed in single precision on some architectures. Note that the
+ * responsibility for checking falls on you - this routine does not
+ * check arguments.
* \return 1/sqrt(x). Result is undefined if your argument was invalid.
*/
static inline Simd4Float gmx_simdcall
#include "gromacs/mdlib/groupcoord.h"
#include "gromacs/mdlib/mdrun.h"
#include "gromacs/mdlib/sim_util.h"
+#include "gromacs/mdtypes/commrec.h"
#include "gromacs/mdtypes/inputrec.h"
#include "gromacs/mdtypes/md_enums.h"
+#include "gromacs/mdtypes/observableshistory.h"
+#include "gromacs/mdtypes/swaphistory.h"
#include "gromacs/pbcutil/pbc.h"
#include "gromacs/timing/wallcycle.h"
-#include "gromacs/topology/mtop_util.h"
+#include "gromacs/topology/mtop_lookup.h"
#include "gromacs/topology/topology.h"
#include "gromacs/utility/cstringutil.h"
#include "gromacs/utility/fatalerror.h"
static const char *SwS = {"SWAP:"}; /**< For output that comes from the swap module */
static const char *SwSEmpty = {" "}; /**< Placeholder for multi-line output */
static const char* CompStr[eCompNR] = {"A", "B" }; /**< Compartment name */
-static const char *SwapStr[eSwapTypesNR+1] = { "", "X-", "Y-", "Z-", NULL}; /**< Name for the swap types. */
-static const char *DimStr[DIM+1] = { "X", "Y", "Z", NULL}; /**< Name for the swap dimension. */
+static const char *SwapStr[eSwapTypesNR+1] = { "", "X-", "Y-", "Z-", nullptr}; /**< Name for the swap types. */
+static const char *DimStr[DIM+1] = { "X", "Y", "Z", nullptr}; /**< Name for the swap dimension. */
/** Keep track of through which channel the ions have passed */
enum eChannelHistory {
rvec_add(reference, dx, correctPBCimage);
/* Take weight into account */
- if (NULL == weights)
+ if (nullptr == weights)
{
wi = 1.0;
}
add_to_list(iAtom, &g->comp[comp], dist);
/* Master also checks for ion groups through which channel each ion has passed */
- if (MASTER(cr) && (g->comp_now != NULL) && !bIsSolvent)
+ if (MASTER(cr) && (g->comp_now != nullptr) && !bIsSolvent)
{
int globalAtomNr = g->ind[iAtom] + 1; /* PDB index starts at 1 ... */
detect_flux_per_channel(g, globalAtomNr, comp, g->xc[iAtom],
}
}
- if (bIsSolvent && NULL != fpout)
+ if (bIsSolvent && nullptr != fpout)
{
fprintf(fpout, "# Solv. molecules in comp.%s: %d comp.%s: %d\n",
CompStr[eCompA], g->comp[eCompA].nMol,
* over the values from .cpt file to the swap data structure.
*/
static void get_initial_ioncounts_from_cpt(
- t_inputrec *ir, swapstate_t *swapstate,
+ t_inputrec *ir, swaphistory_t *swapstate,
t_commrec *cr, gmx_bool bVerbose)
{
t_swapcoords *sc;
/*! \brief Ensure that each atom belongs to at most one of the swap groups. */
static void check_swap_groups(t_swap *s, int nat, gmx_bool bVerbose)
{
- int *nGroup = NULL; /* This array counts for each atom in the MD system to
- how many swap groups it belongs (should be 0 or 1!) */
+ int *nGroup = nullptr; /* This array counts for each atom in the MD system to
+ how many swap groups it belongs (should be 0 or 1!) */
int ind = -1;
- int nMultiple = 0; /* Number of atoms belonging to multiple groups */
+ int nMultiple = 0; /* Number of atoms belonging to multiple groups */
if (bVerbose)
int igroup,
t_swap *s,
gmx_bool bVerbose,
- const gmx_mtop_atomlookup_t alook,
gmx_mtop_t *mtop)
{
- int molb, molnr, atnr_mol;
t_swapgrp *g = &s->group[igroup];
int *ind = s->group[igroup].ind;
int nat = s->group[igroup].nat;
/* Determine the number of solvent atoms per solvent molecule from the
* first solvent atom: */
- gmx_mtop_atomnr_to_molblock_ind(alook, ind[0], &molb, &molnr, &atnr_mol);
+ int molb = 0;
+ mtopGetMolblockIndex(mtop, ind[0], &molb, nullptr, nullptr);
int apm = mtop->molblock[molb].natoms_mol;
if (bVerbose)
/* Check whether this is also true for all other solvent atoms */
for (int i = 1; i < nat; i++)
{
- gmx_mtop_atomnr_to_molblock_ind(alook, ind[i], &molb, &molnr, &atnr_mol);
+ mtopGetMolblockIndex(mtop, ind[i], &molb, nullptr, nullptr);
if (apm != mtop->molblock[molb].natoms_mol)
{
gmx_fatal(FARGS, "Not all molecules of swap group %d consist of %d atoms.",
}
// Center of split groups
- snprintf(buf, STRLEN, "%scenter of %s of split group 0", SwapStr[ir->eSwapCoords], (NULL != s->group[eGrpSplit0].m) ? "mass" : "geometry");
+ snprintf(buf, STRLEN, "%scenter of %s of split group 0", SwapStr[ir->eSwapCoords], (nullptr != s->group[eGrpSplit0].m) ? "mass" : "geometry");
legend[count++] = gmx_strdup(buf);
- snprintf(buf, STRLEN, "%scenter of %s of split group 1", SwapStr[ir->eSwapCoords], (NULL != s->group[eGrpSplit1].m) ? "mass" : "geometry");
+ snprintf(buf, STRLEN, "%scenter of %s of split group 1", SwapStr[ir->eSwapCoords], (nullptr != s->group[eGrpSplit1].m) ? "mass" : "geometry");
legend[count++] = gmx_strdup(buf);
// Ion flux for each channel and ion type
* they go.
*/
static void detect_flux_per_channel_init(
- t_commrec *cr,
- t_swap *s,
- swapstate_t *swapstate,
- gmx_bool bStartFromCpt)
+ t_swap *s,
+ swaphistory_t *swapstate,
+ gmx_bool bStartFromCpt)
{
t_swapgrp *g;
swapstateIons_t *gs;
+ /* All these flux detection routines run on the master only */
+ if (swapstate == nullptr)
+ {
+ return;
+ }
for (int ig = eSwapFixedGrpNR; ig < s->ngrp; ig++)
{
g = &s->group[ig];
gs = &swapstate->ionType[ig - eSwapFixedGrpNR];
- /* All these flux detection routines run on the master only */
- if (!MASTER(cr))
- {
- g->comp_now = NULL;
- g->comp_from = NULL;
- g->channel_label = NULL;
-
- return;
- }
-
/******************************************************/
/* Channel and domain history for the individual ions */
/******************************************************/
{
char *env = getenv("GMX_COMPELDUMP");
- if (env != NULL)
+ if (env != nullptr)
{
fprintf(stderr, "\n%s Found env.var. GMX_COMPELDUMP, will output CompEL starting structure made whole.\n"
"%s In case of multimeric channels, please check whether they have the correct PBC representation.\n",
SwS, SwSEmpty);
- write_sto_conf_mtop("CompELAssumedWholeConfiguration.pdb", *mtop->name, mtop, x, NULL, ePBC, box);
+ write_sto_conf_mtop("CompELAssumedWholeConfiguration.pdb", *mtop->name, mtop, x, nullptr, ePBC, box);
}
}
* multimeric channels at the last time step.
*/
static void init_swapstate(
- swapstate_t *swapstate,
+ swaphistory_t *swapstate,
t_swapcoords *sc,
gmx_mtop_t *mtop,
rvec x[], /* the initial positions */
matrix box,
- int ePBC)
+ t_inputrec *ir)
{
- rvec *x_pbc = NULL; /* positions of the whole MD system with molecules made whole */
+ rvec *x_pbc = nullptr; /* positions of the whole MD system with molecules made whole */
t_swapgrp *g;
t_swap *s;
}
else
{
+ swapstate->eSwapCoords = ir->eSwapCoords;
+
/* Set the number of ion types and allocate memory for checkpointing */
swapstate->nIonTypes = s->ngrp - eSwapFixedGrpNR;
snew(swapstate->ionType, swapstate->nIonTypes);
copy_rvecn(x, x_pbc, 0, mtop->natoms);
/* This can only make individual molecules whole, not multimers */
- do_pbc_mtop(NULL, ePBC, box, mtop, x_pbc);
+ do_pbc_mtop(nullptr, ir->ePBC, box, mtop, x_pbc);
/* Output the starting structure? */
- outputStartStructureIfWanted(mtop, x_pbc, ePBC, box);
+ outputStartStructureIfWanted(mtop, x_pbc, ir->ePBC, box);
/* If this is the first run (i.e. no checkpoint present) we assume
* that the starting positions give us the correct PBC representation */
* This routine should be called for the 'anions' and 'cations' group,
* of which the indices were lumped together in the older version of the code.
*/
-void copyIndicesToGroup(
+static void copyIndicesToGroup(
int *indIons,
int nIons,
t_swapGroup *g,
* #4 cations - empty before conversion
*
*/
-void convertOldToNewGroupFormat(
+static void convertOldToNewGroupFormat(
t_swapcoords *sc,
gmx_mtop_t *mtop,
gmx_bool bVerbose,
t_commrec *cr)
{
- t_atom *atom;
- gmx_mtop_atomlookup_t alook = gmx_mtop_atomlookup_init(mtop);
t_swapGroup *g = &sc->grp[3];
/* Loop through the atom indices of group #3 (anions) and put all indices
*/
int nAnions = 0;
int nCations = 0;
- int *indAnions = NULL;
- int *indCations = NULL;
+ int *indAnions = nullptr;
+ int *indCations = nullptr;
snew(indAnions, g->nat);
snew(indCations, g->nat);
+ int molb = 0;
for (int i = 0; i < g->nat; i++)
{
- gmx_mtop_atomnr_to_atom(alook, g->ind[i], &atom);
- if (atom->q < 0)
+ const t_atom &atom = mtopGetAtomParameters(mtop, g->ind[i], &molb);
+ if (atom.q < 0)
{
// This is an anion, add it to the list of anions
indAnions[nAnions++] = g->ind[i];
/*! \brief Returns TRUE if we started from an old .tpr
*
* Then we need to re-sort anions and cations into separate groups */
-gmx_bool bConvertFromOldTpr(t_swapcoords *sc)
+static gmx_bool bConvertFromOldTpr(t_swapcoords *sc)
{
// If the last group has no atoms it means we need to convert!
if ( (sc->ngrp >= 5) && (0 == sc->grp[4].nat) )
void init_swapcoords(
FILE *fplog,
- gmx_bool bVerbose,
t_inputrec *ir,
const char *fn,
gmx_mtop_t *mtop,
rvec x[],
matrix box,
- swapstate_t *swapstate,
+ ObservablesHistory *oh,
t_commrec *cr,
const gmx_output_env_t *oenv,
- unsigned long Flags)
+ const MdrunOptions &mdrunOptions)
{
t_swapcoords *sc;
t_swap *s;
- t_atom *atom;
t_swapgrp *g;
swapstateIons_t *gs;
- gmx_bool bAppend, bStartFromCpt, bRerun;
- gmx_mtop_atomlookup_t alook = NULL;
-
- alook = gmx_mtop_atomlookup_init(mtop);
+ gmx_bool bAppend, bStartFromCpt;
- matrix boxCopy;
+ swaphistory_t *swapstate = nullptr;
-
if ( (PAR(cr)) && !DOMAINDECOMP(cr) )
{
gmx_fatal(FARGS, "Position swapping is only implemented for domain decomposition!");
}
- bAppend = Flags & MD_APPENDFILES;
- bStartFromCpt = Flags & MD_STARTFROMCPT;
- bRerun = Flags & MD_RERUN;
+ bAppend = mdrunOptions.continuationOptions.appendFiles;
+ bStartFromCpt = mdrunOptions.continuationOptions.startedFromCheckpoint;
sc = ir->swap;
snew(sc->si_priv, 1);
s = sc->si_priv;
- if (bRerun)
+ if (mdrunOptions.rerun)
{
if (PAR(cr))
{
break;
}
+ const gmx_bool bVerbose = mdrunOptions.verbose;
+
// For compatibility with old .tpr files
if (bConvertFromOldTpr(sc) )
{
if (MASTER(cr))
{
- init_swapstate(swapstate, sc, mtop, x, box, ir->ePBC);
+ if (oh->swapHistory == nullptr)
+ {
+ oh->swapHistory = std::unique_ptr<swaphistory_t>(new swaphistory_t {});
+ }
+ swapstate = oh->swapHistory.get();
+
+ init_swapstate(swapstate, sc, mtop, x, box, ir);
}
/* After init_swapstate we have a set of (old) whole positions for our
real charge;
g = &(s->group[ig]);
- g->apm = get_group_apm_check(ig, s, MASTER(cr) && bVerbose, alook, mtop);
+ g->apm = get_group_apm_check(ig, s, MASTER(cr) && bVerbose, mtop);
/* Since all molecules of a group are equal, we only need enough space
* to determine properties of a single molecule at at time */
snew(g->m, g->apm); /* For the center of mass */
charge = 0; /* To determine the charge imbalance */
+ int molb = 0;
for (int j = 0; j < g->apm; j++)
{
- gmx_mtop_atomnr_to_atom(alook, g->ind[j], &atom);
- g->m[j] = atom->m;
- charge += atom->q;
+ const t_atom &atom = mtopGetAtomParameters(mtop, g->ind[j], &molb);
+ g->m[j] = atom.m;
+ charge += atom.q;
}
/* Total charge of one molecule of this group: */
g->q = charge;
{
/* Save the split group masses if mass-weighting is requested */
snew(g->m, g->nat);
+ int molb = 0;
for (int i = 0; i < g->nat; i++)
{
- gmx_mtop_atomnr_to_atom(alook, g->ind[i], &atom);
- g->m[i] = atom->m;
+ g->m[i] = mtopGetAtomMass(mtop, g->ind[i], &molb);
}
}
}
/* Make a t_pbc struct on all nodes so that the molecules
* chosen for an exchange can be made whole. */
snew(s->pbc, 1);
- /* Every node needs to call set_pbc() and therefore every node needs
- * to know the box dimensions */
- copy_mat(box, boxCopy);
- if (PAR(cr))
- {
- gmx_bcast(sizeof(boxCopy), boxCopy, cr);
- }
- set_pbc(s->pbc, -1, boxCopy);
if (MASTER(cr))
{
sc->cyl1r, sc->cyl1u, sc->cyl1l);
fprintf(s->fpout, "#\n");
- if (!bRerun)
+ if (!mdrunOptions.rerun)
{
fprintf(s->fpout, "# Coupling constant (number of swap attempt steps to average over): %d (translates to %f ps).\n",
sc->nAverage, sc->nAverage*sc->nstswap*ir->delta_t);
}
else
{
- s->fpout = NULL;
+ s->fpout = nullptr;
}
/* Prepare for parallel or serial run */
g = &(s->group[ig]);
g->nat_loc = 0;
g->nalloc_loc = 0;
- g->ind_loc = NULL;
+ g->ind_loc = nullptr;
}
}
else
else
{
fprintf(stderr, "%s Determining initial numbers of ions per compartment.\n", SwS);
- get_initial_ioncounts(ir, x, box, cr, bRerun);
+ get_initial_ioncounts(ir, x, box, cr, mdrunOptions.rerun);
}
/* Prepare (further) checkpoint writes ... */
}
/* Initialize arrays that keep track of through which channel the ions go */
- detect_flux_per_channel_init(cr, s, swapstate, bStartFromCpt);
+ detect_flux_per_channel_init(s, swapstate, bStartFromCpt);
/* We need to print the legend if we open this file for the first time. */
if (MASTER(cr) && !bAppend)
gmx_int64_t step,
double t,
t_inputrec *ir,
- gmx_wallcycle_t wcycle,
+ gmx_wallcycle *wcycle,
rvec x[],
matrix box,
- gmx_mtop_t *mtop,
gmx_bool bVerbose,
gmx_bool bRerun)
{
t_swapgrp *g, *gsol;
int isol, iion;
rvec com_solvent, com_particle; /* solvent and swap molecule's center of mass */
- gmx_mtop_atomlookup_t alook = NULL;
wallcycle_start(wcycle, ewcSWAP);
sc = ir->swap;
s = sc->si_priv;
+ set_pbc(s->pbc, ir->ePBC, box);
/* Assemble the positions of the split groups, i.e. the channels.
* Here we also pass a shifts array to communicate_group_positions(), so that it can make
for (ig = eSwapFixedGrpNR; ig < s->ngrp; ig++)
{
g = &(s->group[ig]);
- communicate_group_positions(cr, g->xc, NULL, NULL, FALSE,
- x, g->nat, g->nat_loc, g->ind_loc, g->c_ind_loc, NULL, NULL);
+ communicate_group_positions(cr, g->xc, nullptr, nullptr, FALSE,
+ x, g->nat, g->nat_loc, g->ind_loc, g->c_ind_loc, nullptr, nullptr);
/* Determine how many ions of this type each compartment contains */
sortMoleculesIntoCompartments(g, cr, sc, box, step, s->fpout, bRerun, FALSE);
/* Since we here know that we have to perform ion/water position exchanges,
* we now assemble the solvent positions */
g = &(s->group[eGrpSolvent]);
- communicate_group_positions(cr, g->xc, NULL, NULL, FALSE,
- x, g->nat, g->nat_loc, g->ind_loc, g->c_ind_loc, NULL, NULL);
+ communicate_group_positions(cr, g->xc, nullptr, nullptr, FALSE,
+ x, g->nat, g->nat_loc, g->ind_loc, g->c_ind_loc, nullptr, nullptr);
/* Determine how many molecules of solvent each compartment contains */
sortMoleculesIntoCompartments(g, cr, sc, box, step, s->fpout, bRerun, TRUE);
}
/* Now actually perform the particle exchanges, one swap group after another */
- alook = gmx_mtop_atomlookup_init(mtop);
gsol = &s->group[eGrpSolvent];
for (ig = eSwapFixedGrpNR; ig < s->ngrp; ig++)
{
SwS, nswaps, nswaps > 1 ? "s" : "", step, g->molname);
}
}
- gmx_mtop_atomlookup_destroy(alook);
- if (s->fpout != NULL)
+ if (s->fpout != nullptr)
{
print_ionlist(s, t, " # after swap");
}
vi = v_inrange - dc*(i - i_inrange)*dx;
}
- if (table_v != NULL)
+ if (table_v != nullptr)
{
table_v[i] = vi;
}
/* Currently the last value only contains half the force: double it */
table_f[0] *= 2;
- if (table_v != NULL && table_fdv0 != NULL)
+ if (table_v != nullptr && table_fdv0 != nullptr)
{
/* Copy to FDV0 table too. Allocation occurs in forcerec.c,
* init_ewald_f_table().
/* Don't try to be more accurate on energy than the precision */
func_tol = std::max(func_tol, static_cast<double>(GMX_REAL_EPS));
- sc_func = std::cbrt(third_deriv_max/(6*12*sqrt(3)*func_tol))*x_scale;
+ sc_func = std::cbrt(third_deriv_max/(6*12*std::sqrt(3.0)*func_tol))*x_scale;
return std::max(sc_deriv, sc_func);
}
return sc;
}
-/* Calculate the potential and force for an r value
- * in exactly the same way it is done in the inner loop.
- * VFtab is a pointer to the table data, offset is
- * the point where we should begin and stride is
- * 4 if we have a buckingham table, 3 otherwise.
- * If you want to evaluate table no N, set offset to 4*N.
- *
- * We use normal precision here, since that is what we
- * will use in the inner loops.
- */
-static void evaluate_table(real VFtab[], int offset, int stride,
- real tabscale, real r, real *y, real *yp)
-{
- int n;
- real rt, eps, eps2;
- real Y, F, Geps, Heps2, Fp;
-
- rt = r*tabscale;
- n = (int)rt;
- eps = rt - n;
- eps2 = eps*eps;
- n = offset+stride*n;
- Y = VFtab[n];
- F = VFtab[n+1];
- Geps = eps*VFtab[n+2];
- Heps2 = eps2*VFtab[n+3];
- Fp = F+Geps+Heps2;
- *y = Y+eps*Fp;
- *yp = (Fp+Geps+2.0*Heps2)*tabscale;
-}
-
static void copy2table(int n, int offset, int stride,
double x[], double Vtab[], double Ftab[], real scalefactor,
real dest[])
{
char *libfn;
char buf[STRLEN];
- double **yy = NULL, start, end, dx0, dx1, ssd, vm, vp, f, numf;
+ double **yy = nullptr, start, end, dx0, dx1, ssd, vm, vp, f, numf;
int k, i, nx, nx0 = 0, ny, nny, ns;
gmx_bool bAllZero, bZeroV, bZeroF;
double tabscale;
t_forcerec *fr, real rtab,
const char *tabfn)
{
- t_forcetable *dispersionCorrectionTable = NULL;
+ t_forcetable *dispersionCorrectionTable = nullptr;
- if (tabfn == NULL)
+ if (tabfn == nullptr)
{
if (debug)
{
buf[len-1] = '\0';
#elif (defined(__sun))
/*Solaris*/
- ctime_r(clock, buf, len);
+ ctime_r(clock, buf);
#else
char tmpbuf[30];
ctime_r(clock, tmpbuf);
void gmx_format_current_time(char *buf, size_t len)
{
- time_t clock = time(NULL);
+ time_t clock = time(nullptr);
gmx_ctime_r(&clock, buf, len);
}