# be tagged. Official GROMACS releases should be mappable to a distinct gmxapi
# release string. For roadmap details, see https://gitlab.com/gromacs/gromacs/-/issues/2585
set(GMXAPI_MAJOR 0)
-set(GMXAPI_MINOR 1)
+set(GMXAPI_MINOR 2)
set(GMXAPI_PATCH 0)
set(GMXAPI_RELEASE ${GMXAPI_MAJOR}.${GMXAPI_MINOR}.${GMXAPI_PATCH})
string(REPLACE " " ";" IGNORED_CLANG_ALL_WARNINGS "${IGNORED_CLANG_ALL_WARNINGS}")
set(TESTUTILS_DIR ${PROJECT_SOURCE_DIR}/src/testutils)
-include(${PROJECT_SOURCE_DIR}/src/testutils/TestMacros.cmake)
+if (BUILD_TESTING)
+ if(NOT GMX_DEVELOPER_BUILD)
+ set(UNITTEST_TARGET_OPTIONS EXCLUDE_FROM_ALL)
+ endif()
+ include(${TESTUTILS_DIR}/TestMacros.cmake)
+endif()
# this allows all nblib tests to be run with "make check-nblib"
add_custom_target(check-nblib
set_property(CACHE GMX_CUDA_TARGET_COMPUTE PROPERTY TYPE STRING)
endif()
+# FindCUDA.cmake is unaware of the mechanism used by cmake to embed
+# the compiler flag for the required C++ standard in the generated
+# build files, so we have to pass it ourselves
+if (CUDA_VERSION VERSION_LESS 10.2)
+ # CUDA doesn't formally support C++17 until version 10.2, so for
+ # now host-side code that compiles with CUDA is restricted to
+ # C++14. This needs to be expressed formally for older CUDA
+ # versions.
+ list(APPEND GMX_CUDA_NVCC_FLAGS "${CMAKE_CXX14_STANDARD_COMPILE_OPTION}")
+else()
+ list(APPEND GMX_CUDA_NVCC_FLAGS "${CMAKE_CXX17_STANDARD_COMPILE_OPTION}")
+endif()
+
# assemble the CUDA flags
list(APPEND GMX_CUDA_NVCC_FLAGS "${GMX_CUDA_NVCC_GENCODE_FLAGS}")
list(APPEND GMX_CUDA_NVCC_FLAGS "-use_fast_math")
# where nullptr would be preferable. GROMACS can't fix these, so
# must suppress them.
GMX_TEST_CXXFLAG(CXXFLAGS_NO_ZERO_AS_NULL_POINTER_CONSTANT "-Wno-zero-as-null-pointer-constant" NVCC_CLANG_SUPPRESSIONS_CXXFLAGS)
- if (CUDA_VERSION VERSION_LESS 11.0)
- # CUDA header crt/math_functions.h before CUDA 11.0 used
- # throw() specifications that are deprecated in more recent
- # C++ versions. GROMACS can't fix these, so must suppress
- # them.
- GMX_TEST_CXXFLAG(CXXFLAGS_NO_DEPRECATED_DYNAMIC_EXCEPTION_SPEC "-Wno-deprecated-dynamic-exception-spec" NVCC_CLANG_SUPPRESSIONS_CXXFLAGS)
- endif()
+
+ # CUDA header crt/math_functions.h in at least CUDA 10.x and 11.1
+ # used throw() specifications that are deprecated in more recent
+ # C++ versions. GROMACS can't fix these, so must suppress them.
+ GMX_TEST_CXXFLAG(CXXFLAGS_NO_DEPRECATED_DYNAMIC_EXCEPTION_SPEC "-Wno-deprecated-dynamic-exception-spec" NVCC_CLANG_SUPPRESSIONS_CXXFLAGS)
+
# Add these flags to those used for the host compiler. The
# "-Xcompiler" prefix directs nvcc to only use them for host
# compilation, which is all that is needed in this case.
#
# This file is part of the GROMACS molecular simulation package.
#
-# Copyright (c) 2012,2013,2014,2015,2018, by the GROMACS development team, led by
+# Copyright (c) 2012,2013,2014,2015,2018,2020, by the GROMACS development team, led by
# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
# and including many others, as listed in the AUTHORS file in the
# top-level source directory and at http://www.gromacs.org.
endif()
set(SHARED_LIBS_DEFAULT OFF)
endif()
-set(GMX_PREFER_STATIC_LIBS_DEFAULT OFF)
-if (WIN32 AND NOT BUILD_SHARED_LIBS)
- set(GMX_PREFER_STATIC_LIBS_DEFAULT ON)
-endif()
if (NOT GMX_BUILD_SHARED_EXE)
set(GMX_PREFER_STATIC_LIBS_DEFAULT ON)
set(SHARED_LIBS_DEFAULT OFF)
# Declare the user-visible options
option(BUILD_SHARED_LIBS "Enable shared libraries (can be problematic e.g. with MPI, or on some HPC systems)" ${SHARED_LIBS_DEFAULT})
+
+# Default to preferring static libraries only for a Windows build that
+# does not build shared libraries. This check is placed after
+# option(BUILD_SHARED_LIBS ...) so that BUILD_SHARED_LIBS is defined
+# when it is tested.
+set(GMX_PREFER_STATIC_LIBS_DEFAULT OFF)
+if (WIN32 AND NOT BUILD_SHARED_LIBS)
+ set(GMX_PREFER_STATIC_LIBS_DEFAULT ON)
+endif()
+
if(BUILD_SHARED_LIBS AND GMX_BUILD_MDRUN_ONLY)
message(WARNING "Both BUILD_SHARED_LIBS and GMX_BUILD_MDRUN_ONLY are set. Generally, an mdrun-only build should prefer to use static libraries, which is the default if you make a fresh build tree. You may be re-using an old build tree, and so may wish to set BUILD_SHARED_LIBS=off yourself.")
endif()
# Change the real CMake variables for the given build type in each
# language, in the parent scope.
foreach(language C CXX)
- string(REPLACE /MD /MT CMAKE_${language}_FLAGS${punctuation}${build_type} ${CMAKE_${language}_FLAGS${punctuation}${build_type}} PARENT_SCOPE)
+ # string(REPLACE) only writes its output variable in the current
+ # scope, so do the substitution locally and then publish the result
+ # to the caller with set(... PARENT_SCOPE).
+ # The expansions are quoted so that an empty flags variable is still
+ # passed as one (empty) argument instead of vanishing, which would
+ # leave string(REPLACE) without its required input argument.
+ string(REPLACE /MD /MT CMAKE_${language}_FLAGS${punctuation}${build_type} "${CMAKE_${language}_FLAGS${punctuation}${build_type}}")
+ set(CMAKE_${language}_FLAGS${punctuation}${build_type} "${CMAKE_${language}_FLAGS${punctuation}${build_type}}" PARENT_SCOPE)
endforeach()
endfunction()
IF( CMAKE_C_COMPILER_ID MATCHES "Intel" )
if(BUILD_SHARED_LIBS) #not sure why incremental building with shared libs doesn't work
STRING(REPLACE "/INCREMENTAL:YES" "" CMAKE_SHARED_LINKER_FLAGS ${CMAKE_SHARED_LINKER_FLAGS})
+ set(CMAKE_SHARED_LINKER_FLAGS ${CMAKE_SHARED_LINKER_FLAGS} PARENT_SCOPE)
endif()
ENDIF()
ENDIF()
# IBM_VSX and gcc > 9 do not work together, so we need to prevent people from
# choosing a combination that might fail. Issue #3380.
- if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "9")
+ if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 10)
message(FATAL_ERROR "IBM_VSX does not work together with gcc > 9. Disable SIMD support (slower), or use an older version of the GNU compiler")
endif()
Fixes that affect portability
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Fix building on OSX
+"""""""""""""""""""
+
+The code wouldn't compile due to a missing include.
+
+:issue:`3730`
+
Miscellaneous
^^^^^^^^^^^^^
"OS X should build Python package for 64-bit architecture"
FORCE)
-project(gmxapi VERSION 0.1.0)
+# Note that this is the gmxapi._gmxapi Python bindings package version,
+# not the C++ API version. It is not essential that it match the pure Python
+# package version, but is likely to do so.
+project(gmxapi VERSION 0.2.0)
# Check if Python package is being built directly or via add_subdirectory
set(GMXAPI_MASTER_PROJECT OFF)
endif()
if(GMXAPI_MASTER_PROJECT)
- find_package(gmxapi 0.0.8 REQUIRED
+ # TODO: Retain compatibility with libgmxapi 0.1 and back down the requirement.
+ find_package(gmxapi 0.2.0 REQUIRED
HINTS "$ENV{GROMACS_DIR}"
)
endif()
LIBRARY_OUTPUT_DIRECTORY ${GMXAPI_PYTHON_STAGING_DIR}/gmxapi)
if(GMXAPI_MASTER_PROJECT)
+ # TODO: This requirement is probably overly restrictive.
find_package(GROMACS 2021 REQUIRED
HINTS "$ENV{GROMACS_DIR}"
)
setup(
name='gmxapi',
- # TODO: single-source version information (currently repeated in gmxapi/version.py)
+ # TODO: single-source version information (currently repeated in gmxapi/version.py and CMakeLists.txt)
version='0.2.0b1',
python_requires='>=3.6',
install_requires=['networkx>=2.0',
void GpuHaloExchange::Impl::reinitHalo(float3* d_coordinatesBuffer, float3* d_forcesBuffer)
{
+ wallcycle_start(wcycle_, ewcDOMDEC);
+ wallcycle_sub_start(wcycle_, ewcsDD_GPU);
d_x_ = d_coordinatesBuffer;
d_f_ = d_forcesBuffer;
MPI_BYTE, sendRankF_, 0, mpi_comm_mysim_, MPI_STATUS_IGNORE);
#endif
+ wallcycle_sub_stop(wcycle_, ewcsDD_GPU);
+ wallcycle_stop(wcycle_, ewcDOMDEC);
+
return;
}
GpuEventSynchronizer* coordinatesReadyOnDeviceEvent)
{
+ wallcycle_start(wcycle_, ewcLAUNCH_GPU);
if (pulse_ == 0)
{
// ensure stream waits until coordinate data is available on device
coordinatesReadyOnDeviceEvent->enqueueWaitEvent(nonLocalStream_);
}
- wallcycle_start(wcycle_, ewcLAUNCH_GPU);
wallcycle_sub_start(wcycle_, ewcsLAUNCH_GPU_MOVEX);
// launch kernel to pack send buffer
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/smalloc.h"
+#include "gromacs/utility/stringcompare.h"
/* these MUST correspond to the enum in hackblock.h */
const char* btsNames[ebtsNR] = { "bonds", "angles", "dihedrals", "impropers", "exclusions", "cmap" };
* Since we only have the unparsed string here we can only detect
* EXACT matches (including identical whitespace).
*/
- if (b.s != it->s)
+ if (b.s == it->s)
{
gmx_warning("Duplicate line found in or between hackblock and rtp entries");
}
#include "gromacs/gpu_utils/devicebuffer_datatype.h"
#include "gromacs/math/vectypes.h"
+#include "gromacs/timing/wallcycle.h"
#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/classhelpers.h"
#include "gromacs/utility/fixedcapacityvector.h"
*
* \param [in] deviceContext GPU device context
* \param [in] deviceStream Stream to use for reduction
+ * \param [in] wcycle Wall-clock cycle counter
*/
- GpuForceReduction(const DeviceContext& deviceContext, const DeviceStream& deviceStream);
+ GpuForceReduction(const DeviceContext& deviceContext,
+ const DeviceStream& deviceStream,
+ gmx_wallcycle* wcycle);
~GpuForceReduction();
/*! \brief Register a nbnxm-format force to be reduced
};
GpuForceReduction::GpuForceReduction(const DeviceContext& /* deviceContext */,
- const DeviceStream& /* deviceStream */) :
+ const DeviceStream& /* deviceStream */,
+ gmx_wallcycle* /*wcycle*/) :
impl_(nullptr)
{
GMX_ASSERT(false, "A CPU stub has been called instead of the correct implementation.");
return;
}
-GpuForceReduction::Impl::Impl(const DeviceContext& deviceContext, const DeviceStream& deviceStream) :
+GpuForceReduction::Impl::Impl(const DeviceContext& deviceContext,
+ const DeviceStream& deviceStream,
+ gmx_wallcycle* wcycle) :
deviceContext_(deviceContext),
- deviceStream_(deviceStream){};
+ deviceStream_(deviceStream),
+ wcycle_(wcycle){};
void GpuForceReduction::Impl::reinit(float3* baseForcePtr,
const int numAtoms,
accumulate_ = accumulate;
completionMarker_ = completionMarker;
cellInfo_.cell = cell.data();
+
+ wallcycle_start_nocount(wcycle_, ewcLAUNCH_GPU);
reallocateDeviceBuffer(&cellInfo_.d_cell, numAtoms_, &cellInfo_.cellSize,
&cellInfo_.cellSizeAlloc, deviceContext_);
copyToDeviceBuffer(&cellInfo_.d_cell, &(cellInfo_.cell[atomStart]), 0, numAtoms_, deviceStream_,
GpuApiCallBehavior::Async, nullptr);
+ wallcycle_stop(wcycle_, ewcLAUNCH_GPU);
dependencyList_.clear();
};
void GpuForceReduction::Impl::execute()
{
+ wallcycle_start_nocount(wcycle_, ewcLAUNCH_GPU);
+ wallcycle_sub_start(wcycle_, ewcsLAUNCH_GPU_NB_F_BUF_OPS);
if (numAtoms_ == 0)
{
{
completionMarker_->markEvent(deviceStream_);
}
+
+ wallcycle_sub_stop(wcycle_, ewcsLAUNCH_GPU_NB_F_BUF_OPS);
+ wallcycle_stop(wcycle_, ewcLAUNCH_GPU);
}
GpuForceReduction::Impl::~Impl(){};
-GpuForceReduction::GpuForceReduction(const DeviceContext& deviceContext, const DeviceStream& deviceStream) :
- impl_(new Impl(deviceContext, deviceStream))
+GpuForceReduction::GpuForceReduction(const DeviceContext& deviceContext,
+ const DeviceStream& deviceStream,
+ gmx_wallcycle* wcycle) :
+ impl_(new Impl(deviceContext, deviceStream, wcycle))
{
}
*
* \param [in] deviceStream Stream to use for reduction
* \param [in] deviceContext GPU device context
+ * \param [in] wcycle The wallclock counter
*/
- Impl(const DeviceContext& deviceContext, const DeviceStream& deviceStream);
+ Impl(const DeviceContext& deviceContext, const DeviceStream& deviceStream, gmx_wallcycle* wcycle);
~Impl();
/*! \brief Register a nbnxm-format force to be reduced
DeviceBuffer<RVec> rvecForceToAdd_ = nullptr;
//! event to be marked when reduction launch has been completed
GpuEventSynchronizer* completionMarker_ = nullptr;
+ //! The wallclock counter
+ gmx_wallcycle* wcycle_ = nullptr;
};
} // namespace gmx
#include "shake.h"
#include <cmath>
+#include <cstdlib>
#include <algorithm>
{
fr->gpuForceReduction[gmx::AtomLocality::Local] = std::make_unique<gmx::GpuForceReduction>(
deviceStreamManager->context(),
- deviceStreamManager->stream(gmx::DeviceStreamType::NonBondedLocal));
+ deviceStreamManager->stream(gmx::DeviceStreamType::NonBondedLocal), wcycle);
fr->gpuForceReduction[gmx::AtomLocality::NonLocal] = std::make_unique<gmx::GpuForceReduction>(
deviceStreamManager->context(),
- deviceStreamManager->stream(gmx::DeviceStreamType::NonBondedNonLocal));
+ deviceStreamManager->stream(gmx::DeviceStreamType::NonBondedNonLocal), wcycle);
}
std::unique_ptr<gmx::StatePropagatorDataGpu> stateGpu;
const bool simulationsShareState) :
writeEnergyStep_(-1),
writeStateStep_(-1),
+ writeLogStep_(-1),
outf_(init_mdoutf(fplog,
nfile,
fnm,
{
const bool writeEnergyThisStep = writeEnergyStep_ == step;
const bool writeStateThisStep = writeStateStep_ == step;
- const bool writeLogThisStep = logWritingStep_ == step;
+ const bool writeLogThisStep = writeLogStep_ == step;
if (writeEnergyThisStep || writeStateThisStep || writeLogThisStep)
{
registerRunFunction([this, step, time, writeStateThisStep, writeEnergyThisStep, writeLogThisStep]() {
std::optional<SignallerCallback> TrajectoryElement::registerLoggingCallback()
{
- return [this](Step step, Time /*unused*/) { logWritingStep_ = step; };
+ return [this](Step step, Time /*unused*/) { writeLogStep_ = step; };
}
std::optional<SignallerCallback> TrajectoryElement::registerTrajectorySignallerCallback(TrajectoryEvent event)
//! The next state writing step
Step writeStateStep_;
//! The next communicated log writing step
- Step logWritingStep_;
+ Step writeLogStep_;
//! The output object
gmx_mdoutf* outf_;
std::numeric_limits<typename GeneratorType::result_type>::digits;
uint64_t result = static_cast<uint64_t>(gen());
- // This conditional is needed so that compiler understands that what follows is a dead branch
- // and not complains about shift larger than number of bits in the result.
- if (resultBits < numBitsInRandomNumber)
+ // The loop below only executes when the generator yields fewer bits per call
+ // than the result needs (numBitsInRandomNumber < resultBits), and that is the
+ // case in which each new draw must be shifted in by numBitsInRandomNumber.
+ // Otherwise the loop is a dead branch, and a zero shift count keeps the compiler
+ // from complaining about a shift count larger than the number of bits in the result.
+ constexpr std::size_t shiftCount = (numBitsInRandomNumber < resultBits) ? numBitsInRandomNumber : 0;
+ for (std::size_t bits = numBitsInRandomNumber; bits < resultBits; bits += numBitsInRandomNumber)
{
- for (std::size_t bits = numBitsInRandomNumber; bits < resultBits; bits += numBitsInRandomNumber)
- {
- result = (result << numBitsInRandomNumber) | static_cast<uint64_t>(gen());
- }
+ result = (result << shiftCount) | static_cast<uint64_t>(gen());
}
return result;
}
"DD make top.",
"DD make constr.",
"DD top. other",
+ "DD GPU ops.",
"NS grid local",
"NS grid non-loc.",
"NS search local",
ewcsDD_MAKETOP,
ewcsDD_MAKECONSTR,
ewcsDD_TOPOTHER,
+ ewcsDD_GPU,
ewcsNBS_GRID_LOCAL,
ewcsNBS_GRID_NONLOCAL,
ewcsNBS_SEARCH_LOCAL,
writer->writeLine(formatCentered(78, "GROMACS is written by:"));
for (int i = 0; i < NCONTRIBUTORS;)
{
- for (int j = 0; j < 4 && i < NCONTRIBUTORS; ++j, ++i)
+ for (int j = 0; j < 3 && i < NCONTRIBUTORS; ++j, ++i)
{
- const int width = 18;
+ const int width = 26;
std::array<char, 30> buf;
const int offset = centeringOffset(width, strlen(Contributors[i]));
GMX_RELEASE_ASSERT(static_cast<int>(strlen(Contributors[i])) + offset < gmx::ssize(buf),