gmx_option_multichoice(
GMX_GPU
"Framework for GPU acceleration"
- None
- None CUDA OpenCL)
+ OFF
+ OFF CUDA OpenCL)
gmx_option_multichoice(
GMX_SIMD
include(gmxManageLmfit)
-string(TOUPPER "${GMX_GPU}" _gmx_gpu_uppercase)
-if(NOT ${_gmx_gpu_uppercase} STREQUAL "NONE")
+if(GMX_GPU)
+ string(TOUPPER "${GMX_GPU}" _gmx_gpu_uppercase)
if(${_gmx_gpu_uppercase} STREQUAL "CUDA")
include(gmxManageCuda)
elseif(${_gmx_gpu_uppercase} STREQUAL "OPENCL")
endif()
-# Not ideal to set this without the GPU acceleration, but the source presently requires GMX_GPU
-# to be set to GMX_GPU_None for a non-GPU build to work.
-set(GMX_GPU_ACCELERATION_FRAMEWORK "GMX_GPU_${_gmx_gpu_uppercase}")
-
if(CYGWIN)
set(GMX_CYGWIN 1)
endif()
CMAKE_MPI_OPTIONS: "-DGMX_THREAD_MPI=OFF"
CMAKE_PRECISION_OPTIONS: "-DGMX_DOUBLE=OFF"
CMAKE_BUILD_TYPE_OPTIONS: "-DCMAKE_BUILD_TYPE=Debug"
- CMAKE_GPU_OPTIONS: "-DGMX_GPU=None"
+ CMAKE_GPU_OPTIONS: "-DGMX_GPU=OFF"
CMAKE_GMXAPI_OPTIONS: "-DGMX_PYTHON_PACKAGE=ON"
script:
- if [[ ! -z $GROMACS_RELEASE && $GROMACS_RELEASE == "true" ]] ; then
CMAKE_MPI_OPTIONS: "-DGMX_THREAD_MPI=ON"
CMAKE_PRECISION_OPTIONS: "-DGMX_DOUBLE=OFF"
CMAKE_BUILD_TYPE_OPTIONS: "-DCMAKE_BUILD_TYPE=Debug"
- CMAKE_GPU_OPTIONS: "-DGMX_GPU=None"
+ CMAKE_GPU_OPTIONS: "-DGMX_GPU=OFF"
CMAKE_GMXAPI_OPTIONS: "-DGMX_PYTHON_PACKAGE=ON"
RELEASE_BUILD_DIR: release-doc-builds
RELEASE_SOURCE: release-source-from-tarball
CMAKE_MPI_OPTIONS: "-DGMX_THREAD_MPI=ON"
CMAKE_PRECISION_OPTIONS: "-DGMX_DOUBLE=OFF"
CMAKE_BUILD_TYPE_OPTIONS: "-DCMAKE_BUILD_TYPE=Debug"
- CMAKE_GPU_OPTIONS: "-DGMX_GPU=None"
+ CMAKE_GPU_OPTIONS: "-DGMX_GPU=OFF"
CMAKE_GMXAPI_OPTIONS: "-DGMX_PYTHON_PACKAGE=OFF"
COMPILER_MAJOR_VERSION: 9
BUILD_DIR: simple-build
CMAKE_MPI_OPTIONS: "-DGMX_THREAD_MPI=ON"
CMAKE_PRECISION_OPTIONS: "-DGMX_DOUBLE=OFF"
CMAKE_BUILD_TYPE_OPTIONS: "-DCMAKE_BUILD_TYPE=Debug"
- CMAKE_GPU_OPTIONS: "-DGMX_GPU=None"
+ CMAKE_GPU_OPTIONS: "-DGMX_GPU=OFF"
script:
- echo $CMAKE_COMPILER_SCRIPT
- echo $CMAKE_EXTRA_OPTIONS
CMAKE_MPI_OPTIONS: "-DGMX_THREAD_MPI=ON"
CMAKE_PRECISION_OPTIONS: "-DGMX_DOUBLE=OFF"
CMAKE_BUILD_TYPE_OPTIONS: "-DCMAKE_BUILD_TYPE=RelWithAssert"
- CMAKE_GPU_OPTIONS: "-DGMX_GPU=None"
+ CMAKE_GPU_OPTIONS: "-DGMX_GPU=OFF"
CMAKE_REGRESSIONTEST_OPTIONS: "-DREGRESSIONTEST_PATH=../\\$RELEASE_REGRESSIONTESTS"
RELEASE_BUILD_DIR: release-builds
RELEASE_SOURCE: release-source-from-tarball
set(REQUIRED_CUDA_VERSION 9.0)
set(REQUIRED_CUDA_COMPUTE_CAPABILITY 3.0)
-set(GMX_USE_CUDA ON)
+set(GMX_GPU_CUDA ON)
option(GMX_CLANG_CUDA "Use clang for CUDA" OFF)
set(REQUIRED_OPENCL_MIN_VERSION_MINOR 2)
set(REQUIRED_OPENCL_MIN_VERSION ${REQUIRED_OPENCL_MIN_VERSION_MAJOR}.${REQUIRED_OPENCL_MIN_VERSION_MINOR})
-set(GMX_USE_OPENCL ON)
+set(GMX_GPU_OPENCL ON)
if(GMX_DOUBLE)
message(FATAL_ERROR "OpenCL acceleration is not available in double precision")
include(GetCompilerInfo.cmake)
get_compiler_info(C BUILD_C_COMPILER)
get_compiler_info(CXX BUILD_CXX_COMPILER)
-if(GMX_USE_CUDA)
+if(GMX_GPU_CUDA)
if(NOT GMX_CLANG_CUDA)
GMX_SET_CUDA_NVCC_FLAGS()
endif()
# pseudo-library for code for mdrun
$<TARGET_OBJECTS:mdrun_objlib>
)
-if(NOT GMX_USE_OPENCL)
+if(NOT GMX_GPU_OPENCL)
# GPU resources may not be properly reinitialized between simulations in
# the same process.
# TODO: include this with the other test sources once the issue is resolved
# pseudo-library for code for mdrun
$<TARGET_OBJECTS:mdrun_objlib>
)
-if(NOT GMX_USE_OPENCL)
+if(NOT GMX_GPU_OPENCL)
# GPU resources may not be properly reinitialized between simulations in
# the same process.
# TODO: include this with the other test sources once the issue is resolved
/* Enable x86 gcc inline assembly */
#cmakedefine01 GMX_X86_GCC_INLINE_ASM
-/* Define constants useful for handling GPU support */
-#define GMX_GPU_NONE 0
-#define GMX_GPU_CUDA 1
-#define GMX_GPU_OPENCL 2
-/* Which kind of GPU support is configured */
-#define GMX_GPU @GMX_GPU_ACCELERATION_FRAMEWORK@
+/* Define if any type of GPU acceleration is compiled */
+#cmakedefine01 GMX_GPU
+
+/* Define if CUDA GPU acceleration is compiled */
+#cmakedefine01 GMX_GPU_CUDA
+
+/* Define if OpenCL GPU acceleration is compiled */
+#cmakedefine01 GMX_GPU_OPENCL
/* Use a single compilation unit when compiling the CUDA (non-bonded) kernels. */
#cmakedefine01 GMX_CUDA_NB_SINGLE_COMPILATION_UNIT
list(APPEND LIBGROMACS_SOURCES ${GENERATED_VERSION_FILE})
# Mark some shared GPU implementation files to compile with CUDA if needed
-if (GMX_USE_CUDA)
+if (GMX_GPU_CUDA)
get_property(LIBGROMACS_GPU_IMPL_SOURCES GLOBAL PROPERTY GMX_LIBGROMACS_GPU_IMPL_SOURCES)
set_source_files_properties(${LIBGROMACS_GPU_IMPL_SOURCES} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ)
endif()
endforeach()
endif()
-if (GMX_USE_CUDA)
+if (GMX_GPU_CUDA)
# Work around FindCUDA that prevents using target_link_libraries()
# with keywords otherwise...
set(CUDA_LIBRARIES PRIVATE ${CUDA_LIBRARIES})
target_compile_definitions(libgromacs PRIVATE HAVE_CONFIG_H)
target_include_directories(libgromacs SYSTEM BEFORE PRIVATE ${PROJECT_SOURCE_DIR}/src/external/thread_mpi/include)
-if (GMX_USE_OPENCL)
+if (GMX_GPU_OPENCL)
option(GMX_EXTERNAL_CLFFT "True if an external clFFT is required to be used" FALSE)
mark_as_advanced(GMX_EXTERNAL_CLFFT)
# using the CUDA runtime, but currently there's no reason to want to
# do that.
if (INSTALL_CUDART_LIB) #can be set manual by user
- if (GMX_USE_CUDA)
+ if (GMX_GPU_CUDA)
foreach(CUDA_LIB ${CUDA_LIBRARIES})
string(REGEX MATCH "cudart" IS_CUDART ${CUDA_LIB})
if(IS_CUDART) #libcuda should not be installed
endif()
endif()
-if(GMX_USE_OPENCL)
+if(GMX_GPU_OPENCL)
# Install the utility headers
file(GLOB OPENCL_INSTALLED_FILES
gpu_utils/vectype_ops.clh
#
# This file is part of the GROMACS molecular simulation package.
#
-# Copyright (c) 2014,2018,2019, by the GROMACS development team, led by
+# Copyright (c) 2014,2018,2019,2020, by the GROMACS development team, led by
# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
# and including many others, as listed in the AUTHORS file in the
# top-level source directory and at http://www.gromacs.org.
file(GLOB DOMDEC_SOURCES *.cpp)
-if(GMX_USE_CUDA)
+if(GMX_GPU_CUDA)
file(GLOB DOMDEC_CUDA_SOURCES gpuhaloexchange_impl.cu)
endif()
#include "gromacs/domdec/gpuhaloexchange.h"
-#if GMX_GPU != GMX_GPU_CUDA
+#if !GMX_GPU_CUDA
namespace gmx
{
} // namespace gmx
-#endif /* GMX_GPU != GMX_GPU_CUDA */
+#endif // !GMX_GPU_CUDA
pme_coordinate_receiver_gpu_impl.cpp
pme_force_sender_gpu_impl.cpp
)
-if (GMX_USE_CUDA)
+if (GMX_GPU_CUDA)
gmx_add_libgromacs_sources(
# CUDA-specific sources
pme_gather.cu
pme_gpu_internal.cpp
pme_gpu_timings.cpp
)
-elseif (GMX_USE_OPENCL)
+elseif (GMX_GPU_OPENCL)
gmx_add_libgromacs_sources(
# OpenCL-specific sources
pme_gpu_3dfft_ocl.cpp
{
errorReasons.emplace_back("a double-precision build");
}
- if (GMX_GPU == GMX_GPU_NONE)
+ if (!GMX_GPU)
{
errorReasons.emplace_back("a non-GPU build");
}
{
std::list<std::string> errorReasons;
- if (GMX_GPU == GMX_GPU_OPENCL)
+ if (GMX_GPU_OPENCL)
{
#ifdef __APPLE__
errorReasons.emplace_back("Apple OS X operating system");
{
errorReasons.emplace_back("double precision");
}
- if (GMX_GPU == GMX_GPU_NONE)
+ if (!GMX_GPU)
{
errorReasons.emplace_back("non-GPU build of GROMACS");
}
#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/gmxassert.h"
-#if GMX_GPU != GMX_GPU_CUDA
+#if !GMX_GPU_CUDA
namespace gmx
{
-/*!\brief Impl class stub. */
+/*!\brief \internal Impl class stub. */
class PmeCoordinateReceiverGpu::Impl
{
};
} // namespace gmx
-#endif /* GMX_GPU != GMX_GPU_CUDA */
+#endif // !GMX_GPU_CUDA
#include "gromacs/ewald/pme_force_sender_gpu.h"
#include "gromacs/utility/arrayref.h"
-#if GMX_GPU != GMX_GPU_CUDA
+#if !GMX_GPU_CUDA
namespace gmx
{
-/*!\brief Impl class stub. */
+/*!\brief \internal Impl class stub. */
class PmeForceSenderGpu::Impl
{
};
} // namespace gmx
-#endif /* GMX_GPU != GMX_GPU_CUDA */
+#endif // !GMX_GPU_CUDA
const real lambdaQ)
{
GMX_ASSERT(pme_gpu_active(pme), "This should be a GPU run of PME but it is not enabled.");
- GMX_ASSERT(xReadyOnDevice || !pme->bPPnode || (GMX_GPU != GMX_GPU_CUDA),
+ GMX_ASSERT(!GMX_GPU_CUDA || xReadyOnDevice || !pme->bPPnode,
"Need a valid xReadyOnDevice on PP+PME ranks with CUDA.");
GMX_ASSERT(pme->doCoulomb, "Only Coulomb PME can be run on GPU.");
// time needed for that checking, but do not yet record that the
// gather has occurred.
bool needToSynchronize = true;
- constexpr bool c_streamQuerySupported = (GMX_GPU == GMX_GPU_CUDA);
+ constexpr bool c_streamQuerySupported = bool(GMX_GPU_CUDA);
+
// TODO: implement c_streamQuerySupported with an additional GpuEventSynchronizer per stream (#2521)
if ((completionKind == GpuTaskCompletion::Check) && c_streamQuerySupported)
{
#include <vector>
-#if GMX_GPU == GMX_GPU_CUDA
+#if GMX_GPU_CUDA
# include <cufft.h>
# include "gromacs/gpu_utils/gputraits.cuh"
-#elif GMX_GPU == GMX_GPU_OPENCL
+#elif GMX_GPU_OPENCL
# include <clFFT.h>
# include "gromacs/gpu_utils/gmxopencl.h"
void perform3dFft(gmx_fft_direction dir, CommandEvent* timingEvent);
private:
-#if GMX_GPU == GMX_GPU_CUDA
+#if GMX_GPU_CUDA
cufftHandle planR2C_;
cufftHandle planC2R_;
cufftReal* realGrid_;
cufftComplex* complexGrid_;
-#elif GMX_GPU == GMX_GPU_OPENCL
+#elif GMX_GPU_OPENCL
clfftPlanHandle planR2C_;
clfftPlanHandle planC2R_;
std::vector<cl_command_queue> deviceStreams_;
#include "config.h"
-#if GMX_GPU == GMX_GPU_CUDA
+#if GMX_GPU_CUDA
# include "gromacs/gpu_utils/cuda_arch_utils.cuh" // for warp_size
#endif
//! Gathering max block width in warps - picked empirically among 2, 4, 8, 16 for max. occupancy and min. runtime
constexpr int c_gatherMaxWarpsPerBlock = 4;
-#if GMX_GPU == GMX_GPU_CUDA
+#if GMX_GPU_CUDA
/* All the fields below are dependent on warp_size and should
* ideally be removed from the device-side code, as we have to
* do that for OpenCL already.
//! Gathering min blocks per CUDA multiprocessor
static constexpr int c_gatherMinBlocksPerMP = GMX_CUDA_MAX_THREADS_PER_MP / c_gatherMaxThreadsPerBlock;
-#endif // GMX_GPU == GMX_GPU_CUDA
+#endif // GMX_GPU_CUDA
#endif
#include "gromacs/utility/logger.h"
#include "gromacs/utility/stringutil.h"
-#if GMX_GPU == GMX_GPU_CUDA
+#if GMX_GPU_CUDA
# include "gromacs/gpu_utils/pmalloc_cuda.h"
# include "pme.cuh"
-#elif GMX_GPU == GMX_GPU_OPENCL
+#elif GMX_GPU_OPENCL
# include "gromacs/gpu_utils/gmxopencl.h"
#endif
void pme_gpu_free_fract_shifts(const PmeGpu* pmeGpu)
{
auto* kernelParamsPtr = pmeGpu->kernelParams.get();
-#if GMX_GPU == GMX_GPU_CUDA
+#if GMX_GPU_CUDA
destroyParamLookupTable(&kernelParamsPtr->grid.d_fractShiftsTable,
kernelParamsPtr->fractShiftsTableTexture);
destroyParamLookupTable(&kernelParamsPtr->grid.d_gridlineIndicesTable,
kernelParamsPtr->gridlineIndicesTableTexture);
-#elif GMX_GPU == GMX_GPU_OPENCL
+#elif GMX_GPU_OPENCL
freeDeviceBuffer(&kernelParamsPtr->grid.d_fractShiftsTable);
freeDeviceBuffer(&kernelParamsPtr->grid.d_gridlineIndicesTable);
#endif
*/
static void pme_gpu_init_internal(PmeGpu* pmeGpu, const DeviceContext& deviceContext, const DeviceStream& deviceStream)
{
-#if GMX_GPU == GMX_GPU_CUDA
+#if GMX_GPU_CUDA
// Prepare to use the device that this PME task was assigned earlier.
// Other entities, such as CUDA timing events, are known to implicitly use the device context.
CU_RET_ERR(cudaSetDevice(deviceContext.deviceInfo().id), "Switching to PME CUDA device");
* TODO: PME could also try to pick up nice grid sizes (with factors of 2, 3, 5, 7).
*/
-#if GMX_GPU == GMX_GPU_CUDA
+#if GMX_GPU_CUDA
pmeGpu->maxGridWidthX = deviceContext.deviceInfo().prop.maxGridSize[0];
-#elif GMX_GPU == GMX_GPU_OPENCL
- pmeGpu->maxGridWidthX = INT32_MAX / 2;
+#else
+ // Use this path for any non-CUDA GPU acceleration
    // TODO: is there really no global work-size limit in OpenCL?
+ pmeGpu->maxGridWidthX = INT32_MAX / 2;
#endif
}
*/
static void pme_gpu_select_best_performing_pme_spreadgather_kernels(PmeGpu* pmeGpu)
{
- if (pmeGpu->kernelParams->atoms.nAtoms > c_pmeGpuPerformanceAtomLimit && (GMX_GPU == GMX_GPU_CUDA))
+ if (GMX_GPU_CUDA && pmeGpu->kernelParams->atoms.nAtoms > c_pmeGpuPerformanceAtomLimit)
{
pmeGpu->settings.threadsPerAtom = ThreadsPerAtom::Order;
pmeGpu->settings.recalculateSplines = true;
const int threadsPerAtom =
(pmeGpu->settings.threadsPerAtom == ThreadsPerAtom::Order ? order : order * order);
const bool recalculateSplines = pmeGpu->settings.recalculateSplines;
-#if GMX_GPU == GMX_GPU_OPENCL
- GMX_ASSERT(pmeGpu->settings.threadsPerAtom == ThreadsPerAtom::OrderSquared,
+
+ GMX_ASSERT(!GMX_GPU_OPENCL || pmeGpu->settings.threadsPerAtom == ThreadsPerAtom::OrderSquared,
"Only 16 threads per atom supported in OpenCL");
- GMX_ASSERT(!recalculateSplines, "Recalculating splines not supported in OpenCL");
-#endif
+ GMX_ASSERT(!GMX_GPU_OPENCL || !recalculateSplines,
+ "Recalculating splines not supported in OpenCL");
+
const int atomsPerBlock = blockSize / threadsPerAtom;
// TODO: pick smaller block size in runtime if needed
// Ensure that coordinates are ready on the device before launching spread;
// only needed with CUDA on PP+PME ranks, not on separate PME ranks, in unit tests
// nor in OpenCL as these cases use a single stream (hence xReadyOnDevice == nullptr).
- GMX_ASSERT(xReadyOnDevice != nullptr || (GMX_GPU != GMX_GPU_CUDA)
- || pmeGpu->common->isRankPmeOnly || pme_gpu_settings(pmeGpu).copyAllOutputs,
+ GMX_ASSERT(!GMX_GPU_CUDA || xReadyOnDevice != nullptr || pmeGpu->common->isRankPmeOnly
+ || pme_gpu_settings(pmeGpu).copyAllOutputs,
"Need a valid coordinate synchronizer on PP+PME ranks with CUDA.");
+
if (xReadyOnDevice)
{
xReadyOnDevice->enqueueWaitEvent(pmeGpu->archSpecific->pmeStream_);
const int warpSize = pmeGpu->programHandle_->warpSize();
const int blockSize = (cellsPerBlock + warpSize - 1) / warpSize * warpSize;
- static_assert(GMX_GPU != GMX_GPU_CUDA || c_solveMaxWarpsPerBlock / 2 >= 4,
+ static_assert(!GMX_GPU_CUDA || c_solveMaxWarpsPerBlock / 2 >= 4,
"The CUDA solve energy kernels needs at least 4 warps. "
"Here we launch at least half of the max warps.");
const int threadsPerAtom =
(pmeGpu->settings.threadsPerAtom == ThreadsPerAtom::Order ? order : order * order);
const bool recalculateSplines = pmeGpu->settings.recalculateSplines;
-#if GMX_GPU == GMX_GPU_OPENCL
- GMX_ASSERT(pmeGpu->settings.threadsPerAtom == ThreadsPerAtom::OrderSquared,
+
+ GMX_ASSERT(!GMX_GPU_OPENCL || pmeGpu->settings.threadsPerAtom == ThreadsPerAtom::OrderSquared,
"Only 16 threads per atom supported in OpenCL");
- GMX_ASSERT(!recalculateSplines, "Recalculating splines not supported in OpenCL");
-#endif
+ GMX_ASSERT(!GMX_GPU_OPENCL || !recalculateSplines,
+ "Recalculating splines not supported in OpenCL");
+
const int atomsPerBlock = blockSize / threadsPerAtom;
GMX_ASSERT(!(c_pmeAtomDataBlockSize % atomsPerBlock),
const DeviceContext& deviceContext_;
//! Conveniently all the PME kernels use the same single argument type
-#if GMX_GPU == GMX_GPU_CUDA
+#if GMX_GPU_CUDA
using PmeKernelHandle = void (*)(const struct PmeGpuCudaKernelParams);
-#elif GMX_GPU == GMX_GPU_OPENCL
+#elif GMX_GPU_OPENCL
using PmeKernelHandle = cl_kernel;
#else
using PmeKernelHandle = void*;
class PmeDeviceBuffers;
} // namespace gmx
-#if GMX_GPU != GMX_GPU_NONE
+#if GMX_GPU
struct PmeGpuSpecific;
#else
/*! \brief A dummy typedef for the GPU host data placeholder on non-GPU builds */
typedef int PmeGpuSpecific;
#endif
-#if GMX_GPU == GMX_GPU_CUDA
+#if GMX_GPU_CUDA
struct PmeGpuCudaKernelParams;
/*! \brief A typedef for including the GPU kernel arguments data by pointer */
typedef PmeGpuCudaKernelParams PmeGpuKernelParams;
-#elif GMX_GPU == GMX_GPU_OPENCL
+#elif GMX_GPU_OPENCL
struct PmeGpuKernelParamsBase;
/*! \brief A typedef for including the GPU kernel arguments data by pointer */
typedef PmeGpuKernelParamsBase PmeGpuKernelParams;
#include <set>
#include <vector>
-#if GMX_GPU == GMX_GPU_CUDA
+#if GMX_GPU_CUDA
# include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
# include "gromacs/gpu_utils/gpuregiontimer.cuh"
-#elif GMX_GPU == GMX_GPU_OPENCL
+#elif GMX_GPU_OPENCL
# include "gromacs/gpu_utils/gpueventsynchronizer_ocl.h"
# include "gromacs/gpu_utils/gpuregiontimer_ocl.h"
#endif
/*! \brief environment variable to enable GPU P2P communication */
static const bool c_enableGpuPmePpComms =
- (getenv("GMX_GPU_PME_PP_COMMS") != nullptr) && GMX_THREAD_MPI && (GMX_GPU == GMX_GPU_CUDA);
+ GMX_GPU_CUDA && GMX_THREAD_MPI && (getenv("GMX_GPU_PME_PP_COMMS") != nullptr);
/*! \brief Master PP-PME communication data structure */
struct gmx_pme_pp
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/gmxmpi.h"
-#if GMX_GPU != GMX_GPU_CUDA
+#if !GMX_GPU_CUDA
namespace gmx
{
-/*!\brief Impl class stub. */
+/*!\brief \internal Impl class stub. */
class PmePpCommGpu::Impl
{
};
} // namespace gmx
-#endif /* GMX_GPU != GMX_GPU_CUDA */
+#endif // !GMX_GPU_CUDA
if (!(flags & FFT5D_NOMALLOC))
{
// only needed for PME GPU mixed mode
- if (realGridAllocationPinningPolicy == gmx::PinningPolicy::PinnedIfSupported && GMX_GPU == GMX_GPU_CUDA)
+ if (GMX_GPU_CUDA && realGridAllocationPinningPolicy == gmx::PinningPolicy::PinnedIfSupported)
{
const std::size_t numBytes = lsize * sizeof(t_complex);
lin = static_cast<t_complex*>(gmx::PageAlignedAllocationPolicy::malloc(numBytes));
gpu_utils.cpp
gpu_testutils.cpp
)
-if(GMX_USE_OPENCL)
+if(GMX_GPU_OPENCL)
gmx_add_libgromacs_sources(
device_context_ocl.cpp
device_stream_ocl.cpp
ocl_caching.cpp
oclutils.cpp
)
-elseif(GMX_USE_CUDA)
+elseif(GMX_GPU_CUDA)
gmx_add_libgromacs_sources(
device_stream.cu
gpu_utils.cu
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "gromacs/utility/mutex.h"
#include "gromacs/utility/stringutil.h"
-#if GMX_GPU == GMX_GPU_OPENCL
+#if GMX_GPU_OPENCL
# include <clFFT.h>
#endif
ClfftInitializer::ClfftInitializer()
{
-#if GMX_GPU == GMX_GPU_OPENCL
+#if GMX_GPU_OPENCL
gmx::lock_guard<gmx::Mutex> guard(g_clfftMutex);
clfftSetupData fftSetup;
int initErrorCode = clfftInitSetupData(&fftSetup);
ClfftInitializer::~ClfftInitializer()
{
-#if GMX_GPU == GMX_GPU_OPENCL
+#if GMX_GPU_OPENCL
gmx::lock_guard<gmx::Mutex> guard(g_clfftMutex);
if (g_clfftInitialized)
{
#include "config.h"
-#if GMX_GPU == GMX_GPU_OPENCL
+#if GMX_GPU_OPENCL
# include "gromacs/gpu_utils/device_context_ocl.h"
#else
# include "gromacs/utility/classhelpers.h"
GMX_DISALLOW_COPY_MOVE_AND_ASSIGN(DeviceContext);
};
-#endif // GMX_GPU != GMX_GPU_OPENCL
+#endif // !GMX_GPU_OPENCL
#endif // GMX_GPU_UTILS_DEVICE_CONTEXT_H
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL 0x4
/**@}*/
+#ifndef DOXYGEN
+
DeviceContext::DeviceContext(const DeviceInformation& deviceInfo) : deviceInfo_(deviceInfo)
{
cl_platform_id platformId = deviceInfo.oclPlatformId;
{
return context_;
}
+
+#endif
* \inlibraryapi
*/
-#include "gromacs/gpu_utils/gmxopencl.h"
-#include "gromacs/utility/classhelpers.h"
+#ifndef DOXYGEN
+
+# include "gromacs/gpu_utils/gmxopencl.h"
+# include "gromacs/utility/classhelpers.h"
struct DeviceInformation;
GMX_DISALLOW_COPY_MOVE_AND_ASSIGN(DeviceContext);
};
+#endif // !defined DOXYGEN
#endif // GMX_GPU_UTILS_DEVICE_CONTEXT_OCL_H
#include "config.h"
-#if GMX_GPU == GMX_GPU_CUDA
+#if GMX_GPU_CUDA
# include <cuda_runtime.h>
-#elif GMX_GPU == GMX_GPU_OPENCL
+#elif GMX_GPU_OPENCL
# include "gromacs/gpu_utils/gmxopencl.h"
#endif
#include "gromacs/utility/classhelpers.h"
//! Synchronize the stream
void synchronize() const;
-#if GMX_GPU == GMX_GPU_CUDA
+#if GMX_GPU_CUDA
//! Getter
cudaStream_t stream() const;
private:
cudaStream_t stream_ = nullptr;
-#elif GMX_GPU == GMX_GPU_OPENCL
+#elif GMX_GPU_OPENCL || defined DOXYGEN
//! Getter
cl_command_queue stream() const;
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/smalloc.h" // TODO: this is only for over_alloc_large
-#if GMX_GPU == GMX_GPU_CUDA
+#if GMX_GPU_CUDA
# include "gromacs/gpu_utils/devicebuffer.cuh"
-#elif GMX_GPU == GMX_GPU_OPENCL
+#elif GMX_GPU_OPENCL
# include "gromacs/gpu_utils/devicebuffer_ocl.h"
#else
# error "devicebuffer.h included on non-GPU build!"
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "config.h"
-#if GMX_GPU == GMX_GPU_CUDA
+#if GMX_GPU_CUDA
//! \brief A device-side buffer of ValueTypes
template<typename ValueType>
using DeviceBuffer = ValueType*;
-#elif GMX_GPU == GMX_GPU_OPENCL
+#elif GMX_GPU_OPENCL
# include "gromacs/gpu_utils/gputraits_ocl.h"
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2014,2015,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015,2017,2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
# define OPENCL_FUNC_TERM REAL_FUNC_TERM
# define OPENCL_FUNC_TERM_WITH_RETURN(arg) REAL_FUNC_TERM_WITH_RETURN(arg)
-#elif GMX_GPU != GMX_GPU_NONE
+#elif GMX_GPU
/* GPU support is enabled, so these functions will have real code
* defined somewhere */
# define GPU_FUNC_TERM REAL_FUNC_TERM
# define GPU_FUNC_TERM_WITH_RETURN(arg) REAL_FUNC_TERM_WITH_RETURN(arg)
-# if GMX_GPU == GMX_GPU_OPENCL
+# if GMX_GPU_OPENCL
/* OpenCL support is enabled, so CUDA-specific functions need empty
* implementations, while OpenCL-specific functions will have real
# define OPENCL_FUNC_TERM_WITH_RETURN(arg) REAL_FUNC_TERM_WITH_RETURN(arg)
# endif
-# if GMX_GPU == GMX_GPU_CUDA
+# if GMX_GPU_CUDA
/* CUDA support is enabled, so OpenCL-specific functions need empty
* implementations, while CUDA-specific functions will have real
# endif
-#elif GMX_GPU == GMX_GPU_NONE
+#elif !GMX_GPU
/* No GPU support is configured, so none of these functions will have
* real definitions. */
#endif
//! Constant used to help minimize preprocessed code
-static constexpr bool c_binarySupportsGpus = (GMX_GPU != GMX_GPU_NONE);
+static constexpr bool c_binarySupportsGpus = (GMX_GPU != 0);
bool canPerformGpuDetection()
{
}
}
-#if GMX_GPU == GMX_GPU_NONE
+#if !GMX_GPU
DeviceStatus gpu_info_get_stat(const gmx_gpu_info_t& /*unused*/, int /*unused*/)
{
return DeviceStatus::Nonexistent;
#include "config.h"
-#if GMX_GPU == GMX_GPU_CUDA
+#if GMX_GPU_CUDA
# include "gromacs/gpu_utils/gputraits.cuh"
-#elif GMX_GPU == GMX_GPU_OPENCL
+#elif GMX_GPU_OPENCL
# include "gromacs/gpu_utils/gputraits_ocl.h"
using DeviceTexture = void*;
-//! Stub for device information.
+//! \internal Stub for device information.
struct DeviceInformation
{
// No member needed
/*! \brief Non-GPU builds return nullptr instead of streams,
* so we have to expect that in such build configurations. */
-const bool c_canExpectValidStreams = (GMX_GPU != GMX_GPU_NONE);
+constexpr bool c_canExpectValidStreams = (GMX_GPU != 0);
//! Helper function to implement readable testing
void expectValidStreams(DeviceStreamManager* manager, std::initializer_list<DeviceStreamType> types)
EXPECT_NE(AllocatorType{}, AllocatorType{ PinningPolicy::PinnedIfSupported });
}
-#if GMX_GPU == GMX_GPU_CUDA
+#if GMX_GPU_CUDA
// Policy suitable for pinning is only supported for a CUDA build
#include "testutils/testasserts.h"
-#if GMX_GPU != GMX_GPU_CUDA
+#if !GMX_GPU_CUDA
namespace gmx
{
} // namespace test
} // namespace gmx
-#endif // GMX_GPU != GMX_GPU_CUDA
+#endif // !GMX_GPU_CUDA
#include "typecasts_runner.h"
-#include "config.h"
-
#include <vector>
#include "gromacs/gpu_utils/cudautils.cuh"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/stringutil.h"
-#if GMX_GPU == GMX_GPU_CUDA
-
namespace gmx
{
} // namespace test
} // namespace gmx
-
-#endif // GMX_GPU == GMX_GPU_CUDA
\ No newline at end of file
/* The OpenCL support requires us to run detection on all ranks.
* With CUDA we don't need to, and prefer to detect on one rank
* and send the information to the other ranks over MPI. */
- bool allRanksMustDetectGpus = (GMX_GPU == GMX_GPU_OPENCL);
- bool gpusCanBeDetected = false;
+ constexpr bool allRanksMustDetectGpus = (GMX_GPU_OPENCL != 0);
+ bool gpusCanBeDetected = false;
if (isMasterRankOfPhysicalNode || allRanksMustDetectGpus)
{
std::string errorMessage;
#include "gromacs/utility/sysinfo.h"
//! Constant used to help minimize preprocessed code
-static const bool bGPUBinary = GMX_GPU != GMX_GPU_NONE;
+static constexpr bool bGPUBinary = (GMX_GPU != 0);
/*! \internal \brief
* Returns the GPU information text, one GPU per line.
#
# This file is part of the GROMACS molecular simulation package.
#
-# Copyright (c) 2014,2015,2016,2018,2019, by the GROMACS development team, led by
+# Copyright (c) 2014,2015,2016,2018,2019,2020, by the GROMACS development team, led by
# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
# and including many others, as listed in the AUTHORS file in the
# top-level source directory and at http://www.gromacs.org.
restcbt.cpp
)
-if(GMX_USE_CUDA)
+if(GMX_GPU_CUDA)
gmx_add_libgromacs_sources(
gpubonded_impl.cu
gpubondedkernels.cu
{
errorReasons.emplace_back("not supported with double precision");
}
- if (GMX_GPU == GMX_GPU_OPENCL)
+ if (GMX_GPU_OPENCL)
{
errorReasons.emplace_back("not supported with OpenCL build of GROMACS");
}
- else if (GMX_GPU == GMX_GPU_NONE)
+ else if (!GMX_GPU)
{
errorReasons.emplace_back("not supported with CPU-only build of GROMACS");
}
return addMessageIfNotSupported(errorReasons, error);
}
-#if GMX_GPU != GMX_GPU_CUDA
+#if !GMX_GPU_CUDA
class GpuBonded::Impl
{
void GpuBonded::clearEnergies() {}
-#endif /* GMX_GPU != GMX_GPU_CUDA */
+#endif // !GMX_GPU_CUDA
} // namespace gmx
if (BUILD_TESTING)
add_subdirectory(tests)
endif()
-if(GMX_USE_CUDA)
+if(GMX_GPU_CUDA)
gmx_add_libgromacs_sources(
leapfrog_gpu.cu
lincs_gpu.cu
    // For force buffer ops, we use the below condition rather than
// useGpuFBufferOps to ensure that init is performed even if this
// NS step is also a virial step (on which f buf ops are deactivated).
- if (simulationWork.useGpuBufferOps && simulationWork.useGpuNonbonded && (GMX_GPU == GMX_GPU_CUDA))
+ if (GMX_GPU_CUDA && simulationWork.useGpuBufferOps && simulationWork.useGpuNonbonded)
{
GMX_ASSERT(stateGpu, "stateGpu should be valid when buffer ops are offloaded");
nbv->atomdata_init_add_nbat_f_to_f_gpu(stateGpu->fReducedOnDevice());
{
runnersNames.emplace_back("SHAKE");
runnersNames.emplace_back("LINCS");
- if (GMX_GPU == GMX_GPU_CUDA && canComputeOnGpu())
+ if (GMX_GPU_CUDA && canComputeOnGpu())
{
runnersNames.emplace_back("LINCS_GPU");
}
done_lincs(lincsd);
}
-#if GMX_GPU != GMX_GPU_CUDA
+#if !GMX_GPU_CUDA
/*! \brief
* Stub for GPU version of LINCS constraints to satisfy compiler.
*
{
FAIL() << "Dummy LINCS CUDA function was called instead of the real one.";
}
-#endif
+#endif // !GMX_GPU_CUDA
} // namespace test
} // namespace gmx
// All runners should be registered here under appropriate conditions
//
s_runners_["LeapFrogSimple"] = integrateLeapFrogSimple;
- if (GMX_GPU == GMX_GPU_CUDA && canComputeOnGpu())
+ if (GMX_GPU_CUDA && canComputeOnGpu())
{
s_runners_["LeapFrogGpu"] = integrateLeapFrogGpu;
}
}
}
-#if GMX_GPU != GMX_GPU_CUDA
+#if !GMX_GPU_CUDA
void integrateLeapFrogGpu(gmx_unused LeapFrogTestData* testData, gmx_unused int numSteps)
{
FAIL() << "Dummy Leap-Frog CUDA function was called instead of the real one.";
}
-#endif // GMX_GPU != GMX_GPU_CUDA
+#endif // !GMX_GPU_CUDA
} // namespace test
} // namespace gmx
// 2. There is a CUDA-capable GPU in a system
// 3. This GPU is detectable
// 4. GPU detection was not disabled by GMX_DISABLE_GPU_DETECTION environment variable
- if (s_hasCompatibleGpus)
+ if (GMX_GPU_CUDA && s_hasCompatibleGpus)
{
- if (GMX_GPU == GMX_GPU_CUDA && s_hasCompatibleGpus)
- {
- runners_["SETTLE_GPU"] = applySettleGpu;
- }
+ runners_["SETTLE_GPU"] = applySettleGpu;
}
}
EXPECT_FALSE(errorOccured) << testDescription;
}
-#if GMX_GPU != GMX_GPU_CUDA
+#if !GMX_GPU_CUDA
void applySettleGpu(gmx_unused SettleTestData* testData,
gmx_unused const t_pbc pbc,
{
// These should never fail since this function should only be called if CUDA is enabled and
// there is a CUDA-capable device available.
- GMX_RELEASE_ASSERT(GMX_GPU == GMX_GPU_CUDA,
- "CUDA version of SETTLE was called from non-CUDA build.");
+ GMX_RELEASE_ASSERT(GMX_GPU_CUDA, "CUDA version of SETTLE was called from non-CUDA build.");
// TODO: Here we should check that at least 1 suitable GPU is available
GMX_RELEASE_ASSERT(canPerformGpuDetection(), "Can't detect CUDA-capable GPUs.");
#include "gromacs/mdlib/update_constrain_gpu.h"
-#if GMX_GPU != GMX_GPU_CUDA
+#if !GMX_GPU_CUDA
namespace gmx
{
} // namespace gmx
-#endif /* GMX_GPU != GMX_GPU_CUDA */
+#endif /* !GMX_GPU_CUDA */
*
* Any remaining vsites are assigned to a separate master thread task.
*/
-
namespace gmx
{
//! VirialHandling is often used outside VirtualSitesHandler class members
using VirialHandling = VirtualSitesHandler::VirialHandling;
-/*! \libinternal
- * \brief Information on PBC and domain decomposition for virtual sites
+/*! \brief Information on PBC and domain decomposition for virtual sites
*/
struct DomainInfo
{
const gmx_domdec_t* domdec_ = nullptr;
};
-/*! \libinternal
- * \brief List of atom indices belonging to a task
+/*! \brief List of atom indices belonging to a task
*/
struct AtomIndex
{
std::vector<int> atom;
};
-/*! \libinternal
- * \brief Data structure for thread tasks that use constructing atoms outside their own atom range
+/*! \brief Data structure for thread tasks that use constructing atoms outside their own atom range
*/
struct InterdependentTask
{
std::vector<int> reduceTask;
};
-/*! \libinternal
- * \brief Vsite thread task data structure
+/*! \brief Vsite thread task data structure
*/
struct VsiteThread
{
};
-/*! \libinternal
- * \brief Information on how the virtual site work is divided over thread tasks
+/*! \brief Information on how the virtual site work is divided over thread tasks
*/
class ThreadingInfo
{
std::vector<int> taskIndex_;
};
-/*! \libinternal
- * \brief Impl class for VirtualSitesHandler
+/*! \brief Impl class for VirtualSitesHandler
*/
class VirtualSitesHandler::Impl
{
// getenv results are ignored when clearly they are used.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
- devFlags.enableGpuBufferOps = (getenv("GMX_USE_GPU_BUFFER_OPS") != nullptr)
- && (GMX_GPU == GMX_GPU_CUDA) && useGpuForNonbonded;
- devFlags.forceGpuUpdateDefault = (getenv("GMX_FORCE_UPDATE_DEFAULT_GPU") != nullptr);
- devFlags.enableGpuHaloExchange =
- (getenv("GMX_GPU_DD_COMMS") != nullptr && GMX_THREAD_MPI && (GMX_GPU == GMX_GPU_CUDA));
+
+ devFlags.enableGpuBufferOps =
+ GMX_GPU_CUDA && useGpuForNonbonded && (getenv("GMX_USE_GPU_BUFFER_OPS") != nullptr);
+ devFlags.enableGpuHaloExchange = GMX_GPU_CUDA && GMX_THREAD_MPI && getenv("GMX_GPU_DD_COMMS") != nullptr;
devFlags.enableGpuPmePPComm =
- (getenv("GMX_GPU_PME_PP_COMMS") != nullptr && GMX_THREAD_MPI && (GMX_GPU == GMX_GPU_CUDA));
+ GMX_GPU_CUDA && GMX_THREAD_MPI && getenv("GMX_GPU_PME_PP_COMMS") != nullptr;
+
#pragma GCC diagnostic pop
if (devFlags.enableGpuBufferOps)
// timing enabling - TODO put this in gpu_utils (even though generally this is just option handling?)
bool useTiming = true;
- if (GMX_GPU == GMX_GPU_CUDA)
+
+ if (GMX_GPU_CUDA)
{
/* WARNING: CUDA timings are incorrect with multiple streams.
* This is the main reason why they are disabled by default.
// TODO: Consider turning on by default when we can detect nr of streams.
useTiming = (getenv("GMX_ENABLE_GPU_TIMING") != nullptr);
}
- else if (GMX_GPU == GMX_GPU_OPENCL)
+ else if (GMX_GPU_OPENCL)
{
useTiming = (getenv("GMX_DISABLE_GPU_TIMING") == nullptr);
}
observableshistory.cpp
state.cpp)
-if(GMX_USE_CUDA OR GMX_USE_OPENCL)
+if(GMX_GPU)
gmx_add_libgromacs_sources(
state_propagator_data_gpu_impl_gpu.cpp
)
#include "gromacs/mdtypes/state_propagator_data_gpu.h"
-#if GMX_GPU == GMX_GPU_NONE
+#if !GMX_GPU
namespace gmx
{
} // namespace gmx
-#endif // GMX_GPU == GMX_GPU_NONE
+#endif // !GMX_GPU
#include "config.h"
#include "gromacs/gpu_utils/devicebuffer.h"
-#if GMX_GPU == GMX_GPU_CUDA
+#if GMX_GPU_CUDA
# include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
-#elif GMX_GPU == GMX_GPU_OPENCL
+#elif GMX_GPU_OPENCL
# include "gromacs/gpu_utils/gpueventsynchronizer_ocl.h"
#endif
#include "gromacs/math/vectypes.h"
#include "config.h"
-#if GMX_GPU != GMX_GPU_NONE
+#if GMX_GPU
# include "gromacs/gpu_utils/device_stream_manager.h"
# include "gromacs/gpu_utils/devicebuffer.h"
wcycle_(wcycle)
{
static_assert(
- GMX_GPU != GMX_GPU_NONE,
+ GMX_GPU,
"GPU state propagator data object should only be constructed on the GPU code-paths.");
// We need to keep local copies for re-initialization.
localStream_ = &deviceStreamManager.stream(DeviceStreamType::NonBondedLocal);
nonLocalStream_ = &deviceStreamManager.stream(DeviceStreamType::NonBondedNonLocal);
// PME stream is used in OpenCL for H2D coordinate transfer
- if (GMX_GPU == GMX_GPU_OPENCL)
- {
- updateStream_ = &deviceStreamManager.stream(DeviceStreamType::Pme);
- }
- else
- {
- updateStream_ = &deviceStreamManager.stream(DeviceStreamType::UpdateAndConstraints);
- }
+ updateStream_ = &deviceStreamManager.stream(
+ GMX_GPU_OPENCL ? DeviceStreamType::Pme : DeviceStreamType::UpdateAndConstraints);
// Map the atom locality to the stream that will be used for coordinates,
// velocities and forces transfers. Same streams are used for H2D and D2H copies.
wcycle_(wcycle)
{
static_assert(
- GMX_GPU != GMX_GPU_NONE,
+ GMX_GPU,
"GPU state propagator data object should only be constructed on the GPU code-paths.");
GMX_ASSERT(pmeStream->isValid(), "GPU PME stream should be valid.");
reallocateDeviceBuffer(&d_v_, numAtomsAll_, &d_vSize_, &d_vCapacity_, deviceContext_);
const int d_fOldCapacity = d_fCapacity_;
reallocateDeviceBuffer(&d_f_, numAtomsAll_, &d_fSize_, &d_fCapacity_, deviceContext_);
+
// Clearing of the forces can be done in local stream since the nonlocal stream cannot reach
// the force accumulation stage before syncing with the local stream. Only done in CUDA,
// since the force buffer ops are not implemented in OpenCL.
- if (GMX_GPU == GMX_GPU_CUDA && d_fCapacity_ != d_fOldCapacity)
+ if (GMX_GPU_CUDA && d_fCapacity_ != d_fOldCapacity)
{
clearDeviceBufferAsync(&d_f_, 0, d_fCapacity_, *localStream_);
}
// - it's not needed, copy is done in the same stream as the only consumer task (PME)
// - we don't consume the events in OpenCL which is not allowed by GpuEventSynchronizer (would leak memory).
// TODO: remove this by adding an event-mark free flavor of this function
- if (GMX_GPU == GMX_GPU_CUDA)
+ if (GMX_GPU_CUDA)
{
xReadyOnDevice_[atomLocality].markEvent(*deviceStream);
}
// TODO: This should be reconsidered to support the halo exchange.
//
// In OpenCL no events are used as coordinate sync is not necessary
- if (GMX_GPU == GMX_GPU_OPENCL)
+ if (GMX_GPU_OPENCL)
{
return nullptr;
}
} // namespace gmx
-#endif // GMX_GPU == GMX_GPU_NONE
+#endif // GMX_GPU
benchmark/bench_system.cpp
)
-if(GMX_USE_CUDA)
+if(GMX_GPU_CUDA)
add_subdirectory(cuda)
gmx_add_libgromacs_sources(nbnxm_gpu_data_mgmt.cpp)
endif()
-if(GMX_USE_OPENCL)
+if(GMX_GPU_OPENCL)
add_subdirectory(opencl)
set(NBNXM_OPENCL_KERNELS ${NBNXM_OPENCL_KERNELS} PARENT_SCOPE)
gmx_add_libgromacs_sources(nbnxm_gpu_data_mgmt.cpp)
# To help us fund GROMACS development, we humbly ask that you cite
# the research papers on the package. Check out http://www.gromacs.org.
-if(GMX_USE_CUDA)
+if(GMX_GPU_CUDA)
if(NOT GMX_CUDA_NB_SINGLE_COMPILATION_UNIT)
set(NBNXM_CUDA_KERNEL_SOURCES
nbnxm_cuda_kernel_F_noprune.cu
#include <string>
-#if GMX_GPU == GMX_GPU_CUDA
+#if GMX_GPU_CUDA
# include "cuda/nbnxm_cuda_types.h"
#endif
-#if GMX_GPU == GMX_GPU_OPENCL
+#if GMX_GPU_OPENCL
# include "opencl/nbnxm_ocl_types.h"
#endif
#include "gromacs/nbnxm/nbnxm.h"
-#if GMX_GPU == GMX_GPU_CUDA
+#if GMX_GPU_CUDA
# include "cuda/nbnxm_cuda_types.h"
#endif
-#if GMX_GPU == GMX_GPU_OPENCL
+#if GMX_GPU_OPENCL
# include "opencl/nbnxm_ocl_types.h"
#endif
#include "pairlist.h"
-#if GMX_GPU == GMX_GPU_OPENCL
+#if GMX_GPU_OPENCL
# include "gromacs/gpu_utils/gpuregiontimer_ocl.h"
#endif
-#if GMX_GPU == GMX_GPU_CUDA
+#if GMX_GPU_CUDA
# include "gromacs/gpu_utils/gpuregiontimer.cuh"
#endif
#include "config.h"
-#if GMX_GPU == GMX_GPU_CUDA
+#if GMX_GPU_CUDA
# include "cuda/nbnxm_cuda_types.h"
#endif
-#if GMX_GPU == GMX_GPU_OPENCL
+#if GMX_GPU_OPENCL
# include "opencl/nbnxm_ocl_types.h"
#endif
# To help us fund GROMACS development, we humbly ask that you cite
# the research papers on the package. Check out http://www.gromacs.org.
-if(GMX_USE_OPENCL)
+if(GMX_GPU_OPENCL)
file(GLOB OPENCL_NB_SOURCES *.cpp)
set(NBNXM_SOURCES ${NBNXM_SOURCES} ${OPENCL_NB_SOURCES} PARENT_SCOPE)
endif()
static constexpr int c_nbnxnCpuIClusterSize = 4;
//! The i- and j-cluster size for GPU lists, 8 atoms for CUDA, set at compile time for OpenCL
-#if GMX_GPU == GMX_GPU_OPENCL
+#if GMX_GPU_OPENCL
static constexpr int c_nbnxnGpuClusterSize = GMX_OPENCL_NB_CLUSTER_SIZE;
#else
static constexpr int c_nbnxnGpuClusterSize = 8;
const char* g_specifyEverythingFormatString =
"When you use mdrun -gputasks, %s must be set to non-default "
"values, so that the device IDs can be interpreted correctly."
-#if GMX_GPU != GMX_GPU_NONE
+#if GMX_GPU
" If you simply want to restrict which GPUs are used, then it is "
"better to use mdrun -gpu_id. Otherwise, setting the "
-# if GMX_GPU == GMX_GPU_CUDA
+# if GMX_GPU_CUDA
"CUDA_VISIBLE_DEVICES"
-# elif GMX_GPU == GMX_GPU_OPENCL
+# elif GMX_GPU_OPENCL
// Technically there is no portable way to do this offered by the
// OpenCL standard, but the only current relevant case for GROMACS
// is AMD OpenCL, which offers this variable.
{
errorMessage += "Compatible GPUs must have been found.\n";
}
- if (GMX_GPU != GMX_GPU_CUDA)
+ if (!GMX_GPU_CUDA)
{
errorMessage += "Only a CUDA build is supported.\n";
}
# the research papers on the package. Check out http://www.gromacs.org.
file(GLOB UTILITY_SOURCES *.cpp)
-if (GMX_USE_CUDA)
+if (GMX_GPU_CUDA)
gmx_add_libgromacs_sources(cuda_version_information.cu)
endif()
set(LIBGROMACS_SOURCES ${LIBGROMACS_SOURCES} ${UTILITY_SOURCES} PARENT_SCOPE)
writer->writeLine(formatString("Linked with Intel MKL version %d.%d.%d.", __INTEL_MKL__,
__INTEL_MKL_MINOR__, __INTEL_MKL_UPDATE__));
#endif
-#if GMX_GPU == GMX_GPU_OPENCL
+#if GMX_GPU_OPENCL
writer->writeLine(formatString("OpenCL include dir: %s", OPENCL_INCLUDE_DIR));
writer->writeLine(formatString("OpenCL library: %s", OPENCL_LIBRARY));
writer->writeLine(formatString("OpenCL version: %s", OPENCL_VERSION_STRING));
#endif
-#if GMX_GPU == GMX_GPU_CUDA
+#if GMX_GPU_CUDA
writer->writeLine(formatString("CUDA compiler: %s", CUDA_COMPILER_INFO));
writer->writeLine(formatString("CUDA compiler flags:%s %s", CUDA_COMPILER_FLAGS,
CMAKE_BUILD_CONFIGURATION_CXX_FLAGS));
// TODO The time for OpenCL kernel compilation means these tests time
// out. Once that compilation is cached for the whole process, these
// tests can run in such configurations.
-#if GMX_GPU != GMX_GPU_OPENCL
+#if !GMX_GPU_OPENCL
INSTANTIATE_TEST_CASE_P(
NormalIntegrators,
// TODO The time for OpenCL kernel compilation means these tests time
// out. Once that compilation is cached for the whole process, these
// tests can run in such configurations.
-#if GMX_GPU != GMX_GPU_OPENCL
+#if !GMX_GPU_OPENCL
INSTANTIATE_TEST_CASE_P(BasicPropagators,
PeriodicActionsTest,
Combine(ValuesIn(simplePropagationParameters()), Values(outputParameters)));
// TODO The time for OpenCL kernel compilation means these tests time
// out. Once that compilation is cached for the whole process, these
// tests can run in such configurations.
-#if GMX_GPU != GMX_GPU_OPENCL
+#if !GMX_GPU_OPENCL
INSTANTIATE_TEST_CASE_P(
NormalMdrunIsReproduced,
MdrunRerunTest,
// TODO The time for OpenCL kernel compilation means these tests time
// out. Once that compilation is cached for the whole process, these
// tests can run in such configurations.
-#if GMX_GPU != GMX_GPU_OPENCL
+#if !GMX_GPU_OPENCL
INSTANTIATE_TEST_CASE_P(MdrunIsReproduced,
MdrunRerunFreeEnergyTest,
::testing::Combine(::testing::Values("nonanol_vacuo"),
// tests can run in such configurations.
// These tests are very sensitive, so we only run them in double precision.
// As we change call ordering, they might actually become too strict to be useful.
-#if GMX_GPU != GMX_GPU_OPENCL && GMX_DOUBLE
+#if !GMX_GPU_OPENCL && GMX_DOUBLE
INSTANTIATE_TEST_CASE_P(SimulatorsAreEquivalentDefaultModular,
SimulatorComparisonTest,
::testing::Combine(::testing::Combine(::testing::Values("argon12", "tip3p5"),
TEST_USES_HARDWARE_DETECTION=true)
endif()
- if (GMX_USE_CUDA AND NOT GMX_CLANG_CUDA)
+ if (GMX_GPU_CUDA AND NOT GMX_CLANG_CUDA)
# Work around FindCUDA that prevents using target_link_libraries()
# with keywords otherwise...
set(CUDA_LIBRARIES PRIVATE ${CUDA_LIBRARIES})
${TESTUTILS_DIR}/unittest_main.cpp)
endif()
- if (GMX_USE_CUDA)
+ if (GMX_GPU_CUDA)
if (GMX_CLANG_CUDA)
target_sources(${EXENAME} PRIVATE
${ARG_CUDA_CU_SOURCE_FILES}
target_link_libraries(${EXENAME} PRIVATE ${GMX_EXTRA_LIBRARIES})
endif()
endif()
- elseif (GMX_USE_OPENCL)
+ elseif (GMX_GPU_OPENCL)
target_sources(${EXENAME} PRIVATE ${ARG_OPENCL_CPP_SOURCE_FILES} ${ARG_GPU_CPP_SOURCE_FILES})
if(ARG_OPENCL_CPP_SOURCE_FILES OR ARG_GPU_CPP_SOURCE_FILES)
target_link_libraries(${EXENAME} PRIVATE ${OpenCL_LIBRARIES})
# Both OpenCL (from JIT) and ThreadSanitizer (from how it
# checks) can take signficantly more time than other
# configurations.
- if (GMX_USE_OPENCL)
+ if (GMX_GPU_OPENCL)
set(_timeout 240)
elseif (${CMAKE_BUILD_TYPE} STREQUAL TSAN)
set(_timeout 300)