From 75c17a76b69c1ade1f29fdce9fe2f6dd78221c1d Mon Sep 17 00:00:00 2001 From: Erik Lindahl Date: Tue, 4 Aug 2020 19:27:21 +0000 Subject: [PATCH] Simplified uniform GPU selection in CMake GPU selection is now done by setting GMX_GPU to either CUDA or OpenCL, with no other variables required. As part of the overall CMake simplification, the CMake automatic detection of hardware and "auto" settings for GPU acceleration have been removed. This will require the user to explicitly enable GPU support (which is a drawback), but it leads to much simpler and shorter CMake code, similar support for CUDA and OpenCL, and it will make it easier to handle multiple different APIs targeting e.g. NVIDIA GPU hardware in the near future. --- CMakeLists.txt | 12 ++--- admin/gitlab-ci/documentation.gitlab-ci.yml | 4 +- admin/gitlab-ci/gromacs.gitlab-ci.yml | 6 +-- cmake/gmxManageCuda.cmake | 2 +- cmake/gmxManageOpenCL.cmake | 2 +- src/CMakeLists.txt | 2 +- src/api/cpp/tests/CMakeLists.txt | 4 +- src/config.h.cmakein | 14 +++--- src/gromacs/CMakeLists.txt | 10 ++--- src/gromacs/domdec/CMakeLists.txt | 4 +- src/gromacs/domdec/gpuhaloexchange_impl.cpp | 4 +- src/gromacs/ewald/CMakeLists.txt | 4 +- src/gromacs/ewald/pme.cpp | 6 +-- .../pme_coordinate_receiver_gpu_impl.cpp | 6 +-- .../ewald/pme_force_sender_gpu_impl.cpp | 6 +-- src/gromacs/ewald/pme_gpu.cpp | 5 ++- src/gromacs/ewald/pme_gpu_3dfft.h | 8 ++-- src/gromacs/ewald/pme_gpu_constants.h | 6 +-- src/gromacs/ewald/pme_gpu_internal.cpp | 44 ++++++++++--------- src/gromacs/ewald/pme_gpu_program_impl.h | 4 +- src/gromacs/ewald/pme_gpu_types_host.h | 6 +-- src/gromacs/ewald/pme_gpu_types_host_impl.h | 4 +- src/gromacs/ewald/pme_only.cpp | 2 +- src/gromacs/ewald/pme_pp_comm_gpu_impl.cpp | 6 +-- src/gromacs/fft/fft5d.cpp | 2 +- src/gromacs/gpu_utils/CMakeLists.txt | 4 +- src/gromacs/gpu_utils/clfftinitializer.cpp | 8 ++-- src/gromacs/gpu_utils/device_context.h | 4 +- src/gromacs/gpu_utils/device_context_ocl.cpp | 4 ++ 
src/gromacs/gpu_utils/device_context_ocl.h | 7 ++- src/gromacs/gpu_utils/device_stream.h | 8 ++-- src/gromacs/gpu_utils/devicebuffer.h | 4 +- src/gromacs/gpu_utils/devicebuffer_datatype.h | 6 +-- src/gromacs/gpu_utils/gpu_macros.h | 10 ++--- src/gromacs/gpu_utils/gpu_utils.cpp | 4 +- src/gromacs/gpu_utils/gputraits.h | 6 +-- .../gpu_utils/tests/device_stream_manager.cpp | 2 +- src/gromacs/gpu_utils/tests/hostallocator.cpp | 2 +- .../gpu_utils/tests/typecasts_runner.cpp | 4 +- .../gpu_utils/tests/typecasts_runner.cu | 6 --- src/gromacs/hardware/detecthardware.cpp | 4 +- src/gromacs/hardware/printhardware.cpp | 2 +- src/gromacs/listed_forces/CMakeLists.txt | 4 +- src/gromacs/listed_forces/gpubonded_impl.cpp | 8 ++-- src/gromacs/mdlib/CMakeLists.txt | 2 +- src/gromacs/mdlib/sim_util.cpp | 2 +- src/gromacs/mdlib/tests/constr.cpp | 2 +- src/gromacs/mdlib/tests/constrtestrunners.cpp | 4 +- src/gromacs/mdlib/tests/leapfrog.cpp | 2 +- .../mdlib/tests/leapfrogtestrunners.cpp | 4 +- src/gromacs/mdlib/tests/settle.cpp | 7 +-- src/gromacs/mdlib/tests/settletestrunners.cpp | 2 +- src/gromacs/mdlib/tests/settletestrunners.cu | 3 +- .../mdlib/update_constrain_gpu_impl.cpp | 4 +- src/gromacs/mdlib/vsite.cpp | 19 +++----- src/gromacs/mdrun/runner.cpp | 17 +++---- src/gromacs/mdtypes/CMakeLists.txt | 2 +- .../state_propagator_data_gpu_impl.cpp | 4 +- .../mdtypes/state_propagator_data_gpu_impl.h | 4 +- .../state_propagator_data_gpu_impl_gpu.cpp | 25 +++++------ src/gromacs/nbnxm/CMakeLists.txt | 4 +- src/gromacs/nbnxm/cuda/CMakeLists.txt | 2 +- src/gromacs/nbnxm/gpu_common.h | 4 +- src/gromacs/nbnxm/gpu_common_utils.h | 4 +- src/gromacs/nbnxm/gpu_types_common.h | 4 +- src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp | 4 +- src/gromacs/nbnxm/opencl/CMakeLists.txt | 2 +- src/gromacs/nbnxm/pairlistparams.h | 2 +- src/gromacs/taskassignment/decidegpuusage.cpp | 8 ++-- src/gromacs/utility/CMakeLists.txt | 2 +- src/gromacs/utility/binaryinformation.cpp | 4 +- .../mdrun/tests/exactcontinuation.cpp 
| 2 +- src/programs/mdrun/tests/periodicactions.cpp | 2 +- src/programs/mdrun/tests/rerun.cpp | 4 +- src/programs/mdrun/tests/simulator.cpp | 2 +- src/testutils/TestMacros.cmake | 8 ++-- 76 files changed, 210 insertions(+), 221 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cbc6a16be4..94afc5a6f6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -204,8 +204,8 @@ option(GMX_INSTALL_LEGACY_API "Install legacy headers" OFF) gmx_option_multichoice( GMX_GPU "Framework for GPU acceleration" - None - None CUDA OpenCL) + OFF + OFF CUDA OpenCL) gmx_option_multichoice( GMX_SIMD @@ -568,9 +568,9 @@ include(gmxManageTNG) include(gmxManageLmfit) -string(TOUPPER "${GMX_GPU}" _gmx_gpu_uppercase) -if(NOT ${_gmx_gpu_uppercase} STREQUAL "NONE") +if(GMX_GPU) + string(TOUPPER "${GMX_GPU}" _gmx_gpu_uppercase) if(${_gmx_gpu_uppercase} STREQUAL "CUDA") include(gmxManageCuda) elseif(${_gmx_gpu_uppercase} STREQUAL "OPENCL") @@ -582,10 +582,6 @@ if(NOT ${_gmx_gpu_uppercase} STREQUAL "NONE") endif() -# Not ideal to set this without the GPU acceleration, but the source presently requires GMX_GPU -# to be set to GMX_GPU_None for a non-GPU build to work. -set(GMX_GPU_ACCELERATION_FRAMEWORK "GMX_GPU_${_gmx_gpu_uppercase}") - if(CYGWIN) set(GMX_CYGWIN 1) endif() diff --git a/admin/gitlab-ci/documentation.gitlab-ci.yml b/admin/gitlab-ci/documentation.gitlab-ci.yml index 4087a7c8cb..15dede0c5e 100644 --- a/admin/gitlab-ci/documentation.gitlab-ci.yml +++ b/admin/gitlab-ci/documentation.gitlab-ci.yml @@ -19,7 +19,7 @@ CMAKE_MPI_OPTIONS: "-DGMX_THREAD_MPI=OFF" CMAKE_PRECISION_OPTIONS: "-DGMX_DOUBLE=OFF" CMAKE_BUILD_TYPE_OPTIONS: "-DCMAKE_BUILD_TYPE=Debug" - CMAKE_GPU_OPTIONS: "-DGMX_GPU=None" + CMAKE_GPU_OPTIONS: "-DGMX_GPU=OFF" CMAKE_GMXAPI_OPTIONS: "-DGMX_PYTHON_PACKAGE=ON" script: - if [[ ! 
-z $GROMACS_RELEASE && $GROMACS_RELEASE == "true" ]] ; then @@ -86,7 +86,7 @@ CMAKE_MPI_OPTIONS: "-DGMX_THREAD_MPI=ON" CMAKE_PRECISION_OPTIONS: "-DGMX_DOUBLE=OFF" CMAKE_BUILD_TYPE_OPTIONS: "-DCMAKE_BUILD_TYPE=Debug" - CMAKE_GPU_OPTIONS: "-DGMX_GPU=None" + CMAKE_GPU_OPTIONS: "-DGMX_GPU=OFF" CMAKE_GMXAPI_OPTIONS: "-DGMX_PYTHON_PACKAGE=ON" RELEASE_BUILD_DIR: release-doc-builds RELEASE_SOURCE: release-source-from-tarball diff --git a/admin/gitlab-ci/gromacs.gitlab-ci.yml b/admin/gitlab-ci/gromacs.gitlab-ci.yml index 84f1008ce3..4c38a6f15d 100644 --- a/admin/gitlab-ci/gromacs.gitlab-ci.yml +++ b/admin/gitlab-ci/gromacs.gitlab-ci.yml @@ -17,7 +17,7 @@ simple-build: CMAKE_MPI_OPTIONS: "-DGMX_THREAD_MPI=ON" CMAKE_PRECISION_OPTIONS: "-DGMX_DOUBLE=OFF" CMAKE_BUILD_TYPE_OPTIONS: "-DCMAKE_BUILD_TYPE=Debug" - CMAKE_GPU_OPTIONS: "-DGMX_GPU=None" + CMAKE_GPU_OPTIONS: "-DGMX_GPU=OFF" CMAKE_GMXAPI_OPTIONS: "-DGMX_PYTHON_PACKAGE=OFF" COMPILER_MAJOR_VERSION: 9 BUILD_DIR: simple-build @@ -93,7 +93,7 @@ simple-build: CMAKE_MPI_OPTIONS: "-DGMX_THREAD_MPI=ON" CMAKE_PRECISION_OPTIONS: "-DGMX_DOUBLE=OFF" CMAKE_BUILD_TYPE_OPTIONS: "-DCMAKE_BUILD_TYPE=Debug" - CMAKE_GPU_OPTIONS: "-DGMX_GPU=None" + CMAKE_GPU_OPTIONS: "-DGMX_GPU=OFF" script: - echo $CMAKE_COMPILER_SCRIPT - echo $CMAKE_EXTRA_OPTIONS @@ -145,7 +145,7 @@ simple-build: CMAKE_MPI_OPTIONS: "-DGMX_THREAD_MPI=ON" CMAKE_PRECISION_OPTIONS: "-DGMX_DOUBLE=OFF" CMAKE_BUILD_TYPE_OPTIONS: "-DCMAKE_BUILD_TYPE=RelWithAssert" - CMAKE_GPU_OPTIONS: "-DGMX_GPU=None" + CMAKE_GPU_OPTIONS: "-DGMX_GPU=OFF" CMAKE_REGRESSIONTEST_OPTIONS: "-DREGRESSIONTEST_PATH=../\\$RELEASE_REGRESSIONTESTS" RELEASE_BUILD_DIR: release-builds RELEASE_SOURCE: release-source-from-tarball diff --git a/cmake/gmxManageCuda.cmake b/cmake/gmxManageCuda.cmake index 6d90f0e2e5..96aaa27c4e 100644 --- a/cmake/gmxManageCuda.cmake +++ b/cmake/gmxManageCuda.cmake @@ -38,7 +38,7 @@ set(REQUIRED_CUDA_VERSION 9.0) set(REQUIRED_CUDA_COMPUTE_CAPABILITY 3.0) -set(GMX_USE_CUDA ON) 
+set(GMX_GPU_CUDA ON) option(GMX_CLANG_CUDA "Use clang for CUDA" OFF) diff --git a/cmake/gmxManageOpenCL.cmake b/cmake/gmxManageOpenCL.cmake index 402ecacb45..3f849e9e28 100644 --- a/cmake/gmxManageOpenCL.cmake +++ b/cmake/gmxManageOpenCL.cmake @@ -38,7 +38,7 @@ set(REQUIRED_OPENCL_MIN_VERSION_MAJOR 1) set(REQUIRED_OPENCL_MIN_VERSION_MINOR 2) set(REQUIRED_OPENCL_MIN_VERSION ${REQUIRED_OPENCL_MIN_VERSION_MAJOR}.${REQUIRED_OPENCL_MIN_VERSION_MINOR}) -set(GMX_USE_OPENCL ON) +set(GMX_GPU_OPENCL ON) if(GMX_DOUBLE) message(FATAL_ERROR "OpenCL acceleration is not available in double precision") diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8c7107072d..e05d603e49 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -40,7 +40,7 @@ include(GetCompilerInfo.cmake) get_compiler_info(C BUILD_C_COMPILER) get_compiler_info(CXX BUILD_CXX_COMPILER) -if(GMX_USE_CUDA) +if(GMX_GPU_CUDA) if(NOT GMX_CLANG_CUDA) GMX_SET_CUDA_NVCC_FLAGS() endif() diff --git a/src/api/cpp/tests/CMakeLists.txt b/src/api/cpp/tests/CMakeLists.txt index e7c6498c0d..e674ac44e6 100644 --- a/src/api/cpp/tests/CMakeLists.txt +++ b/src/api/cpp/tests/CMakeLists.txt @@ -60,7 +60,7 @@ gmx_add_gtest_executable(gmxapi-test # pseudo-library for code for mdrun $ ) -if(NOT GMX_USE_OPENCL) +if(NOT GMX_GPU_OPENCL) # GPU resources may not be properly reinitialized between simulations in # the same process. # TODO: include this with the other test sources once the issue is resolved @@ -89,7 +89,7 @@ gmx_add_gtest_executable(gmxapi-mpi-test MPI # pseudo-library for code for mdrun $ ) -if(NOT GMX_USE_OPENCL) +if(NOT GMX_GPU_OPENCL) # GPU resources may not be properly reinitialized between simulations in # the same process. 
# TODO: include this with the other test sources once the issue is resolved diff --git a/src/config.h.cmakein b/src/config.h.cmakein index 524ff47a53..32e7026306 100644 --- a/src/config.h.cmakein +++ b/src/config.h.cmakein @@ -224,12 +224,14 @@ /* Enable x86 gcc inline assembly */ #cmakedefine01 GMX_X86_GCC_INLINE_ASM -/* Define constants useful for handling GPU support */ -#define GMX_GPU_NONE 0 -#define GMX_GPU_CUDA 1 -#define GMX_GPU_OPENCL 2 -/* Which kind of GPU support is configured */ -#define GMX_GPU @GMX_GPU_ACCELERATION_FRAMEWORK@ +/* Define if any type of GPU acceleration is compiled */ +#cmakedefine01 GMX_GPU + +/* Define if CUDA GPU acceleration is compiled */ +#cmakedefine01 GMX_GPU_CUDA + +/* Define if OpenCL GPU acceleration is compiled */ +#cmakedefine01 GMX_GPU_OPENCL /* Use a single compilation unit when compiling the CUDA (non-bonded) kernels. */ #cmakedefine01 GMX_CUDA_NB_SINGLE_COMPILATION_UNIT diff --git a/src/gromacs/CMakeLists.txt b/src/gromacs/CMakeLists.txt index caa8d41d82..3416b46680 100644 --- a/src/gromacs/CMakeLists.txt +++ b/src/gromacs/CMakeLists.txt @@ -167,7 +167,7 @@ gmx_configure_version_file( list(APPEND LIBGROMACS_SOURCES ${GENERATED_VERSION_FILE}) # Mark some shared GPU implementation files to compile with CUDA if needed -if (GMX_USE_CUDA) +if (GMX_GPU_CUDA) get_property(LIBGROMACS_GPU_IMPL_SOURCES GLOBAL PROPERTY GMX_LIBGROMACS_GPU_IMPL_SOURCES) set_source_files_properties(${LIBGROMACS_GPU_IMPL_SOURCES} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ) endif() @@ -183,7 +183,7 @@ if (GMX_CLANG_CUDA) endforeach() endif() -if (GMX_USE_CUDA) +if (GMX_GPU_CUDA) # Work around FindCUDA that prevents using target_link_libraries() # with keywords otherwise... 
set(CUDA_LIBRARIES PRIVATE ${CUDA_LIBRARIES}) @@ -222,7 +222,7 @@ gmx_target_compile_options(libgromacs) target_compile_definitions(libgromacs PRIVATE HAVE_CONFIG_H) target_include_directories(libgromacs SYSTEM BEFORE PRIVATE ${PROJECT_SOURCE_DIR}/src/external/thread_mpi/include) -if (GMX_USE_OPENCL) +if (GMX_GPU_OPENCL) option(GMX_EXTERNAL_CLFFT "True if an external clFFT is required to be used" FALSE) mark_as_advanced(GMX_EXTERNAL_CLFFT) @@ -381,7 +381,7 @@ endif() # using the CUDA runtime, but currently there's no reason to want to # do that. if (INSTALL_CUDART_LIB) #can be set manual by user - if (GMX_USE_CUDA) + if (GMX_GPU_CUDA) foreach(CUDA_LIB ${CUDA_LIBRARIES}) string(REGEX MATCH "cudart" IS_CUDART ${CUDA_LIB}) if(IS_CUDART) #libcuda should not be installed @@ -396,7 +396,7 @@ if (INSTALL_CUDART_LIB) #can be set manual by user endif() endif() -if(GMX_USE_OPENCL) +if(GMX_GPU_OPENCL) # Install the utility headers file(GLOB OPENCL_INSTALLED_FILES gpu_utils/vectype_ops.clh diff --git a/src/gromacs/domdec/CMakeLists.txt b/src/gromacs/domdec/CMakeLists.txt index c4fca1e308..d573003711 100644 --- a/src/gromacs/domdec/CMakeLists.txt +++ b/src/gromacs/domdec/CMakeLists.txt @@ -1,7 +1,7 @@ # # This file is part of the GROMACS molecular simulation package. # -# Copyright (c) 2014,2018,2019, by the GROMACS development team, led by +# Copyright (c) 2014,2018,2019,2020, by the GROMACS development team, led by # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, # and including many others, as listed in the AUTHORS file in the # top-level source directory and at http://www.gromacs.org. 
@@ -34,7 +34,7 @@ file(GLOB DOMDEC_SOURCES *.cpp) -if(GMX_USE_CUDA) +if(GMX_GPU_CUDA) file(GLOB DOMDEC_CUDA_SOURCES gpuhaloexchange_impl.cu) endif() diff --git a/src/gromacs/domdec/gpuhaloexchange_impl.cpp b/src/gromacs/domdec/gpuhaloexchange_impl.cpp index 247c65b4e0..a6a0915fc7 100644 --- a/src/gromacs/domdec/gpuhaloexchange_impl.cpp +++ b/src/gromacs/domdec/gpuhaloexchange_impl.cpp @@ -49,7 +49,7 @@ #include "gromacs/domdec/gpuhaloexchange.h" -#if GMX_GPU != GMX_GPU_CUDA +#if !GMX_GPU_CUDA namespace gmx { @@ -109,4 +109,4 @@ GpuEventSynchronizer* GpuHaloExchange::getForcesReadyOnDeviceEvent() } // namespace gmx -#endif /* GMX_GPU != GMX_GPU_CUDA */ +#endif // !GMX_GPU_CUDA diff --git a/src/gromacs/ewald/CMakeLists.txt b/src/gromacs/ewald/CMakeLists.txt index 15bd872bf4..52c57a9a76 100644 --- a/src/gromacs/ewald/CMakeLists.txt +++ b/src/gromacs/ewald/CMakeLists.txt @@ -54,7 +54,7 @@ gmx_add_libgromacs_sources( pme_coordinate_receiver_gpu_impl.cpp pme_force_sender_gpu_impl.cpp ) -if (GMX_USE_CUDA) +if (GMX_GPU_CUDA) gmx_add_libgromacs_sources( # CUDA-specific sources pme_gather.cu @@ -70,7 +70,7 @@ if (GMX_USE_CUDA) pme_gpu_internal.cpp pme_gpu_timings.cpp ) -elseif (GMX_USE_OPENCL) +elseif (GMX_GPU_OPENCL) gmx_add_libgromacs_sources( # OpenCL-specific sources pme_gpu_3dfft_ocl.cpp diff --git a/src/gromacs/ewald/pme.cpp b/src/gromacs/ewald/pme.cpp index f26c2f6fe3..403ea96095 100644 --- a/src/gromacs/ewald/pme.cpp +++ b/src/gromacs/ewald/pme.cpp @@ -159,7 +159,7 @@ bool pme_gpu_supports_build(std::string* error) { errorReasons.emplace_back("a double-precision build"); } - if (GMX_GPU == GMX_GPU_NONE) + if (!GMX_GPU) { errorReasons.emplace_back("a non-GPU build"); } @@ -170,7 +170,7 @@ bool pme_gpu_supports_hardware(const gmx_hw_info_t gmx_unused& hwinfo, std::stri { std::list errorReasons; - if (GMX_GPU == GMX_GPU_OPENCL) + if (GMX_GPU_OPENCL) { #ifdef __APPLE__ errorReasons.emplace_back("Apple OS X operating system"); @@ -231,7 +231,7 @@ static bool 
pme_gpu_check_restrictions(const gmx_pme_t* pme, std::string* error) { errorReasons.emplace_back("double precision"); } - if (GMX_GPU == GMX_GPU_NONE) + if (!GMX_GPU) { errorReasons.emplace_back("non-GPU build of GROMACS"); } diff --git a/src/gromacs/ewald/pme_coordinate_receiver_gpu_impl.cpp b/src/gromacs/ewald/pme_coordinate_receiver_gpu_impl.cpp index e377cb8731..be4cf2a16a 100644 --- a/src/gromacs/ewald/pme_coordinate_receiver_gpu_impl.cpp +++ b/src/gromacs/ewald/pme_coordinate_receiver_gpu_impl.cpp @@ -51,12 +51,12 @@ #include "gromacs/utility/arrayref.h" #include "gromacs/utility/gmxassert.h" -#if GMX_GPU != GMX_GPU_CUDA +#if !GMX_GPU_CUDA namespace gmx { -/*!\brief Impl class stub. */ +/*!\brief \internal Impl class stub. */ class PmeCoordinateReceiverGpu::Impl { }; @@ -98,4 +98,4 @@ void PmeCoordinateReceiverGpu::enqueueWaitReceiveCoordinatesFromPpCudaDirect() } // namespace gmx -#endif /* GMX_GPU != GMX_GPU_CUDA */ +#endif // !GMX_GPU_CUDA diff --git a/src/gromacs/ewald/pme_force_sender_gpu_impl.cpp b/src/gromacs/ewald/pme_force_sender_gpu_impl.cpp index aa046ee718..9a05facec4 100644 --- a/src/gromacs/ewald/pme_force_sender_gpu_impl.cpp +++ b/src/gromacs/ewald/pme_force_sender_gpu_impl.cpp @@ -50,12 +50,12 @@ #include "gromacs/ewald/pme_force_sender_gpu.h" #include "gromacs/utility/arrayref.h" -#if GMX_GPU != GMX_GPU_CUDA +#if !GMX_GPU_CUDA namespace gmx { -/*!\brief Impl class stub. */ +/*!\brief \internal Impl class stub. 
*/ class PmeForceSenderGpu::Impl { }; @@ -90,4 +90,4 @@ void PmeForceSenderGpu::sendFToPpCudaDirect(int /* ppRank */) } // namespace gmx -#endif /* GMX_GPU != GMX_GPU_CUDA */ +#endif // !GMX_GPU_CUDA diff --git a/src/gromacs/ewald/pme_gpu.cpp b/src/gromacs/ewald/pme_gpu.cpp index bcac411467..19215fa90f 100644 --- a/src/gromacs/ewald/pme_gpu.cpp +++ b/src/gromacs/ewald/pme_gpu.cpp @@ -195,7 +195,7 @@ void pme_gpu_launch_spread(gmx_pme_t* pme, const real lambdaQ) { GMX_ASSERT(pme_gpu_active(pme), "This should be a GPU run of PME but it is not enabled."); - GMX_ASSERT(xReadyOnDevice || !pme->bPPnode || (GMX_GPU != GMX_GPU_CUDA), + GMX_ASSERT(!GMX_GPU_CUDA || xReadyOnDevice || !pme->bPPnode, "Need a valid xReadyOnDevice on PP+PME ranks with CUDA."); GMX_ASSERT(pme->doCoulomb, "Only Coulomb PME can be run on GPU."); @@ -344,7 +344,8 @@ bool pme_gpu_try_finish_task(gmx_pme_t* pme, // time needed for that checking, but do not yet record that the // gather has occured. bool needToSynchronize = true; - constexpr bool c_streamQuerySupported = (GMX_GPU == GMX_GPU_CUDA); + constexpr bool c_streamQuerySupported = bool(GMX_GPU_CUDA); + // TODO: implement c_streamQuerySupported with an additional GpuEventSynchronizer per stream (#2521) if ((completionKind == GpuTaskCompletion::Check) && c_streamQuerySupported) { diff --git a/src/gromacs/ewald/pme_gpu_3dfft.h b/src/gromacs/ewald/pme_gpu_3dfft.h index 96cfa9ed6a..c334d013e3 100644 --- a/src/gromacs/ewald/pme_gpu_3dfft.h +++ b/src/gromacs/ewald/pme_gpu_3dfft.h @@ -47,11 +47,11 @@ #include -#if GMX_GPU == GMX_GPU_CUDA +#if GMX_GPU_CUDA # include # include "gromacs/gpu_utils/gputraits.cuh" -#elif GMX_GPU == GMX_GPU_OPENCL +#elif GMX_GPU_OPENCL # include # include "gromacs/gpu_utils/gmxopencl.h" @@ -86,12 +86,12 @@ public: void perform3dFft(gmx_fft_direction dir, CommandEvent* timingEvent); private: -#if GMX_GPU == GMX_GPU_CUDA +#if GMX_GPU_CUDA cufftHandle planR2C_; cufftHandle planC2R_; cufftReal* realGrid_; cufftComplex* 
complexGrid_; -#elif GMX_GPU == GMX_GPU_OPENCL +#elif GMX_GPU_OPENCL clfftPlanHandle planR2C_; clfftPlanHandle planC2R_; std::vector deviceStreams_; diff --git a/src/gromacs/ewald/pme_gpu_constants.h b/src/gromacs/ewald/pme_gpu_constants.h index 5bfe1c3dde..d2503cb948 100644 --- a/src/gromacs/ewald/pme_gpu_constants.h +++ b/src/gromacs/ewald/pme_gpu_constants.h @@ -53,7 +53,7 @@ #include "config.h" -#if GMX_GPU == GMX_GPU_CUDA +#if GMX_GPU_CUDA # include "gromacs/gpu_utils/cuda_arch_utils.cuh" // for warp_size #endif @@ -143,7 +143,7 @@ constexpr int c_solveMaxWarpsPerBlock = 8; //! Gathering max block width in warps - picked empirically among 2, 4, 8, 16 for max. occupancy and min. runtime constexpr int c_gatherMaxWarpsPerBlock = 4; -#if GMX_GPU == GMX_GPU_CUDA +#if GMX_GPU_CUDA /* All the fields below are dependent on warp_size and should * ideally be removed from the device-side code, as we have to * do that for OpenCL already. @@ -164,6 +164,6 @@ static constexpr int c_gatherMaxThreadsPerBlock = c_gatherMaxWarpsPerBlock * war //! 
Gathering min blocks per CUDA multiprocessor static constexpr int c_gatherMinBlocksPerMP = GMX_CUDA_MAX_THREADS_PER_MP / c_gatherMaxThreadsPerBlock; -#endif // GMX_GPU == GMX_GPU_CUDA +#endif // GMX_GPU_CUDA #endif diff --git a/src/gromacs/ewald/pme_gpu_internal.cpp b/src/gromacs/ewald/pme_gpu_internal.cpp index 635f0f1a62..24e9a4ba4b 100644 --- a/src/gromacs/ewald/pme_gpu_internal.cpp +++ b/src/gromacs/ewald/pme_gpu_internal.cpp @@ -68,11 +68,11 @@ #include "gromacs/utility/logger.h" #include "gromacs/utility/stringutil.h" -#if GMX_GPU == GMX_GPU_CUDA +#if GMX_GPU_CUDA # include "gromacs/gpu_utils/pmalloc_cuda.h" # include "pme.cuh" -#elif GMX_GPU == GMX_GPU_OPENCL +#elif GMX_GPU_OPENCL # include "gromacs/gpu_utils/gmxopencl.h" #endif @@ -421,12 +421,12 @@ void pme_gpu_realloc_and_copy_fract_shifts(PmeGpu* pmeGpu) void pme_gpu_free_fract_shifts(const PmeGpu* pmeGpu) { auto* kernelParamsPtr = pmeGpu->kernelParams.get(); -#if GMX_GPU == GMX_GPU_CUDA +#if GMX_GPU_CUDA destroyParamLookupTable(&kernelParamsPtr->grid.d_fractShiftsTable, kernelParamsPtr->fractShiftsTableTexture); destroyParamLookupTable(&kernelParamsPtr->grid.d_gridlineIndicesTable, kernelParamsPtr->gridlineIndicesTableTexture); -#elif GMX_GPU == GMX_GPU_OPENCL +#elif GMX_GPU_OPENCL freeDeviceBuffer(&kernelParamsPtr->grid.d_fractShiftsTable); freeDeviceBuffer(&kernelParamsPtr->grid.d_gridlineIndicesTable); #endif @@ -502,7 +502,7 @@ void pme_gpu_sync_spread_grid(const PmeGpu* pmeGpu) */ static void pme_gpu_init_internal(PmeGpu* pmeGpu, const DeviceContext& deviceContext, const DeviceStream& deviceStream) { -#if GMX_GPU == GMX_GPU_CUDA +#if GMX_GPU_CUDA // Prepare to use the device that this PME task was assigned earlier. // Other entities, such as CUDA timing events, are known to implicitly use the device context. 
CU_RET_ERR(cudaSetDevice(deviceContext.deviceInfo().id), "Switching to PME CUDA device"); @@ -518,11 +518,12 @@ static void pme_gpu_init_internal(PmeGpu* pmeGpu, const DeviceContext& deviceCon * TODO: PME could also try to pick up nice grid sizes (with factors of 2, 3, 5, 7). */ -#if GMX_GPU == GMX_GPU_CUDA +#if GMX_GPU_CUDA pmeGpu->maxGridWidthX = deviceContext.deviceInfo().prop.maxGridSize[0]; -#elif GMX_GPU == GMX_GPU_OPENCL - pmeGpu->maxGridWidthX = INT32_MAX / 2; +#else + // Use this path for any non-CUDA GPU acceleration // TODO: is there no really global work size limit in OpenCL? + pmeGpu->maxGridWidthX = INT32_MAX / 2; #endif } @@ -763,7 +764,7 @@ static void pme_gpu_copy_common_data_from(const gmx_pme_t* pme) */ static void pme_gpu_select_best_performing_pme_spreadgather_kernels(PmeGpu* pmeGpu) { - if (pmeGpu->kernelParams->atoms.nAtoms > c_pmeGpuPerformanceAtomLimit && (GMX_GPU == GMX_GPU_CUDA)) + if (GMX_GPU_CUDA && pmeGpu->kernelParams->atoms.nAtoms > c_pmeGpuPerformanceAtomLimit) { pmeGpu->settings.threadsPerAtom = ThreadsPerAtom::Order; pmeGpu->settings.recalculateSplines = true; @@ -1187,11 +1188,12 @@ void pme_gpu_spread(const PmeGpu* pmeGpu, const int threadsPerAtom = (pmeGpu->settings.threadsPerAtom == ThreadsPerAtom::Order ? 
order : order * order); const bool recalculateSplines = pmeGpu->settings.recalculateSplines; -#if GMX_GPU == GMX_GPU_OPENCL - GMX_ASSERT(pmeGpu->settings.threadsPerAtom == ThreadsPerAtom::OrderSquared, + + GMX_ASSERT(!GMX_GPU_OPENCL || pmeGpu->settings.threadsPerAtom == ThreadsPerAtom::OrderSquared, "Only 16 threads per atom supported in OpenCL"); - GMX_ASSERT(!recalculateSplines, "Recalculating splines not supported in OpenCL"); -#endif + GMX_ASSERT(!GMX_GPU_OPENCL || !recalculateSplines, + "Recalculating splines not supported in OpenCL"); + const int atomsPerBlock = blockSize / threadsPerAtom; // TODO: pick smaller block size in runtime if needed @@ -1206,9 +1208,10 @@ void pme_gpu_spread(const PmeGpu* pmeGpu, // Ensure that coordinates are ready on the device before launching spread; // only needed with CUDA on PP+PME ranks, not on separate PME ranks, in unit tests // nor in OpenCL as these cases use a single stream (hence xReadyOnDevice == nullptr). - GMX_ASSERT(xReadyOnDevice != nullptr || (GMX_GPU != GMX_GPU_CUDA) - || pmeGpu->common->isRankPmeOnly || pme_gpu_settings(pmeGpu).copyAllOutputs, + GMX_ASSERT(!GMX_GPU_CUDA || xReadyOnDevice != nullptr || pmeGpu->common->isRankPmeOnly + || pme_gpu_settings(pmeGpu).copyAllOutputs, "Need a valid coordinate synchronizer on PP+PME ranks with CUDA."); + if (xReadyOnDevice) { xReadyOnDevice->enqueueWaitEvent(pmeGpu->archSpecific->pmeStream_); @@ -1356,7 +1359,7 @@ void pme_gpu_solve(const PmeGpu* pmeGpu, const int warpSize = pmeGpu->programHandle_->warpSize(); const int blockSize = (cellsPerBlock + warpSize - 1) / warpSize * warpSize; - static_assert(GMX_GPU != GMX_GPU_CUDA || c_solveMaxWarpsPerBlock / 2 >= 4, + static_assert(!GMX_GPU_CUDA || c_solveMaxWarpsPerBlock / 2 >= 4, "The CUDA solve energy kernels needs at least 4 warps. 
" "Here we launch at least half of the max warps."); @@ -1528,11 +1531,12 @@ void pme_gpu_gather(PmeGpu* pmeGpu, real** h_grids, const float lambda) const int threadsPerAtom = (pmeGpu->settings.threadsPerAtom == ThreadsPerAtom::Order ? order : order * order); const bool recalculateSplines = pmeGpu->settings.recalculateSplines; -#if GMX_GPU == GMX_GPU_OPENCL - GMX_ASSERT(pmeGpu->settings.threadsPerAtom == ThreadsPerAtom::OrderSquared, + + GMX_ASSERT(!GMX_GPU_OPENCL || pmeGpu->settings.threadsPerAtom == ThreadsPerAtom::OrderSquared, "Only 16 threads per atom supported in OpenCL"); - GMX_ASSERT(!recalculateSplines, "Recalculating splines not supported in OpenCL"); -#endif + GMX_ASSERT(!GMX_GPU_OPENCL || !recalculateSplines, + "Recalculating splines not supported in OpenCL"); + const int atomsPerBlock = blockSize / threadsPerAtom; GMX_ASSERT(!(c_pmeAtomDataBlockSize % atomsPerBlock), diff --git a/src/gromacs/ewald/pme_gpu_program_impl.h b/src/gromacs/ewald/pme_gpu_program_impl.h index 75d3f881d0..254a1ab215 100644 --- a/src/gromacs/ewald/pme_gpu_program_impl.h +++ b/src/gromacs/ewald/pme_gpu_program_impl.h @@ -80,9 +80,9 @@ struct PmeGpuProgramImpl const DeviceContext& deviceContext_; //! Conveniently all the PME kernels use the same single argument type -#if GMX_GPU == GMX_GPU_CUDA +#if GMX_GPU_CUDA using PmeKernelHandle = void (*)(const struct PmeGpuCudaKernelParams); -#elif GMX_GPU == GMX_GPU_OPENCL +#elif GMX_GPU_OPENCL using PmeKernelHandle = cl_kernel; #else using PmeKernelHandle = void*; diff --git a/src/gromacs/ewald/pme_gpu_types_host.h b/src/gromacs/ewald/pme_gpu_types_host.h index f8e42f7fb6..26405433f5 100644 --- a/src/gromacs/ewald/pme_gpu_types_host.h +++ b/src/gromacs/ewald/pme_gpu_types_host.h @@ -67,18 +67,18 @@ namespace gmx class PmeDeviceBuffers; } // namespace gmx -#if GMX_GPU != GMX_GPU_NONE +#if GMX_GPU struct PmeGpuSpecific; #else /*! 
\brief A dummy typedef for the GPU host data placeholder on non-GPU builds */ typedef int PmeGpuSpecific; #endif -#if GMX_GPU == GMX_GPU_CUDA +#if GMX_GPU_CUDA struct PmeGpuCudaKernelParams; /*! \brief A typedef for including the GPU kernel arguments data by pointer */ typedef PmeGpuCudaKernelParams PmeGpuKernelParams; -#elif GMX_GPU == GMX_GPU_OPENCL +#elif GMX_GPU_OPENCL struct PmeGpuKernelParamsBase; /*! \brief A typedef for including the GPU kernel arguments data by pointer */ typedef PmeGpuKernelParamsBase PmeGpuKernelParams; diff --git a/src/gromacs/ewald/pme_gpu_types_host_impl.h b/src/gromacs/ewald/pme_gpu_types_host_impl.h index f3deae2842..cf98d701c5 100644 --- a/src/gromacs/ewald/pme_gpu_types_host_impl.h +++ b/src/gromacs/ewald/pme_gpu_types_host_impl.h @@ -50,10 +50,10 @@ #include #include -#if GMX_GPU == GMX_GPU_CUDA +#if GMX_GPU_CUDA # include "gromacs/gpu_utils/gpueventsynchronizer.cuh" # include "gromacs/gpu_utils/gpuregiontimer.cuh" -#elif GMX_GPU == GMX_GPU_OPENCL +#elif GMX_GPU_OPENCL # include "gromacs/gpu_utils/gpueventsynchronizer_ocl.h" # include "gromacs/gpu_utils/gpuregiontimer_ocl.h" #endif diff --git a/src/gromacs/ewald/pme_only.cpp b/src/gromacs/ewald/pme_only.cpp index 1a33cc8ed0..de9386f639 100644 --- a/src/gromacs/ewald/pme_only.cpp +++ b/src/gromacs/ewald/pme_only.cpp @@ -106,7 +106,7 @@ /*! \brief environment variable to enable GPU P2P communication */ static const bool c_enableGpuPmePpComms = - (getenv("GMX_GPU_PME_PP_COMMS") != nullptr) && GMX_THREAD_MPI && (GMX_GPU == GMX_GPU_CUDA); + GMX_GPU_CUDA && GMX_THREAD_MPI && (getenv("GMX_GPU_PME_PP_COMMS") != nullptr); /*! 
\brief Master PP-PME communication data structure */ struct gmx_pme_pp diff --git a/src/gromacs/ewald/pme_pp_comm_gpu_impl.cpp b/src/gromacs/ewald/pme_pp_comm_gpu_impl.cpp index d194b73c20..0259cd0229 100644 --- a/src/gromacs/ewald/pme_pp_comm_gpu_impl.cpp +++ b/src/gromacs/ewald/pme_pp_comm_gpu_impl.cpp @@ -51,12 +51,12 @@ #include "gromacs/utility/gmxassert.h" #include "gromacs/utility/gmxmpi.h" -#if GMX_GPU != GMX_GPU_CUDA +#if !GMX_GPU_CUDA namespace gmx { -/*!\brief Impl class stub. */ +/*!\brief \internal Impl class stub. */ class PmePpCommGpu::Impl { }; @@ -120,4 +120,4 @@ void* PmePpCommGpu::getForcesReadySynchronizer() } // namespace gmx -#endif /* GMX_GPU != GMX_GPU_CUDA */ +#endif // !GMX_GPU_CUDA diff --git a/src/gromacs/fft/fft5d.cpp b/src/gromacs/fft/fft5d.cpp index 29618ee086..e01d28027b 100644 --- a/src/gromacs/fft/fft5d.cpp +++ b/src/gromacs/fft/fft5d.cpp @@ -431,7 +431,7 @@ fft5d_plan fft5d_plan_3d(int NG, if (!(flags & FFT5D_NOMALLOC)) { // only needed for PME GPU mixed mode - if (realGridAllocationPinningPolicy == gmx::PinningPolicy::PinnedIfSupported && GMX_GPU == GMX_GPU_CUDA) + if (GMX_GPU_CUDA && realGridAllocationPinningPolicy == gmx::PinningPolicy::PinnedIfSupported) { const std::size_t numBytes = lsize * sizeof(t_complex); lin = static_cast(gmx::PageAlignedAllocationPolicy::malloc(numBytes)); diff --git a/src/gromacs/gpu_utils/CMakeLists.txt b/src/gromacs/gpu_utils/CMakeLists.txt index f889cde431..4db569f5fa 100644 --- a/src/gromacs/gpu_utils/CMakeLists.txt +++ b/src/gromacs/gpu_utils/CMakeLists.txt @@ -43,7 +43,7 @@ gmx_add_libgromacs_sources( gpu_utils.cpp gpu_testutils.cpp ) -if(GMX_USE_OPENCL) +if(GMX_GPU_OPENCL) gmx_add_libgromacs_sources( device_context_ocl.cpp device_stream_ocl.cpp @@ -52,7 +52,7 @@ if(GMX_USE_OPENCL) ocl_caching.cpp oclutils.cpp ) -elseif(GMX_USE_CUDA) +elseif(GMX_GPU_CUDA) gmx_add_libgromacs_sources( device_stream.cu gpu_utils.cu diff --git a/src/gromacs/gpu_utils/clfftinitializer.cpp 
b/src/gromacs/gpu_utils/clfftinitializer.cpp index ca6b0c2145..2d1ff2470e 100644 --- a/src/gromacs/gpu_utils/clfftinitializer.cpp +++ b/src/gromacs/gpu_utils/clfftinitializer.cpp @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2018,2019, by the GROMACS development team, led by + * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -50,7 +50,7 @@ #include "gromacs/utility/mutex.h" #include "gromacs/utility/stringutil.h" -#if GMX_GPU == GMX_GPU_OPENCL +#if GMX_GPU_OPENCL # include #endif @@ -74,7 +74,7 @@ gmx::Mutex g_clfftMutex; ClfftInitializer::ClfftInitializer() { -#if GMX_GPU == GMX_GPU_OPENCL +#if GMX_GPU_OPENCL gmx::lock_guard guard(g_clfftMutex); clfftSetupData fftSetup; int initErrorCode = clfftInitSetupData(&fftSetup); @@ -97,7 +97,7 @@ ClfftInitializer::ClfftInitializer() ClfftInitializer::~ClfftInitializer() { -#if GMX_GPU == GMX_GPU_OPENCL +#if GMX_GPU_OPENCL gmx::lock_guard guard(g_clfftMutex); if (g_clfftInitialized) { diff --git a/src/gromacs/gpu_utils/device_context.h b/src/gromacs/gpu_utils/device_context.h index e1eb23255a..b3044c8912 100644 --- a/src/gromacs/gpu_utils/device_context.h +++ b/src/gromacs/gpu_utils/device_context.h @@ -50,7 +50,7 @@ #include "config.h" -#if GMX_GPU == GMX_GPU_OPENCL +#if GMX_GPU_OPENCL # include "gromacs/gpu_utils/device_context_ocl.h" #else # include "gromacs/utility/classhelpers.h" @@ -75,6 +75,6 @@ private: GMX_DISALLOW_COPY_MOVE_AND_ASSIGN(DeviceContext); }; -#endif // GMX_GPU != GMX_GPU_OPENCL +#endif // !GMX_GPU_OPENCL #endif // GMX_GPU_UTILS_DEVICE_CONTEXT_H diff --git a/src/gromacs/gpu_utils/device_context_ocl.cpp b/src/gromacs/gpu_utils/device_context_ocl.cpp index 2f7babd320..cfbd60c1a3 100644 --- 
a/src/gromacs/gpu_utils/device_context_ocl.cpp +++ b/src/gromacs/gpu_utils/device_context_ocl.cpp @@ -60,6 +60,8 @@ #define CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL 0x4 /**@}*/ +#ifndef DOXYGEN + DeviceContext::DeviceContext(const DeviceInformation& deviceInfo) : deviceInfo_(deviceInfo) { cl_platform_id platformId = deviceInfo.oclPlatformId; @@ -105,3 +107,5 @@ cl_context DeviceContext::context() const { return context_; } + +#endif diff --git a/src/gromacs/gpu_utils/device_context_ocl.h b/src/gromacs/gpu_utils/device_context_ocl.h index 090943962d..c754ad54b0 100644 --- a/src/gromacs/gpu_utils/device_context_ocl.h +++ b/src/gromacs/gpu_utils/device_context_ocl.h @@ -48,8 +48,10 @@ * \inlibraryapi */ -#include "gromacs/gpu_utils/gmxopencl.h" -#include "gromacs/utility/classhelpers.h" +#ifndef DOXYGEN + +# include "gromacs/gpu_utils/gmxopencl.h" +# include "gromacs/utility/classhelpers.h" struct DeviceInformation; @@ -81,4 +83,5 @@ private: GMX_DISALLOW_COPY_MOVE_AND_ASSIGN(DeviceContext); }; +#endif // !defined DOXYGEN #endif // GMX_GPU_UTILS_DEVICE_CONTEXT_OCL_H diff --git a/src/gromacs/gpu_utils/device_stream.h b/src/gromacs/gpu_utils/device_stream.h index 9880de747e..5b74f590f2 100644 --- a/src/gromacs/gpu_utils/device_stream.h +++ b/src/gromacs/gpu_utils/device_stream.h @@ -48,10 +48,10 @@ #include "config.h" -#if GMX_GPU == GMX_GPU_CUDA +#if GMX_GPU_CUDA # include -#elif GMX_GPU == GMX_GPU_OPENCL +#elif GMX_GPU_OPENCL # include "gromacs/gpu_utils/gmxopencl.h" #endif #include "gromacs/utility/classhelpers.h" @@ -120,7 +120,7 @@ public: //! Synchronize the steam void synchronize() const; -#if GMX_GPU == GMX_GPU_CUDA +#if GMX_GPU_CUDA //! Getter cudaStream_t stream() const; @@ -130,7 +130,7 @@ public: private: cudaStream_t stream_ = nullptr; -#elif GMX_GPU == GMX_GPU_OPENCL +#elif GMX_GPU_OPENCL || defined DOXYGEN //! 
Getter cl_command_queue stream() const; diff --git a/src/gromacs/gpu_utils/devicebuffer.h b/src/gromacs/gpu_utils/devicebuffer.h index c0cdfec329..8aaea72263 100644 --- a/src/gromacs/gpu_utils/devicebuffer.h +++ b/src/gromacs/gpu_utils/devicebuffer.h @@ -50,9 +50,9 @@ #include "gromacs/utility/gmxassert.h" #include "gromacs/utility/smalloc.h" // TODO: this is only for over_alloc_large -#if GMX_GPU == GMX_GPU_CUDA +#if GMX_GPU_CUDA # include "gromacs/gpu_utils/devicebuffer.cuh" -#elif GMX_GPU == GMX_GPU_OPENCL +#elif GMX_GPU_OPENCL # include "gromacs/gpu_utils/devicebuffer_ocl.h" #else # error "devicebuffer.h included on non-GPU build!" diff --git a/src/gromacs/gpu_utils/devicebuffer_datatype.h b/src/gromacs/gpu_utils/devicebuffer_datatype.h index 9ee6517318..c1aec1fa51 100644 --- a/src/gromacs/gpu_utils/devicebuffer_datatype.h +++ b/src/gromacs/gpu_utils/devicebuffer_datatype.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2019, by the GROMACS development team, led by + * Copyright (c) 2019,2020, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -46,13 +46,13 @@ #include "config.h" -#if GMX_GPU == GMX_GPU_CUDA +#if GMX_GPU_CUDA //! \brief A device-side buffer of ValueTypes template using DeviceBuffer = ValueType*; -#elif GMX_GPU == GMX_GPU_OPENCL +#elif GMX_GPU_OPENCL # include "gromacs/gpu_utils/gputraits_ocl.h" diff --git a/src/gromacs/gpu_utils/gpu_macros.h b/src/gromacs/gpu_utils/gpu_macros.h index 4f6d557537..8eef2307d9 100644 --- a/src/gromacs/gpu_utils/gpu_macros.h +++ b/src/gromacs/gpu_utils/gpu_macros.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. 
* - * Copyright (c) 2014,2015,2017,2018,2019, by the GROMACS development team, led by + * Copyright (c) 2014,2015,2017,2018,2019,2020, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -74,7 +74,7 @@ # define OPENCL_FUNC_TERM REAL_FUNC_TERM # define OPENCL_FUNC_TERM_WITH_RETURN(arg) REAL_FUNC_TERM_WITH_RETURN(arg) -#elif GMX_GPU != GMX_GPU_NONE +#elif GMX_GPU /* GPU support is enabled, so these functions will have real code * defined somewhere */ @@ -83,7 +83,7 @@ # define GPU_FUNC_TERM REAL_FUNC_TERM # define GPU_FUNC_TERM_WITH_RETURN(arg) REAL_FUNC_TERM_WITH_RETURN(arg) -# if GMX_GPU == GMX_GPU_OPENCL +# if GMX_GPU_OPENCL /* OpenCL support is enabled, so CUDA-specific functions need empty * implementations, while OpenCL-specific functions will have real @@ -98,7 +98,7 @@ # define OPENCL_FUNC_TERM_WITH_RETURN(arg) REAL_FUNC_TERM_WITH_RETURN(arg) # endif -# if GMX_GPU == GMX_GPU_CUDA +# if GMX_GPU_CUDA /* CUDA support is enabled, so OpenCL-specific functions need empty * implementations, while CUDA-specific functions will have real @@ -114,7 +114,7 @@ # endif -#elif GMX_GPU == GMX_GPU_NONE +#elif !GMX_GPU /* No GPU support is configured, so none of these functions will have * real definitions. */ diff --git a/src/gromacs/gpu_utils/gpu_utils.cpp b/src/gromacs/gpu_utils/gpu_utils.cpp index fcfb1cc374..004ad30c86 100644 --- a/src/gromacs/gpu_utils/gpu_utils.cpp +++ b/src/gromacs/gpu_utils/gpu_utils.cpp @@ -55,7 +55,7 @@ #endif //! 
Constant used to help minimize preprocessed code -static constexpr bool c_binarySupportsGpus = (GMX_GPU != GMX_GPU_NONE); +static constexpr bool c_binarySupportsGpus = (GMX_GPU != 0); bool canPerformGpuDetection() { @@ -69,7 +69,7 @@ bool canPerformGpuDetection() } } -#if GMX_GPU == GMX_GPU_NONE +#if !GMX_GPU DeviceStatus gpu_info_get_stat(const gmx_gpu_info_t& /*unused*/, int /*unused*/) { return DeviceStatus::Nonexistent; diff --git a/src/gromacs/gpu_utils/gputraits.h b/src/gromacs/gpu_utils/gputraits.h index 9ae87f1436..e3a3a9275e 100644 --- a/src/gromacs/gpu_utils/gputraits.h +++ b/src/gromacs/gpu_utils/gputraits.h @@ -47,11 +47,11 @@ #include "config.h" -#if GMX_GPU == GMX_GPU_CUDA +#if GMX_GPU_CUDA # include "gromacs/gpu_utils/gputraits.cuh" -#elif GMX_GPU == GMX_GPU_OPENCL +#elif GMX_GPU_OPENCL # include "gromacs/gpu_utils/gputraits_ocl.h" @@ -59,7 +59,7 @@ using DeviceTexture = void*; -//! Stub for device information. +//! \internal Stub for device information. struct DeviceInformation { // No member needed diff --git a/src/gromacs/gpu_utils/tests/device_stream_manager.cpp b/src/gromacs/gpu_utils/tests/device_stream_manager.cpp index 3698f290a8..1491669201 100644 --- a/src/gromacs/gpu_utils/tests/device_stream_manager.cpp +++ b/src/gromacs/gpu_utils/tests/device_stream_manager.cpp @@ -71,7 +71,7 @@ const EnumerationArray c_deviceStreamNames = { /*! \brief Non-GPU builds return nullptr instead of streams, * so we have to expect that in such build configurations. */ -const bool c_canExpectValidStreams = (GMX_GPU != GMX_GPU_NONE); +constexpr bool c_canExpectValidStreams = (GMX_GPU != 0); //! 
Helper function to implement readable testing void expectValidStreams(DeviceStreamManager* manager, std::initializer_list types) diff --git a/src/gromacs/gpu_utils/tests/hostallocator.cpp b/src/gromacs/gpu_utils/tests/hostallocator.cpp index 41d38eb417..baf475a156 100644 --- a/src/gromacs/gpu_utils/tests/hostallocator.cpp +++ b/src/gromacs/gpu_utils/tests/hostallocator.cpp @@ -286,7 +286,7 @@ TYPED_TEST(HostAllocatorTestNoMem, Comparison) EXPECT_NE(AllocatorType{}, AllocatorType{ PinningPolicy::PinnedIfSupported }); } -#if GMX_GPU == GMX_GPU_CUDA +#if GMX_GPU_CUDA // Policy suitable for pinning is only supported for a CUDA build diff --git a/src/gromacs/gpu_utils/tests/typecasts_runner.cpp b/src/gromacs/gpu_utils/tests/typecasts_runner.cpp index 580e457d28..71135fe85e 100644 --- a/src/gromacs/gpu_utils/tests/typecasts_runner.cpp +++ b/src/gromacs/gpu_utils/tests/typecasts_runner.cpp @@ -48,7 +48,7 @@ #include "testutils/testasserts.h" -#if GMX_GPU != GMX_GPU_CUDA +#if !GMX_GPU_CUDA namespace gmx { @@ -71,4 +71,4 @@ void convertRVecToFloat3OnDevice(std::vector& /* rVecOutput */, } // namespace test } // namespace gmx -#endif // GMX_GPU != GMX_GPU_CUDA +#endif // !GMX_GPU_CUDA diff --git a/src/gromacs/gpu_utils/tests/typecasts_runner.cu b/src/gromacs/gpu_utils/tests/typecasts_runner.cu index 1488edbed9..1aedf1a166 100644 --- a/src/gromacs/gpu_utils/tests/typecasts_runner.cu +++ b/src/gromacs/gpu_utils/tests/typecasts_runner.cu @@ -42,8 +42,6 @@ #include "typecasts_runner.h" -#include "config.h" - #include #include "gromacs/gpu_utils/cudautils.cuh" @@ -52,8 +50,6 @@ #include "gromacs/utility/exceptions.h" #include "gromacs/utility/stringutil.h" -#if GMX_GPU == GMX_GPU_CUDA - namespace gmx { @@ -150,5 +146,3 @@ void convertRVecToFloat3OnDevice(std::vector& h_rVecOutput, const std } // namespace test } // namespace gmx - -#endif // GMX_GPU == GMX_GPU_CUDA \ No newline at end of file diff --git a/src/gromacs/hardware/detecthardware.cpp 
b/src/gromacs/hardware/detecthardware.cpp index 969399b3ec..e88d642dc3 100644 --- a/src/gromacs/hardware/detecthardware.cpp +++ b/src/gromacs/hardware/detecthardware.cpp @@ -132,8 +132,8 @@ static void gmx_detect_gpus(const gmx::MDLogger& mdlog, /* The OpenCL support requires us to run detection on all ranks. * With CUDA we don't need to, and prefer to detect on one rank * and send the information to the other ranks over MPI. */ - bool allRanksMustDetectGpus = (GMX_GPU == GMX_GPU_OPENCL); - bool gpusCanBeDetected = false; + constexpr bool allRanksMustDetectGpus = (GMX_GPU_OPENCL != 0); + bool gpusCanBeDetected = false; if (isMasterRankOfPhysicalNode || allRanksMustDetectGpus) { std::string errorMessage; diff --git a/src/gromacs/hardware/printhardware.cpp b/src/gromacs/hardware/printhardware.cpp index e729876458..4283441c0e 100644 --- a/src/gromacs/hardware/printhardware.cpp +++ b/src/gromacs/hardware/printhardware.cpp @@ -61,7 +61,7 @@ #include "gromacs/utility/sysinfo.h" //! Constant used to help minimize preprocessed code -static const bool bGPUBinary = GMX_GPU != GMX_GPU_NONE; +static constexpr bool bGPUBinary = (GMX_GPU != 0); /*! \internal \brief * Returns the GPU information text, one GPU per line. diff --git a/src/gromacs/listed_forces/CMakeLists.txt b/src/gromacs/listed_forces/CMakeLists.txt index 22936e1e5b..fa1fa07902 100644 --- a/src/gromacs/listed_forces/CMakeLists.txt +++ b/src/gromacs/listed_forces/CMakeLists.txt @@ -1,7 +1,7 @@ # # This file is part of the GROMACS molecular simulation package. # -# Copyright (c) 2014,2015,2016,2018,2019, by the GROMACS development team, led by +# Copyright (c) 2014,2015,2016,2018,2019,2020, by the GROMACS development team, led by # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, # and including many others, as listed in the AUTHORS file in the # top-level source directory and at http://www.gromacs.org. 
@@ -45,7 +45,7 @@ gmx_add_libgromacs_sources( restcbt.cpp ) -if(GMX_USE_CUDA) +if(GMX_GPU_CUDA) gmx_add_libgromacs_sources( gpubonded_impl.cu gpubondedkernels.cu diff --git a/src/gromacs/listed_forces/gpubonded_impl.cpp b/src/gromacs/listed_forces/gpubonded_impl.cpp index da40157866..af62aac7a0 100644 --- a/src/gromacs/listed_forces/gpubonded_impl.cpp +++ b/src/gromacs/listed_forces/gpubonded_impl.cpp @@ -120,11 +120,11 @@ bool buildSupportsGpuBondeds(std::string* error) { errorReasons.emplace_back("not supported with double precision"); } - if (GMX_GPU == GMX_GPU_OPENCL) + if (GMX_GPU_OPENCL) { errorReasons.emplace_back("not supported with OpenCL build of GROMACS"); } - else if (GMX_GPU == GMX_GPU_NONE) + else if (!GMX_GPU) { errorReasons.emplace_back("not supported with CPU-only build of GROMACS"); } @@ -156,7 +156,7 @@ bool inputSupportsGpuBondeds(const t_inputrec& ir, const gmx_mtop_t& mtop, std:: return addMessageIfNotSupported(errorReasons, error); } -#if GMX_GPU != GMX_GPU_CUDA +#if !GMX_GPU_CUDA class GpuBonded::Impl { @@ -205,6 +205,6 @@ void GpuBonded::waitAccumulateEnergyTerms(gmx_enerdata_t* /* enerd */) {} void GpuBonded::clearEnergies() {} -#endif /* GMX_GPU != GMX_GPU_CUDA */ +#endif // !GMX_GPU_CUDA } // namespace gmx diff --git a/src/gromacs/mdlib/CMakeLists.txt b/src/gromacs/mdlib/CMakeLists.txt index 770028ba3b..c279548147 100644 --- a/src/gromacs/mdlib/CMakeLists.txt +++ b/src/gromacs/mdlib/CMakeLists.txt @@ -39,7 +39,7 @@ set(MDLIB_SOURCES ${MDLIB_SOURCES} PARENT_SCOPE) if (BUILD_TESTING) add_subdirectory(tests) endif() -if(GMX_USE_CUDA) +if(GMX_GPU_CUDA) gmx_add_libgromacs_sources( leapfrog_gpu.cu lincs_gpu.cu diff --git a/src/gromacs/mdlib/sim_util.cpp b/src/gromacs/mdlib/sim_util.cpp index 1d61a68c59..d8fc0267f7 100644 --- a/src/gromacs/mdlib/sim_util.cpp +++ b/src/gromacs/mdlib/sim_util.cpp @@ -1263,7 +1263,7 @@ void do_force(FILE* fplog, // For force buffer ops, we use the below conditon rather than // useGpuFBufferOps to ensure that init 
is performed even if this // NS step is also a virial step (on which f buf ops are deactivated). - if (simulationWork.useGpuBufferOps && simulationWork.useGpuNonbonded && (GMX_GPU == GMX_GPU_CUDA)) + if (GMX_GPU_CUDA && simulationWork.useGpuBufferOps && simulationWork.useGpuNonbonded) { GMX_ASSERT(stateGpu, "stateGpu should be valid when buffer ops are offloaded"); nbv->atomdata_init_add_nbat_f_to_f_gpu(stateGpu->fReducedOnDevice()); diff --git a/src/gromacs/mdlib/tests/constr.cpp b/src/gromacs/mdlib/tests/constr.cpp index 88f5e76c8c..35d9adce32 100644 --- a/src/gromacs/mdlib/tests/constr.cpp +++ b/src/gromacs/mdlib/tests/constr.cpp @@ -89,7 +89,7 @@ std::vector getRunnersNames() { runnersNames.emplace_back("SHAKE"); runnersNames.emplace_back("LINCS"); - if (GMX_GPU == GMX_GPU_CUDA && canComputeOnGpu()) + if (GMX_GPU_CUDA && canComputeOnGpu()) { runnersNames.emplace_back("LINCS_GPU"); } diff --git a/src/gromacs/mdlib/tests/constrtestrunners.cpp b/src/gromacs/mdlib/tests/constrtestrunners.cpp index 7959cdd928..5fca4c06b3 100644 --- a/src/gromacs/mdlib/tests/constrtestrunners.cpp +++ b/src/gromacs/mdlib/tests/constrtestrunners.cpp @@ -149,7 +149,7 @@ void applyLincs(ConstraintsTestData* testData, t_pbc pbc) done_lincs(lincsd); } -#if GMX_GPU != GMX_GPU_CUDA +#if !GMX_GPU_CUDA /*! \brief * Stub for GPU version of LINCS constraints to satisfy compiler. 
* @@ -160,7 +160,7 @@ void applyLincsGpu(ConstraintsTestData gmx_unused* testData, t_pbc gmx_unused pb { FAIL() << "Dummy LINCS CUDA function was called instead of the real one."; } -#endif +#endif // !GMX_GPU_CUDA } // namespace test } // namespace gmx diff --git a/src/gromacs/mdlib/tests/leapfrog.cpp b/src/gromacs/mdlib/tests/leapfrog.cpp index 1a03709ac6..4ff12126e8 100644 --- a/src/gromacs/mdlib/tests/leapfrog.cpp +++ b/src/gromacs/mdlib/tests/leapfrog.cpp @@ -153,7 +153,7 @@ public: // All runners should be registered here under appropriate conditions // s_runners_["LeapFrogSimple"] = integrateLeapFrogSimple; - if (GMX_GPU == GMX_GPU_CUDA && canComputeOnGpu()) + if (GMX_GPU_CUDA && canComputeOnGpu()) { s_runners_["LeapFrogGpu"] = integrateLeapFrogGpu; } diff --git a/src/gromacs/mdlib/tests/leapfrogtestrunners.cpp b/src/gromacs/mdlib/tests/leapfrogtestrunners.cpp index 939ac5c168..492fb8e080 100644 --- a/src/gromacs/mdlib/tests/leapfrogtestrunners.cpp +++ b/src/gromacs/mdlib/tests/leapfrogtestrunners.cpp @@ -103,14 +103,14 @@ void integrateLeapFrogSimple(LeapFrogTestData* testData, int numSteps) } } -#if GMX_GPU != GMX_GPU_CUDA +#if !GMX_GPU_CUDA void integrateLeapFrogGpu(gmx_unused LeapFrogTestData* testData, gmx_unused int numSteps) { FAIL() << "Dummy Leap-Frog CUDA function was called instead of the real one."; } -#endif // GMX_GPU != GMX_GPU_CUDA +#endif // !GMX_GPU_CUDA } // namespace test } // namespace gmx diff --git a/src/gromacs/mdlib/tests/settle.cpp b/src/gromacs/mdlib/tests/settle.cpp index 3a08231685..41552e2f58 100644 --- a/src/gromacs/mdlib/tests/settle.cpp +++ b/src/gromacs/mdlib/tests/settle.cpp @@ -187,12 +187,9 @@ public: // 2. There is a CUDA-capable GPU in a system // 3. This GPU is detectable // 4. 
GPU detection was not disabled by GMX_DISABLE_GPU_DETECTION environment variable - if (s_hasCompatibleGpus) + if (GMX_GPU_CUDA && s_hasCompatibleGpus) { - if (GMX_GPU == GMX_GPU_CUDA && s_hasCompatibleGpus) - { - runners_["SETTLE_GPU"] = applySettleGpu; - } + runners_["SETTLE_GPU"] = applySettleGpu; } } diff --git a/src/gromacs/mdlib/tests/settletestrunners.cpp b/src/gromacs/mdlib/tests/settletestrunners.cpp index 4c266b9038..3fd90dc689 100644 --- a/src/gromacs/mdlib/tests/settletestrunners.cpp +++ b/src/gromacs/mdlib/tests/settletestrunners.cpp @@ -78,7 +78,7 @@ void applySettle(SettleTestData* testData, EXPECT_FALSE(errorOccured) << testDescription; } -#if GMX_GPU != GMX_GPU_CUDA +#if !GMX_GPU_CUDA void applySettleGpu(gmx_unused SettleTestData* testData, gmx_unused const t_pbc pbc, diff --git a/src/gromacs/mdlib/tests/settletestrunners.cu b/src/gromacs/mdlib/tests/settletestrunners.cu index ddb8e04b63..f9cf9867f3 100644 --- a/src/gromacs/mdlib/tests/settletestrunners.cu +++ b/src/gromacs/mdlib/tests/settletestrunners.cu @@ -80,8 +80,7 @@ void applySettleGpu(SettleTestData* testData, { // These should never fail since this function should only be called if CUDA is enabled and // there is a CUDA-capable device available. 
- GMX_RELEASE_ASSERT(GMX_GPU == GMX_GPU_CUDA, - "CUDA version of SETTLE was called from non-CUDA build."); + GMX_RELEASE_ASSERT(GMX_GPU_CUDA, "CUDA version of SETTLE was called from non-CUDA build."); // TODO: Here we should check that at least 1 suitable GPU is available GMX_RELEASE_ASSERT(canPerformGpuDetection(), "Can't detect CUDA-capable GPUs."); diff --git a/src/gromacs/mdlib/update_constrain_gpu_impl.cpp b/src/gromacs/mdlib/update_constrain_gpu_impl.cpp index e290939c09..d3d50c2578 100644 --- a/src/gromacs/mdlib/update_constrain_gpu_impl.cpp +++ b/src/gromacs/mdlib/update_constrain_gpu_impl.cpp @@ -46,7 +46,7 @@ #include "gromacs/mdlib/update_constrain_gpu.h" -#if GMX_GPU != GMX_GPU_CUDA +#if !GMX_GPU_CUDA namespace gmx { @@ -120,4 +120,4 @@ bool UpdateConstrainGpu::isNumCoupledConstraintsSupported(const gmx_mtop_t& /* m } // namespace gmx -#endif /* GMX_GPU != GMX_GPU_CUDA */ +#endif /* !GMX_GPU_CUDA */ diff --git a/src/gromacs/mdlib/vsite.cpp b/src/gromacs/mdlib/vsite.cpp index 879a28a692..6a6aa97675 100644 --- a/src/gromacs/mdlib/vsite.cpp +++ b/src/gromacs/mdlib/vsite.cpp @@ -91,15 +91,13 @@ * * Any remaining vsites are assigned to a separate master thread task. */ - namespace gmx { //! VirialHandling is often used outside VirtualSitesHandler class members using VirialHandling = VirtualSitesHandler::VirialHandling; -/*! \libinternal - * \brief Information on PBC and domain decomposition for virtual sites +/*! \brief Information on PBC and domain decomposition for virtual sites */ struct DomainInfo { @@ -126,8 +124,7 @@ public: const gmx_domdec_t* domdec_ = nullptr; }; -/*! \libinternal - * \brief List of atom indices belonging to a task +/*! \brief List of atom indices belonging to a task */ struct AtomIndex { @@ -135,8 +132,7 @@ struct AtomIndex std::vector atom; }; -/*! \libinternal - * \brief Data structure for thread tasks that use constructing atoms outside their own atom range +/*! 
\brief Data structure for thread tasks that use constructing atoms outside their own atom range */ struct InterdependentTask { @@ -158,8 +154,7 @@ struct InterdependentTask std::vector reduceTask; }; -/*! \libinternal - * \brief Vsite thread task data structure +/*! \brief Vsite thread task data structure */ struct VsiteThread { @@ -193,8 +188,7 @@ struct VsiteThread }; -/*! \libinternal - * \brief Information on how the virtual site work is divided over thread tasks +/*! \brief Information on how the virtual site work is divided over thread tasks */ class ThreadingInfo { @@ -232,8 +226,7 @@ private: std::vector taskIndex_; }; -/*! \libinternal - * \brief Impl class for VirtualSitesHandler +/*! \brief Impl class for VirtualSitesHandler */ class VirtualSitesHandler::Impl { diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp index a32dc97a70..c2664db220 100644 --- a/src/gromacs/mdrun/runner.cpp +++ b/src/gromacs/mdrun/runner.cpp @@ -202,13 +202,13 @@ static DevelopmentFeatureFlags manageDevelopmentFeatures(const gmx::MDLogger& md // getenv results are ignored when clearly they are used. 
#pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-result" - devFlags.enableGpuBufferOps = (getenv("GMX_USE_GPU_BUFFER_OPS") != nullptr) - && (GMX_GPU == GMX_GPU_CUDA) && useGpuForNonbonded; - devFlags.forceGpuUpdateDefault = (getenv("GMX_FORCE_UPDATE_DEFAULT_GPU") != nullptr); - devFlags.enableGpuHaloExchange = - (getenv("GMX_GPU_DD_COMMS") != nullptr && GMX_THREAD_MPI && (GMX_GPU == GMX_GPU_CUDA)); + + devFlags.enableGpuBufferOps = + GMX_GPU_CUDA && useGpuForNonbonded && (getenv("GMX_USE_GPU_BUFFER_OPS") != nullptr); + devFlags.enableGpuHaloExchange = GMX_GPU_CUDA && GMX_THREAD_MPI && getenv("GMX_GPU_DD_COMMS") != nullptr; devFlags.enableGpuPmePPComm = - (getenv("GMX_GPU_PME_PP_COMMS") != nullptr && GMX_THREAD_MPI && (GMX_GPU == GMX_GPU_CUDA)); + GMX_GPU_CUDA && GMX_THREAD_MPI && getenv("GMX_GPU_PME_PP_COMMS") != nullptr; + #pragma GCC diagnostic pop if (devFlags.enableGpuBufferOps) @@ -1165,7 +1165,8 @@ int Mdrunner::mdrunner() // timing enabling - TODO put this in gpu_utils (even though generally this is just option handling?) bool useTiming = true; - if (GMX_GPU == GMX_GPU_CUDA) + + if (GMX_GPU_CUDA) { /* WARNING: CUDA timings are incorrect with multiple streams. * This is the main reason why they are disabled by default. @@ -1173,7 +1174,7 @@ int Mdrunner::mdrunner() // TODO: Consider turning on by default when we can detect nr of streams. 
useTiming = (getenv("GMX_ENABLE_GPU_TIMING") != nullptr); } - else if (GMX_GPU == GMX_GPU_OPENCL) + else if (GMX_GPU_OPENCL) { useTiming = (getenv("GMX_DISABLE_GPU_TIMING") == nullptr); } diff --git a/src/gromacs/mdtypes/CMakeLists.txt b/src/gromacs/mdtypes/CMakeLists.txt index da3cecf437..f13e63c8d8 100644 --- a/src/gromacs/mdtypes/CMakeLists.txt +++ b/src/gromacs/mdtypes/CMakeLists.txt @@ -42,7 +42,7 @@ file(GLOB MDTYPES_SOURCES observableshistory.cpp state.cpp) -if(GMX_USE_CUDA OR GMX_USE_OPENCL) +if(GMX_GPU) gmx_add_libgromacs_sources( state_propagator_data_gpu_impl_gpu.cpp ) diff --git a/src/gromacs/mdtypes/state_propagator_data_gpu_impl.cpp b/src/gromacs/mdtypes/state_propagator_data_gpu_impl.cpp index 1600c7a062..3274d3dd02 100644 --- a/src/gromacs/mdtypes/state_propagator_data_gpu_impl.cpp +++ b/src/gromacs/mdtypes/state_propagator_data_gpu_impl.cpp @@ -46,7 +46,7 @@ #include "gromacs/mdtypes/state_propagator_data_gpu.h" -#if GMX_GPU == GMX_GPU_NONE +#if !GMX_GPU namespace gmx { @@ -265,4 +265,4 @@ int StatePropagatorDataGpu::numAtomsAll() } // namespace gmx -#endif // GMX_GPU == GMX_GPU_NONE +#endif // !GMX_GPU diff --git a/src/gromacs/mdtypes/state_propagator_data_gpu_impl.h b/src/gromacs/mdtypes/state_propagator_data_gpu_impl.h index fd9ff197ad..f4457311e1 100644 --- a/src/gromacs/mdtypes/state_propagator_data_gpu_impl.h +++ b/src/gromacs/mdtypes/state_propagator_data_gpu_impl.h @@ -48,9 +48,9 @@ #include "config.h" #include "gromacs/gpu_utils/devicebuffer.h" -#if GMX_GPU == GMX_GPU_CUDA +#if GMX_GPU_CUDA # include "gromacs/gpu_utils/gpueventsynchronizer.cuh" -#elif GMX_GPU == GMX_GPU_OPENCL +#elif GMX_GPU_OPENCL # include "gromacs/gpu_utils/gpueventsynchronizer_ocl.h" #endif #include "gromacs/math/vectypes.h" diff --git a/src/gromacs/mdtypes/state_propagator_data_gpu_impl_gpu.cpp b/src/gromacs/mdtypes/state_propagator_data_gpu_impl_gpu.cpp index bf927f2da2..995976b461 100644 --- a/src/gromacs/mdtypes/state_propagator_data_gpu_impl_gpu.cpp +++ 
b/src/gromacs/mdtypes/state_propagator_data_gpu_impl_gpu.cpp @@ -44,7 +44,7 @@ #include "config.h" -#if GMX_GPU != GMX_GPU_NONE +#if GMX_GPU # include "gromacs/gpu_utils/device_stream_manager.h" # include "gromacs/gpu_utils/devicebuffer.h" @@ -70,7 +70,7 @@ StatePropagatorDataGpu::Impl::Impl(const DeviceStreamManager& deviceStreamManage wcycle_(wcycle) { static_assert( - GMX_GPU != GMX_GPU_NONE, + GMX_GPU, "GPU state propagator data object should only be constructed on the GPU code-paths."); // We need to keep local copies for re-initialization. @@ -78,14 +78,8 @@ StatePropagatorDataGpu::Impl::Impl(const DeviceStreamManager& deviceStreamManage localStream_ = &deviceStreamManager.stream(DeviceStreamType::NonBondedLocal); nonLocalStream_ = &deviceStreamManager.stream(DeviceStreamType::NonBondedNonLocal); // PME stream is used in OpenCL for H2D coordinate transfer - if (GMX_GPU == GMX_GPU_OPENCL) - { - updateStream_ = &deviceStreamManager.stream(DeviceStreamType::Pme); - } - else - { - updateStream_ = &deviceStreamManager.stream(DeviceStreamType::UpdateAndConstraints); - } + updateStream_ = &deviceStreamManager.stream( + GMX_GPU_OPENCL ? DeviceStreamType::Pme : DeviceStreamType::UpdateAndConstraints); // Map the atom locality to the stream that will be used for coordinates, // velocities and forces transfers. Same streams are used for H2D and D2H copies. 
@@ -114,7 +108,7 @@ StatePropagatorDataGpu::Impl::Impl(const DeviceStream* pmeStream, wcycle_(wcycle) { static_assert( - GMX_GPU != GMX_GPU_NONE, + GMX_GPU, "GPU state propagator data object should only be constructed on the GPU code-paths."); GMX_ASSERT(pmeStream->isValid(), "GPU PME stream should be valid."); @@ -172,10 +166,11 @@ void StatePropagatorDataGpu::Impl::reinit(int numAtomsLocal, int numAtomsAll) reallocateDeviceBuffer(&d_v_, numAtomsAll_, &d_vSize_, &d_vCapacity_, deviceContext_); const int d_fOldCapacity = d_fCapacity_; reallocateDeviceBuffer(&d_f_, numAtomsAll_, &d_fSize_, &d_fCapacity_, deviceContext_); + // Clearing of the forces can be done in local stream since the nonlocal stream cannot reach // the force accumulation stage before syncing with the local stream. Only done in CUDA, // since the force buffer ops are not implemented in OpenCL. - if (GMX_GPU == GMX_GPU_CUDA && d_fCapacity_ != d_fOldCapacity) + if (GMX_GPU_CUDA && d_fCapacity_ != d_fOldCapacity) { clearDeviceBufferAsync(&d_f_, 0, d_fCapacity_, *localStream_); } @@ -306,7 +301,7 @@ void StatePropagatorDataGpu::Impl::copyCoordinatesToGpu(const gmx::ArrayRef -#if GMX_GPU == GMX_GPU_CUDA +#if GMX_GPU_CUDA # include "cuda/nbnxm_cuda_types.h" #endif -#if GMX_GPU == GMX_GPU_OPENCL +#if GMX_GPU_OPENCL # include "opencl/nbnxm_ocl_types.h" #endif diff --git a/src/gromacs/nbnxm/gpu_common_utils.h b/src/gromacs/nbnxm/gpu_common_utils.h index 4882c3530e..af0c69f36c 100644 --- a/src/gromacs/nbnxm/gpu_common_utils.h +++ b/src/gromacs/nbnxm/gpu_common_utils.h @@ -46,11 +46,11 @@ #include "gromacs/nbnxm/nbnxm.h" -#if GMX_GPU == GMX_GPU_CUDA +#if GMX_GPU_CUDA # include "cuda/nbnxm_cuda_types.h" #endif -#if GMX_GPU == GMX_GPU_OPENCL +#if GMX_GPU_OPENCL # include "opencl/nbnxm_ocl_types.h" #endif diff --git a/src/gromacs/nbnxm/gpu_types_common.h b/src/gromacs/nbnxm/gpu_types_common.h index 17b66e49d8..9166a3e50a 100644 --- a/src/gromacs/nbnxm/gpu_types_common.h +++ b/src/gromacs/nbnxm/gpu_types_common.h 
@@ -49,11 +49,11 @@ #include "pairlist.h" -#if GMX_GPU == GMX_GPU_OPENCL +#if GMX_GPU_OPENCL # include "gromacs/gpu_utils/gpuregiontimer_ocl.h" #endif -#if GMX_GPU == GMX_GPU_CUDA +#if GMX_GPU_CUDA # include "gromacs/gpu_utils/gpuregiontimer.cuh" #endif diff --git a/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp b/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp index 7f6e433054..105ceefb72 100644 --- a/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp +++ b/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp @@ -48,11 +48,11 @@ #include "config.h" -#if GMX_GPU == GMX_GPU_CUDA +#if GMX_GPU_CUDA # include "cuda/nbnxm_cuda_types.h" #endif -#if GMX_GPU == GMX_GPU_OPENCL +#if GMX_GPU_OPENCL # include "opencl/nbnxm_ocl_types.h" #endif diff --git a/src/gromacs/nbnxm/opencl/CMakeLists.txt b/src/gromacs/nbnxm/opencl/CMakeLists.txt index 0c36906457..69d86e96ba 100644 --- a/src/gromacs/nbnxm/opencl/CMakeLists.txt +++ b/src/gromacs/nbnxm/opencl/CMakeLists.txt @@ -33,7 +33,7 @@ # To help us fund GROMACS development, we humbly ask that you cite # the research papers on the package. Check out http://www.gromacs.org. -if(GMX_USE_OPENCL) +if(GMX_GPU_OPENCL) file(GLOB OPENCL_NB_SOURCES *.cpp) set(NBNXM_SOURCES ${NBNXM_SOURCES} ${OPENCL_NB_SOURCES} PARENT_SCOPE) endif() diff --git a/src/gromacs/nbnxm/pairlistparams.h b/src/gromacs/nbnxm/pairlistparams.h index 063f6dbf59..92826f070d 100644 --- a/src/gromacs/nbnxm/pairlistparams.h +++ b/src/gromacs/nbnxm/pairlistparams.h @@ -60,7 +60,7 @@ enum class KernelType; static constexpr int c_nbnxnCpuIClusterSize = 4; //! 
The i- and j-cluster size for GPU lists, 8 atoms for CUDA, set at compile time for OpenCL -#if GMX_GPU == GMX_GPU_OPENCL +#if GMX_GPU_OPENCL static constexpr int c_nbnxnGpuClusterSize = GMX_OPENCL_NB_CLUSTER_SIZE; #else static constexpr int c_nbnxnGpuClusterSize = 8; diff --git a/src/gromacs/taskassignment/decidegpuusage.cpp b/src/gromacs/taskassignment/decidegpuusage.cpp index e8a9e697f6..f2ae54cc08 100644 --- a/src/gromacs/taskassignment/decidegpuusage.cpp +++ b/src/gromacs/taskassignment/decidegpuusage.cpp @@ -85,12 +85,12 @@ namespace const char* g_specifyEverythingFormatString = "When you use mdrun -gputasks, %s must be set to non-default " "values, so that the device IDs can be interpreted correctly." -#if GMX_GPU != GMX_GPU_NONE +#if GMX_GPU " If you simply want to restrict which GPUs are used, then it is " "better to use mdrun -gpu_id. Otherwise, setting the " -# if GMX_GPU == GMX_GPU_CUDA +# if GMX_GPU_CUDA "CUDA_VISIBLE_DEVICES" -# elif GMX_GPU == GMX_GPU_OPENCL +# elif GMX_GPU_OPENCL // Technically there is no portable way to do this offered by the // OpenCL standard, but the only current relevant case for GROMACS // is AMD OpenCL, which offers this variable. @@ -602,7 +602,7 @@ bool decideWhetherToUseGpuForUpdate(const bool isDomainDecom { errorMessage += "Compatible GPUs must have been found.\n"; } - if (GMX_GPU != GMX_GPU_CUDA) + if (!GMX_GPU_CUDA) { errorMessage += "Only a CUDA build is supported.\n"; } diff --git a/src/gromacs/utility/CMakeLists.txt b/src/gromacs/utility/CMakeLists.txt index fef7f2cb90..7e3f5cfc2b 100644 --- a/src/gromacs/utility/CMakeLists.txt +++ b/src/gromacs/utility/CMakeLists.txt @@ -34,7 +34,7 @@ # the research papers on the package. Check out http://www.gromacs.org. 
file(GLOB UTILITY_SOURCES *.cpp) -if (GMX_USE_CUDA) +if (GMX_GPU_CUDA) gmx_add_libgromacs_sources(cuda_version_information.cu) endif() set(LIBGROMACS_SOURCES ${LIBGROMACS_SOURCES} ${UTILITY_SOURCES} PARENT_SCOPE) diff --git a/src/gromacs/utility/binaryinformation.cpp b/src/gromacs/utility/binaryinformation.cpp index 1ce4dd4804..8548a1818b 100644 --- a/src/gromacs/utility/binaryinformation.cpp +++ b/src/gromacs/utility/binaryinformation.cpp @@ -308,12 +308,12 @@ void gmx_print_version_info(gmx::TextWriter* writer) writer->writeLine(formatString("Linked with Intel MKL version %d.%d.%d.", __INTEL_MKL__, __INTEL_MKL_MINOR__, __INTEL_MKL_UPDATE__)); #endif -#if GMX_GPU == GMX_GPU_OPENCL +#if GMX_GPU_OPENCL writer->writeLine(formatString("OpenCL include dir: %s", OPENCL_INCLUDE_DIR)); writer->writeLine(formatString("OpenCL library: %s", OPENCL_LIBRARY)); writer->writeLine(formatString("OpenCL version: %s", OPENCL_VERSION_STRING)); #endif -#if GMX_GPU == GMX_GPU_CUDA +#if GMX_GPU_CUDA writer->writeLine(formatString("CUDA compiler: %s", CUDA_COMPILER_INFO)); writer->writeLine(formatString("CUDA compiler flags:%s %s", CUDA_COMPILER_FLAGS, CMAKE_BUILD_CONFIGURATION_CXX_FLAGS)); diff --git a/src/programs/mdrun/tests/exactcontinuation.cpp b/src/programs/mdrun/tests/exactcontinuation.cpp index 772544d328..ca57f3422c 100644 --- a/src/programs/mdrun/tests/exactcontinuation.cpp +++ b/src/programs/mdrun/tests/exactcontinuation.cpp @@ -435,7 +435,7 @@ TEST_P(MdrunNoAppendContinuationIsExact, WithinTolerances) // TODO The time for OpenCL kernel compilation means these tests time // out. Once that compilation is cached for the whole process, these // tests can run in such configurations. 
-#if GMX_GPU != GMX_GPU_OPENCL +#if !GMX_GPU_OPENCL INSTANTIATE_TEST_CASE_P( NormalIntegrators, diff --git a/src/programs/mdrun/tests/periodicactions.cpp b/src/programs/mdrun/tests/periodicactions.cpp index a909f4e650..887d1a8d9a 100644 --- a/src/programs/mdrun/tests/periodicactions.cpp +++ b/src/programs/mdrun/tests/periodicactions.cpp @@ -439,7 +439,7 @@ using ::testing::ValuesIn; // TODO The time for OpenCL kernel compilation means these tests time // out. Once that compilation is cached for the whole process, these // tests can run in such configurations. -#if GMX_GPU != GMX_GPU_OPENCL +#if !GMX_GPU_OPENCL INSTANTIATE_TEST_CASE_P(BasicPropagators, PeriodicActionsTest, Combine(ValuesIn(simplePropagationParameters()), Values(outputParameters))); diff --git a/src/programs/mdrun/tests/rerun.cpp b/src/programs/mdrun/tests/rerun.cpp index b490d79901..72b2323584 100644 --- a/src/programs/mdrun/tests/rerun.cpp +++ b/src/programs/mdrun/tests/rerun.cpp @@ -193,7 +193,7 @@ TEST_P(MdrunRerunTest, WithinTolerances) // TODO The time for OpenCL kernel compilation means these tests time // out. Once that compilation is cached for the whole process, these // tests can run in such configurations. -#if GMX_GPU != GMX_GPU_OPENCL +#if !GMX_GPU_OPENCL INSTANTIATE_TEST_CASE_P( NormalMdrunIsReproduced, MdrunRerunTest, @@ -252,7 +252,7 @@ TEST_P(MdrunRerunFreeEnergyTest, WithinTolerances) // TODO The time for OpenCL kernel compilation means these tests time // out. Once that compilation is cached for the whole process, these // tests can run in such configurations. 
-#if GMX_GPU != GMX_GPU_OPENCL +#if !GMX_GPU_OPENCL INSTANTIATE_TEST_CASE_P(MdrunIsReproduced, MdrunRerunFreeEnergyTest, ::testing::Combine(::testing::Values("nonanol_vacuo"), diff --git a/src/programs/mdrun/tests/simulator.cpp b/src/programs/mdrun/tests/simulator.cpp index e6c3fa3188..eae77384e7 100644 --- a/src/programs/mdrun/tests/simulator.cpp +++ b/src/programs/mdrun/tests/simulator.cpp @@ -207,7 +207,7 @@ TEST_P(SimulatorComparisonTest, WithinTolerances) // tests can run in such configurations. // These tests are very sensitive, so we only run them in double precision. // As we change call ordering, they might actually become too strict to be useful. -#if GMX_GPU != GMX_GPU_OPENCL && GMX_DOUBLE +#if !GMX_GPU_OPENCL && GMX_DOUBLE INSTANTIATE_TEST_CASE_P(SimulatorsAreEquivalentDefaultModular, SimulatorComparisonTest, ::testing::Combine(::testing::Combine(::testing::Values("argon12", "tip3p5"), diff --git a/src/testutils/TestMacros.cmake b/src/testutils/TestMacros.cmake index 8d5cf3b733..a9fb476a56 100644 --- a/src/testutils/TestMacros.cmake +++ b/src/testutils/TestMacros.cmake @@ -109,7 +109,7 @@ function (gmx_add_gtest_executable EXENAME) TEST_USES_HARDWARE_DETECTION=true) endif() - if (GMX_USE_CUDA AND NOT GMX_CLANG_CUDA) + if (GMX_GPU_CUDA AND NOT GMX_CLANG_CUDA) # Work around FindCUDA that prevents using target_link_libraries() # with keywords otherwise... 
set(CUDA_LIBRARIES PRIVATE ${CUDA_LIBRARIES}) @@ -124,7 +124,7 @@ function (gmx_add_gtest_executable EXENAME) ${TESTUTILS_DIR}/unittest_main.cpp) endif() - if (GMX_USE_CUDA) + if (GMX_GPU_CUDA) if (GMX_CLANG_CUDA) target_sources(${EXENAME} PRIVATE ${ARG_CUDA_CU_SOURCE_FILES} @@ -135,7 +135,7 @@ function (gmx_add_gtest_executable EXENAME) target_link_libraries(${EXENAME} PRIVATE ${GMX_EXTRA_LIBRARIES}) endif() endif() - elseif (GMX_USE_OPENCL) + elseif (GMX_GPU_OPENCL) target_sources(${EXENAME} PRIVATE ${ARG_OPENCL_CPP_SOURCE_FILES} ${ARG_GPU_CPP_SOURCE_FILES}) if(ARG_OPENCL_CPP_SOURCE_FILES OR ARG_GPU_CPP_SOURCE_FILES) target_link_libraries(${EXENAME} PRIVATE ${OpenCL_LIBRARIES}) @@ -203,7 +203,7 @@ function (gmx_register_gtest_test NAME EXENAME) # Both OpenCL (from JIT) and ThreadSanitizer (from how it # checks) can take signficantly more time than other # configurations. - if (GMX_USE_OPENCL) + if (GMX_GPU_OPENCL) set(_timeout 240) elseif (${CMAKE_BUILD_TYPE} STREQUAL TSAN) set(_timeout 300) -- 2.22.0