option(GMX_USE_OPENCL "Enable OpenCL acceleration" OFF)
# Decide on GPU settings based on user-settings and GPU/CUDA
-# detection. GCC 4.8 requires CUDA 6.0 (but we choose 6.5 for the
-# preliminary C++11 support), icc 15 requires CUDA 7.0, and VS2015
-# requires CUDA 8.0
+# detection. VS2015 requires CUDA 8.0; for the other architectures and
+# compilers we require CUDA 7.0 or later (including for clang-CUDA).
if(MSVC)
set(REQUIRED_CUDA_VERSION 8.0)
-elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
- set(REQUIRED_CUDA_VERSION 7.0)
else()
- set(REQUIRED_CUDA_VERSION 6.5)
+ set(REQUIRED_CUDA_VERSION 7.0)
endif()
set(REQUIRED_CUDA_COMPUTE_CAPABILITY 2.0)
# Test MPI with CUDA
# Test MPMD PME with library MPI
# Test recent cmake (3.7+), to cover minor FindCUDA changes from 3.7.0
-gcc-4.8 gpu cuda-6.5 cmake-3.8.1 mpi npme=1 nranks=2 openmp
+gcc-4.8 gpu cuda-7.0 cmake-3.8.1 mpi npme=1 nranks=2 openmp
# Test non-default use of mdrun -gpu_id
# Test newest gcc supported by newest CUDA at time of release
option(GMX_GPU "Enable GPU acceleration" OFF)
option(GMX_CLANG_CUDA "Use clang for CUDA" OFF)
-if (GMX_CLANG_CUDA)
- # CUDA 7.0 or later required, override req. version
- set(REQUIRED_CUDA_VERSION 7.0)
-endif()
if(GMX_GPU AND GMX_DOUBLE)
message(FATAL_ERROR "GPU acceleration is not available in double precision!")
|Gromacs| has excellent support for NVIDIA GPUs supported via CUDA.
On Linux, NVIDIA CUDA_ toolkit with minimum version |REQUIRED_CUDA_VERSION|
-is required, and the latest
-version is strongly encouraged. Using Intel or Microsoft MSVC compilers
-requires version 7.0 and 8.0, respectively. NVIDIA GPUs with at
+is required, and the latest version is strongly encouraged. Using
+Microsoft MSVC compiler requires version 8.0. NVIDIA GPUs with at
least NVIDIA compute capability |REQUIRED_CUDA_COMPUTE_CAPABILITY| are
required. You are strongly recommended to
get the latest CUDA version and driver that supports your hardware, but
``GMX_CUDA_NB_TAB_EWALD``
force the use of tabulated Ewald kernels. Should be used only for benchmarking.
-``GMX_DISABLE_CUDALAUNCH``
- disable the use of the lower-latency cudaLaunchKernel API even when supported (CUDA >=v7.0).
- Should only be used for benchmarking purposes.
-
``GMX_DISABLE_CUDA_TIMING``
Deprecated. Use ``GMX_DISABLE_GPU_TIMING`` instead.
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "gmxpre.h"
-#include "config.h"
-
#include "gromacs/gpu_utils/cuda_arch_utils.cuh"
#include "gromacs/gpu_utils/cudautils.cuh"
#include "gromacs/utility/exceptions.h"
//! Solving kernel max block size in threads
constexpr int c_solveMaxThreadsPerBlock = (c_solveMaxWarpsPerBlock * warp_size);
-// CUDA 6.5 can not compile enum class as a template kernel parameter,
-// so we replace it with a duplicate simple enum
-#if GMX_CUDA_VERSION >= 7000
-using GridOrderingInternal = GridOrdering;
-#else
-enum GridOrderingInternal
-{
- YZX,
- XYZ
-};
-#endif
-
/*! \brief
* PME complex grid solver kernel function.
*
* \param[in] kernelParams Input PME CUDA data in constant memory.
*/
template<
- GridOrderingInternal gridOrdering,
+ GridOrdering gridOrdering,
bool computeEnergyAndVirial
>
__launch_bounds__(c_solveMaxThreadsPerBlock)
int majorDim, middleDim, minorDim;
switch (gridOrdering)
{
- case GridOrderingInternal::YZX:
+ case GridOrdering::YZX:
majorDim = YY;
middleDim = ZZ;
minorDim = XX;
break;
- case GridOrderingInternal::XYZ:
+ case GridOrdering::XYZ:
majorDim = XX;
middleDim = YY;
minorDim = ZZ;
const int kMiddle = indexMiddle + localOffsetMiddle;
float mMiddle = kMiddle;
/* Checking Y in XYZ case */
- if (gridOrdering == GridOrderingInternal::XYZ)
+ if (gridOrdering == GridOrdering::XYZ)
{
mMiddle = (kMiddle < maxkMiddle) ? kMiddle : (kMiddle - nMiddle);
}
const int kMinor = localOffsetMinor + indexMinor;
float mMinor = kMinor;
/* Checking X in YZX case */
- if (gridOrdering == GridOrderingInternal::YZX)
+ if (gridOrdering == GridOrdering::YZX)
{
mMinor = (kMinor < maxkMinor) ? kMinor : (kMinor - nMinor);
}
float mX, mY, mZ;
switch (gridOrdering)
{
- case GridOrderingInternal::YZX:
+ case GridOrdering::YZX:
mX = mMinor;
mY = mMajor;
mZ = mMiddle;
break;
- case GridOrderingInternal::XYZ:
+ case GridOrdering::XYZ:
mX = mMajor;
mY = mMiddle;
mZ = mMinor;
float corner_fac = 1.0f;
switch (gridOrdering)
{
- case GridOrderingInternal::YZX:
+ case GridOrdering::YZX:
if ((kMiddle == 0) | (kMiddle == maxkMiddle))
{
corner_fac = 0.5f;
}
break;
- case GridOrderingInternal::XYZ:
+ case GridOrdering::XYZ:
if ((kMinor == 0) | (kMinor == maxkMinor))
{
corner_fac = 0.5f;
{
if (computeEnergyAndVirial)
{
- pme_solve_kernel<GridOrderingInternal::YZX, true> <<< blocks, threads, 0, stream>>> (*kernelParamsPtr);
+ pme_solve_kernel<GridOrdering::YZX, true> <<< blocks, threads, 0, stream>>> (*kernelParamsPtr);
}
else
{
- pme_solve_kernel<GridOrderingInternal::YZX, false> <<< blocks, threads, 0, stream>>> (*kernelParamsPtr);
+ pme_solve_kernel<GridOrdering::YZX, false> <<< blocks, threads, 0, stream>>> (*kernelParamsPtr);
}
}
else if (gridOrdering == GridOrdering::XYZ)
{
if (computeEnergyAndVirial)
{
- pme_solve_kernel<GridOrderingInternal::XYZ, true> <<< blocks, threads, 0, stream>>> (*kernelParamsPtr);
+ pme_solve_kernel<GridOrdering::XYZ, true> <<< blocks, threads, 0, stream>>> (*kernelParamsPtr);
}
else
{
- pme_solve_kernel<GridOrderingInternal::XYZ, false> <<< blocks, threads, 0, stream>>> (*kernelParamsPtr);
+ pme_solve_kernel<GridOrdering::XYZ, false> <<< blocks, threads, 0, stream>>> (*kernelParamsPtr);
}
}
CU_LAUNCH_ERR("pme_solve_kernel");
/*********************************/
-/* XXX switch between chevron and cudaLaunch (supported only in CUDA >=7.0)
- -- only for benchmarking purposes */
-static const bool bUseCudaLaunchKernel =
- (GMX_CUDA_VERSION >= 7000) && (getenv("GMX_DISABLE_CUDALAUNCH") == NULL);
-
/*! Returns the number of blocks to be used for the nonbonded GPU kernel. */
static inline int calc_nb_kernel_nblock(int nwork_units, const gmx_device_info_t *dinfo)
{
shmem);
}
- if (bUseCudaLaunchKernel)
- {
- gmx_unused void* kernel_args[4];
- kernel_args[0] = adat;
- kernel_args[1] = nbp;
- kernel_args[2] = plist;
- kernel_args[3] = &bCalcFshift;
+ void* kernel_args[4];
+ kernel_args[0] = adat;
+ kernel_args[1] = nbp;
+ kernel_args[2] = plist;
+ kernel_args[3] = &bCalcFshift;
-#if GMX_CUDA_VERSION >= 7000
- cudaLaunchKernel((void *)nb_kernel, dim_grid, dim_block, kernel_args, shmem, stream);
-#endif
- }
- else
- {
- nb_kernel<<< dim_grid, dim_block, shmem, stream>>> (*adat, *nbp, *plist, bCalcFshift);
- }
+ cudaLaunchKernel((void *)nb_kernel, dim_grid, dim_block, kernel_args, shmem, stream);
CU_LAUNCH_ERR("k_calc_nb");
if (bDoTime)
shmem);
}
- if (bUseCudaLaunchKernel)
- {
- gmx_unused void* kernel_args[5];
- kernel_args[0] = adat;
- kernel_args[1] = nbp;
- kernel_args[2] = plist;
- kernel_args[3] = &numParts;
- kernel_args[4] = ∂
+ void* kernel_args[5];
+ kernel_args[0] = adat;
+ kernel_args[1] = nbp;
+ kernel_args[2] = plist;
+ kernel_args[3] = &numParts;
+ kernel_args[4] = ∂
-#if GMX_CUDA_VERSION >= 7000
- if (plist->haveFreshList)
- {
- cudaLaunchKernel((void *)nbnxn_kernel_prune_cuda<true>, dim_grid, dim_block, kernel_args, shmem, stream);
- }
- else
- {
- cudaLaunchKernel((void *)nbnxn_kernel_prune_cuda<false>, dim_grid, dim_block, kernel_args, shmem, stream);
- }
-#endif
+ if (plist->haveFreshList)
+ {
+ cudaLaunchKernel((void *)nbnxn_kernel_prune_cuda<true>, dim_grid, dim_block, kernel_args, shmem, stream);
}
else
{
- if (plist->haveFreshList)
- {
- nbnxn_kernel_prune_cuda<true><<< dim_grid, dim_block, shmem, stream>>> (*adat, *nbp, *plist, numParts, part);
- }
- else
- {
- nbnxn_kernel_prune_cuda<false><<< dim_grid, dim_block, shmem, stream>>> (*adat, *nbp, *plist, numParts, part);
- }
+ cudaLaunchKernel((void *)nbnxn_kernel_prune_cuda<false>, dim_grid, dim_block, kernel_args, shmem, stream);
}
CU_LAUNCH_ERR("k_pruneonly");