#include "config.h"
+#include "gromacs/gpu_utils/gputraits.h"
#include "gromacs/utility/classhelpers.h"
-#if GMX_GPU == GMX_GPU_CUDA
-#include "gromacs/gpu_utils/gputraits.cuh"
-#elif GMX_GPU == GMX_GPU_OPENCL
-#include "gromacs/gpu_utils/gputraits_ocl.h"
-#elif GMX_GPU == GMX_GPU_NONE
-// TODO place in gputraits_stub.h
-using Context = void *;
-#endif
-
struct gmx_device_info_t;
/*! \internal
* TODO: Later we want to be able to own the context at a higher level and not here,
* but this class would still need the non-owning context handle to build the kernels.
*/
- Context context;
+ DeviceContext context;
//! Conveniently all the PME kernels use the same single argument type
#if GMX_GPU == GMX_GPU_CUDA
* but should be a constructor parameter to PmeGpu, as well as PmeGpuProgram,
* managed by high-level code.
*/
- Context context;
+ DeviceContext context;
/* Synchronization events */
/*! \brief Triggered after the PME Force Calculations have been completed */
//TODO the variable below should be queried from the task assignment info
const bool useGpuForPme = (runMode == PmeRunMode::GPU) || (runMode == PmeRunMode::Mixed);
const void *commandStream = useGpuForPme ? pme_gpu_get_device_stream(pme) : nullptr;
- const void *gpuContext = useGpuForPme ? pme_gpu_get_device_context(pme) : nullptr;
+ const void *deviceContext = useGpuForPme ? pme_gpu_get_device_context(pme) : nullptr;
const int paddingSize = pme_gpu_get_padding_size(pme);
if (useGpuForPme)
{
}
// Unconditionally initialize the StatePropagatorDataGpu object to get more verbose message if it is used from CPU builds
- auto stateGpu = std::make_unique<gmx::StatePropagatorDataGpu>(commandStream, gpuContext, GpuApiCallBehavior::Sync, paddingSize);
+ auto stateGpu = std::make_unique<gmx::StatePropagatorDataGpu>(commandStream, deviceContext, GpuApiCallBehavior::Sync, paddingSize);
clear_nrnb(mynrnb);
* \tparam ValueType Raw value type of the \p buffer.
* \param[in,out] buffer Pointer to the device-side buffer.
* \param[in] numValues Number of values to accomodate.
- * \param[in] context The buffer's dummy context - not managed explicitly in CUDA RT.
+ * \param[in] deviceContext The buffer's dummy device context - not managed explicitly in CUDA RT.
*/
template <typename ValueType>
void allocateDeviceBuffer(DeviceBuffer<ValueType> *buffer,
size_t numValues,
- Context /* context */)
+ DeviceContext /* deviceContext */)
{
GMX_ASSERT(buffer, "needs a buffer pointer");
cudaError_t stat = cudaMalloc((void **)buffer, numValues * sizeof(ValueType));
* Allocation is buffered and therefore freeing is only needed
* if the previously allocated space is not enough.
* \p currentNumValues and \p currentMaxNumValues are updated.
- * TODO: \p currentNumValues, \p currentMaxNumValues, \p context
+ * TODO: \p currentNumValues, \p currentMaxNumValues, \p deviceContext
* should all be encapsulated in a host-side class together with the buffer.
*
* \tparam ValueType Raw value type of the \p buffer.
* \param[in] numValues Number of values to accommodate.
* \param[in,out] currentNumValues The pointer to the buffer's number of values.
* \param[in,out] currentMaxNumValues The pointer to the buffer's capacity.
- * \param[in] context The buffer's context.
+ * \param[in] deviceContext The buffer's device context.
*/
template <typename ValueType>
void reallocateDeviceBuffer(DeviceBuffer<ValueType> *buffer,
size_t numValues,
int *currentNumValues,
int *currentMaxNumValues,
- Context context)
+ DeviceContext deviceContext)
{
GMX_ASSERT(buffer, "needs a buffer pointer");
GMX_ASSERT(currentNumValues, "needs a size pointer");
}
*currentMaxNumValues = over_alloc_large(numValues);
- allocateDeviceBuffer(buffer, *currentMaxNumValues, context);
+ allocateDeviceBuffer(buffer, *currentMaxNumValues, deviceContext);
}
/* size could have changed without actual reallocation */
*currentNumValues = numValues;
* \tparam ValueType Raw value type of the \p buffer.
* \param[in,out] buffer Pointer to the device-side buffer.
* \param[in] numValues Number of values to accomodate.
- * \param[in] context The buffer's context-to-be.
+ * \param[in] deviceContext The buffer's device context-to-be.
*/
template <typename ValueType>
void allocateDeviceBuffer(DeviceBuffer<ValueType> *buffer,
size_t numValues,
- Context context)
+ DeviceContext deviceContext)
{
GMX_ASSERT(buffer, "needs a buffer pointer");
void *hostPtr = nullptr;
cl_int clError;
- *buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, numValues * sizeof(ValueType), hostPtr, &clError);
+ *buffer = clCreateBuffer(deviceContext, CL_MEM_READ_WRITE, numValues * sizeof(ValueType), hostPtr, &clError);
GMX_RELEASE_ASSERT(clError == CL_SUCCESS, "clCreateBuffer failure");
}
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2018, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
* \author Aleksei Iupinov <a.yupinov@gmail.com>
*
* \inlibraryapi
+ * \ingroup module_gpu_utils
*/
//! \brief GPU command stream
//! \brief Single GPU call timing event - meaningless in CUDA
using CommandEvent = void;
//! \brief Context used explicitly in OpenCL, does nothing in CUDA
-using Context = void *;
+using DeviceContext = void *;
/*! \internal \brief
* GPU kernels scheduling description. This is same in OpenCL/CUDA.
--- /dev/null
+++ b/src/gromacs/gpu_utils/gputraits.h
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2018,2019, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+#ifndef GMX_GPU_UTILS_GPUTRAITS_H
+#define GMX_GPU_UTILS_GPUTRAITS_H
+
+/*! \libinternal \file
+ * \brief Declares the GPU type traits for CUDA, OpenCL, and non-GPU builds
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ *
+ * \inlibraryapi
+ * \ingroup module_gpu_utils
+ */
+
+#include "config.h"
+
+#if GMX_GPU == GMX_GPU_CUDA
+
+#include "gromacs/gpu_utils/gputraits.cuh"
+
+#elif GMX_GPU == GMX_GPU_OPENCL
+
+#include "gromacs/gpu_utils/gputraits_ocl.h"
+
+#else
+
+//! \brief GPU command stream
+using CommandStream = void *;
+//! \brief Single GPU call timing event
+using CommandEvent = void *;
+//! \brief GPU context
+using DeviceContext = void *;
+
+#endif // GMX_GPU
+
+#endif // GMX_GPU_UTILS_GPUTRAITS_H
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2018, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
* \author Aleksei Iupinov <a.yupinov@gmail.com>
*
* \inlibraryapi
+ * \ingroup module_gpu_utils
*/
#include "gromacs/gpu_utils/gmxopencl.h"
//! \brief Single GPU call timing event
using CommandEvent = cl_event;
//! \brief Context used explicitly in OpenCL
-using Context = cl_context;
+using DeviceContext = cl_context;
/*! \internal \brief
* GPU kernels scheduling description. This is same in OpenCL/CUDA.
fcd->disres.nsystems != 0);
const void *commandStream = ((GMX_GPU == GMX_GPU_OPENCL) && thisRankHasPmeGpuTask) ? pme_gpu_get_device_stream(fr->pmedata) : nullptr;
- const void *gpuContext = ((GMX_GPU == GMX_GPU_OPENCL) && thisRankHasPmeGpuTask) ? pme_gpu_get_device_context(fr->pmedata) : nullptr;
+ const void *deviceContext = ((GMX_GPU == GMX_GPU_OPENCL) && thisRankHasPmeGpuTask) ? pme_gpu_get_device_context(fr->pmedata) : nullptr;
const int paddingSize = pme_gpu_get_padding_size(fr->pmedata);
const bool inputIsCompatibleWithModularSimulator = ModularSimulator::isInputCompatible(
// We initialize GPU state even for the CPU runs so we will have a more verbose
// error if someone will try accessing it from the CPU codepath
gmx::StatePropagatorDataGpu stateGpu(commandStream,
- gpuContext,
+ deviceContext,
transferKind,
paddingSize);
fr->stateGpu = &stateGpu;
* PME work on the GPU, and if that rank also does PP work that is the only
* rank. So all coordinates are always transferred.
*
- * \note \p commandStream and \p gpuContext are allowed to be nullptr if
+ * \note \p commandStream and \p deviceContext are allowed to be nullptr if
* StatePropagatorDataGpu is not used in the OpenCL run (e.g. if PME
* does not run on the GPU).
*
- * \todo Make \p CommandStream visible in the CPU parts of the code so we
- * will not have to pass a void*.
- * \todo Make \p Context visible in CPU parts of the code so we will not
- * have to pass a void*.
+ * \todo A CommandStream is now visible in the CPU parts of the code so we
+ * can stop passing a void*.
+ * \todo A DeviceContext object is visible in CPU parts of the code so we
+ * can stop passing a void*.
*
* \param[in] commandStream GPU stream, nullptr allowed.
- * \param[in] gpuContext GPU context, nullptr allowed.
+ * \param[in] deviceContext GPU context, nullptr allowed.
* \param[in] transferKind H2D/D2H transfer call behavior (synchronous or not).
* \param[in] paddingSize Padding size for coordinates buffer.
*/
StatePropagatorDataGpu(const void *commandStream,
- const void *gpuContext,
+ const void *deviceContext,
GpuApiCallBehavior transferKind,
int paddingSize);
//! Move constructor
};
StatePropagatorDataGpu::StatePropagatorDataGpu(const void * /* commandStream */,
- const void * /* gpuContext */,
+ const void * /* deviceContext */,
GpuApiCallBehavior /* transferKind */,
int /* paddingSize */)
: impl_(nullptr)
* PME work on the GPU, and if that rank also does PP work that is the only
* rank. So all coordinates are always transferred.
*
- * \note \p commandStream and \p gpuContext are allowed to be nullptr if
+ * \note \p commandStream and \p deviceContext are allowed to be nullptr if
* StatePropagatorDataGpu is not used in the OpenCL run (e.g. if PME
* does not run on the GPU).
*
- * \todo Make CommandStream visible in the CPU parts of the code so we
- * will not have to pass a void*.
- * \todo Make a Context object visible in CPU parts of the code so we
- * will not have to pass a void*.
+ * \todo A CommandStream is now visible in the CPU parts of the code so we
+ * can stop passing a void*.
+ * \todo A DeviceContext object is visible in CPU parts of the code so we
+ * can stop passing a void*.
*
* \param[in] commandStream GPU stream, nullptr allowed.
- * \param[in] gpuContext GPU context, nullptr allowed.
+ * \param[in] deviceContext GPU context, nullptr allowed.
* \param[in] transferKind H2D/D2H transfer call behavior (synchronous or not).
* \param[in] paddingSize Padding size for coordinates buffer.
*/
Impl(const void *commandStream,
- const void *gpuContext,
+ const void *deviceContext,
GpuApiCallBehavior transferKind,
int paddingSize);
/*! \brief GPU context (for OpenCL builds)
* \todo Make a Context class usable in CPU code
*/
- Context gpuContext_ = nullptr;
+ DeviceContext deviceContext_ = nullptr;
//! Default GPU calls behavior
GpuApiCallBehavior transferKind_ = GpuApiCallBehavior::Async;
//! Padding size for the coordinates buffer
{
StatePropagatorDataGpu::Impl::Impl(gmx_unused const void *commandStream,
- gmx_unused const void *gpuContext,
+ gmx_unused const void *deviceContext,
GpuApiCallBehavior transferKind,
int paddingSize) :
transferKind_(transferKind),
{
commandStream_ = *static_cast<const CommandStream*>(commandStream);
}
- if (gpuContext != nullptr)
+ if (deviceContext != nullptr)
{
- gpuContext_ = *static_cast<const Context*>(gpuContext);
+ deviceContext_ = *static_cast<const DeviceContext*>(deviceContext);
}
#endif
void StatePropagatorDataGpu::Impl::reinit(int numAtomsLocal, int numAtomsAll)
{
#if GMX_GPU == GMX_GPU_OPENCL
- GMX_ASSERT(gpuContext_ != nullptr, "GPU context should be set in OpenCL builds.");
+ GMX_ASSERT(deviceContext_ != nullptr, "GPU context should be set in OpenCL builds.");
#endif
numAtomsLocal_ = numAtomsLocal;
numAtomsAll_ = numAtomsAll;
numAtomsPadded = numAtomsAll_;
}
- reallocateDeviceBuffer(&d_x_, DIM*numAtomsPadded, &d_xSize_, &d_xCapacity_, gpuContext_);
+ reallocateDeviceBuffer(&d_x_, DIM*numAtomsPadded, &d_xSize_, &d_xCapacity_, deviceContext_);
const size_t paddingAllocationSize = numAtomsPadded - numAtomsAll_;
if (paddingAllocationSize > 0)
clearDeviceBufferAsync(&d_x_, DIM*numAtomsAll_, DIM*paddingAllocationSize, commandStream_);
}
- reallocateDeviceBuffer(&d_v_, DIM*numAtomsAll_, &d_vSize_, &d_vCapacity_, gpuContext_);
- reallocateDeviceBuffer(&d_f_, DIM*numAtomsAll_, &d_fSize_, &d_fCapacity_, gpuContext_);
+ reallocateDeviceBuffer(&d_v_, DIM*numAtomsAll_, &d_vSize_, &d_vCapacity_, deviceContext_);
+ reallocateDeviceBuffer(&d_f_, DIM*numAtomsAll_, &d_fSize_, &d_fCapacity_, deviceContext_);
}
{
#if GMX_GPU == GMX_GPU_OPENCL
- GMX_ASSERT(gpuContext_ != nullptr, "GPU context should be set in OpenCL builds.");
+ GMX_ASSERT(deviceContext_ != nullptr, "GPU context should be set in OpenCL builds.");
#endif
GMX_UNUSED_VALUE(dataSize);
{
#if GMX_GPU == GMX_GPU_OPENCL
- GMX_ASSERT(gpuContext_ != nullptr, "GPU context should be set in OpenCL builds.");
+ GMX_ASSERT(deviceContext_ != nullptr, "GPU context should be set in OpenCL builds.");
#endif
GMX_UNUSED_VALUE(dataSize);
StatePropagatorDataGpu::StatePropagatorDataGpu(const void *commandStream,
- const void *gpuContext,
+ const void *deviceContext,
GpuApiCallBehavior transferKind,
int paddingSize)
: impl_(new Impl(commandStream,
- gpuContext,
+ deviceContext,
transferKind,
paddingSize))
{
iTimers.didPairlistH2D = true;
}
- Context context = nullptr;
+ DeviceContext context = nullptr;
reallocateDeviceBuffer(&d_plist->sci, h_plist->sci.size(),
&d_plist->nsci, &d_plist->sci_nalloc, context);
}
// TODO most of this function is same in CUDA and OpenCL, move into the header
- Context context = nb->dev_rundata->context;
+ DeviceContext context = nb->dev_rundata->context;
reallocateDeviceBuffer(&d_plist->sci, h_plist->sci.size(),
&d_plist->nsci, &d_plist->sci_nalloc, context);