#include "pme.h"
#include "gromacs/gpu_utils/cudautils.cuh"
+#include "gromacs/gpu_utils/devicebuffer.h"
#include "gromacs/gpu_utils/pmalloc_cuda.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/smalloc.h"
#include <cstdlib>
#include "gromacs/gpu_utils/cuda_arch_utils.cuh"
+#include "gromacs/gpu_utils/devicebuffer.h" //TODO remove when removing cu_realloc_buffered
#include "gromacs/gpu_utils/gpu_utils.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/smalloc.h"
return true;
}
-/*! \brief Free a device-side buffer.
- * This does not reset separately stored size/capacity integers,
- * as this is planned to be a destructor of DeviceBuffer as a proper class,
- * and no calls on \p buffer should be made afterwards.
- *
- * \param[in] buffer Pointer to the buffer to free.
- */
-template <typename DeviceBuffer>
-void freeDeviceBuffer(DeviceBuffer *buffer)
-{
- GMX_ASSERT(buffer, "needs a buffer pointer");
- if (*buffer)
- {
- GMX_RELEASE_ASSERT(cudaFree(*buffer) == cudaSuccess, "cudaFree failed");
- }
-}
-
#endif
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2018, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+#ifndef GMX_GPU_UTILS_DEVICEBUFFER_CUH
+#define GMX_GPU_UTILS_DEVICEBUFFER_CUH
+
+/*! \libinternal \file
+ * \brief Implements the DeviceBuffer type and routines for CUDA.
+ * Should only be included directly by the main DeviceBuffer file devicebuffer.h.
+ * TODO: the intent is for DeviceBuffer to become a class.
+ *
+ * \author Aleksei Iupinov <a.yupinov@gmail.com>
+ *
+ * \inlibraryapi
+ */
+
+#include "gromacs/gpu_utils/gpu_utils.h" //only for GpuApiCallBehavior
+#include "gromacs/gpu_utils/gputraits.cuh"
+#include "gromacs/utility/gmxassert.h"
+
+//! \brief A device-side buffer of ValueTypes
+template<typename ValueType>
+using DeviceBuffer = ValueType *;
+
+/*! \brief
+ * Allocates a device-side buffer.
+ * It is currently a caller's responsibility to call it only on not-yet allocated buffers.
+ *
+ * \tparam ValueType Raw value type of the \p buffer.
+ * \param[in,out] buffer Pointer to the device-side buffer.
+ * \param[in] numValues Number of values to accomodate.
+ * \param[in] context The buffer's dummy context - not managed explicitly in CUDA RT.
+ */
+template <typename ValueType>
+void allocateDeviceBuffer(DeviceBuffer<ValueType> *buffer,
+ size_t numValues,
+ Context /* context */)
+{
+ GMX_ASSERT(buffer, "needs a buffer pointer");
+ cudaError_t stat = cudaMalloc((void **)buffer, numValues * sizeof(ValueType));
+ GMX_RELEASE_ASSERT(stat == cudaSuccess, "cudaMalloc failure");
+}
+
+/*! \brief
+ * Frees a device-side buffer.
+ * This does not reset separately stored size/capacity integers,
+ * as this is planned to be a destructor of DeviceBuffer as a proper class,
+ * and no calls on \p buffer should be made afterwards.
+ *
+ * \param[in] buffer Pointer to the buffer to free.
+ */
+template <typename DeviceBuffer>
+void freeDeviceBuffer(DeviceBuffer *buffer)
+{
+ GMX_ASSERT(buffer, "needs a buffer pointer");
+ if (*buffer)
+ {
+ GMX_RELEASE_ASSERT(cudaFree(*buffer) == cudaSuccess, "cudaFree failed");
+ }
+}
+
+/*! \brief
+ * Performs the host-to-device data copy, synchronous or asynchronously on request.
+ *
+ * TODO: This is meant to gradually replace cu/ocl_copy_h2d.
+ *
+ * \tparam ValueType Raw value type of the \p buffer.
+ * \param[in,out] buffer Pointer to the device-side buffer
+ * \param[in] hostBuffer Pointer to the raw host-side memory, also typed \p ValueType
+ * \param[in] startingValueIndex Offset (in values) at the device-side buffer to copy into.
+ * \param[in] numValues Number of values to copy.
+ * \param[in] stream GPU stream to perform asynchronous copy in.
+ * \param[in] transferKind Copy type: synchronous or asynchronous.
+ * \param[out] timingEvent A dummy pointer to the H2D copy timing event to be filled in.
+ * Not used in CUDA implementation.
+ */
+template <typename ValueType>
+void copyToDeviceBuffer(DeviceBuffer<ValueType> *buffer,
+ const ValueType *hostBuffer,
+ size_t startingValueIndex,
+ size_t numValues,
+ CommandStream stream,
+ GpuApiCallBehavior transferKind,
+ CommandEvent */*timingEvent*/)
+{
+ if (numValues == 0)
+ {
+ return; // such calls are actually made with empty domains
+ }
+ GMX_ASSERT(buffer, "needs a buffer pointer");
+ GMX_ASSERT(hostBuffer, "needs a host buffer pointer");
+ cudaError_t stat;
+ const size_t bytes = numValues * sizeof(ValueType);
+
+ switch (transferKind)
+ {
+ case GpuApiCallBehavior::Async:
+ GMX_ASSERT(isHostMemoryPinned(hostBuffer), "Source host buffer was not pinned for CUDA");
+ stat = cudaMemcpyAsync(*((ValueType **)buffer) + startingValueIndex, hostBuffer, bytes, cudaMemcpyHostToDevice, stream);
+ GMX_RELEASE_ASSERT(stat == cudaSuccess, "Asynchronous H2D copy failed");
+ break;
+
+ case GpuApiCallBehavior::Sync:
+ stat = cudaMemcpy(*((ValueType **)buffer) + startingValueIndex, hostBuffer, bytes, cudaMemcpyHostToDevice);
+ GMX_RELEASE_ASSERT(stat == cudaSuccess, "Synchronous H2D copy failed");
+ break;
+
+ default:
+ throw;
+ }
+}
+
+#endif
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2018, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+#ifndef GMX_GPU_UTILS_DEVICEBUFFER_H
+#define GMX_GPU_UTILS_DEVICEBUFFER_H
+
+/*! \libinternal \file
+ * \brief Implements the logic for handling of DeviceBuffer types in OpenCL/CUDA.
+ * Can only be included on GPU build paths.
+ *
+ * \author Aleksei Iupinov <a.yupinov@gmail.com>
+ *
+ * \inlibraryapi
+ */
+
+#include "config.h"
+
+#include "gromacs/utility/gmxassert.h"
+#include "gromacs/utility/smalloc.h" // TODO: this is only for over_alloc_large
+
+#if GMX_GPU == GMX_GPU_CUDA
+#include "gromacs/gpu_utils/devicebuffer.cuh"
+#elif GMX_GPU == GMX_GPU_OPENCL
+#include "gromacs/gpu_utils/devicebuffer_ocl.h"
+#else
+// cppcheck-suppress preprocessorErrorDirective
+#error "devicebuffer.h included on non-GPU build!"
+#endif
+
+/*! \brief
+ * Reallocates the device-side buffer.
+ *
+ * Reallocates the device-side memory pointed by \p buffer.
+ * Allocation is buffered and therefore freeing is only needed
+ * if the previously allocated space is not enough.
+ * \p currentNumValues and \p currentMaxNumValues are updated.
+ * TODO: \p currentNumValues, \p currentMaxNumValues, \p context
+ * should all be encapsulated in a host-side class together with the buffer.
+ *
+ * \tparam ValueType Raw value type of the \p buffer.
+ * \param[in,out] buffer Pointer to the device-side buffer
+ * \param[in] numValues Number of values to accomodate.
+ * \param[in,out] currentNumValues The pointer to the buffer's number of values.
+ * \param[in,out] currentMaxNumValues The pointer to the buffer's capacity.
+ * \param[in] context The buffer's context.
+ */
+template <typename ValueType>
+void reallocateDeviceBuffer(DeviceBuffer<ValueType> *buffer,
+ size_t numValues,
+ int *currentNumValues,
+ int *currentMaxNumValues,
+ Context context)
+{
+ GMX_ASSERT(buffer, "needs a buffer pointer");
+ GMX_ASSERT(currentNumValues, "needs a size pointer");
+ GMX_ASSERT(currentMaxNumValues, "needs a capacity pointer");
+
+ /* reallocate only if the data does not fit */
+ if (static_cast<int>(numValues) > *currentMaxNumValues)
+ {
+ if (*currentMaxNumValues >= 0)
+ {
+ freeDeviceBuffer(buffer);
+ }
+
+ *currentMaxNumValues = over_alloc_large(numValues);
+ allocateDeviceBuffer(buffer, *currentMaxNumValues, context);
+ }
+ /* size could have changed without actual reallocation */
+ *currentNumValues = numValues;
+}
+
+#endif
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2018, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+#ifndef GMX_GPU_UTILS_DEVICEBUFFER_OCL_H
+#define GMX_GPU_UTILS_DEVICEBUFFER_OCL_H
+
+/*! \libinternal \file
+ * \brief Implements the DeviceBuffer type and routines for OpenCL.
+ * Should only be included directly by the main DeviceBuffer file devicebuffer.h.
+ * TODO: the intent is for DeviceBuffer to become a class.
+ *
+ * \author Aleksei Iupinov <a.yupinov@gmail.com>
+ *
+ * \inlibraryapi
+ */
+
+#include "gromacs/gpu_utils/gpu_utils.h" //only for GpuApiCallBehavior
+#include "gromacs/gpu_utils/gputraits_ocl.h"
+#include "gromacs/utility/gmxassert.h"
+
+/*! \libinternal \brief
+ * A minimal cl_mem wrapper that remembers its allocation type.
+ * The only point is making template type deduction possible.
+ */
+template<typename ValueType>
+class TypedClMemory
+{
+ private:
+ //! \brief Underlying data - not nulled right here only because we still have some snew()'s around
+ cl_mem data_;
+ public:
+ //! \brief An assignment operator - the purpose is to make allocation/zeroing work
+ void operator=(cl_mem data){data_ = data; }
+ //! \brief Returns underlying cl_mem transparently
+ operator cl_mem() {return data_; }
+};
+
+//! \libinternal \brief A device-side buffer of ValueTypes
+template<typename ValueType>
+using DeviceBuffer = TypedClMemory<ValueType>;
+
+/*! \libinternal \brief
+ * Allocates a device-side buffer.
+ * It is currently a caller's responsibility to call it only on not-yet allocated buffers.
+ *
+ * \tparam ValueType Raw value type of the \p buffer.
+ * \param[in,out] buffer Pointer to the device-side buffer.
+ * \param[in] numValues Number of values to accomodate.
+ * \param[in] context The buffer's context-to-be.
+ */
+template <typename ValueType>
+void allocateDeviceBuffer(DeviceBuffer<ValueType> *buffer,
+ size_t numValues,
+ Context context)
+{
+ GMX_ASSERT(buffer, "needs a buffer pointer");
+ void *hostPtr = nullptr;
+ cl_int clError;
+ *buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, numValues * sizeof(ValueType), hostPtr, &clError);
+ GMX_RELEASE_ASSERT(clError == CL_SUCCESS, "clCreateBuffer failure");
+}
+
+/*! \brief
+ * Frees a device-side buffer.
+ * This does not reset separately stored size/capacity integers,
+ * as this is planned to be a destructor of DeviceBuffer as a proper class,
+ * and no calls on \p buffer should be made afterwards.
+ *
+ * \param[in] buffer Pointer to the buffer to free.
+ */
+template <typename DeviceBuffer>
+void freeDeviceBuffer(DeviceBuffer *buffer)
+{
+ GMX_ASSERT(buffer, "needs a buffer pointer");
+ if (*buffer)
+ {
+ GMX_RELEASE_ASSERT(clReleaseMemObject(*buffer) == CL_SUCCESS, "clReleaseMemObject failed");
+ }
+}
+
+/*! \brief
+ * Performs the host-to-device data copy, synchronous or asynchronously on request.
+ *
+ * TODO: This is meant to gradually replace cu/ocl_copy_h2d.
+ *
+ * \tparam ValueType Raw value type of the \p buffer.
+ * \param[in,out] buffer Pointer to the device-side buffer
+ * \param[in] hostBuffer Pointer to the raw host-side memory, also typed \p ValueType
+ * \param[in] startingValueIndex Offset (in values) at the device-side buffer to copy into.
+ * \param[in] numValues Number of values to copy.
+ * \param[in] stream GPU stream to perform asynchronous copy in.
+ * \param[in] transferKind Copy type: synchronous or asynchronous.
+ * \param[out] timingEvent A pointer to the H2D copy timing event to be filled in.
+ * If the pointer is not null, the event can further be used
+ * to queue a wait for this operation or to query profiling information.
+ */
+template <typename ValueType>
+void copyToDeviceBuffer(DeviceBuffer<ValueType> *buffer,
+ const ValueType *hostBuffer,
+ size_t startingValueIndex,
+ size_t numValues,
+ CommandStream stream,
+ GpuApiCallBehavior transferKind,
+ CommandEvent *timingEvent)
+{
+ if (numValues == 0)
+ {
+ return; // such calls are actually made with empty domains
+ }
+ GMX_ASSERT(buffer, "needs a buffer pointer");
+ GMX_ASSERT(hostBuffer, "needs a host buffer pointer");
+ cl_int clError;
+ const size_t offset = startingValueIndex * sizeof(ValueType);
+ const size_t bytes = numValues * sizeof(ValueType);
+ switch (transferKind)
+ {
+ case GpuApiCallBehavior::Async:
+ clError = clEnqueueWriteBuffer(stream, *buffer, CL_FALSE, offset, bytes, hostBuffer, 0, nullptr, timingEvent);
+ GMX_RELEASE_ASSERT(clError == CL_SUCCESS, "Asynchronous H2D copy failed");
+ break;
+
+ case GpuApiCallBehavior::Sync:
+ clError = clEnqueueWriteBuffer(stream, *buffer, CL_TRUE, offset, bytes, hostBuffer, 0, nullptr, timingEvent);
+ GMX_RELEASE_ASSERT(clError == CL_SUCCESS, "Synchronous H2D copy failed");
+ break;
+
+ default:
+ throw;
+ }
+}
+
+#endif
}
}
-bool isHostMemoryPinned(void *h_ptr)
+bool isHostMemoryPinned(const void *h_ptr)
{
cudaPointerAttributes memoryAttributes;
cudaError_t stat = cudaPointerGetAttributes(&memoryAttributes, h_ptr);
//! Tells whether the host buffer was pinned for non-blocking transfers. Only implemented for CUDA.
CUDA_FUNC_QUALIFIER
-bool isHostMemoryPinned(void *CUDA_FUNC_ARGUMENT(h_ptr)) CUDA_FUNC_TERM_WITH_RETURN(false)
+bool isHostMemoryPinned(const void *CUDA_FUNC_ARGUMENT(h_ptr)) CUDA_FUNC_TERM_WITH_RETURN(false)
#endif
/*! \brief Resets internal state */
inline void reset(){}
+
+ /*! \brief Returns a new raw timing event
+ * for passing into individual GPU API calls.
+ * This is just a dummy in CUDA.
+ */
+ inline CommandEvent *fetchNextEvent()
+ {
+ return nullptr;
+ }
};
//! Short-hand for external use
using CommandStream = cudaStream_t;
//! \brief Single GPU call timing event - meaningless in CUDA
using CommandEvent = void;
-//! \brief A device-side buffer of ValueTypes
-template<typename ValueType>
-using DeviceBuffer = ValueType *;
+//! \brief Context used explicitly in OpenCL, does nothing in CUDA
+using Context = void *;
#endif
using CommandStream = cl_command_queue;
//! \brief Single GPU call timing event
using CommandEvent = cl_event;
-//! \brief A device-side buffer of ValueTypes
-template<typename ValueType>
-using DeviceBuffer = cl_mem;
+//! \brief Context used explicitly in OpenCL
+using Context = cl_context;
#endif
return false;
}
-/*! \brief Free a device-side buffer.
- * This does not reset separately stored size/capacity integers,
- * as this is planned to be a destructor of DeviceBuffer as a proper class,
- * and no calls on \p buffer should be made afterwards.
- *
- * \param[in] buffer Pointer to the buffer to free.
- */
-template <typename DeviceBuffer>
-void freeDeviceBuffer(DeviceBuffer *buffer)
-{
- GMX_ASSERT(buffer, "needs a buffer pointer");
- if (*buffer)
- {
- GMX_RELEASE_ASSERT(clReleaseMemObject(*buffer) == CL_SUCCESS, "clReleaseMemObject failed");
- }
-}
-
#endif
nb->timers->didPairlistH2D[iloc] = true;
}
- cu_realloc_buffered((void **)&d_plist->sci, h_plist->sci, sizeof(*d_plist->sci),
- &d_plist->nsci, &d_plist->sci_nalloc,
- h_plist->nsci,
- stream, true);
-
- cu_realloc_buffered((void **)&d_plist->cj4, h_plist->cj4, sizeof(*d_plist->cj4),
- &d_plist->ncj4, &d_plist->cj4_nalloc,
- h_plist->ncj4,
- stream, true);
-
- /* this call only allocates space on the device (no data is transferred) */
- cu_realloc_buffered((void **)&d_plist->imask, NULL, sizeof(*d_plist->imask),
- &d_plist->nimask, &d_plist->imask_nalloc,
- h_plist->ncj4*c_nbnxnGpuClusterpairSplit,
- stream, true);
-
- cu_realloc_buffered((void **)&d_plist->excl, h_plist->excl, sizeof(*d_plist->excl),
- &d_plist->nexcl, &d_plist->excl_nalloc,
- h_plist->nexcl,
- stream, true);
+ Context context = nullptr;
+
+ reallocateDeviceBuffer(&d_plist->sci, h_plist->nsci,
+ &d_plist->nsci, &d_plist->sci_nalloc, context);
+ copyToDeviceBuffer(&d_plist->sci, h_plist->sci, 0, h_plist->nsci,
+ stream, GpuApiCallBehavior::Async,
+ bDoTime ? nb->timers->pl_h2d[iloc].fetchNextEvent() : nullptr);
+
+ reallocateDeviceBuffer(&d_plist->cj4, h_plist->ncj4,
+ &d_plist->ncj4, &d_plist->cj4_nalloc, context);
+ copyToDeviceBuffer(&d_plist->cj4, h_plist->cj4, 0, h_plist->ncj4,
+ stream, GpuApiCallBehavior::Async,
+ bDoTime ? nb->timers->pl_h2d[iloc].fetchNextEvent() : nullptr);
+
+ reallocateDeviceBuffer(&d_plist->imask, h_plist->ncj4*c_nbnxnGpuClusterpairSplit,
+ &d_plist->nimask, &d_plist->imask_nalloc, context);
+
+ reallocateDeviceBuffer(&d_plist->excl, h_plist->nexcl,
+ &d_plist->nexcl, &d_plist->excl_nalloc, context);
+ copyToDeviceBuffer(&d_plist->excl, h_plist->excl, 0, h_plist->nexcl,
+ stream, GpuApiCallBehavior::Async,
+ bDoTime ? nb->timers->pl_h2d[iloc].fetchNextEvent() : nullptr);
if (bDoTime)
{
#include "gromacs/gpu_utils/cuda_arch_utils.cuh"
#include "gromacs/gpu_utils/cudautils.cuh"
+#include "gromacs/gpu_utils/devicebuffer.h"
+#include "gromacs/gpu_utils/gputraits.cuh"
#include "gromacs/mdlib/nbnxn_consts.h"
#include "gromacs/mdlib/nbnxn_gpu_types_common.h"
#include "gromacs/mdlib/nbnxn_pairlist.h"
vdwType == evdwOclCUTCOMBLB);
}
-/*! \brief Reallocation device buffers
- *
- * Reallocation of the memory pointed by d_ptr and copying of the data from
- * the location pointed by h_src host-side pointer is done. Allocation is
- * buffered and therefore freeing is only needed if the previously allocated
- * space is not enough.
- * The H2D copy is launched in command queue s and can be done synchronously or
- * asynchronously (the default is the latter).
- * If copy_event is not NULL, on return it will contain an event object
- * identifying the H2D copy. The event can further be used to queue a wait
- * for this operation or to query profiling information.
- * OpenCL equivalent of cu_realloc_buffered.
- */
-static void ocl_realloc_buffered(cl_mem *d_dest, void *h_src,
- size_t type_size,
- int *curr_size, int *curr_alloc_size,
- int req_size,
- cl_context context,
- cl_command_queue s,
- bool bAsync = true,
- cl_event *copy_event = NULL)
-{
- if (d_dest == NULL || req_size < 0)
- {
- return;
- }
-
- /* reallocate only if the data does not fit = allocation size is smaller
- than the current requested size */
- if (req_size > *curr_alloc_size)
- {
- cl_int gmx_unused cl_error;
-
- /* only free if the array has already been initialized */
- if (*curr_alloc_size >= 0)
- {
- freeDeviceBuffer(d_dest);
- }
-
- *curr_alloc_size = over_alloc_large(req_size);
-
- *d_dest = clCreateBuffer(context, CL_MEM_READ_WRITE, *curr_alloc_size * type_size, NULL, &cl_error);
- assert(cl_error == CL_SUCCESS);
- // TODO: handle errors, check clCreateBuffer flags
- }
-
- /* size could have changed without actual reallocation */
- *curr_size = req_size;
-
- /* upload to device */
- if (h_src)
- {
- if (bAsync)
- {
- ocl_copy_H2D_async(*d_dest, h_src, 0, *curr_size * type_size, s, copy_event);
- }
- else
- {
- ocl_copy_H2D_sync(*d_dest, h_src, 0, *curr_size * type_size, s);
- }
- }
-}
-
/*! \brief Tabulates the Ewald Coulomb force and initializes the size/scale
* and the table GPU array.
*
nb->timers->didPairlistH2D[iloc] = true;
}
- ocl_realloc_buffered(&d_plist->sci, h_plist->sci, sizeof(nbnxn_sci_t),
- &d_plist->nsci, &d_plist->sci_nalloc,
- h_plist->nsci,
- nb->dev_rundata->context,
- stream, true, bDoTime ? nb->timers->pl_h2d[iloc].fetchNextEvent() : nullptr);
-
- ocl_realloc_buffered(&d_plist->cj4, h_plist->cj4, sizeof(nbnxn_cj4_t),
- &d_plist->ncj4, &d_plist->cj4_nalloc,
- h_plist->ncj4,
- nb->dev_rundata->context,
- stream, true, bDoTime ? nb->timers->pl_h2d[iloc].fetchNextEvent() : nullptr);
-
- /* this call only allocates space on the device (no data is transferred) - no timing as well! */
- ocl_realloc_buffered(&d_plist->imask, NULL, sizeof(unsigned int),
- &d_plist->nimask, &d_plist->imask_nalloc,
- h_plist->ncj4*c_nbnxnGpuClusterpairSplit,
- nb->dev_rundata->context,
- stream, true);
-
- ocl_realloc_buffered(&d_plist->excl, h_plist->excl, sizeof(nbnxn_excl_t),
- &d_plist->nexcl, &d_plist->excl_nalloc,
- h_plist->nexcl,
- nb->dev_rundata->context,
- stream, true, bDoTime ? nb->timers->pl_h2d[iloc].fetchNextEvent() : nullptr);
+ // TODO most of this function is same in CUDA and OpenCL, move into the header
+ Context context = nb->dev_rundata->context;
+
+ reallocateDeviceBuffer(&d_plist->sci, h_plist->nsci,
+ &d_plist->nsci, &d_plist->sci_nalloc, context);
+ copyToDeviceBuffer(&d_plist->sci, h_plist->sci, 0, h_plist->nsci,
+ stream, GpuApiCallBehavior::Async,
+ bDoTime ? nb->timers->pl_h2d[iloc].fetchNextEvent() : nullptr);
+
+ reallocateDeviceBuffer(&d_plist->cj4, h_plist->ncj4,
+ &d_plist->ncj4, &d_plist->cj4_nalloc, context);
+ copyToDeviceBuffer(&d_plist->cj4, h_plist->cj4, 0, h_plist->ncj4,
+ stream, GpuApiCallBehavior::Async,
+ bDoTime ? nb->timers->pl_h2d[iloc].fetchNextEvent() : nullptr);
+
+ reallocateDeviceBuffer(&d_plist->imask, h_plist->ncj4*c_nbnxnGpuClusterpairSplit,
+ &d_plist->nimask, &d_plist->imask_nalloc, context);
+
+ reallocateDeviceBuffer(&d_plist->excl, h_plist->nexcl,
+ &d_plist->nexcl, &d_plist->excl_nalloc, context);
+ copyToDeviceBuffer(&d_plist->excl, h_plist->excl, 0, h_plist->nexcl,
+ stream, GpuApiCallBehavior::Async,
+ bDoTime ? nb->timers->pl_h2d[iloc].fetchNextEvent() : nullptr);
if (bDoTime)
{
#ifndef NBNXN_OPENCL_TYPES_H
#define NBNXN_OPENCL_TYPES_H
+#include "gromacs/gpu_utils/devicebuffer.h"
#include "gromacs/gpu_utils/gmxopencl.h"
#include "gromacs/gpu_utils/gputraits_ocl.h"
#include "gromacs/gpu_utils/oclutils.h"