From: Artem Zhmurov Date: Mon, 15 Mar 2021 16:57:02 +0000 (+0300) Subject: Move pmalloc(..)/pfree(...) to separate source files in CUDA/OpenCL/SYCL X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=e537c5d2ed5407d72f5312ec7f19cb80556deeb9;p=alexxy%2Fgromacs.git Move pmalloc(..)/pfree(...) to separate source files in CUDA/OpenCL/SYCL Having pmalloc(..) and pfree(..) functions in unrelated files in CUDA, OpenCL and SYCL makes it hard to use these functions in platform-agnostic code. This creates common header and moves these functions to separate source files. --- diff --git a/src/gromacs/ewald/pme_gpu_internal.cpp b/src/gromacs/ewald/pme_gpu_internal.cpp index 5f4a4fd289..8eea806ac1 100644 --- a/src/gromacs/ewald/pme_gpu_internal.cpp +++ b/src/gromacs/ewald/pme_gpu_internal.cpp @@ -60,6 +60,10 @@ #include "gromacs/gpu_utils/device_context.h" #include "gromacs/gpu_utils/device_stream.h" #include "gromacs/gpu_utils/gpu_utils.h" +#include "gromacs/gpu_utils/pmalloc.h" +#if GMX_GPU_SYCL +# include "gromacs/gpu_utils/syclutils.h" +#endif #include "gromacs/hardware/device_information.h" #include "gromacs/math/invertmatrix.h" #include "gromacs/math/units.h" @@ -69,19 +73,12 @@ #include "gromacs/utility/gmxassert.h" #include "gromacs/utility/logger.h" #include "gromacs/utility/stringutil.h" +#include "gromacs/ewald/pme.h" #if GMX_GPU_CUDA -# include "gromacs/gpu_utils/pmalloc_cuda.h" - # include "pme.cuh" -#elif GMX_GPU_OPENCL -# include "gromacs/gpu_utils/gmxopencl.h" -#elif GMX_GPU_SYCL -# include "gromacs/gpu_utils/syclutils.h" #endif -#include "gromacs/ewald/pme.h" - #include "pme_gpu_3dfft.h" #include "pme_gpu_calculate_splines.h" #include "pme_gpu_constants.h" diff --git a/src/gromacs/gpu_utils/CMakeLists.txt b/src/gromacs/gpu_utils/CMakeLists.txt index 3b59b9279e..7791042ae4 100644 --- a/src/gromacs/gpu_utils/CMakeLists.txt +++ b/src/gromacs/gpu_utils/CMakeLists.txt @@ -49,6 +49,7 @@ if(GMX_GPU_OPENCL) gmx_add_libgromacs_sources( device_context_ocl.cpp device_stream_ocl.cpp + pmalloc_ocl.cpp ocl_compiler.cpp ocl_caching.cpp oclutils.cpp @@ -59,7 +60,7 @@ elseif(GMX_GPU_CUDA) device_stream.cu gpu_utils.cu pinning.cu - pmalloc_cuda.cu + pmalloc.cu ) _gmx_add_files_to_property(CUDA_SOURCES device_stream_manager.cpp @@ -69,7 +70,7 @@ elseif(GMX_GPU_SYCL) devicebuffer_sycl.cpp device_context_sycl.cpp device_stream_sycl.cpp - syclutils.cpp + pmalloc_sycl.cpp ) _gmx_add_files_to_property(SYCL_SOURCES devicebuffer_sycl.cpp @@ -77,7 +78,7 @@ elseif(GMX_GPU_SYCL) device_context_sycl.cpp device_stream_manager.cpp device_stream_sycl.cpp - syclutils.cpp + pmalloc_sycl.cpp ) else() gmx_add_libgromacs_sources( diff --git a/src/gromacs/gpu_utils/gpu_utils.cu b/src/gromacs/gpu_utils/gpu_utils.cu index 96611b6f91..b35fcabd4a 100644 --- a/src/gromacs/gpu_utils/gpu_utils.cu +++ b/src/gromacs/gpu_utils/gpu_utils.cu @@ -52,7 +52,6 @@ #include "gromacs/gpu_utils/cudautils.cuh" #include "gromacs/gpu_utils/device_context.h" #include "gromacs/gpu_utils/device_stream.h" -#include "gromacs/gpu_utils/pmalloc_cuda.h" #include "gromacs/hardware/device_information.h" #include "gromacs/hardware/device_management.h" #include "gromacs/utility/basedefinitions.h" diff --git a/src/gromacs/gpu_utils/oclutils.cpp b/src/gromacs/gpu_utils/oclutils.cpp index 726e4f2cff..d5ee96fd55 100644 --- a/src/gromacs/gpu_utils/oclutils.cpp +++ b/src/gromacs/gpu_utils/oclutils.cpp @@ -2,7 +2,7 @@ * This file is part of the GROMACS molecular simulation package. * * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team. - * Copyright (c) 2019,2020, by the GROMACS development team, led by + * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -50,45 +50,7 @@ #include #include "gromacs/gpu_utils/gpu_utils.h" -#include "gromacs/utility/fatalerror.h" -#include "gromacs/utility/smalloc.h" -/*! \brief \brief Allocates nbytes of host memory. Use ocl_free to free memory allocated with this function. - * - * \todo - * This function should allocate page-locked memory to help reduce D2H and H2D - * transfer times, similar with pmalloc from pmalloc_cuda.cu. - * - * \param[in,out] h_ptr Pointer where to store the address of the newly allocated buffer. - * \param[in] nbytes Size in bytes of the buffer to be allocated. - */ -void pmalloc(void** h_ptr, size_t nbytes) -{ - /* Need a temporary type whose size is 1 byte, so that the - * implementation of snew_aligned can cope without issuing - * warnings. */ - char** temporary = reinterpret_cast(h_ptr); - - /* 16-byte alignment is required by the neighbour-searching code, - * because it uses four-wide SIMD for bounding-box calculation. - * However, when we organize using page-locked memory for - * device-host transfers, it will probably need to be aligned to a - * 4kb page, like CUDA does. */ - snew_aligned(*temporary, nbytes, 16); -} - -/*! \brief Frees memory allocated with pmalloc. - * - * \param[in] h_ptr Buffer allocated with pmalloc that needs to be freed. - */ -void pfree(void* h_ptr) -{ - - if (h_ptr) - { - sfree_aligned(h_ptr); - } -} /*! \brief Convert error code to diagnostic string */ std::string ocl_get_error_string(cl_int error) diff --git a/src/gromacs/gpu_utils/oclutils.h b/src/gromacs/gpu_utils/oclutils.h index c408167161..5f881c33c4 100644 --- a/src/gromacs/gpu_utils/oclutils.h +++ b/src/gromacs/gpu_utils/oclutils.h @@ -2,7 +2,7 @@ * This file is part of the GROMACS molecular simulation package. * * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team. - * Copyright (c) 2019,2020, by the GROMACS development team, led by + * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -71,12 +71,6 @@ struct gmx_device_runtime_data_t cl_program program; }; -/*! \brief Allocate host memory in malloc style */ -void pmalloc(void** h_ptr, size_t nbytes); - -/*! \brief Free host memory in malloc style */ -void pfree(void* h_ptr); - /*! \brief Convert error code to diagnostic string */ std::string ocl_get_error_string(cl_int error); diff --git a/src/gromacs/gpu_utils/pmalloc_cuda.cu b/src/gromacs/gpu_utils/pmalloc.cu similarity index 78% rename from src/gromacs/gpu_utils/pmalloc_cuda.cu rename to src/gromacs/gpu_utils/pmalloc.cu index 2d5e122052..3a8f1058fe 100644 --- a/src/gromacs/gpu_utils/pmalloc_cuda.cu +++ b/src/gromacs/gpu_utils/pmalloc.cu @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2012,2014,2015,2018,2019,2020, by the GROMACS development team, led by + * Copyright (c) 2012,2014,2015,2018,2019,2020,2021, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -40,7 +40,7 @@ #include "gmxpre.h" -#include "pmalloc_cuda.h" +#include "pmalloc.h" #include @@ -70,29 +70,6 @@ void pmalloc(void** h_ptr, size_t nbytes) CU_RET_ERR(stat, strbuf); } -/*! Allocates nbytes of page-locked memory with write-combining. - * This memory should always be freed using pfree (or with the page-locked - * free functions provied by the CUDA library). - */ -void pmalloc_wc(void** h_ptr, size_t nbytes) -{ - cudaError_t stat; - char strbuf[STRLEN]; - int flag = cudaHostAllocDefault | cudaHostAllocWriteCombined; - - if (nbytes == 0) - { - *h_ptr = nullptr; - return; - } - - gmx::ensureNoPendingDeviceError("Could not allocate page-locked memory with write-combining."); - - stat = cudaMallocHost(h_ptr, nbytes, flag); - sprintf(strbuf, "cudaMallocHost of size %d bytes failed", (int)nbytes); - CU_RET_ERR(stat, strbuf); -} - /*! Frees page locked memory allocated with pmalloc. * This function can safely be called also with a pointer to a page-locked * memory allocated directly with CUDA API calls. diff --git a/src/gromacs/gpu_utils/pmalloc_cuda.h b/src/gromacs/gpu_utils/pmalloc.h similarity index 85% rename from src/gromacs/gpu_utils/pmalloc_cuda.h rename to src/gromacs/gpu_utils/pmalloc.h index d9ab9e088d..d6b2707556 100644 --- a/src/gromacs/gpu_utils/pmalloc_cuda.h +++ b/src/gromacs/gpu_utils/pmalloc.h @@ -2,7 +2,7 @@ * This file is part of the GROMACS molecular simulation package. * * Copyright (c) 2012,2013,2014,2015,2018 by the GROMACS development team. - * Copyright (c) 2019,2020, by the GROMACS development team, led by + * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -34,13 +34,13 @@ * the research papers on the package. Check out http://www.gromacs.org. */ /*! \libinternal \file - * \brief Declare functions for host-side memory handling when using CUDA devices. + * \brief Declare functions for host-side memory handling. * * \author Szilard Pall * \inlibraryapi */ -#ifndef GMX_GPU_UTILS_PMALLOC_CUDA_H -#define GMX_GPU_UTILS_PMALLOC_CUDA_H +#ifndef GMX_GPU_UTILS_PMALLOC_H +#define GMX_GPU_UTILS_PMALLOC_H #include @@ -51,9 +51,6 @@ /*! \brief Allocates nbytes of page-locked memory. */ void pmalloc(void** h_ptr, size_t nbytes); -/*! \brief Allocates nbytes of page-locked memory with write-combining. */ -void pmalloc_wc(void** h_ptr, size_t nbytes); - /*! \brief Frees page locked memory allocated with pmalloc. */ void pfree(void* h_ptr); diff --git a/src/gromacs/gpu_utils/pmalloc_ocl.cpp b/src/gromacs/gpu_utils/pmalloc_ocl.cpp new file mode 100644 index 0000000000..9ac9d775c4 --- /dev/null +++ b/src/gromacs/gpu_utils/pmalloc_ocl.cpp @@ -0,0 +1,82 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2012,2014,2015,2018,2019,2020,2021, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! \internal \file + * \brief Define functions for host-side memory handling when using OpenCL devices. + * + * \author Anca Hamuraru + */ + +#include "gmxpre.h" + +#include "pmalloc.h" + +#include "gromacs/utility/smalloc.h" + +/*! \brief \brief Allocates nbytes of host memory. Use pfree to free memory allocated with this function. + * + * \todo + * This function should allocate page-locked memory to help reduce D2H and H2D + * transfer times, similar with pmalloc from pmalloc.cu. + * + * \param[in,out] h_ptr Pointer where to store the address of the newly allocated buffer. + * \param[in] nbytes Size in bytes of the buffer to be allocated. + */ +void pmalloc(void** h_ptr, size_t nbytes) +{ + /* Need a temporary type whose size is 1 byte, so that the + * implementation of snew_aligned can cope without issuing + * warnings. */ + char** temporary = reinterpret_cast(h_ptr); + + /* 16-byte alignment is required by the neighbour-searching code, + * because it uses four-wide SIMD for bounding-box calculation. + * However, when we organize using page-locked memory for + * device-host transfers, it will probably need to be aligned to a + * 4kb page, like CUDA does. */ + snew_aligned(*temporary, nbytes, 16); +} + +/*! \brief Frees memory allocated with pmalloc. + * + * \param[in] h_ptr Buffer allocated with pmalloc that needs to be freed. + */ +void pfree(void* h_ptr) +{ + + if (h_ptr) + { + sfree_aligned(h_ptr); + } +} diff --git a/src/gromacs/gpu_utils/syclutils.cpp b/src/gromacs/gpu_utils/pmalloc_sycl.cpp similarity index 96% rename from src/gromacs/gpu_utils/syclutils.cpp rename to src/gromacs/gpu_utils/pmalloc_sycl.cpp index 0f660c04f5..839409b245 100644 --- a/src/gromacs/gpu_utils/syclutils.cpp +++ b/src/gromacs/gpu_utils/pmalloc_sycl.cpp @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2020,2021, by the GROMACS development team, led by + * Copyright (c) 2012,2014,2015,2018,2019,2020,2021, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -39,7 +39,7 @@ */ #include "gmxpre.h" -#include "syclutils.h" +#include "pmalloc.h" #include "gromacs/utility/smalloc.h" diff --git a/src/gromacs/gpu_utils/syclutils.h b/src/gromacs/gpu_utils/syclutils.h index 1318a2a9b4..ffa4be2e83 100644 --- a/src/gromacs/gpu_utils/syclutils.h +++ b/src/gromacs/gpu_utils/syclutils.h @@ -67,12 +67,6 @@ struct gmx_device_runtime_data_t #ifndef DOXYGEN -/*! \brief Allocate host memory in malloc style */ -void pmalloc(void** h_ptr, size_t nbytes); - -/*! \brief Free host memory in malloc style */ -void pfree(void* h_ptr); - /* To properly mark function as [[noreturn]], we must do it everywhere it is declared, which * will pollute common headers.*/ # pragma clang diagnostic push diff --git a/src/gromacs/gpu_utils/tests/pinnedmemorychecker.cpp b/src/gromacs/gpu_utils/tests/pinnedmemorychecker.cpp index b4493e961e..699dff0613 100644 --- a/src/gromacs/gpu_utils/tests/pinnedmemorychecker.cpp +++ b/src/gromacs/gpu_utils/tests/pinnedmemorychecker.cpp @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by + * Copyright (c) 2017,2018,2019,2020,2021, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -50,7 +50,7 @@ # include "gromacs/gpu_utils/gpu_utils.h" # include "gromacs/gpu_utils/hostallocator.h" -# include "gromacs/gpu_utils/pmalloc_cuda.h" +# include "gromacs/gpu_utils/pmalloc.h" # include "gromacs/utility/real.h" # include "gromacs/utility/smalloc.h" diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu index 692f4076a6..ac77badc41 100644 --- a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu +++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu @@ -55,7 +55,7 @@ #include "gromacs/gpu_utils/device_stream_manager.h" #include "gromacs/gpu_utils/gpu_utils.h" #include "gromacs/gpu_utils/gpueventsynchronizer.cuh" -#include "gromacs/gpu_utils/pmalloc_cuda.h" +#include "gromacs/gpu_utils/pmalloc.h" #include "gromacs/hardware/device_information.h" #include "gromacs/hardware/device_management.h" #include "gromacs/math/vectypes.h" diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp index b823973a68..b927bd3196 100644 --- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp +++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp @@ -53,7 +53,7 @@ #include #include "gromacs/gpu_utils/device_stream_manager.h" -#include "gromacs/gpu_utils/oclutils.h" +#include "gromacs/gpu_utils/pmalloc.h" #include "gromacs/hardware/device_information.h" #include "gromacs/hardware/device_management.h" #include "gromacs/math/vectypes.h" diff --git a/src/gromacs/nbnxm/sycl/nbnxm_sycl_data_mgmt.cpp b/src/gromacs/nbnxm/sycl/nbnxm_sycl_data_mgmt.cpp index b7ad72a6a0..e5a1a4e344 100644 --- a/src/gromacs/nbnxm/sycl/nbnxm_sycl_data_mgmt.cpp +++ b/src/gromacs/nbnxm/sycl/nbnxm_sycl_data_mgmt.cpp @@ -42,6 +42,7 @@ #include "gmxpre.h" #include "gromacs/gpu_utils/device_stream_manager.h" +#include "gromacs/gpu_utils/pmalloc.h" #include "gromacs/hardware/device_information.h" #include "gromacs/mdtypes/interaction_const.h" #include "gromacs/nbnxm/atomdata.h"