From: Mark Abraham Date: Tue, 29 Jun 2021 06:36:52 +0000 (+0200) Subject: Move GPU 3D FFT code to fft module X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=8d2f0708e05395b64c0fa1b1fee275e465815e69;p=alexxy%2Fgromacs.git Move GPU 3D FFT code to fft module Then we can have tests of all the 3D FFT implementations together Refs #3965 --- diff --git a/src/gromacs/ewald/CMakeLists.txt b/src/gromacs/ewald/CMakeLists.txt index f9041d8d2b..ce81582dcb 100644 --- a/src/gromacs/ewald/CMakeLists.txt +++ b/src/gromacs/ewald/CMakeLists.txt @@ -60,7 +60,6 @@ if (GMX_GPU_CUDA) gmx_add_libgromacs_sources( # CUDA-specific sources pme_gather.cu - pme_gpu_3dfft.cu pme_solve.cu pme_spread.cu pme_gpu_program_impl.cu @@ -80,7 +79,6 @@ if (GMX_GPU_CUDA) elseif (GMX_GPU_OPENCL) gmx_add_libgromacs_sources( # OpenCL-specific sources - pme_gpu_3dfft_ocl.cpp pme_gpu_program_impl_ocl.cpp # GPU-specific sources pme_gpu.cpp @@ -92,7 +90,6 @@ elseif (GMX_GPU_SYCL) gmx_add_libgromacs_sources( # Files that implement stubs pme_gpu_sycl_stubs.cpp - pme_gpu_3dfft_sycl.cpp # GPU-specific sources pme_gpu.cpp pme_gpu_internal.cpp diff --git a/src/gromacs/ewald/pme_gpu_internal.cpp b/src/gromacs/ewald/pme_gpu_internal.cpp index 3561c20025..cee5a349c8 100644 --- a/src/gromacs/ewald/pme_gpu_internal.cpp +++ b/src/gromacs/ewald/pme_gpu_internal.cpp @@ -57,6 +57,7 @@ #include #include "gromacs/ewald/ewald_utils.h" +#include "gromacs/fft/gpu_3dfft.h" #include "gromacs/gpu_utils/device_context.h" #include "gromacs/gpu_utils/device_stream.h" #include "gromacs/gpu_utils/gpu_utils.h" @@ -79,7 +80,6 @@ # include "pme.cuh" #endif -#include "pme_gpu_3dfft.h" #include "pme_gpu_calculate_splines.h" #include "pme_gpu_constants.h" #include "pme_gpu_program_impl.h" @@ -612,15 +612,15 @@ void pme_gpu_reinit_3dfft(const PmeGpu* pmeGpu) for (int gridIndex = 0; gridIndex < pmeGpu->common->ngrids; gridIndex++) { pmeGpu->archSpecific->fftSetup.push_back( - std::make_unique(grid.realGridSize, - 
grid.realGridSizePadded, - grid.complexGridSizePadded, - useDecomposition, - performOutOfPlaceFFT, - pmeGpu->archSpecific->deviceContext_, - pmeGpu->archSpecific->pmeStream_, - grid.d_realGrid[gridIndex], - grid.d_fourierGrid[gridIndex])); + std::make_unique(grid.realGridSize, + grid.realGridSizePadded, + grid.complexGridSizePadded, + useDecomposition, + performOutOfPlaceFFT, + pmeGpu->archSpecific->deviceContext_, + pmeGpu->archSpecific->pmeStream_, + grid.d_realGrid[gridIndex], + grid.d_fourierGrid[gridIndex])); } } } diff --git a/src/gromacs/ewald/pme_gpu_types_host_impl.h b/src/gromacs/ewald/pme_gpu_types_host_impl.h index 6b16ba5c16..39b195d329 100644 --- a/src/gromacs/ewald/pme_gpu_types_host_impl.h +++ b/src/gromacs/ewald/pme_gpu_types_host_impl.h @@ -62,16 +62,18 @@ # include "gromacs/gpu_utils/gpuregiontimer_sycl.h" #endif +#include "gromacs/fft/gpu_3dfft.h" #include "gromacs/timing/gpu_timing.h" // for gtPME_EVENT_COUNT -#include "pme_gpu_3dfft.h" - #ifndef NUMFEPSTATES //! Number of FEP states. # define NUMFEPSTATES 2 #endif -class GpuParallel3dFft; +namespace gmx +{ +class Gpu3dFft; +} // namespace gmx /*! \internal \brief * The main PME CUDA/OpenCL-specific host data structure, included in the PME GPU structure by the archSpecific pointer. @@ -116,7 +118,7 @@ struct PmeGpuSpecific bool useTiming = false; //! Vector of FFT setups - std::vector> fftSetup; + std::vector> fftSetup; //! All the timers one might use gmx::EnumerationArray timingEvents; diff --git a/src/gromacs/fft/CMakeLists.txt b/src/gromacs/fft/CMakeLists.txt index bbcd6972a8..70dd951e6f 100644 --- a/src/gromacs/fft/CMakeLists.txt +++ b/src/gromacs/fft/CMakeLists.txt @@ -1,7 +1,7 @@ # # This file is part of the GROMACS molecular simulation package. 
# -# Copyright (c) 2013,2014,2015,2018,2019,2020, by the GROMACS development team, led by +# Copyright (c) 2013,2014,2015,2018,2019,2020,2021, by the GROMACS development team, led by # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, # and including many others, as listed in the AUTHORS file in the # top-level source directory and at http://www.gromacs.org. @@ -54,6 +54,32 @@ if (GMX_FFT_MKL) gmx_add_libgromacs_sources(fft_mkl.cpp) endif() +if (GMX_GPU_CUDA) + gmx_add_libgromacs_sources( + # CUDA-specific sources + gpu_3dfft.cu + ) +elseif (GMX_GPU_OPENCL) + gmx_add_libgromacs_sources( + # OpenCL-specific sources + gpu_3dfft_ocl.cpp + ) +elseif (GMX_GPU_SYCL) + # SYCL-TODO: proper implementation + gmx_add_libgromacs_sources( + # SYCL-specific sources + gpu_3dfft_sycl.cpp + ) + _gmx_add_files_to_property(SYCL_SOURCES + gpu_3dfft_sycl.cpp + ) +else() + gmx_add_libgromacs_sources( + # Stub sources for CPU-only build + gpu_3dfft.cpp + ) +endif() + # Source files have the following private module dependencies. target_link_libraries(fft PRIVATE # gmxlib diff --git a/src/gromacs/fft/gpu_3dfft.cpp b/src/gromacs/fft/gpu_3dfft.cpp new file mode 100644 index 0000000000..3f218e8376 --- /dev/null +++ b/src/gromacs/fft/gpu_3dfft.cpp @@ -0,0 +1,83 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2021, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. 
+ * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ + +/*! 
\internal \file + * \brief Implements stub GPU 3D FFT routines for CPU-only builds + * + * \author Mark Abraham + * \ingroup module_fft + */ + +#include "gmxpre.h" + +#include "gpu_3dfft.h" + +#include "gromacs/utility/exceptions.h" + +namespace gmx +{ + +// [[noreturn]] attributes must be added in the common headers, so it's easier to silence the warning here +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wmissing-noreturn" + +class Gpu3dFft::Impl +{ +}; + +Gpu3dFft::Gpu3dFft(ivec /*realGridSize*/, + ivec /*realGridSizePadded*/, + ivec /*complexGridSizePadded*/, + const bool /*useDecomposition*/, + const bool /*performOutOfPlaceFFT*/, + const DeviceContext& /*context*/, + const DeviceStream& /*pmeStream*/, + DeviceBuffer<float> /*realGrid*/, + DeviceBuffer<float> /*complexGrid*/) +{ + GMX_THROW(InternalError("Cannot run GPU routines in a CPU-only configuration")); +} + +Gpu3dFft::~Gpu3dFft() = default; + +// NOLINTNEXTLINE readability-convert-member-functions-to-static +void Gpu3dFft::perform3dFft(gmx_fft_direction /*dir*/, CommandEvent* /*timingEvent*/) +{ + GMX_THROW(InternalError("Cannot run GPU routines in a CPU-only configuration")); +} + +#pragma clang diagnostic pop + +} // namespace gmx diff --git a/src/gromacs/ewald/pme_gpu_3dfft.cu b/src/gromacs/fft/gpu_3dfft.cu similarity index 80% rename from src/gromacs/ewald/pme_gpu_3dfft.cu rename to src/gromacs/fft/gpu_3dfft.cu index f547fc6bcc..78f3ba90dc 100644 --- a/src/gromacs/ewald/pme_gpu_3dfft.cu +++ b/src/gromacs/fft/gpu_3dfft.cu @@ -34,15 +34,16 @@ */ /*! \internal \file - * \brief Implements CUDA FFT routines for PME GPU. + * \brief Implements GPU 3D FFT routines for CUDA.
* * \author Aleksei Iupinov - * \ingroup module_ewald + * \author Mark Abraham + * \ingroup module_fft */ #include "gmxpre.h" -#include "pme_gpu_3dfft.h" +#include "gpu_3dfft.h" #include @@ -50,7 +51,10 @@ #include "gromacs/utility/fatalerror.h" #include "gromacs/utility/gmxassert.h" -class GpuParallel3dFft::Impl +namespace gmx +{ + +class Gpu3dFft::Impl { public: Impl(ivec realGridSize, @@ -78,15 +82,15 @@ static void handleCufftError(cufftResult_t status, const char* msg) } } -GpuParallel3dFft::Impl::Impl(ivec realGridSize, - ivec realGridSizePadded, - ivec complexGridSizePadded, - const bool useDecomposition, - const bool /*performOutOfPlaceFFT*/, - const DeviceContext& /*context*/, - const DeviceStream& pmeStream, - DeviceBuffer realGrid, - DeviceBuffer complexGrid) : +Gpu3dFft::Impl::Impl(ivec realGridSize, + ivec realGridSizePadded, + ivec complexGridSizePadded, + const bool useDecomposition, + const bool /*performOutOfPlaceFFT*/, + const DeviceContext& /*context*/, + const DeviceStream& pmeStream, + DeviceBuffer realGrid, + DeviceBuffer complexGrid) : realGrid_(reinterpret_cast(realGrid)), complexGrid_(reinterpret_cast(complexGrid)) { @@ -147,7 +151,7 @@ GpuParallel3dFft::Impl::Impl(ivec realGridSize, handleCufftError(result, "cufftSetStream C2R failure"); } -GpuParallel3dFft::Impl::~Impl() +Gpu3dFft::Impl::~Impl() { cufftResult_t result; result = cufftDestroy(planR2C_); @@ -156,7 +160,7 @@ GpuParallel3dFft::Impl::~Impl() handleCufftError(result, "cufftDestroy C2R failure"); } -void GpuParallel3dFft::perform3dFft(gmx_fft_direction dir, CommandEvent* /*timingEvent*/) +void Gpu3dFft::perform3dFft(gmx_fft_direction dir, CommandEvent* /*timingEvent*/) { cufftResult_t result; if (dir == GMX_FFT_REAL_TO_COMPLEX) @@ -171,15 +175,15 @@ void GpuParallel3dFft::perform3dFft(gmx_fft_direction dir, CommandEvent* /*timin } } -GpuParallel3dFft::GpuParallel3dFft(ivec realGridSize, - ivec realGridSizePadded, - ivec complexGridSizePadded, - const bool useDecomposition, - 
const bool performOutOfPlaceFFT, - const DeviceContext& context, - const DeviceStream& pmeStream, - DeviceBuffer realGrid, - DeviceBuffer complexGrid) : +Gpu3dFft::Gpu3dFft(ivec realGridSize, + ivec realGridSizePadded, + ivec complexGridSizePadded, + const bool useDecomposition, + const bool performOutOfPlaceFFT, + const DeviceContext& context, + const DeviceStream& pmeStream, + DeviceBuffer realGrid, + DeviceBuffer complexGrid) : impl_(std::make_unique(realGridSize, realGridSizePadded, complexGridSizePadded, @@ -192,4 +196,6 @@ GpuParallel3dFft::GpuParallel3dFft(ivec realGridSize, { } -GpuParallel3dFft::~GpuParallel3dFft() = default; +Gpu3dFft::~Gpu3dFft() = default; + +} // namespace gmx diff --git a/src/gromacs/ewald/pme_gpu_3dfft.h b/src/gromacs/fft/gpu_3dfft.h similarity index 79% rename from src/gromacs/ewald/pme_gpu_3dfft.h rename to src/gromacs/fft/gpu_3dfft.h index 5939dea0c6..65d3f6f03b 100644 --- a/src/gromacs/ewald/pme_gpu_3dfft.h +++ b/src/gromacs/fft/gpu_3dfft.h @@ -1,8 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team. - * Copyright (c) 2021, by the GROMACS development team, led by + * Copyright (c) 2016,2017,2018,2019,2021, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -35,14 +34,15 @@ */ /*! \internal \file - * \brief Declares the 3D FFT class for PME. + * \brief Declares the GPU 3D FFT routines. * * \author Aleksei Iupinov - * \ingroup module_ewald + * \author Mark Abraham + * \ingroup module_fft */ -#ifndef GMX_EWALD_PME_GPU_3DFFT_H -#define GMX_EWALD_PME_GPU_3DFFT_H +#ifndef GMX_FFT_GPU_3DFFT_H +#define GMX_FFT_GPU_3DFFT_H #include @@ -52,13 +52,15 @@ class DeviceContext; class DeviceStream; -struct PmeGpu; + +namespace gmx +{ /*! 
\internal \brief * A 3D FFT class for performing R2C/C2R transforms * \todo Make this class actually parallel over multiple GPUs */ -class GpuParallel3dFft +class Gpu3dFft { public: /*! \brief @@ -74,18 +76,18 @@ public: * \param[in] realGrid Device buffer of floats for the real grid * \param[in] complexGrid Device buffer of complex floats for the complex grid */ - GpuParallel3dFft(ivec realGridSize, - ivec realGridSizePadded, - ivec complexGridSizePadded, - bool useDecomposition, - bool performOutOfPlaceFFT, - const DeviceContext& context, - const DeviceStream& pmeStream, - DeviceBuffer<float> realGrid, - DeviceBuffer<float> complexGrid); + Gpu3dFft(ivec realGridSize, + ivec realGridSizePadded, + ivec complexGridSizePadded, + bool useDecomposition, + bool performOutOfPlaceFFT, + const DeviceContext& context, + const DeviceStream& pmeStream, + DeviceBuffer<float> realGrid, + DeviceBuffer<float> complexGrid); /*! \brief Destroys the FFT plans. */ - ~GpuParallel3dFft(); + ~Gpu3dFft(); /*! \brief Performs the FFT transform in given direction * * \param[in] dir FFT transform direction specifier @@ -98,4 +100,6 @@ private: std::unique_ptr<Impl> impl_; }; +} // namespace gmx + #endif diff --git a/src/gromacs/ewald/pme_gpu_3dfft_ocl.cpp b/src/gromacs/fft/gpu_3dfft_ocl.cpp similarity index 82% rename from src/gromacs/ewald/pme_gpu_3dfft_ocl.cpp rename to src/gromacs/fft/gpu_3dfft_ocl.cpp index d63901d319..69a4497445 100644 --- a/src/gromacs/ewald/pme_gpu_3dfft_ocl.cpp +++ b/src/gromacs/fft/gpu_3dfft_ocl.cpp @@ -34,15 +34,16 @@ */ /*! \internal \file - * \brief Implements OpenCL 3D FFT routines for PME GPU. + * \brief Implements GPU 3D FFT routines for OpenCL.
* * \author Aleksei Iupinov - * \ingroup module_ewald + * \author Mark Abraham + * \ingroup module_fft */ #include "gmxpre.h" -#include "pme_gpu_3dfft.h" +#include "gpu_3dfft.h" #include #include @@ -56,7 +57,10 @@ #include "gromacs/utility/gmxassert.h" #include "gromacs/utility/stringutil.h" -class GpuParallel3dFft::Impl +namespace gmx +{ + +class Gpu3dFft::Impl { public: Impl(ivec realGridSize, @@ -83,19 +87,19 @@ static void handleClfftError(clfftStatus status, const char* msg) // Supposedly it's just a superset of standard OpenCL errors if (status != CLFFT_SUCCESS) { - GMX_THROW(gmx::InternalError(gmx::formatString("%s: %d", msg, status))); + GMX_THROW(InternalError(formatString("%s: %d", msg, status))); } } -GpuParallel3dFft::Impl::Impl(ivec realGridSize, - ivec realGridSizePadded, - ivec complexGridSizePadded, - const bool useDecomposition, - const bool performOutOfPlaceFFT, - const DeviceContext& context, - const DeviceStream& pmeStream, - DeviceBuffer realGrid, - DeviceBuffer complexGrid) : +Gpu3dFft::Impl::Impl(ivec realGridSize, + ivec realGridSizePadded, + ivec complexGridSizePadded, + const bool useDecomposition, + const bool performOutOfPlaceFFT, + const DeviceContext& context, + const DeviceStream& pmeStream, + DeviceBuffer realGrid, + DeviceBuffer complexGrid) : realGrid_(realGrid), complexGrid_(complexGrid) { GMX_RELEASE_ASSERT(!useDecomposition, "FFT decomposition not implemented"); @@ -153,13 +157,13 @@ GpuParallel3dFft::Impl::Impl(ivec realGridSize, // TODO: disable last transpose (clfftSetPlanTransposeResult) } -GpuParallel3dFft::Impl::~Impl() +Gpu3dFft::Impl::~Impl() { clfftDestroyPlan(&planR2C_); clfftDestroyPlan(&planC2R_); } -void GpuParallel3dFft::perform3dFft(gmx_fft_direction dir, CommandEvent* timingEvent) +void Gpu3dFft::perform3dFft(gmx_fft_direction dir, CommandEvent* timingEvent) { cl_mem tempBuffer = nullptr; constexpr std::array waitEvents{ {} }; @@ -183,8 +187,7 @@ void GpuParallel3dFft::perform3dFft(gmx_fft_direction dir, 
CommandEvent* timingE outputGrids = &impl_->realGrid_; break; default: - GMX_THROW( - gmx::NotImplementedError("The chosen 3D-FFT case is not implemented on GPUs")); + GMX_THROW(NotImplementedError("The chosen 3D-FFT case is not implemented on GPUs")); } handleClfftError(clfftEnqueueTransform(plan, direction, @@ -199,15 +202,15 @@ void GpuParallel3dFft::perform3dFft(gmx_fft_direction dir, CommandEvent* timingE "clFFT execution failure"); } -GpuParallel3dFft::GpuParallel3dFft(ivec realGridSize, - ivec realGridSizePadded, - ivec complexGridSizePadded, - const bool useDecomposition, - const bool performOutOfPlaceFFT, - const DeviceContext& context, - const DeviceStream& pmeStream, - DeviceBuffer realGrid, - DeviceBuffer complexGrid) : +Gpu3dFft::Gpu3dFft(ivec realGridSize, + ivec realGridSizePadded, + ivec complexGridSizePadded, + const bool useDecomposition, + const bool performOutOfPlaceFFT, + const DeviceContext& context, + const DeviceStream& pmeStream, + DeviceBuffer realGrid, + DeviceBuffer complexGrid) : impl_(std::make_unique(realGridSize, realGridSizePadded, complexGridSizePadded, @@ -220,4 +223,6 @@ GpuParallel3dFft::GpuParallel3dFft(ivec realGridSize, { } -GpuParallel3dFft::~GpuParallel3dFft() = default; +Gpu3dFft::~Gpu3dFft() = default; + +} // namespace gmx diff --git a/src/gromacs/ewald/pme_gpu_3dfft_sycl.cpp b/src/gromacs/fft/gpu_3dfft_sycl.cpp similarity index 66% rename from src/gromacs/ewald/pme_gpu_3dfft_sycl.cpp rename to src/gromacs/fft/gpu_3dfft_sycl.cpp index 213889bc8e..394aaac5a4 100644 --- a/src/gromacs/ewald/pme_gpu_3dfft_sycl.cpp +++ b/src/gromacs/fft/gpu_3dfft_sycl.cpp @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. 
* - * Copyright (c) 2016,2017,2018,2019,2020,2021, by the GROMACS development team, led by + * Copyright (c) 2021, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -38,41 +38,46 @@ * * \author Andrey Alekseenko * \author Mark Abraham - * \ingroup module_ewald + * \ingroup module_fft */ #include "gmxpre.h" -#include "pme_gpu_3dfft.h" +#include "gpu_3dfft.h" #include "gromacs/utility/exceptions.h" +namespace gmx +{ + // [[noreturn]] attributes must be added in the common headers, so it's easier to silence the warning here #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wmissing-noreturn" -class GpuParallel3dFft::Impl +class Gpu3dFft::Impl { }; -GpuParallel3dFft::GpuParallel3dFft(ivec /*realGridSize*/, - ivec /*realGridSizePadded*/, - ivec /*complexGridSizePadded*/, - const bool /*useDecomposition*/, - const bool /*performOutOfPlaceFFT*/, - const DeviceContext& /*context*/, - const DeviceStream& /*pmeStream*/, - DeviceBuffer<float> /*realGrid*/, - DeviceBuffer<float> /*complexGrid*/) +Gpu3dFft::Gpu3dFft(ivec /*realGridSize*/, + ivec /*realGridSizePadded*/, + ivec /*complexGridSizePadded*/, + const bool /*useDecomposition*/, + const bool /*performOutOfPlaceFFT*/, + const DeviceContext& /*context*/, + const DeviceStream& /*pmeStream*/, + DeviceBuffer<float> /*realGrid*/, + DeviceBuffer<float> /*complexGrid*/) { - GMX_THROW(gmx::NotImplementedError("PME is not implemented in SYCL")); + GMX_THROW(NotImplementedError("GPU 3DFFT is not implemented in SYCL")); } -GpuParallel3dFft::~GpuParallel3dFft() = default; +Gpu3dFft::~Gpu3dFft() = default; -void GpuParallel3dFft::perform3dFft(gmx_fft_direction /*dir*/, CommandEvent* /*timingEvent*/) +void Gpu3dFft::perform3dFft(gmx_fft_direction /*dir*/, CommandEvent* /*timingEvent*/) { - GMX_THROW(gmx::NotImplementedError("Not implemented on SYCL yet")); +
GMX_THROW(NotImplementedError("Not implemented on SYCL yet")); } #pragma clang diagnostic pop + +} // namespace gmx