gmx_add_libgromacs_sources(
# CUDA-specific sources
pme_gather.cu
- pme_gpu_3dfft.cu
pme_solve.cu
pme_spread.cu
pme_gpu_program_impl.cu
elseif (GMX_GPU_OPENCL)
gmx_add_libgromacs_sources(
# OpenCL-specific sources
- pme_gpu_3dfft_ocl.cpp
pme_gpu_program_impl_ocl.cpp
# GPU-specific sources
pme_gpu.cpp
gmx_add_libgromacs_sources(
# Files that implement stubs
pme_gpu_sycl_stubs.cpp
- pme_gpu_3dfft_sycl.cpp
# GPU-specific sources
pme_gpu.cpp
pme_gpu_internal.cpp
#include <string>
#include "gromacs/ewald/ewald_utils.h"
+#include "gromacs/fft/gpu_3dfft.h"
#include "gromacs/gpu_utils/device_context.h"
#include "gromacs/gpu_utils/device_stream.h"
#include "gromacs/gpu_utils/gpu_utils.h"
# include "pme.cuh"
#endif
-#include "pme_gpu_3dfft.h"
#include "pme_gpu_calculate_splines.h"
#include "pme_gpu_constants.h"
#include "pme_gpu_program_impl.h"
for (int gridIndex = 0; gridIndex < pmeGpu->common->ngrids; gridIndex++)
{
pmeGpu->archSpecific->fftSetup.push_back(
- std::make_unique<GpuParallel3dFft>(grid.realGridSize,
- grid.realGridSizePadded,
- grid.complexGridSizePadded,
- useDecomposition,
- performOutOfPlaceFFT,
- pmeGpu->archSpecific->deviceContext_,
- pmeGpu->archSpecific->pmeStream_,
- grid.d_realGrid[gridIndex],
- grid.d_fourierGrid[gridIndex]));
+ std::make_unique<gmx::Gpu3dFft>(grid.realGridSize,
+ grid.realGridSizePadded,
+ grid.complexGridSizePadded,
+ useDecomposition,
+ performOutOfPlaceFFT,
+ pmeGpu->archSpecific->deviceContext_,
+ pmeGpu->archSpecific->pmeStream_,
+ grid.d_realGrid[gridIndex],
+ grid.d_fourierGrid[gridIndex]));
}
}
}
# include "gromacs/gpu_utils/gpuregiontimer_sycl.h"
#endif
+#include "gromacs/fft/gpu_3dfft.h"
#include "gromacs/timing/gpu_timing.h" // for gtPME_EVENT_COUNT
-#include "pme_gpu_3dfft.h"
-
#ifndef NUMFEPSTATES
//! Number of FEP states.
# define NUMFEPSTATES 2
#endif
-class GpuParallel3dFft;
+namespace gmx
+{
+//! Forward declaration of the GPU 3D FFT class.
+class Gpu3dFft;
+} // namespace gmx
/*! \internal \brief
* The main PME CUDA/OpenCL-specific host data structure, included in the PME GPU structure by the archSpecific pointer.
bool useTiming = false;
//! Vector of FFT setups
- std::vector<std::unique_ptr<GpuParallel3dFft>> fftSetup;
+ std::vector<std::unique_ptr<gmx::Gpu3dFft>> fftSetup;
//! All the timers one might use
gmx::EnumerationArray<PmeStage, GpuRegionTimer> timingEvents;
#
# This file is part of the GROMACS molecular simulation package.
#
-# Copyright (c) 2013,2014,2015,2018,2019,2020, by the GROMACS development team, led by
+# Copyright (c) 2013,2014,2015,2018,2019,2020,2021, by the GROMACS development team, led by
# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
# and including many others, as listed in the AUTHORS file in the
# top-level source directory and at http://www.gromacs.org.
gmx_add_libgromacs_sources(fft_mkl.cpp)
endif()
+# Register the GPU 3D FFT source file that matches the configured GPU
+# backend. The branches are mutually exclusive, so exactly one file is added.
+if (GMX_GPU_CUDA)
+ gmx_add_libgromacs_sources(
+ # CUDA-specific sources
+ gpu_3dfft.cu
+ )
+elseif (GMX_GPU_OPENCL)
+ gmx_add_libgromacs_sources(
+ # OpenCL-specific sources
+ gpu_3dfft_ocl.cpp
+ )
+elseif (GMX_GPU_SYCL)
+ # SYCL-TODO: proper implementation
+ gmx_add_libgromacs_sources(
+ # SYCL-specific sources
+ gpu_3dfft_sycl.cpp
+ )
+ # The SYCL file is additionally recorded in the SYCL_SOURCES property.
+ # NOTE(review): presumably so that it is built with the SYCL compiler flags - confirm.
+ _gmx_add_files_to_property(SYCL_SOURCES
+ gpu_3dfft_sycl.cpp
+ )
+else()
+ gmx_add_libgromacs_sources(
+ # Stub sources for CPU-only build
+ gpu_3dfft.cpp
+ )
+endif()
+
# Source files have the following private module dependencies.
target_link_libraries(fft PRIVATE
# gmxlib
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2021, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+
+/*! \internal \file
+ * \brief Implements stub GPU 3D FFT routines for CPU-only builds
+ *
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ * \ingroup module_fft
+ */
+
+#include "gmxpre.h"
+
+#include "gpu_3dfft.h"
+
+#include "gromacs/utility/exceptions.h"
+
+namespace gmx
+{
+
+// [[noreturn]] attributes must be added in the common headers, so it's easier to silence the warning here
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wmissing-noreturn"
+
+//! Empty implementation class; the stub build has no FFT state to hold.
+class Gpu3dFft::Impl
+{
+};
+
+/*! \brief Stub constructor for CPU-only builds.
+ *
+ * All parameters are ignored; the call always raises InternalError
+ * through GMX_THROW.
+ */
+Gpu3dFft::Gpu3dFft(ivec /*realGridSize*/,
+ ivec /*realGridSizePadded*/,
+ ivec /*complexGridSizePadded*/,
+ const bool /*useDecomposition*/,
+ const bool /*performOutOfPlaceFFT*/,
+ const DeviceContext& /*context*/,
+ const DeviceStream& /*pmeStream*/,
+ DeviceBuffer<float> /*realGrid*/,
+ DeviceBuffer<float> /*complexGrid*/)
+{
+ GMX_THROW(InternalError("Cannot run GPU routines in a CPU-only configuration"));
+}
+
+//! Defaulted destructor, defined after Impl so that Impl is a complete type here.
+Gpu3dFft::~Gpu3dFft() = default;
+
+/*! \brief Stub transform for CPU-only builds.
+ *
+ * Both parameters are ignored; the call always raises InternalError
+ * through GMX_THROW.
+ */
+// NOLINTNEXTLINE readability-convert-member-functions-to-static
+void Gpu3dFft::perform3dFft(gmx_fft_direction /*dir*/, CommandEvent* /*timingEvent*/)
+{
+ GMX_THROW(InternalError("Cannot run GPU routines in a CPU-only configuration"));
+}
+
+// Restore the diagnostic state saved by the matching push above.
+#pragma clang diagnostic pop
+
+} // namespace gmx
*/
/*! \internal \file
- * \brief Implements CUDA FFT routines for PME GPU.
+ * \brief Implements GPU 3D FFT routines for CUDA.
*
* \author Aleksei Iupinov <a.yupinov@gmail.com>
- * \ingroup module_ewald
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ * \ingroup module_fft
*/
#include "gmxpre.h"
-#include "pme_gpu_3dfft.h"
+#include "gpu_3dfft.h"
#include <cufft.h>
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxassert.h"
-class GpuParallel3dFft::Impl
+namespace gmx
+{
+
+class Gpu3dFft::Impl
{
public:
Impl(ivec realGridSize,
}
}
-GpuParallel3dFft::Impl::Impl(ivec realGridSize,
- ivec realGridSizePadded,
- ivec complexGridSizePadded,
- const bool useDecomposition,
- const bool /*performOutOfPlaceFFT*/,
- const DeviceContext& /*context*/,
- const DeviceStream& pmeStream,
- DeviceBuffer<float> realGrid,
- DeviceBuffer<float> complexGrid) :
+Gpu3dFft::Impl::Impl(ivec realGridSize,
+ ivec realGridSizePadded,
+ ivec complexGridSizePadded,
+ const bool useDecomposition,
+ const bool /*performOutOfPlaceFFT*/,
+ const DeviceContext& /*context*/,
+ const DeviceStream& pmeStream,
+ DeviceBuffer<float> realGrid,
+ DeviceBuffer<float> complexGrid) :
realGrid_(reinterpret_cast<cufftReal*>(realGrid)),
complexGrid_(reinterpret_cast<cufftComplex*>(complexGrid))
{
handleCufftError(result, "cufftSetStream C2R failure");
}
-GpuParallel3dFft::Impl::~Impl()
+Gpu3dFft::Impl::~Impl()
{
cufftResult_t result;
result = cufftDestroy(planR2C_);
handleCufftError(result, "cufftDestroy C2R failure");
}
-void GpuParallel3dFft::perform3dFft(gmx_fft_direction dir, CommandEvent* /*timingEvent*/)
+void Gpu3dFft::perform3dFft(gmx_fft_direction dir, CommandEvent* /*timingEvent*/)
{
cufftResult_t result;
if (dir == GMX_FFT_REAL_TO_COMPLEX)
}
}
-GpuParallel3dFft::GpuParallel3dFft(ivec realGridSize,
- ivec realGridSizePadded,
- ivec complexGridSizePadded,
- const bool useDecomposition,
- const bool performOutOfPlaceFFT,
- const DeviceContext& context,
- const DeviceStream& pmeStream,
- DeviceBuffer<float> realGrid,
- DeviceBuffer<float> complexGrid) :
+Gpu3dFft::Gpu3dFft(ivec realGridSize,
+ ivec realGridSizePadded,
+ ivec complexGridSizePadded,
+ const bool useDecomposition,
+ const bool performOutOfPlaceFFT,
+ const DeviceContext& context,
+ const DeviceStream& pmeStream,
+ DeviceBuffer<float> realGrid,
+ DeviceBuffer<float> complexGrid) :
impl_(std::make_unique<Impl>(realGridSize,
realGridSizePadded,
complexGridSizePadded,
{
}
-GpuParallel3dFft::~GpuParallel3dFft() = default;
+//! Defaulted destructor, defined in this file after Impl so that Impl is a complete type here.
+Gpu3dFft::~Gpu3dFft() = default;
+
+} // namespace gmx
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team.
- * Copyright (c) 2021, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020,2021, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
*/
/*! \internal \file
- * \brief Declares the 3D FFT class for PME.
+ * \brief Declares the GPU 3D FFT routines.
*
* \author Aleksei Iupinov <a.yupinov@gmail.com>
- * \ingroup module_ewald
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ * \ingroup module_fft
*/
-#ifndef GMX_EWALD_PME_GPU_3DFFT_H
-#define GMX_EWALD_PME_GPU_3DFFT_H
+#ifndef GMX_FFT_GPU_3DFFT_H
+#define GMX_FFT_GPU_3DFFT_H
#include <memory>
class DeviceContext;
class DeviceStream;
-struct PmeGpu;
+
+namespace gmx
+{
/*! \internal \brief
* A 3D FFT class for performing R2C/C2R transforms
* \todo Make this class actually parallel over multiple GPUs
*/
-class GpuParallel3dFft
+class Gpu3dFft
{
public:
/*! \brief
* \param[in] realGrid Device buffer of floats for the real grid
* \param[in] complexGrid Device buffer of complex floats for the complex grid
*/
- GpuParallel3dFft(ivec realGridSize,
- ivec realGridSizePadded,
- ivec complexGridSizePadded,
- bool useDecomposition,
- bool performOutOfPlaceFFT,
- const DeviceContext& context,
- const DeviceStream& pmeStream,
- DeviceBuffer<float> realGrid,
- DeviceBuffer<float> complexGrid);
+ Gpu3dFft(ivec realGridSize,
+ ivec realGridSizePadded,
+ ivec complexGridSizePadded,
+ bool useDecomposition,
+ bool performOutOfPlaceFFT,
+ const DeviceContext& context,
+ const DeviceStream& pmeStream,
+ DeviceBuffer<float> realGrid,
+ DeviceBuffer<float> complexGrid);
/*! \brief Destroys the FFT plans. */
- ~GpuParallel3dFft();
+ ~Gpu3dFft();
/*! \brief Performs the FFT transform in given direction
*
* \param[in] dir FFT transform direction specifier
std::unique_ptr<Impl> impl_;
};
+} // namespace gmx
+
#endif
*/
/*! \internal \file
- * \brief Implements OpenCL 3D FFT routines for PME GPU.
+ * \brief Implements GPU 3D FFT routines for OpenCL.
*
* \author Aleksei Iupinov <a.yupinov@gmail.com>
- * \ingroup module_ewald
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ * \ingroup module_fft
*/
#include "gmxpre.h"
-#include "pme_gpu_3dfft.h"
+#include "gpu_3dfft.h"
#include <array>
#include <vector>
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/stringutil.h"
-class GpuParallel3dFft::Impl
+namespace gmx
+{
+
+class Gpu3dFft::Impl
{
public:
Impl(ivec realGridSize,
// Supposedly it's just a superset of standard OpenCL errors
if (status != CLFFT_SUCCESS)
{
- GMX_THROW(gmx::InternalError(gmx::formatString("%s: %d", msg, status)));
+ GMX_THROW(InternalError(formatString("%s: %d", msg, status)));
}
}
-GpuParallel3dFft::Impl::Impl(ivec realGridSize,
- ivec realGridSizePadded,
- ivec complexGridSizePadded,
- const bool useDecomposition,
- const bool performOutOfPlaceFFT,
- const DeviceContext& context,
- const DeviceStream& pmeStream,
- DeviceBuffer<float> realGrid,
- DeviceBuffer<float> complexGrid) :
+Gpu3dFft::Impl::Impl(ivec realGridSize,
+ ivec realGridSizePadded,
+ ivec complexGridSizePadded,
+ const bool useDecomposition,
+ const bool performOutOfPlaceFFT,
+ const DeviceContext& context,
+ const DeviceStream& pmeStream,
+ DeviceBuffer<float> realGrid,
+ DeviceBuffer<float> complexGrid) :
realGrid_(realGrid), complexGrid_(complexGrid)
{
GMX_RELEASE_ASSERT(!useDecomposition, "FFT decomposition not implemented");
// TODO: disable last transpose (clfftSetPlanTransposeResult)
}
-GpuParallel3dFft::Impl::~Impl()
+// Destroys the R2C and C2R clFFT plans. The status codes returned by
+// clfftDestroyPlan are not checked here.
+Gpu3dFft::Impl::~Impl()
{
clfftDestroyPlan(&planR2C_);
clfftDestroyPlan(&planC2R_);
}
-void GpuParallel3dFft::perform3dFft(gmx_fft_direction dir, CommandEvent* timingEvent)
+void Gpu3dFft::perform3dFft(gmx_fft_direction dir, CommandEvent* timingEvent)
{
cl_mem tempBuffer = nullptr;
constexpr std::array<cl_event, 0> waitEvents{ {} };
outputGrids = &impl_->realGrid_;
break;
default:
- GMX_THROW(
- gmx::NotImplementedError("The chosen 3D-FFT case is not implemented on GPUs"));
+ GMX_THROW(NotImplementedError("The chosen 3D-FFT case is not implemented on GPUs"));
}
handleClfftError(clfftEnqueueTransform(plan,
direction,
"clFFT execution failure");
}
-GpuParallel3dFft::GpuParallel3dFft(ivec realGridSize,
- ivec realGridSizePadded,
- ivec complexGridSizePadded,
- const bool useDecomposition,
- const bool performOutOfPlaceFFT,
- const DeviceContext& context,
- const DeviceStream& pmeStream,
- DeviceBuffer<float> realGrid,
- DeviceBuffer<float> complexGrid) :
+Gpu3dFft::Gpu3dFft(ivec realGridSize,
+ ivec realGridSizePadded,
+ ivec complexGridSizePadded,
+ const bool useDecomposition,
+ const bool performOutOfPlaceFFT,
+ const DeviceContext& context,
+ const DeviceStream& pmeStream,
+ DeviceBuffer<float> realGrid,
+ DeviceBuffer<float> complexGrid) :
impl_(std::make_unique<Impl>(realGridSize,
realGridSizePadded,
complexGridSizePadded,
{
}
-GpuParallel3dFft::~GpuParallel3dFft() = default;
+//! Defaulted destructor, defined in this file after Impl so that Impl is a complete type here.
+Gpu3dFft::~Gpu3dFft() = default;
+
+} // namespace gmx
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2016,2017,2018,2019,2020,2021, by the GROMACS development team, led by
+ * Copyright (c) 2021, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
*
* \author Andrey Alekseenko <al42and@gmail.com>
* \author Mark Abraham <mark.j.abraham@gmail.com>
- * \ingroup module_ewald
+ * \ingroup module_fft
*/
#include "gmxpre.h"
-#include "pme_gpu_3dfft.h"
+#include "gpu_3dfft.h"
#include "gromacs/utility/exceptions.h"
+namespace gmx
+{
+
// [[noreturn]] attributes must be added in the common headers, so it's easier to silence the warning here
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wmissing-noreturn"
-class GpuParallel3dFft::Impl
+//! Empty implementation class; the SYCL stub has no FFT state to hold.
+class Gpu3dFft::Impl
{
};
-GpuParallel3dFft::GpuParallel3dFft(ivec /*realGridSize*/,
- ivec /*realGridSizePadded*/,
- ivec /*complexGridSizePadded*/,
- const bool /*useDecomposition*/,
- const bool /*performOutOfPlaceFFT*/,
- const DeviceContext& /*context*/,
- const DeviceStream& /*pmeStream*/,
- DeviceBuffer<float> /*realGrid*/,
- DeviceBuffer<float> /*complexGrid*/)
+/*! \brief Stub constructor for SYCL builds.
+ *
+ * All parameters are ignored; the call always raises NotImplementedError
+ * through GMX_THROW.
+ */
+Gpu3dFft::Gpu3dFft(ivec /*realGridSize*/,
+ ivec /*realGridSizePadded*/,
+ ivec /*complexGridSizePadded*/,
+ const bool /*useDecomposition*/,
+ const bool /*performOutOfPlaceFFT*/,
+ const DeviceContext& /*context*/,
+ const DeviceStream& /*pmeStream*/,
+ DeviceBuffer<float> /*realGrid*/,
+ DeviceBuffer<float> /*complexGrid*/)
{
- GMX_THROW(gmx::NotImplementedError("PME is not implemented in SYCL"));
+ GMX_THROW(NotImplementedError("GPU 3DFFT is not implemented in SYCL"));
}
-GpuParallel3dFft::~GpuParallel3dFft() = default;
+//! Defaulted destructor, defined after Impl so that Impl is a complete type here.
+Gpu3dFft::~Gpu3dFft() = default;
-void GpuParallel3dFft::perform3dFft(gmx_fft_direction /*dir*/, CommandEvent* /*timingEvent*/)
+/*! \brief Stub transform for SYCL builds.
+ *
+ * Both parameters are ignored; the call always raises NotImplementedError
+ * through GMX_THROW.
+ */
+void Gpu3dFft::perform3dFft(gmx_fft_direction /*dir*/, CommandEvent* /*timingEvent*/)
{
- GMX_THROW(gmx::NotImplementedError("Not implemented on SYCL yet"));
+ GMX_THROW(NotImplementedError("Not implemented on SYCL yet"));
}
+// Restore the diagnostic state saved by the matching push above.
#pragma clang diagnostic pop
+
+} // namespace gmx