Make GPU X buffer ops platform-agnostic
authorAndrey Alekseenko <al42and@gmail.com>
Mon, 10 May 2021 13:22:53 +0000 (13:22 +0000)
committerMark Abraham <mark.j.abraham@gmail.com>
Mon, 10 May 2021 13:22:53 +0000 (13:22 +0000)
src/gromacs/nbnxm/CMakeLists.txt
src/gromacs/nbnxm/cuda/CMakeLists.txt
src/gromacs/nbnxm/cuda/nbnxm_cuda.cu
src/gromacs/nbnxm/cuda/nbnxm_gpu_buffer_ops_internal.cu [moved from src/gromacs/nbnxm/cuda/nbnxm_buffer_ops_kernels.cuh with 62% similarity]
src/gromacs/nbnxm/nbnxm_gpu.h
src/gromacs/nbnxm/nbnxm_gpu_buffer_ops.cpp [new file with mode: 0644]
src/gromacs/nbnxm/nbnxm_gpu_buffer_ops_internal.h [new file with mode: 0644]
src/gromacs/nbnxm/nbnxm_gpu_buffer_ops_stubs.cpp [new file with mode: 0644]

index 91738e62355aabf3d49f15ba066a85b79d6d2486..51aa880f470ee7c8cb6890e6212cac4424e7675b 100644 (file)
@@ -66,21 +66,22 @@ file (GLOB NBNXM_SOURCES
 
 if(GMX_GPU_CUDA)
     add_subdirectory(cuda)
-    gmx_add_libgromacs_sources(nbnxm_gpu_data_mgmt.cpp)
+    gmx_add_libgromacs_sources(nbnxm_gpu_data_mgmt.cpp nbnxm_gpu_buffer_ops.cpp)
     _gmx_add_files_to_property(CUDA_SOURCES
         nbnxm_gpu_data_mgmt.cpp
+        nbnxm_gpu_buffer_ops.cpp
         )
 endif()
 
 if(GMX_GPU_OPENCL)
     add_subdirectory(opencl)
     set(NBNXM_OPENCL_KERNELS ${NBNXM_OPENCL_KERNELS} PARENT_SCOPE)
-    gmx_add_libgromacs_sources(nbnxm_gpu_data_mgmt.cpp)
+    gmx_add_libgromacs_sources(nbnxm_gpu_data_mgmt.cpp nbnxm_gpu_buffer_ops_stubs.cpp)
 endif()
 
 if(GMX_GPU_SYCL)
     add_subdirectory(sycl)
-    gmx_add_libgromacs_sources(nbnxm_gpu_data_mgmt.cpp)
+    gmx_add_libgromacs_sources(nbnxm_gpu_data_mgmt.cpp nbnxm_gpu_buffer_ops_stubs.cpp)
     _gmx_add_files_to_property(SYCL_SOURCES nbnxm_gpu_data_mgmt.cpp nbnxm.cpp)
 endif()
 
index 15c047605a625e164b3d502c541f831e8b82f2c9..9338868bdd54f03c2c69ee880f04ee68cb2d8f1f 100644 (file)
@@ -2,7 +2,7 @@
 # This file is part of the GROMACS molecular simulation package.
 #
 # Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
-# Copyright (c) 2017,2019,2020, by the GROMACS development team, led by
+# Copyright (c) 2017,2019,2020,2021, by the GROMACS development team, led by
 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 # and including many others, as listed in the AUTHORS file in the
 # top-level source directory and at http://www.gromacs.org.
@@ -47,6 +47,7 @@ if(GMX_GPU_CUDA)
          nbnxm_cuda.cu
          nbnxm_cuda_data_mgmt.cu
          nbnxm_cuda_jit_support.cu
+         nbnxm_gpu_buffer_ops_internal.cu
          ${NBNXM_CUDA_KERNEL_SOURCES})
     set(NBNXM_SOURCES ${NBNXM_SOURCES} ${NBNXM_CUDA_SOURCES} PARENT_SCOPE)
 endif()
index 4869756afef30f3444f30c726773d4498623e367..ac5e50c8b10cac6b2bb00ee39db9c034e57e8a90 100644 (file)
@@ -71,7 +71,6 @@
 #include "gromacs/utility/cstringutil.h"
 #include "gromacs/utility/gmxassert.h"
 
-#include "nbnxm_buffer_ops_kernels.cuh"
 #include "nbnxm_cuda_types.h"
 
 /***** The kernel declarations/definitions come here *****/
 namespace Nbnxm
 {
 
-//! Number of CUDA threads in a block
-// TODO Optimize this through experimentation
-constexpr static int c_bufOpsThreadsPerBlock = 128;
-
 /*! Nonbonded kernel function pointer type */
 typedef void (*nbnxn_cu_kfunc_ptr_t)(const NBAtomDataGpu, const NBParamGpu, const gpu_plist, bool);
 
@@ -710,81 +705,4 @@ void cuda_set_cacheconfig()
     }
 }
 
-/* X buffer operations on GPU: performs conversion from rvec to nb format. */
-void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid&        grid,
-                           NbnxmGpu*                 nb,
-                           DeviceBuffer<gmx::RVec>   d_x,
-                           GpuEventSynchronizer*     xReadyOnDevice,
-                           const Nbnxm::AtomLocality locality,
-                           int                       gridId,
-                           int                       numColumnsMax,
-                           bool                      mustInsertNonLocalDependency)
-{
-    GMX_ASSERT(nb, "Need a valid nbnxn_gpu object");
-
-    NBAtomDataGpu* adat = nb->atdat;
-
-    const int                  numColumns      = grid.numColumns();
-    const int                  cellOffset      = grid.cellOffset();
-    const int                  numAtomsPerCell = grid.numAtomsPerCell();
-    Nbnxm::InteractionLocality interactionLoc  = atomToInteractionLocality(locality);
-
-    const DeviceStream& deviceStream = *nb->deviceStreams[interactionLoc];
-
-    if (xReadyOnDevice != nullptr)
-    {
-        // We only need to wait on the first iteration of the loop
-        xReadyOnDevice->enqueueWaitEvent(deviceStream);
-    }
-
-    int numAtoms = grid.srcAtomEnd() - grid.srcAtomBegin();
-    // avoid empty kernel launch, skip to inserting stream dependency
-    if (numAtoms != 0)
-    {
-        // TODO: This will only work with CUDA
-        GMX_ASSERT(d_x, "Need a valid device pointer");
-
-
-        KernelLaunchConfig config;
-        config.blockSize[0] = c_bufOpsThreadsPerBlock;
-        config.blockSize[1] = 1;
-        config.blockSize[2] = 1;
-        config.gridSize[0] = (grid.numCellsColumnMax() * numAtomsPerCell + c_bufOpsThreadsPerBlock - 1)
-                             / c_bufOpsThreadsPerBlock;
-        config.gridSize[1] = numColumns;
-        config.gridSize[2] = 1;
-        GMX_ASSERT(config.gridSize[0] > 0,
-                   "Can not have empty grid, early return above avoids this");
-        config.sharedMemorySize = 0;
-
-        auto       kernelFn      = nbnxn_gpu_x_to_nbat_x_kernel;
-        float4*    d_xq          = adat->xq;
-        float3*    d_xFloat3     = asFloat3(d_x);
-        const int* d_atomIndices = nb->atomIndices;
-        const int* d_cxy_na      = &nb->cxy_na[numColumnsMax * gridId];
-        const int* d_cxy_ind     = &nb->cxy_ind[numColumnsMax * gridId];
-        const auto kernelArgs    = prepareGpuKernelArguments(kernelFn,
-                                                          config,
-                                                          &numColumns,
-                                                          &d_xq,
-                                                          &d_xFloat3,
-                                                          &d_atomIndices,
-                                                          &d_cxy_na,
-                                                          &d_cxy_ind,
-                                                          &cellOffset,
-                                                          &numAtomsPerCell);
-        launchGpuKernel(kernelFn, config, deviceStream, nullptr, "XbufferOps", kernelArgs);
-    }
-
-    if (mustInsertNonLocalDependency)
-    {
-        Nbnxm::nbnxnInsertNonlocalGpuDependency(nb, interactionLoc);
-    }
-}
-
-DeviceBuffer<Float3> getGpuForces(NbnxmGpu* nb)
-{
-    return nb->atdat->f;
-}
-
 } // namespace Nbnxm
similarity index 62%
rename from src/gromacs/nbnxm/cuda/nbnxm_buffer_ops_kernels.cuh
rename to src/gromacs/nbnxm/cuda/nbnxm_gpu_buffer_ops_internal.cu
index 2df2a14ea2e96e2723a8d4fcdc3295602cc6552b..577b2b4b920f6201128779f5a5b0f1740badfb5e 100644 (file)
@@ -1,7 +1,8 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
+ * Copyright (c) 2017,2018,2019,2020,2021, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
  * To help us fund GROMACS development, we humbly ask that you cite
  * the research papers on the package. Check out http://www.gromacs.org.
  */
-
-/*! \internal \file
- *
- * \brief
- * CUDA kernels for GPU versions of copy_rvec_to_nbat_real and add_nbat_f_to_f.
+/*! \file
+ *  \brief Define CUDA kernel (and its wrapper) for transforming position coordinates from rvec to nbnxm layout.
  *
  *  \author Alan Gray <alang@nvidia.com>
  *  \author Jon Vincent <jvincent@nvidia.com>
+ *  \author Szilard Pall <pall.szilard@gmail.com>
  */
 
+#include "gmxpre.h"
+
+#include "gromacs/gpu_utils/typecasts.cuh"
 #include "gromacs/gpu_utils/vectype_ops.cuh"
-#include "gromacs/nbnxm/nbnxm.h"
+#include "gromacs/nbnxm/grid.h"
+#include "gromacs/nbnxm/nbnxm_gpu_buffer_ops_internal.h"
+#include "gromacs/nbnxm/cuda/nbnxm_cuda_types.h"
 
 /*! \brief CUDA kernel for transforming position coordinates from rvec to nbnxm layout.
  *
@@ -70,8 +74,7 @@ static __global__ void nbnxn_gpu_x_to_nbat_x_kernel(int numColumns,
                                                     int numAtomsPerCell)
 {
 
-
-    const float farAway = -1000000.0f;
+    const float farAway = -1000000.0F;
 
     // Map cell-level parallelism to y component of CUDA block index.
     int cxy = blockIdx.y;
@@ -102,3 +105,46 @@ static __global__ void nbnxn_gpu_x_to_nbat_x_kernel(int numColumns,
         }
     }
 }
+
+
+namespace Nbnxm
+{
+
+//! Number of CUDA threads in a block
+// TODO Optimize this through experimentation
+constexpr static int c_bufOpsThreadsPerBlock = 128;
+
+void launchNbnxmKernelTransformXToXq(const Grid&          grid,
+                                     NbnxmGpu*            nb,
+                                     DeviceBuffer<Float3> d_x,
+                                     const DeviceStream&  deviceStream,
+                                     const unsigned int   numColumnsMax,
+                                     const int            gridId)
+{
+    const int numColumns      = grid.numColumns();
+    const int cellOffset      = grid.cellOffset();
+    const int numAtomsPerCell = grid.numAtomsPerCell();
+
+    KernelLaunchConfig config;
+    config.blockSize[0] = c_bufOpsThreadsPerBlock;
+    config.blockSize[1] = 1;
+    config.blockSize[2] = 1;
+    config.gridSize[0]  = (grid.numCellsColumnMax() * numAtomsPerCell + c_bufOpsThreadsPerBlock - 1)
+                         / c_bufOpsThreadsPerBlock;
+    config.gridSize[1] = numColumns;
+    config.gridSize[2] = 1;
+    GMX_ASSERT(config.gridSize[0] > 0, "Can not have empty grid, early return above avoids this");
+    config.sharedMemorySize = 0;
+
+    auto       kernelFn      = nbnxn_gpu_x_to_nbat_x_kernel;
+    float3*    d_xFloat3     = asFloat3(d_x);
+    float4*    d_xq          = nb->atdat->xq;
+    const int* d_atomIndices = nb->atomIndices;
+    const int* d_cxy_na      = &nb->cxy_na[numColumnsMax * gridId];
+    const int* d_cxy_ind     = &nb->cxy_ind[numColumnsMax * gridId];
+    const auto kernelArgs    = prepareGpuKernelArguments(
+            kernelFn, config, &numColumns, &d_xq, &d_xFloat3, &d_atomIndices, &d_cxy_na, &d_cxy_ind, &cellOffset, &numAtomsPerCell);
+    launchGpuKernel(kernelFn, config, deviceStream, nullptr, "XbufferOps", kernelArgs);
+}
+
+} // namespace Nbnxm
index eb1becbf7d466605e365337e50d72b59e175f027..a3633c2c7a3394494d83baf5f2dba551ba83af79 100644 (file)
@@ -250,7 +250,7 @@ void nbnxn_gpu_init_x_to_nbat_x(const Nbnxm::GridSet gmx_unused& gridSet,
  * \param[in]     mustInsertNonLocalDependency Whether synchronization between local and non-local
  * streams should be added. Typically, true if and only if that is the last grid in gridset.
  */
-CUDA_FUNC_QUALIFIER
+GPU_FUNC_QUALIFIER
 void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid gmx_unused& grid,
                            NbnxmGpu gmx_unused*    gpu_nbv,
                            DeviceBuffer<gmx::RVec> gmx_unused d_x,
@@ -258,7 +258,7 @@ void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid gmx_unused& grid,
                            gmx::AtomLocality gmx_unused locality,
                            int gmx_unused gridId,
                            int gmx_unused numColumnsMax,
-                           bool gmx_unused mustInsertNonLocalDependency) CUDA_FUNC_TERM;
+                           bool gmx_unused mustInsertNonLocalDependency) GPU_FUNC_TERM;
 
 /*! \brief Sync the nonlocal stream with dependent tasks in the local queue.
  *
@@ -311,9 +311,9 @@ bool haveGpuShortRangeWork(const NbnxmGpu gmx_unused* nb, gmx::InteractionLocali
  * \param[in] nb  The nonbonded data GPU structure
  * \returns       A pointer to the force buffer in GPU memory
  */
-CUDA_FUNC_QUALIFIER
+GPU_FUNC_QUALIFIER
 DeviceBuffer<gmx::RVec> getGpuForces(NbnxmGpu gmx_unused* nb)
-        CUDA_FUNC_TERM_WITH_RETURN(DeviceBuffer<gmx::RVec>{});
+        GPU_FUNC_TERM_WITH_RETURN(DeviceBuffer<gmx::RVec>{});
 
 } // namespace Nbnxm
 #endif
diff --git a/src/gromacs/nbnxm/nbnxm_gpu_buffer_ops.cpp b/src/gromacs/nbnxm/nbnxm_gpu_buffer_ops.cpp
new file mode 100644 (file)
index 0000000..00a15a0
--- /dev/null
@@ -0,0 +1,103 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020,2021, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+
+/*! \internal \file
+ *  \brief
+ *  Common code for GPU buffer operations, namely the coordinate layout conversion
+ *
+ *  \ingroup module_nbnxm
+ */
+#include "gmxpre.h"
+
+#include "config.h"
+
+#include "gromacs/gpu_utils/device_stream.h"
+#if GMX_GPU_CUDA
+#    include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
+#endif
+#include "gromacs/mdtypes/locality.h"
+#include "gromacs/nbnxm/gridset.h"
+#include "gromacs/nbnxm/nbnxm_gpu.h"
+#include "gromacs/nbnxm/nbnxm_gpu_buffer_ops_internal.h"
+#if GMX_GPU_CUDA
+#    include "gromacs/nbnxm/cuda/nbnxm_cuda_types.h"
+#endif
+#include "gromacs/utility/exceptions.h"
+
+namespace Nbnxm
+{
+
+void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid&      grid,
+                           NbnxmGpu*               nb,
+                           DeviceBuffer<gmx::RVec> d_x,
+                           GpuEventSynchronizer*   xReadyOnDevice,
+                           const gmx::AtomLocality locality,
+                           int                     gridId,
+                           int                     numColumnsMax,
+                           bool                    mustInsertNonLocalDependency)
+{
+    GMX_RELEASE_ASSERT(GMX_GPU_CUDA, "nbnxn_gpu_x_to_nbat_x only supported with CUDA");
+    GMX_ASSERT(nb, "Need a valid nbnxn_gpu object");
+    gmx::InteractionLocality interactionLoc = gmx::atomToInteractionLocality(locality);
+
+    const DeviceStream& deviceStream = *nb->deviceStreams[interactionLoc];
+
+    const int numAtoms = grid.srcAtomEnd() - grid.srcAtomBegin();
+
+    // Only insert wait on the first iteration of the loop.
+    if (xReadyOnDevice != nullptr)
+    {
+        xReadyOnDevice->enqueueWaitEvent(deviceStream);
+    }
+
+    // avoid empty kernel launch, skip to inserting stream dependency
+    if (numAtoms != 0)
+    {
+        GMX_ASSERT(d_x, "Need a valid device pointer");
+        launchNbnxmKernelTransformXToXq(grid, nb, d_x, deviceStream, numColumnsMax, gridId);
+    }
+
+    if (mustInsertNonLocalDependency)
+    {
+        Nbnxm::nbnxnInsertNonlocalGpuDependency(nb, interactionLoc);
+    }
+}
+
+DeviceBuffer<Float3> getGpuForces(NbnxmGpu* nb)
+{
+    return nb->atdat->f;
+}
+
+} // namespace Nbnxm
diff --git a/src/gromacs/nbnxm/nbnxm_gpu_buffer_ops_internal.h b/src/gromacs/nbnxm/nbnxm_gpu_buffer_ops_internal.h
new file mode 100644 (file)
index 0000000..dc91a7a
--- /dev/null
@@ -0,0 +1,68 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020,2021, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+
+/*! \internal \file
+ *  \brief
+ *  Wrapper for the backend-specific coordinate layout conversion functionality
+ *
+ *  \ingroup module_nbnxm
+ */
+#include "gromacs/gpu_utils/devicebuffer_datatype.h"
+#include "gromacs/gpu_utils/gputraits.h"
+
+class DeviceStream;
+class Grid;
+struct NbnxmGpu;
+
+namespace Nbnxm
+{
+
+/*! \brief Launch coordinate layout conversion kernel
+ *
+ * \param[in]     grid          Pair-search grid.
+ * \param[in,out] nb            Nbnxm main structure.
+ * \param[in]     d_x           Source atom coordinates.
+ * \param[in]     deviceStream  Device stream for kernel submission.
+ * \param[in]     numColumnsMax Max. number of columns per grid for offset calculation in \p nb.
+ * \param[in]     gridId        Grid index for offset calculation in \p nb.
+ */
+void launchNbnxmKernelTransformXToXq(const Grid&          grid,
+                                     NbnxmGpu*            nb,
+                                     DeviceBuffer<Float3> d_x,
+                                     const DeviceStream&  deviceStream,
+                                     unsigned int         numColumnsMax,
+                                     int                  gridId);
+
+} // namespace Nbnxm
diff --git a/src/gromacs/nbnxm/nbnxm_gpu_buffer_ops_stubs.cpp b/src/gromacs/nbnxm/nbnxm_gpu_buffer_ops_stubs.cpp
new file mode 100644 (file)
index 0000000..2d35afd
--- /dev/null
@@ -0,0 +1,73 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020,2021, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+
+/*! \internal \file
+ *  \brief
+ *  Common code for GPU buffer operations, namely the coordinate layout conversion
+ *
+ *  \ingroup module_nbnxm
+ */
+#include "gmxpre.h"
+
+#include "config.h"
+
+#include "gromacs/nbnxm/nbnxm_gpu.h"
+
+#if !GMX_GPU_CUDA
+
+namespace Nbnxm
+{
+
+void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid& /*grid*/,
+                           NbnxmGpu* /*nb*/,
+                           DeviceBuffer<gmx::RVec> /*d_x*/,
+                           GpuEventSynchronizer* /*xReadyOnDevice*/,
+                           const gmx::AtomLocality /*locality*/,
+                           int /*gridId*/,
+                           int /*numColumnsMax*/,
+                           bool /*mustInsertNonLocalDependency*/)
+{
+    GMX_RELEASE_ASSERT(false, "nbnxn_gpu_x_to_nbat_x only supported with CUDA");
+}
+
+DeviceBuffer<gmx::RVec> getGpuForces(NbnxmGpu* /*nb*/)
+{
+    GMX_RELEASE_ASSERT(false, "getGpuForces only supported with CUDA");
+    return DeviceBuffer<gmx::RVec>{};
+}
+
+} // namespace Nbnxm
+
+#endif // !GMX_GPU_CUDA