Fix CUDA clang-tidy complaints

[alexxy/gromacs.git] / src / gromacs / fft / tests / fft.cpp
diff --git a/src/gromacs/fft/tests/fft.cpp b/src/gromacs/fft/tests/fft.cpp

index ce3717e210ffdf72c29f90dd90ac50613801e087..f89d1747c2305497547433d93e0d99b9631ac2af 100644 (file)
--- a/src/gromacs/fft/tests/fft.cpp
+++ b/src/gromacs/fft/tests/fft.cpp
@@ -72,6 +72,8 @@ namespace gmx
  {
  namespace test
  {
+namespace
+{
  
  /*! \brief Input data for FFT tests.
   *
@@ -81,7 +83,7 @@ namespace test
   * initializers, and we would not have to do so much useless copying
   * during the unit tests below.
   */
-const double inputdata[] = {
+const double inputdata[500] = {
      // print ",\n".join([",".join(["%4s"%(random.randint(-99,99)/10.,) for i in range(25)]) for j in range(20)])
      -3.5, 6.3,  1.2,  0.3,  1.1,  -5.7, 5.8,  -1.9, -6.3, -1.4, 7.4,  2.4,  -9.9, -7.2, 5.4,  6.1,
      -1.9, -7.6, 1.4,  -3.5, 0.7,  5.6,  -4.2, -1.1, -4.4, -6.3, -7.2, 4.6,  -3.0, -0.9, 7.2,  2.5,
@@ -364,7 +366,44 @@ TEST_F(FFTTest3D, Real5_6_9)
      checkRealGrid(realGridSize, realGridSizePadded, in_, outputRealGridValues);
  }
  
-#if GMX_GPU_CUDA || GMX_GPU_OPENCL
+#if GMX_GPU
+
+/*! \brief Whether the FFT is performed out-of-place (true) or in-place (false)
+ *
+ *  DPCPP uses oneMKL, which seems to have trouble with out-of-place
+ *  transforms, so that combination runs the FFT in-place. */
+constexpr bool sc_performOutOfPlaceFFT = !((GMX_SYCL_DPCPP == 1) && (GMX_FFT_MKL == 1));
+
+/*! \brief Return the output grid depending on whether an in- or
+ * out-of-place FFT is used
+ *
+ * Some versions of clang complain of unused code if we just
+ * branched on the value of sc_performOutOfPlaceFFT at run time, because
+ * in any single configuration there would indeed be unused code. So
+ * the two template specializations are needed so that the compiler
+ * only compiles the template that is used. */
+template<bool performOutOfPlaceFFT>
+DeviceBuffer<float>* actualOutputGrid(DeviceBuffer<float>* realGrid, DeviceBuffer<float>* complexGrid);
+
+#    if GMX_SYCL_DPCPP && GMX_FFT_MKL
+
+template<>
+DeviceBuffer<float>* actualOutputGrid<false>(DeviceBuffer<float>* realGrid,
+                                             DeviceBuffer<float>* /* complexGrid */)
+{
+    return realGrid; // In-place FFT: the real grid is also the output grid
+}
+
+#    else
+
+template<>
+DeviceBuffer<float>* actualOutputGrid<true>(DeviceBuffer<float>* /* realGrid */, DeviceBuffer<float>* complexGrid)
+{
+    return complexGrid; // Out-of-place FFT: the separate complex grid is the output grid
+}
+
+#    endif
+
  TEST_F(FFTTest3D, GpuReal5_6_9)
  {
      // Ensure library resources are managed appropriately
@@ -387,6 +426,8 @@ TEST_F(FFTTest3D, GpuReal5_6_9)
  
          int size = complexGridSizePadded[0] * complexGridSizePadded[1] * complexGridSizePadded[2];
          int sizeInReals = size * 2;
+        GMX_RELEASE_ASSERT(sizeof(inputdata) / sizeof(inputdata[0]) >= size_t(sizeInReals),
+                           "Size of inputdata is too small");
  
          // Set up the complex grid. Complex numbers take twice the
          // memory.
@@ -395,18 +436,47 @@ TEST_F(FFTTest3D, GpuReal5_6_9)
          // Use std::copy to convert from double to real easily
          std::copy(inputdata, inputdata + sizeInReals, in_.begin());
  
-        // Allocate the device buffers
-        DeviceBuffer<float> realGrid, complexGrid;
-        allocateDeviceBuffer(&realGrid, in_.size(), deviceContext);
-        allocateDeviceBuffer(&complexGrid, complexGridValues.size(), deviceContext);
-
  #    if GMX_GPU_CUDA
          const FftBackend backend = FftBackend::Cufft;
  #    elif GMX_GPU_OPENCL
          const FftBackend backend = FftBackend::Ocl;
+#    elif GMX_GPU_SYCL
+#        if GMX_SYCL_HIPSYCL
+#            if GMX_HIPSYCL_HAVE_HIP_TARGET
+        const FftBackend backend = FftBackend::SyclRocfft;
+#            else
+        // Use stub backend so compilation succeeds
+        const FftBackend backend = FftBackend::Sycl;
+        // Don't complain about unused reference data
+        checker.disableUnusedEntriesCheck();
+        // Skip the rest of the test
+        GTEST_SKIP() << "Only rocFFT backend is supported with hipSYCL";
+#            endif
+#        elif GMX_SYCL_DPCPP
+#            if GMX_FFT_MKL
+        const FftBackend backend = FftBackend::SyclMkl;
+#            else
+        // Use stub backend so compilation succeeds
+        const FftBackend backend = FftBackend::Sycl;
+        // Don't complain about unused reference data
+        checker.disableUnusedEntriesCheck();
+        // Skip the rest of the test
+        GTEST_SKIP() << "Only MKL backend is supported with DPC++";
+#            endif
+#        else
+#            error "Unsupported SYCL implementation"
+#        endif
  #    endif
-        const bool         performOutOfPlaceFFT    = true;
-        const MPI_Comm     comm                    = MPI_COMM_NULL;
+
+        SCOPED_TRACE("Allocating the device buffers");
+        DeviceBuffer<float> realGrid, complexGrid;
+        allocateDeviceBuffer(&realGrid, in_.size(), deviceContext);
+        if (sc_performOutOfPlaceFFT)
+        {
+            allocateDeviceBuffer(&complexGrid, complexGridValues.size(), deviceContext);
+        }
+
+        MPI_Comm           comm                    = MPI_COMM_NULL;
          const bool         allocateGrid            = false;
          std::array<int, 1> gridSizesInXForEachRank = { 0 };
          std::array<int, 1> gridSizesInYForEachRank = { 0 };
@@ -417,14 +487,14 @@ TEST_F(FFTTest3D, GpuReal5_6_9)
                            gridSizesInXForEachRank,
                            gridSizesInYForEachRank,
                            nz,
-                          performOutOfPlaceFFT,
+                          sc_performOutOfPlaceFFT,
                            deviceContext,
                            deviceStream,
                            realGridSize,
                            realGridSizePadded,
                            complexGridSizePadded,
                            &realGrid,
-                          &complexGrid);
+                          actualOutputGrid<sc_performOutOfPlaceFFT>(&realGrid, &complexGrid));
  
          // Transfer the real grid input data for the FFT
          copyToDeviceBuffer(
@@ -437,7 +507,7 @@ TEST_F(FFTTest3D, GpuReal5_6_9)
  
          // Check the complex grid (NB this data has not been normalized)
          copyFromDeviceBuffer(complexGridValues.data(),
-                             &complexGrid,
+                             actualOutputGrid<sc_performOutOfPlaceFFT>(&realGrid, &complexGrid),
                               0,
                               complexGridValues.size(),
                               deviceStream,
@@ -446,12 +516,26 @@ TEST_F(FFTTest3D, GpuReal5_6_9)
          checker.checkSequence(
                  complexGridValues.begin(), complexGridValues.end(), "ComplexGridAfterRealToComplex");
  
-        // Do the back transform
+        std::vector<float> outputRealGridValues(in_.size());
+        if (sc_performOutOfPlaceFFT)
+        {
+            // Clear the real grid input data for the FFT so we can
+            // compute the back transform into it and observe that it did
+            // the work expected.
+            copyToDeviceBuffer(&realGrid,
+                               outputRealGridValues.data(),
+                               0,
+                               outputRealGridValues.size(),
+                               deviceStream,
+                               GpuApiCallBehavior::Sync,
+                               nullptr);
+        }
+
+        SCOPED_TRACE("Doing the back transform");
          gpu3dFft.perform3dFft(GMX_FFT_COMPLEX_TO_REAL, timingEvent);
          deviceStream.synchronize();
  
          // Transfer the real grid back from the device
-        std::vector<float> outputRealGridValues(in_.size());
          copyFromDeviceBuffer(outputRealGridValues.data(),
                               &realGrid,
                               0,
@@ -462,12 +546,17 @@ TEST_F(FFTTest3D, GpuReal5_6_9)
  
          checkRealGrid(realGridSize, realGridSizePadded, in_, outputRealGridValues);
  
+        SCOPED_TRACE("Cleaning up");
          freeDeviceBuffer(&realGrid);
-        freeDeviceBuffer(&complexGrid);
+        if (sc_performOutOfPlaceFFT)
+        {
+            freeDeviceBuffer(&complexGrid);
+        }
      }
  }
  
  #endif
  
+} // namespace
  } // namespace test
  } // namespace gmx