Allow passing optional width argument into CUDA shuffle intrinsics
author Aleksei Iupinov <a.yupinov@gmail.com>
Fri, 25 Aug 2017 14:16:19 +0000 (16:16 +0200)
committer Aleksei Iupinov <a.yupinov@gmail.com>
Tue, 29 Aug 2017 10:16:10 +0000 (12:16 +0200)
Change-Id: I207d8a7f94bf317e34ae4ff8cdb963fc96890260

src/gromacs/gpu_utils/cuda_arch_utils.cuh

index ecd33cd6acb4b110cfee26d489a6355703689674..7cceb1a04d96e5286fed9b796fa90ae2e3d68867 100644 (file)
@@ -111,13 +111,14 @@ template <typename T>
 static __forceinline__ __device__
 T gmx_shfl_up_sync(const unsigned int activeMask,
                    const T            var,
-                   unsigned int       offset)
+                   unsigned int       offset,
+                   int                width = warp_size)
 {
 #if GMX_CUDA_VERSION < 9000
     GMX_UNUSED_VALUE(activeMask);
-    return __shfl_up(var, offset);
+    return __shfl_up(var, offset, width);
 #else
-    return __shfl_up_sync(activeMask, var, offset);
+    return __shfl_up_sync(activeMask, var, offset, width);
 #endif
 }
 
@@ -126,13 +127,14 @@ template <typename T>
 static __forceinline__ __device__
 T gmx_shfl_down_sync(const unsigned int activeMask,
                      const T            var,
-                     unsigned int       offset)
+                     unsigned int       offset,
+                     int                width = warp_size)
 {
 #if GMX_CUDA_VERSION < 9000
     GMX_UNUSED_VALUE(activeMask);
-    return __shfl_down(var, offset);
+    return __shfl_down(var, offset, width);
 #else
-    return __shfl_down_sync(activeMask, var, offset);
+    return __shfl_down_sync(activeMask, var, offset, width);
 #endif
 }