Add CUDA compiler support for CC 5.0
author Szilárd Páll <pszilard@kth.se>
Sat, 8 Nov 2014 03:47:03 +0000 (04:47 +0100)
committer Gerrit Code Review <gerrit@gerrit.gromacs.org>
Mon, 17 Nov 2014 13:09:12 +0000 (14:09 +0100)
With CUDA 6.5 and later, compute capability 5.0 devices are supported, so
we generate cubin and PTX for these too and drop the compute_35 PTX.
This change also removes the explicit optimization for CC 2.1, because
sm_20 binary code runs just as fast as sm_21 code on those devices.

Change-Id: I5a277c235b873afb2d1b2b12b5db64b370f1bade

cmake/gmxManageNvccConfig.cmake

index 33d42ea6991e9d3dbcdb80affc1a84f04dfeab4f..37dbebbd7c01a02c70afe2e8f667680899985678 100644 (file)
@@ -139,17 +139,24 @@ if (NOT DEFINED CUDA_NVCC_FLAGS_SET)
 
     # Set the CUDA GPU architectures to compile for:
     # - with CUDA <4.2, compute capability 2.0 and 2.1 are supported, but 3.0 is not:
-    #     => compile sm_20, sm_21 cubin, and compute_20 PTX
-    # - with CUDA >=4.2 compute capability <=3.0 is supported:
-    #     => compile sm_20, sm_21, sm_30 cubin, and compute_30 PTX
-    # - with CUDA 5.0 and later compute capability 3.5 is supported
-    #     => compile sm_20, sm_21, sm_30, sm_35 cubin, and compute_35 PTX
+    #     => compile sm_20 cubin, and compute_20 PTX
+    # - with CUDA >=4.2 CC <=3.0 is supported:
+    #     => compile sm_20, sm_30 cubin, and compute_30 PTX
+    # - with CUDA 5.0 and later CC <=3.5 is supported
+    #     => compile sm_20, sm_30, sm_35 cubin, and compute_35 PTX
+    # - with CUDA 6.5 and later compute capability <=3.5 and 5.0 are supported
+    #     => compile sm_20, sm_30, sm_35, sm_50 cubin, and compute_50 PTX
+    #   Note that the second patch release, CUDA 6.5.19, supports CC 5.2 too,
+    #   but CUDA_VERSION does not contain the patch version, and having the
+    #   compute_50 PTX JIT-ed is equally fast anyway.
     if(CUDA_VERSION VERSION_LESS "4.2")
-        set(_CUDA_ARCH_STR "-gencode;arch=compute_20,code=sm_20;-gencode;arch=compute_20,code=sm_21;-gencode;arch=compute_20,code=compute_20")
+        set(_CUDA_ARCH_STR "-gencode;arch=compute_20,code=sm_20;-gencode;arch=compute_20,code=compute_20")
     elseif(CUDA_VERSION VERSION_LESS "5.0")
-        set(_CUDA_ARCH_STR "-gencode;arch=compute_20,code=sm_20;-gencode;arch=compute_20,code=sm_21;-gencode;arch=compute_30,code=sm_30;-gencode;arch=compute_30,code=compute_30")
+        set(_CUDA_ARCH_STR "-gencode;arch=compute_20,code=sm_20;-gencode;arch=compute_30,code=sm_30;-gencode;arch=compute_30,code=compute_30")
+    elseif(CUDA_VERSION VERSION_LESS "6.5")
+        set(_CUDA_ARCH_STR "-gencode;arch=compute_20,code=sm_20;-gencode;arch=compute_30,code=sm_30;-gencode;arch=compute_35,code=sm_35;-gencode;arch=compute_35,code=compute_35")
     else()
-        set(_CUDA_ARCH_STR "-gencode;arch=compute_20,code=sm_20;-gencode;arch=compute_20,code=sm_21;-gencode;arch=compute_30,code=sm_30;-gencode;arch=compute_35,code=sm_35;-gencode;arch=compute_35,code=compute_35")
+        set(_CUDA_ARCH_STR "-gencode;arch=compute_20,code=sm_20;-gencode;arch=compute_30,code=sm_30;-gencode;arch=compute_35,code=sm_35;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_50,code=compute_50;")
     endif()
 
     # finally set the damn flags