Allow compilation to optimize for CUDA compute capability 3.5

[alexxy/gromacs.git] / src / mdlib / nbnxn_cuda / nbnxn_cuda_kernel_legacy.cuh
diff --git a/src/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_legacy.cuh b/src/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_legacy.cuh

index cff062d7a1381d3f909c2489e8de614ee527676c..b0012ee3619b34af8ac8630a9f2448e588cb803b 100644 (file)
--- a/src/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_legacy.cuh
+++ b/src/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_legacy.cuh
@@ -56,6 +56,9 @@
  
      Each thread calculates an i force-component taking one pair of i-j atoms.
   */
+#if __CUDA_ARCH__ >= 350
+__launch_bounds__(64,16)
+#endif
  #ifdef PRUNE_NBL
  #ifdef CALC_ENERGIES
  __global__ void NB_KERNEL_FUNC_NAME(k_nbnxn, _ener_prune_legacy)