Merge branch release-4-6 into master

[alexxy/gromacs.git] / src / gromacs / mdlib / nbnxn_cuda / nbnxn_cuda_kernel_legacy.cuh
diff --git a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_legacy.cuh b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_legacy.cuh

index 892c360e1e292372db4d848ceea87d4fedd7bf1a..657030789acfe7621eae89d48afd9690ce7c9d3e 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_legacy.cuh
+++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_legacy.cuh
@@ -53,6 +53,9 @@
  
      Each thread calculates an i force-component taking one pair of i-j atoms.
   */
+#if __CUDA_ARCH__ >= 350
+__launch_bounds__(64,16)
+#endif
  #ifdef PRUNE_NBL
  #ifdef CALC_ENERGIES
  __global__ void NB_KERNEL_FUNC_NAME(k_nbnxn, _ener_prune_legacy)
@@ -271,8 +274,8 @@ __global__ void NB_KERNEL_FUNC_NAME(k_nbnxn, _legacy)
                              typei   = atom_types[ai];
  
                              /* LJ 6*C6 and 12*C12 */
-                            c6      = tex1Dfetch(tex_nbfp, 2 * (ntypes * typei + typej));
-                            c12     = tex1Dfetch(tex_nbfp, 2 * (ntypes * typei + typej) + 1);
+                            c6      = tex1Dfetch(nbfp_texref, 2 * (ntypes * typei + typej));
+                            c12     = tex1Dfetch(nbfp_texref, 2 * (ntypes * typei + typej) + 1);
  
                              /* avoid NaN for excluded pairs at r=0 */
                              r2      += (1.0f - int_bit) * NBNXN_AVOID_SING_R2_INC;