From: Szilárd Páll <pall.szilard@gmail.com>
Date: Thu, 8 Feb 2018 20:20:45 +0000 (+0100)
Subject: Remove texture reference support in the CUDA
X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=4a080445385954a096eaf5c047f87a2ae734368f;p=alexxy%2Fgromacs.git

Remove texture reference support in the CUDA

Only required for legacy CC 2.0 / Fermi hardware we drop support to
simplify code and improve maintainability. The impact on the Fermi
support will only be performance (as direct memory fetch will be used).
Additionally, all builds will use multiple complication units now,
single compilation unit support has been kept for now and will be
removed later (if it simplifies code).

Follow-up expected/made possible:
- remove single compilation unit mode for CC 2.0;
- remove barrier that protects from race cleanup in
  pme-load-balancing.cpp;
- merge regular and Fermi NB kernels.

Change-Id: Ic3677b7beaff1b4b9dc927d955940f7e779c2a41
---

diff --git a/src/gromacs/ewald/pme-load-balancing.cpp b/src/gromacs/ewald/pme-load-balancing.cpp
index c9935fe839..b591f61262 100644
--- a/src/gromacs/ewald/pme-load-balancing.cpp
+++ b/src/gromacs/ewald/pme-load-balancing.cpp
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -842,6 +842,8 @@ pme_load_balance(pme_load_balancing_t      *pme_lb,
 
     nbnxn_gpu_pme_loadbal_update_param(nbv, ic, listParams);
 
+    // TODO: with the texture reference support removed, this barrier is
+    //       in principle not needed. Remove now or do it in a follow-up?
     /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
      * also sharing texture references. To keep the code simple, we don't
      * treat texture references as shared resources, but this means that
diff --git a/src/gromacs/ewald/pme-spread.cu b/src/gromacs/ewald/pme-spread.cu
index c13179fdf9..b2644adcc4 100644
--- a/src/gromacs/ewald/pme-spread.cu
+++ b/src/gromacs/ewald/pme-spread.cu
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013-2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013-2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -77,21 +77,6 @@ constexpr int c_spreadMaxWarpsPerBlock = 8;
 //! Spreading max block size in threads
 constexpr int c_spreadMaxThreadsPerBlock = c_spreadMaxWarpsPerBlock * warp_size;
 
-//! Texture references for CC 2.x
-texture<int, 1, cudaReadModeElementType>   gridlineIndicesTableTextureRef;
-texture<float, 1, cudaReadModeElementType> fractShiftsTableTextureRef;
-
-/*! Returns the reference to the gridlineIndices texture. */
-const struct texture<int, 1, cudaReadModeElementType> &pme_gpu_get_gridline_texref()
-{
-    return gridlineIndicesTableTextureRef;
-}
-
-/*! Returns the reference to the fractShifts texture. */
-const struct texture<float, 1, cudaReadModeElementType> &pme_gpu_get_fract_shifts_texref()
-{
-    return fractShiftsTableTextureRef;
-}
 
 /*! \brief
  * General purpose function for loading atom-related data from global to shared memory.
@@ -250,16 +235,10 @@ __device__ __forceinline__ void calculate_splines(const PmeGpuCudaKernelParams
             sm_fractCoords[sharedMemoryIndex] +=
                 fetchFromParamLookupTable(kernelParams.grid.d_fractShiftsTable,
                                           kernelParams.fractShiftsTableTexture,
-#if DISABLE_CUDA_TEXTURES == 0
-                                          fractShiftsTableTextureRef,
-#endif
                                           tableIndex);
             sm_gridlineIndices[sharedMemoryIndex] =
                 fetchFromParamLookupTable(kernelParams.grid.d_gridlineIndicesTable,
                                           kernelParams.gridlineIndicesTableTexture,
-#if DISABLE_CUDA_TEXTURES == 0
-                                          gridlineIndicesTableTextureRef,
-#endif
                                           tableIndex);
             gm_gridlineIndices[atomIndexOffset * DIM + sharedMemoryIndex] = sm_gridlineIndices[sharedMemoryIndex];
         }
diff --git a/src/gromacs/ewald/pme.cu b/src/gromacs/ewald/pme.cu
index f677c7de37..9f48b5bbc5 100644
--- a/src/gromacs/ewald/pme.cu
+++ b/src/gromacs/ewald/pme.cu
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -347,14 +347,12 @@ void pme_gpu_realloc_and_copy_fract_shifts(PmeGpu *pmeGpu)
 
     initParamLookupTable(kernelParamsPtr->grid.d_fractShiftsTable,
                          kernelParamsPtr->fractShiftsTableTexture,
-                         &pme_gpu_get_fract_shifts_texref(),
                          pmeGpu->common->fsh.data(),
                          newFractShiftsSize,
                          pmeGpu->deviceInfo);
 
     initParamLookupTable(kernelParamsPtr->grid.d_gridlineIndicesTable,
                          kernelParamsPtr->gridlineIndicesTableTexture,
-                         &pme_gpu_get_gridline_texref(),
                          pmeGpu->common->nn.data(),
                          newFractShiftsSize,
                          pmeGpu->deviceInfo);
@@ -365,11 +363,9 @@ void pme_gpu_free_fract_shifts(const PmeGpu *pmeGpu)
     auto *kernelParamsPtr = pmeGpu->kernelParams.get();
     destroyParamLookupTable(kernelParamsPtr->grid.d_fractShiftsTable,
                             kernelParamsPtr->fractShiftsTableTexture,
-                            &pme_gpu_get_fract_shifts_texref(),
                             pmeGpu->deviceInfo);
     destroyParamLookupTable(kernelParamsPtr->grid.d_gridlineIndicesTable,
                             kernelParamsPtr->gridlineIndicesTableTexture,
-                            &pme_gpu_get_gridline_texref(),
                             pmeGpu->deviceInfo);
 }
 
diff --git a/src/gromacs/ewald/pme.cuh b/src/gromacs/ewald/pme.cuh
index 5f3b04b794..c84e4fed8a 100644
--- a/src/gromacs/ewald/pme.cuh
+++ b/src/gromacs/ewald/pme.cuh
@@ -224,12 +224,4 @@ struct PmeGpuCudaKernelParams : PmeGpuKernelParamsBase
     cudaTextureObject_t gridlineIndicesTableTexture;
 };
 
-/* CUDA texture reference functions which reside in respective kernel files
- * (due to texture references having scope of a translation unit).
- */
-/*! Returns the reference to the gridlineIndices texture. */
-const struct texture<int, 1, cudaReadModeElementType>   &pme_gpu_get_gridline_texref();
-/*! Returns the reference to the fractShifts texture. */
-const struct texture<float, 1, cudaReadModeElementType> &pme_gpu_get_fract_shifts_texref();
-
 #endif
diff --git a/src/gromacs/gpu_utils/cuda_arch_utils.cuh b/src/gromacs/gpu_utils/cuda_arch_utils.cuh
index e1bf50cc3b..8ae0a20a08 100644
--- a/src/gromacs/gpu_utils/cuda_arch_utils.cuh
+++ b/src/gromacs/gpu_utils/cuda_arch_utils.cuh
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -140,13 +140,14 @@ T gmx_shfl_down_sync(const unsigned int activeMask,
 
 /*! \brief Allow disabling CUDA textures using the GMX_DISABLE_CUDA_TEXTURES macro.
  *
- *  Disable texture support-missing in clang (all versions up to <=5.0-dev as of writing).
+ *  Only texture objects supported, disable textures for <= CC 2.0 (but not in host code).
+ *  Disable texture support missing in clang (all versions up to <=5.0-dev as of writing).
  *
  *  This option will not influence functionality. All features using textures ought
  *  to have fallback for texture-less reads (direct/LDG loads), all new code needs
  *  to provide fallback code.
  */
-#if defined(GMX_DISABLE_CUDA_TEXTURES) || (defined(__clang__) && defined(__CUDA__))
+#if defined(GMX_DISABLE_CUDA_TEXTURES) || (GMX_PTX_ARCH > 0 && GMX_PTX_ARCH < 300) || (defined(__clang__) && defined(__CUDA__))
 #define DISABLE_CUDA_TEXTURES 1
 #else
 #define DISABLE_CUDA_TEXTURES 0
diff --git a/src/gromacs/gpu_utils/cuda_kernel_utils.cuh b/src/gromacs/gpu_utils/cuda_kernel_utils.cuh
index 002a7c6109..daebe3be29 100644
--- a/src/gromacs/gpu_utils/cuda_kernel_utils.cuh
+++ b/src/gromacs/gpu_utils/cuda_kernel_utils.cuh
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2017, by the GROMACS development team, led by
+ * Copyright (c) 2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -56,43 +56,31 @@ __device__ __forceinline__ T LDG(const T* ptr)
 #endif
 }
 
-/*! \brief Fetch the value by \p index from the texture object or reference.
- * Fetching from the object is the preferred behaviour on CC >= 3.0.
+/*! \brief Fetch the value by \p index from the texture object.
  *
  * \tparam[in] T        Raw data type
  * \param[in] texObj    Table texture object
- * \param[in] texRef    Table texture reference
  * \param[in] index     Non-negative element index
  * \returns             The value from the table at \p index
  */
 template <typename T>
 static __forceinline__ __device__
 T fetchFromTexture(const cudaTextureObject_t texObj,
-                   const struct texture<T, 1, cudaReadModeElementType> texRef,
-                   int index)
+                   int                       index)
 {
     assert(index >= 0);
     assert(!c_disableCudaTextures);
-    T result;
-#if GMX_PTX_ARCH >= 300  // Preferring texture objects on any new arch
-    GMX_UNUSED_VALUE(texRef);
-    result = tex1Dfetch<T>(texObj, index);
-#else
-    GMX_UNUSED_VALUE(texObj);
-    result = tex1Dfetch(texRef, index);
-#endif
-    return result;
+    return tex1Dfetch<T>(texObj, index);
 }
 
 /*! \brief Fetch the value by \p index from the parameter lookup table.
  *
  *  Depending on what is supported, it fetches parameters either
- *  using direct load, texture objects, or texture references.
+ *  using direct load or texture objects.
  *
  * \tparam[in] T        Raw data type
  * \param[in] d_ptr     Device pointer to the raw table memory
  * \param[in] texObj    Table texture object
- * \param[in] texRef    Table texture reference
  * \param[in] index     Non-negative element index
  * \returns             The value from the table at \p index
  */
@@ -100,10 +88,7 @@ template <typename T>
 static __forceinline__ __device__
 T fetchFromParamLookupTable(const T                  *d_ptr,
                             const cudaTextureObject_t texObj,
-#if DISABLE_CUDA_TEXTURES == 0
-                            const struct texture<T, 1, cudaReadModeElementType> texRef,
-#endif
-                            int index)
+                            int                       index)
 {
     assert(index >= 0);
     T result;
@@ -112,7 +97,7 @@ T fetchFromParamLookupTable(const T                  *d_ptr,
     result = LDG(d_ptr + index);
 #else
     GMX_UNUSED_VALUE(d_ptr);
-    result = fetchFromTexture<T>(texObj, texRef, index);
+    result = fetchFromTexture<T>(texObj, index);
 #endif
     return result;
 }
diff --git a/src/gromacs/gpu_utils/cudautils.cu b/src/gromacs/gpu_utils/cudautils.cu
index 5c92eb4d27..53e204a2ab 100644
--- a/src/gromacs/gpu_utils/cudautils.cu
+++ b/src/gromacs/gpu_utils/cudautils.cu
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -261,38 +261,12 @@ static void setup1DTexture(cudaTextureObject_t &texObj,
     CU_RET_ERR(stat, "cudaCreateTextureObject failed");
 }
 
-/*! \brief Set up texture reference for an array of type T.
- *
- * Set up texture object for an array of type T and bind it to the device memory
- * \p d_ptr points to.
- *
- * \tparam[in] T        Raw data type
- * \param[out] texObj   texture reference to initialize
- * \param[in]  d_ptr    pointer to device global memory to bind \p texObj to
- * \param[in]  sizeInBytes  size of memory area to bind \p texObj to
- */
-template <typename T>
-static void setup1DTexture(const struct texture<T, 1, cudaReadModeElementType> *texRef,
-                           const void                                          *d_ptr,
-                           size_t                                              sizeInBytes)
-{
-    assert(!c_disableCudaTextures);
-
-    cudaError_t           stat;
-    cudaChannelFormatDesc cd;
-
-    cd   = cudaCreateChannelDesc<T>();
-    stat = cudaBindTexture(nullptr, texRef, d_ptr, &cd, sizeInBytes);
-    CU_RET_ERR(stat, "cudaBindTexture failed");
-}
-
 template <typename T>
 void initParamLookupTable(T                        * &d_ptr,
-                          cudaTextureObject_t       &texObj,
-                          const struct texture<T, 1, cudaReadModeElementType> *texRef,
-                          const T                   *h_ptr,
-                          int                        numElem,
-                          const gmx_device_info_t   *devInfo)
+                          cudaTextureObject_t        &texObj,
+                          const T                    *h_ptr,
+                          int                         numElem,
+                          const gmx_device_info_t    *devInfo)
 {
     const size_t sizeInBytes = numElem * sizeof(*d_ptr);
     cudaError_t  stat        = cudaMalloc((void **)&d_ptr, sizeInBytes);
@@ -305,17 +279,12 @@ void initParamLookupTable(T                        * &d_ptr,
         {
             setup1DTexture<T>(texObj, d_ptr, sizeInBytes);
         }
-        else
-        {
-            setup1DTexture<T>(texRef, d_ptr, sizeInBytes);
-        }
     }
 }
 
 template <typename T>
 void destroyParamLookupTable(T                       *d_ptr,
                              cudaTextureObject_t      texObj,
-                             const struct texture<T, 1, cudaReadModeElementType> *texRef,
                              const gmx_device_info_t *devInfo)
 {
     if (!c_disableCudaTextures)
@@ -324,10 +293,6 @@ void destroyParamLookupTable(T                       *d_ptr,
         {
             CU_RET_ERR(cudaDestroyTextureObject(texObj), "cudaDestroyTextureObject on texObj failed");
         }
-        else
-        {
-            CU_RET_ERR(cudaUnbindTexture(texRef), "cudaUnbindTexture on texRef failed");
-        }
     }
     CU_RET_ERR(cudaFree(d_ptr), "cudaFree failed");
 }
@@ -336,7 +301,7 @@ void destroyParamLookupTable(T                       *d_ptr,
  * One should also verify that the result of cudaCreateChannelDesc<T>() during texture setup
  * looks reasonable, when instantiating the templates for new types - just in case.
  */
-template void initParamLookupTable<float>(float * &, cudaTextureObject_t &, const texture<float, 1, cudaReadModeElementType> *, const float *, int, const gmx_device_info_t *);
-template void destroyParamLookupTable<float>(float *, cudaTextureObject_t, const texture<float, 1, cudaReadModeElementType> *, const gmx_device_info_t *);
-template void initParamLookupTable<int>(int * &, cudaTextureObject_t &, const texture<int, 1, cudaReadModeElementType> *, const int *, int, const gmx_device_info_t *);
-template void destroyParamLookupTable<int>(int *, cudaTextureObject_t, const texture<int, 1, cudaReadModeElementType> *, const gmx_device_info_t *);
+template void initParamLookupTable<float>(float * &, cudaTextureObject_t &, const float *, int, const gmx_device_info_t *);
+template void destroyParamLookupTable<float>(float *, cudaTextureObject_t, const gmx_device_info_t *);
+template void initParamLookupTable<int>(int * &, cudaTextureObject_t &, const int *, int, const gmx_device_info_t *);
+template void destroyParamLookupTable<int>(int *, cudaTextureObject_t, const gmx_device_info_t *);
diff --git a/src/gromacs/gpu_utils/cudautils.cuh b/src/gromacs/gpu_utils/cudautils.cuh
index b4cca13bb5..5d985c81fc 100644
--- a/src/gromacs/gpu_utils/cudautils.cuh
+++ b/src/gromacs/gpu_utils/cudautils.cuh
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -215,8 +215,7 @@ void cu_realloc_buffered(void **d_dest, void *h_src,
 
 // TODO: the 2 functions below are pretty much a constructor/destructor of a simple
 // GPU table object. There is also almost self-contained fetchFromParamLookupTable()
-// in cuda_kernel_utils.cuh. They could all live in a separate class/struct file,
-// granted storing static texture references in there does not pose problems.
+// in cuda_kernel_utils.cuh. They could all live in a separate class/struct file.
 
 /*! \brief Initialize parameter lookup table.
  *
@@ -226,34 +225,30 @@ void cu_realloc_buffered(void **d_dest, void *h_src,
  * \tparam[in] T         Raw data type
  * \param[out] d_ptr     device pointer to the memory to be allocated
  * \param[out] texObj    texture object to be initialized
- * \param[out] texRef    texture reference to be initialized
  * \param[in]  h_ptr     pointer to the host memory to be uploaded to the device
  * \param[in]  numElem   number of elements in the h_ptr
  * \param[in]  devInfo   pointer to the info struct of the device in use
  */
 template <typename T>
 void initParamLookupTable(T                        * &d_ptr,
-                          cudaTextureObject_t       &texObj,
-                          const struct texture<T, 1, cudaReadModeElementType> *texRef,
-                          const T                   *h_ptr,
-                          int                        numElem,
-                          const gmx_device_info_t   *devInfo);
+                          cudaTextureObject_t        &texObj,
+                          const T                    *h_ptr,
+                          int                         numElem,
+                          const gmx_device_info_t    *devInfo);
 
 /*! \brief Destroy parameter lookup table.
  *
- * Unbinds texture reference/object, deallocates device memory.
+ * Unbinds texture object, deallocates device memory.
  *
  * \tparam[in] T         Raw data type
  * \param[in]  d_ptr     Device pointer to the memory to be deallocated
  * \param[in]  texObj    Texture object to be deinitialized
- * \param[in]  texRef    Texture reference to be deinitialized
  * \param[in]  devInfo   Pointer to the info struct of the device in use
  */
 template <typename T>
-void destroyParamLookupTable(T                         *d_ptr,
-                             cudaTextureObject_t        texObj,
-                             const struct texture<T, 1, cudaReadModeElementType> *texRef,
-                             const gmx_device_info_t   *devInfo);
+void destroyParamLookupTable(T                       *d_ptr,
+                             cudaTextureObject_t      texObj,
+                             const gmx_device_info_t *devInfo);
 
 /*! \brief Add a triplets stored in a float3 to an rvec variable.
  *
diff --git a/src/gromacs/mdlib/forcerec.cpp b/src/gromacs/mdlib/forcerec.cpp
index 401994a681..6f6d28fe47 100644
--- a/src/gromacs/mdlib/forcerec.cpp
+++ b/src/gromacs/mdlib/forcerec.cpp
@@ -2271,6 +2271,8 @@ static void init_nb_verlet(const gmx::MDLogger     &mdlog,
                        cr->nodeid,
                        (nbv->ngrp > 1));
 
+        // TODO: with the texture reference support removed, this barrier is
+        //       in principle not needed. Remove now or do it in a follow-up?
         /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
          * also sharing texture references. To keep the code simple, we don't
          * treat texture references as shared resources, but this means that
diff --git a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu
index 1bca13b767..3ae70cae21 100644
--- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu
+++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -66,25 +66,6 @@
 
 #include "nbnxn_cuda_types.h"
 
-/*
- * Texture references are created at compile-time and need to be declared
- * at file scope as global variables (see http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#texture-reference-api).
- * The texture references below are used in two translation units;
- * we declare them here along the kernels that use them (when compiling legacy Fermi kernels),
- * and provide getters (see below) used by the data_mgmt module where the
- * textures are bound/unbound.
- * (In principle we could do it the other way arond, but that would likely require
- * device linking and we'd rather avoid technical hurdles.)
- */
-/*! Texture reference for LJ C6/C12 parameters; bound to cu_nbparam_t.nbfp */
-texture<float, 1, cudaReadModeElementType> nbfp_texref;
-
-/*! Texture reference for LJ-PME parameters; bound to cu_nbparam_t.nbfp_comb */
-texture<float, 1, cudaReadModeElementType> nbfp_comb_texref;
-
-/*! Texture reference for Ewald coulomb force table; bound to cu_nbparam_t.coulomb_tab */
-texture<float, 1, cudaReadModeElementType> coulomb_tab_texref;
-
 
 /***** The kernel declarations/definitions come here *****/
 
@@ -728,21 +709,6 @@ void nbnxn_gpu_launch_cpyback(gmx_nbnxn_cuda_t       *nb,
     }
 }
 
-const struct texture<float, 1, cudaReadModeElementType> &nbnxn_cuda_get_nbfp_texref()
-{
-    return nbfp_texref;
-}
-
-const struct texture<float, 1, cudaReadModeElementType> &nbnxn_cuda_get_nbfp_comb_texref()
-{
-    return nbfp_comb_texref;
-}
-
-const struct texture<float, 1, cudaReadModeElementType> &nbnxn_cuda_get_coulomb_tab_texref()
-{
-    return coulomb_tab_texref;
-}
-
 void nbnxn_cuda_set_cacheconfig(const gmx_device_info_t *devinfo)
 {
     cudaError_t stat;
diff --git a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.h b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.h
index 2b6920a842..c61fe210dc 100644
--- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.h
+++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.h
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2017, by the GROMACS development team, led by
+ * Copyright (c) 2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -43,26 +43,5 @@
 
 //! Set up the cache configuration for the non-bonded kernels.
 void nbnxn_cuda_set_cacheconfig(const gmx_device_info_t *devinfo);
-/*! \brief Return the reference to the nbfp texture.
- *
- *  Note: it can return junk when c_disableCudaTextures==false, but we don't
- *  assert on that condition because the data_mgmt module ends up calling this
- *  function even if texture references are not used.
- */
-const struct texture<float, 1, cudaReadModeElementType> &nbnxn_cuda_get_nbfp_texref();
-/*! \brief Return the reference to the nbfp_comb texture.
- *
- *  Note: it can return junk when c_disableCudaTextures==false, but we don't
- *  assert on that condition because the data_mgmt module ends up calling this
- *  function even if texture references are not used.
- */
-const struct texture<float, 1, cudaReadModeElementType> &nbnxn_cuda_get_nbfp_comb_texref();
-/*! \brief Return the reference to the coulomb_tab texture.
- *
- *  Note: it can return junk when c_disableCudaTextures==false, but we don't
- *  assert on that condition because the data_mgmt module ends up calling this
- *  function even if texture references are not used.
- */
-const struct texture<float, 1, cudaReadModeElementType> &nbnxn_cuda_get_coulomb_tab_texref();
 
 #endif
diff --git a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu
index fe86c52557..cc8ba63b0e 100644
--- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu
+++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -112,7 +112,6 @@ static void init_ewald_coulomb_force_table(const interaction_const_t *ic,
 
     nbp->coulomb_tab_scale = ic->tabq_scale;
     initParamLookupTable(nbp->coulomb_tab, nbp->coulomb_tab_texobj,
-                         &nbnxn_cuda_get_coulomb_tab_texref(),
                          ic->tabq_coul_F, ic->tabq_size, dev_info);
 }
 
@@ -327,7 +326,6 @@ static void init_nbparam(cu_nbparam_t              *nbp,
     if (!useLjCombRule(nbp))
     {
         initParamLookupTable(nbp->nbfp, nbp->nbfp_texobj,
-                             &nbnxn_cuda_get_nbfp_texref(),
                              nbat->nbfp, 2*ntypes*ntypes, dev_info);
     }
 
@@ -335,7 +333,6 @@ static void init_nbparam(cu_nbparam_t              *nbp,
     if (ic->vdwtype == evdwPME)
     {
         initParamLookupTable(nbp->nbfp_comb, nbp->nbfp_comb_texobj,
-                             &nbnxn_cuda_get_nbfp_comb_texref(),
                              nbat->nbfp_comb, 2*ntypes, dev_info);
     }
 }
@@ -728,7 +725,7 @@ static void nbnxn_cuda_free_nbparam_table(cu_nbparam_t            *nbparam,
     if (nbparam->eeltype == eelCuEWALD_TAB || nbparam->eeltype == eelCuEWALD_TAB_TWIN)
     {
         destroyParamLookupTable(nbparam->coulomb_tab, nbparam->coulomb_tab_texobj,
-                                &nbnxn_cuda_get_coulomb_tab_texref(), dev_info);
+                                dev_info);
     }
 }
 
@@ -770,14 +767,14 @@ void nbnxn_gpu_free(gmx_nbnxn_cuda_t *nb)
     if (!useLjCombRule(nb->nbparam))
     {
         destroyParamLookupTable(nbparam->nbfp, nbparam->nbfp_texobj,
-                                &nbnxn_cuda_get_nbfp_texref(), nb->dev_info);
+                                nb->dev_info);
 
     }
 
     if (nbparam->vdwtype == evdwCuEWALDGEOM || nbparam->vdwtype == evdwCuEWALDLB)
     {
         destroyParamLookupTable(nbparam->nbfp_comb, nbparam->nbfp_comb_texobj,
-                                &nbnxn_cuda_get_nbfp_comb_texref(), nb->dev_info);
+                                nb->dev_info);
     }
 
     stat = cudaFree(atdat->shift_vec);
diff --git a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_fermi.cuh b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_fermi.cuh
index 6eaa496483..4fa0c8833d 100644
--- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_fermi.cuh
+++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_fermi.cuh
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -272,11 +272,7 @@ __global__ void NB_KERNEL_FUNC_NAME(nbnxn_kernel, _F_cuda)
 #endif
 
 #ifdef LJ_EWALD
-    #if DISABLE_CUDA_TEXTURES
             E_lj += LDG(&nbparam.nbfp[atom_types[(sci*c_numClPerSupercl + i)*c_clSize + tidxi]*(ntypes + 1)*2]);
-    #else
-            E_lj += tex1Dfetch(nbfp_texref, atom_types[(sci*c_numClPerSupercl + i)*c_clSize + tidxi]*(ntypes + 1)*2);
-    #endif
 #endif
         }
 
diff --git a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_utils.cuh b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_utils.cuh
index 2626bf101d..082bab0ac7 100644
--- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_utils.cuh
+++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_utils.cuh
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -58,12 +58,6 @@
 #ifndef NBNXN_CUDA_KERNEL_UTILS_CUH
 #define NBNXN_CUDA_KERNEL_UTILS_CUH
 
-/* Use texture objects if supported by the target hardware (and in host pass). */
-#if GMX_PTX_ARCH >= 300 || GMX_PTX_ARCH == 0
-/* Note: convenience macro, needs to be undef-ed at the end of the file. */
-#define USE_TEXOBJ
-#endif
-
 /*! \brief Log of the i and j cluster size.
  *  change this together with c_clSize !*/
 static const int          c_clSizeLog2  = 3;
@@ -233,11 +227,7 @@ float calculate_lj_ewald_c6grid(const cu_nbparam_t nbparam,
 #if DISABLE_CUDA_TEXTURES
     return LDG(&nbparam.nbfp_comb[2*typei]) * LDG(&nbparam.nbfp_comb[2*typej]);
 #else
-#ifdef USE_TEXOBJ
     return tex1Dfetch<float>(nbparam.nbfp_comb_texobj, 2*typei) * tex1Dfetch<float>(nbparam.nbfp_comb_texobj, 2*typej);
-#else
-    return tex1Dfetch(nbfp_comb_texref, 2*typei) * tex1Dfetch(nbfp_comb_texref, 2*typej);
-#endif /* USE_TEXOBJ */
 #endif /* DISABLE_CUDA_TEXTURES */
 }
 
@@ -320,13 +310,8 @@ float2 fetch_nbfp_comb_c6_c12(const cu_nbparam_t nbparam,
 #else
     /* NOTE: as we always do 8-byte aligned loads, we could
        fetch float2 here too just as above. */
-#ifdef USE_TEXOBJ
     c6c12.x = tex1Dfetch<float>(nbparam.nbfp_comb_texobj, 2*type);
     c6c12.y = tex1Dfetch<float>(nbparam.nbfp_comb_texobj, 2*type + 1);
-#else
-    c6c12.x = tex1Dfetch(nbfp_comb_texref, 2*type);
-    c6c12.y = tex1Dfetch(nbfp_comb_texref, 2*type + 1);
-#endif /* USE_TEXOBJ */
 #endif /* DISABLE_CUDA_TEXTURES */
 
     return c6c12;
@@ -399,13 +384,8 @@ float2 fetch_coulomb_force_r(const cu_nbparam_t nbparam,
     d.x = LDG(&nbparam.coulomb_tab[index]);
     d.y = LDG(&nbparam.coulomb_tab[index + 1]);
 #else
-#ifdef USE_TEXOBJ
     d.x = tex1Dfetch<float>(nbparam.coulomb_tab_texobj, index);
     d.y = tex1Dfetch<float>(nbparam.coulomb_tab_texobj, index + 1);
-#else
-    d.x =  tex1Dfetch(coulomb_tab_texref, index);
-    d.y =  tex1Dfetch(coulomb_tab_texref, index + 1);
-#endif // USE_TEXOBJ
 #endif // DISABLE_CUDA_TEXTURES
 
     return d;
@@ -461,13 +441,8 @@ void fetch_nbfp_c6_c12(float               &c6,
 #else
     /* NOTE: as we always do 8-byte aligned loads, we could
        fetch float2 here too just as above. */
-#ifdef USE_TEXOBJ
     c6  = tex1Dfetch<float>(nbparam.nbfp_texobj, 2*baseIndex);
     c12 = tex1Dfetch<float>(nbparam.nbfp_texobj, 2*baseIndex + 1);
-#else
-    c6  = tex1Dfetch(nbfp_texref, 2*baseIndex);
-    c12 = tex1Dfetch(nbfp_texref, 2*baseIndex + 1);
-#endif
 #endif // DISABLE_CUDA_TEXTURES
 }
 
@@ -765,6 +740,4 @@ void reduce_energy_warp_shfl(float E_lj, float E_el,
 }
 #endif /* GMX_PTX_ARCH */
 
-#undef USE_TEXOBJ
-
 #endif /* NBNXN_CUDA_KERNEL_UTILS_CUH */
diff --git a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernels.cuh b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernels.cuh
index a9be593e84..65406cc462 100644
--- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernels.cuh
+++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernels.cuh
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -52,7 +52,7 @@
  *  \ingroup module_mdlib
  */
 
-/* Use the standard non-Fermi kernel in host pass too (to avoid texref API calls). */
+/* Use the standard (non-Fermi) kernel in host pass too. */
 #if GMX_PTX_ARCH >= 300 || GMX_PTX_ARCH == 0
 #define FLAVOR_LEVEL_GENERATOR "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh"
 #else
diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp
index c5615ceb00..b91074075c 100644
--- a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp
+++ b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -88,9 +88,6 @@
 #include "nbnxn_ocl_internal.h"
 #include "nbnxn_ocl_types.h"
 
-#if defined TEXOBJ_SUPPORTED && __CUDA_ARCH__ >= 300
-#define USE_TEXOBJ
-#endif
 
 /*! \brief Convenience constants */
 //@{
diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel_amd.clh b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel_amd.clh
index 36a32b9e27..6fb9fe6770 100644
--- a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel_amd.clh
+++ b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel_amd.clh
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -540,11 +540,7 @@ __global float *restrict fshift,            /* stores float3 values */     /* OU
                                 F_invr  += qi * qj_f * (int_bit*inv_r2*inv_r + pmecorrF(beta2*r2)*beta3);
 #elif defined EL_EWALD_TAB
                                 F_invr  += qi * qj_f * (int_bit*inv_r2 -
-#ifdef USE_TEXOBJ
-                                                        interpolate_coulomb_force_r(nbparam->coulomb_tab_texobj, r2 * inv_r, coulomb_tab_scale)
-#else
                                                         interpolate_coulomb_force_r(coulomb_tab_climg2d, r2 * inv_r, coulomb_tab_scale)
-#endif /* USE_TEXOBJ */
                                                         ) * inv_r;
 #endif /* EL_EWALD_ANA/TAB */
 
diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel_nowarp.clh b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel_nowarp.clh
index ba522a2b96..0afbdee6c8 100644
--- a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel_nowarp.clh
+++ b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel_nowarp.clh
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -543,11 +543,7 @@ __global float *restrict fshift,            /* stores float3 values */     /* OU
                                 F_invr  += qi * qj_f * (int_bit*inv_r2*inv_r + pmecorrF(beta2*r2)*beta3);
 #elif defined EL_EWALD_TAB
                                 F_invr  += qi * qj_f * (int_bit*inv_r2 -
-#ifdef USE_TEXOBJ
-                                                        interpolate_coulomb_force_r(nbparam->coulomb_tab_texobj, r2 * inv_r, coulomb_tab_scale)
-#else
                                                         interpolate_coulomb_force_r(coulomb_tab_climg2d, r2 * inv_r, coulomb_tab_scale)
-#endif /* USE_TEXOBJ */
                                                         ) * inv_r;
 #endif /* EL_EWALD_ANA/TAB */
 
diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel_nvidia.clh b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel_nvidia.clh
index a7366c879b..be440d9f96 100644
--- a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel_nvidia.clh
+++ b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_kernel_nvidia.clh
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -533,11 +533,7 @@ __global float *restrict fshift,            /* stores float3 values */     /* OU
                                 F_invr  += qi * qj_f * (int_bit*inv_r2*inv_r + pmecorrF(beta2*r2)*beta3);
 #elif defined EL_EWALD_TAB
                                 F_invr  += qi * qj_f * (int_bit*inv_r2 -
-#ifdef USE_TEXOBJ
-                                                        interpolate_coulomb_force_r(nbparam->coulomb_tab_texobj, r2 * inv_r, coulomb_tab_scale)
-#else
                                                         interpolate_coulomb_force_r(coulomb_tab_climg2d, r2 * inv_r, coulomb_tab_scale)
-#endif /* USE_TEXOBJ */
                                                         ) * inv_r;
 #endif /* EL_EWALD_ANA/TAB */