From: Berk Hess
Date: Fri, 21 Dec 2018 20:49:29 +0000 (+0100)
Subject: Convert nbnxn_atomdata_t to C++
X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=f43e9454a4ed72f383ef71548499e00935744f48;p=alexxy%2Fgromacs.git

Convert nbnxn_atomdata_t to C++

Changed all manually managed pointers to std::vector.
Split off a Params and a SimdMasks struct.
Changed some data members to be private; more remains to be done.
This change is only refactoring, with no functional changes.

Note: minor, negligible performance impact on the nbnxn gridding due to
(unnecessary) initialization of std::vector elements during resize().

Change-Id: I9c70a1f8f272c80a7cf335fcbd867bd79c4102a2
---

diff --git a/src/gromacs/gpu_utils/cudautils.cu b/src/gromacs/gpu_utils/cudautils.cu
index fda0e9bb90..bda125511b 100644
--- a/src/gromacs/gpu_utils/cudautils.cu
+++ b/src/gromacs/gpu_utils/cudautils.cu
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2014,2015,2016,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2012,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -91,7 +91,7 @@ int cu_copy_D2H_async(void * h_dest, void * d_src, size_t bytes, cudaStream_t s
 }
 
 // TODO: template on transferKind to avoid runtime conditionals
-int cu_copy_H2D(void *d_dest, void *h_src, size_t bytes,
+int cu_copy_H2D(void *d_dest, const void *h_src, size_t bytes,
                 GpuApiCallBehavior transferKind, cudaStream_t s = nullptr)
 {
     cudaError_t stat;
@@ -121,7 +121,7 @@ int cu_copy_H2D(void *d_dest, void *h_src, size_t bytes,
     return 0;
 }
 
-int cu_copy_H2D_sync(void * d_dest, void * h_src, size_t bytes)
+int cu_copy_H2D_sync(void * d_dest, const void * h_src, size_t bytes)
 {
     return cu_copy_H2D(d_dest, h_src, bytes, GpuApiCallBehavior::Sync);
 }
@@ -129,7 +129,7 @@ int cu_copy_H2D_sync(void * d_dest, void * h_src, size_t bytes)
 /*!
  *  The copy is launched in stream s or if not specified, in stream 0.
  */
-int cu_copy_H2D_async(void * d_dest, void * h_src, size_t bytes, cudaStream_t s = nullptr)
+int cu_copy_H2D_async(void * d_dest, const void * h_src, size_t bytes, cudaStream_t s = nullptr)
 {
     return cu_copy_H2D(d_dest, h_src, bytes, GpuApiCallBehavior::Async, s);
 }
diff --git a/src/gromacs/gpu_utils/cudautils.cuh b/src/gromacs/gpu_utils/cudautils.cuh
index 1116d66674..35ab3d8753 100644
--- a/src/gromacs/gpu_utils/cudautils.cuh
+++ b/src/gromacs/gpu_utils/cudautils.cuh
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2014,2015,2016,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2012,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -157,13 +157,13 @@ int cu_copy_D2H_async(void * /*h_dest*/, void * /*d_src*/, size_t /*bytes*/, cud
  *
  *  The copy is launched in stream s or if not specified, in stream 0.
  */
-int cu_copy_H2D(void *d_dest, void *h_src, size_t bytes, GpuApiCallBehavior transferKind, cudaStream_t /*s = nullptr*/);
+int cu_copy_H2D(void *d_dest, const void *h_src, size_t bytes, GpuApiCallBehavior transferKind, cudaStream_t /*s = nullptr*/);
 
 /*!
Launches synchronous host to device memory copy. */ -int cu_copy_H2D_sync(void * /*d_dest*/, void * /*h_src*/, size_t /*bytes*/); +int cu_copy_H2D_sync(void * /*d_dest*/, const void * /*h_src*/, size_t /*bytes*/); /*! Launches asynchronous host to device memory copy in stream s. */ -int cu_copy_H2D_async(void * /*d_dest*/, void * /*h_src*/, size_t /*bytes*/, cudaStream_t /*s = nullptr*/); +int cu_copy_H2D_async(void * /*d_dest*/, const void * /*h_src*/, size_t /*bytes*/, cudaStream_t /*s = nullptr*/); // TODO: the 2 functions below are pretty much a constructor/destructor of a simple // GPU table object. There is also almost self-contained fetchFromParamLookupTable() diff --git a/src/gromacs/gpu_utils/oclutils.cpp b/src/gromacs/gpu_utils/oclutils.cpp index 26e750a2e4..b5efc4397e 100644 --- a/src/gromacs/gpu_utils/oclutils.cpp +++ b/src/gromacs/gpu_utils/oclutils.cpp @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2014,2015,2016,2017,2018, by the GROMACS development team, led by + * Copyright (c) 2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -52,7 +52,7 @@ #include "gromacs/utility/fatalerror.h" #include "gromacs/utility/smalloc.h" -int ocl_copy_H2D(cl_mem d_dest, void* h_src, +int ocl_copy_H2D(cl_mem d_dest, const void* h_src, size_t offset, size_t bytes, GpuApiCallBehavior transferKind, cl_command_queue command_queue, @@ -92,7 +92,7 @@ int ocl_copy_H2D(cl_mem d_dest, void* h_src, * identifying this particular host to device operation. The event can further * be used to queue a wait for this operation or to query profiling information. */ -int ocl_copy_H2D_async(cl_mem d_dest, void * h_src, +int ocl_copy_H2D_async(cl_mem d_dest, const void * h_src, size_t offset, size_t bytes, cl_command_queue command_queue, cl_event *copy_event) @@ -102,7 +102,7 @@ int ocl_copy_H2D_async(cl_mem d_dest, void * h_src, /*! \brief Launches synchronous host to device memory copy. */ -int ocl_copy_H2D_sync(cl_mem d_dest, void * h_src, +int ocl_copy_H2D_sync(cl_mem d_dest, const void * h_src, size_t offset, size_t bytes, cl_command_queue command_queue) { diff --git a/src/gromacs/gpu_utils/oclutils.h b/src/gromacs/gpu_utils/oclutils.h index 78cd747f7b..0cbb7a46aa 100644 --- a/src/gromacs/gpu_utils/oclutils.h +++ b/src/gromacs/gpu_utils/oclutils.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2014,2015,2016,2017,2018, by the GROMACS development team, led by + * Copyright (c) 2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -132,20 +132,20 @@ int ocl_copy_D2H_async(void * h_dest, cl_mem d_src, * identifying this particular host to device operation. The event can further * be used to queue a wait for this operation or to query profiling information. */ -int ocl_copy_H2D(cl_mem d_dest, void* h_src, +int ocl_copy_H2D(cl_mem d_dest, const void* h_src, size_t offset, size_t bytes, GpuApiCallBehavior transferKind, cl_command_queue command_queue, cl_event *copy_event); /*! \brief Launches asynchronous host to device memory copy. 
*/ -int ocl_copy_H2D_async(cl_mem d_dest, void * h_src, +int ocl_copy_H2D_async(cl_mem d_dest, const void * h_src, size_t offset, size_t bytes, cl_command_queue command_queue, cl_event *copy_event); /*! \brief Launches synchronous host to device memory copy. */ -int ocl_copy_H2D_sync(cl_mem d_dest, void * h_src, +int ocl_copy_H2D_sync(cl_mem d_dest, const void * h_src, size_t offset, size_t bytes, cl_command_queue command_queue); diff --git a/src/gromacs/mdlib/forcerec.cpp b/src/gromacs/mdlib/forcerec.cpp index 763aaaa6c1..8255b7b0f8 100644 --- a/src/gromacs/mdlib/forcerec.cpp +++ b/src/gromacs/mdlib/forcerec.cpp @@ -2196,7 +2196,7 @@ static void init_nb_verlet(const gmx::MDLogger &mdlog, enbnxninitcombrule = enbnxninitcombruleNONE; } - snew(nbv->nbat, 1); + nbv->nbat = new nbnxn_atomdata_t(nbv->bUseGPU ? gmx::PinningPolicy::PinnedIfSupported : gmx::PinningPolicy::CannotBePinned); int mimimumNumEnergyGroupNonbonded = ir->opts.ngener; if (ir->opts.ngener - ir->nwall == 1) { @@ -2213,8 +2213,7 @@ static void init_nb_verlet(const gmx::MDLogger &mdlog, enbnxninitcombrule, fr->ntype, fr->nbfp, mimimumNumEnergyGroupNonbonded, - bSimpleList ? gmx_omp_nthreads_get(emntNonbonded) : 1, - nb_alloc, nb_free); + bSimpleList ? gmx_omp_nthreads_get(emntNonbonded) : 1); if (nbv->bUseGPU) { diff --git a/src/gromacs/mdlib/nbnxn_atomdata.cpp b/src/gromacs/mdlib/nbnxn_atomdata.cpp index 9c70f5ab60..34edde97c8 100644 --- a/src/gromacs/mdlib/nbnxn_atomdata.cpp +++ b/src/gromacs/mdlib/nbnxn_atomdata.cpp @@ -112,73 +112,46 @@ void nbnxn_realloc_void(void **ptr, *ptr = ptr_new; } -/* Reallocate the nbnxn_atomdata_t for a size of n atoms */ -void nbnxn_atomdata_realloc(nbnxn_atomdata_t *nbat, int n) +void nbnxn_atomdata_t::resizeCoordinateBuffer(int numAtoms) { - GMX_ASSERT(nbat->nalloc >= nbat->natoms, "We should have at least as many elelements allocated as there are set"); - - int t; - - nbnxn_realloc_void(reinterpret_cast(&nbat->type), - nbat->natoms*sizeof(*nbat->type), - n*sizeof(*nbat->type), - nbat->alloc, nbat->free); - nbnxn_realloc_void(reinterpret_cast(&nbat->lj_comb), - nbat->natoms*2*sizeof(*nbat->lj_comb), - n*2*sizeof(*nbat->lj_comb), - nbat->alloc, nbat->free); - if (nbat->XFormat != nbatXYZQ) - { - nbnxn_realloc_void(reinterpret_cast(&nbat->q), - nbat->natoms*sizeof(*nbat->q), - n*sizeof(*nbat->q), - nbat->alloc, nbat->free); - } - if (nbat->nenergrp > 1) - { - nbnxn_realloc_void(reinterpret_cast(&nbat->energrp), - nbat->natoms/nbat->na_c*sizeof(*nbat->energrp), - n/nbat->na_c*sizeof(*nbat->energrp), - nbat->alloc, nbat->free); - } - nbnxn_realloc_void(reinterpret_cast(&nbat->x), - nbat->natoms*nbat->xstride*sizeof(*nbat->x), - n*nbat->xstride*sizeof(*nbat->x), - nbat->alloc, nbat->free); - for (t = 0; t < nbat->nout; t++) + numAtoms_ = numAtoms; + + x_.resize(numAtoms*xstride); +} + +void nbnxn_atomdata_t::resizeForceBuffers() +{ + /* Force buffers need padding up to a multiple of the buffer flag size */ + const int paddedSize = (numAtoms() + NBNXN_BUFFERFLAG_SIZE - 1)/NBNXN_BUFFERFLAG_SIZE*NBNXN_BUFFERFLAG_SIZE; + + /* Should we let each thread allocate it's own data instead? 
*/ + for (nbnxn_atomdata_output_t &outBuffer : out) { - /* Allocate one element extra for possible signaling with GPUs */ - nbnxn_realloc_void(reinterpret_cast(&nbat->out[t].f), - nbat->natoms*nbat->fstride*sizeof(*nbat->out[t].f), - n*nbat->fstride*sizeof(*nbat->out[t].f), - nbat->alloc, nbat->free); + outBuffer.f.resize(paddedSize*fstride); } - nbat->nalloc = n; } /* Initializes an nbnxn_atomdata_output_t data structure */ -static void nbnxn_atomdata_output_init(nbnxn_atomdata_output_t *out, - int nb_kernel_type, - int nenergrp, int stride, - nbnxn_alloc_t *ma) +nbnxn_atomdata_output_t::nbnxn_atomdata_output_t(int nb_kernel_type, + int numEnergyGroups, + int simdEnergyBufferStride, + gmx::PinningPolicy pinningPolicy) : + f({}, {pinningPolicy}), + fshift({}, {pinningPolicy}), + Vvdw({}, {pinningPolicy}), + Vc({}, {pinningPolicy}) { - out->f = nullptr; - ma(reinterpret_cast(&out->fshift), SHIFTS*DIM*sizeof(*out->fshift)); - out->nV = nenergrp*nenergrp; - ma(reinterpret_cast(&out->Vvdw), out->nV*sizeof(*out->Vvdw)); - ma(reinterpret_cast(&out->Vc), out->nV*sizeof(*out->Vc )); + fshift.resize(SHIFTS*DIM); + Vvdw.resize(numEnergyGroups*numEnergyGroups); + Vc.resize(numEnergyGroups*numEnergyGroups); if (nb_kernel_type == nbnxnk4xN_SIMD_4xN || nb_kernel_type == nbnxnk4xN_SIMD_2xNN) { - int cj_size = nbnxn_kernel_to_cluster_j_size(nb_kernel_type); - out->nVS = nenergrp*nenergrp*stride*(cj_size>>1)*cj_size; - ma(reinterpret_cast(&out->VSvdw), out->nVS*sizeof(*out->VSvdw)); - ma(reinterpret_cast(&out->VSc), out->nVS*sizeof(*out->VSc )); - } - else - { - out->nVS = 0; + int cj_size = nbnxn_kernel_to_cluster_j_size(nb_kernel_type); + int numElements = numEnergyGroups*numEnergyGroups*simdEnergyBufferStride*(cj_size/2)*cj_size; + VSvdw.resize(numElements); + VSc.resize(numElements); } } @@ -321,11 +294,9 @@ void copy_rvec_to_nbat_real(const int *a, int na, int na_round, } /* Stores the LJ parameter data in a format convenient for different kernels */ -static void set_lj_parameter_data(nbnxn_atomdata_t *nbat, gmx_bool bSIMD) +static void set_lj_parameter_data(nbnxn_atomdata_t::Params *params, gmx_bool bSIMD) { - real c6, c12; - - int nt = nbat->ntype; + int nt = params->numTypes; if (bSIMD) { @@ -337,16 +308,16 @@ static void set_lj_parameter_data(nbnxn_atomdata_t *nbat, gmx_bool bSIMD) * when it might not be used, but introducing the conditional code is not * really worth it. */ - nbat->alloc(reinterpret_cast(&nbat->nbfp_aligned), - nt*nt*c_simdBestPairAlignment*sizeof(*nbat->nbfp_aligned)); + params->nbfp_aligned.resize(nt*nt*c_simdBestPairAlignment); + for (int i = 0; i < nt; i++) { for (int j = 0; j < nt; j++) { - nbat->nbfp_aligned[(i*nt+j)*c_simdBestPairAlignment+0] = nbat->nbfp[(i*nt+j)*2+0]; - nbat->nbfp_aligned[(i*nt+j)*c_simdBestPairAlignment+1] = nbat->nbfp[(i*nt+j)*2+1]; - nbat->nbfp_aligned[(i*nt+j)*c_simdBestPairAlignment+2] = 0; - nbat->nbfp_aligned[(i*nt+j)*c_simdBestPairAlignment+3] = 0; + params->nbfp_aligned[(i*nt+j)*c_simdBestPairAlignment+0] = params->nbfp[(i*nt+j)*2+0]; + params->nbfp_aligned[(i*nt+j)*c_simdBestPairAlignment+1] = params->nbfp[(i*nt+j)*2+1]; + params->nbfp_aligned[(i*nt+j)*c_simdBestPairAlignment+2] = 0; + params->nbfp_aligned[(i*nt+j)*c_simdBestPairAlignment+3] = 0; } } #endif @@ -356,36 +327,37 @@ static void set_lj_parameter_data(nbnxn_atomdata_t *nbat, gmx_bool bSIMD) * and with LJ-PME kernels. We then only need parameters per atom type, * not per pair of atom types. 
*/ - switch (nbat->comb_rule) + params->nbfp_comb.resize(nt*2); + switch (params->comb_rule) { case ljcrGEOM: - nbat->comb_rule = ljcrGEOM; + params->comb_rule = ljcrGEOM; for (int i = 0; i < nt; i++) { /* Store the sqrt of the diagonal from the nbfp matrix */ - nbat->nbfp_comb[i*2 ] = std::sqrt(nbat->nbfp[(i*nt+i)*2 ]); - nbat->nbfp_comb[i*2+1] = std::sqrt(nbat->nbfp[(i*nt+i)*2+1]); + params->nbfp_comb[i*2 ] = std::sqrt(params->nbfp[(i*nt+i)*2 ]); + params->nbfp_comb[i*2+1] = std::sqrt(params->nbfp[(i*nt+i)*2+1]); } break; case ljcrLB: for (int i = 0; i < nt; i++) { /* Get 6*C6 and 12*C12 from the diagonal of the nbfp matrix */ - c6 = nbat->nbfp[(i*nt+i)*2 ]; - c12 = nbat->nbfp[(i*nt+i)*2+1]; + const real c6 = params->nbfp[(i*nt+i)*2 ]; + const real c12 = params->nbfp[(i*nt+i)*2+1]; if (c6 > 0 && c12 > 0) { /* We store 0.5*2^1/6*sigma and sqrt(4*3*eps), * so we get 6*C6 and 12*C12 after combining. */ - nbat->nbfp_comb[i*2 ] = 0.5*gmx::sixthroot(c12/c6); - nbat->nbfp_comb[i*2+1] = std::sqrt(c6*c6/c12); + params->nbfp_comb[i*2 ] = 0.5*gmx::sixthroot(c12/c6); + params->nbfp_comb[i*2+1] = std::sqrt(c6*c6/c12); } else { - nbat->nbfp_comb[i*2 ] = 0; - nbat->nbfp_comb[i*2+1] = 0; + params->nbfp_comb[i*2 ] = 0; + params->nbfp_comb[i*2+1] = 0; } } break; @@ -397,34 +369,30 @@ static void set_lj_parameter_data(nbnxn_atomdata_t *nbat, gmx_bool bSIMD) } } -#if GMX_SIMD -static void -nbnxn_atomdata_init_simple_exclusion_masks(nbnxn_atomdata_t *nbat) +nbnxn_atomdata_t::SimdMasks::SimdMasks() { - const int simd_width = GMX_SIMD_REAL_WIDTH; - int simd_excl_size; +#if GMX_SIMD + constexpr int simd_width = GMX_SIMD_REAL_WIDTH; /* Set the diagonal cluster pair exclusion mask setup data. * In the kernel we check 0 < j - i to generate the masks. * Here we store j - i for generating the mask for the first i, * we substract 0.5 to avoid rounding issues. * In the kernel we can subtract 1 to generate the subsequent mask. */ - int simd_4xn_diag_size; - - simd_4xn_diag_size = std::max(c_nbnxnCpuIClusterSize, simd_width); - snew_aligned(nbat->simd_4xn_diagonal_j_minus_i, simd_4xn_diag_size, NBNXN_MEM_ALIGN); + const int simd_4xn_diag_size = std::max(c_nbnxnCpuIClusterSize, simd_width); + diagonal_4xn_j_minus_i.resize(simd_4xn_diag_size); for (int j = 0; j < simd_4xn_diag_size; j++) { - nbat->simd_4xn_diagonal_j_minus_i[j] = j - 0.5; + diagonal_4xn_j_minus_i[j] = j - 0.5; } - snew_aligned(nbat->simd_2xnn_diagonal_j_minus_i, simd_width, NBNXN_MEM_ALIGN); + diagonal_2xnn_j_minus_i.resize(simd_width); for (int j = 0; j < simd_width/2; j++) { /* The j-cluster size is half the SIMD width */ - nbat->simd_2xnn_diagonal_j_minus_i[j] = j - 0.5; + diagonal_2xnn_j_minus_i[j] = j - 0.5; /* The next half of the SIMD width is for i + 1 */ - nbat->simd_2xnn_diagonal_j_minus_i[simd_width/2+j] = j - 1 - 0.5; + diagonal_2xnn_j_minus_i[simd_width/2 + j] = j - 1 - 0.5; } /* We use up to 32 bits for exclusion masking. @@ -434,95 +402,99 @@ nbnxn_atomdata_init_simple_exclusion_masks(nbnxn_atomdata_t *nbat) * In single precision this means the real and integer SIMD registers * are of equal size. 
*/ - simd_excl_size = c_nbnxnCpuIClusterSize*simd_width; + const int simd_excl_size = c_nbnxnCpuIClusterSize*simd_width; #if GMX_DOUBLE && !GMX_SIMD_HAVE_INT32_LOGICAL - snew_aligned(nbat->simd_exclusion_filter64, simd_excl_size, NBNXN_MEM_ALIGN); + exclusion_filter64.resize(simd_excl_size); #else - snew_aligned(nbat->simd_exclusion_filter, simd_excl_size, NBNXN_MEM_ALIGN); + exclusion_filter.resize(simd_excl_size); #endif for (int j = 0; j < simd_excl_size; j++) { /* Set the consecutive bits for masking pair exclusions */ #if GMX_DOUBLE && !GMX_SIMD_HAVE_INT32_LOGICAL - nbat->simd_exclusion_filter64[j] = (1U << j); + exclusion_filter64[j] = (1U << j); #else - nbat->simd_exclusion_filter[j] = (1U << j); + exclusion_filter[j] = (1U << j); #endif } -#if !GMX_SIMD_HAVE_LOGICAL && !GMX_SIMD_HAVE_INT32_LOGICAL - // If the SIMD implementation has no bitwise logical operation support - // whatsoever we cannot use the normal masking. Instead, - // we generate a vector of all 2^4 possible ways an i atom - // interacts with its 4 j atoms. Each array entry contains - // GMX_SIMD_REAL_WIDTH values that are read with a single aligned SIMD load. - // Since there is no logical value representation in this case, we use - // any nonzero value to indicate 'true', while zero mean 'false'. - // This can then be converted to a SIMD boolean internally in the SIMD - // module by comparing to zero. - // Each array entry encodes how this i atom will interact with the 4 j atoms. - // Matching code exists in set_ci_top_excls() to generate indices into this array. - // Those indices are used in the kernels. - - simd_excl_size = c_nbnxnCpuIClusterSize*c_nbnxnCpuIClusterSize; - const real simdFalse = 0.0; - const real simdTrue = 1.0; - real *simd_interaction_array; - - snew_aligned(simd_interaction_array, simd_excl_size * GMX_SIMD_REAL_WIDTH, NBNXN_MEM_ALIGN); - for (int j = 0; j < simd_excl_size; j++) + if (!GMX_SIMD_HAVE_LOGICAL && !GMX_SIMD_HAVE_INT32_LOGICAL) { - int index = j * GMX_SIMD_REAL_WIDTH; - for (int i = 0; i < GMX_SIMD_REAL_WIDTH; i++) + // If the SIMD implementation has no bitwise logical operation support + // whatsoever we cannot use the normal masking. Instead, + // we generate a vector of all 2^4 possible ways an i atom + // interacts with its 4 j atoms. Each array entry contains + // GMX_SIMD_REAL_WIDTH values that are read with a single aligned SIMD load. + // Since there is no logical value representation in this case, we use + // any nonzero value to indicate 'true', while zero mean 'false'. + // This can then be converted to a SIMD boolean internally in the SIMD + // module by comparing to zero. + // Each array entry encodes how this i atom will interact with the 4 j atoms. + // Matching code exists in set_ci_top_excls() to generate indices into this array. + // Those indices are used in the kernels. + + const int simd_excl_size = c_nbnxnCpuIClusterSize*c_nbnxnCpuIClusterSize; + const real simdFalse = 0.0; + const real simdTrue = 1.0; + + interaction_array.resize(simd_excl_size * GMX_SIMD_REAL_WIDTH); + for (int j = 0; j < simd_excl_size; j++) { - simd_interaction_array[index + i] = (j & (1 << i)) ? simdTrue : simdFalse; + const int index = j * GMX_SIMD_REAL_WIDTH; + for (int i = 0; i < GMX_SIMD_REAL_WIDTH; i++) + { + interaction_array[index + i] = (j & (1 << i)) ? 
simdTrue : simdFalse; + } } } - nbat->simd_interaction_array = simd_interaction_array; #endif } -#endif -/* Initializes the nbnxn_atomdata_t parameters data structure */ +nbnxn_atomdata_t::Params::Params(gmx::PinningPolicy pinningPolicy) : + numTypes(0), + nbfp({}, {pinningPolicy}), + nbfp_comb({}, {pinningPolicy}), + type({}, {pinningPolicy}), + lj_comb({}, {pinningPolicy}), + q({}, {pinningPolicy}), + nenergrp(0), + neg_2log(0), + energrp({}, {pinningPolicy}) +{ +} + +nbnxn_atomdata_t::nbnxn_atomdata_t(gmx::PinningPolicy pinningPolicy) : + params_(pinningPolicy), + numAtoms_(0), + natoms_local(0), + shift_vec({}, {pinningPolicy}), + x_({}, {pinningPolicy}), + simdMasks(), + bUseBufferFlags(FALSE), + bUseTreeReduce(FALSE) +{ +} + +/* Initializes an nbnxn_atomdata_t::Params data structure */ static void nbnxn_atomdata_params_init(const gmx::MDLogger &mdlog, - nbnxn_atomdata_t *nbat, + nbnxn_atomdata_t::Params *params, int nb_kernel_type, int enbnxninitcombrule, int ntype, const real *nbfp, - int n_energygroups, - nbnxn_alloc_t *alloc, - nbnxn_free_t *free) + int n_energygroups) { real c6, c12, tol; char *ptr; gmx_bool simple, bCombGeom, bCombLB, bSIMD; - if (alloc == nullptr) - { - nbat->alloc = nbnxn_alloc_aligned; - } - else - { - nbat->alloc = alloc; - } - if (free == nullptr) - { - nbat->free = nbnxn_free_aligned; - } - else - { - nbat->free = free; - } - if (debug) { fprintf(debug, "There are %d atom types in the system, adding one for nbnxn_atomdata_t\n", ntype); } - nbat->ntype = ntype + 1; - nbat->alloc(reinterpret_cast(&nbat->nbfp), - nbat->ntype*nbat->ntype*2*sizeof(*nbat->nbfp)); - nbat->alloc(reinterpret_cast(&nbat->nbfp_comb), nbat->ntype*2*sizeof(*nbat->nbfp_comb)); + params->numTypes = ntype + 1; + params->nbfp.resize(params->numTypes*params->numTypes*2); + params->nbfp_comb.resize(params->numTypes*2); /* A tolerance of 1e-5 seems reasonable for (possibly hand-typed) * force-field floating point parameters. @@ -539,22 +511,22 @@ static void nbnxn_atomdata_params_init(const gmx::MDLogger &mdlog, bCombGeom = TRUE; bCombLB = TRUE; - /* Temporarily fill nbat->nbfp_comb with sigma and epsilon + /* Temporarily fill params->nbfp_comb with sigma and epsilon * to check for the LB rule. */ for (int i = 0; i < ntype; i++) { - c6 = nbfp[(i*ntype+i)*2 ]/6.0; - c12 = nbfp[(i*ntype+i)*2+1]/12.0; + c6 = nbfp[(i*ntype+i)*2 ]/6.0; + c12 = nbfp[(i*ntype+i)*2 + 1]/12.0; if (c6 > 0 && c12 > 0) { - nbat->nbfp_comb[i*2 ] = gmx::sixthroot(c12/c6); - nbat->nbfp_comb[i*2+1] = 0.25*c6*c6/c12; + params->nbfp_comb[i*2 ] = gmx::sixthroot(c12/c6); + params->nbfp_comb[i*2 + 1] = 0.25*c6*c6/c12; } else if (c6 == 0 && c12 == 0) { - nbat->nbfp_comb[i*2 ] = 0; - nbat->nbfp_comb[i*2+1] = 0; + params->nbfp_comb[i*2 ] = 0; + params->nbfp_comb[i*2 + 1] = 0; } else { @@ -563,41 +535,41 @@ static void nbnxn_atomdata_params_init(const gmx::MDLogger &mdlog, } } - for (int i = 0; i < nbat->ntype; i++) + for (int i = 0; i < params->numTypes; i++) { - for (int j = 0; j < nbat->ntype; j++) + for (int j = 0; j < params->numTypes; j++) { if (i < ntype && j < ntype) { /* fr->nbfp has been updated, so that array too now stores c6/c12 including * the 6.0/12.0 prefactors to save 2 flops in the most common case (force-only). 
*/ - c6 = nbfp[(i*ntype+j)*2 ]; - c12 = nbfp[(i*ntype+j)*2+1]; - nbat->nbfp[(i*nbat->ntype+j)*2 ] = c6; - nbat->nbfp[(i*nbat->ntype+j)*2+1] = c12; + c6 = nbfp[(i*ntype+j)*2 ]; + c12 = nbfp[(i*ntype+j)*2 + 1]; + params->nbfp[(i*params->numTypes+j)*2 ] = c6; + params->nbfp[(i*params->numTypes+j)*2 + 1] = c12; /* Compare 6*C6 and 12*C12 for geometric cobination rule */ bCombGeom = bCombGeom && gmx_within_tol(c6*c6, nbfp[(i*ntype+i)*2 ]*nbfp[(j*ntype+j)*2 ], tol) && - gmx_within_tol(c12*c12, nbfp[(i*ntype+i)*2+1]*nbfp[(j*ntype+j)*2+1], tol); + gmx_within_tol(c12*c12, nbfp[(i*ntype+i)*2 + 1]*nbfp[(j*ntype+j)*2 + 1], tol); /* Compare C6 and C12 for Lorentz-Berthelot combination rule */ c6 /= 6.0; c12 /= 12.0; bCombLB = bCombLB && ((c6 == 0 && c12 == 0 && - (nbat->nbfp_comb[i*2+1] == 0 || nbat->nbfp_comb[j*2+1] == 0)) || + (params->nbfp_comb[i*2 + 1] == 0 || params->nbfp_comb[j*2 + 1] == 0)) || (c6 > 0 && c12 > 0 && gmx_within_tol(gmx::sixthroot(c12/c6), - 0.5*(nbat->nbfp_comb[i*2]+nbat->nbfp_comb[j*2]), tol) && - gmx_within_tol(0.25*c6*c6/c12, std::sqrt(nbat->nbfp_comb[i*2+1]*nbat->nbfp_comb[j*2+1]), tol))); + 0.5*(params->nbfp_comb[i*2]+params->nbfp_comb[j*2]), tol) && + gmx_within_tol(0.25*c6*c6/c12, std::sqrt(params->nbfp_comb[i*2 + 1]*params->nbfp_comb[j*2 + 1]), tol))); } else { /* Add zero parameters for the additional dummy atom type */ - nbat->nbfp[(i*nbat->ntype+j)*2 ] = 0; - nbat->nbfp[(i*nbat->ntype+j)*2+1] = 0; + params->nbfp[(i*params->numTypes + j)*2 ] = 0; + params->nbfp[(i*params->numTypes + j)*2+1] = 0; } } } @@ -617,43 +589,43 @@ static void nbnxn_atomdata_params_init(const gmx::MDLogger &mdlog, */ if (bCombGeom) { - nbat->comb_rule = ljcrGEOM; + params->comb_rule = ljcrGEOM; } else if (bCombLB) { - nbat->comb_rule = ljcrLB; + params->comb_rule = ljcrLB; } else { - nbat->comb_rule = ljcrNONE; + params->comb_rule = ljcrNONE; - nbat->free(nbat->nbfp_comb); + params->nbfp_comb.clear(); } { std::string mesg; - if (nbat->comb_rule == ljcrNONE) + if (params->comb_rule == ljcrNONE) { mesg = "Using full Lennard-Jones parameter combination matrix"; } else { mesg = gmx::formatString("Using %s Lennard-Jones combination rule", - nbat->comb_rule == ljcrGEOM ? "geometric" : "Lorentz-Berthelot"); + params->comb_rule == ljcrGEOM ? 
"geometric" : "Lorentz-Berthelot"); } GMX_LOG(mdlog.info).asParagraph().appendText(mesg); } break; case enbnxninitcombruleGEOM: - nbat->comb_rule = ljcrGEOM; + params->comb_rule = ljcrGEOM; break; case enbnxninitcombruleLB: - nbat->comb_rule = ljcrLB; + params->comb_rule = ljcrLB; break; case enbnxninitcombruleNONE: - nbat->comb_rule = ljcrNONE; + params->comb_rule = ljcrNONE; - nbat->free(nbat->nbfp_comb); + params->nbfp_comb.clear(); break; default: gmx_incons("Unknown enbnxninitcombrule"); @@ -662,23 +634,23 @@ static void nbnxn_atomdata_params_init(const gmx::MDLogger &mdlog, bSIMD = (nb_kernel_type == nbnxnk4xN_SIMD_4xN || nb_kernel_type == nbnxnk4xN_SIMD_2xNN); - set_lj_parameter_data(nbat, bSIMD); + set_lj_parameter_data(params, bSIMD); - nbat->nenergrp = n_energygroups; + params->nenergrp = n_energygroups; if (!simple) { // We now check for energy groups already when starting mdrun GMX_RELEASE_ASSERT(n_energygroups == 1, "GPU kernels do not support energy groups"); } /* Temporary storage goes as #grp^3*simd_width^2/2, so limit to 64 */ - if (nbat->nenergrp > 64) + if (params->nenergrp > 64) { gmx_fatal(FARGS, "With NxN kernels not more than 64 energy groups are supported\n"); } - nbat->neg_2log = 1; - while (nbat->nenergrp > (1<neg_2log)) + params->neg_2log = 1; + while (params->nenergrp > (1<neg_2log)) { - nbat->neg_2log++; + params->neg_2log++; } } @@ -689,20 +661,15 @@ void nbnxn_atomdata_init(const gmx::MDLogger &mdlog, int enbnxninitcombrule, int ntype, const real *nbfp, int n_energygroups, - int nout, - nbnxn_alloc_t *alloc, - nbnxn_free_t *free) + int nout) { - nbnxn_atomdata_params_init(mdlog, nbat, nb_kernel_type, - enbnxninitcombrule, ntype, nbfp, n_energygroups, - alloc, free); + nbnxn_atomdata_params_init(mdlog, &nbat->paramsDeprecated(), nb_kernel_type, + enbnxninitcombrule, ntype, nbfp, n_energygroups); const gmx_bool simple = nbnxn_kernel_pairlist_simple(nb_kernel_type); const gmx_bool bSIMD = (nb_kernel_type == nbnxnk4xN_SIMD_4xN || nb_kernel_type == nbnxnk4xN_SIMD_2xNN); - set_lj_parameter_data(nbat, bSIMD); - if (simple) { int pack_x; @@ -736,30 +703,19 @@ void nbnxn_atomdata_init(const gmx::MDLogger &mdlog, nbat->FFormat = nbatXYZ; } - nbat->alloc(reinterpret_cast(&nbat->shift_vec), SHIFTS*sizeof(*nbat->shift_vec)); + nbat->shift_vec.resize(SHIFTS); nbat->xstride = (nbat->XFormat == nbatXYZQ ? STRIDE_XYZQ : DIM); nbat->fstride = (nbat->FFormat == nbatXYZQ ? 
STRIDE_XYZQ : DIM); - nbat->x = nullptr; - -#if GMX_SIMD - if (simple) - { - nbnxn_atomdata_init_simple_exclusion_masks(nbat); - } -#endif /* Initialize the output data structures */ - nbat->nout = nout; - snew(nbat->out, nbat->nout); - nbat->nalloc = 0; - for (int i = 0; i < nbat->nout; i++) + for (int i = 0; i < nout; i++) { - nbnxn_atomdata_output_init(&nbat->out[i], - nb_kernel_type, - nbat->nenergrp, 1<neg_2log, - nbat->alloc); + const auto &pinningPolicy = nbat->params().type.get_allocator().pinningPolicy(); + nbat->out.emplace_back(nb_kernel_type, nbat->params().nenergrp, 1 << nbat->params().neg_2log, + pinningPolicy); } + nbat->buffer_flags.flag = nullptr; nbat->buffer_flags.flag_nalloc = 0; @@ -789,7 +745,7 @@ void nbnxn_atomdata_init(const gmx::MDLogger &mdlog, } template -static void copy_lj_to_nbat_lj_comb(const real *ljparam_type, +static void copy_lj_to_nbat_lj_comb(gmx::ArrayRef ljparam_type, const int *type, int na, real *ljparam_at) { @@ -807,11 +763,20 @@ static void copy_lj_to_nbat_lj_comb(const real *ljparam_type, } } +static int numAtomsFromGrids(const nbnxn_search &nbs) +{ + const nbnxn_grid_t &lastGrid = nbs.grid.back(); + + return (lastGrid.cell0 + lastGrid.nc)*lastGrid.na_sc; +} + /* Sets the atom type in nbnxn_atomdata_t */ -static void nbnxn_atomdata_set_atomtypes(nbnxn_atomdata_t *nbat, - const nbnxn_search *nbs, - const int *type) +static void nbnxn_atomdata_set_atomtypes(nbnxn_atomdata_t::Params *params, + const nbnxn_search *nbs, + const int *type) { + params->type.resize(numAtomsFromGrids(*nbs)); + for (const nbnxn_grid_t &grid : nbs->grid) { /* Loop over all columns and copy and fill */ @@ -821,16 +786,19 @@ static void nbnxn_atomdata_set_atomtypes(nbnxn_atomdata_t *nbat, int ash = (grid.cell0 + grid.cxy_ind[i])*grid.na_sc; copy_int_to_nbat_int(nbs->a.data() + ash, grid.cxy_na[i], ncz*grid.na_sc, - type, nbat->ntype-1, nbat->type+ash); + type, params->numTypes - 1, params->type.data() + ash); } } } /* Sets the LJ combination rule parameters in nbnxn_atomdata_t */ -static void nbnxn_atomdata_set_ljcombparams(nbnxn_atomdata_t *nbat, - const nbnxn_search *nbs) +static void nbnxn_atomdata_set_ljcombparams(nbnxn_atomdata_t::Params *params, + const int XFormat, + const nbnxn_search *nbs) { - if (nbat->comb_rule != ljcrNONE) + params->lj_comb.resize(numAtomsFromGrids(*nbs)*2); + + if (params->comb_rule != ljcrNONE) { for (const nbnxn_grid_t &grid : nbs->grid) { @@ -840,26 +808,26 @@ static void nbnxn_atomdata_set_ljcombparams(nbnxn_atomdata_t *nbat, int ncz = grid.cxy_ind[i+1] - grid.cxy_ind[i]; int ash = (grid.cell0 + grid.cxy_ind[i])*grid.na_sc; - if (nbat->XFormat == nbatX4) + if (XFormat == nbatX4) { - copy_lj_to_nbat_lj_comb(nbat->nbfp_comb, - nbat->type + ash, + copy_lj_to_nbat_lj_comb(params->nbfp_comb, + params->type.data() + ash, ncz*grid.na_sc, - nbat->lj_comb + ash*2); + params->lj_comb.data() + ash*2); } - else if (nbat->XFormat == nbatX8) + else if (XFormat == nbatX8) { - copy_lj_to_nbat_lj_comb(nbat->nbfp_comb, - nbat->type + ash, + copy_lj_to_nbat_lj_comb(params->nbfp_comb, + params->type.data() + ash, ncz*grid.na_sc, - nbat->lj_comb + ash*2); + params->lj_comb.data() + ash*2); } - else if (nbat->XFormat == nbatXYZQ) + else if (XFormat == nbatXYZQ) { - copy_lj_to_nbat_lj_comb<1>(nbat->nbfp_comb, - nbat->type + ash, + copy_lj_to_nbat_lj_comb<1>(params->nbfp_comb, + params->type.data() + ash, ncz*grid.na_sc, - nbat->lj_comb + ash*2); + params->lj_comb.data() + ash*2); } } } @@ -871,6 +839,11 @@ static void nbnxn_atomdata_set_charges(nbnxn_atomdata_t 
*nbat, const nbnxn_search *nbs, const real *charge) { + if (nbat->XFormat != nbatXYZQ) + { + nbat->paramsDeprecated().q.resize(nbat->numAtoms()); + } + for (const nbnxn_grid_t &grid : nbs->grid) { /* Loop over all columns and copy and fill */ @@ -882,7 +855,7 @@ static void nbnxn_atomdata_set_charges(nbnxn_atomdata_t *nbat, if (nbat->XFormat == nbatXYZQ) { - real *q = nbat->x + ash*STRIDE_XYZQ + ZZ + 1; + real *q = nbat->x().data() + ash*STRIDE_XYZQ + ZZ + 1; int i; for (i = 0; i < na; i++) { @@ -898,7 +871,7 @@ static void nbnxn_atomdata_set_charges(nbnxn_atomdata_t *nbat, } else { - real *q = nbat->q + ash; + real *q = nbat->paramsDeprecated().q.data() + ash; int i; for (i = 0; i < na; i++) { @@ -925,22 +898,24 @@ static void nbnxn_atomdata_set_charges(nbnxn_atomdata_t *nbat, static void nbnxn_atomdata_mask_fep(nbnxn_atomdata_t *nbat, const nbnxn_search *nbs) { - real *q; - int stride_q, nsubc; + nbnxn_atomdata_t::Params ¶ms = nbat->paramsDeprecated(); + real *q; + int stride_q; if (nbat->XFormat == nbatXYZQ) { - q = nbat->x + ZZ + 1; + q = nbat->x().data() + ZZ + 1; stride_q = STRIDE_XYZQ; } else { - q = nbat->q; + q = params.q.data(); stride_q = 1; } for (const nbnxn_grid_t &grid : nbs->grid) { + int nsubc; if (grid.bSimple) { nsubc = 1; @@ -965,8 +940,8 @@ static void nbnxn_atomdata_mask_fep(nbnxn_atomdata_t *nbat, { int ind = c_offset + c*grid.na_c + i; /* Set atom type and charge to non-interacting */ - nbat->type[ind] = nbat->ntype - 1; - q[ind*stride_q] = 0; + params.type[ind] = params.numTypes - 1; + q[ind*stride_q] = 0; } } } @@ -1005,15 +980,17 @@ static void copy_egp_to_nbat_egps(const int *a, int na, int na_round, } /* Set the energy group indices for atoms in nbnxn_atomdata_t */ -static void nbnxn_atomdata_set_energygroups(nbnxn_atomdata_t *nbat, - const nbnxn_search *nbs, - const int *atinfo) +static void nbnxn_atomdata_set_energygroups(nbnxn_atomdata_t::Params *params, + const nbnxn_search *nbs, + const int *atinfo) { - if (nbat->nenergrp == 1) + if (params->nenergrp == 1) { return; } + params->energrp.resize(numAtomsFromGrids(*nbs)); + for (const nbnxn_grid_t &grid : nbs->grid) { /* Loop over all columns and copy and fill */ @@ -1023,8 +1000,9 @@ static void nbnxn_atomdata_set_energygroups(nbnxn_atomdata_t *nbat, int ash = (grid.cell0 + grid.cxy_ind[i])*grid.na_sc; copy_egp_to_nbat_egps(nbs->a.data() + ash, grid.cxy_na[i], ncz*grid.na_sc, - nbat->na_c, nbat->neg_2log, - atinfo, nbat->energrp+(ash>>grid.na_c_2log)); + c_nbnxnCpuIClusterSize, params->neg_2log, + atinfo, + params->energrp.data() + (ash >> grid.na_c_2log)); } } } @@ -1035,7 +1013,9 @@ void nbnxn_atomdata_set(nbnxn_atomdata_t *nbat, const t_mdatoms *mdatoms, const int *atinfo) { - nbnxn_atomdata_set_atomtypes(nbat, nbs, mdatoms->typeA); + nbnxn_atomdata_t::Params ¶ms = nbat->paramsDeprecated(); + + nbnxn_atomdata_set_atomtypes(¶ms, nbs, mdatoms->typeA); nbnxn_atomdata_set_charges(nbat, nbs, mdatoms->chargeA); @@ -1045,9 +1025,9 @@ void nbnxn_atomdata_set(nbnxn_atomdata_t *nbat, } /* This must be done after masking types for FEP */ - nbnxn_atomdata_set_ljcombparams(nbat, nbs); + nbnxn_atomdata_set_ljcombparams(¶ms, nbat->XFormat, nbs); - nbnxn_atomdata_set_energygroups(nbat, nbs, atinfo); + nbnxn_atomdata_set_energygroups(¶ms, nbs, atinfo); } /* Copies the shift vector array to nbnxn_atomdata_t */ @@ -1135,7 +1115,7 @@ void nbnxn_atomdata_copy_x_to_nbat_x(const nbnxn_search *nbs, na_fill = na; } copy_rvec_to_nbat_real(nbs->a.data() + ash, na, na_fill, x, - nbat->XFormat, nbat->x, ash); + nbat->XFormat, 
nbat->x().data(), ash); } } } @@ -1147,7 +1127,7 @@ void nbnxn_atomdata_copy_x_to_nbat_x(const nbnxn_search *nbs, } static void -nbnxn_atomdata_clear_reals(real * gmx_restrict dest, +nbnxn_atomdata_clear_reals(gmx::ArrayRef dest, int i0, int i1) { for (int i = i0; i < i1; i++) @@ -1159,7 +1139,7 @@ nbnxn_atomdata_clear_reals(real * gmx_restrict dest, gmx_unused static void nbnxn_atomdata_reduce_reals(real * gmx_restrict dest, gmx_bool bDestSet, - real ** gmx_restrict src, + const real ** gmx_restrict src, int nsrc, int i0, int i1) { @@ -1191,7 +1171,7 @@ nbnxn_atomdata_reduce_reals(real * gmx_restrict dest, gmx_unused static void nbnxn_atomdata_reduce_reals_simd(real gmx_unused * gmx_restrict dest, gmx_bool gmx_unused bDestSet, - real gmx_unused ** gmx_restrict src, + const gmx_unused real ** gmx_restrict src, int gmx_unused nsrc, int gmx_unused i0, int gmx_unused i1) { @@ -1234,7 +1214,7 @@ nbnxn_atomdata_reduce_reals_simd(real gmx_unused * gmx_restrict dest, static void nbnxn_atomdata_add_nbat_f_to_f_part(const nbnxn_search *nbs, const nbnxn_atomdata_t *nbat, - nbnxn_atomdata_output_t *out, + gmx::ArrayRef out, int nfa, int a0, int a1, rvec *f) @@ -1248,7 +1228,7 @@ nbnxn_atomdata_add_nbat_f_to_f_part(const nbnxn_search *nbs, case nbatXYZQ: if (nfa == 1) { - const real *fnb = out[0].f; + const real *fnb = out[0].f.data(); for (int a = a0; a < a1; a++) { @@ -1277,7 +1257,7 @@ nbnxn_atomdata_add_nbat_f_to_f_part(const nbnxn_search *nbs, case nbatX4: if (nfa == 1) { - const real *fnb = out[0].f; + const real *fnb = out[0].f.data(); for (int a = a0; a < a1; a++) { @@ -1306,7 +1286,7 @@ nbnxn_atomdata_add_nbat_f_to_f_part(const nbnxn_search *nbs, case nbatX8: if (nfa == 1) { - const real *fnb = out[0].f; + const real *fnb = out[0].f.data(); for (int a = a0; a < a1; a++) { @@ -1343,14 +1323,16 @@ static inline unsigned char reverse_bits(unsigned char b) return (b * 0x0202020202ULL & 0x010884422010ULL) % 1023; } -static void nbnxn_atomdata_add_nbat_f_to_f_treereduce(const nbnxn_atomdata_t *nbat, - int nth) +static void nbnxn_atomdata_add_nbat_f_to_f_treereduce(nbnxn_atomdata_t *nbat, + int nth) { const nbnxn_buffer_flags_t *flags = &nbat->buffer_flags; - int next_pow2 = 1<<(gmx::log2I(nth-1)+1); + int next_pow2 = 1<<(gmx::log2I(nth-1)+1); - assert(nbat->nout == nth); /* tree-reduce currently only works for nout==nth */ + const int numOutputBuffers = nbat->out.size(); + GMX_ASSERT(numOutputBuffers == nth, + "tree-reduce currently only works for numOutputBuffers==nth"); memset(nbat->syncStep, 0, sizeof(*(nbat->syncStep))*nth); @@ -1404,7 +1386,7 @@ static void nbnxn_atomdata_add_nbat_f_to_f_treereduce(const nbnxn_atomdata_t *nb index[1] = index[0] + group_size/2; /* If no second buffer, nothing to do */ - if (index[1] >= nbat->nout && group_size > 2) + if (index[1] >= numOutputBuffers && group_size > 2) { continue; } @@ -1446,14 +1428,15 @@ static void nbnxn_atomdata_add_nbat_f_to_f_treereduce(const nbnxn_atomdata_t *nb if (bitmask_is_set(flags->flag[b], index[1]) || group_size > 2) { + const real *fIndex1 = nbat->out[index[1]].f.data(); #if GMX_SIMD nbnxn_atomdata_reduce_reals_simd #else nbnxn_atomdata_reduce_reals #endif - (nbat->out[index[0]].f, + (nbat->out[index[0]].f.data(), bitmask_is_set(flags->flag[b], index[0]) || group_size > 2, - &(nbat->out[index[1]].f), 1, i0, i1); + &fIndex1, 1, i0, i1); } else if (!bitmask_is_set(flags->flag[b], index[0])) @@ -1470,8 +1453,8 @@ static void nbnxn_atomdata_add_nbat_f_to_f_treereduce(const nbnxn_atomdata_t *nb } -static void 
nbnxn_atomdata_add_nbat_f_to_f_stdreduce(const nbnxn_atomdata_t *nbat, - int nth) +static void nbnxn_atomdata_add_nbat_f_to_f_stdreduce(nbnxn_atomdata_t *nbat, + int nth) { #pragma omp parallel for num_threads(nth) schedule(static) for (int th = 0; th < nth; th++) @@ -1479,8 +1462,8 @@ static void nbnxn_atomdata_add_nbat_f_to_f_stdreduce(const nbnxn_atomdata_t *nba try { const nbnxn_buffer_flags_t *flags; - int nfptr; - real *fptr[NBNXN_BUFFERFLAG_MAX_THREADS]; + int nfptr; + const real *fptr[NBNXN_BUFFERFLAG_MAX_THREADS]; flags = &nbat->buffer_flags; @@ -1494,11 +1477,11 @@ static void nbnxn_atomdata_add_nbat_f_to_f_stdreduce(const nbnxn_atomdata_t *nba int i1 = (b+1)*NBNXN_BUFFERFLAG_SIZE*nbat->fstride; nfptr = 0; - for (int out = 1; out < nbat->nout; out++) + for (int out = 1; out < static_cast(nbat->out.size()); out++) { if (bitmask_is_set(flags->flag[b], out)) { - fptr[nfptr++] = nbat->out[out].f; + fptr[nfptr++] = nbat->out[out].f.data(); } } if (nfptr > 0) @@ -1508,7 +1491,7 @@ static void nbnxn_atomdata_add_nbat_f_to_f_stdreduce(const nbnxn_atomdata_t *nba #else nbnxn_atomdata_reduce_reals #endif - (nbat->out[0].f, + (nbat->out[0].f.data(), bitmask_is_set(flags->flag[b], 0), fptr, nfptr, i0, i1); @@ -1527,7 +1510,7 @@ static void nbnxn_atomdata_add_nbat_f_to_f_stdreduce(const nbnxn_atomdata_t *nba /* Add the force array(s) from nbnxn_atomdata_t to f */ void nbnxn_atomdata_add_nbat_f_to_f(nbnxn_search *nbs, int locality, - const nbnxn_atomdata_t *nbat, + nbnxn_atomdata_t *nbat, rvec *f, gmx_wallcycle *wcycle) { @@ -1556,7 +1539,7 @@ void nbnxn_atomdata_add_nbat_f_to_f(nbnxn_search *nbs, int nth = gmx_omp_nthreads_get(emntNonbonded); - if (nbat->nout > 1) + if (nbat->out.size() > 1) { if (locality != eatAll) { @@ -1600,17 +1583,17 @@ void nbnxn_atomdata_add_nbat_f_to_f(nbnxn_search *nbs, void nbnxn_atomdata_add_nbat_fshift_to_fshift(const nbnxn_atomdata_t *nbat, rvec *fshift) { - const nbnxn_atomdata_output_t * out = nbat->out; + gmx::ArrayRef outputBuffers = nbat->out; for (int s = 0; s < SHIFTS; s++) { rvec sum; clear_rvec(sum); - for (int th = 0; th < nbat->nout; th++) + for (const nbnxn_atomdata_output_t &out : outputBuffers) { - sum[XX] += out[th].fshift[s*DIM+XX]; - sum[YY] += out[th].fshift[s*DIM+YY]; - sum[ZZ] += out[th].fshift[s*DIM+ZZ]; + sum[XX] += out.fshift[s*DIM+XX]; + sum[YY] += out.fshift[s*DIM+YY]; + sum[ZZ] += out.fshift[s*DIM+ZZ]; } rvec_inc(fshift[s], sum); } diff --git a/src/gromacs/mdlib/nbnxn_atomdata.h b/src/gromacs/mdlib/nbnxn_atomdata.h index d336d4b546..f5a8c1a291 100644 --- a/src/gromacs/mdlib/nbnxn_atomdata.h +++ b/src/gromacs/mdlib/nbnxn_atomdata.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
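(Throughout this patch, buffers that were grown with nbnxn_realloc_void/nbnxn_atomdata_realloc become std::vector members grown with resize(), as in resizeCoordinateBuffer() earlier in nbnxn_atomdata.cpp. A minimal standalone sketch of that pattern, with illustrative names rather than the real GROMACS types:)

    #include <cstddef>
    #include <vector>

    struct AtomDataSketch
    {
        int                xstride = 4;  // e.g. an XYZQ layout: x, y, z, q per atom
        std::vector<float> x;            // was a manually managed pointer plus a nalloc counter

        void resizeCoordinateBuffer(int numAtoms)
        {
            // std::vector keeps existing elements and tracks capacity itself, so the
            // explicit old-size/new-size/alloc/free bookkeeping disappears
            x.resize(static_cast<std::size_t>(numAtoms) * xstride);
        }
    };
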
@@ -91,11 +91,8 @@ void nbnxn_atomdata_init(const gmx::MDLogger &mdlog, int enbnxninitcombrule, int ntype, const real *nbfp, int n_energygroups, - int nout, - nbnxn_alloc_t *alloc, - nbnxn_free_t *free); + int nout); -/* Copy the atom data to the non-bonded atom data structure */ void nbnxn_atomdata_set(nbnxn_atomdata_t *nbat, const nbnxn_search *nbs, const t_mdatoms *mdatoms, @@ -119,7 +116,7 @@ void nbnxn_atomdata_copy_x_to_nbat_x(const nbnxn_search *nbs, /* Add the forces stored in nbat to f, zeros the forces in nbat */ void nbnxn_atomdata_add_nbat_f_to_f(nbnxn_search *nbs, int locality, - const nbnxn_atomdata_t *nbat, + nbnxn_atomdata_t *nbat, rvec *f, gmx_wallcycle *wcycle); diff --git a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu index 22e8eee3fd..7a78dda109 100644 --- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu +++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -318,7 +318,8 @@ void nbnxn_gpu_copy_xq_to_gpu(gmx_nbnxn_cuda_t *nb, } /* HtoD x, q */ - cu_copy_H2D_async(adat->xq + adat_begin, nbatom->x + adat_begin * 4, + cu_copy_H2D_async(adat->xq + adat_begin, + static_cast(nbatom->x().data() + adat_begin * 4), adat_len * sizeof(*adat->xq), stream); if (bDoTime) @@ -612,7 +613,7 @@ void nbnxn_gpu_launch_kernel_pruneonly(gmx_nbnxn_cuda_t *nb, } void nbnxn_gpu_launch_cpyback(gmx_nbnxn_cuda_t *nb, - const nbnxn_atomdata_t *nbatom, + nbnxn_atomdata_t *nbatom, int flags, int aloc, bool haveOtherWork) @@ -654,7 +655,7 @@ void nbnxn_gpu_launch_cpyback(gmx_nbnxn_cuda_t *nb, } /* DtoH f */ - cu_copy_D2H_async(nbatom->out[0].f + adat_begin * 3, adat->f + adat_begin, + cu_copy_D2H_async(nbatom->out[0].f.data() + adat_begin * 3, adat->f + adat_begin, (adat_len)*sizeof(*adat->f), stream); /* After the non-local D2H is launched the nonlocal_done event can be diff --git a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu index 40bdc78d20..d9c26a0813 100644 --- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu +++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu @@ -219,14 +219,14 @@ static void set_cutoff_parameters(cu_nbparam_t *nbp, } /*! Initializes the nonbonded parameter data structure. 
*/ -static void init_nbparam(cu_nbparam_t *nbp, - const interaction_const_t *ic, - const NbnxnListParameters *listParams, - const nbnxn_atomdata_t *nbat) +static void init_nbparam(cu_nbparam_t *nbp, + const interaction_const_t *ic, + const NbnxnListParameters *listParams, + const nbnxn_atomdata_t::Params &nbatParams) { int ntypes; - ntypes = nbat->ntype; + ntypes = nbatParams.numTypes; set_cutoff_parameters(nbp, ic, listParams); @@ -245,7 +245,7 @@ static void init_nbparam(cu_nbparam_t *nbp, { case eintmodNONE: case eintmodPOTSHIFT: - switch (nbat->comb_rule) + switch (nbatParams.comb_rule) { case ljcrNONE: nbp->vdwtype = evdwCuCUT; @@ -274,12 +274,12 @@ static void init_nbparam(cu_nbparam_t *nbp, { if (ic->ljpme_comb_rule == ljcrGEOM) { - assert(nbat->comb_rule == ljcrGEOM); + assert(nbatParams.comb_rule == ljcrGEOM); nbp->vdwtype = evdwCuEWALDGEOM; } else { - assert(nbat->comb_rule == ljcrLB); + assert(nbatParams.comb_rule == ljcrLB); nbp->vdwtype = evdwCuEWALDLB; } } @@ -318,14 +318,14 @@ static void init_nbparam(cu_nbparam_t *nbp, if (!useLjCombRule(nbp)) { initParamLookupTable(nbp->nbfp, nbp->nbfp_texobj, - nbat->nbfp, 2*ntypes*ntypes); + nbatParams.nbfp.data(), 2*ntypes*ntypes); } /* set up LJ-PME parameter lookup table */ if (ic->vdwtype == evdwPME) { initParamLookupTable(nbp->nbfp_comb, nbp->nbfp_comb_texobj, - nbat->nbfp_comb, 2*ntypes); + nbatParams.nbfp_comb.data(), 2*ntypes); } } @@ -412,10 +412,10 @@ static void init_timings(gmx_wallclock_gpu_nbnxn_t *t) static void nbnxn_cuda_init_const(gmx_nbnxn_cuda_t *nb, const interaction_const_t *ic, const NbnxnListParameters *listParams, - const nbnxn_atomdata_t *nbat) + const nbnxn_atomdata_t::Params &nbatParams) { - init_atomdata_first(nb->atdat, nbat->ntype); - init_nbparam(nb->nbparam, ic, listParams, nbat); + init_atomdata_first(nb->atdat, nbatParams.numTypes); + init_nbparam(nb->nbparam, ic, listParams, nbatParams); /* clear energy and shift force outputs */ nbnxn_cuda_clear_e_fshift(nb); @@ -504,7 +504,7 @@ void nbnxn_gpu_init(gmx_nbnxn_cuda_t **p_nb, /* pick L1 cache configuration */ nbnxn_cuda_set_cacheconfig(); - nbnxn_cuda_init_const(nb, ic, listParams, nbat); + nbnxn_cuda_init_const(nb, ic, listParams, nbat->params()); *p_nb = nb; @@ -584,7 +584,7 @@ void nbnxn_gpu_upload_shiftvec(gmx_nbnxn_cuda_t *nb, /* only if we have a dynamic box */ if (nbatom->bDynamicBox || !adat->bShiftVecUploaded) { - cu_copy_H2D_async(adat->shift_vec, nbatom->shift_vec, + cu_copy_H2D_async(adat->shift_vec, nbatom->shift_vec.data(), SHIFTS * sizeof(*adat->shift_vec), ls); adat->bShiftVecUploaded = true; } @@ -638,7 +638,7 @@ void nbnxn_gpu_init_atomdata(gmx_nbnxn_cuda_t *nb, cu_atomdata_t *d_atdat = nb->atdat; cudaStream_t ls = nb->stream[eintLocal]; - natoms = nbat->natoms; + natoms = nbat->numAtoms(); realloced = false; if (bDoTime) @@ -692,12 +692,12 @@ void nbnxn_gpu_init_atomdata(gmx_nbnxn_cuda_t *nb, if (useLjCombRule(nb->nbparam)) { - cu_copy_H2D_async(d_atdat->lj_comb, nbat->lj_comb, + cu_copy_H2D_async(d_atdat->lj_comb, nbat->params().lj_comb.data(), natoms*sizeof(*d_atdat->lj_comb), ls); } else { - cu_copy_H2D_async(d_atdat->atom_types, nbat->type, + cu_copy_H2D_async(d_atdat->atom_types, nbat->params().type.data(), natoms*sizeof(*d_atdat->atom_types), ls); } diff --git a/src/gromacs/mdlib/nbnxn_gpu.h b/src/gromacs/mdlib/nbnxn_gpu.h index fcc7888515..b23347b6c5 100644 --- a/src/gromacs/mdlib/nbnxn_gpu.h +++ b/src/gromacs/mdlib/nbnxn_gpu.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. 
* - * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014,2015,2017,2018,2019, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -132,7 +132,7 @@ void nbnxn_gpu_launch_kernel_pruneonly(gmx_nbnxn_gpu_t gmx_unused *nb, */ GPU_FUNC_QUALIFIER void nbnxn_gpu_launch_cpyback(gmx_nbnxn_gpu_t gmx_unused *nb, - const struct nbnxn_atomdata_t gmx_unused *nbatom, + struct nbnxn_atomdata_t gmx_unused *nbatom, int gmx_unused flags, int gmx_unused aloc, bool gmx_unused haveOtherWork) GPU_FUNC_TERM diff --git a/src/gromacs/mdlib/nbnxn_grid.cpp b/src/gromacs/mdlib/nbnxn_grid.cpp index f33a958d0b..93a93fbb64 100644 --- a/src/gromacs/mdlib/nbnxn_grid.cpp +++ b/src/gromacs/mdlib/nbnxn_grid.cpp @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -841,7 +841,7 @@ static void fill_cell(nbnxn_search *nbs, copy_rvec_to_nbat_real(nbs->a.data() + atomStart, numAtoms, grid->na_c, as_rvec_array(x.data()), - nbat->XFormat, nbat->x, atomStart); + nbat->XFormat, nbat->x().data(), atomStart); if (nbat->XFormat == nbatX4) { @@ -852,13 +852,13 @@ static void fill_cell(nbnxn_search *nbs, #if GMX_SIMD && GMX_SIMD_REAL_WIDTH == 2 if (2*grid->na_cj == grid->na_c) { - calc_bounding_box_x_x4_halves(numAtoms, nbat->x + atom_to_x_index(atomStart), bb_ptr, + calc_bounding_box_x_x4_halves(numAtoms, nbat->x().data() + atom_to_x_index(atomStart), bb_ptr, grid->bbj.data() + offset*2); } else #endif { - calc_bounding_box_x_x4(numAtoms, nbat->x + atom_to_x_index(atomStart), bb_ptr); + calc_bounding_box_x_x4(numAtoms, nbat->x().data() + atom_to_x_index(atomStart), bb_ptr); } } else if (nbat->XFormat == nbatX8) @@ -867,7 +867,7 @@ static void fill_cell(nbnxn_search *nbs, size_t offset = (atomStart - grid->cell0*grid->na_sc) >> grid->na_c_2log; nbnxn_bb_t *bb_ptr = grid->bb.data() + offset; - calc_bounding_box_x_x8(numAtoms, nbat->x + atom_to_x_index(atomStart), bb_ptr); + calc_bounding_box_x_x8(numAtoms, nbat->x().data() + atom_to_x_index(atomStart), bb_ptr); } #if NBNXN_BBXXXX else if (!grid->bSimple) @@ -883,13 +883,13 @@ static void fill_cell(nbnxn_search *nbs, #if NBNXN_SEARCH_SIMD4_FLOAT_X_BB if (nbat->XFormat == nbatXYZQ) { - calc_bounding_box_xxxx_simd4(numAtoms, nbat->x + atomStart*nbat->xstride, + calc_bounding_box_xxxx_simd4(numAtoms, nbat->x().data() + atomStart*nbat->xstride, bb_work_aligned, pbb_ptr); } else #endif { - calc_bounding_box_xxxx(numAtoms, nbat->xstride, nbat->x + atomStart*nbat->xstride, + calc_bounding_box_xxxx(numAtoms, nbat->xstride, nbat->x().data() + atomStart*nbat->xstride, pbb_ptr); } if (gmx_debug_at) @@ -907,7 +907,7 @@ static void fill_cell(nbnxn_search *nbs, /* Store the bounding boxes as xyz.xyz. 
*/ nbnxn_bb_t *bb_ptr = grid->bb.data() + ((atomStart - grid->cell0*grid->na_sc) >> grid->na_c_2log); - calc_bounding_box(numAtoms, nbat->xstride, nbat->x + atomStart*nbat->xstride, + calc_bounding_box(numAtoms, nbat->xstride, nbat->x().data() + atomStart*nbat->xstride, bb_ptr); if (gmx_debug_at) @@ -1226,13 +1226,8 @@ static void resizeForNumberOfCells(const nbnxn_grid_t &grid, */ nbs->a.resize(numNbnxnAtoms + numAtomsMoved); - /* We need padding up to a multiple of the buffer flag size: simply add */ - if (numNbnxnAtoms + NBNXN_BUFFERFLAG_SIZE > nbat->nalloc) - { - nbnxn_atomdata_realloc(nbat, numNbnxnAtoms + NBNXN_BUFFERFLAG_SIZE); - } - - nbat->natoms = numNbnxnAtoms; + /* Make space in nbat for storing the atom coordinates */ + nbat->resizeCoordinateBuffer(numNbnxnAtoms); } /* Determine in which grid cells the atoms should go */ @@ -1441,8 +1436,6 @@ void nbnxn_put_on_grid(nbnxn_search_t nbs, grid->na_sc = (grid->bSimple ? 1 : c_gpuNumClusterPerCell)*grid->na_c; grid->na_c_2log = get_2log(grid->na_c); - nbat->na_c = grid->na_c; - if (ddZone == 0) { grid->cell0 = 0; @@ -1510,7 +1503,12 @@ void nbnxn_put_on_grid(nbnxn_search_t nbs, if (ddZone == 0) { - nbat->natoms_local = nbat->natoms; + nbat->natoms_local = nbat->numAtoms(); + } + if (ddZone == static_cast(nbs->grid.size()) - 1) + { + /* We are done setting up all grids, we can resize the force buffers */ + nbat->resizeForceBuffers(); } nbs_cycle_stop(&nbs->cc[enbsCCgrid]); diff --git a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.cpp b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.cpp index 7a009dc2c1..b4a3f1327f 100644 --- a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.cpp +++ b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.cpp @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014,2015,2017,2019, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
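(The force-buffer sizing done by resizeForceBuffers(), called above once all grids are set up, pads the atom count up to a multiple of NBNXN_BUFFERFLAG_SIZE with the usual integer round-up idiom; a self-contained illustration, where blockSize stands in for NBNXN_BUFFERFLAG_SIZE:)

    // Round n up to the nearest multiple of blockSize (requires blockSize > 0).
    constexpr int roundUpToMultiple(int n, int blockSize)
    {
        return (n + blockSize - 1) / blockSize * blockSize;
    }

    static_assert(roundUpToMultiple(0, 8)  == 0,  "zero needs no padding");
    static_assert(roundUpToMultiple(1, 8)  == 8,  "a partial block pads to a full block");
    static_assert(roundUpToMultiple(16, 8) == 16, "an exact multiple is unchanged");
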
@@ -42,9 +42,7 @@ static void clear_f_all(const nbnxn_atomdata_t *nbat, real *f) { - int i; - - for (i = 0; i < nbat->natoms*nbat->fstride; i++) + for (int i = 0; i < nbat->numAtoms()*nbat->fstride; i++) { f[i] = 0; } @@ -103,24 +101,23 @@ reduce_energies_over_lists(const nbnxn_atomdata_t *nbat, real *Vvdw, real *Vc) { - int nb; - int i, j, ind, indr; + const int nenergrp = nbat->params().nenergrp; - for (nb = 0; nb < nlist; nb++) + for (int nb = 0; nb < nlist; nb++) { - for (i = 0; i < nbat->nenergrp; i++) + for (int i = 0; i < nenergrp; i++) { /* Reduce the diagonal terms */ - ind = i*nbat->nenergrp + i; + int ind = i*nenergrp + i; Vvdw[ind] += nbat->out[nb].Vvdw[ind]; Vc[ind] += nbat->out[nb].Vc[ind]; /* Reduce the off-diagonal terms */ - for (j = i+1; j < nbat->nenergrp; j++) + for (int j = i + 1; j < nenergrp; j++) { /* The output should contain only one off-diagonal part */ - ind = i*nbat->nenergrp + j; - indr = j*nbat->nenergrp + i; + int ind = i*nenergrp + j; + int indr = j*nenergrp + i; Vvdw[ind] += nbat->out[nb].Vvdw[ind] + nbat->out[nb].Vvdw[indr]; Vc[ind] += nbat->out[nb].Vc[ind] + nbat->out[nb].Vc[indr]; } diff --git a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_cpu.cpp b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_cpu.cpp index 074f97974d..3f47f2d0b7 100644 --- a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_cpu.cpp +++ b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_cpu.cpp @@ -66,20 +66,10 @@ */ static void clearGroupEnergies(nbnxn_atomdata_output_t *out) { - for (int i = 0; i < out->nV; i++) - { - out->Vvdw[i] = 0; - out->Vc[i] = 0; - } - - for (int i = 0; i < out->nVS; i++) - { - out->VSvdw[i] = 0; - } - for (int i = 0; i < out->nVS; i++) - { - out->VSc[i] = 0; - } + std::fill(out->Vvdw.begin(), out->Vvdw.end(), 0.0_real); + std::fill(out->Vc.begin(), out->Vc.end(), 0.0_real); + std::fill(out->VSvdw.begin(), out->VSvdw.end(), 0.0_real); + std::fill(out->VSc.begin(), out->VSc.end(), 0.0_real); } /*! 
\brief Reduce the group-pair energy buffers produced by a SIMD kernel @@ -91,22 +81,21 @@ static void clearGroupEnergies(nbnxn_atomdata_output_t *out) * \tparam unrollj The unroll size for j-particles in the SIMD kernel * \param[in] numGroups The number of energy groups * \param[in] numGroups_2log Log2 of numGroups, rounded up - * \param[in] vVdwSimd SIMD Van der Waals energy buffers - * \param[in] vCoulombSimd SIMD Coulomb energy buffers - * \param[in,out] vVdw Van der Waals energy output buffer - * \param[in,out] vCoulomb Coulomb energy output buffer + * \param[in,out] out Struct with energy buffers */ template static void reduceGroupEnergySimdBuffers(int numGroups, int numGroups_2log, - const real * gmx_restrict vVdwSimd, - const real * gmx_restrict vCoulombSimd, - real * gmx_restrict vVdw, - real * gmx_restrict vCoulomb) + nbnxn_atomdata_output_t *out) { - const int unrollj_half = unrollj/2; + const int unrollj_half = unrollj/2; /* Energies are stored in SIMD registers with size 2^numGroups_2log */ - const int numGroupsStorage = (1 << numGroups_2log); + const int numGroupsStorage = (1 << numGroups_2log); + + const real * gmx_restrict vVdwSimd = out->VSvdw.data(); + const real * gmx_restrict vCoulombSimd = out->VSc.data(); + real * gmx_restrict vVdw = out->Vvdw.data(); + real * gmx_restrict vCoulomb = out->Vc.data(); /* The size of the SIMD energy group buffer array is: * numGroups*numGroups*numGroupsStorage*unrollj_half*simd_width @@ -133,7 +122,7 @@ reduceGroupEnergySimdBuffers(int numGroups, void nbnxn_kernel_cpu(nonbonded_verlet_group_t *nbvg, - const nbnxn_atomdata_t *nbat, + nbnxn_atomdata_t *nbat, const interaction_const_t *ic, rvec *shiftVectors, int forceFlags, @@ -174,6 +163,8 @@ nbnxn_kernel_cpu(nonbonded_verlet_group_t *nbvg, } } + const nbnxn_atomdata_t::Params &nbatParams = nbat->params(); + int vdwkt = 0; if (ic->vdwtype == evdwCUT) { @@ -181,7 +172,7 @@ nbnxn_kernel_cpu(nonbonded_verlet_group_t *nbvg, { case eintmodNONE: case eintmodPOTSHIFT: - switch (nbat->comb_rule) + switch (nbatParams.comb_rule) { case ljcrGEOM: vdwkt = vdwktLJCUT_COMBGEOM; break; case ljcrLB: vdwkt = vdwktLJCUT_COMBLB; break; @@ -231,7 +222,7 @@ nbnxn_kernel_cpu(nonbonded_verlet_group_t *nbvg, if (clearF == enbvClearFYes) { - clear_f(nbat, nb, out->f); + clear_f(nbat, nb, out->f.data()); } real *fshift_p; @@ -241,7 +232,7 @@ nbnxn_kernel_cpu(nonbonded_verlet_group_t *nbvg, } else { - fshift_p = out->fshift; + fshift_p = out->fshift.data(); if (clearF == enbvClearFYes) { @@ -258,7 +249,7 @@ nbnxn_kernel_cpu(nonbonded_verlet_group_t *nbvg, nbnxn_kernel_noener_ref[coulkt][vdwkt](nbl[nb], nbat, ic, shiftVectors, - out->f, + out->f.data(), fshift_p); break; #ifdef GMX_NBNXN_SIMD_2XNN @@ -266,7 +257,7 @@ nbnxn_kernel_cpu(nonbonded_verlet_group_t *nbvg, nbnxn_kernel_noener_simd_2xnn[coulkt][vdwkt](nbl[nb], nbat, ic, shiftVectors, - out->f, + out->f.data(), fshift_p); break; #endif @@ -275,7 +266,7 @@ nbnxn_kernel_cpu(nonbonded_verlet_group_t *nbvg, nbnxn_kernel_noener_simd_4xn[coulkt][vdwkt](nbl[nb], nbat, ic, shiftVectors, - out->f, + out->f.data(), fshift_p); break; #endif @@ -283,7 +274,7 @@ nbnxn_kernel_cpu(nonbonded_verlet_group_t *nbvg, GMX_RELEASE_ASSERT(false, "Unsupported kernel architecture"); } } - else if (out->nV == 1) + else if (out->Vvdw.size() == 1) { /* A single energy group (pair) */ out->Vvdw[0] = 0; @@ -295,20 +286,20 @@ nbnxn_kernel_cpu(nonbonded_verlet_group_t *nbvg, nbnxn_kernel_ener_ref[coulkt][vdwkt](nbl[nb], nbat, ic, shiftVectors, - out->f, + out->f.data(), fshift_p, - 
out->Vvdw, - out->Vc); + out->Vvdw.data(), + out->Vc.data()); break; #ifdef GMX_NBNXN_SIMD_2XNN case nbnxnk4xN_SIMD_2xNN: nbnxn_kernel_ener_simd_2xnn[coulkt][vdwkt](nbl[nb], nbat, ic, shiftVectors, - out->f, + out->f.data(), fshift_p, - out->Vvdw, - out->Vc); + out->Vvdw.data(), + out->Vc.data()); break; #endif #ifdef GMX_NBNXN_SIMD_4XN @@ -316,10 +307,10 @@ nbnxn_kernel_cpu(nonbonded_verlet_group_t *nbvg, nbnxn_kernel_ener_simd_4xn[coulkt][vdwkt](nbl[nb], nbat, ic, shiftVectors, - out->f, + out->f.data(), fshift_p, - out->Vvdw, - out->Vc); + out->Vvdw.data(), + out->Vc.data()); break; #endif default: @@ -340,10 +331,10 @@ nbnxn_kernel_cpu(nonbonded_verlet_group_t *nbvg, nbnxn_kernel_energrp_ref[coulkt][vdwkt](nbl[nb], nbat, ic, shiftVectors, - out->f, + out->f.data(), fshift_p, - out->Vvdw, - out->Vc); + out->Vvdw.data(), + out->Vc.data()); break; #ifdef GMX_NBNXN_SIMD_2XNN case nbnxnk4xN_SIMD_2xNN: @@ -351,10 +342,10 @@ nbnxn_kernel_cpu(nonbonded_verlet_group_t *nbvg, nbnxn_kernel_energrp_simd_2xnn[coulkt][vdwkt](nbl[nb], nbat, ic, shiftVectors, - out->f, + out->f.data(), fshift_p, - out->VSvdw, - out->VSc); + out->VSvdw.data(), + out->VSc.data()); break; #endif #ifdef GMX_NBNXN_SIMD_4XN @@ -363,10 +354,10 @@ nbnxn_kernel_cpu(nonbonded_verlet_group_t *nbvg, nbnxn_kernel_energrp_simd_4xn[coulkt][vdwkt](nbl[nb], nbat, ic, shiftVectors, - out->f, + out->f.data(), fshift_p, - out->VSvdw, - out->VSc); + out->VSvdw.data(), + out->VSc.data()); break; #endif default: @@ -378,22 +369,19 @@ nbnxn_kernel_cpu(nonbonded_verlet_group_t *nbvg, switch (unrollj) { case 2: - reduceGroupEnergySimdBuffers<2>(nbat->nenergrp, - nbat->neg_2log, - out->VSvdw, out->VSc, - out->Vvdw, out->Vc); + reduceGroupEnergySimdBuffers<2>(nbatParams.nenergrp, + nbatParams.neg_2log, + out); break; case 4: - reduceGroupEnergySimdBuffers<4>(nbat->nenergrp, - nbat->neg_2log, - out->VSvdw, out->VSc, - out->Vvdw, out->Vc); + reduceGroupEnergySimdBuffers<4>(nbatParams.nenergrp, + nbatParams.neg_2log, + out); break; case 8: - reduceGroupEnergySimdBuffers<8>(nbat->nenergrp, - nbat->neg_2log, - out->VSvdw, out->VSc, - out->Vvdw, out->Vc); + reduceGroupEnergySimdBuffers<8>(nbatParams.nenergrp, + nbatParams.neg_2log, + out); break; default: GMX_RELEASE_ASSERT(false, "Unsupported j-unroll size"); diff --git a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_cpu.h b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_cpu.h index 5f4ba0cde0..389f5e6daa 100644 --- a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_cpu.h +++ b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_cpu.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2017, by the GROMACS development team, led by + * Copyright (c) 2017,2018,2019, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -58,7 +58,7 @@ struct nonbonded_verlet_group_t; * within this function. 
* * \param[in,out] nbvg The group (local/non-local) to compute interaction for - * \param[in] nbat The atomdata for the interactions + * \param[in,out] nbat The atomdata for the interactions * \param[in] ic Non-bonded interaction constants * \param[in] shiftVectors The PBC shift vectors * \param[in] forceFlags Flags that tell what to compute @@ -69,7 +69,7 @@ struct nonbonded_verlet_group_t; */ void nbnxn_kernel_cpu(nonbonded_verlet_group_t *nbvg, - const nbnxn_atomdata_t *nbat, + nbnxn_atomdata_t *nbat, const interaction_const_t *ic, rvec *shiftVectors, int forceFlags, diff --git a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.cpp b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.cpp index 0463f634e2..fcbf9d0195 100644 --- a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.cpp +++ b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.cpp @@ -64,13 +64,12 @@ nbnxn_kernel_gpu_ref(const NbnxnPairlistGpu *nbl, rvec *shift_vec, int force_flags, int clearF, - real * f, + gmx::ArrayRef<real> f, real * fshift, real * Vc, real * Vvdw) { const nbnxn_sci_t *nbln; - const real *x; gmx_bool bEner; gmx_bool bEwald; const real *Ftab = nullptr; @@ -102,9 +101,6 @@ nbnxn_kernel_gpu_ref(const NbnxnPairlistGpu *nbl, int int_bit; real fexcl; real c6, c12; - const real * shiftvec; - real * vdwparam; - int * type; const nbnxn_excl_t *excl[2]; int npair_tot, npair; @@ -117,7 +113,7 @@ nbnxn_kernel_gpu_ref(const NbnxnPairlistGpu *nbl, if (clearF == enbvClearFYes) { - clear_f(nbat, 0, f); + clear_f(nbat, 0, f.data()); } bEner = ((force_flags & GMX_FORCE_ENERGY) != 0); @@ -128,18 +124,18 @@ nbnxn_kernel_gpu_ref(const NbnxnPairlistGpu *nbl, Ftab = iconst->tabq_coul_F; } - rcut2 = iconst->rcoulomb*iconst->rcoulomb; - rvdw2 = iconst->rvdw*iconst->rvdw; + rcut2 = iconst->rcoulomb*iconst->rcoulomb; + rvdw2 = iconst->rvdw*iconst->rvdw; - rlist2 = nbl->rlist*nbl->rlist; + rlist2 = nbl->rlist*nbl->rlist; - type = nbat->type; - facel = iconst->epsfac; - shiftvec = shift_vec[0]; - vdwparam = nbat->nbfp; - ntype = nbat->ntype; + const int *type = nbat->params().type.data(); + facel = iconst->epsfac; + const real *shiftvec = shift_vec[0]; + const real *vdwparam = nbat->params().nbfp.data(); + ntype = nbat->params().numTypes; - x = nbat->x; + const real *x = nbat->x().data(); npair_tot = 0; nhwu = 0; diff --git a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.h b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.h index a4a1b02e22..7a5a1cff26 100644 --- a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.h +++ b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.h @@ -39,6 +39,7 @@ #include "gromacs/math/vectypes.h" #include "gromacs/mdlib/nbnxn_pairlist.h" #include "gromacs/mdtypes/forcerec.h" +#include "gromacs/utility/arrayref.h" #include "gromacs/utility/real.h" /* Reference (slow) kernel for nb n vs n GPU type pair lists */ @@ -49,7 +50,7 @@ nbnxn_kernel_gpu_ref(const NbnxnPairlistGpu *nbl, rvec *shift_vec, int force_flags, int clearF, - real * f, + gmx::ArrayRef<real> f, real * fshift, real * Vc, real * Vvdw); diff --git a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_inner.h b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_inner.h index 4e5f0481e5..a6d44626a7 100644 --- a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_inner.h +++ b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_inner.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package.
* - * Copyright (c) 2012,2013,2014,2015,2016,2018, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014,2015,2016,2018,2019, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -50,7 +50,7 @@ cj = l_cj[cjind].cj; #ifdef ENERGY_GROUPS - egp_cj = nbat->energrp[cj]; + egp_cj = nbatParams.energrp[cj]; #endif for (i = 0; i < UNROLLI; i++) { @@ -274,7 +274,7 @@ #ifdef CALC_ENERGIES #ifdef ENERGY_GROUPS - Vvdw[egp_sh_i[i]+((egp_cj>>(nbat->neg_2log*j)) & egp_mask)] += VLJ; + Vvdw[egp_sh_i[i] + ((egp_cj >> (nbatParams.neg_2log*j)) & egp_mask)] += VLJ; #else Vvdw_ci += VLJ; /* 1 flop for LJ energy addition */ @@ -333,7 +333,7 @@ #ifdef CALC_ENERGIES #ifdef ENERGY_GROUPS - Vc[egp_sh_i[i]+((egp_cj>>(nbat->neg_2log*j)) & egp_mask)] += vcoul; + Vc[egp_sh_i[i] + ((egp_cj >> (nbatParams.neg_2log*j)) & egp_mask)] += vcoul; #else Vc_ci += vcoul; /* 1 flop for Coulomb energy addition */ diff --git a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_outer.h b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_outer.h index b56f6aecdb..b50a45d9c0 100644 --- a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_outer.h +++ b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_outer.h @@ -107,11 +107,6 @@ NBK_FUNC_NAME(_VgrpF) // NOLINT(misc-definitions-in-headers) ) { const nbnxn_cj_t *l_cj; - const int *type; - const real *q; - const real *shiftvec; - const real *x; - const real *nbfp; real rcut2; #ifdef VDW_CUTOFF_CHECK real rvdw2; @@ -145,7 +140,6 @@ NBK_FUNC_NAME(_VgrpF) // NOLINT(misc-definitions-in-headers) #ifdef CALC_ENERGIES real lje_vc; #endif - const real *ljc; #endif #ifdef CALC_COUL_RF @@ -179,6 +173,8 @@ NBK_FUNC_NAME(_VgrpF) // NOLINT(misc-definitions-in-headers) swF4 = 5*ic->vdw_switch.c5; #endif + const nbnxn_atomdata_t::Params &nbatParams = nbat->params(); + #ifdef LJ_EWALD lje_coeff2 = ic->ewaldcoeff_lj*ic->ewaldcoeff_lj; lje_coeff6_6 = lje_coeff2*lje_coeff2*lje_coeff2/6.0; @@ -186,7 +182,7 @@ NBK_FUNC_NAME(_VgrpF) // NOLINT(misc-definitions-in-headers) lje_vc = ic->sh_lj_ewald; #endif - ljc = nbat->nbfp_comb; + const real *ljc = nbatParams.nbfp_comb.data(); #endif #ifdef CALC_COUL_RF @@ -210,22 +206,22 @@ NBK_FUNC_NAME(_VgrpF) // NOLINT(misc-definitions-in-headers) #endif #ifdef ENERGY_GROUPS - egp_mask = (1<<nbat->neg_2log) - 1; + egp_mask = (1 << nbatParams.neg_2log) - 1; #endif - rcut2 = ic->rcoulomb*ic->rcoulomb; + rcut2 = ic->rcoulomb*ic->rcoulomb; #ifdef VDW_CUTOFF_CHECK - rvdw2 = ic->rvdw*ic->rvdw; + rvdw2 = ic->rvdw*ic->rvdw; #endif - ntype2 = nbat->ntype*2; - nbfp = nbat->nbfp; - q = nbat->q; - type = nbat->type; - facel = ic->epsfac; - shiftvec = shift_vec[0]; - x = nbat->x; + ntype2 = nbatParams.numTypes*2; + const real *nbfp = nbatParams.nbfp.data(); + const real *q = nbatParams.q.data(); + const int *type = nbatParams.type.data(); + facel = ic->epsfac; + const real *shiftvec = shift_vec[0]; + const real *x = nbat->x().data(); l_cj = nbl->cj.data(); @@ -265,7 +261,7 @@ NBK_FUNC_NAME(_VgrpF) // NOLINT(misc-definitions-in-headers) #else for (i = 0; i < UNROLLI; i++) { - egp_sh_i[i] = ((nbat->energrp[ci]>>(i*nbat->neg_2log)) & egp_mask)*nbat->nenergrp; + egp_sh_i[i] = ((nbatParams.energrp[ci] >> (i*nbatParams.neg_2log)) & egp_mask)*nbatParams.nenergrp; } #endif #endif @@ -303,7 +299,7 @@ NBK_FUNC_NAME(_VgrpF) // NOLINT(misc-definitions-in-headers) { int egp_ind; #ifdef ENERGY_GROUPS - egp_ind =
egp_sh_i[i] + ((nbat->energrp[ci]>>(i*nbat->neg_2log)) & egp_mask); + egp_ind = egp_sh_i[i] + ((nbatParams.energrp[ci] >> (i*nbatParams.neg_2log)) & egp_mask); #else egp_ind = 0; #endif @@ -312,7 +308,7 @@ NBK_FUNC_NAME(_VgrpF) // NOLINT(misc-definitions-in-headers) #ifdef LJ_EWALD /* LJ Ewald self interaction */ - Vvdw[egp_ind] += 0.5*nbat->nbfp[nbat->type[ci*UNROLLI+i]*(nbat->ntype + 1)*2]/6*lje_coeff6_6; + Vvdw[egp_ind] += 0.5*nbatParams.nbfp[nbatParams.type[ci*UNROLLI+i]*(nbatParams.numTypes + 1)*2]/6*lje_coeff6_6; #endif } } diff --git a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_prune.cpp b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_prune.cpp index 183c2cfed1..45b5eda20a 100644 --- a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_prune.cpp +++ b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_prune.cpp @@ -60,7 +60,7 @@ nbnxn_kernel_prune_ref(NbnxnPairlistCpu * nbl, nbnxn_cj_t * gmx_restrict cjInner = nbl->cj.data(); const real * gmx_restrict shiftvec = shift_vec[0]; - const real * gmx_restrict x = nbat->x; + const real * gmx_restrict x = nbat->x().data(); const real rlist2 = rlistInner*rlistInner; diff --git a/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_inner.h b/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_inner.h index 844f55d7f5..38724cb2b5 100644 --- a/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_inner.h +++ b/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_inner.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
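The ENERGY_GROUPS code above and below repeatedly decodes per-cluster energy-group indices with shift/mask expressions such as (nbatParams.energrp[ci] >> (i*nbatParams.neg_2log)) & egp_mask. A minimal standalone sketch of that packing scheme (illustrative names, not code from this patch): each cluster stores one group index per atom, neg_2log bits wide, packed into a single int.

    #include <cassert>
    #include <vector>

    // Pack one energy-group index per atom, neg_2log bits each, into one int,
    // mirroring how the energrp buffer stores groups per cluster.
    static int packGroups(const std::vector<int> &groupPerAtom, int neg_2log)
    {
        int packed = 0;
        for (size_t i = 0; i < groupPerAtom.size(); i++)
        {
            packed |= groupPerAtom[i] << (i*neg_2log);
        }
        return packed;
    }

    // Recover the group of atom i, as the kernels do with egp_mask/neg_2log.
    static int groupOfAtom(int packed, int i, int neg_2log)
    {
        const int egp_mask = (1 << neg_2log) - 1;
        return (packed >> (i*neg_2log)) & egp_mask;
    }

    int main()
    {
        // 4 atoms per cluster, 4 energy groups -> neg_2log = 2
        const std::vector<int> groups = { 3, 0, 2, 1 };
        const int              packed = packGroups(groups, 2);
        for (int i = 0; i < 4; i++)
        {
            assert(groupOfAtom(packed, i, 2) == groups[i]);
        }
        return 0;
    }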
@@ -803,7 +803,7 @@ { int egps_j; #if UNROLLJ == 2 - egps_j = nbat->energrp[cj>>1]; + egps_j = nbatParams.energrp[cj >> 1]; egp_jj[0] = ((egps_j >> ((cj & 1)*egps_jshift)) & egps_jmask)*egps_jstride; #else /* We assume UNROLLI <= UNROLLJ */ @@ -811,7 +811,7 @@ for (jdi = 0; jdi < UNROLLJ/UNROLLI; jdi++) { int jj; - egps_j = nbat->energrp[cj*(UNROLLJ/UNROLLI)+jdi]; + egps_j = nbatParams.energrp[cj*(UNROLLJ/UNROLLI) + jdi]; for (jj = 0; jj < (UNROLLI/2); jj++) { egp_jj[jdi*(UNROLLI/2)+jj] = ((egps_j >> (jj*egps_jshift)) & egps_jmask)*egps_jstride; diff --git a/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_outer.h b/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_outer.h index ced88fcf03..3a289895fb 100644 --- a/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_outer.h +++ b/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_outer.h @@ -37,10 +37,6 @@ { using namespace gmx; const nbnxn_cj_t *l_cj; - const real *q; - const real *shiftvec; - const real *x; - real facel; int ci, ci_sh; int ish, ish3; gmx_bool do_LJ, half_LJ, do_coul; @@ -71,7 +67,6 @@ SimdBool diagonal_mask1_S0, diagonal_mask1_S2; #endif - unsigned *exclusion_filter; SimdBitMask filter_S0, filter_S2; SimdReal zero_S(0.0); @@ -132,8 +127,6 @@ #endif #ifdef LJ_COMB_LB - const real *ljc; - SimdReal hsig_i_S0, seps_i_S0; SimdReal hsig_i_S2, seps_i_S2; #else @@ -141,10 +134,6 @@ alignas(GMX_SIMD_ALIGNMENT) real pvdw_c6[2*UNROLLI*UNROLLJ]; real *pvdw_c12 = pvdw_c6 + UNROLLI*UNROLLJ; #endif - -#if defined LJ_COMB_GEOM || defined LJ_EWALD_GEOM - const real *ljc; -#endif #endif /* LJ_COMB_LB */ SimdReal minRsq_S; @@ -159,17 +148,19 @@ int npair = 0; #endif + const nbnxn_atomdata_t::Params &nbatParams = nbat->params(); + #if defined LJ_COMB_GEOM || defined LJ_COMB_LB || defined LJ_EWALD_GEOM - ljc = nbat->lj_comb; + const real * gmx_restrict ljc = nbatParams.lj_comb.data(); #endif #if !(defined LJ_COMB_GEOM || defined LJ_COMB_LB || defined FIX_LJ_C) /* No combination rule used */ - real *nbfp_ptr = nbat->nbfp_aligned; - const int *type = nbat->type; + const real * gmx_restrict nbfp_ptr = nbatParams.nbfp_aligned.data(); + const int * gmx_restrict type = nbatParams.type.data(); #endif /* Load j-i for the first i */ - diagonal_jmi_S = load(nbat->simd_2xnn_diagonal_j_minus_i); + diagonal_jmi_S = load(nbat->simdMasks.diagonal_2xnn_j_minus_i.data()); /* Generate all the diagonal masks as comparison results */ #if UNROLLI == UNROLLJ diagonal_mask_S0 = (zero_S < diagonal_jmi_S); @@ -194,9 +185,9 @@ /* Load masks for topology exclusion masking. filter_stride is static const, so the conditional will be optimized away. */ #if GMX_DOUBLE && !GMX_SIMD_HAVE_INT32_LOGICAL - exclusion_filter = nbat->simd_exclusion_filter64; + const std::uint64_t * gmx_restrict exclusion_filter = nbat->simdMasks.exclusion_filter64.data(); #else - exclusion_filter = nbat->simd_exclusion_filter; + const std::uint32_t * gmx_restrict exclusion_filter = nbat->simdMasks.exclusion_filter.data(); #endif /* Here we cast the exclusion filters from unsigned * to int * or real *. 
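The diagonal_2xnn_j_minus_i/diagonal_4xn_j_minus_i data loaded above exists to mask out the lower triangle of the diagonal cluster pair, so each pair is computed once and self-interactions are skipped. A scalar sketch of the idea (illustrative, not the SIMD code itself): the mask keeps an i,j pair only where j - i > 0, which is what the zero_S < diagonal_jmi_S comparisons compute lane-wise.

    #include <array>
    #include <cstdio>

    int main()
    {
        constexpr int unroll = 4;
        // Scalar stand-in for diagonal_4xn_j_minus_i: j - i for i = 0
        const std::array<int, unroll> jMinusI = { 0, 1, 2, 3 };
        for (int i = 0; i < unroll; i++)
        {
            for (int j = 0; j < unroll; j++)
            {
                // Lane-wise equivalent of (zero_S < diagonal_jmi_S) after
                // decrementing the j-minus-i vector once per i
                const bool interact = (jMinusI[j] - i) > 0;
                std::printf("%c", interact ? '1' : '0');
            }
            std::printf("\n"); // prints 0111 / 0011 / 0001 / 0000
        }
        return 0;
    }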
@@ -305,12 +296,12 @@ rcvdw2_S = SimdReal(ic->rvdw*ic->rvdw); #endif - minRsq_S = SimdReal(NBNXN_MIN_RSQ); + minRsq_S = SimdReal(NBNXN_MIN_RSQ); - q = nbat->q; - facel = ic->epsfac; - shiftvec = shift_vec[0]; - x = nbat->x; + const real * gmx_restrict q = nbatParams.q.data(); + const real facel = ic->epsfac; + const real * gmx_restrict shiftvec = shift_vec[0]; + const real * gmx_restrict x = nbat->x().data(); #ifdef FIX_LJ_C @@ -338,13 +329,13 @@ #endif /* FIX_LJ_C */ #ifdef ENERGY_GROUPS - egps_ishift = nbat->neg_2log; + egps_ishift = nbatParams.neg_2log; egps_imask = (1<<egps_ishift) - 1; - egps_jshift = 2*nbat->neg_2log; + egps_jshift = 2*nbatParams.neg_2log; egps_jmask = (1<<egps_jshift) - 1; egps_jstride = (UNROLLJ>>1)*UNROLLJ; /* Major division is over i-particle energy groups, determine the stride */ - Vstride_i = nbat->nenergrp*(1<<nbat->neg_2log)*egps_jstride; + Vstride_i = nbatParams.nenergrp*(1 << nbatParams.neg_2log)*egps_jstride; #endif l_cj = nbl->cj.data(); @@ -389,7 +380,7 @@ half_LJ = (((ciEntry.shift & NBNXN_CI_HALF_LJ(0)) != 0) || !do_LJ) && do_coul; #ifdef ENERGY_GROUPS - egps_i = nbat->energrp[ci]; + egps_i = nbatParams.energrp[ci]; { int ia, egp_ia; @@ -457,7 +448,7 @@ { real c6_i; - c6_i = nbat->nbfp[nbat->type[sci+ia]*(nbat->ntype + 1)*2]/6; + c6_i = nbatParams.nbfp[nbatParams.type[sci+ia]*(nbatParams.numTypes + 1)*2]/6; #ifdef ENERGY_GROUPS vvdwtp[ia][((egps_i>>(ia*egps_ishift)) & egps_imask)*egps_jstride] #else @@ -520,13 +511,14 @@ c12s_S2 = loadU1DualHsimd(ljc+sci2+STRIDE+2); } #elif !defined LJ_COMB_LB && !defined FIX_LJ_C - const real *nbfp0 = nbfp_ptr + type[sci ]*nbat->ntype*c_simdBestPairAlignment; - const real *nbfp1 = nbfp_ptr + type[sci+1]*nbat->ntype*c_simdBestPairAlignment; + const int numTypes = nbatParams.numTypes; + const real *nbfp0 = nbfp_ptr + type[sci ]*numTypes*c_simdBestPairAlignment; + const real *nbfp1 = nbfp_ptr + type[sci+1]*numTypes*c_simdBestPairAlignment; const real *nbfp2 = nullptr, *nbfp3 = nullptr; if (!half_LJ) { - nbfp2 = nbfp_ptr + type[sci+2]*nbat->ntype*c_simdBestPairAlignment; - nbfp3 = nbfp_ptr + type[sci+3]*nbat->ntype*c_simdBestPairAlignment; + nbfp2 = nbfp_ptr + type[sci+2]*numTypes*c_simdBestPairAlignment; + nbfp3 = nbfp_ptr + type[sci+3]*numTypes*c_simdBestPairAlignment; } #endif #endif diff --git a/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_prune.cpp b/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_prune.cpp index 96323e49a4..daca1cd809 100644 --- a/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_prune.cpp +++ b/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_prune.cpp @@ -67,7 +67,7 @@ nbnxn_kernel_prune_2xnn(NbnxnPairlistCpu * nbl, nbnxn_cj_t * gmx_restrict cjInner = nbl->cj.data(); const real * gmx_restrict shiftvec = shift_vec[0]; - const real * gmx_restrict x = nbat->x; + const real * gmx_restrict x = nbat->x().data(); const SimdReal rlist2_S(rlistInner*rlistInner); diff --git a/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_common.h b/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_common.h index b787b9d79a..7ac6e4f62a 100644 --- a/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_common.h +++ b/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_common.h @@ -104,7 +104,7 @@ gmx_load_simd_4xn_interactions(int excl, SimdBitMask gmx_unused filter_S1, SimdBitMask gmx_unused filter_S2, SimdBitMask gmx_unused filter_S3, - real gmx_unused *simd_interaction_array, + const real gmx_unused *simd_interaction_array, gmx::SimdBool *interact_S0, gmx::SimdBool *interact_S1, gmx::SimdBool
*interact_S2, diff --git a/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_inner.h b/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_inner.h index 5bc77035f0..8b84291627 100644 --- a/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_inner.h +++ b/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_inner.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -271,7 +271,7 @@ gmx_load_simd_4xn_interactions(l_cj[cjind].excl, filter_S0, filter_S1, filter_S2, filter_S3, - nbat->simd_interaction_array, + nbat->simdMasks.interaction_array.data(), &interact_S0, &interact_S1, &interact_S2, &interact_S3); #endif /* CHECK_EXCLS */ @@ -1066,7 +1066,7 @@ { int egps_j; #if UNROLLJ == 2 - egps_j = nbat->energrp[cj>>1]; + egps_j = nbatParams.energrp[cj >> 1]; egp_jj[0] = ((egps_j >> ((cj & 1)*egps_jshift)) & egps_jmask)*egps_jstride; #else /* We assume UNROLLI <= UNROLLJ */ @@ -1074,7 +1074,7 @@ for (jdi = 0; jdi < UNROLLJ/UNROLLI; jdi++) { int jj; - egps_j = nbat->energrp[cj*(UNROLLJ/UNROLLI)+jdi]; + egps_j = nbatParams.energrp[cj*(UNROLLJ/UNROLLI) + jdi]; for (jj = 0; jj < (UNROLLI/2); jj++) { egp_jj[jdi*(UNROLLI/2)+jj] = ((egps_j >> (jj*egps_jshift)) & egps_jmask)*egps_jstride; diff --git a/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_outer.h b/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_outer.h index ad4a1b499d..df29ca0b56 100644 --- a/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_outer.h +++ b/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_outer.h @@ -37,10 +37,6 @@ { using namespace gmx; const nbnxn_cj_t *l_cj; - const real * q; - const real *shiftvec; - const real *x; - real facel; int ci, ci_sh; int ish, ish3; gmx_bool do_LJ, half_LJ, do_coul; @@ -75,11 +71,6 @@ SimdBool diagonal_mask1_S0, diagonal_mask1_S1, diagonal_mask1_S2, diagonal_mask1_S3; #endif -#if GMX_DOUBLE && !GMX_SIMD_HAVE_INT32_LOGICAL - std::uint64_t *exclusion_filter; -#else - std::uint32_t *exclusion_filter; -#endif SimdBitMask filter_S0, filter_S1, filter_S2, filter_S3; SimdReal zero_S(0.0); @@ -141,17 +132,10 @@ #endif #ifdef LJ_COMB_LB - const real *ljc; - SimdReal hsig_i_S0, seps_i_S0; SimdReal hsig_i_S1, seps_i_S1; SimdReal hsig_i_S2, seps_i_S2; SimdReal hsig_i_S3, seps_i_S3; -#else - -#if defined LJ_COMB_GEOM || defined LJ_EWALD_GEOM - const real *ljc; -#endif #endif /* LJ_COMB_LB */ SimdReal minRsq_S; @@ -166,17 +150,19 @@ int npair = 0; #endif + const nbnxn_atomdata_t::Params &nbatParams = nbat->params(); + #if defined LJ_COMB_GEOM || defined LJ_COMB_LB || defined LJ_EWALD_GEOM - ljc = nbat->lj_comb; + const real * gmx_restrict ljc = nbatParams.lj_comb.data(); #endif #if !(defined LJ_COMB_GEOM || defined LJ_COMB_LB || defined FIX_LJ_C) /* No combination rule used */ - real *nbfp_ptr = nbat->nbfp_aligned; - const int *type = nbat->type; + const real * gmx_restrict nbfp_ptr = nbatParams.nbfp_aligned.data(); + const int * gmx_restrict type = nbatParams.type.data(); #endif /* Load j-i for the first i */ - diagonal_jmi_S = load(nbat->simd_4xn_diagonal_j_minus_i); + diagonal_jmi_S = 
load(nbat->simdMasks.diagonal_4xn_j_minus_i.data()); /* Generate all the diagonal masks as comparison results */ #if UNROLLI == UNROLLJ diagonal_mask_S0 = (zero_S < diagonal_jmi_S); @@ -199,7 +185,7 @@ #if UNROLLI == 2*UNROLLJ /* Load j-i for the second half of the j-cluster */ - diagonal_jmi_S = load(nbat->simd_4xn_diagonal_j_minus_i + UNROLLJ); + diagonal_jmi_S = load(nbat->simdMasks.diagonal_4xn_j_minus_i.data() + UNROLLJ); #endif diagonal_mask1_S0 = (zero_S < diagonal_jmi_S); @@ -213,9 +199,9 @@ #endif #if GMX_DOUBLE && !GMX_SIMD_HAVE_INT32_LOGICAL - exclusion_filter = nbat->simd_exclusion_filter64; + const std::uint64_t * gmx_restrict exclusion_filter = nbat->simdMasks.exclusion_filter64.data(); #else - exclusion_filter = nbat->simd_exclusion_filter; + const std::uint32_t * gmx_restrict exclusion_filter = nbat->simdMasks.exclusion_filter.data(); #endif /* Here we cast the exclusion filters from unsigned * to int * or real *. @@ -326,12 +312,12 @@ rcvdw2_S = SimdReal(ic->rvdw*ic->rvdw); #endif - minRsq_S = SimdReal(NBNXN_MIN_RSQ); + minRsq_S = SimdReal(NBNXN_MIN_RSQ); - q = nbat->q; - facel = ic->epsfac; - shiftvec = shift_vec[0]; - x = nbat->x; + const real * gmx_restrict q = nbatParams.q.data(); + const real facel = ic->epsfac; + const real * gmx_restrict shiftvec = shift_vec[0]; + const real * gmx_restrict x = nbat->x().data(); #ifdef FIX_LJ_C alignas(GMX_SIMD_ALIGNMENT) real pvdw_c6[2*UNROLLI*UNROLLJ]; @@ -361,13 +347,13 @@ #endif /* FIX_LJ_C */ #ifdef ENERGY_GROUPS - egps_ishift = nbat->neg_2log; + egps_ishift = nbatParams.neg_2log; egps_imask = (1<<egps_ishift) - 1; - egps_jshift = 2*nbat->neg_2log; + egps_jshift = 2*nbatParams.neg_2log; egps_jmask = (1<<egps_jshift) - 1; egps_jstride = (UNROLLJ>>1)*UNROLLJ; /* Major division is over i-particle energy groups, determine the stride */ - Vstride_i = nbat->nenergrp*(1<<nbat->neg_2log)*egps_jstride; + Vstride_i = nbatParams.nenergrp*(1 << nbatParams.neg_2log)*egps_jstride; #endif l_cj = nbl->cj.data(); @@ -413,7 +399,7 @@ half_LJ = (((ciEntry.shift & NBNXN_CI_HALF_LJ(0)) != 0) || !do_LJ) && do_coul; #ifdef ENERGY_GROUPS - egps_i = nbat->energrp[ci]; + egps_i = nbatParams.energrp[ci]; { int ia, egp_ia; @@ -484,7 +470,7 @@ { real c6_i; - c6_i = nbat->nbfp[nbat->type[sci+ia]*(nbat->ntype + 1)*2]/6; + c6_i = nbatParams.nbfp[nbatParams.type[sci+ia]*(nbatParams.numTypes + 1)*2]/6; #ifdef ENERGY_GROUPS vvdwtp[ia][((egps_i>>(ia*egps_ishift)) & egps_imask)*egps_jstride] #else @@ -559,13 +545,14 @@ c12s_S3 = setZero(); } #else - const real *nbfp0 = nbfp_ptr + type[sci ]*nbat->ntype*c_simdBestPairAlignment; - const real *nbfp1 = nbfp_ptr + type[sci+1]*nbat->ntype*c_simdBestPairAlignment; + const int numTypes = nbatParams.numTypes; + const real *nbfp0 = nbfp_ptr + type[sci ]*numTypes*c_simdBestPairAlignment; + const real *nbfp1 = nbfp_ptr + type[sci+1]*numTypes*c_simdBestPairAlignment; const real *nbfp2 = nullptr, *nbfp3 = nullptr; if (!half_LJ) { - nbfp2 = nbfp_ptr + type[sci+2]*nbat->ntype*c_simdBestPairAlignment; - nbfp3 = nbfp_ptr + type[sci+3]*nbat->ntype*c_simdBestPairAlignment; + nbfp2 = nbfp_ptr + type[sci+2]*numTypes*c_simdBestPairAlignment; + nbfp3 = nbfp_ptr + type[sci+3]*numTypes*c_simdBestPairAlignment; } #endif #endif diff --git a/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_prune.cpp b/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_prune.cpp index 8a84c93484..e7f829c47b 100644 --- a/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_prune.cpp +++ b/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_prune.cpp @@ -67,7 +67,7 @@ nbnxn_kernel_prune_4xn(NbnxnPairlistCpu
* nbl, nbnxn_cj_t * gmx_restrict cjInner = nbl->cj.data(); const real * gmx_restrict shiftvec = shift_vec[0]; - const real * gmx_restrict x = nbat->x; + const real * gmx_restrict x = nbat->x().data(); const SimdReal rlist2_S(rlistInner*rlistInner); diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp index 22c6ba169e..8908f3fb2e 100644 --- a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp +++ b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -410,7 +410,7 @@ void nbnxn_gpu_copy_xq_to_gpu(gmx_nbnxn_ocl_t *nb, } /* HtoD x, q */ - ocl_copy_H2D_async(adat->xq, nbatom->x + adat_begin * 4, adat_begin*sizeof(float)*4, + ocl_copy_H2D_async(adat->xq, nbatom->x().data() + adat_begin * 4, adat_begin*sizeof(float)*4, adat_len * sizeof(float) * 4, stream, bDoTime ? t->nb_h2d[iloc].fetchNextEvent() : nullptr); if (bDoTime) @@ -729,7 +729,7 @@ void nbnxn_gpu_launch_kernel_pruneonly(gmx_nbnxn_gpu_t *nb, * (and energies/shift forces if required). */ void nbnxn_gpu_launch_cpyback(gmx_nbnxn_ocl_t *nb, - const struct nbnxn_atomdata_t *nbatom, + struct nbnxn_atomdata_t *nbatom, int flags, int aloc, bool haveOtherWork) @@ -780,7 +780,7 @@ void nbnxn_gpu_launch_cpyback(gmx_nbnxn_ocl_t *nb, } /* DtoH f */ - ocl_copy_D2H_async(nbatom->out[0].f + adat_begin * 3, adat->f, adat_begin*3*sizeof(float), + ocl_copy_D2H_async(nbatom->out[0].f.data() + adat_begin * 3, adat->f, adat_begin*3*sizeof(float), (adat_len)* adat->f_elem_size, stream, bDoTime ? t->nb_d2h[iloc].fetchNextEvent() : nullptr); /* kick off work */ diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp index 9547252b74..4e1b973d67 100644 --- a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp +++ b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp @@ -145,7 +145,7 @@ static void init_atomdata_first(cl_atomdata_t *ad, int ntypes, gmx_device_runtim /* An element of the shift_vec device buffer has the same size as one element of the host side shift_vec buffer. 
*/ - ad->shift_vec_elem_size = sizeof(*nbnxn_atomdata_t::shift_vec); + ad->shift_vec_elem_size = sizeof(*nbnxn_atomdata_t::shift_vec.data()); ad->shift_vec = clCreateBuffer(runData->context, CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY, SHIFTS * ad->shift_vec_elem_size, nullptr, &cl_error); @@ -291,7 +291,7 @@ map_interaction_types_to_gpu_kernel_flavors(const interaction_const_t *ic, static void init_nbparam(cl_nbparam_t *nbp, const interaction_const_t *ic, const NbnxnListParameters *listParams, - const nbnxn_atomdata_t *nbat, + const nbnxn_atomdata_t::Params &nbatParams, const gmx_device_runtime_data_t *runData) { cl_int cl_error; @@ -299,7 +299,7 @@ static void init_nbparam(cl_nbparam_t *nbp, set_cutoff_parameters(nbp, ic, listParams); map_interaction_types_to_gpu_kernel_flavors(ic, - nbat->comb_rule, + nbatParams.comb_rule, &(nbp->eeltype), &(nbp->vdwtype)); @@ -307,11 +307,11 @@ static void init_nbparam(cl_nbparam_t *nbp, { if (ic->ljpme_comb_rule == ljcrGEOM) { - GMX_ASSERT(nbat->comb_rule == ljcrGEOM, "Combination rule mismatch!"); + GMX_ASSERT(nbatParams.comb_rule == ljcrGEOM, "Combination rule mismatch!"); } else { - GMX_ASSERT(nbat->comb_rule == ljcrLB, "Combination rule mismatch!"); + GMX_ASSERT(nbatParams.comb_rule == ljcrLB, "Combination rule mismatch!"); } } /* generate table for PME */ @@ -342,8 +342,8 @@ static void init_nbparam(cl_nbparam_t *nbp, ("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str()); } - int nnbfp = 2*nbat->ntype*nbat->ntype; - int nnbfp_comb = 2*nbat->ntype; + const int nnbfp = 2*nbatParams.numTypes*nbatParams.numTypes; + const int nnbfp_comb = 2*nbatParams.numTypes; { /* Switched from using textures to using buffers */ // TODO: decide which alternative is most efficient - textures or buffers. /* nbp->nbfp_climg2d = clCreateImage2D(runData->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &array_format, nnbfp, 1, 0, nbat->nbfp, &cl_error); */ - nbp->nbfp_climg2d = clCreateBuffer(runData->context, CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, - nnbfp*sizeof(cl_float), nbat->nbfp, &cl_error); + nbp->nbfp_climg2d = + clCreateBuffer(runData->context, + CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, + nnbfp*sizeof(cl_float), + const_cast<real *>(nbatParams.nbfp.data()), + &cl_error); GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS, ("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str()); @@ -369,8 +373,12 @@ static void init_nbparam(cl_nbparam_t *nbp, // TODO: decide which alternative is most efficient - textures or buffers.
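Note on the const_cast above: with the parameters now reached through a const nbnxn_atomdata_t::Params &, the host pointer comes out of the vector's data() as a const real *, while clCreateBuffer declares its host_ptr parameter as non-const void * even though CL_MEM_COPY_HOST_PTR only reads from it. A self-contained sketch of that pattern (stand-in function, not the OpenCL API):

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    // Stand-in for a C API like clCreateBuffer(..., void *host_ptr, ...) that
    // only reads from 'data' but is not declared const-correct.
    static void legacyCopyIn(void *data, std::size_t bytes)
    {
        std::printf("copying %zu bytes from %p\n", bytes, data);
    }

    int main()
    {
        const std::vector<float> nbfp = { 1.0f, 2.0f, 3.0f };
        // nbfp.data() is 'const float *'; the cast is safe because the callee
        // never writes through the pointer.
        legacyCopyIn(const_cast<float *>(nbfp.data()), nbfp.size()*sizeof(float));
        return 0;
    }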
/* nbp->nbfp_comb_climg2d = clCreateImage2D(runData->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &array_format, nnbfp_comb, 1, 0, nbat->nbfp_comb, &cl_error);*/ - nbp->nbfp_comb_climg2d = clCreateBuffer(runData->context, CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, - nnbfp_comb*sizeof(cl_float), nbat->nbfp_comb, &cl_error); + nbp->nbfp_comb_climg2d = + clCreateBuffer(runData->context, + CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, + nnbfp_comb*sizeof(cl_float), + const_cast<real *>(nbatParams.nbfp_comb.data()), + &cl_error); GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS, ("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str()); } @@ -598,10 +606,10 @@ static void nbnxn_gpu_init_kernels(gmx_nbnxn_ocl_t *nb) static void nbnxn_ocl_init_const(gmx_nbnxn_ocl_t *nb, const interaction_const_t *ic, const NbnxnListParameters *listParams, - const nbnxn_atomdata_t *nbat) + const nbnxn_atomdata_t::Params &nbatParams) { - init_atomdata_first(nb->atdat, nbat->ntype, nb->dev_rundata); - init_nbparam(nb->nbparam, ic, listParams, nbat, nb->dev_rundata); + init_atomdata_first(nb->atdat, nbatParams.numTypes, nb->dev_rundata); + init_nbparam(nb->nbparam, ic, listParams, nbatParams, nb->dev_rundata); } @@ -695,7 +703,7 @@ void nbnxn_gpu_init(gmx_nbnxn_ocl_t **p_nb, init_timings(nb->timings); } - nbnxn_ocl_init_const(nb, ic, listParams, nbat); + nbnxn_ocl_init_const(nb, ic, listParams, nbat->params()); /* Enable LJ param manual prefetch for AMD or Intel or if we request through env. var. * TODO: decide about NVIDIA @@ -839,7 +847,7 @@ void nbnxn_gpu_upload_shiftvec(gmx_nbnxn_ocl_t *nb, /* only if we have a dynamic box */ if (nbatom->bDynamicBox || !adat->bShiftVecUploaded) { - ocl_copy_H2D_async(adat->shift_vec, nbatom->shift_vec, 0, + ocl_copy_H2D_async(adat->shift_vec, nbatom->shift_vec.data(), 0, SHIFTS * adat->shift_vec_elem_size, ls, nullptr); adat->bShiftVecUploaded = CL_TRUE; } @@ -857,7 +865,7 @@ void nbnxn_gpu_init_atomdata(gmx_nbnxn_ocl_t *nb, cl_atomdata_t *d_atdat = nb->atdat; cl_command_queue ls = nb->stream[eintLocal]; - natoms = nbat->natoms; + natoms = nbat->numAtoms(); realloced = false; if (bDoTime) { @@ -923,12 +931,12 @@ void nbnxn_gpu_init_atomdata(gmx_nbnxn_ocl_t *nb, if (useLjCombRule(nb->nbparam->vdwtype)) { - ocl_copy_H2D_async(d_atdat->lj_comb, nbat->lj_comb, 0, + ocl_copy_H2D_async(d_atdat->lj_comb, nbat->params().lj_comb.data(), 0, natoms*sizeof(cl_float2), ls, bDoTime ? timers->atdat.fetchNextEvent() : nullptr); } else { - ocl_copy_H2D_async(d_atdat->atom_types, nbat->type, 0, + ocl_copy_H2D_async(d_atdat->atom_types, nbat->params().type.data(), 0, natoms*sizeof(int), ls, bDoTime ?
timers->atdat.fetchNextEvent() : nullptr); } diff --git a/src/gromacs/mdlib/nbnxn_pairlist.h b/src/gromacs/mdlib/nbnxn_pairlist.h index 9c01c7125b..1ac278db91 100644 --- a/src/gromacs/mdlib/nbnxn_pairlist.h +++ b/src/gromacs/mdlib/nbnxn_pairlist.h @@ -40,6 +40,7 @@ #include +#include "gromacs/gpu_utils/hostallocator.h" #include "gromacs/math/vectypes.h" #include "gromacs/mdlib/nbnxn_consts.h" #include "gromacs/mdtypes/nblist.h" @@ -52,6 +53,10 @@ struct NbnxnPairlistCpuWork; struct NbnxnPairlistGpuWork; struct tMPI_Atomic; +/* Convenience type for vector with aligned memory */ +template<class T> +using AlignedVector = std::vector < T, gmx::AlignedAllocator < T>>; + /* Convenience type for vector that avoids initialization at resize() */ template<class T> using FastVector = std::vector < T, gmx::DefaultInitializationAllocator < T>>; @@ -258,16 +263,28 @@ enum { nbatXYZ, nbatXYZQ, nbatX4, nbatX8 }; -typedef struct { - real *f; /* f, size natoms*fstride */ - real *fshift; /* Shift force array, size SHIFTS*DIM */ - int nV; /* The size of *Vvdw and *Vc */ - real *Vvdw; /* Temporary Van der Waals group energy storage */ - real *Vc; /* Temporary Coulomb group energy storage */ - int nVS; /* The size of *VSvdw and *VSc */ - real *VSvdw; /* Temporary SIMD Van der Waals group energy storage */ - real *VSc; /* Temporary SIMD Coulomb group energy storage */ -} nbnxn_atomdata_output_t; +// Struct that holds force and energy output buffers +struct nbnxn_atomdata_output_t +{ + /* Constructor + * + * \param[in] nb_kernel_type Type of non-bonded kernel + * \param[in] numEnergyGroups The number of energy groups + * \param[in] simdEnergyBufferStride Stride for entries in the energy buffers for SIMD kernels + * \param[in] pinningPolicy Sets the pinning policy for all buffers used on the GPU + */ + nbnxn_atomdata_output_t(int nb_kernel_type, + int numEnergyGroups, + int simdEnergyBufferStride, + gmx::PinningPolicy pinningPolicy); + + gmx::HostVector<real> f; // f, size natoms*fstride + gmx::HostVector<real> fshift; // Shift force array, size SHIFTS*DIM + gmx::HostVector<real> Vvdw; // Temporary Van der Waals group energy storage + gmx::HostVector<real> Vc; // Temporary Coulomb group energy storage + AlignedVector<real> VSvdw; // Temporary SIMD Van der Waals group energy storage + AlignedVector<real> VSc; // Temporary SIMD Coulomb group energy storage +}; /* Block size in atoms for the non-bonded thread force-buffer reduction, * should be a multiple of all cell and x86 SIMD sizes (i.e. 2, 4 and 8).
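With the new nbnxn_atomdata_output_t above, the explicit nV/nVS size fields disappear: each buffer carries its own size, and clearing reduces to std::fill, as clearGroupEnergies does earlier in this patch. A compilable sketch with plain std::vector standing in for HostVector/AlignedVector:

    #include <algorithm>
    #include <vector>

    struct OutputSketch
    {
        std::vector<float> Vvdw;  // was: real *Vvdw + int nV
        std::vector<float> Vc;    //      (the size now travels with the buffer)
        std::vector<float> VSvdw; // was: real *VSvdw + int nVS
        std::vector<float> VSc;
    };

    static void clearGroupEnergiesSketch(OutputSketch *out)
    {
        std::fill(out->Vvdw.begin(), out->Vvdw.end(), 0.0f);
        std::fill(out->Vc.begin(), out->Vc.end(), 0.0f);
        std::fill(out->VSvdw.begin(), out->VSvdw.end(), 0.0f);
        std::fill(out->VSc.begin(), out->VSc.end(), 0.0f);
    }

    int main()
    {
        OutputSketch out;
        out.Vvdw.resize(4, 1.0f);
        out.Vc.resize(4, 1.0f);
        clearGroupEnergiesSketch(&out);
        return (out.Vvdw[0] == 0.0f && out.Vc[3] == 0.0f) ? 0 : 1;
    }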
@@ -298,49 +315,132 @@ enum { ljcrGEOM, ljcrLB, ljcrNONE, ljcrNR }; -typedef struct nbnxn_atomdata_t { //NOLINT(clang-analyzer-optin.performance.Padding) - nbnxn_alloc_t *alloc; - nbnxn_free_t *free; - int ntype; /* The number of different atom types */ - real *nbfp; /* Lennard-Jones 6*C6 and 12*C12 params, size ntype^2*2 */ - int comb_rule; /* Combination rule, see enum above */ - real *nbfp_comb; /* LJ parameter per atom type, size ntype*2 */ - real *nbfp_aligned; /* As nbfp, but with an alignment (stride) suitable - * for the present SIMD architectures - */ - int natoms; /* Number of atoms */ - int natoms_local; /* Number of local atoms */ - int *type; /* Atom types */ - real *lj_comb; /* LJ parameters per atom for combining for pairs */ - int XFormat; /* The format of x (and q), enum */ - int FFormat; /* The format of f, enum */ - real *q; /* Charges, can be NULL if incorporated in x */ - int na_c; /* The number of atoms per cluster */ - int nenergrp; /* The number of energy groups */ - int neg_2log; /* Log2 of nenergrp */ - int *energrp; /* The energy groups per cluster, can be NULL */ - gmx_bool bDynamicBox; /* Do we need to update shift_vec every step? */ - rvec *shift_vec; /* Shift vectors, copied from t_forcerec */ - int xstride; /* stride for a coordinate in x (usually 3 or 4) */ - int fstride; /* stride for a coordinate in f (usually 3 or 4) */ - real *x; /* x and possibly q, size natoms*xstride */ - - /* j-atom minus i-atom index for generating self and Newton exclusions * cluster-cluster pairs of the diagonal, for 4xn and 2xnn kernels. +/* Struct that stores atom related data for the nbnxn module * + * Note: performance would improve slightly if the std::vector containers + * in this struct did not initialize during resize(). + */ +struct nbnxn_atomdata_t +{ //NOLINT(clang-analyzer-optin.performance.Padding) + struct Params + { + /* Constructor + * + * \param[in] pinningPolicy Sets the pinning policy for all data that might be transferred to a GPU + */ + Params(gmx::PinningPolicy pinningPolicy); + + // The number of different atom types + int numTypes; + // Lennard-Jones 6*C6 and 12*C12 parameters, size numTypes^2*2 + gmx::HostVector<real> nbfp; + // Combination rule, see enum defined above + int comb_rule; + // LJ parameters per atom type, size numTypes*2 + gmx::HostVector<real> nbfp_comb; + // As nbfp, but with a stride for the present SIMD architecture + AlignedVector<real> nbfp_aligned; + // Atom types per atom + gmx::HostVector<int> type; + // LJ parameters per atom for fast SIMD loading + gmx::HostVector<real> lj_comb; + // Charges per atom, not set with format nbatXYZQ + gmx::HostVector<real> q; + // The number of energy groups + int nenergrp; + // 2log(nenergrp) + int neg_2log; + // The energy groups, one int entry per cluster, only set when needed + gmx::HostVector<int> energrp; + }; + + // Diagonal and topology exclusion helper data for all SIMD kernels + struct SimdMasks + { + SimdMasks(); + + // Helper data for setting up diagonal exclusion masks in the SIMD 4xN kernels + AlignedVector<real> diagonal_4xn_j_minus_i; + // Helper data for setting up diagonal exclusion masks in the SIMD 2xNN kernels + AlignedVector<real> diagonal_2xnn_j_minus_i; + // Filters for topology exclusion masks for the SIMD kernels + AlignedVector<std::uint32_t> exclusion_filter; + // Filters for topology exclusion masks for double SIMD kernels without SIMD int32 logical support + AlignedVector<std::uint64_t> exclusion_filter64; + // Array of masks needed for exclusions + AlignedVector<real> interaction_array; + }; + + /* Constructor + * + * \param[in] pinningPolicy Sets
the pinning policy for all data that might be transferred to a GPU */ - real *simd_4xn_diagonal_j_minus_i; - real *simd_2xnn_diagonal_j_minus_i; - /* Filters for topology exclusion masks for the SIMD kernels. */ - uint32_t *simd_exclusion_filter; - uint64_t *simd_exclusion_filter64; //!< Used for double w/o SIMD int32 logical support - real *simd_interaction_array; /* Array of masks needed for exclusions */ - int nout; /* The number of force arrays */ - nbnxn_atomdata_output_t *out; /* Output data structures */ - int nalloc; /* Allocation size of all arrays (for x/f *x/fstride) */ - gmx_bool bUseBufferFlags; /* Use the flags or operate on all atoms */ - nbnxn_buffer_flags_t buffer_flags; /* Flags for buffer zeroing+reduc. */ - gmx_bool bUseTreeReduce; /* Use tree for force reduction */ - tMPI_Atomic *syncStep; /* Synchronization step for tree reduce */ -} nbnxn_atomdata_t; + nbnxn_atomdata_t(gmx::PinningPolicy pinningPolicy); + + /* Returns a const reference to the parameters */ + const Params &params() const + { + return params_; + } + + /* Returns a non-const reference to the parameters */ + Params &paramsDeprecated() + { + return params_; + } + + /* Returns the current total number of atoms stored */ + int numAtoms() const + { + return numAtoms_; + } + + /* Return the coordinate buffer, and q with xFormat==nbatXYZQ */ + gmx::ArrayRef<const real> x() const + { + return x_; + } + + /* Return the coordinate buffer, and q with xFormat==nbatXYZQ */ + gmx::ArrayRef<real> x() + { + return x_; + } + + /* Resizes the coordinate buffer and sets the number of atoms */ + void resizeCoordinateBuffer(int numAtoms); + + /* Resizes the force buffers for the current number of atoms */ + void resizeForceBuffers(); + + private: + // The LJ and charge parameters + Params params_; + // The total number of atoms currently stored + int numAtoms_; + public: + int natoms_local; /* Number of local atoms */ + int XFormat; /* The format of x (and q), enum */ + int FFormat; /* The format of f, enum */ + gmx_bool bDynamicBox; /* Do we need to update shift_vec every step? */ + gmx::HostVector<gmx::RVec> shift_vec; /* Shift vectors, copied from t_forcerec */ + int xstride; /* stride for a coordinate in x (usually 3 or 4) */ + int fstride; /* stride for a coordinate in f (usually 3 or 4) */ + private: + gmx::HostVector<real> x_; /* x and possibly q, size natoms*xstride */ + + public: + // Masks for handling exclusions in the SIMD kernels + const SimdMasks simdMasks; + + /* Output data */ + std::vector<nbnxn_atomdata_output_t> out; /* Output data structures, 1 per thread */ + + /* Reduction related data */ + gmx_bool bUseBufferFlags; /* Use the flags or operate on all atoms */ + nbnxn_buffer_flags_t buffer_flags; /* Flags for buffer zeroing+reduc.
*/ + gmx_bool bUseTreeReduce; /* Use tree for force reduction */ + tMPI_Atomic *syncStep; /* Synchronization step for tree reduce */ +}; #endif diff --git a/src/gromacs/mdlib/nbnxn_search.cpp b/src/gromacs/mdlib/nbnxn_search.cpp index 5b7efff423..4a5d6da7bc 100644 --- a/src/gromacs/mdlib/nbnxn_search.cpp +++ b/src/gromacs/mdlib/nbnxn_search.cpp @@ -1772,7 +1772,9 @@ static void make_fep_list(const nbnxn_search *nbs, reallocate_nblist(nlist); } - ngid = nbat->nenergrp; + const nbnxn_atomdata_t::Params &nbatParams = nbat->params(); + + ngid = nbatParams.nenergrp; if (ngid*jGrid.na_cj > gmx::index(sizeof(gid_cj)*8)) { @@ -1780,8 +1782,8 @@ static void make_fep_list(const nbnxn_search *nbs, iGrid.na_c, jGrid.na_cj, (sizeof(gid_cj)*8)/jGrid.na_cj); } - egp_shift = nbat->neg_2log; - egp_mask = (1<<nbat->neg_2log) - 1; + egp_shift = nbatParams.neg_2log; + egp_mask = (1 << egp_shift) - 1; /* Loop over the atoms in the i sub-cell */ bFEP_i_all = TRUE; @@ -1811,7 +1813,7 @@ static void make_fep_list(const nbnxn_search *nbs, if (ngid > 1) { - gid_i = (nbat->energrp[ci] >> (egp_shift*i)) & egp_mask; + gid_i = (nbatParams.energrp[ci] >> (egp_shift*i)) & egp_mask; } for (int cj_ind = cj_ind_start; cj_ind < cj_ind_end; cj_ind++) { @@ -1826,7 +1828,7 @@ static void make_fep_list(const nbnxn_search *nbs, fep_cj = jGrid.fep[cjr]; if (ngid > 1) { - gid_cj = nbat->energrp[cja]; + gid_cj = nbatParams.energrp[cja]; } } else if (2*jGrid.na_cj == jGrid.na_c) { @@ -1836,7 +1838,7 @@ static void make_fep_list(const nbnxn_search *nbs, fep_cj = (jGrid.fep[cjr>>1] >> ((cjr&1)*jGrid.na_cj)) & ((1<<jGrid.na_cj) - 1); if (ngid > 1) { - gid_cj = nbat->energrp[cja>>1] >> ((cja&1)*jGrid.na_cj*egp_shift) & ((1<<(jGrid.na_cj*egp_shift)) - 1); + gid_cj = nbatParams.energrp[cja>>1] >> ((cja&1)*jGrid.na_cj*egp_shift) & ((1<<(jGrid.na_cj*egp_shift)) - 1); } } else { @@ -1846,7 +1848,7 @@ static void make_fep_list(const nbnxn_search *nbs, fep_cj = jGrid.fep[cjr*2] + (jGrid.fep[cjr*2+1] << jGrid.na_c); if (ngid > 1) { - gid_cj = nbat->energrp[cja*2] + (nbat->energrp[cja*2+1] << (jGrid.na_c*egp_shift)); + gid_cj = nbatParams.energrp[cja*2] + (nbatParams.energrp[cja*2+1] << (jGrid.na_c*egp_shift)); } } @@ -2001,9 +2003,9 @@ static void make_fep_list(const nbnxn_search *nbs, bFEP_i = ((iGrid.fep[c_abs - iGrid.cell0*c_gpuNumClusterPerCell] & (1 << i)) != 0u); - xi = nbat->x[ind_i*nbat->xstride+XX] + shx; - yi = nbat->x[ind_i*nbat->xstride+YY] + shy; - zi = nbat->x[ind_i*nbat->xstride+ZZ] + shz; + xi = nbat->x()[ind_i*nbat->xstride+XX] + shx; + yi = nbat->x()[ind_i*nbat->xstride+YY] + shy; + zi = nbat->x()[ind_i*nbat->xstride+ZZ] + shz; if ((nlist->nrj + cj4_ind_end - cj4_ind_start)*c_nbnxnGpuJgroupSize*nbl->na_cj > nlist->maxnrj) { @@ -2052,9 +2054,9 @@ static void make_fep_list(const nbnxn_search *nbs, excl_pair = a_mod_wj(j)*nbl->na_ci + i; excl_bit = (1U << (gcj*c_gpuNumClusterPerCell + c)); - dx = nbat->x[ind_j*nbat->xstride+XX] - xi; - dy = nbat->x[ind_j*nbat->xstride+YY] - yi; - dz = nbat->x[ind_j*nbat->xstride+ZZ] - zi; + dx = nbat->x()[ind_j*nbat->xstride+XX] - xi; + dy = nbat->x()[ind_j*nbat->xstride+YY] - yi; + dz = nbat->x()[ind_j*nbat->xstride+ZZ] - zi; /* The unpruned GPU list has more than 2/3 * of the atom pairs beyond rlist.
Using @@ -3323,7 +3325,7 @@ static void makeClusterListWrapper(NbnxnPairlistCpu *nbl, makeClusterListSimple(jGrid, nbl, ci, firstCell, lastCell, excludeSubDiagonal, - nbat->x, + nbat->x().data(), rlist2, rbb2, numDistanceChecks); break; @@ -3332,7 +3334,7 @@ static void makeClusterListWrapper(NbnxnPairlistCpu *nbl, makeClusterListSimd4xn(jGrid, nbl, ci, firstCell, lastCell, excludeSubDiagonal, - nbat->x, + nbat->x().data(), rlist2, rbb2, numDistanceChecks); break; @@ -3342,7 +3344,7 @@ static void makeClusterListWrapper(NbnxnPairlistCpu *nbl, makeClusterListSimd2xnn(jGrid, nbl, ci, firstCell, lastCell, excludeSubDiagonal, - nbat->x, + nbat->x().data(), rlist2, rbb2, numDistanceChecks); break; @@ -3368,7 +3370,7 @@ static void makeClusterListWrapper(NbnxnPairlistGpu *nbl, make_cluster_list_supersub(iGrid, jGrid, nbl, ci, cj, excludeSubDiagonal, - nbat->xstride, nbat->x, + nbat->xstride, nbat->x().data(), rlist2, rbb2, numDistanceChecks); } @@ -3739,7 +3741,7 @@ static void nbnxn_make_pairlist_part(const nbnxn_search *nbs, nbl->work); icell_set_x(cell0_i+ci, shx, shy, shz, - nbat->xstride, nbat->x, + nbat->xstride, nbat->x().data(), nb_kernel_type, nbl->work); @@ -4265,11 +4267,11 @@ void nbnxn_make_pairlist(nbnxn_search *nbs, fprintf(debug, "ns making %d nblists\n", nnbl); } - nbat->bUseBufferFlags = (nbat->nout > 1); + nbat->bUseBufferFlags = (nbat->out.size() > 1); /* We should re-init the flags before making the first list */ if (nbat->bUseBufferFlags && LOCAL_I(iloc)) { - init_buffer_flags(&nbat->buffer_flags, nbat->natoms); + init_buffer_flags(&nbat->buffer_flags, nbat->numAtoms()); } int nzi; @@ -4360,7 +4362,7 @@ void nbnxn_make_pairlist(nbnxn_search *nbs, */ if (nbat->bUseBufferFlags && ((zi == 0 && zj == 0))) { - init_buffer_flags(&nbs->work[th].buffer_flags, nbat->natoms); + init_buffer_flags(&nbs->work[th].buffer_flags, nbat->numAtoms()); } if (CombineNBLists && th > 0)
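Taken together, the call-site changes in this patch follow one pattern: direct struct fields become accessors (nbat->natoms becomes nbat->numAtoms(), nbat->x becomes nbat->x().data(), nbat->ntype becomes nbat->params().numTypes), and raw pointers come from .data() on the owning vectors. A simplified, self-contained sketch of that access style (names and signatures illustrative, not the real class):

    #include <vector>

    namespace sketch
    {

    struct Params
    {
        int              numTypes = 0;
        std::vector<int> type;
    };

    class AtomData
    {
        public:
            // Read-only access to the parameters, as with nbat->params()
            const Params &params() const { return params_; }
            // Atom count derived from the coordinate buffer, as with numAtoms()
            int numAtoms() const { return static_cast<int>(x_.size())/xstride; }
            // Coordinate buffer accessor, as with nbat->x()
            const std::vector<float> &x() const { return x_; }

            int xstride = 4; // XYZQ-style layout

        private:
            Params             params_;
            std::vector<float> x_;
    };

    } // namespace sketch

    // Old style: const real *x = nbat->x; loop to nbat->natoms.
    // New style, as used throughout this patch:
    static float sumX(const sketch::AtomData &nbat)
    {
        const float *x   = nbat.x().data();       // nbat->x().data() in the patch
        float        sum = 0;
        for (int i = 0; i < nbat.numAtoms(); i++) // nbat->numAtoms() in the patch
        {
            sum += x[i*nbat.xstride];
        }
        return sum;
    }

    int main()
    {
        sketch::AtomData nbat;
        return static_cast<int>(sumX(nbat));
    }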