Changed all manually managed pointers to std::vector.
Split off a Params and a SimdMasks struct.
Changed some data members to be private; more to be done.
This change is only refactoring, no functional changes.
Note: negligible performance impact on the nbnxn gridding
due to (unnecessary) initialization of std::vector elements during resize().
Change-Id: I9c70a1f8f272c80a7cf335fcbd867bd79c4102a2
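
As background for the resize() note: std::vector<T>::resize() value-initializes (zeroes) every newly created element, which is wasted work when the gridding code overwrites the buffer right away. A minimal sketch of the usual allocator-adaptor workaround, assuming plain std::allocator underneath (GROMACS' own allocators are not shown here):

    #include <memory>
    #include <utility>
    #include <vector>

    /* Allocator adaptor that default-initializes new elements (a no-op for
     * trivial types) instead of value-initializing them to zero. */
    template<typename T, typename A = std::allocator<T>>
    class DefaultInitAllocator : public A
    {
        public:
            using A::A;

            template<typename U>
            struct rebind
            {
                using other = DefaultInitAllocator<
                        U, typename std::allocator_traits<A>::template rebind_alloc<U>>;
            };

            template<typename U>
            void construct(U *p) { ::new (static_cast<void *>(p)) U; }

            template<typename U, typename... Args>
            void construct(U *p, Args &&... args)
            {
                std::allocator_traits<A>::construct(static_cast<A &>(*this), p,
                                                    std::forward<Args>(args)...);
            }
    };

    /* resize() on such a vector leaves newly added floats uninitialized: */
    std::vector<float, DefaultInitAllocator<float>> x;
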
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2014,2015,2016,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2012,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
}
// TODO: template on transferKind to avoid runtime conditionals
-int cu_copy_H2D(void *d_dest, void *h_src, size_t bytes,
+int cu_copy_H2D(void *d_dest, const void *h_src, size_t bytes,
GpuApiCallBehavior transferKind, cudaStream_t s = nullptr)
{
cudaError_t stat;
return 0;
}
-int cu_copy_H2D_sync(void * d_dest, void * h_src, size_t bytes)
+int cu_copy_H2D_sync(void * d_dest, const void * h_src, size_t bytes)
{
return cu_copy_H2D(d_dest, h_src, bytes, GpuApiCallBehavior::Sync);
}
/*!
 * The copy is launched in stream s, or in stream 0 if not specified.
*/
-int cu_copy_H2D_async(void * d_dest, void * h_src, size_t bytes, cudaStream_t s = nullptr)
+int cu_copy_H2D_async(void * d_dest, const void * h_src, size_t bytes, cudaStream_t s = nullptr)
{
return cu_copy_H2D(d_dest, h_src, bytes, GpuApiCallBehavior::Async, s);
}
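
The added const on h_src matches the underlying CUDA runtime call, which has always taken the source as const; a sketch of what the H2D wrappers ultimately execute (status checking elided):

    /* cudaMemcpyAsync takes the host source as const void *, so the
     * wrappers can accept const host buffers without casting: */
    cudaError_t stat = cudaMemcpyAsync(d_dest, h_src, bytes,
                                       cudaMemcpyHostToDevice, s);
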
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2014,2015,2016,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2012,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
*
 * The copy is launched in stream s, or in stream 0 if not specified.
*/
-int cu_copy_H2D(void *d_dest, void *h_src, size_t bytes, GpuApiCallBehavior transferKind, cudaStream_t /*s = nullptr*/);
+int cu_copy_H2D(void *d_dest, const void *h_src, size_t bytes, GpuApiCallBehavior transferKind, cudaStream_t /*s = nullptr*/);
/*! Launches synchronous host to device memory copy. */
-int cu_copy_H2D_sync(void * /*d_dest*/, void * /*h_src*/, size_t /*bytes*/);
+int cu_copy_H2D_sync(void * /*d_dest*/, const void * /*h_src*/, size_t /*bytes*/);
/*! Launches asynchronous host to device memory copy in stream s. */
-int cu_copy_H2D_async(void * /*d_dest*/, void * /*h_src*/, size_t /*bytes*/, cudaStream_t /*s = nullptr*/);
+int cu_copy_H2D_async(void * /*d_dest*/, const void * /*h_src*/, size_t /*bytes*/, cudaStream_t /*s = nullptr*/);
// TODO: the two functions below are pretty much a constructor/destructor of a simple
// GPU table object. The almost self-contained fetchFromParamLookupTable() belongs with them.
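
A hedged sketch of the table object the TODO suggests; the class name and layout are hypothetical, only the three CUDA runtime calls are real:

    /* Hypothetical RAII wrapper: owns a device copy of a read-only
     * parameter table for the lifetime of the object. */
    class GpuParamLookupTable
    {
        public:
            GpuParamLookupTable(const float *hostData, size_t numElements)
            {
                cudaMalloc(reinterpret_cast<void **>(&deviceData_),
                           numElements*sizeof(float));
                cudaMemcpy(deviceData_, hostData, numElements*sizeof(float),
                           cudaMemcpyHostToDevice);
            }
            ~GpuParamLookupTable() { cudaFree(deviceData_); }

        private:
            float *deviceData_ = nullptr;
    };
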
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2014,2015,2016,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/smalloc.h"
-int ocl_copy_H2D(cl_mem d_dest, void* h_src,
+int ocl_copy_H2D(cl_mem d_dest, const void* h_src,
size_t offset, size_t bytes,
GpuApiCallBehavior transferKind,
cl_command_queue command_queue,
* identifying this particular host to device operation. The event can further
* be used to queue a wait for this operation or to query profiling information.
*/
-int ocl_copy_H2D_async(cl_mem d_dest, void * h_src,
+int ocl_copy_H2D_async(cl_mem d_dest, const void * h_src,
size_t offset, size_t bytes,
cl_command_queue command_queue,
cl_event *copy_event)
/*! \brief Launches synchronous host to device memory copy.
*/
-int ocl_copy_H2D_sync(cl_mem d_dest, void * h_src,
+int ocl_copy_H2D_sync(cl_mem d_dest, const void * h_src,
size_t offset, size_t bytes,
cl_command_queue command_queue)
{
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2014,2015,2016,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
* identifying this particular host to device operation. The event can further
* be used to queue a wait for this operation or to query profiling information.
*/
-int ocl_copy_H2D(cl_mem d_dest, void* h_src,
+int ocl_copy_H2D(cl_mem d_dest, const void* h_src,
size_t offset, size_t bytes,
GpuApiCallBehavior transferKind,
cl_command_queue command_queue,
cl_event *copy_event);
/*! \brief Launches asynchronous host to device memory copy. */
-int ocl_copy_H2D_async(cl_mem d_dest, void * h_src,
+int ocl_copy_H2D_async(cl_mem d_dest, const void * h_src,
size_t offset, size_t bytes,
cl_command_queue command_queue,
cl_event *copy_event);
/*! \brief Launches synchronous host to device memory copy. */
-int ocl_copy_H2D_sync(cl_mem d_dest, void * h_src,
+int ocl_copy_H2D_sync(cl_mem d_dest, const void * h_src,
size_t offset, size_t bytes,
cl_command_queue command_queue);
enbnxninitcombrule = enbnxninitcombruleNONE;
}
- snew(nbv->nbat, 1);
+ nbv->nbat = new nbnxn_atomdata_t(nbv->bUseGPU ? gmx::PinningPolicy::PinnedIfSupported : gmx::PinningPolicy::CannotBePinned);
int mimimumNumEnergyGroupNonbonded = ir->opts.ngener;
if (ir->opts.ngener - ir->nwall == 1)
{
enbnxninitcombrule,
fr->ntype, fr->nbfp,
mimimumNumEnergyGroupNonbonded,
- bSimpleList ? gmx_omp_nthreads_get(emntNonbonded) : 1,
- nb_alloc, nb_free);
+ bSimpleList ? gmx_omp_nthreads_get(emntNonbonded) : 1);
if (nbv->bUseGPU)
{
*ptr = ptr_new;
}
-/* Reallocate the nbnxn_atomdata_t for a size of n atoms */
-void nbnxn_atomdata_realloc(nbnxn_atomdata_t *nbat, int n)
+void nbnxn_atomdata_t::resizeCoordinateBuffer(int numAtoms)
{
- GMX_ASSERT(nbat->nalloc >= nbat->natoms, "We should have at least as many elelements allocated as there are set");
-
- int t;
-
- nbnxn_realloc_void(reinterpret_cast<void **>(&nbat->type),
- nbat->natoms*sizeof(*nbat->type),
- n*sizeof(*nbat->type),
- nbat->alloc, nbat->free);
- nbnxn_realloc_void(reinterpret_cast<void **>(&nbat->lj_comb),
- nbat->natoms*2*sizeof(*nbat->lj_comb),
- n*2*sizeof(*nbat->lj_comb),
- nbat->alloc, nbat->free);
- if (nbat->XFormat != nbatXYZQ)
- {
- nbnxn_realloc_void(reinterpret_cast<void **>(&nbat->q),
- nbat->natoms*sizeof(*nbat->q),
- n*sizeof(*nbat->q),
- nbat->alloc, nbat->free);
- }
- if (nbat->nenergrp > 1)
- {
- nbnxn_realloc_void(reinterpret_cast<void **>(&nbat->energrp),
- nbat->natoms/nbat->na_c*sizeof(*nbat->energrp),
- n/nbat->na_c*sizeof(*nbat->energrp),
- nbat->alloc, nbat->free);
- }
- nbnxn_realloc_void(reinterpret_cast<void **>(&nbat->x),
- nbat->natoms*nbat->xstride*sizeof(*nbat->x),
- n*nbat->xstride*sizeof(*nbat->x),
- nbat->alloc, nbat->free);
- for (t = 0; t < nbat->nout; t++)
+ numAtoms_ = numAtoms;
+
+ x_.resize(numAtoms*xstride);
+}
+
+void nbnxn_atomdata_t::resizeForceBuffers()
+{
+ /* Force buffers need padding up to a multiple of the buffer flag size */
+ const int paddedSize = (numAtoms() + NBNXN_BUFFERFLAG_SIZE - 1)/NBNXN_BUFFERFLAG_SIZE*NBNXN_BUFFERFLAG_SIZE;
+
+    /* Should we let each thread allocate its own data instead? */
+ for (nbnxn_atomdata_output_t &outBuffer : out)
{
- /* Allocate one element extra for possible signaling with GPUs */
- nbnxn_realloc_void(reinterpret_cast<void **>(&nbat->out[t].f),
- nbat->natoms*nbat->fstride*sizeof(*nbat->out[t].f),
- n*nbat->fstride*sizeof(*nbat->out[t].f),
- nbat->alloc, nbat->free);
+ outBuffer.f.resize(paddedSize*fstride);
}
- nbat->nalloc = n;
}
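
The paddedSize expression is the standard integer round-up-to-a-multiple idiom; illustrated below with 16 standing in for NBNXN_BUFFERFLAG_SIZE (the value is assumed only for the example):

    /* (n + m - 1)/m*m rounds n up to the next multiple of m: */
    constexpr int roundUpToMultiple(int n, int m)
    {
        return (n + m - 1)/m*m;
    }
    /* roundUpToMultiple(100, 16) == 112; roundUpToMultiple(112, 16) == 112 */
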
/* Initializes an nbnxn_atomdata_output_t data structure */
-static void nbnxn_atomdata_output_init(nbnxn_atomdata_output_t *out,
- int nb_kernel_type,
- int nenergrp, int stride,
- nbnxn_alloc_t *ma)
+nbnxn_atomdata_output_t::nbnxn_atomdata_output_t(int nb_kernel_type,
+ int numEnergyGroups,
+ int simdEnergyBufferStride,
+ gmx::PinningPolicy pinningPolicy) :
+ f({}, {pinningPolicy}),
+ fshift({}, {pinningPolicy}),
+ Vvdw({}, {pinningPolicy}),
+ Vc({}, {pinningPolicy})
{
- out->f = nullptr;
- ma(reinterpret_cast<void **>(&out->fshift), SHIFTS*DIM*sizeof(*out->fshift));
- out->nV = nenergrp*nenergrp;
- ma(reinterpret_cast<void **>(&out->Vvdw), out->nV*sizeof(*out->Vvdw));
- ma(reinterpret_cast<void **>(&out->Vc), out->nV*sizeof(*out->Vc ));
+ fshift.resize(SHIFTS*DIM);
+ Vvdw.resize(numEnergyGroups*numEnergyGroups);
+ Vc.resize(numEnergyGroups*numEnergyGroups);
if (nb_kernel_type == nbnxnk4xN_SIMD_4xN ||
nb_kernel_type == nbnxnk4xN_SIMD_2xNN)
{
- int cj_size = nbnxn_kernel_to_cluster_j_size(nb_kernel_type);
- out->nVS = nenergrp*nenergrp*stride*(cj_size>>1)*cj_size;
- ma(reinterpret_cast<void **>(&out->VSvdw), out->nVS*sizeof(*out->VSvdw));
- ma(reinterpret_cast<void **>(&out->VSc), out->nVS*sizeof(*out->VSc ));
- }
- else
- {
- out->nVS = 0;
+ int cj_size = nbnxn_kernel_to_cluster_j_size(nb_kernel_type);
+ int numElements = numEnergyGroups*numEnergyGroups*simdEnergyBufferStride*(cj_size/2)*cj_size;
+ VSvdw.resize(numElements);
+ VSc.resize(numElements);
}
}
}
/* Stores the LJ parameter data in a format convenient for different kernels */
-static void set_lj_parameter_data(nbnxn_atomdata_t *nbat, gmx_bool bSIMD)
+static void set_lj_parameter_data(nbnxn_atomdata_t::Params *params, gmx_bool bSIMD)
{
- real c6, c12;
-
- int nt = nbat->ntype;
+ int nt = params->numTypes;
if (bSIMD)
{
* when it might not be used, but introducing the conditional code is not
* really worth it.
*/
- nbat->alloc(reinterpret_cast<void **>(&nbat->nbfp_aligned),
- nt*nt*c_simdBestPairAlignment*sizeof(*nbat->nbfp_aligned));
+ params->nbfp_aligned.resize(nt*nt*c_simdBestPairAlignment);
+
for (int i = 0; i < nt; i++)
{
for (int j = 0; j < nt; j++)
{
- nbat->nbfp_aligned[(i*nt+j)*c_simdBestPairAlignment+0] = nbat->nbfp[(i*nt+j)*2+0];
- nbat->nbfp_aligned[(i*nt+j)*c_simdBestPairAlignment+1] = nbat->nbfp[(i*nt+j)*2+1];
- nbat->nbfp_aligned[(i*nt+j)*c_simdBestPairAlignment+2] = 0;
- nbat->nbfp_aligned[(i*nt+j)*c_simdBestPairAlignment+3] = 0;
+ params->nbfp_aligned[(i*nt+j)*c_simdBestPairAlignment+0] = params->nbfp[(i*nt+j)*2+0];
+ params->nbfp_aligned[(i*nt+j)*c_simdBestPairAlignment+1] = params->nbfp[(i*nt+j)*2+1];
+ params->nbfp_aligned[(i*nt+j)*c_simdBestPairAlignment+2] = 0;
+ params->nbfp_aligned[(i*nt+j)*c_simdBestPairAlignment+3] = 0;
}
}
#endif
* and with LJ-PME kernels. We then only need parameters per atom type,
* not per pair of atom types.
*/
- switch (nbat->comb_rule)
+ params->nbfp_comb.resize(nt*2);
+ switch (params->comb_rule)
{
case ljcrGEOM:
- nbat->comb_rule = ljcrGEOM;
+ params->comb_rule = ljcrGEOM;
for (int i = 0; i < nt; i++)
{
/* Store the sqrt of the diagonal from the nbfp matrix */
- nbat->nbfp_comb[i*2 ] = std::sqrt(nbat->nbfp[(i*nt+i)*2 ]);
- nbat->nbfp_comb[i*2+1] = std::sqrt(nbat->nbfp[(i*nt+i)*2+1]);
+ params->nbfp_comb[i*2 ] = std::sqrt(params->nbfp[(i*nt+i)*2 ]);
+ params->nbfp_comb[i*2+1] = std::sqrt(params->nbfp[(i*nt+i)*2+1]);
}
break;
case ljcrLB:
for (int i = 0; i < nt; i++)
{
/* Get 6*C6 and 12*C12 from the diagonal of the nbfp matrix */
- c6 = nbat->nbfp[(i*nt+i)*2 ];
- c12 = nbat->nbfp[(i*nt+i)*2+1];
+ const real c6 = params->nbfp[(i*nt+i)*2 ];
+ const real c12 = params->nbfp[(i*nt+i)*2+1];
if (c6 > 0 && c12 > 0)
{
/* We store 0.5*2^(1/6)*sigma and sqrt(4*3*eps),
* so we get 6*C6 and 12*C12 after combining.
*/
- nbat->nbfp_comb[i*2 ] = 0.5*gmx::sixthroot(c12/c6);
- nbat->nbfp_comb[i*2+1] = std::sqrt(c6*c6/c12);
+ params->nbfp_comb[i*2 ] = 0.5*gmx::sixthroot(c12/c6);
+ params->nbfp_comb[i*2+1] = std::sqrt(c6*c6/c12);
}
else
{
- nbat->nbfp_comb[i*2 ] = 0;
- nbat->nbfp_comb[i*2+1] = 0;
+ params->nbfp_comb[i*2 ] = 0;
+ params->nbfp_comb[i*2+1] = 0;
}
}
break;
}
}
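
For reference, how the LB kernels recombine these stored per-type values into the prefactored pair parameters; the variable names here are illustrative:

    /* Stored above: s_i = 0.5*2^(1/6)*sigma_i and e_i = sqrt(12*eps_i),
     * with c6 = 6*C6 and c12 = 12*C12 on the nbfp diagonal. */
    const real sig  = s_i + s_j;      /* = 2^(1/6) * 0.5*(sigma_i + sigma_j)  */
    const real sig2 = sig*sig;
    const real sig6 = sig2*sig2*sig2; /* = 2 * sigma_ij^6                     */
    const real c6   = e_i*e_j*sig6;   /* = 12*eps_ij * 2*sigma_ij^6 = 6*C6_ij */
    const real c12  = c6*sig6;        /* = 12*C12_ij                          */
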
-#if GMX_SIMD
-static void
-nbnxn_atomdata_init_simple_exclusion_masks(nbnxn_atomdata_t *nbat)
+nbnxn_atomdata_t::SimdMasks::SimdMasks()
{
- const int simd_width = GMX_SIMD_REAL_WIDTH;
- int simd_excl_size;
+#if GMX_SIMD
+ constexpr int simd_width = GMX_SIMD_REAL_WIDTH;
/* Set the diagonal cluster pair exclusion mask setup data.
* In the kernel we check 0 < j - i to generate the masks.
* Here we store j - i for generating the mask for the first i;
* we subtract 0.5 to avoid rounding issues.
* In the kernel we can subtract 1 to generate the subsequent mask.
*/
- int simd_4xn_diag_size;
-
- simd_4xn_diag_size = std::max(c_nbnxnCpuIClusterSize, simd_width);
- snew_aligned(nbat->simd_4xn_diagonal_j_minus_i, simd_4xn_diag_size, NBNXN_MEM_ALIGN);
+ const int simd_4xn_diag_size = std::max(c_nbnxnCpuIClusterSize, simd_width);
+ diagonal_4xn_j_minus_i.resize(simd_4xn_diag_size);
for (int j = 0; j < simd_4xn_diag_size; j++)
{
- nbat->simd_4xn_diagonal_j_minus_i[j] = j - 0.5;
+ diagonal_4xn_j_minus_i[j] = j - 0.5;
}
- snew_aligned(nbat->simd_2xnn_diagonal_j_minus_i, simd_width, NBNXN_MEM_ALIGN);
+ diagonal_2xnn_j_minus_i.resize(simd_width);
for (int j = 0; j < simd_width/2; j++)
{
/* The j-cluster size is half the SIMD width */
- nbat->simd_2xnn_diagonal_j_minus_i[j] = j - 0.5;
+ diagonal_2xnn_j_minus_i[j] = j - 0.5;
/* The next half of the SIMD width is for i + 1 */
- nbat->simd_2xnn_diagonal_j_minus_i[simd_width/2+j] = j - 1 - 0.5;
+ diagonal_2xnn_j_minus_i[simd_width/2 + j] = j - 1 - 0.5;
}
/* We use up to 32 bits for exclusion masking.
* In single precision this means the real and integer SIMD registers
* are of equal size.
*/
- simd_excl_size = c_nbnxnCpuIClusterSize*simd_width;
+ const int simd_excl_size = c_nbnxnCpuIClusterSize*simd_width;
#if GMX_DOUBLE && !GMX_SIMD_HAVE_INT32_LOGICAL
- snew_aligned(nbat->simd_exclusion_filter64, simd_excl_size, NBNXN_MEM_ALIGN);
+ exclusion_filter64.resize(simd_excl_size);
#else
- snew_aligned(nbat->simd_exclusion_filter, simd_excl_size, NBNXN_MEM_ALIGN);
+ exclusion_filter.resize(simd_excl_size);
#endif
for (int j = 0; j < simd_excl_size; j++)
{
/* Set the consecutive bits for masking pair exclusions */
#if GMX_DOUBLE && !GMX_SIMD_HAVE_INT32_LOGICAL
- nbat->simd_exclusion_filter64[j] = (1U << j);
+ exclusion_filter64[j] = (1U << j);
#else
- nbat->simd_exclusion_filter[j] = (1U << j);
+ exclusion_filter[j] = (1U << j);
#endif
}
-#if !GMX_SIMD_HAVE_LOGICAL && !GMX_SIMD_HAVE_INT32_LOGICAL
- // If the SIMD implementation has no bitwise logical operation support
- // whatsoever we cannot use the normal masking. Instead,
- // we generate a vector of all 2^4 possible ways an i atom
- // interacts with its 4 j atoms. Each array entry contains
- // GMX_SIMD_REAL_WIDTH values that are read with a single aligned SIMD load.
- // Since there is no logical value representation in this case, we use
- // any nonzero value to indicate 'true', while zero mean 'false'.
- // This can then be converted to a SIMD boolean internally in the SIMD
- // module by comparing to zero.
- // Each array entry encodes how this i atom will interact with the 4 j atoms.
- // Matching code exists in set_ci_top_excls() to generate indices into this array.
- // Those indices are used in the kernels.
-
- simd_excl_size = c_nbnxnCpuIClusterSize*c_nbnxnCpuIClusterSize;
- const real simdFalse = 0.0;
- const real simdTrue = 1.0;
- real *simd_interaction_array;
-
- snew_aligned(simd_interaction_array, simd_excl_size * GMX_SIMD_REAL_WIDTH, NBNXN_MEM_ALIGN);
- for (int j = 0; j < simd_excl_size; j++)
+ if (!GMX_SIMD_HAVE_LOGICAL && !GMX_SIMD_HAVE_INT32_LOGICAL)
{
- int index = j * GMX_SIMD_REAL_WIDTH;
- for (int i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
+ // If the SIMD implementation has no bitwise logical operation support
+ // whatsoever we cannot use the normal masking. Instead,
+ // we generate a vector of all 2^4 possible ways an i atom
+ // interacts with its 4 j atoms. Each array entry contains
+ // GMX_SIMD_REAL_WIDTH values that are read with a single aligned SIMD load.
+ // Since there is no logical value representation in this case, we use
+        // any nonzero value to indicate 'true', while zero means 'false'.
+ // This can then be converted to a SIMD boolean internally in the SIMD
+ // module by comparing to zero.
+ // Each array entry encodes how this i atom will interact with the 4 j atoms.
+ // Matching code exists in set_ci_top_excls() to generate indices into this array.
+ // Those indices are used in the kernels.
+
+ const int simd_excl_size = c_nbnxnCpuIClusterSize*c_nbnxnCpuIClusterSize;
+ const real simdFalse = 0.0;
+ const real simdTrue = 1.0;
+
+ interaction_array.resize(simd_excl_size * GMX_SIMD_REAL_WIDTH);
+ for (int j = 0; j < simd_excl_size; j++)
{
- simd_interaction_array[index + i] = (j & (1 << i)) ? simdTrue : simdFalse;
+ const int index = j * GMX_SIMD_REAL_WIDTH;
+ for (int i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
+ {
+ interaction_array[index + i] = (j & (1 << i)) ? simdTrue : simdFalse;
+ }
}
}
- nbat->simd_interaction_array = simd_interaction_array;
#endif
}
-#endif
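
A scalar sketch of how the stored j - i - 0.5 values become diagonal masks (the SIMD form, which loads these values and compares against zero, appears in the 2xnn kernel below):

    /* For the first i the mask is (0 < j - 0.5), i.e. true for j > i;
     * subtracting 1 shifts the same test to the next i row. */
    real jmi = j - 0.5;                     /* stored value, valid for i = 0 */
    for (int i = 0; i < c_nbnxnCpuIClusterSize; i++)
    {
        const bool includePair = (0 < jmi); /* masks out j <= i    */
        jmi -= 1;                           /* now valid for i + 1 */
    }
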
-/* Initializes the nbnxn_atomdata_t parameters data structure */
+nbnxn_atomdata_t::Params::Params(gmx::PinningPolicy pinningPolicy) :
+ numTypes(0),
+ nbfp({}, {pinningPolicy}),
+ nbfp_comb({}, {pinningPolicy}),
+ type({}, {pinningPolicy}),
+ lj_comb({}, {pinningPolicy}),
+ q({}, {pinningPolicy}),
+ nenergrp(0),
+ neg_2log(0),
+ energrp({}, {pinningPolicy})
+{
+}
+
+nbnxn_atomdata_t::nbnxn_atomdata_t(gmx::PinningPolicy pinningPolicy) :
+ params_(pinningPolicy),
+ numAtoms_(0),
+ natoms_local(0),
+ shift_vec({}, {pinningPolicy}),
+ x_({}, {pinningPolicy}),
+ simdMasks(),
+ bUseBufferFlags(FALSE),
+ bUseTreeReduce(FALSE)
+{
+}
+
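
The ({}, {pinningPolicy}) member initializers use the vector(initializer_list, allocator) constructor: each vector starts empty, but its allocator, and thus whether later allocations are pinned, is fixed at construction. Reduced to its essentials:

    /* Empty contents, but the allocator carries the pinning policy: */
    gmx::HostVector<real> x({}, {gmx::PinningPolicy::PinnedIfSupported});
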
+/* Initializes an nbnxn_atomdata_t::Params data structure */
static void nbnxn_atomdata_params_init(const gmx::MDLogger &mdlog,
- nbnxn_atomdata_t *nbat,
+ nbnxn_atomdata_t::Params *params,
int nb_kernel_type,
int enbnxninitcombrule,
int ntype, const real *nbfp,
- int n_energygroups,
- nbnxn_alloc_t *alloc,
- nbnxn_free_t *free)
+ int n_energygroups)
{
real c6, c12, tol;
char *ptr;
gmx_bool simple, bCombGeom, bCombLB, bSIMD;
- if (alloc == nullptr)
- {
- nbat->alloc = nbnxn_alloc_aligned;
- }
- else
- {
- nbat->alloc = alloc;
- }
- if (free == nullptr)
- {
- nbat->free = nbnxn_free_aligned;
- }
- else
- {
- nbat->free = free;
- }
-
if (debug)
{
fprintf(debug, "There are %d atom types in the system, adding one for nbnxn_atomdata_t\n", ntype);
}
- nbat->ntype = ntype + 1;
- nbat->alloc(reinterpret_cast<void **>(&nbat->nbfp),
- nbat->ntype*nbat->ntype*2*sizeof(*nbat->nbfp));
- nbat->alloc(reinterpret_cast<void **>(&nbat->nbfp_comb), nbat->ntype*2*sizeof(*nbat->nbfp_comb));
+ params->numTypes = ntype + 1;
+ params->nbfp.resize(params->numTypes*params->numTypes*2);
+ params->nbfp_comb.resize(params->numTypes*2);
/* A tolerance of 1e-5 seems reasonable for (possibly hand-typed)
* force-field floating point parameters.
bCombGeom = TRUE;
bCombLB = TRUE;
- /* Temporarily fill nbat->nbfp_comb with sigma and epsilon
+ /* Temporarily fill params->nbfp_comb with sigma and epsilon
* to check for the LB rule.
*/
for (int i = 0; i < ntype; i++)
{
- c6 = nbfp[(i*ntype+i)*2 ]/6.0;
- c12 = nbfp[(i*ntype+i)*2+1]/12.0;
+ c6 = nbfp[(i*ntype+i)*2 ]/6.0;
+ c12 = nbfp[(i*ntype+i)*2 + 1]/12.0;
if (c6 > 0 && c12 > 0)
{
- nbat->nbfp_comb[i*2 ] = gmx::sixthroot(c12/c6);
- nbat->nbfp_comb[i*2+1] = 0.25*c6*c6/c12;
+ params->nbfp_comb[i*2 ] = gmx::sixthroot(c12/c6);
+ params->nbfp_comb[i*2 + 1] = 0.25*c6*c6/c12;
}
else if (c6 == 0 && c12 == 0)
{
- nbat->nbfp_comb[i*2 ] = 0;
- nbat->nbfp_comb[i*2+1] = 0;
+ params->nbfp_comb[i*2 ] = 0;
+ params->nbfp_comb[i*2 + 1] = 0;
}
else
{
}
}
- for (int i = 0; i < nbat->ntype; i++)
+ for (int i = 0; i < params->numTypes; i++)
{
- for (int j = 0; j < nbat->ntype; j++)
+ for (int j = 0; j < params->numTypes; j++)
{
if (i < ntype && j < ntype)
{
/* fr->nbfp has been updated, so that array too now stores c6/c12 including
* the 6.0/12.0 prefactors to save 2 flops in the most common case (force-only).
*/
- c6 = nbfp[(i*ntype+j)*2 ];
- c12 = nbfp[(i*ntype+j)*2+1];
- nbat->nbfp[(i*nbat->ntype+j)*2 ] = c6;
- nbat->nbfp[(i*nbat->ntype+j)*2+1] = c12;
+ c6 = nbfp[(i*ntype+j)*2 ];
+ c12 = nbfp[(i*ntype+j)*2 + 1];
+ params->nbfp[(i*params->numTypes+j)*2 ] = c6;
+ params->nbfp[(i*params->numTypes+j)*2 + 1] = c12;
/* Compare 6*C6 and 12*C12 for geometric combination rule */
bCombGeom = bCombGeom &&
gmx_within_tol(c6*c6, nbfp[(i*ntype+i)*2 ]*nbfp[(j*ntype+j)*2 ], tol) &&
- gmx_within_tol(c12*c12, nbfp[(i*ntype+i)*2+1]*nbfp[(j*ntype+j)*2+1], tol);
+ gmx_within_tol(c12*c12, nbfp[(i*ntype+i)*2 + 1]*nbfp[(j*ntype+j)*2 + 1], tol);
/* Compare C6 and C12 for Lorentz-Berthelot combination rule */
c6 /= 6.0;
c12 /= 12.0;
bCombLB = bCombLB &&
((c6 == 0 && c12 == 0 &&
- (nbat->nbfp_comb[i*2+1] == 0 || nbat->nbfp_comb[j*2+1] == 0)) ||
+ (params->nbfp_comb[i*2 + 1] == 0 || params->nbfp_comb[j*2 + 1] == 0)) ||
(c6 > 0 && c12 > 0 &&
gmx_within_tol(gmx::sixthroot(c12/c6),
- 0.5*(nbat->nbfp_comb[i*2]+nbat->nbfp_comb[j*2]), tol) &&
- gmx_within_tol(0.25*c6*c6/c12, std::sqrt(nbat->nbfp_comb[i*2+1]*nbat->nbfp_comb[j*2+1]), tol)));
+ 0.5*(params->nbfp_comb[i*2]+params->nbfp_comb[j*2]), tol) &&
+ gmx_within_tol(0.25*c6*c6/c12, std::sqrt(params->nbfp_comb[i*2 + 1]*params->nbfp_comb[j*2 + 1]), tol)));
}
else
{
/* Add zero parameters for the additional dummy atom type */
- nbat->nbfp[(i*nbat->ntype+j)*2 ] = 0;
- nbat->nbfp[(i*nbat->ntype+j)*2+1] = 0;
+ params->nbfp[(i*params->numTypes + j)*2 ] = 0;
+ params->nbfp[(i*params->numTypes + j)*2+1] = 0;
}
}
}
*/
if (bCombGeom)
{
- nbat->comb_rule = ljcrGEOM;
+ params->comb_rule = ljcrGEOM;
}
else if (bCombLB)
{
- nbat->comb_rule = ljcrLB;
+ params->comb_rule = ljcrLB;
}
else
{
- nbat->comb_rule = ljcrNONE;
+ params->comb_rule = ljcrNONE;
- nbat->free(nbat->nbfp_comb);
+ params->nbfp_comb.clear();
}
{
std::string mesg;
- if (nbat->comb_rule == ljcrNONE)
+ if (params->comb_rule == ljcrNONE)
{
mesg = "Using full Lennard-Jones parameter combination matrix";
}
else
{
mesg = gmx::formatString("Using %s Lennard-Jones combination rule",
- nbat->comb_rule == ljcrGEOM ? "geometric" : "Lorentz-Berthelot");
+ params->comb_rule == ljcrGEOM ? "geometric" : "Lorentz-Berthelot");
}
GMX_LOG(mdlog.info).asParagraph().appendText(mesg);
}
break;
case enbnxninitcombruleGEOM:
- nbat->comb_rule = ljcrGEOM;
+ params->comb_rule = ljcrGEOM;
break;
case enbnxninitcombruleLB:
- nbat->comb_rule = ljcrLB;
+ params->comb_rule = ljcrLB;
break;
case enbnxninitcombruleNONE:
- nbat->comb_rule = ljcrNONE;
+ params->comb_rule = ljcrNONE;
- nbat->free(nbat->nbfp_comb);
+ params->nbfp_comb.clear();
break;
default:
gmx_incons("Unknown enbnxninitcombrule");
bSIMD = (nb_kernel_type == nbnxnk4xN_SIMD_4xN ||
nb_kernel_type == nbnxnk4xN_SIMD_2xNN);
- set_lj_parameter_data(nbat, bSIMD);
+ set_lj_parameter_data(params, bSIMD);
- nbat->nenergrp = n_energygroups;
+ params->nenergrp = n_energygroups;
if (!simple)
{
// We now check for energy groups already when starting mdrun
GMX_RELEASE_ASSERT(n_energygroups == 1, "GPU kernels do not support energy groups");
}
/* Temporary storage goes as #grp^3*simd_width^2/2, so limit to 64 */
- if (nbat->nenergrp > 64)
+ if (params->nenergrp > 64)
{
gmx_fatal(FARGS, "With NxN kernels not more than 64 energy groups are supported\n");
}
- nbat->neg_2log = 1;
- while (nbat->nenergrp > (1<<nbat->neg_2log))
+ params->neg_2log = 1;
+ while (params->nenergrp > (1<<params->neg_2log))
{
- nbat->neg_2log++;
+ params->neg_2log++;
}
}
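
The loop computes the number of bits needed per energy-group index, i.e. max(1, ceil(log2(nenergrp))); traced for nenergrp = 5:

    int neg_2log = 1;            /* 5 > 1<<1 -> 2;  5 > 1<<2 -> 3;  5 <= 1<<3 */
    while (5 > (1 << neg_2log))
    {
        neg_2log++;
    }
    /* neg_2log == 3: each group index occupies 3 bits in the packed energrp words */
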
int enbnxninitcombrule,
int ntype, const real *nbfp,
int n_energygroups,
- int nout,
- nbnxn_alloc_t *alloc,
- nbnxn_free_t *free)
+ int nout)
{
- nbnxn_atomdata_params_init(mdlog, nbat, nb_kernel_type,
- enbnxninitcombrule, ntype, nbfp, n_energygroups,
- alloc, free);
+ nbnxn_atomdata_params_init(mdlog, &nbat->paramsDeprecated(), nb_kernel_type,
+ enbnxninitcombrule, ntype, nbfp, n_energygroups);
const gmx_bool simple = nbnxn_kernel_pairlist_simple(nb_kernel_type);
const gmx_bool bSIMD = (nb_kernel_type == nbnxnk4xN_SIMD_4xN ||
nb_kernel_type == nbnxnk4xN_SIMD_2xNN);
- set_lj_parameter_data(nbat, bSIMD);
-
if (simple)
{
int pack_x;
nbat->FFormat = nbatXYZ;
}
- nbat->alloc(reinterpret_cast<void **>(&nbat->shift_vec), SHIFTS*sizeof(*nbat->shift_vec));
+ nbat->shift_vec.resize(SHIFTS);
nbat->xstride = (nbat->XFormat == nbatXYZQ ? STRIDE_XYZQ : DIM);
nbat->fstride = (nbat->FFormat == nbatXYZQ ? STRIDE_XYZQ : DIM);
- nbat->x = nullptr;
-
-#if GMX_SIMD
- if (simple)
- {
- nbnxn_atomdata_init_simple_exclusion_masks(nbat);
- }
-#endif
/* Initialize the output data structures */
- nbat->nout = nout;
- snew(nbat->out, nbat->nout);
- nbat->nalloc = 0;
- for (int i = 0; i < nbat->nout; i++)
+ for (int i = 0; i < nout; i++)
{
- nbnxn_atomdata_output_init(&nbat->out[i],
- nb_kernel_type,
- nbat->nenergrp, 1<<nbat->neg_2log,
- nbat->alloc);
+ const auto &pinningPolicy = nbat->params().type.get_allocator().pinningPolicy();
+ nbat->out.emplace_back(nb_kernel_type, nbat->params().nenergrp, 1 << nbat->params().neg_2log,
+ pinningPolicy);
}
+
nbat->buffer_flags.flag = nullptr;
nbat->buffer_flags.flag_nalloc = 0;
}
template<int packSize>
-static void copy_lj_to_nbat_lj_comb(const real *ljparam_type,
+static void copy_lj_to_nbat_lj_comb(gmx::ArrayRef<const real> ljparam_type,
const int *type, int na,
real *ljparam_at)
{
}
}
+static int numAtomsFromGrids(const nbnxn_search &nbs)
+{
+ const nbnxn_grid_t &lastGrid = nbs.grid.back();
+
+ return (lastGrid.cell0 + lastGrid.nc)*lastGrid.na_sc;
+}
+
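
The grids are laid out back to back in a single cell list, so the total gridded atom count follows from the last grid alone; with illustrative values:

    /* cell0 = 100 (first cell of the last grid), nc = 20 cells,
     * na_sc = 8 atoms per super-cell:  (100 + 20)*8 = 960 gridded atoms */
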
/* Sets the atom type in nbnxn_atomdata_t */
-static void nbnxn_atomdata_set_atomtypes(nbnxn_atomdata_t *nbat,
- const nbnxn_search *nbs,
- const int *type)
+static void nbnxn_atomdata_set_atomtypes(nbnxn_atomdata_t::Params *params,
+ const nbnxn_search *nbs,
+ const int *type)
{
+ params->type.resize(numAtomsFromGrids(*nbs));
+
for (const nbnxn_grid_t &grid : nbs->grid)
{
/* Loop over all columns and copy and fill */
int ash = (grid.cell0 + grid.cxy_ind[i])*grid.na_sc;
copy_int_to_nbat_int(nbs->a.data() + ash, grid.cxy_na[i], ncz*grid.na_sc,
- type, nbat->ntype-1, nbat->type+ash);
+ type, params->numTypes - 1, params->type.data() + ash);
}
}
}
/* Sets the LJ combination rule parameters in nbnxn_atomdata_t */
-static void nbnxn_atomdata_set_ljcombparams(nbnxn_atomdata_t *nbat,
- const nbnxn_search *nbs)
+static void nbnxn_atomdata_set_ljcombparams(nbnxn_atomdata_t::Params *params,
+ const int XFormat,
+ const nbnxn_search *nbs)
{
- if (nbat->comb_rule != ljcrNONE)
+ params->lj_comb.resize(numAtomsFromGrids(*nbs)*2);
+
+ if (params->comb_rule != ljcrNONE)
{
for (const nbnxn_grid_t &grid : nbs->grid)
{
int ncz = grid.cxy_ind[i+1] - grid.cxy_ind[i];
int ash = (grid.cell0 + grid.cxy_ind[i])*grid.na_sc;
- if (nbat->XFormat == nbatX4)
+ if (XFormat == nbatX4)
{
- copy_lj_to_nbat_lj_comb<c_packX4>(nbat->nbfp_comb,
- nbat->type + ash,
+ copy_lj_to_nbat_lj_comb<c_packX4>(params->nbfp_comb,
+ params->type.data() + ash,
ncz*grid.na_sc,
- nbat->lj_comb + ash*2);
+ params->lj_comb.data() + ash*2);
}
- else if (nbat->XFormat == nbatX8)
+ else if (XFormat == nbatX8)
{
- copy_lj_to_nbat_lj_comb<c_packX8>(nbat->nbfp_comb,
- nbat->type + ash,
+ copy_lj_to_nbat_lj_comb<c_packX8>(params->nbfp_comb,
+ params->type.data() + ash,
ncz*grid.na_sc,
- nbat->lj_comb + ash*2);
+ params->lj_comb.data() + ash*2);
}
- else if (nbat->XFormat == nbatXYZQ)
+ else if (XFormat == nbatXYZQ)
{
- copy_lj_to_nbat_lj_comb<1>(nbat->nbfp_comb,
- nbat->type + ash,
+ copy_lj_to_nbat_lj_comb<1>(params->nbfp_comb,
+ params->type.data() + ash,
ncz*grid.na_sc,
- nbat->lj_comb + ash*2);
+ params->lj_comb.data() + ash*2);
}
}
}
const nbnxn_search *nbs,
const real *charge)
{
+ if (nbat->XFormat != nbatXYZQ)
+ {
+ nbat->paramsDeprecated().q.resize(nbat->numAtoms());
+ }
+
for (const nbnxn_grid_t &grid : nbs->grid)
{
/* Loop over all columns and copy and fill */
if (nbat->XFormat == nbatXYZQ)
{
- real *q = nbat->x + ash*STRIDE_XYZQ + ZZ + 1;
+ real *q = nbat->x().data() + ash*STRIDE_XYZQ + ZZ + 1;
int i;
for (i = 0; i < na; i++)
{
}
else
{
- real *q = nbat->q + ash;
+ real *q = nbat->paramsDeprecated().q.data() + ash;
int i;
for (i = 0; i < na; i++)
{
static void nbnxn_atomdata_mask_fep(nbnxn_atomdata_t *nbat,
const nbnxn_search *nbs)
{
- real *q;
- int stride_q, nsubc;
+ nbnxn_atomdata_t::Params ¶ms = nbat->paramsDeprecated();
+ real *q;
+ int stride_q;
if (nbat->XFormat == nbatXYZQ)
{
- q = nbat->x + ZZ + 1;
+ q = nbat->x().data() + ZZ + 1;
stride_q = STRIDE_XYZQ;
}
else
{
- q = nbat->q;
+ q = params.q.data();
stride_q = 1;
}
for (const nbnxn_grid_t &grid : nbs->grid)
{
+ int nsubc;
if (grid.bSimple)
{
nsubc = 1;
{
int ind = c_offset + c*grid.na_c + i;
/* Set atom type and charge to non-interacting */
- nbat->type[ind] = nbat->ntype - 1;
- q[ind*stride_q] = 0;
+ params.type[ind] = params.numTypes - 1;
+ q[ind*stride_q] = 0;
}
}
}
}
/* Set the energy group indices for atoms in nbnxn_atomdata_t */
-static void nbnxn_atomdata_set_energygroups(nbnxn_atomdata_t *nbat,
- const nbnxn_search *nbs,
- const int *atinfo)
+static void nbnxn_atomdata_set_energygroups(nbnxn_atomdata_t::Params *params,
+ const nbnxn_search *nbs,
+ const int *atinfo)
{
- if (nbat->nenergrp == 1)
+ if (params->nenergrp == 1)
{
return;
}
+ params->energrp.resize(numAtomsFromGrids(*nbs));
+
for (const nbnxn_grid_t &grid : nbs->grid)
{
/* Loop over all columns and copy and fill */
int ash = (grid.cell0 + grid.cxy_ind[i])*grid.na_sc;
copy_egp_to_nbat_egps(nbs->a.data() + ash, grid.cxy_na[i], ncz*grid.na_sc,
- nbat->na_c, nbat->neg_2log,
- atinfo, nbat->energrp+(ash>>grid.na_c_2log));
+ c_nbnxnCpuIClusterSize, params->neg_2log,
+ atinfo,
+ params->energrp.data() + (ash >> grid.na_c_2log));
}
}
}
const t_mdatoms *mdatoms,
const int *atinfo)
{
- nbnxn_atomdata_set_atomtypes(nbat, nbs, mdatoms->typeA);
+ nbnxn_atomdata_t::Params ¶ms = nbat->paramsDeprecated();
+
+ nbnxn_atomdata_set_atomtypes(¶ms, nbs, mdatoms->typeA);
nbnxn_atomdata_set_charges(nbat, nbs, mdatoms->chargeA);
}
/* This must be done after masking types for FEP */
- nbnxn_atomdata_set_ljcombparams(nbat, nbs);
+ nbnxn_atomdata_set_ljcombparams(¶ms, nbat->XFormat, nbs);
- nbnxn_atomdata_set_energygroups(nbat, nbs, atinfo);
+ nbnxn_atomdata_set_energygroups(¶ms, nbs, atinfo);
}
/* Copies the shift vector array to nbnxn_atomdata_t */
na_fill = na;
}
copy_rvec_to_nbat_real(nbs->a.data() + ash, na, na_fill, x,
- nbat->XFormat, nbat->x, ash);
+ nbat->XFormat, nbat->x().data(), ash);
}
}
}
}
static void
-nbnxn_atomdata_clear_reals(real * gmx_restrict dest,
+nbnxn_atomdata_clear_reals(gmx::ArrayRef<real> dest,
int i0, int i1)
{
for (int i = i0; i < i1; i++)
gmx_unused static void
nbnxn_atomdata_reduce_reals(real * gmx_restrict dest,
gmx_bool bDestSet,
- real ** gmx_restrict src,
+ const real ** gmx_restrict src,
int nsrc,
int i0, int i1)
{
gmx_unused static void
nbnxn_atomdata_reduce_reals_simd(real gmx_unused * gmx_restrict dest,
gmx_bool gmx_unused bDestSet,
- real gmx_unused ** gmx_restrict src,
+ const gmx_unused real ** gmx_restrict src,
int gmx_unused nsrc,
int gmx_unused i0, int gmx_unused i1)
{
static void
nbnxn_atomdata_add_nbat_f_to_f_part(const nbnxn_search *nbs,
const nbnxn_atomdata_t *nbat,
- nbnxn_atomdata_output_t *out,
+ gmx::ArrayRef<nbnxn_atomdata_output_t> out,
int nfa,
int a0, int a1,
rvec *f)
case nbatXYZQ:
if (nfa == 1)
{
- const real *fnb = out[0].f;
+ const real *fnb = out[0].f.data();
for (int a = a0; a < a1; a++)
{
case nbatX4:
if (nfa == 1)
{
- const real *fnb = out[0].f;
+ const real *fnb = out[0].f.data();
for (int a = a0; a < a1; a++)
{
case nbatX8:
if (nfa == 1)
{
- const real *fnb = out[0].f;
+ const real *fnb = out[0].f.data();
for (int a = a0; a < a1; a++)
{
return (b * 0x0202020202ULL & 0x010884422010ULL) % 1023;
}
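
The multiply-and-mod expression above is the classic three-operation byte reversal from the Bit Twiddling Hacks collection, used here to order the tree-reduction pairing; a small standalone form for checking:

    /* Reverse the bits of an 8-bit value with one 64-bit multiply,
     * one mask and one modulus: */
    static unsigned int bitReverseByte(unsigned int b)
    {
        return (b * 0x0202020202ULL & 0x010884422010ULL) % 1023;
    }
    /* bitReverseByte(0x01) == 0x80;  bitReverseByte(0x0d) == 0xb0 */
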
-static void nbnxn_atomdata_add_nbat_f_to_f_treereduce(const nbnxn_atomdata_t *nbat,
- int nth)
+static void nbnxn_atomdata_add_nbat_f_to_f_treereduce(nbnxn_atomdata_t *nbat,
+ int nth)
{
const nbnxn_buffer_flags_t *flags = &nbat->buffer_flags;
- int next_pow2 = 1<<(gmx::log2I(nth-1)+1);
+ int next_pow2 = 1<<(gmx::log2I(nth-1)+1);
- assert(nbat->nout == nth); /* tree-reduce currently only works for nout==nth */
+ const int numOutputBuffers = nbat->out.size();
+ GMX_ASSERT(numOutputBuffers == nth,
+ "tree-reduce currently only works for numOutputBuffers==nth");
memset(nbat->syncStep, 0, sizeof(*(nbat->syncStep))*nth);
index[1] = index[0] + group_size/2;
/* If no second buffer, nothing to do */
- if (index[1] >= nbat->nout && group_size > 2)
+ if (index[1] >= numOutputBuffers && group_size > 2)
{
continue;
}
if (bitmask_is_set(flags->flag[b], index[1]) || group_size > 2)
{
+ const real *fIndex1 = nbat->out[index[1]].f.data();
#if GMX_SIMD
nbnxn_atomdata_reduce_reals_simd
#else
nbnxn_atomdata_reduce_reals
#endif
- (nbat->out[index[0]].f,
+ (nbat->out[index[0]].f.data(),
bitmask_is_set(flags->flag[b], index[0]) || group_size > 2,
- &(nbat->out[index[1]].f), 1, i0, i1);
+ &fIndex1, 1, i0, i1);
}
else if (!bitmask_is_set(flags->flag[b], index[0]))
}
-static void nbnxn_atomdata_add_nbat_f_to_f_stdreduce(const nbnxn_atomdata_t *nbat,
- int nth)
+static void nbnxn_atomdata_add_nbat_f_to_f_stdreduce(nbnxn_atomdata_t *nbat,
+ int nth)
{
#pragma omp parallel for num_threads(nth) schedule(static)
for (int th = 0; th < nth; th++)
try
{
const nbnxn_buffer_flags_t *flags;
- int nfptr;
- real *fptr[NBNXN_BUFFERFLAG_MAX_THREADS];
+ int nfptr;
+ const real *fptr[NBNXN_BUFFERFLAG_MAX_THREADS];
flags = &nbat->buffer_flags;
int i1 = (b+1)*NBNXN_BUFFERFLAG_SIZE*nbat->fstride;
nfptr = 0;
- for (int out = 1; out < nbat->nout; out++)
+ for (int out = 1; out < static_cast<gmx::index>(nbat->out.size()); out++)
{
if (bitmask_is_set(flags->flag[b], out))
{
- fptr[nfptr++] = nbat->out[out].f;
+ fptr[nfptr++] = nbat->out[out].f.data();
}
}
if (nfptr > 0)
#else
nbnxn_atomdata_reduce_reals
#endif
- (nbat->out[0].f,
+ (nbat->out[0].f.data(),
bitmask_is_set(flags->flag[b], 0),
fptr, nfptr,
i0, i1);
/* Add the force array(s) from nbnxn_atomdata_t to f */
void nbnxn_atomdata_add_nbat_f_to_f(nbnxn_search *nbs,
int locality,
- const nbnxn_atomdata_t *nbat,
+ nbnxn_atomdata_t *nbat,
rvec *f,
gmx_wallcycle *wcycle)
{
int nth = gmx_omp_nthreads_get(emntNonbonded);
- if (nbat->nout > 1)
+ if (nbat->out.size() > 1)
{
if (locality != eatAll)
{
void nbnxn_atomdata_add_nbat_fshift_to_fshift(const nbnxn_atomdata_t *nbat,
rvec *fshift)
{
- const nbnxn_atomdata_output_t * out = nbat->out;
+ gmx::ArrayRef<const nbnxn_atomdata_output_t> outputBuffers = nbat->out;
for (int s = 0; s < SHIFTS; s++)
{
rvec sum;
clear_rvec(sum);
- for (int th = 0; th < nbat->nout; th++)
+ for (const nbnxn_atomdata_output_t &out : outputBuffers)
{
- sum[XX] += out[th].fshift[s*DIM+XX];
- sum[YY] += out[th].fshift[s*DIM+YY];
- sum[ZZ] += out[th].fshift[s*DIM+ZZ];
+ sum[XX] += out.fshift[s*DIM+XX];
+ sum[YY] += out.fshift[s*DIM+YY];
+ sum[ZZ] += out.fshift[s*DIM+ZZ];
}
rvec_inc(fshift[s], sum);
}
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
int enbnxninitcombrule,
int ntype, const real *nbfp,
int n_energygroups,
- int nout,
- nbnxn_alloc_t *alloc,
- nbnxn_free_t *free);
+ int nout);
-/* Copy the atom data to the non-bonded atom data structure */
void nbnxn_atomdata_set(nbnxn_atomdata_t *nbat,
const nbnxn_search *nbs,
const t_mdatoms *mdatoms,
/* Add the forces stored in nbat to f, zeros the forces in nbat */
void nbnxn_atomdata_add_nbat_f_to_f(nbnxn_search *nbs,
int locality,
- const nbnxn_atomdata_t *nbat,
+ nbnxn_atomdata_t *nbat,
rvec *f,
gmx_wallcycle *wcycle);
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
}
/* HtoD x, q */
- cu_copy_H2D_async(adat->xq + adat_begin, nbatom->x + adat_begin * 4,
+ cu_copy_H2D_async(adat->xq + adat_begin,
+ static_cast<const void *>(nbatom->x().data() + adat_begin * 4),
adat_len * sizeof(*adat->xq), stream);
if (bDoTime)
}
void nbnxn_gpu_launch_cpyback(gmx_nbnxn_cuda_t *nb,
- const nbnxn_atomdata_t *nbatom,
+ nbnxn_atomdata_t *nbatom,
int flags,
int aloc,
bool haveOtherWork)
}
/* DtoH f */
- cu_copy_D2H_async(nbatom->out[0].f + adat_begin * 3, adat->f + adat_begin,
+ cu_copy_D2H_async(nbatom->out[0].f.data() + adat_begin * 3, adat->f + adat_begin,
(adat_len)*sizeof(*adat->f), stream);
/* After the non-local D2H is launched the nonlocal_done event can be
}
/*! Initializes the nonbonded parameter data structure. */
-static void init_nbparam(cu_nbparam_t *nbp,
- const interaction_const_t *ic,
- const NbnxnListParameters *listParams,
- const nbnxn_atomdata_t *nbat)
+static void init_nbparam(cu_nbparam_t *nbp,
+ const interaction_const_t *ic,
+ const NbnxnListParameters *listParams,
+ const nbnxn_atomdata_t::Params &nbatParams)
{
int ntypes;
- ntypes = nbat->ntype;
+ ntypes = nbatParams.numTypes;
set_cutoff_parameters(nbp, ic, listParams);
{
case eintmodNONE:
case eintmodPOTSHIFT:
- switch (nbat->comb_rule)
+ switch (nbatParams.comb_rule)
{
case ljcrNONE:
nbp->vdwtype = evdwCuCUT;
{
if (ic->ljpme_comb_rule == ljcrGEOM)
{
- assert(nbat->comb_rule == ljcrGEOM);
+ assert(nbatParams.comb_rule == ljcrGEOM);
nbp->vdwtype = evdwCuEWALDGEOM;
}
else
{
- assert(nbat->comb_rule == ljcrLB);
+ assert(nbatParams.comb_rule == ljcrLB);
nbp->vdwtype = evdwCuEWALDLB;
}
}
if (!useLjCombRule(nbp))
{
initParamLookupTable(nbp->nbfp, nbp->nbfp_texobj,
- nbat->nbfp, 2*ntypes*ntypes);
+ nbatParams.nbfp.data(), 2*ntypes*ntypes);
}
/* set up LJ-PME parameter lookup table */
if (ic->vdwtype == evdwPME)
{
initParamLookupTable(nbp->nbfp_comb, nbp->nbfp_comb_texobj,
- nbat->nbfp_comb, 2*ntypes);
+ nbatParams.nbfp_comb.data(), 2*ntypes);
}
}
static void nbnxn_cuda_init_const(gmx_nbnxn_cuda_t *nb,
const interaction_const_t *ic,
const NbnxnListParameters *listParams,
- const nbnxn_atomdata_t *nbat)
+ const nbnxn_atomdata_t::Params &nbatParams)
{
- init_atomdata_first(nb->atdat, nbat->ntype);
- init_nbparam(nb->nbparam, ic, listParams, nbat);
+ init_atomdata_first(nb->atdat, nbatParams.numTypes);
+ init_nbparam(nb->nbparam, ic, listParams, nbatParams);
/* clear energy and shift force outputs */
nbnxn_cuda_clear_e_fshift(nb);
/* pick L1 cache configuration */
nbnxn_cuda_set_cacheconfig();
- nbnxn_cuda_init_const(nb, ic, listParams, nbat);
+ nbnxn_cuda_init_const(nb, ic, listParams, nbat->params());
*p_nb = nb;
/* only if we have a dynamic box */
if (nbatom->bDynamicBox || !adat->bShiftVecUploaded)
{
- cu_copy_H2D_async(adat->shift_vec, nbatom->shift_vec,
+ cu_copy_H2D_async(adat->shift_vec, nbatom->shift_vec.data(),
SHIFTS * sizeof(*adat->shift_vec), ls);
adat->bShiftVecUploaded = true;
}
cu_atomdata_t *d_atdat = nb->atdat;
cudaStream_t ls = nb->stream[eintLocal];
- natoms = nbat->natoms;
+ natoms = nbat->numAtoms();
realloced = false;
if (bDoTime)
if (useLjCombRule(nb->nbparam))
{
- cu_copy_H2D_async(d_atdat->lj_comb, nbat->lj_comb,
+ cu_copy_H2D_async(d_atdat->lj_comb, nbat->params().lj_comb.data(),
natoms*sizeof(*d_atdat->lj_comb), ls);
}
else
{
- cu_copy_H2D_async(d_atdat->atom_types, nbat->type,
+ cu_copy_H2D_async(d_atdat->atom_types, nbat->params().type.data(),
natoms*sizeof(*d_atdat->atom_types), ls);
}
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017,2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
*/
GPU_FUNC_QUALIFIER
void nbnxn_gpu_launch_cpyback(gmx_nbnxn_gpu_t gmx_unused *nb,
- const struct nbnxn_atomdata_t gmx_unused *nbatom,
+ struct nbnxn_atomdata_t gmx_unused *nbatom,
int gmx_unused flags,
int gmx_unused aloc,
bool gmx_unused haveOtherWork) GPU_FUNC_TERM
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
copy_rvec_to_nbat_real(nbs->a.data() + atomStart, numAtoms, grid->na_c,
as_rvec_array(x.data()),
- nbat->XFormat, nbat->x, atomStart);
+ nbat->XFormat, nbat->x().data(), atomStart);
if (nbat->XFormat == nbatX4)
{
#if GMX_SIMD && GMX_SIMD_REAL_WIDTH == 2
if (2*grid->na_cj == grid->na_c)
{
- calc_bounding_box_x_x4_halves(numAtoms, nbat->x + atom_to_x_index<c_packX4>(atomStart), bb_ptr,
+ calc_bounding_box_x_x4_halves(numAtoms, nbat->x().data() + atom_to_x_index<c_packX4>(atomStart), bb_ptr,
grid->bbj.data() + offset*2);
}
else
#endif
{
- calc_bounding_box_x_x4(numAtoms, nbat->x + atom_to_x_index<c_packX4>(atomStart), bb_ptr);
+ calc_bounding_box_x_x4(numAtoms, nbat->x().data() + atom_to_x_index<c_packX4>(atomStart), bb_ptr);
}
}
else if (nbat->XFormat == nbatX8)
size_t offset = (atomStart - grid->cell0*grid->na_sc) >> grid->na_c_2log;
nbnxn_bb_t *bb_ptr = grid->bb.data() + offset;
- calc_bounding_box_x_x8(numAtoms, nbat->x + atom_to_x_index<c_packX8>(atomStart), bb_ptr);
+ calc_bounding_box_x_x8(numAtoms, nbat->x().data() + atom_to_x_index<c_packX8>(atomStart), bb_ptr);
}
#if NBNXN_BBXXXX
else if (!grid->bSimple)
#if NBNXN_SEARCH_SIMD4_FLOAT_X_BB
if (nbat->XFormat == nbatXYZQ)
{
- calc_bounding_box_xxxx_simd4(numAtoms, nbat->x + atomStart*nbat->xstride,
+ calc_bounding_box_xxxx_simd4(numAtoms, nbat->x().data() + atomStart*nbat->xstride,
bb_work_aligned, pbb_ptr);
}
else
#endif
{
- calc_bounding_box_xxxx(numAtoms, nbat->xstride, nbat->x + atomStart*nbat->xstride,
+ calc_bounding_box_xxxx(numAtoms, nbat->xstride, nbat->x().data() + atomStart*nbat->xstride,
pbb_ptr);
}
if (gmx_debug_at)
/* Store the bounding boxes as xyz.xyz. */
nbnxn_bb_t *bb_ptr = grid->bb.data() + ((atomStart - grid->cell0*grid->na_sc) >> grid->na_c_2log);
- calc_bounding_box(numAtoms, nbat->xstride, nbat->x + atomStart*nbat->xstride,
+ calc_bounding_box(numAtoms, nbat->xstride, nbat->x().data() + atomStart*nbat->xstride,
bb_ptr);
if (gmx_debug_at)
*/
nbs->a.resize(numNbnxnAtoms + numAtomsMoved);
- /* We need padding up to a multiple of the buffer flag size: simply add */
- if (numNbnxnAtoms + NBNXN_BUFFERFLAG_SIZE > nbat->nalloc)
- {
- nbnxn_atomdata_realloc(nbat, numNbnxnAtoms + NBNXN_BUFFERFLAG_SIZE);
- }
-
- nbat->natoms = numNbnxnAtoms;
+ /* Make space in nbat for storing the atom coordinates */
+ nbat->resizeCoordinateBuffer(numNbnxnAtoms);
}
/* Determine in which grid cells the atoms should go */
grid->na_sc = (grid->bSimple ? 1 : c_gpuNumClusterPerCell)*grid->na_c;
grid->na_c_2log = get_2log(grid->na_c);
- nbat->na_c = grid->na_c;
-
if (ddZone == 0)
{
grid->cell0 = 0;
if (ddZone == 0)
{
- nbat->natoms_local = nbat->natoms;
+ nbat->natoms_local = nbat->numAtoms();
+ }
+ if (ddZone == static_cast<int>(nbs->grid.size()) - 1)
+ {
+ /* We are done setting up all grids, we can resize the force buffers */
+ nbat->resizeForceBuffers();
}
nbs_cycle_stop(&nbs->cc[enbsCCgrid]);
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
static void
clear_f_all(const nbnxn_atomdata_t *nbat, real *f)
{
- int i;
-
- for (i = 0; i < nbat->natoms*nbat->fstride; i++)
+ for (int i = 0; i < nbat->numAtoms()*nbat->fstride; i++)
{
f[i] = 0;
}
real *Vvdw,
real *Vc)
{
- int nb;
- int i, j, ind, indr;
+ const int nenergrp = nbat->params().nenergrp;
- for (nb = 0; nb < nlist; nb++)
+ for (int nb = 0; nb < nlist; nb++)
{
- for (i = 0; i < nbat->nenergrp; i++)
+ for (int i = 0; i < nenergrp; i++)
{
/* Reduce the diagonal terms */
- ind = i*nbat->nenergrp + i;
+ int ind = i*nenergrp + i;
Vvdw[ind] += nbat->out[nb].Vvdw[ind];
Vc[ind] += nbat->out[nb].Vc[ind];
/* Reduce the off-diagonal terms */
- for (j = i+1; j < nbat->nenergrp; j++)
+ for (int j = i + 1; j < nenergrp; j++)
{
/* The output should contain only one off-diagonal part */
- ind = i*nbat->nenergrp + j;
- indr = j*nbat->nenergrp + i;
+ int ind = i*nenergrp + j;
+ int indr = j*nenergrp + i;
Vvdw[ind] += nbat->out[nb].Vvdw[ind] + nbat->out[nb].Vvdw[indr];
Vc[ind] += nbat->out[nb].Vc[ind] + nbat->out[nb].Vc[indr];
}
*/
static void clearGroupEnergies(nbnxn_atomdata_output_t *out)
{
- for (int i = 0; i < out->nV; i++)
- {
- out->Vvdw[i] = 0;
- out->Vc[i] = 0;
- }
-
- for (int i = 0; i < out->nVS; i++)
- {
- out->VSvdw[i] = 0;
- }
- for (int i = 0; i < out->nVS; i++)
- {
- out->VSc[i] = 0;
- }
+ std::fill(out->Vvdw.begin(), out->Vvdw.end(), 0.0_real);
+ std::fill(out->Vc.begin(), out->Vc.end(), 0.0_real);
+ std::fill(out->VSvdw.begin(), out->VSvdw.end(), 0.0_real);
+ std::fill(out->VSc.begin(), out->VSc.end(), 0.0_real);
}
/*! \brief Reduce the group-pair energy buffers produced by a SIMD kernel
* \tparam unrollj The unroll size for j-particles in the SIMD kernel
* \param[in] numGroups The number of energy groups
* \param[in] numGroups_2log Log2 of numGroups, rounded up
- * \param[in] vVdwSimd SIMD Van der Waals energy buffers
- * \param[in] vCoulombSimd SIMD Coulomb energy buffers
- * \param[in,out] vVdw Van der Waals energy output buffer
- * \param[in,out] vCoulomb Coulomb energy output buffer
+ * \param[in,out] out Struct with energy buffers
*/
template <int unrollj> static void
reduceGroupEnergySimdBuffers(int numGroups,
int numGroups_2log,
- const real * gmx_restrict vVdwSimd,
- const real * gmx_restrict vCoulombSimd,
- real * gmx_restrict vVdw,
- real * gmx_restrict vCoulomb)
+ nbnxn_atomdata_output_t *out)
{
- const int unrollj_half = unrollj/2;
+ const int unrollj_half = unrollj/2;
/* Energies are stored in SIMD registers with size 2^numGroups_2log */
- const int numGroupsStorage = (1 << numGroups_2log);
+ const int numGroupsStorage = (1 << numGroups_2log);
+
+ const real * gmx_restrict vVdwSimd = out->VSvdw.data();
+ const real * gmx_restrict vCoulombSimd = out->VSc.data();
+ real * gmx_restrict vVdw = out->Vvdw.data();
+ real * gmx_restrict vCoulomb = out->Vc.data();
/* The size of the SIMD energy group buffer array is:
* numGroups*numGroups*numGroupsStorage*unrollj_half*simd_width
void
nbnxn_kernel_cpu(nonbonded_verlet_group_t *nbvg,
- const nbnxn_atomdata_t *nbat,
+ nbnxn_atomdata_t *nbat,
const interaction_const_t *ic,
rvec *shiftVectors,
int forceFlags,
}
}
+ const nbnxn_atomdata_t::Params &nbatParams = nbat->params();
+
int vdwkt = 0;
if (ic->vdwtype == evdwCUT)
{
{
case eintmodNONE:
case eintmodPOTSHIFT:
- switch (nbat->comb_rule)
+ switch (nbatParams.comb_rule)
{
case ljcrGEOM: vdwkt = vdwktLJCUT_COMBGEOM; break;
case ljcrLB: vdwkt = vdwktLJCUT_COMBLB; break;
if (clearF == enbvClearFYes)
{
- clear_f(nbat, nb, out->f);
+ clear_f(nbat, nb, out->f.data());
}
real *fshift_p;
}
else
{
- fshift_p = out->fshift;
+ fshift_p = out->fshift.data();
if (clearF == enbvClearFYes)
{
nbnxn_kernel_noener_ref[coulkt][vdwkt](nbl[nb], nbat,
ic,
shiftVectors,
- out->f,
+ out->f.data(),
fshift_p);
break;
#ifdef GMX_NBNXN_SIMD_2XNN
nbnxn_kernel_noener_simd_2xnn[coulkt][vdwkt](nbl[nb], nbat,
ic,
shiftVectors,
- out->f,
+ out->f.data(),
fshift_p);
break;
#endif
nbnxn_kernel_noener_simd_4xn[coulkt][vdwkt](nbl[nb], nbat,
ic,
shiftVectors,
- out->f,
+ out->f.data(),
fshift_p);
break;
#endif
GMX_RELEASE_ASSERT(false, "Unsupported kernel architecture");
}
}
- else if (out->nV == 1)
+ else if (out->Vvdw.size() == 1)
{
/* A single energy group (pair) */
out->Vvdw[0] = 0;
nbnxn_kernel_ener_ref[coulkt][vdwkt](nbl[nb], nbat,
ic,
shiftVectors,
- out->f,
+ out->f.data(),
fshift_p,
- out->Vvdw,
- out->Vc);
+ out->Vvdw.data(),
+ out->Vc.data());
break;
#ifdef GMX_NBNXN_SIMD_2XNN
case nbnxnk4xN_SIMD_2xNN:
nbnxn_kernel_ener_simd_2xnn[coulkt][vdwkt](nbl[nb], nbat,
ic,
shiftVectors,
- out->f,
+ out->f.data(),
fshift_p,
- out->Vvdw,
- out->Vc);
+ out->Vvdw.data(),
+ out->Vc.data());
break;
#endif
#ifdef GMX_NBNXN_SIMD_4XN
nbnxn_kernel_ener_simd_4xn[coulkt][vdwkt](nbl[nb], nbat,
ic,
shiftVectors,
- out->f,
+ out->f.data(),
fshift_p,
- out->Vvdw,
- out->Vc);
+ out->Vvdw.data(),
+ out->Vc.data());
break;
#endif
default:
nbnxn_kernel_energrp_ref[coulkt][vdwkt](nbl[nb], nbat,
ic,
shiftVectors,
- out->f,
+ out->f.data(),
fshift_p,
- out->Vvdw,
- out->Vc);
+ out->Vvdw.data(),
+ out->Vc.data());
break;
#ifdef GMX_NBNXN_SIMD_2XNN
case nbnxnk4xN_SIMD_2xNN:
nbnxn_kernel_energrp_simd_2xnn[coulkt][vdwkt](nbl[nb], nbat,
ic,
shiftVectors,
- out->f,
+ out->f.data(),
fshift_p,
- out->VSvdw,
- out->VSc);
+ out->VSvdw.data(),
+ out->VSc.data());
break;
#endif
#ifdef GMX_NBNXN_SIMD_4XN
nbnxn_kernel_energrp_simd_4xn[coulkt][vdwkt](nbl[nb], nbat,
ic,
shiftVectors,
- out->f,
+ out->f.data(),
fshift_p,
- out->VSvdw,
- out->VSc);
+ out->VSvdw.data(),
+ out->VSc.data());
break;
#endif
default:
switch (unrollj)
{
case 2:
- reduceGroupEnergySimdBuffers<2>(nbat->nenergrp,
- nbat->neg_2log,
- out->VSvdw, out->VSc,
- out->Vvdw, out->Vc);
+ reduceGroupEnergySimdBuffers<2>(nbatParams.nenergrp,
+ nbatParams.neg_2log,
+ out);
break;
case 4:
- reduceGroupEnergySimdBuffers<4>(nbat->nenergrp,
- nbat->neg_2log,
- out->VSvdw, out->VSc,
- out->Vvdw, out->Vc);
+ reduceGroupEnergySimdBuffers<4>(nbatParams.nenergrp,
+ nbatParams.neg_2log,
+ out);
break;
case 8:
- reduceGroupEnergySimdBuffers<8>(nbat->nenergrp,
- nbat->neg_2log,
- out->VSvdw, out->VSc,
- out->Vvdw, out->Vc);
+ reduceGroupEnergySimdBuffers<8>(nbatParams.nenergrp,
+ nbatParams.neg_2log,
+ out);
break;
default:
GMX_RELEASE_ASSERT(false, "Unsupported j-unroll size");
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2017, by the GROMACS development team, led by
+ * Copyright (c) 2017,2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
* within this function.
*
* \param[in,out] nbvg The group (local/non-local) to compute interaction for
- * \param[in] nbat The atomdata for the interactions
+ * \param[in,out] nbat The atomdata for the interactions
* \param[in] ic Non-bonded interaction constants
* \param[in] shiftVectors The PBC shift vectors
* \param[in] forceFlags Flags that tell what to compute
*/
void
nbnxn_kernel_cpu(nonbonded_verlet_group_t *nbvg,
- const nbnxn_atomdata_t *nbat,
+ nbnxn_atomdata_t *nbat,
const interaction_const_t *ic,
rvec *shiftVectors,
int forceFlags,
rvec *shift_vec,
int force_flags,
int clearF,
- real * f,
+ gmx::ArrayRef<real> f,
real * fshift,
real * Vc,
real * Vvdw)
{
const nbnxn_sci_t *nbln;
- const real *x;
gmx_bool bEner;
gmx_bool bEwald;
const real *Ftab = nullptr;
int int_bit;
real fexcl;
real c6, c12;
- const real * shiftvec;
- real * vdwparam;
- int * type;
const nbnxn_excl_t *excl[2];
int npair_tot, npair;
if (clearF == enbvClearFYes)
{
- clear_f(nbat, 0, f);
+ clear_f(nbat, 0, f.data());
}
bEner = ((force_flags & GMX_FORCE_ENERGY) != 0);
Ftab = iconst->tabq_coul_F;
}
- rcut2 = iconst->rcoulomb*iconst->rcoulomb;
- rvdw2 = iconst->rvdw*iconst->rvdw;
+ rcut2 = iconst->rcoulomb*iconst->rcoulomb;
+ rvdw2 = iconst->rvdw*iconst->rvdw;
- rlist2 = nbl->rlist*nbl->rlist;
+ rlist2 = nbl->rlist*nbl->rlist;
- type = nbat->type;
- facel = iconst->epsfac;
- shiftvec = shift_vec[0];
- vdwparam = nbat->nbfp;
- ntype = nbat->ntype;
+ const int *type = nbat->params().type.data();
+ facel = iconst->epsfac;
+ const real *shiftvec = shift_vec[0];
+ const real *vdwparam = nbat->params().nbfp.data();
+ ntype = nbat->params().numTypes;
- x = nbat->x;
+ const real *x = nbat->x().data();
npair_tot = 0;
nhwu = 0;
#include "gromacs/math/vectypes.h"
#include "gromacs/mdlib/nbnxn_pairlist.h"
#include "gromacs/mdtypes/forcerec.h"
+#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/real.h"
/* Reference (slow) kernel for nb n vs n GPU type pair lists */
rvec *shift_vec,
int force_flags,
int clearF,
- real * f,
+ gmx::ArrayRef<real> f,
real * fshift,
real * Vc,
real * Vvdw);
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015,2016,2018, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016,2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
cj = l_cj[cjind].cj;
#ifdef ENERGY_GROUPS
- egp_cj = nbat->energrp[cj];
+ egp_cj = nbatParams.energrp[cj];
#endif
for (i = 0; i < UNROLLI; i++)
{
#ifdef CALC_ENERGIES
#ifdef ENERGY_GROUPS
- Vvdw[egp_sh_i[i]+((egp_cj>>(nbat->neg_2log*j)) & egp_mask)] += VLJ;
+ Vvdw[egp_sh_i[i] + ((egp_cj >> (nbatParams.neg_2log*j)) & egp_mask)] += VLJ;
#else
Vvdw_ci += VLJ;
/* 1 flop for LJ energy addition */
#ifdef CALC_ENERGIES
#ifdef ENERGY_GROUPS
- Vc[egp_sh_i[i]+((egp_cj>>(nbat->neg_2log*j)) & egp_mask)] += vcoul;
+ Vc[egp_sh_i[i] + ((egp_cj >> (nbatParams.neg_2log*j)) & egp_mask)] += vcoul;
#else
Vc_ci += vcoul;
/* 1 flop for Coulomb energy addition */
)
{
const nbnxn_cj_t *l_cj;
- const int *type;
- const real *q;
- const real *shiftvec;
- const real *x;
- const real *nbfp;
real rcut2;
#ifdef VDW_CUTOFF_CHECK
real rvdw2;
#ifdef CALC_ENERGIES
real lje_vc;
#endif
- const real *ljc;
#endif
#ifdef CALC_COUL_RF
swF4 = 5*ic->vdw_switch.c5;
#endif
+ const nbnxn_atomdata_t::Params &nbatParams = nbat->params();
+
#ifdef LJ_EWALD
lje_coeff2 = ic->ewaldcoeff_lj*ic->ewaldcoeff_lj;
lje_coeff6_6 = lje_coeff2*lje_coeff2*lje_coeff2/6.0;
lje_vc = ic->sh_lj_ewald;
#endif
- ljc = nbat->nbfp_comb;
+ const real *ljc = nbatParams.nbfp_comb.data();
#endif
#ifdef CALC_COUL_RF
#endif
#ifdef ENERGY_GROUPS
- egp_mask = (1<<nbat->neg_2log) - 1;
+ egp_mask = (1 << nbatParams.neg_2log) - 1;
#endif
- rcut2 = ic->rcoulomb*ic->rcoulomb;
+ rcut2 = ic->rcoulomb*ic->rcoulomb;
#ifdef VDW_CUTOFF_CHECK
- rvdw2 = ic->rvdw*ic->rvdw;
+ rvdw2 = ic->rvdw*ic->rvdw;
#endif
- ntype2 = nbat->ntype*2;
- nbfp = nbat->nbfp;
- q = nbat->q;
- type = nbat->type;
- facel = ic->epsfac;
- shiftvec = shift_vec[0];
- x = nbat->x;
+ ntype2 = nbatParams.numTypes*2;
+ const real *nbfp = nbatParams.nbfp.data();
+ const real *q = nbatParams.q.data();
+ const int *type = nbatParams.type.data();
+ facel = ic->epsfac;
+ const real *shiftvec = shift_vec[0];
+ const real *x = nbat->x().data();
l_cj = nbl->cj.data();
#else
for (i = 0; i < UNROLLI; i++)
{
- egp_sh_i[i] = ((nbat->energrp[ci]>>(i*nbat->neg_2log)) & egp_mask)*nbat->nenergrp;
+ egp_sh_i[i] = ((nbatParams.energrp[ci] >> (i*nbatParams.neg_2log)) & egp_mask)*nbatParams.nenergrp;
}
#endif
#endif
{
int egp_ind;
#ifdef ENERGY_GROUPS
- egp_ind = egp_sh_i[i] + ((nbat->energrp[ci]>>(i*nbat->neg_2log)) & egp_mask);
+ egp_ind = egp_sh_i[i] + ((nbatParams.energrp[ci] >> (i*nbatParams.neg_2log)) & egp_mask);
#else
egp_ind = 0;
#endif
#ifdef LJ_EWALD
/* LJ Ewald self interaction */
- Vvdw[egp_ind] += 0.5*nbat->nbfp[nbat->type[ci*UNROLLI+i]*(nbat->ntype + 1)*2]/6*lje_coeff6_6;
+ Vvdw[egp_ind] += 0.5*nbatParams.nbfp[nbatParams.type[ci*UNROLLI+i]*(nbatParams.numTypes + 1)*2]/6*lje_coeff6_6;
#endif
}
}
nbnxn_cj_t * gmx_restrict cjInner = nbl->cj.data();
const real * gmx_restrict shiftvec = shift_vec[0];
- const real * gmx_restrict x = nbat->x;
+ const real * gmx_restrict x = nbat->x().data();
const real rlist2 = rlistInner*rlistInner;
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
{
int egps_j;
#if UNROLLJ == 2
- egps_j = nbat->energrp[cj>>1];
+ egps_j = nbatParams.energrp[cj >> 1];
egp_jj[0] = ((egps_j >> ((cj & 1)*egps_jshift)) & egps_jmask)*egps_jstride;
#else
/* We assume UNROLLI <= UNROLLJ */
for (jdi = 0; jdi < UNROLLJ/UNROLLI; jdi++)
{
int jj;
- egps_j = nbat->energrp[cj*(UNROLLJ/UNROLLI)+jdi];
+ egps_j = nbatParams.energrp[cj*(UNROLLJ/UNROLLI) + jdi];
for (jj = 0; jj < (UNROLLI/2); jj++)
{
egp_jj[jdi*(UNROLLI/2)+jj] = ((egps_j >> (jj*egps_jshift)) & egps_jmask)*egps_jstride;
{
using namespace gmx;
const nbnxn_cj_t *l_cj;
- const real *q;
- const real *shiftvec;
- const real *x;
- real facel;
int ci, ci_sh;
int ish, ish3;
gmx_bool do_LJ, half_LJ, do_coul;
SimdBool diagonal_mask1_S0, diagonal_mask1_S2;
#endif
- unsigned *exclusion_filter;
SimdBitMask filter_S0, filter_S2;
SimdReal zero_S(0.0);
#endif
#ifdef LJ_COMB_LB
- const real *ljc;
-
SimdReal hsig_i_S0, seps_i_S0;
SimdReal hsig_i_S2, seps_i_S2;
#else
alignas(GMX_SIMD_ALIGNMENT) real pvdw_c6[2*UNROLLI*UNROLLJ];
real *pvdw_c12 = pvdw_c6 + UNROLLI*UNROLLJ;
#endif
-
-#if defined LJ_COMB_GEOM || defined LJ_EWALD_GEOM
- const real *ljc;
-#endif
#endif /* LJ_COMB_LB */
SimdReal minRsq_S;
int npair = 0;
#endif
+ const nbnxn_atomdata_t::Params &nbatParams = nbat->params();
+
#if defined LJ_COMB_GEOM || defined LJ_COMB_LB || defined LJ_EWALD_GEOM
- ljc = nbat->lj_comb;
+ const real * gmx_restrict ljc = nbatParams.lj_comb.data();
#endif
#if !(defined LJ_COMB_GEOM || defined LJ_COMB_LB || defined FIX_LJ_C)
/* No combination rule used */
- real *nbfp_ptr = nbat->nbfp_aligned;
- const int *type = nbat->type;
+ const real * gmx_restrict nbfp_ptr = nbatParams.nbfp_aligned.data();
+ const int * gmx_restrict type = nbatParams.type.data();
#endif
/* Load j-i for the first i */
- diagonal_jmi_S = load<SimdReal>(nbat->simd_2xnn_diagonal_j_minus_i);
+ diagonal_jmi_S = load<SimdReal>(nbat->simdMasks.diagonal_2xnn_j_minus_i.data());
/* Generate all the diagonal masks as comparison results */
#if UNROLLI == UNROLLJ
diagonal_mask_S0 = (zero_S < diagonal_jmi_S);
/* Load masks for topology exclusion masking. filter_stride is
static const, so the conditional will be optimized away. */
#if GMX_DOUBLE && !GMX_SIMD_HAVE_INT32_LOGICAL
- exclusion_filter = nbat->simd_exclusion_filter64;
+ const std::uint64_t * gmx_restrict exclusion_filter = nbat->simdMasks.exclusion_filter64.data();
#else
- exclusion_filter = nbat->simd_exclusion_filter;
+ const std::uint32_t * gmx_restrict exclusion_filter = nbat->simdMasks.exclusion_filter.data();
#endif
/* Here we cast the exclusion filters from unsigned * to int * or real *.
rcvdw2_S = SimdReal(ic->rvdw*ic->rvdw);
#endif
- minRsq_S = SimdReal(NBNXN_MIN_RSQ);
+ minRsq_S = SimdReal(NBNXN_MIN_RSQ);
- q = nbat->q;
- facel = ic->epsfac;
- shiftvec = shift_vec[0];
- x = nbat->x;
+ const real * gmx_restrict q = nbatParams.q.data();
+ const real facel = ic->epsfac;
+ const real * gmx_restrict shiftvec = shift_vec[0];
+ const real * gmx_restrict x = nbat->x().data();
#ifdef FIX_LJ_C
#endif /* FIX_LJ_C */
#ifdef ENERGY_GROUPS
- egps_ishift = nbat->neg_2log;
+ egps_ishift = nbatParams.neg_2log;
egps_imask = (1<<egps_ishift) - 1;
- egps_jshift = 2*nbat->neg_2log;
+ egps_jshift = 2*nbatParams.neg_2log;
egps_jmask = (1<<egps_jshift) - 1;
egps_jstride = (UNROLLJ>>1)*UNROLLJ;
/* Major division is over i-particle energy groups, determine the stride */
- Vstride_i = nbat->nenergrp*(1<<nbat->neg_2log)*egps_jstride;
+ Vstride_i = nbatParams.nenergrp*(1 << nbatParams.neg_2log)*egps_jstride;
#endif
l_cj = nbl->cj.data();
half_LJ = (((ciEntry.shift & NBNXN_CI_HALF_LJ(0)) != 0) || !do_LJ) && do_coul;
#ifdef ENERGY_GROUPS
- egps_i = nbat->energrp[ci];
+ egps_i = nbatParams.energrp[ci];
{
int ia, egp_ia;
{
real c6_i;
- c6_i = nbat->nbfp[nbat->type[sci+ia]*(nbat->ntype + 1)*2]/6;
+ c6_i = nbatParams.nbfp[nbatParams.type[sci+ia]*(nbatParams.numTypes + 1)*2]/6;
#ifdef ENERGY_GROUPS
vvdwtp[ia][((egps_i>>(ia*egps_ishift)) & egps_imask)*egps_jstride]
#else
c12s_S2 = loadU1DualHsimd(ljc+sci2+STRIDE+2);
}
#elif !defined LJ_COMB_LB && !defined FIX_LJ_C
- const real *nbfp0 = nbfp_ptr + type[sci ]*nbat->ntype*c_simdBestPairAlignment;
- const real *nbfp1 = nbfp_ptr + type[sci+1]*nbat->ntype*c_simdBestPairAlignment;
+ const int numTypes = nbatParams.numTypes;
+ const real *nbfp0 = nbfp_ptr + type[sci ]*numTypes*c_simdBestPairAlignment;
+ const real *nbfp1 = nbfp_ptr + type[sci+1]*numTypes*c_simdBestPairAlignment;
const real *nbfp2 = nullptr, *nbfp3 = nullptr;
if (!half_LJ)
{
- nbfp2 = nbfp_ptr + type[sci+2]*nbat->ntype*c_simdBestPairAlignment;
- nbfp3 = nbfp_ptr + type[sci+3]*nbat->ntype*c_simdBestPairAlignment;
+ nbfp2 = nbfp_ptr + type[sci+2]*numTypes*c_simdBestPairAlignment;
+ nbfp3 = nbfp_ptr + type[sci+3]*numTypes*c_simdBestPairAlignment;
}
#endif
#endif
nbnxn_cj_t * gmx_restrict cjInner = nbl->cj.data();
const real * gmx_restrict shiftvec = shift_vec[0];
- const real * gmx_restrict x = nbat->x;
+ const real * gmx_restrict x = nbat->x().data();
const SimdReal rlist2_S(rlistInner*rlistInner);
SimdBitMask gmx_unused filter_S1,
SimdBitMask gmx_unused filter_S2,
SimdBitMask gmx_unused filter_S3,
- real gmx_unused *simd_interaction_array,
+ const real gmx_unused *simd_interaction_array,
gmx::SimdBool *interact_S0,
gmx::SimdBool *interact_S1,
gmx::SimdBool *interact_S2,
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
gmx_load_simd_4xn_interactions(l_cj[cjind].excl,
filter_S0, filter_S1,
filter_S2, filter_S3,
- nbat->simd_interaction_array,
+ nbat->simdMasks.interaction_array.data(),
&interact_S0, &interact_S1,
&interact_S2, &interact_S3);
#endif /* CHECK_EXCLS */
{
int egps_j;
#if UNROLLJ == 2
- egps_j = nbat->energrp[cj>>1];
+ egps_j = nbatParams.energrp[cj >> 1];
egp_jj[0] = ((egps_j >> ((cj & 1)*egps_jshift)) & egps_jmask)*egps_jstride;
#else
/* We assume UNROLLI <= UNROLLJ */
for (jdi = 0; jdi < UNROLLJ/UNROLLI; jdi++)
{
int jj;
- egps_j = nbat->energrp[cj*(UNROLLJ/UNROLLI)+jdi];
+ egps_j = nbatParams.energrp[cj*(UNROLLJ/UNROLLI) + jdi];
for (jj = 0; jj < (UNROLLI/2); jj++)
{
egp_jj[jdi*(UNROLLI/2)+jj] = ((egps_j >> (jj*egps_jshift)) & egps_jmask)*egps_jstride;
{
using namespace gmx;
const nbnxn_cj_t *l_cj;
- const real * q;
- const real *shiftvec;
- const real *x;
- real facel;
int ci, ci_sh;
int ish, ish3;
gmx_bool do_LJ, half_LJ, do_coul;
SimdBool diagonal_mask1_S0, diagonal_mask1_S1, diagonal_mask1_S2, diagonal_mask1_S3;
#endif
-#if GMX_DOUBLE && !GMX_SIMD_HAVE_INT32_LOGICAL
- std::uint64_t *exclusion_filter;
-#else
- std::uint32_t *exclusion_filter;
-#endif
SimdBitMask filter_S0, filter_S1, filter_S2, filter_S3;
SimdReal zero_S(0.0);
#endif
#ifdef LJ_COMB_LB
- const real *ljc;
-
SimdReal hsig_i_S0, seps_i_S0;
SimdReal hsig_i_S1, seps_i_S1;
SimdReal hsig_i_S2, seps_i_S2;
SimdReal hsig_i_S3, seps_i_S3;
-#else
-
-#if defined LJ_COMB_GEOM || defined LJ_EWALD_GEOM
- const real *ljc;
-#endif
#endif /* LJ_COMB_LB */
SimdReal minRsq_S;
int npair = 0;
#endif
+ const nbnxn_atomdata_t::Params &nbatParams = nbat->params();
+
#if defined LJ_COMB_GEOM || defined LJ_COMB_LB || defined LJ_EWALD_GEOM
- ljc = nbat->lj_comb;
+ const real * gmx_restrict ljc = nbatParams.lj_comb.data();
#endif
#if !(defined LJ_COMB_GEOM || defined LJ_COMB_LB || defined FIX_LJ_C)
/* No combination rule used */
- real *nbfp_ptr = nbat->nbfp_aligned;
- const int *type = nbat->type;
+ const real * gmx_restrict nbfp_ptr = nbatParams.nbfp_aligned.data();
+ const int * gmx_restrict type = nbatParams.type.data();
#endif
/* Load j-i for the first i */
- diagonal_jmi_S = load<SimdReal>(nbat->simd_4xn_diagonal_j_minus_i);
+ diagonal_jmi_S = load<SimdReal>(nbat->simdMasks.diagonal_4xn_j_minus_i.data());
/* Generate all the diagonal masks as comparison results */
#if UNROLLI == UNROLLJ
diagonal_mask_S0 = (zero_S < diagonal_jmi_S);
#if UNROLLI == 2*UNROLLJ
/* Load j-i for the second half of the j-cluster */
- diagonal_jmi_S = load<SimdReal>(nbat->simd_4xn_diagonal_j_minus_i + UNROLLJ);
+ diagonal_jmi_S = load<SimdReal>(nbat->simdMasks.diagonal_4xn_j_minus_i.data() + UNROLLJ);
#endif
diagonal_mask1_S0 = (zero_S < diagonal_jmi_S);
#endif
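
The diagonal_4xn_j_minus_i and diagonal_2xnn_j_minus_i arrays hold
precomputed j-atom minus i-atom offsets, so the comparison
zero_S < diagonal_jmi_S above yields the whole per-lane diagonal mask in one
SIMD operation. A scalar analogue, illustrative only:

static bool diagonalPairInteracts(int i, int j)
{
    /* For a cluster paired with itself only j > i pairs interact,
     * which skips the self pair and avoids double counting;
     * this is the scalar form of (zero_S < diagonal_jmi_S). */
    return (j - i) > 0;
}
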
#if GMX_DOUBLE && !GMX_SIMD_HAVE_INT32_LOGICAL
- exclusion_filter = nbat->simd_exclusion_filter64;
+ const std::uint64_t * gmx_restrict exclusion_filter = nbat->simdMasks.exclusion_filter64.data();
#else
- exclusion_filter = nbat->simd_exclusion_filter;
+ const std::uint32_t * gmx_restrict exclusion_filter = nbat->simdMasks.exclusion_filter.data();
#endif
/* Here we cast the exclusion filters from unsigned * to int * or real *.
rcvdw2_S = SimdReal(ic->rvdw*ic->rvdw);
#endif
- minRsq_S = SimdReal(NBNXN_MIN_RSQ);
+ minRsq_S = SimdReal(NBNXN_MIN_RSQ);
- q = nbat->q;
- facel = ic->epsfac;
- shiftvec = shift_vec[0];
- x = nbat->x;
+ const real * gmx_restrict q = nbatParams.q.data();
+ const real facel = ic->epsfac;
+ const real * gmx_restrict shiftvec = shift_vec[0];
+ const real * gmx_restrict x = nbat->x().data();
#ifdef FIX_LJ_C
alignas(GMX_SIMD_ALIGNMENT) real pvdw_c6[2*UNROLLI*UNROLLJ];
#endif /* FIX_LJ_C */
#ifdef ENERGY_GROUPS
- egps_ishift = nbat->neg_2log;
+ egps_ishift = nbatParams.neg_2log;
egps_imask = (1<<egps_ishift) - 1;
- egps_jshift = 2*nbat->neg_2log;
+ egps_jshift = 2*nbatParams.neg_2log;
egps_jmask = (1<<egps_jshift) - 1;
egps_jstride = (UNROLLJ>>1)*UNROLLJ;
/* Major division is over i-particle energy groups, determine the stride */
- Vstride_i = nbat->nenergrp*(1<<nbat->neg_2log)*egps_jstride;
+ Vstride_i = nbatParams.nenergrp*(1 << nbatParams.neg_2log)*egps_jstride;
#endif
l_cj = nbl->cj.data();
half_LJ = (((ciEntry.shift & NBNXN_CI_HALF_LJ(0)) != 0) || !do_LJ) && do_coul;
#ifdef ENERGY_GROUPS
- egps_i = nbat->energrp[ci];
+ egps_i = nbatParams.energrp[ci];
{
int ia, egp_ia;
{
real c6_i;
- c6_i = nbat->nbfp[nbat->type[sci+ia]*(nbat->ntype + 1)*2]/6;
+ c6_i = nbatParams.nbfp[nbatParams.type[sci+ia]*(nbatParams.numTypes + 1)*2]/6;
#ifdef ENERGY_GROUPS
vvdwtp[ia][((egps_i>>(ia*egps_ishift)) & egps_imask)*egps_jstride]
#else
c12s_S3 = setZero();
}
#else
- const real *nbfp0 = nbfp_ptr + type[sci ]*nbat->ntype*c_simdBestPairAlignment;
- const real *nbfp1 = nbfp_ptr + type[sci+1]*nbat->ntype*c_simdBestPairAlignment;
+ const int numTypes = nbatParams.numTypes;
+ const real *nbfp0 = nbfp_ptr + type[sci ]*numTypes*c_simdBestPairAlignment;
+ const real *nbfp1 = nbfp_ptr + type[sci+1]*numTypes*c_simdBestPairAlignment;
const real *nbfp2 = nullptr, *nbfp3 = nullptr;
if (!half_LJ)
{
- nbfp2 = nbfp_ptr + type[sci+2]*nbat->ntype*c_simdBestPairAlignment;
- nbfp3 = nbfp_ptr + type[sci+3]*nbat->ntype*c_simdBestPairAlignment;
+ nbfp2 = nbfp_ptr + type[sci+2]*numTypes*c_simdBestPairAlignment;
+ nbfp3 = nbfp_ptr + type[sci+3]*numTypes*c_simdBestPairAlignment;
}
#endif
#endif
nbnxn_cj_t * gmx_restrict cjInner = nbl->cj.data();
const real * gmx_restrict shiftvec = shift_vec[0];
- const real * gmx_restrict x = nbat->x;
+ const real * gmx_restrict x = nbat->x().data();
const SimdReal rlist2_S(rlistInner*rlistInner);
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
}
/* HtoD x, q */
- ocl_copy_H2D_async(adat->xq, nbatom->x + adat_begin * 4, adat_begin*sizeof(float)*4,
+ ocl_copy_H2D_async(adat->xq, nbatom->x().data() + adat_begin * 4, adat_begin*sizeof(float)*4,
adat_len * sizeof(float) * 4, stream, bDoTime ? t->nb_h2d[iloc].fetchNextEvent() : nullptr);
if (bDoTime)
* (and energies/shift forces if required).
*/
void nbnxn_gpu_launch_cpyback(gmx_nbnxn_ocl_t *nb,
- const struct nbnxn_atomdata_t *nbatom,
+ struct nbnxn_atomdata_t *nbatom,
int flags,
int aloc,
bool haveOtherWork)
}
/* DtoH f */
- ocl_copy_D2H_async(nbatom->out[0].f + adat_begin * 3, adat->f, adat_begin*3*sizeof(float),
+ ocl_copy_D2H_async(nbatom->out[0].f.data() + adat_begin * 3, adat->f, adat_begin*3*sizeof(float),
(adat_len)* adat->f_elem_size, stream, bDoTime ? t->nb_d2h[iloc].fetchNextEvent() : nullptr);
/* kick off work */
/* An element of the shift_vec device buffer has the same size as one element
of the host side shift_vec buffer. */
- ad->shift_vec_elem_size = sizeof(*nbnxn_atomdata_t::shift_vec);
+ ad->shift_vec_elem_size = sizeof(*nbnxn_atomdata_t::shift_vec.data());
ad->shift_vec = clCreateBuffer(runData->context, CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY,
SHIFTS * ad->shift_vec_elem_size, nullptr, &cl_error);
static void init_nbparam(cl_nbparam_t *nbp,
const interaction_const_t *ic,
const NbnxnListParameters *listParams,
- const nbnxn_atomdata_t *nbat,
+ const nbnxn_atomdata_t::Params &nbatParams,
const gmx_device_runtime_data_t *runData)
{
cl_int cl_error;
set_cutoff_parameters(nbp, ic, listParams);
map_interaction_types_to_gpu_kernel_flavors(ic,
- nbat->comb_rule,
+ nbatParams.comb_rule,
&(nbp->eeltype),
&(nbp->vdwtype));
{
if (ic->ljpme_comb_rule == ljcrGEOM)
{
- GMX_ASSERT(nbat->comb_rule == ljcrGEOM, "Combination rule mismatch!");
+ GMX_ASSERT(nbatParams.comb_rule == ljcrGEOM, "Combination rule mismatch!");
}
else
{
- GMX_ASSERT(nbat->comb_rule == ljcrLB, "Combination rule mismatch!");
+ GMX_ASSERT(nbatParams.comb_rule == ljcrLB, "Combination rule mismatch!");
}
}
/* generate table for PME */
("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
}
- int nnbfp = 2*nbat->ntype*nbat->ntype;
- int nnbfp_comb = 2*nbat->ntype;
+ const int nnbfp = 2*nbatParams.numTypes*nbatParams.numTypes;
+ const int nnbfp_comb = 2*nbatParams.numTypes;
{
/* Switched from using textures to using buffers */
&array_format, nnbfp, 1, 0, nbat->nbfp, &cl_error);
*/
- nbp->nbfp_climg2d = clCreateBuffer(runData->context, CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
- nnbfp*sizeof(cl_float), nbat->nbfp, &cl_error);
+ nbp->nbfp_climg2d =
+ clCreateBuffer(runData->context,
+ CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
+ nnbfp*sizeof(cl_float),
+ const_cast<float *>(nbatParams.nbfp.data()),
+ &cl_error);
GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
// TODO: decide which alternative is most efficient - textures or buffers.
/* nbp->nbfp_comb_climg2d = clCreateImage2D(runData->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
&array_format, nnbfp_comb, 1, 0, nbat->nbfp_comb, &cl_error);*/
- nbp->nbfp_comb_climg2d = clCreateBuffer(runData->context, CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
- nnbfp_comb*sizeof(cl_float), nbat->nbfp_comb, &cl_error);
+ nbp->nbfp_comb_climg2d =
+ clCreateBuffer(runData->context,
+ CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
+ nnbfp_comb*sizeof(cl_float),
+ const_cast<float *>(nbatParams.nbfp_comb.data()),
+ &cl_error);
GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
}
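
Note on the const_cast above: clCreateBuffer declares its host_ptr parameter
as a non-const void * even though CL_MEM_COPY_HOST_PTR only reads from it,
so passing the now const-qualified parameter tables requires casting away
const. A hypothetical wrapper that localizes the cast (not part of this
change):

#include <CL/cl.h>

static cl_mem createReadOnlyBufferFromHost(cl_context  context,
                                           size_t      bytes,
                                           const void *hostData,
                                           cl_int     *err)
{
    /* Safe: with CL_MEM_COPY_HOST_PTR the host memory is only copied from */
    return clCreateBuffer(context,
                          CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
                          bytes,
                          const_cast<void *>(hostData),
                          err);
}
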
static void nbnxn_ocl_init_const(gmx_nbnxn_ocl_t *nb,
const interaction_const_t *ic,
const NbnxnListParameters *listParams,
- const nbnxn_atomdata_t *nbat)
+ const nbnxn_atomdata_t::Params &nbatParams)
{
- init_atomdata_first(nb->atdat, nbat->ntype, nb->dev_rundata);
- init_nbparam(nb->nbparam, ic, listParams, nbat, nb->dev_rundata);
+ init_atomdata_first(nb->atdat, nbatParams.numTypes, nb->dev_rundata);
+ init_nbparam(nb->nbparam, ic, listParams, nbatParams, nb->dev_rundata);
}
init_timings(nb->timings);
}
- nbnxn_ocl_init_const(nb, ic, listParams, nbat);
+ nbnxn_ocl_init_const(nb, ic, listParams, nbat->params());
/* Enable LJ param manual prefetch for AMD or Intel or if we request through env. var.
* TODO: decide about NVIDIA
/* only if we have a dynamic box */
if (nbatom->bDynamicBox || !adat->bShiftVecUploaded)
{
- ocl_copy_H2D_async(adat->shift_vec, nbatom->shift_vec, 0,
+ ocl_copy_H2D_async(adat->shift_vec, nbatom->shift_vec.data(), 0,
SHIFTS * adat->shift_vec_elem_size, ls, nullptr);
adat->bShiftVecUploaded = CL_TRUE;
}
cl_atomdata_t *d_atdat = nb->atdat;
cl_command_queue ls = nb->stream[eintLocal];
- natoms = nbat->natoms;
+ natoms = nbat->numAtoms();
realloced = false;
if (bDoTime)
if (useLjCombRule(nb->nbparam->vdwtype))
{
- ocl_copy_H2D_async(d_atdat->lj_comb, nbat->lj_comb, 0,
+ ocl_copy_H2D_async(d_atdat->lj_comb, nbat->params().lj_comb.data(), 0,
natoms*sizeof(cl_float2), ls, bDoTime ? timers->atdat.fetchNextEvent() : nullptr);
}
else
{
- ocl_copy_H2D_async(d_atdat->atom_types, nbat->type, 0,
+ ocl_copy_H2D_async(d_atdat->atom_types, nbat->params().type.data(), 0,
natoms*sizeof(int), ls, bDoTime ? timers->atdat.fetchNextEvent() : nullptr);
}
#include <cstddef>
+#include "gromacs/gpu_utils/hostallocator.h"
#include "gromacs/math/vectypes.h"
#include "gromacs/mdlib/nbnxn_consts.h"
#include "gromacs/mdtypes/nblist.h"
struct NbnxnPairlistGpuWork;
struct tMPI_Atomic;
+/* Convenience type for vector with aligned memory */
+template<typename T>
+using AlignedVector = std::vector < T, gmx::AlignedAllocator < T>>;
+
/* Convenience type for vector that avoids initialization at resize() */
template<typename T>
using FastVector = std::vector < T, gmx::DefaultInitializationAllocator < T>>;
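
FastVector gets this behavior from an allocator whose construct() performs
default-initialization instead of value-initialization, so resize() does not
zero new trivial elements. A minimal sketch of that idiom, assuming nothing
about the real gmx::DefaultInitializationAllocator beyond its purpose:

#include <memory>
#include <type_traits>
#include <utility>

template<typename T, typename A = std::allocator<T>>
class DefaultInitAllocatorSketch : public A
{
    public:
        template<typename U>
        struct rebind
        {
            using other = DefaultInitAllocatorSketch<U, typename std::allocator_traits<A>::template rebind_alloc<U>>;
        };

        /* Called by resize() for new elements: default-initialize only */
        template<typename U>
        void construct(U *p) noexcept(std::is_nothrow_default_constructible<U>::value)
        {
            ::new (static_cast<void *>(p)) U;
        }

        /* All other constructions go to the underlying allocator */
        template<typename U, typename... Args>
        void construct(U *p, Args &&... args)
        {
            std::allocator_traits<A>::construct(static_cast<A &>(*this), p, std::forward<Args>(args)...);
        }
};
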
nbatXYZ, nbatXYZQ, nbatX4, nbatX8
};
-typedef struct {
- real *f; /* f, size natoms*fstride */
- real *fshift; /* Shift force array, size SHIFTS*DIM */
- int nV; /* The size of *Vvdw and *Vc */
- real *Vvdw; /* Temporary Van der Waals group energy storage */
- real *Vc; /* Temporary Coulomb group energy storage */
- int nVS; /* The size of *VSvdw and *VSc */
- real *VSvdw; /* Temporary SIMD Van der Waals group energy storage */
- real *VSc; /* Temporary SIMD Coulomb group energy storage */
-} nbnxn_atomdata_output_t;
+// Struct that holds force and energy output buffers
+struct nbnxn_atomdata_output_t
+{
+ /* Constructor
+ *
+ * \param[in] nb_kernel_type Type of non-bonded kernel
+ * \param[in] numEnergyGroups The number of energy groups
+ * \param[in] simdEnergyBufferStride Stride for entries in the energy buffers for SIMD kernels
+ * \param[in] pinningPolicy Sets the pinning policy for all buffers used on the GPU
+ */
+ nbnxn_atomdata_output_t(int nb_kernel_type,
+ int numEnergyGroups,
+                            int simdEnergyBufferStride,
+ gmx::PinningPolicy pinningPolicy);
+
+ gmx::HostVector<real> f; // f, size natoms*fstride
+ gmx::HostVector<real> fshift; // Shift force array, size SHIFTS*DIM
+ gmx::HostVector<real> Vvdw; // Temporary Van der Waals group energy storage
+ gmx::HostVector<real> Vc; // Temporary Coulomb group energy storage
+ AlignedVector<real> VSvdw; // Temporary SIMD Van der Waals group energy storage
+ AlignedVector<real> VSc; // Temporary SIMD Coulomb group energy storage
+};
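
With the buffers owned by a struct that has a real constructor, the
per-thread output objects can be built in place. An illustrative fragment
(numThreads, kernelType, numEnergyGroups, simdStride and pinningPolicy are
assumed to come from the caller):

std::vector<nbnxn_atomdata_output_t> out;
out.reserve(numThreads);
for (int th = 0; th < numThreads; th++)
{
    /* One force/energy output buffer set per OpenMP thread */
    out.emplace_back(kernelType, numEnergyGroups, simdStride, pinningPolicy);
}
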
/* Block size in atoms for the non-bonded thread force-buffer reduction,
* should be a multiple of all cell and x86 SIMD sizes (i.e. 2, 4 and 8).
ljcrGEOM, ljcrLB, ljcrNONE, ljcrNR
};
-typedef struct nbnxn_atomdata_t { //NOLINT(clang-analyzer-optin.performance.Padding)
- nbnxn_alloc_t *alloc;
- nbnxn_free_t *free;
- int ntype; /* The number of different atom types */
- real *nbfp; /* Lennard-Jones 6*C6 and 12*C12 params, size ntype^2*2 */
- int comb_rule; /* Combination rule, see enum above */
- real *nbfp_comb; /* LJ parameter per atom type, size ntype*2 */
- real *nbfp_aligned; /* As nbfp, but with an alignment (stride) suitable
- * for the present SIMD architectures
- */
- int natoms; /* Number of atoms */
- int natoms_local; /* Number of local atoms */
- int *type; /* Atom types */
- real *lj_comb; /* LJ parameters per atom for combining for pairs */
- int XFormat; /* The format of x (and q), enum */
- int FFormat; /* The format of f, enum */
- real *q; /* Charges, can be NULL if incorporated in x */
- int na_c; /* The number of atoms per cluster */
- int nenergrp; /* The number of energy groups */
- int neg_2log; /* Log2 of nenergrp */
- int *energrp; /* The energy groups per cluster, can be NULL */
- gmx_bool bDynamicBox; /* Do we need to update shift_vec every step? */
- rvec *shift_vec; /* Shift vectors, copied from t_forcerec */
- int xstride; /* stride for a coordinate in x (usually 3 or 4) */
- int fstride; /* stride for a coordinate in f (usually 3 or 4) */
- real *x; /* x and possibly q, size natoms*xstride */
-
- /* j-atom minus i-atom index for generating self and Newton exclusions
- * cluster-cluster pairs of the diagonal, for 4xn and 2xnn kernels.
+/* Struct that stores atom-related data for the nbnxn module
+ *
+ * Note: performance would improve slightly if the std::vector containers
+ * in this struct did not initialize their elements during resize().
+ */
+struct nbnxn_atomdata_t
+{ //NOLINT(clang-analyzer-optin.performance.Padding)
+ struct Params
+ {
+ /* Constructor
+ *
+         * \param[in] pinningPolicy  Sets the pinning policy for all data that might be transferred to a GPU
+ */
+ Params(gmx::PinningPolicy pinningPolicy);
+
+ // The number of different atom types
+ int numTypes;
+        // Lennard-Jones 6*C6 and 12*C12 parameters, size numTypes^2*2
+ gmx::HostVector<real> nbfp;
+ // Combination rule, see enum defined above
+ int comb_rule;
+ // LJ parameters per atom type, size numTypes*2
+ gmx::HostVector<real> nbfp_comb;
+ // As nbfp, but with a stride for the present SIMD architecture
+ AlignedVector<real> nbfp_aligned;
+ // Atom types per atom
+ gmx::HostVector<int> type;
+ // LJ parameters per atom for fast SIMD loading
+ gmx::HostVector<real> lj_comb;
+ // Charges per atom, not set with format nbatXYZQ
+ gmx::HostVector<real> q;
+ // The number of energy groups
+ int nenergrp;
+        // log2(nenergrp)
+ int neg_2log;
+ // The energy groups, one int entry per cluster, only set when needed
+ gmx::HostVector<int> energrp;
+ };
+
+ // Diagonal and topology exclusion helper data for all SIMD kernels
+ struct SimdMasks
+ {
+ SimdMasks();
+
+ // Helper data for setting up diagonal exclusion masks in the SIMD 4xN kernels
+ AlignedVector<real> diagonal_4xn_j_minus_i;
+        // Helper data for setting up diagonal exclusion masks in the SIMD 2xNN kernels
+ AlignedVector<real> diagonal_2xnn_j_minus_i;
+ // Filters for topology exclusion masks for the SIMD kernels
+ AlignedVector<uint32_t> exclusion_filter;
+        // Filters for topology exclusion masks for double-precision SIMD kernels without SIMD int32 logical support
+ AlignedVector<uint64_t> exclusion_filter64;
+ // Array of masks needed for exclusions
+ AlignedVector<real> interaction_array;
+ };
+
+ /* Constructor
+ *
+     * \param[in] pinningPolicy  Sets the pinning policy for all data that might be transferred to a GPU
*/
- real *simd_4xn_diagonal_j_minus_i;
- real *simd_2xnn_diagonal_j_minus_i;
- /* Filters for topology exclusion masks for the SIMD kernels. */
- uint32_t *simd_exclusion_filter;
- uint64_t *simd_exclusion_filter64; //!< Used for double w/o SIMD int32 logical support
- real *simd_interaction_array; /* Array of masks needed for exclusions */
- int nout; /* The number of force arrays */
- nbnxn_atomdata_output_t *out; /* Output data structures */
- int nalloc; /* Allocation size of all arrays (for x/f *x/fstride) */
- gmx_bool bUseBufferFlags; /* Use the flags or operate on all atoms */
- nbnxn_buffer_flags_t buffer_flags; /* Flags for buffer zeroing+reduc. */
- gmx_bool bUseTreeReduce; /* Use tree for force reduction */
- tMPI_Atomic *syncStep; /* Synchronization step for tree reduce */
-} nbnxn_atomdata_t;
+ nbnxn_atomdata_t(gmx::PinningPolicy pinningPolicy);
+
+ /* Returns a const reference to the parameters */
+ const Params ¶ms() const
+ {
+ return params_;
+ }
+
+    /* Returns a non-const reference to the parameters; deprecated, use params() for read access */
+ Params ¶msDeprecated()
+ {
+ return params_;
+ }
+
+ /* Returns the current total number of atoms stored */
+ int numAtoms() const
+ {
+ return numAtoms_;
+ }
+
+    /* Returns a read-only view of the coordinate buffer (and q with xFormat==nbatXYZQ) */
+ gmx::ArrayRef<const real> x() const
+ {
+ return x_;
+ }
+
+    /* Returns a modifiable view of the coordinate buffer (and q with xFormat==nbatXYZQ) */
+ gmx::ArrayRef<real> x()
+ {
+ return x_;
+ }
+
+ /* Resizes the coordinate buffer and sets the number of atoms */
+ void resizeCoordinateBuffer(int numAtoms);
+
+ /* Resizes the force buffers for the current number of atoms */
+ void resizeForceBuffers();
+
+ private:
+ // The LJ and charge parameters
+ Params params_;
+ // The total number of atoms currently stored
+ int numAtoms_;
+ public:
+ int natoms_local; /* Number of local atoms */
+ int XFormat; /* The format of x (and q), enum */
+ int FFormat; /* The format of f, enum */
+ gmx_bool bDynamicBox; /* Do we need to update shift_vec every step? */
+ gmx::HostVector<gmx::RVec> shift_vec; /* Shift vectors, copied from t_forcerec */
+ int xstride; /* stride for a coordinate in x (usually 3 or 4) */
+ int fstride; /* stride for a coordinate in f (usually 3 or 4) */
+ private:
+ gmx::HostVector<real> x_; /* x and possibly q, size natoms*xstride */
+
+ public:
+ // Masks for handling exclusions in the SIMD kernels
+ const SimdMasks simdMasks;
+
+ /* Output data */
+ std::vector<nbnxn_atomdata_output_t> out; /* Output data structures, 1 per thread */
+
+ /* Reduction related data */
+ gmx_bool bUseBufferFlags; /* Use the flags or operate on all atoms */
+ nbnxn_buffer_flags_t buffer_flags; /* Flags for buffer zeroing+reduc. */
+ gmx_bool bUseTreeReduce; /* Use tree for force reduction */
+ tMPI_Atomic *syncStep; /* Synchronization step for tree reduce */
+};
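
An illustrative use of the encapsulated interface declared above; numAtoms is
assumed, and gmx::PinningPolicy::PinnedIfSupported is one of the pinning
policy values:

nbnxn_atomdata_t nbat(gmx::PinningPolicy::PinnedIfSupported);
nbat.resizeCoordinateBuffer(numAtoms);                        /* sets numAtoms() */
gmx::ArrayRef<real>             xBuffer    = nbat.x();        /* mutable view */
const nbnxn_atomdata_t::Params &nbatParams = nbat.params();   /* read-only */
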
#endif
reallocate_nblist(nlist);
}
- ngid = nbat->nenergrp;
+ const nbnxn_atomdata_t::Params &nbatParams = nbat->params();
+
+ ngid = nbatParams.nenergrp;
if (ngid*jGrid.na_cj > gmx::index(sizeof(gid_cj)*8))
{
iGrid.na_c, jGrid.na_cj, (sizeof(gid_cj)*8)/jGrid.na_cj);
}
- egp_shift = nbat->neg_2log;
- egp_mask = (1<<nbat->neg_2log) - 1;
+ egp_shift = nbatParams.neg_2log;
+ egp_mask = (1 << egp_shift) - 1;
/* Loop over the atoms in the i sub-cell */
bFEP_i_all = TRUE;
if (ngid > 1)
{
- gid_i = (nbat->energrp[ci] >> (egp_shift*i)) & egp_mask;
+ gid_i = (nbatParams.energrp[ci] >> (egp_shift*i)) & egp_mask;
}
for (int cj_ind = cj_ind_start; cj_ind < cj_ind_end; cj_ind++)
fep_cj = jGrid.fep[cjr];
if (ngid > 1)
{
- gid_cj = nbat->energrp[cja];
+ gid_cj = nbatParams.energrp[cja];
}
}
else if (2*jGrid.na_cj == jGrid.na_c)
fep_cj = (jGrid.fep[cjr>>1] >> ((cjr&1)*jGrid.na_cj)) & ((1<<jGrid.na_cj) - 1);
if (ngid > 1)
{
- gid_cj = nbat->energrp[cja>>1] >> ((cja&1)*jGrid.na_cj*egp_shift) & ((1<<(jGrid.na_cj*egp_shift)) - 1);
+ gid_cj = nbatParams.energrp[cja>>1] >> ((cja&1)*jGrid.na_cj*egp_shift) & ((1<<(jGrid.na_cj*egp_shift)) - 1);
}
}
else
fep_cj = jGrid.fep[cjr*2] + (jGrid.fep[cjr*2+1] << jGrid.na_c);
if (ngid > 1)
{
- gid_cj = nbat->energrp[cja*2] + (nbat->energrp[cja*2+1] << (jGrid.na_c*egp_shift));
+ gid_cj = nbatParams.energrp[cja*2] + (nbatParams.energrp[cja*2+1] << (jGrid.na_c*egp_shift));
}
}
bFEP_i = ((iGrid.fep[c_abs - iGrid.cell0*c_gpuNumClusterPerCell] & (1 << i)) != 0u);
- xi = nbat->x[ind_i*nbat->xstride+XX] + shx;
- yi = nbat->x[ind_i*nbat->xstride+YY] + shy;
- zi = nbat->x[ind_i*nbat->xstride+ZZ] + shz;
+ xi = nbat->x()[ind_i*nbat->xstride+XX] + shx;
+ yi = nbat->x()[ind_i*nbat->xstride+YY] + shy;
+ zi = nbat->x()[ind_i*nbat->xstride+ZZ] + shz;
if ((nlist->nrj + cj4_ind_end - cj4_ind_start)*c_nbnxnGpuJgroupSize*nbl->na_cj > nlist->maxnrj)
{
excl_pair = a_mod_wj(j)*nbl->na_ci + i;
excl_bit = (1U << (gcj*c_gpuNumClusterPerCell + c));
- dx = nbat->x[ind_j*nbat->xstride+XX] - xi;
- dy = nbat->x[ind_j*nbat->xstride+YY] - yi;
- dz = nbat->x[ind_j*nbat->xstride+ZZ] - zi;
+ dx = nbat->x()[ind_j*nbat->xstride+XX] - xi;
+ dy = nbat->x()[ind_j*nbat->xstride+YY] - yi;
+ dz = nbat->x()[ind_j*nbat->xstride+ZZ] - zi;
/* The unpruned GPU list has more than 2/3
* of the atom pairs beyond rlist. Using
makeClusterListSimple(jGrid,
nbl, ci, firstCell, lastCell,
excludeSubDiagonal,
- nbat->x,
+ nbat->x().data(),
rlist2, rbb2,
numDistanceChecks);
break;
makeClusterListSimd4xn(jGrid,
nbl, ci, firstCell, lastCell,
excludeSubDiagonal,
- nbat->x,
+ nbat->x().data(),
rlist2, rbb2,
numDistanceChecks);
break;
makeClusterListSimd2xnn(jGrid,
nbl, ci, firstCell, lastCell,
excludeSubDiagonal,
- nbat->x,
+ nbat->x().data(),
rlist2, rbb2,
numDistanceChecks);
break;
make_cluster_list_supersub(iGrid, jGrid,
nbl, ci, cj,
excludeSubDiagonal,
- nbat->xstride, nbat->x,
+ nbat->xstride, nbat->x().data(),
rlist2, rbb2,
numDistanceChecks);
}
nbl->work);
icell_set_x(cell0_i+ci, shx, shy, shz,
- nbat->xstride, nbat->x,
+ nbat->xstride, nbat->x().data(),
nb_kernel_type,
nbl->work);
fprintf(debug, "ns making %d nblists\n", nnbl);
}
- nbat->bUseBufferFlags = (nbat->nout > 1);
+ nbat->bUseBufferFlags = (nbat->out.size() > 1);
/* We should re-init the flags before making the first list */
if (nbat->bUseBufferFlags && LOCAL_I(iloc))
{
- init_buffer_flags(&nbat->buffer_flags, nbat->natoms);
+ init_buffer_flags(&nbat->buffer_flags, nbat->numAtoms());
}
int nzi;
*/
if (nbat->bUseBufferFlags && ((zi == 0 && zj == 0)))
{
- init_buffer_flags(&nbs->work[th].buffer_flags, nbat->natoms);
+ init_buffer_flags(&nbs->work[th].buffer_flags, nbat->numAtoms());
}
if (CombineNBLists && th > 0)