Refactor PME atom handling

author Berk Hess <hess@kth.se>

Wed, 15 May 2019 11:04:44 +0000 (13:04 +0200)

committer Mark Abraham <mark.j.abraham@gmail.com>

Wed, 19 Jun 2019 19:52:59 +0000 (21:52 +0200)
author Berk Hess <hess@kth.se>
Wed, 15 May 2019 11:04:44 +0000 (13:04 +0200)
committer Mark Abraham <mark.j.abraham@gmail.com>
Wed, 19 Jun 2019 19:52:59 +0000 (21:52 +0200)
diff --git a/src/external/thread_mpi/include/thread_mpi/tmpi.h b/src/external/thread_mpi/include/thread_mpi/tmpi.h

index 87a31c3fc69508c4e89ac2476394c51417aeeba8..b4371da975df69748e7a2de4b79c66585ab81b56 100644 (file)
--- a/src/external/thread_mpi/include/thread_mpi/tmpi.h
+++ b/src/external/thread_mpi/include/thread_mpi/tmpi.h
@@ -2,7 +2,7 @@
     This source code file is part of thread_mpi.
     Written by Sander Pronk, Erik Lindahl, and possibly others.
  
-   Copyright (c) 2009,2016,2018, Sander Pronk, Erik Lindahl.
+   Copyright (c) 2009,2016,2018,2019, Sander Pronk, Erik Lindahl.
     All rights reserved.
  
     Redistribution and use in source and binary forms, with or without
@@ -252,10 +252,10 @@ extern tMPI_Comm TMPI_COMM_WORLD;
  
  /** A pre-defined NULL communicator to compare against, to check comm
             validity */
-#define TMPI_COMM_NULL NULL
+#define TMPI_COMM_NULL nullptr
  /** A pre-defined NULL group to compare against, to check group
             validity */
-#define TMPI_GROUP_NULL NULL
+#define TMPI_GROUP_NULL nullptr
  
  /** the empty group */
  extern tMPI_Group TMPI_GROUP_EMPTY;
diff --git a/src/gromacs/domdec/mdsetup.cpp b/src/gromacs/domdec/mdsetup.cpp

index b1457a2240455bd12e0d62e2d5e736ceec65e021..84ff045e94b36117ef1d3432c37b309189485615 100644 (file)
--- a/src/gromacs/domdec/mdsetup.cpp
+++ b/src/gromacs/domdec/mdsetup.cpp
@@ -138,12 +138,14 @@ void mdAlgorithmsSetupAtomData(const t_commrec  *cr,
                             fr->gpuBonded != nullptr,
                             top->idef);
  
-    gmx_pme_reinit_atoms(fr->pmedata, numHomeAtoms, mdatoms->chargeA);
-    /* This handles the PP+PME rank case where fr->pmedata is valid.
-     * For PME-only ranks, gmx_pmeonly() has its own call to gmx_pme_reinit_atoms().
-     * TODO: this only handles the GPU logic so far, should handle CPU as well.
-     * TODO: this also does not account for TPI.
-     */
+    if (EEL_PME(fr->ic->eeltype) && (cr->duty & DUTY_PME))
+    {
+        /* This handles the PP+PME rank case where fr->pmedata is valid.
+         * For PME-only ranks, gmx_pmeonly() has its own call to gmx_pme_reinit_atoms().
+         */
+        const int numPmeAtoms = numHomeAtoms - fr->n_tpi;
+        gmx_pme_reinit_atoms(fr->pmedata, numPmeAtoms, mdatoms->chargeA);
+    }
  
      if (constr)
      {
diff --git a/src/gromacs/ewald/pme.cpp b/src/gromacs/ewald/pme.cpp

index efe1738a78180204d12ee99d2dba3965062516a5..83ad4561728acce16534e75d5f3214cd5baa82d5 100644 (file)
--- a/src/gromacs/ewald/pme.cpp
+++ b/src/gromacs/ewald/pme.cpp
@@ -272,7 +272,7 @@ gmx::PinningPolicy pme_get_pinning_policy()
  const int gmxCacheLineSize = 64;
  
  //! Set up coordinate communication
-static void setup_coordinate_communication(pme_atomcomm_t *atc)
+static void setup_coordinate_communication(PmeAtomComm *atc)
  {
      int nslab, n, i;
      int fw, bw;
@@ -286,14 +286,14 @@ static void setup_coordinate_communication(pme_atomcomm_t *atc)
          bw = (atc->nodeid - i + nslab) % nslab;
          if (n < nslab - 1)
          {
-            atc->node_dest[n] = fw;
-            atc->node_src[n]  = bw;
+            atc->slabCommSetup[n].node_dest = fw;
+            atc->slabCommSetup[n].node_src  = bw;
              n++;
          }
          if (n < nslab - 1)
          {
-            atc->node_dest[n] = bw;
-            atc->node_src[n]  = fw;
+            atc->slabCommSetup[n].node_dest = bw;
+            atc->slabCommSetup[n].node_src  = fw;
              n++;
          }
      }
@@ -323,112 +323,64 @@ static double estimate_pme_load_imbalance(struct gmx_pme_t *pme)
      return (n1 + n2 + 3*n3)/static_cast<double>(6*pme->nkx*pme->nky*pme->nkz);
  }
  
-/*! \brief Initialize atom communication data structure */
-static void init_atomcomm(struct gmx_pme_t *pme, pme_atomcomm_t *atc,
-                          int dimind, gmx_bool bSpread)
+#ifndef DOXYGEN
+
+PmeAtomComm::PmeAtomComm(MPI_Comm   PmeMpiCommunicator,
+                         const int  numThreads,
+                         const int  pmeOrder,
+                         const int  dimIndex,
+                         const bool doSpread) :
+    dimind(dimIndex),
+    bSpread(doSpread),
+    pme_order(pmeOrder),
+    nthread(numThreads),
+    spline(nthread)
  {
-    int thread;
-
-    atc->dimind    = dimind;
-    atc->nslab     = 1;
-    atc->nodeid    = 0;
-    atc->pd_nalloc = 0;
-#if GMX_MPI
-    if (pme->nnodes > 1)
+    if (PmeMpiCommunicator != MPI_COMM_NULL)
      {
-        atc->mpi_comm = pme->mpi_comm_d[dimind];
-        MPI_Comm_size(atc->mpi_comm, &atc->nslab);
-        MPI_Comm_rank(atc->mpi_comm, &atc->nodeid);
+        mpi_comm = PmeMpiCommunicator;
+#if GMX_MPI
+        MPI_Comm_size(mpi_comm, &nslab);
+        MPI_Comm_rank(mpi_comm, &nodeid);
+#endif
      }
      if (debug)
      {
-        fprintf(debug, "For PME atom communication in dimind %d: nslab %d rank %d\n", atc->dimind, atc->nslab, atc->nodeid);
+        fprintf(debug, "For PME atom communication in dimind %d: nslab %d rank %d\n", dimind, nslab, nodeid);
      }
-#endif
  
-    atc->bSpread   = bSpread;
-    atc->pme_order = pme->pme_order;
-
-    if (atc->nslab > 1)
+    if (nslab > 1)
      {
-        snew(atc->node_dest, atc->nslab);
-        snew(atc->node_src, atc->nslab);
-        setup_coordinate_communication(atc);
-
-        snew(atc->count_thread, pme->nthread);
-        for (thread = 0; thread < pme->nthread; thread++)
-        {
-            snew(atc->count_thread[thread], atc->nslab);
-        }
-        atc->count = atc->count_thread[0];
-        snew(atc->rcount, atc->nslab);
-        snew(atc->buf_index, atc->nslab);
-    }
+        slabCommSetup.resize(nslab);
+        setup_coordinate_communication(this);
  
-    atc->nthread = pme->nthread;
-    if (atc->nthread > 1)
-    {
-        snew(atc->thread_plist, atc->nthread);
-    }
-    snew(atc->spline, atc->nthread);
-    for (thread = 0; thread < atc->nthread; thread++)
-    {
-        if (atc->nthread > 1)
+        count_thread.resize(nthread);
+        for (auto &countThread : count_thread)
          {
-            snew(atc->thread_plist[thread].n, atc->nthread+2*gmxCacheLineSize);
-            atc->thread_plist[thread].n += gmxCacheLineSize;
+            countThread.resize(nslab);
          }
      }
-}
  
-/*! \brief Destroy an atom communication data structure and its child structs */
-static void destroy_atomcomm(pme_atomcomm_t *atc)
-{
-    sfree(atc->pd);
-    if (atc->nslab > 1)
+    if (nthread > 1)
      {
-        sfree(atc->node_dest);
-        sfree(atc->node_src);
-        for (int i = 0; i < atc->nthread; i++)
-        {
-            sfree(atc->count_thread[i]);
-        }
-        sfree(atc->count_thread);
-        sfree(atc->rcount);
-        sfree(atc->buf_index);
-
-        sfree(atc->x);
-        sfree(atc->coefficient);
-        sfree(atc->f);
-    }
-    sfree(atc->idx);
-    sfree(atc->fractx);
+        threadMap.resize(nthread);
  
-    sfree(atc->thread_idx);
-    for (int i = 0; i < atc->nthread; i++)
-    {
-        if (atc->nthread > 1)
-        {
-            int *n_ptr = atc->thread_plist[i].n - gmxCacheLineSize;
-            sfree(n_ptr);
-            sfree(atc->thread_plist[i].i);
-        }
-        sfree(atc->spline[i].ind);
-        for (int d = 0; d < ZZ; d++)
+#pragma omp parallel for num_threads(nthread) schedule(static)
+        for (int thread = 0; thread < nthread; thread++)
          {
-            sfree(atc->spline[i].theta[d]);
-            sfree(atc->spline[i].dtheta[d]);
+            try
+            {
+                /* Allocate buffer with padding to avoid cache polution */
+                threadMap[thread].nBuffer.resize(nthread + 2*gmxCacheLineSize);
+                threadMap[thread].n = threadMap[thread].nBuffer.data() + gmxCacheLineSize;
+            }
+            GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
          }
-        sfree_aligned(atc->spline[i].ptr_dtheta_z);
-        sfree_aligned(atc->spline[i].ptr_theta_z);
-    }
-    if (atc->nthread > 1)
-    {
-        sfree(atc->thread_plist);
      }
-    sfree(atc->spline);
  }
  
+#endif // !DOXYGEN
+
  /*! \brief Initialize data structure for communication */
  static void
  init_overlap_comm(pme_overlap_t *  ol,
@@ -631,7 +583,6 @@ static int div_round_up(int enumerator, int denominator)
  gmx_pme_t *gmx_pme_init(const t_commrec         *cr,
                          const NumPmeDomains     &numPmeDomains,
                          const t_inputrec        *ir,
-                        int                      homenr,
                          gmx_bool                 bFreeEnergy_q,
                          gmx_bool                 bFreeEnergy_lj,
                          gmx_bool                 bReproducible,
@@ -692,9 +643,6 @@ gmx_pme_t *gmx_pme_init(const t_commrec         *cr,
          pme->ndecompdim   = 0;
          pme->nodeid_major = 0;
          pme->nodeid_minor = 0;
-#if GMX_MPI
-        pme->mpi_comm_d[0] = pme->mpi_comm_d[1] = MPI_COMM_NULL;
-#endif
      }
      else
      {
@@ -959,22 +907,21 @@ gmx_pme_t *gmx_pme_init(const t_commrec         *cr,
      }
  
      /* Use atc[0] for spreading */
-    init_atomcomm(pme.get(), &pme->atc[0], numPmeDomains.x > 1 ? 0 : 1, TRUE);
+    const int firstDimIndex   = (numPmeDomains.x > 1 ? 0 : 1);
+    MPI_Comm  mpiCommFirstDim = (pme->nnodes > 1 ? pme->mpi_comm_d[firstDimIndex] : MPI_COMM_NULL);
+    bool      doSpread        = true;
+    pme->atc.emplace_back(mpiCommFirstDim, pme->nthread,
+                          pme->pme_order,
+                          firstDimIndex, doSpread);
      if (pme->ndecompdim >= 2)
      {
-        init_atomcomm(pme.get(), &pme->atc[1], 1, FALSE);
+        const int secondDimIndex = 1;
+        doSpread                 = false;
+        pme->atc.emplace_back(pme->mpi_comm_d[1], pme->nthread,
+                              pme->pme_order,
+                              secondDimIndex, doSpread);
      }
  
-    if (pme->nnodes == 1)
-    {
-        pme->atc[0].n = homenr;
-        pme_realloc_atomcomm_things(&pme->atc[0]);
-    }
-
-    pme->lb_buf1       = nullptr;
-    pme->lb_buf2       = nullptr;
-    pme->lb_buf_nalloc = 0;
-
      if (pme_gpu_active(pme.get()))
      {
          if (!pme->gpu)
@@ -1005,8 +952,6 @@ void gmx_pme_reinit(struct gmx_pme_t **pmedata,
                      real               ewaldcoeff_q,
                      real               ewaldcoeff_lj)
  {
-    int        homenr;
-
      // Create a copy of t_inputrec fields that are used in gmx_pme_init().
      // TODO: This would be better as just copying a sub-structure that contains
      // all the PME parameters and nothing else.
@@ -1022,15 +967,6 @@ void gmx_pme_reinit(struct gmx_pme_t **pmedata,
      irc.nky                    = grid_size[YY];
      irc.nkz                    = grid_size[ZZ];
  
-    if (pme_src->nnodes == 1)
-    {
-        homenr = pme_src->atc[0].n;
-    }
-    else
-    {
-        homenr = -1;
-    }
-
      try
      {
          const gmx::MDLogger dummyLogger;
@@ -1040,8 +976,15 @@ void gmx_pme_reinit(struct gmx_pme_t **pmedata,
          GMX_ASSERT(pmedata, "Invalid PME pointer");
          NumPmeDomains numPmeDomains = { pme_src->nnodes_major, pme_src->nnodes_minor };
          *pmedata = gmx_pme_init(cr, numPmeDomains,
-                                &irc, homenr, pme_src->bFEP_q, pme_src->bFEP_lj, FALSE, ewaldcoeff_q, ewaldcoeff_lj,
+                                &irc, pme_src->bFEP_q, pme_src->bFEP_lj, FALSE, ewaldcoeff_q, ewaldcoeff_lj,
                                  pme_src->nthread, pme_src->runMode, pme_src->gpu, nullptr, nullptr, dummyLogger);
+        /* When running PME on the CPU not using domain decomposition,
+         * the atom data is allocated once only in gmx_pme_(re)init().
+         */
+        if (!pme_src->gpu && pme_src->nnodes == 1)
+        {
+            gmx_pme_reinit_atoms(*pmedata, pme_src->atc[0].numAtoms(), nullptr);
+        }
          //TODO this is mostly passing around current values
      }
      GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
@@ -1051,9 +994,11 @@ void gmx_pme_reinit(struct gmx_pme_t **pmedata,
      /* We would like to reuse the fft grids, but that's harder */
  }
  
-void gmx_pme_calc_energy(struct gmx_pme_t *pme, int n, rvec *x, real *q, real *V)
+void gmx_pme_calc_energy(gmx_pme_t                      *pme,
+                         gmx::ArrayRef<const gmx::RVec>  x,
+                         gmx::ArrayRef<const real>       q,
+                         real                           *V)
  {
-    pme_atomcomm_t *atc;
      pmegrids_t     *grid;
  
      if (pme->nnodes > 1)
@@ -1065,17 +1010,13 @@ void gmx_pme_calc_energy(struct gmx_pme_t *pme, int n, rvec *x, real *q, real *V
          gmx_incons("gmx_pme_calc_energy with free energy");
      }
  
-    atc            = &pme->atc_energy;
-    atc->nthread   = 1;
-    if (atc->spline == nullptr)
+    if (!pme->atc_energy)
      {
-        snew(atc->spline, atc->nthread);
+        pme->atc_energy = std::make_unique<PmeAtomComm>(MPI_COMM_NULL, 1, pme->pme_order,
+                                                        0, true);
      }
-    atc->nslab     = 1;
-    atc->bSpread   = TRUE;
-    atc->pme_order = pme->pme_order;
-    atc->n         = n;
-    pme_realloc_atomcomm_things(atc);
+    PmeAtomComm *atc = pme->atc_energy.get();
+    atc->setNumAtoms(x.ssize());
      atc->x           = x;
      atc->coefficient = q;
  
@@ -1090,34 +1031,33 @@ void gmx_pme_calc_energy(struct gmx_pme_t *pme, int n, rvec *x, real *q, real *V
  
  /*! \brief Calculate initial Lorentz-Berthelot coefficients for LJ-PME */
  static void
-calc_initial_lb_coeffs(struct gmx_pme_t *pme, const real *local_c6, const real *local_sigma)
+calc_initial_lb_coeffs(gmx::ArrayRef<real>  coefficient,
+                       const real          *local_c6,
+                       const real          *local_sigma)
  {
-    int  i;
-    for (i = 0; i < pme->atc[0].n; ++i)
+    for (gmx::index i = 0; i < coefficient.ssize(); ++i)
      {
-        real sigma4;
-        sigma4                     = local_sigma[i];
-        sigma4                     = sigma4*sigma4;
-        sigma4                     = sigma4*sigma4;
-        pme->atc[0].coefficient[i] = local_c6[i] / sigma4;
+        real sigma4    = local_sigma[i];
+        sigma4         = sigma4*sigma4;
+        sigma4         = sigma4*sigma4;
+        coefficient[i] = local_c6[i] / sigma4;
      }
  }
  
  /*! \brief Calculate next Lorentz-Berthelot coefficients for LJ-PME */
  static void
-calc_next_lb_coeffs(struct gmx_pme_t *pme, const real *local_sigma)
+calc_next_lb_coeffs(gmx::ArrayRef<real>  coefficient,
+                    const real          *local_sigma)
  {
-    int  i;
-
-    for (i = 0; i < pme->atc[0].n; ++i)
+    for (gmx::index i = 0; i < coefficient.ssize(); ++i)
      {
-        pme->atc[0].coefficient[i] *= local_sigma[i];
+        coefficient[i] *= local_sigma[i];
      }
  }
  
  int gmx_pme_do(struct gmx_pme_t *pme,
-               int start,       int homenr,
-               rvec x[],        rvec f[],
+               gmx::ArrayRef<const gmx::RVec> coordinates,
+               gmx::ArrayRef<gmx::RVec>       forces,
                 real chargeA[],  real chargeB[],
                 real c6A[],      real c6B[],
                 real sigmaA[],   real sigmaB[],
@@ -1132,12 +1072,10 @@ int gmx_pme_do(struct gmx_pme_t *pme,
  {
      GMX_ASSERT(pme->runMode == PmeRunMode::CPU, "gmx_pme_do should not be called on the GPU PME run.");
  
-    int                  d, i, j, npme, grid_index, max_grid_index;
-    int                  n_d;
-    pme_atomcomm_t      *atc        = nullptr;
-    pmegrids_t          *pmegrid    = nullptr;
-    real                *grid       = nullptr;
-    rvec                *f_d;
+    int                  d, npme, grid_index, max_grid_index;
+    PmeAtomComm         &atc         = pme->atc[0];
+    pmegrids_t          *pmegrid     = nullptr;
+    real                *grid        = nullptr;
      real                *coefficient = nullptr;
      PmeOutput            output[2]; // The second is used for the B state with FEP
      real                 scale, lambda;
@@ -1168,30 +1106,20 @@ int gmx_pme_do(struct gmx_pme_t *pme,
  
      if (pme->nnodes > 1)
      {
-        atc      = &pme->atc[0];
-        atc->npd = homenr;
-        if (atc->npd > atc->pd_nalloc)
+        atc.pd.resize(coordinates.ssize());
+        for (int d = pme->ndecompdim-1; d >= 0; d--)
          {
-            atc->pd_nalloc = over_alloc_dd(atc->npd);
-            srenew(atc->pd, atc->pd_nalloc);
-        }
-        for (d = pme->ndecompdim-1; d >= 0; d--)
-        {
-            atc           = &pme->atc[d];
-            atc->maxshift = (atc->dimind == 0 ? maxshift_x : maxshift_y);
+            PmeAtomComm &atc    = pme->atc[d];
+            atc.maxshift        = (atc.dimind == 0 ? maxshift_x : maxshift_y);
          }
      }
      else
      {
-        atc = &pme->atc[0];
-        /* This could be necessary for TPI */
-        pme->atc[0].n = homenr;
-        if (DOMAINDECOMP(cr))
-        {
-            pme_realloc_atomcomm_things(atc);
-        }
-        atc->x = x;
-        atc->f = f;
+        GMX_ASSERT(coordinates.ssize() == atc.numAtoms(), "We expect atc.numAtoms() coordinates");
+        GMX_ASSERT(forces.ssize() >= atc.numAtoms(), "We need a force buffer with at least atc.numAtoms() elements");
+
+        atc.x = coordinates;
+        atc.f = forces;
      }
  
      matrix scaledBox;
@@ -1242,10 +1170,10 @@ int gmx_pme_do(struct gmx_pme_t *pme,
          pfft_setup = pme->pfft_setup[grid_index];
          switch (grid_index)
          {
-            case 0: coefficient = chargeA + start; break;
-            case 1: coefficient = chargeB + start; break;
-            case 2: coefficient = c6A + start; break;
-            case 3: coefficient = c6B + start; break;
+            case 0: coefficient = chargeA; break;
+            case 1: coefficient = chargeB; break;
+            case 2: coefficient = c6A; break;
+            case 3: coefficient = c6B; break;
          }
  
          grid = pmegrid->grid.grid;
@@ -1263,12 +1191,12 @@ int gmx_pme_do(struct gmx_pme_t *pme,
  
          if (pme->nnodes == 1)
          {
-            atc->coefficient = coefficient;
+            atc.coefficient = gmx::arrayRefFromArray(coefficient, coordinates.size());
          }
          else
          {
              wallcycle_start(wcycle, ewcPME_REDISTXF);
-            do_redist_pos_coeffs(pme, cr, start, homenr, bFirst, x, coefficient);
+            do_redist_pos_coeffs(pme, cr, bFirst, coordinates, coefficient);
  
              wallcycle_stop(wcycle, ewcPME_REDISTXF);
          }
@@ -1276,7 +1204,7 @@ int gmx_pme_do(struct gmx_pme_t *pme,
          if (debug)
          {
              fprintf(debug, "Rank= %6d, pme local particles=%6d\n",
-                    cr->nodeid, atc->n);
+                    cr->nodeid, atc.numAtoms());
          }
  
          if (flags & GMX_PME_SPREAD)
@@ -1284,14 +1212,14 @@ int gmx_pme_do(struct gmx_pme_t *pme,
              wallcycle_start(wcycle, ewcPME_SPREAD);
  
              /* Spread the coefficients on a grid */
-            spread_on_grid(pme, &pme->atc[0], pmegrid, bFirst, TRUE, fftgrid, bDoSplines, grid_index);
+            spread_on_grid(pme, &atc, pmegrid, bFirst, TRUE, fftgrid, bDoSplines, grid_index);
  
              if (bFirst)
              {
-                inc_nrnb(nrnb, eNR_WEIGHTS, DIM*atc->n);
+                inc_nrnb(nrnb, eNR_WEIGHTS, DIM*atc.numAtoms());
              }
              inc_nrnb(nrnb, eNR_SPREADBSP,
-                     pme->pme_order*pme->pme_order*pme->pme_order*atc->n);
+                     pme->pme_order*pme->pme_order*pme->pme_order*atc.numAtoms());
  
              if (!pme->bUseThreads)
              {
@@ -1432,8 +1360,8 @@ int gmx_pme_do(struct gmx_pme_t *pme,
              {
                  try
                  {
-                    gather_f_bsplines(pme, grid, bClearF, atc,
-                                      &atc->spline[thread],
+                    gather_f_bsplines(pme, grid, bClearF, &atc,
+                                      &atc.spline[thread],
                                        pme->bFEP ? (grid_index % 2 == 0 ? 1.0-lambda : lambda) : 1.0);
                  }
                  GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
@@ -1441,7 +1369,7 @@ int gmx_pme_do(struct gmx_pme_t *pme,
  
  
              inc_nrnb(nrnb, eNR_GATHERFBSP,
-                     pme->pme_order*pme->pme_order*pme->pme_order*pme->atc[0].n);
+                     pme->pme_order*pme->pme_order*pme->pme_order*atc.numAtoms());
              /* Note: this wallcycle region is opened above inside an OpenMP
                 region, so take care if refactoring code here. */
              wallcycle_stop(wcycle, ewcPME_GATHER);
@@ -1472,15 +1400,12 @@ int gmx_pme_do(struct gmx_pme_t *pme,
          /* Loop over A- and B-state if we are doing FEP */
          for (fep_state = 0; fep_state < fep_states_lj; ++fep_state)
          {
-            real *local_c6 = nullptr, *local_sigma = nullptr, *RedistC6 = nullptr, *RedistSigma = nullptr;
+            real               *local_c6 = nullptr, *local_sigma = nullptr, *RedistC6 = nullptr, *RedistSigma = nullptr;
+            gmx::ArrayRef<real> coefficientBuffer;
              if (pme->nnodes == 1)
              {
-                if (pme->lb_buf1 == nullptr)
-                {
-                    pme->lb_buf_nalloc = pme->atc[0].n;
-                    snew(pme->lb_buf1, pme->lb_buf_nalloc);
-                }
-                pme->atc[0].coefficient = pme->lb_buf1;
+                pme->lb_buf1.resize(atc.numAtoms());
+                coefficientBuffer = pme->lb_buf1;
                  switch (fep_state)
                  {
                      case 0:
@@ -1497,7 +1422,7 @@ int gmx_pme_do(struct gmx_pme_t *pme,
              }
              else
              {
-                atc = &pme->atc[0];
+                coefficientBuffer = atc.coefficientBuffer;
                  switch (fep_state)
                  {
                      case 0:
@@ -1513,29 +1438,26 @@ int gmx_pme_do(struct gmx_pme_t *pme,
                  }
                  wallcycle_start(wcycle, ewcPME_REDISTXF);
  
-                do_redist_pos_coeffs(pme, cr, start, homenr, bFirst, x, RedistC6);
-                if (pme->lb_buf_nalloc < atc->n)
-                {
-                    pme->lb_buf_nalloc = atc->nalloc;
-                    srenew(pme->lb_buf1, pme->lb_buf_nalloc);
-                    srenew(pme->lb_buf2, pme->lb_buf_nalloc);
-                }
-                local_c6 = pme->lb_buf1;
-                for (i = 0; i < atc->n; ++i)
+                do_redist_pos_coeffs(pme, cr, bFirst, coordinates, RedistC6);
+                pme->lb_buf1.resize(atc.numAtoms());
+                pme->lb_buf2.resize(atc.numAtoms());
+                local_c6 = pme->lb_buf1.data();
+                for (int i = 0; i < atc.numAtoms(); ++i)
                  {
-                    local_c6[i] = atc->coefficient[i];
+                    local_c6[i] = atc.coefficient[i];
                  }
  
-                do_redist_pos_coeffs(pme, cr, start, homenr, FALSE, x, RedistSigma);
-                local_sigma = pme->lb_buf2;
-                for (i = 0; i < atc->n; ++i)
+                do_redist_pos_coeffs(pme, cr, FALSE, coordinates, RedistSigma);
+                local_sigma = pme->lb_buf2.data();
+                for (int i = 0; i < atc.numAtoms(); ++i)
                  {
-                    local_sigma[i] = atc->coefficient[i];
+                    local_sigma[i] = atc.coefficient[i];
                  }
  
                  wallcycle_stop(wcycle, ewcPME_REDISTXF);
              }
-            calc_initial_lb_coeffs(pme, local_c6, local_sigma);
+            atc.coefficient = coefficientBuffer;
+            calc_initial_lb_coeffs(coefficientBuffer, local_c6, local_sigma);
  
              /*Seven terms in LJ-PME with LB, grid_index < 2 reserved for electrostatics*/
              for (grid_index = 2; grid_index < 9; ++grid_index)
@@ -1544,22 +1466,22 @@ int gmx_pme_do(struct gmx_pme_t *pme,
                  pmegrid    = &pme->pmegrid[grid_index];
                  fftgrid    = pme->fftgrid[grid_index];
                  pfft_setup = pme->pfft_setup[grid_index];
-                calc_next_lb_coeffs(pme, local_sigma);
+                calc_next_lb_coeffs(coefficientBuffer, local_sigma);
                  grid = pmegrid->grid.grid;
  
                  if (flags & GMX_PME_SPREAD)
                  {
                      wallcycle_start(wcycle, ewcPME_SPREAD);
                      /* Spread the c6 on a grid */
-                    spread_on_grid(pme, &pme->atc[0], pmegrid, bFirst, TRUE, fftgrid, bDoSplines, grid_index);
+                    spread_on_grid(pme, &atc, pmegrid, bFirst, TRUE, fftgrid, bDoSplines, grid_index);
  
                      if (bFirst)
                      {
-                        inc_nrnb(nrnb, eNR_WEIGHTS, DIM*atc->n);
+                        inc_nrnb(nrnb, eNR_WEIGHTS, DIM*atc.numAtoms());
                      }
  
                      inc_nrnb(nrnb, eNR_SPREADBSP,
-                             pme->pme_order*pme->pme_order*pme->pme_order*atc->n);
+                             pme->pme_order*pme->pme_order*pme->pme_order*atc.numAtoms());
                      if (pme->nthread == 1)
                      {
                          wrap_periodic_pmegrid(pme, grid);
@@ -1640,7 +1562,7 @@ int gmx_pme_do(struct gmx_pme_t *pme,
              if (bBackFFT)
              {
                  bFirst = !pme->doCoulomb;
-                calc_initial_lb_coeffs(pme, local_c6, local_sigma);
+                calc_initial_lb_coeffs(coefficientBuffer, local_c6, local_sigma);
                  for (grid_index = 8; grid_index >= 2; --grid_index)
                  {
                      /* Unpack structure */
@@ -1648,7 +1570,7 @@ int gmx_pme_do(struct gmx_pme_t *pme,
                      fftgrid    = pme->fftgrid[grid_index];
                      pfft_setup = pme->pfft_setup[grid_index];
                      grid       = pmegrid->grid.grid;
-                    calc_next_lb_coeffs(pme, local_sigma);
+                    calc_next_lb_coeffs(coefficientBuffer, local_sigma);
  #pragma omp parallel num_threads(pme->nthread) private(thread)
                      {
                          try
@@ -1712,7 +1634,7 @@ int gmx_pme_do(struct gmx_pme_t *pme,
  
  
                          inc_nrnb(nrnb, eNR_GATHERFBSP,
-                                 pme->pme_order*pme->pme_order*pme->pme_order*pme->atc[0].n);
+                                 pme->pme_order*pme->pme_order*pme->pme_order*pme->atc[0].numAtoms());
                      }
                      wallcycle_stop(wcycle, ewcPME_GATHER);
  
@@ -1727,20 +1649,20 @@ int gmx_pme_do(struct gmx_pme_t *pme,
          wallcycle_start(wcycle, ewcPME_REDISTXF);
          for (d = 0; d < pme->ndecompdim; d++)
          {
-            atc = &pme->atc[d];
+            gmx::ArrayRef<gmx::RVec> forcesRef;
              if (d == pme->ndecompdim - 1)
              {
-                n_d = homenr;
-                f_d = f + start;
+                const size_t numAtoms = coordinates.size();
+                GMX_ASSERT(forces.size() >= numAtoms, "Need at least numAtoms forces");
+                forcesRef = forces.subArray(0, numAtoms);
              }
              else
              {
-                n_d = pme->atc[d+1].n;
-                f_d = pme->atc[d+1].f;
+                forcesRef = pme->atc[d + 1].f;
              }
              if (DOMAINDECOMP(cr))
              {
-                dd_pmeredist_f(pme, atc, n_d, f_d,
+                dd_pmeredist_f(pme, &pme->atc[d], forcesRef,
                                 d == pme->ndecompdim-1 && pme->bPPnode);
              }
          }
@@ -1761,9 +1683,9 @@ int gmx_pme_do(struct gmx_pme_t *pme,
              {
                  *energy_q       = (1.0-lambda_q)*output[0].coulombEnergy_ + lambda_q*output[1].coulombEnergy_;
                  *dvdlambda_q   += output[1].coulombEnergy_ - output[0].coulombEnergy_;
-                for (i = 0; i < DIM; i++)
+                for (int i = 0; i < DIM; i++)
                  {
-                    for (j = 0; j < DIM; j++)
+                    for (int j = 0; j < DIM; j++)
                      {
                          vir_q[i][j] += (1.0-lambda_q)*output[0].coulombVirial_[i][j] +
                              lambda_q*output[1].coulombVirial_[i][j];
@@ -1791,9 +1713,9 @@ int gmx_pme_do(struct gmx_pme_t *pme,
              {
                  *energy_lj     = (1.0-lambda_lj)*output[0].lennardJonesEnergy_ + lambda_lj*output[1].lennardJonesEnergy_;
                  *dvdlambda_lj += output[1].lennardJonesEnergy_ - output[0].lennardJonesEnergy_;
-                for (i = 0; i < DIM; i++)
+                for (int i = 0; i < DIM; i++)
                  {
-                    for (j = 0; j < DIM; j++)
+                    for (int j = 0; j < DIM; j++)
                      {
                          vir_lj[i][j] += (1.0-lambda_lj)*output[0].lennardJonesVirial_[i][j] + lambda_lj*output[1].lennardJonesVirial_[i][j];
                      }
@@ -1843,19 +1765,11 @@ void gmx_pme_destroy(gmx_pme_t *pme)
      sfree(pme->cfftgrid);
      sfree(pme->pfft_setup);
  
-    for (int i = 0; i < std::max(1, pme->ndecompdim); i++) //pme->atc[0] is always allocated
-    {
-        destroy_atomcomm(&pme->atc[i]);
-    }
-
      for (int i = 0; i < DIM; i++)
      {
          sfree(pme->bsp_mod[i]);
      }
  
-    sfree(pme->lb_buf1);
-    sfree(pme->lb_buf2);
-
      sfree(pme->bufv);
      sfree(pme->bufr);
  
@@ -1877,11 +1791,17 @@ void gmx_pme_destroy(gmx_pme_t *pme)
      delete pme;
  }
  
-void gmx_pme_reinit_atoms(const gmx_pme_t *pme, const int nAtoms, const real *charges)
+void gmx_pme_reinit_atoms(gmx_pme_t  *pme,
+                          const int   numAtoms,
+                          const real *charges)
  {
      if (pme_gpu_active(pme))
      {
-        pme_gpu_reinit_atoms(pme->gpu, nAtoms, charges);
+        pme_gpu_reinit_atoms(pme->gpu, numAtoms, charges);
+    }
+    else
+    {
+        pme->atc[0].setNumAtoms(numAtoms);
+        // TODO: set the charges here as well
      }
-    // TODO: handle the CPU case here; handle the whole t_mdatoms
  }
diff --git a/src/gromacs/ewald/pme.h b/src/gromacs/ewald/pme.h

index 4c1fae8e1d07a22111807fcf129377d126197360..1d19f401c92552a3257bc4feef70cbe817bd5142 100644 (file)
--- a/src/gromacs/ewald/pme.h
+++ b/src/gromacs/ewald/pme.h
@@ -138,7 +138,7 @@ bool gmx_pme_check_restrictions(int pme_order,
   */
  gmx_pme_t *gmx_pme_init(const t_commrec *cr,
                          const NumPmeDomains &numPmeDomains,
-                        const t_inputrec *ir, int homenr,
+                        const t_inputrec *ir,
                          gmx_bool bFreeEnergy_q, gmx_bool bFreeEnergy_lj,
                          gmx_bool bReproducible,
                          real ewaldcoeff_q, real ewaldcoeff_lj,
@@ -169,14 +169,18 @@ void gmx_pme_destroy(gmx_pme_t *pme);
  
  /*! \brief Do a PME calculation on a CPU for the long range electrostatics and/or LJ.
   *
+ * Computes the PME forces and the energy and viral, when requested,
+ * for all atoms in \p coordinates. Forces, when requested, are added
+ * to the buffer \p forces, which is allowed to contain more elements
+ * than the number of elements in \p coordinates.
   * The meaning of \p flags is defined above, and determines which
   * parts of the calculation are performed.
   *
   * \return 0 indicates all well, non zero is an error code.
   */
  int gmx_pme_do(struct gmx_pme_t *pme,
-               int start,       int homenr,
-               rvec x[],        rvec f[],
+               gmx::ArrayRef<const gmx::RVec> coordinates,
+               gmx::ArrayRef<gmx::RVec>       forces,
                 real chargeA[],  real chargeB[],
                 real c6A[],      real c6B[],
                 real sigmaA[],   real sigmaB[],
@@ -204,7 +208,10 @@ int gmx_pmeonly(struct gmx_pme_t *pme,
   * pme struct. Currently does not work in parallel or with free
   * energy.
   */
-void gmx_pme_calc_energy(struct gmx_pme_t *pme, int n, rvec *x, real *q, real *V);
+void gmx_pme_calc_energy(gmx_pme_t                      *pme,
+                         gmx::ArrayRef<const gmx::RVec>  x,
+                         gmx::ArrayRef<const real>       q,
+                         real                           *V);
  
  /*! \brief Send the charges and maxshift to out PME-only node. */
  void gmx_pme_send_parameters(const t_commrec *cr,
@@ -239,11 +246,13 @@ void gmx_pme_receive_f(const t_commrec *cr,
   * TODO: it should update the PME CPU atom data as well.
   * (currently PME CPU call gmx_pme_do() gets passed the input pointers for each computation).
   *
- * \param[in] pme            The PME structure.
- * \param[in] nAtoms         The number of particles.
- * \param[in] charges        The pointer to the array of particle charges.
+ * \param[in,out] pme        The PME structure.
+ * \param[in]     numAtoms   The number of particles.
+ * \param[in]     charges    The pointer to the array of particle charges.
   */
-void gmx_pme_reinit_atoms(const gmx_pme_t *pme, int nAtoms, const real *charges);
+void gmx_pme_reinit_atoms(gmx_pme_t  *pme,
+                          int         numAtoms,
+                          const real *charges);
  
  /* A block of PME GPU functions */
  
diff --git a/src/gromacs/ewald/pme_gather.cpp b/src/gromacs/ewald/pme_gather.cpp

index 0802005eabb0ff4e8fb0e4257ac0ae9e17e09026..f9913ff0c93ddbedda067a0041ea2dddad74c70d 100644 (file)
--- a/src/gromacs/ewald/pme_gather.cpp
+++ b/src/gromacs/ewald/pme_gather.cpp
@@ -79,7 +79,7 @@ struct do_fspline
      do_fspline (
              const gmx_pme_t *                   pme,
              const real * gmx_restrict           grid,
-            const pme_atomcomm_t * gmx_restrict atc,
+            const PmeAtomComm * gmx_restrict    atc,
              const splinedata_t * gmx_restrict   spline,
              int                                 nn)
          : pme(pme), grid(grid), atc(atc), spline(spline), nn(nn) {}
@@ -93,12 +93,12 @@ struct do_fspline
          const int  norder = nn*order;
  
          /* Pointer arithmetic alert, next six statements */
-        const real *const gmx_restrict thx  = spline->theta[XX] + norder;
-        const real *const gmx_restrict thy  = spline->theta[YY] + norder;
-        const real *const gmx_restrict thz  = spline->theta[ZZ] + norder;
-        const real *const gmx_restrict dthx = spline->dtheta[XX] + norder;
-        const real *const gmx_restrict dthy = spline->dtheta[YY] + norder;
-        const real *const gmx_restrict dthz = spline->dtheta[ZZ] + norder;
+        const real *const gmx_restrict thx  = spline->theta.coefficients[XX] + norder;
+        const real *const gmx_restrict thy  = spline->theta.coefficients[YY] + norder;
+        const real *const gmx_restrict thz  = spline->theta.coefficients[ZZ] + norder;
+        const real *const gmx_restrict dthx = spline->dtheta.coefficients[XX] + norder;
+        const real *const gmx_restrict dthy = spline->dtheta.coefficients[YY] + norder;
+        const real *const gmx_restrict dthz = spline->dtheta.coefficients[ZZ] + norder;
  
          RVec                           f(0, 0, 0);
  
@@ -141,12 +141,12 @@ struct do_fspline
      {
          const int                      norder = nn*4;
          /* Pointer arithmetic alert, next six statements */
-        const real *const gmx_restrict thx  = spline->theta[XX] + norder;
-        const real *const gmx_restrict thy  = spline->theta[YY] + norder;
-        const real *const gmx_restrict thz  = spline->theta[ZZ] + norder;
-        const real *const gmx_restrict dthx = spline->dtheta[XX] + norder;
-        const real *const gmx_restrict dthy = spline->dtheta[YY] + norder;
-        const real *const gmx_restrict dthz = spline->dtheta[ZZ] + norder;
+        const real *const gmx_restrict thx  = spline->theta.coefficients[XX] + norder;
+        const real *const gmx_restrict thy  = spline->theta.coefficients[YY] + norder;
+        const real *const gmx_restrict thz  = spline->theta.coefficients[ZZ] + norder;
+        const real *const gmx_restrict dthx = spline->dtheta.coefficients[XX] + norder;
+        const real *const gmx_restrict dthy = spline->dtheta.coefficients[YY] + norder;
+        const real *const gmx_restrict dthz = spline->dtheta.coefficients[ZZ] + norder;
  
          Simd4NReal                     fx_S = setZero();
          Simd4NReal                     fy_S = setZero();
@@ -221,12 +221,12 @@ struct do_fspline
          const int                     norder = nn*order;
          GMX_ASSERT(gridNZ % 4 == 0, "For aligned SIMD4 operations the grid size has to be padded up to a multiple of 4");
          /* Pointer arithmetic alert, next six statements */
-        const real *const gmx_restrict thx  = spline->theta[XX] + norder;
-        const real *const gmx_restrict thy  = spline->theta[YY] + norder;
-        const real *const gmx_restrict thz  = spline->theta[ZZ] + norder;
-        const real *const gmx_restrict dthx = spline->dtheta[XX] + norder;
-        const real *const gmx_restrict dthy = spline->dtheta[YY] + norder;
-        const real *const gmx_restrict dthz = spline->dtheta[ZZ] + norder;
+        const real *const gmx_restrict thx  = spline->theta.coefficients[XX] + norder;
+        const real *const gmx_restrict thy  = spline->theta.coefficients[YY] + norder;
+        const real *const gmx_restrict thz  = spline->theta.coefficients[ZZ] + norder;
+        const real *const gmx_restrict dthx = spline->dtheta.coefficients[XX] + norder;
+        const real *const gmx_restrict dthy = spline->dtheta.coefficients[YY] + norder;
+        const real *const gmx_restrict dthz = spline->dtheta.coefficients[ZZ] + norder;
  
          struct pme_spline_work *const  work = pme->spline_work;
  
@@ -282,7 +282,7 @@ struct do_fspline
      private:
          const gmx_pme_t *const                   pme;
          const real *const gmx_restrict           grid;
-        const pme_atomcomm_t *const gmx_restrict atc;
+        const PmeAtomComm *const gmx_restrict    atc;
          const splinedata_t *const gmx_restrict   spline;
          const int                                nn;
  
@@ -297,7 +297,7 @@ struct do_fspline
  
  
  void gather_f_bsplines(const gmx_pme_t *pme, const real *grid,
-                       gmx_bool bClearF, const pme_atomcomm_t *atc,
+                       gmx_bool bClearF, const PmeAtomComm *atc,
                         const splinedata_t *spline,
                         real scale)
  {
@@ -316,7 +316,7 @@ void gather_f_bsplines(const gmx_pme_t *pme, const real *grid,
      const real rzz   = pme->recipbox[ZZ][ZZ];
  
      /* Extract the buffer for force output */
-    rvec * gmx_restrict force = atc->f;
+    rvec * gmx_restrict force = as_rvec_array(atc->f.data());
  
      /* Note that unrolling this loop by templating this function on order
       * deteriorates performance significantly with gcc5/6/7.
@@ -368,10 +368,10 @@ void gather_f_bsplines(const gmx_pme_t *pme, const real *grid,
  
  
  real gather_energy_bsplines(gmx_pme_t *pme, const real *grid,
-                            pme_atomcomm_t *atc)
+                            PmeAtomComm *atc)
  {
      splinedata_t *spline;
-    int           n, ithx, ithy, ithz, i0, j0, k0;
+    int           ithx, ithy, ithz, i0, j0, k0;
      int           index_x, index_xy;
      int          *idxptr;
      real          energy, pot, tx, ty, coefficient, gval;
@@ -384,7 +384,7 @@ real gather_energy_bsplines(gmx_pme_t *pme, const real *grid,
      order = pme->pme_order;
  
      energy = 0;
-    for (n = 0; (n < atc->n); n++)
+    for (int n = 0; n < atc->numAtoms(); n++)
      {
          coefficient      = atc->coefficient[n];
  
@@ -398,9 +398,9 @@ real gather_energy_bsplines(gmx_pme_t *pme, const real *grid,
              k0   = idxptr[ZZ];
  
              /* Pointer arithmetic alert, next three statements */
-            thx  = spline->theta[XX] + norder;
-            thy  = spline->theta[YY] + norder;
-            thz  = spline->theta[ZZ] + norder;
+            thx  = spline->theta.coefficients[XX] + norder;
+            thy  = spline->theta.coefficients[YY] + norder;
+            thz  = spline->theta.coefficients[ZZ] + norder;
  
              pot = 0;
              for (ithx = 0; (ithx < order); ithx++)
diff --git a/src/gromacs/ewald/pme_gather.h b/src/gromacs/ewald/pme_gather.h

index 384c72598d5f00eedab1e31a60bbef699216181f..8bb174a5f87a78bf4c27097ea2cbe17d70d68b9e 100644 (file)
--- a/src/gromacs/ewald/pme_gather.h
+++ b/src/gromacs/ewald/pme_gather.h
@@ -41,12 +41,12 @@
  
  void
  gather_f_bsplines(const struct gmx_pme_t *pme, const real *grid,
-                  gmx_bool bClearF, const pme_atomcomm_t *atc,
+                  gmx_bool bClearF, const PmeAtomComm *atc,
                    const splinedata_t *spline,
                    real scale);
  
  real
  gather_energy_bsplines(struct gmx_pme_t *pme, const real *grid,
-                       pme_atomcomm_t *atc);
+                       PmeAtomComm *atc);
  
  #endif
diff --git a/src/gromacs/ewald/pme_gpu_internal.cpp b/src/gromacs/ewald/pme_gpu_internal.cpp

index a8560a7a020485d3b77de284615f2ca15a774436..cad028b557775811ad759173ffe303f5306f3a18 100644 (file)
--- a/src/gromacs/ewald/pme_gpu_internal.cpp
+++ b/src/gromacs/ewald/pme_gpu_internal.cpp
@@ -835,7 +835,7 @@ static void pme_gpu_init(gmx_pme_t               *pme,
      kernelParamsPtr->constants.elFactor = ONE_4PI_EPS0 / pmeGpu->common->epsilon_r;
  }
  
-void pme_gpu_transform_spline_atom_data(const PmeGpu *pmeGpu, const pme_atomcomm_t *atc,
+void pme_gpu_transform_spline_atom_data(const PmeGpu *pmeGpu, const PmeAtomComm *atc,
                                          PmeSplineDataType type, int dimIndex, PmeLayoutTransform transform)
  {
      // The GPU atom spline data is laid out in a different way currently than the CPU one.
@@ -855,12 +855,12 @@ void pme_gpu_transform_spline_atom_data(const PmeGpu *pmeGpu, const pme_atomcomm
      switch (type)
      {
          case PmeSplineDataType::Values:
-            cpuSplineBuffer = atc->spline[threadIndex].theta[dimIndex];
+            cpuSplineBuffer = atc->spline[threadIndex].theta.coefficients[dimIndex];
              h_splineBuffer  = pmeGpu->staging.h_theta;
              break;
  
          case PmeSplineDataType::Derivatives:
-            cpuSplineBuffer = atc->spline[threadIndex].dtheta[dimIndex];
+            cpuSplineBuffer = atc->spline[threadIndex].dtheta.coefficients[dimIndex];
              h_splineBuffer  = pmeGpu->staging.h_dtheta;
              break;
  
diff --git a/src/gromacs/ewald/pme_gpu_internal.h b/src/gromacs/ewald/pme_gpu_internal.h

index 377f032fa2d1646cc1ea1ca9e80417ccd319cb9a..6f10f2d51eea05e805b5c5a87f08e4c0c91842ac 100644 (file)
--- a/src/gromacs/ewald/pme_gpu_internal.h
+++ b/src/gromacs/ewald/pme_gpu_internal.h
@@ -56,7 +56,7 @@ struct gmx_hw_info_t;
  struct gmx_gpu_opt_t;
  struct gmx_pme_t;                              // only used in pme_gpu_reinit
  struct gmx_wallclock_gpu_pme_t;
-struct pme_atomcomm_t;
+class PmeAtomComm;
  struct t_complex;
  
  namespace gmx
@@ -596,7 +596,7 @@ enum class PmeLayoutTransform
   * \param[in]  transform  Layout transform type
   */
  GPU_FUNC_QUALIFIER void pme_gpu_transform_spline_atom_data(const PmeGpu *GPU_FUNC_ARGUMENT(pmeGpu),
-                                                           const pme_atomcomm_t *GPU_FUNC_ARGUMENT(atc),
+                                                           const PmeAtomComm *GPU_FUNC_ARGUMENT(atc),
                                                             PmeSplineDataType GPU_FUNC_ARGUMENT(type),
                                                             int GPU_FUNC_ARGUMENT(dimIndex),
                                                             PmeLayoutTransform GPU_FUNC_ARGUMENT(transform)) GPU_FUNC_TERM
diff --git a/src/gromacs/ewald/pme_internal.h b/src/gromacs/ewald/pme_internal.h

index 76aa3c090f1c54a7c5bb9c31807c300aef96c716..4f56d7607e0914fe9b7a0e6de13a7d0ed489eca8 100644 (file)
--- a/src/gromacs/ewald/pme_internal.h
+++ b/src/gromacs/ewald/pme_internal.h
@@ -57,7 +57,9 @@
  
  #include "gromacs/math/gmxcomplex.h"
  #include "gromacs/utility/basedefinitions.h"
+#include "gromacs/utility/defaultinitializationallocator.h"
  #include "gromacs/utility/gmxmpi.h"
+#include "gromacs/utility/smalloc.h"
  
  #include "pme_gpu_types_host.h"
  
@@ -142,63 +144,150 @@ struct pme_overlap_t
      std::vector<real>            recvbuf;        //!< Shared buffer for receiving
  };
  
+template<typename T>
+using AlignedVector = std::vector < T, gmx::AlignedAllocator < T>>;
+
+template<typename T>
+using FastVector = std::vector < T, gmx::DefaultInitializationAllocator < T>>;
+
  /*! \brief Data structure for organizing particle allocation to threads */
-typedef struct {
-    int *n;      /* Cumulative counts of the number of particles per thread */
-    int  nalloc; /* Allocation size of i */
-    int *i;      /* Particle indices ordered on thread index (n) */
-} thread_plist_t;
+struct AtomToThreadMap
+{
+    //! Cumulative counts of the number of particles per thread
+    int             *n = nullptr;
+    //! Storage buffer for n
+    std::vector<int> nBuffer;
+    //! Particle indices ordered on thread index (n)
+    FastVector<int>  i;
+};
  
  /*! \brief Helper typedef for spline vectors */
  typedef real *splinevec[DIM];
  
+/*! \internal
+ * \brief Coefficients for theta or dtheta
+ */
+class SplineCoefficients
+{
+    public:
+        //! Reallocate for use with up to nalloc coefficients
+        void realloc(int nalloc);
+
+        //! Pointers to the coefficient buffer for x, y, z
+        splinevec           coefficients = { nullptr };
+    private:
+        //! Storage for x coefficients
+        std::vector<real>   bufferX_;
+        //! Storage for y coefficients
+        std::vector<real>   bufferY_;
+        //! Storage for z coefficients, aligned for SIMD load
+        AlignedVector<real> bufferZ_;
+};
+
  /*! \brief Data structure for beta-spline interpolation */
-typedef struct {
-    int       n;
-    int      *ind;
-    splinevec theta;
-    real     *ptr_theta_z;
-    splinevec dtheta;
-    real     *ptr_dtheta_z;
-} splinedata_t;
-
-/*! \brief Data structure for coordinating transfer between PP and PME ranks*/
-struct pme_atomcomm_t{
-    int      dimind;        /* The index of the dimension, 0=x, 1=y */
-    int      nslab;
-    int      nodeid;
-#if GMX_MPI
-    MPI_Comm mpi_comm;
-#endif
+struct splinedata_t
+{
+    int                 n      = 0;
+    FastVector<int>     ind;
+    SplineCoefficients  theta;
+    SplineCoefficients  dtheta;
+    int                 nalloc = 0;
+};
  
-    int     *node_dest;     /* The nodes to send x and q to with DD */
-    int     *node_src;      /* The nodes to receive x and q from with DD */
-    int     *buf_index;     /* Index for commnode into the buffers */
-
-    int      maxshift;
-
-    int      npd;
-    int      pd_nalloc;
-    int     *pd;
-    int     *count;         /* The number of atoms to send to each node */
-    int    **count_thread;
-    int     *rcount;        /* The number of atoms to receive */
-
-    int      n;
-    int      nalloc;
-    rvec    *x;
-    real    *coefficient;
-    rvec    *f;
-    gmx_bool bSpread;       /* These coordinates are used for spreading */
-    int      pme_order;
-    ivec    *idx;
-    rvec    *fractx;            /* Fractional coordinate relative to
-                                 * the lower cell boundary
-                                 */
-    int             nthread;
-    int            *thread_idx; /* Which thread should spread which coefficient */
-    thread_plist_t *thread_plist;
-    splinedata_t   *spline;
+/*! \brief PME slab MPI communication setup */
+struct SlabCommSetup
+{
+    //! The nodes to send x and q to with DD
+    int node_dest;
+    //! The nodes to receive x and q from with DD
+    int node_src;
+    //! Index for commnode into the buffers
+    int buf_index;
+    //! The number of atoms to receive
+    int rcount;
+};
+
+/*! \internal
+ * \brief Data structure for coordinating transfers between PME ranks along one dimension
+ *
+ * Also used for passing coordinates, coefficients and forces to and from PME routines.
+ */
+class PmeAtomComm
+{
+    public:
+        //! Constructor, \p PmeMpiCommunicator is the communicator for this dimension
+        PmeAtomComm(MPI_Comm PmeMpiCommunicator,
+                    int      numThreads,
+                    int      pmeOrder,
+                    int      dimIndex,
+                    bool     doSpread);
+
+        //! Set the atom count and when necessary resizes atom buffers
+        void setNumAtoms(int numAtoms);
+
+        //! Returns the atom count
+        int numAtoms() const
+        {
+            return numAtoms_;
+        }
+
+        //! Returns the number of atoms to send to each rank
+        gmx::ArrayRef<int> sendCount()
+        {
+            GMX_ASSERT(!count_thread.empty(), "Need at least one thread_count");
+            return count_thread[0];
+        }
+
+        //! The index of the dimension, 0=x, 1=y
+        int      dimind = 0;
+        //! The number of slabs and ranks this dimension is decomposed over
+        int      nslab  = 1;
+        //! Our MPI rank index
+        int      nodeid = 0;
+        //! Communicator for this dimension
+        MPI_Comm mpi_comm;
+
+        //! Communication setup for each slab, only present with nslab > 1
+        std::vector<SlabCommSetup> slabCommSetup;
+        //! The maximum communication distance counted in MPI ranks
+        int                        maxshift = 0;
+
+        //! The target slab index for each particle
+        FastVector<int>                    pd;
+        //! Target particle counts for each slab, for each thread
+        std::vector < std::vector < int>>  count_thread;
+
+    private:
+        //! The number of atoms
+        int                            numAtoms_ = 0;
+    public:
+        //! The coordinates
+        gmx::ArrayRef<const gmx::RVec> x;
+        //! The coefficient, charges or LJ C6
+        gmx::ArrayRef<const real>      coefficient;
+        //! The forces
+        gmx::ArrayRef<gmx::RVec>       f;
+        //! Coordinate buffer, used only with nslab > 1
+        FastVector<gmx::RVec>          xBuffer;
+        //! Coefficient buffer, used only with nslab > 1
+        FastVector<real>               coefficientBuffer;
+        //! Force buffer, used only with nslab > 1
+        FastVector<gmx::RVec>          fBuffer;
+        //! Tells whether these coordinates are used for spreading
+        bool                           bSpread;
+        //! The PME order
+        int                            pme_order;
+        //! The grid index per atom
+        FastVector<gmx::IVec>          idx;
+        //! Fractional atom coordinates relative to the lower cell boundary
+        FastVector<gmx::RVec>          fractx;
+
+        //! The number of threads to use in PME
+        int                            nthread;
+        //! Thread index for each atom
+        FastVector<int>                thread_idx;
+        std::vector<AtomToThreadMap>   threadMap;
+        std::vector<splinedata_t>      spline;
  };
  
  /*! \brief Data structure for a single PME grid */
@@ -320,7 +409,7 @@ struct gmx_pme_t {            //NOLINT(clang-analyzer-optin.performance.Padding)
      int                      *nnx, *nny, *nnz;
      real                     *fshx, *fshy, *fshz;
  
-    pme_atomcomm_t            atc[2]; /* Indexed on decomposition index */
+    std::vector<PmeAtomComm>  atc; /* Indexed on decomposition index */
      matrix                    recipbox;
      real                      boxVolume;
      splinevec                 bsp_mod;
@@ -329,13 +418,15 @@ struct gmx_pme_t {            //NOLINT(clang-analyzer-optin.performance.Padding)
       * for spreading/gathering (in serial), or the C6 coefficient for
       * local atoms (in parallel).  lb_buf2 is only used in parallel,
       * and stores the sigma values for local atoms. */
-    real                 *lb_buf1, *lb_buf2;
-    int                   lb_buf_nalloc; /* Allocation size for the above buffers. */
+    FastVector<real>          lb_buf1;
+    FastVector<real>          lb_buf2;
  
-    pme_overlap_t         overlap[2];    /* Indexed on dimension, 0=x, 1=y */
+    pme_overlap_t             overlap[2]; /* Indexed on dimension, 0=x, 1=y */
  
-    pme_atomcomm_t        atc_energy;    /* Only for gmx_pme_calc_energy */
+    /* Atom step for energy only calculation in gmx_pme_calc_energy() */
+    std::unique_ptr<PmeAtomComm> atc_energy;
  
+    /* Communication buffers */
      rvec                 *bufv;          /* Communication buffer */
      real                 *bufr;          /* Communication buffer */
      int                   buf_nalloc;    /* The communication buffer size */
diff --git a/src/gromacs/ewald/pme_only.cpp b/src/gromacs/ewald/pme_only.cpp

index c6a7b8726a912101a7de38aa92306d8983176226..3d6c5bb3b00fd557748efc4c105e5e815b93a158 100644 (file)
--- a/src/gromacs/ewald/pme_only.cpp
+++ b/src/gromacs/ewald/pme_only.cpp
@@ -632,7 +632,9 @@ int gmx_pmeonly(struct gmx_pme_t *pme,
          }
          else
          {
-            gmx_pme_do(pme, 0, natoms, as_rvec_array(pme_pp->x.data()), as_rvec_array(pme_pp->f.data()),
+            GMX_ASSERT(pme_pp->x.size() == static_cast<size_t>(natoms), "The coordinate buffer should have size natoms");
+
+            gmx_pme_do(pme, pme_pp->x, pme_pp->f,
                         pme_pp->chargeA.data(), pme_pp->chargeB.data(),
                         pme_pp->sqrt_c6A.data(), pme_pp->sqrt_c6B.data(),
                         pme_pp->sigmaA.data(), pme_pp->sigmaB.data(), box,
diff --git a/src/gromacs/ewald/pme_redistribute.cpp b/src/gromacs/ewald/pme_redistribute.cpp

index 3db06d32d8e73f78c222a9ab15177091444223cc..bfe2a4d84fcd737bb6c5134eee33645c1adfec1a 100644 (file)
--- a/src/gromacs/ewald/pme_redistribute.cpp
+++ b/src/gromacs/ewald/pme_redistribute.cpp
@@ -34,6 +34,16 @@
   * To help us fund GROMACS development, we humbly ask that you cite
   * the research papers on the package. Check out http://www.gromacs.org.
   */
+
+/*! \internal \file
+ *
+ * \brief This file contains function definitions for redistributing
+ * atoms over the PME domains
+ *
+ * \author Berk Hess <hess@kth.se>
+ * \ingroup module_ewald
+ */
+
  #include "gmxpre.h"
  
  #include "pme_redistribute.h"
@@ -50,17 +60,18 @@
  #include "gromacs/utility/smalloc.h"
  
  #include "pme_internal.h"
-#include "pme_simd.h"
  
+//! Calculate the slab indices and store in \p atc, store counts in \p count
  static void pme_calc_pidx(int start, int end,
-                          matrix recipbox, rvec x[],
-                          pme_atomcomm_t *atc, int *count)
+                          const matrix recipbox, const rvec x[],
+                          PmeAtomComm *atc, int *count)
  {
-    int   nslab, i;
-    int   si;
-    real *xptr, s;
-    real  rxx, ryx, rzx, ryy, rzy;
-    int  *pd;
+    int         nslab, i;
+    int         si;
+    const real *xptr;
+    real        s;
+    real        rxx, ryx, rzx, ryy, rzy;
+    int        *pd;
  
      /* Calculate PME task index (pidx) for each grid index.
       * Here we always assign equally sized slabs to each node
@@ -68,7 +79,7 @@ static void pme_calc_pidx(int start, int end,
       */
  
      nslab = atc->nslab;
-    pd    = atc->pd;
+    pd    = atc->pd.data();
  
      /* Reset the count */
      for (i = 0; i < nslab; i++)
@@ -109,8 +120,10 @@ static void pme_calc_pidx(int start, int end,
      }
  }
  
-static void pme_calc_pidx_wrapper(int natoms, matrix recipbox, rvec x[],
-                                  pme_atomcomm_t *atc)
+//! Wrapper function for calculating slab indices, stored in \p atc
+static void pme_calc_pidx_wrapper(gmx::ArrayRef<const gmx::RVec>  x,
+                                  const matrix                    recipbox,
+                                  PmeAtomComm                    *atc)
  {
      int nthread, thread, slab;
  
@@ -121,9 +134,11 @@ static void pme_calc_pidx_wrapper(int natoms, matrix recipbox, rvec x[],
      {
          try
          {
+            const int natoms = x.ssize();
              pme_calc_pidx(natoms* thread   /nthread,
                            natoms*(thread+1)/nthread,
-                          recipbox, x, atc, atc->count_thread[thread]);
+                          recipbox, as_rvec_array(x.data()),
+                          atc, atc->count_thread[thread].data());
          }
          GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
      }
@@ -138,83 +153,97 @@ static void pme_calc_pidx_wrapper(int natoms, matrix recipbox, rvec x[],
      }
  }
  
-static void realloc_splinevec(splinevec th, real **ptr_z, int nalloc)
+#ifndef DOXYGEN
+
+void SplineCoefficients::realloc(const int nalloc)
  {
      const int padding = 4;
-    int       i;
-
-    srenew(th[XX], nalloc);
-    srenew(th[YY], nalloc);
-    /* In z we add padding, this is only required for the aligned SIMD code */
-    sfree_aligned(*ptr_z);
-    snew_aligned(*ptr_z, nalloc+2*padding, SIMD4_ALIGNMENT);
-    th[ZZ] = *ptr_z + padding;
  
-    for (i = 0; i < padding; i++)
-    {
-        (*ptr_z)[               i] = 0;
-        (*ptr_z)[padding+nalloc+i] = 0;
-    }
+    bufferX_.resize(nalloc);
+    coefficients[XX] = bufferX_.data();
+    bufferY_.resize(nalloc);
+    coefficients[YY] = bufferY_.data();
+    /* In z we add padding, this is only required for the aligned 4-wide SIMD code */
+    bufferZ_.resize(nalloc + 2*padding);
+    coefficients[ZZ] = bufferZ_.data() + padding;
  }
  
-static void pme_realloc_splinedata(splinedata_t *spline, pme_atomcomm_t *atc)
+#endif // !DOXYGEN
+
+//! Reallocates all buffers in \p spline to fit atoms in \p atc
+static void pme_realloc_splinedata(splinedata_t         *spline,
+                                   const PmeAtomComm    *atc)
  {
-    int i;
+    if (spline->nalloc >= atc->x.ssize() &&
+        spline->nalloc >= atc->numAtoms())
+    {
+        return;
+    }
  
-    srenew(spline->ind, atc->nalloc);
+    spline->nalloc = std::max(atc->x.capacity(), static_cast<size_t>(atc->numAtoms()));
+    spline->ind.resize(spline->nalloc);
      /* Initialize the index to identity so it works without threads */
-    for (i = 0; i < atc->nalloc; i++)
+    for (int i = 0; i < spline->nalloc; i++)
      {
          spline->ind[i] = i;
      }
  
-    realloc_splinevec(spline->theta, &spline->ptr_theta_z,
-                      atc->pme_order*atc->nalloc);
-    realloc_splinevec(spline->dtheta, &spline->ptr_dtheta_z,
-                      atc->pme_order*atc->nalloc);
+    spline->theta.realloc(atc->pme_order*spline->nalloc);
+    spline->dtheta.realloc(atc->pme_order*spline->nalloc);
  }
  
-void pme_realloc_atomcomm_things(pme_atomcomm_t *atc)
+#ifndef DOXYGEN
+
+void PmeAtomComm::setNumAtoms(const int numAtoms)
  {
-    int nalloc_old, i;
+    numAtoms_ = numAtoms;
  
-    /* We have to avoid a NULL pointer for atc->x to avoid
-     * possible fatal errors in MPI routines.
-     */
-    if (atc->n > atc->nalloc || atc->nalloc == 0)
+    if (nslab > 1)
      {
-        nalloc_old  = atc->nalloc;
-        atc->nalloc = over_alloc_dd(std::max(atc->n, 1));
-
-        if (atc->nslab > 1)
+        /* We have to avoid a NULL pointer for atc->x to avoid
+         * possible fatal errors in MPI routines.
+         */
+        xBuffer.resize(numAtoms_);
+        if (xBuffer.capacity() == 0)
          {
-            srenew(atc->x, atc->nalloc);
-            srenew(atc->coefficient, atc->nalloc);
-            srenew(atc->f, atc->nalloc);
-            for (i = nalloc_old; i < atc->nalloc; i++)
-            {
-                clear_rvec(atc->f[i]);
-            }
+            xBuffer.reserve(1);
+        }
+        x = xBuffer;
+        coefficientBuffer.resize(numAtoms_);
+        if (coefficientBuffer.capacity() == 0)
+        {
+            coefficientBuffer.reserve(1);
          }
-        if (atc->bSpread)
+        coefficient = coefficientBuffer;
+        const int nalloc_old = fBuffer.size();
+        fBuffer.resize(numAtoms_);
+        for (int i = nalloc_old; i < numAtoms_; i++)
          {
-            srenew(atc->fractx, atc->nalloc);
-            srenew(atc->idx, atc->nalloc);
+            clear_rvec(fBuffer[i]);
+        }
+        f = fBuffer;
+    }
+    if (bSpread)
+    {
+        fractx.resize(numAtoms_);
+        idx.resize(numAtoms_);
  
-            if (atc->nthread > 1)
-            {
-                srenew(atc->thread_idx, atc->nalloc);
-            }
+        if (nthread > 1)
+        {
+            thread_idx.resize(numAtoms_);
+        }
  
-            for (i = 0; i < atc->nthread; i++)
-            {
-                pme_realloc_splinedata(&atc->spline[i], atc);
-            }
+        for (int i = 0; i < nthread; i++)
+        {
+            pme_realloc_splinedata(&spline[i], this);
          }
      }
  }
  
-static void pme_dd_sendrecv(pme_atomcomm_t gmx_unused *atc,
+#endif // !DOXYGEN
+
+//! Communicates buffers between rank separated by \p shift slabs
+static void pme_dd_sendrecv(PmeAtomComm gmx_unused *atc,
                              gmx_bool gmx_unused bBackward, int gmx_unused shift,
                              void gmx_unused *buf_s, int gmx_unused nbyte_s,
                              void gmx_unused *buf_r, int gmx_unused nbyte_r)
@@ -225,13 +254,13 @@ static void pme_dd_sendrecv(pme_atomcomm_t gmx_unused *atc,
  
      if (!bBackward)
      {
-        dest = atc->node_dest[shift];
-        src  = atc->node_src[shift];
+        dest = atc->slabCommSetup[shift].node_dest;
+        src  = atc->slabCommSetup[shift].node_src;
      }
      else
      {
-        dest = atc->node_src[shift];
-        src  = atc->node_dest[shift];
+        dest = atc->slabCommSetup[shift].node_src;
+        src  = atc->slabCommSetup[shift].node_dest;
      }
  
      if (nbyte_s > 0 && nbyte_r > 0)
@@ -257,31 +286,32 @@ static void pme_dd_sendrecv(pme_atomcomm_t gmx_unused *atc,
  #endif
  }
  
-static void dd_pmeredist_pos_coeffs(struct gmx_pme_t *pme,
-                                    int n, gmx_bool bX, rvec *x, const real *data,
-                                    pme_atomcomm_t *atc)
+//! Redistristributes \p data and optionally coordinates between MPI ranks
+static void dd_pmeredist_pos_coeffs(gmx_pme_t                      *pme,
+                                    const gmx_bool                  bX,
+                                    gmx::ArrayRef<const gmx::RVec>  x,
+                                    const real                     *data,
+                                    PmeAtomComm                    *atc)
  {
-    int *commnode, *buf_index;
-    int  nnodes_comm, i, nsend, local_pos, buf_pos, node, scount, rcount;
-
-    commnode  = atc->node_dest;
-    buf_index = atc->buf_index;
+    int  nnodes_comm, i, local_pos, buf_pos, node;
  
      nnodes_comm = std::min(2*atc->maxshift, atc->nslab-1);
  
-    nsend = 0;
+    auto sendCount = atc->sendCount();
+    int  nsend     = 0;
      for (i = 0; i < nnodes_comm; i++)
      {
-        buf_index[commnode[i]] = nsend;
-        nsend                 += atc->count[commnode[i]];
+        const int commnode                 = atc->slabCommSetup[i].node_dest;
+        atc->slabCommSetup[commnode].buf_index  = nsend;
+        nsend                                  += sendCount[commnode];
      }
      if (bX)
      {
-        if (atc->count[atc->nodeid] + nsend != n)
+        if (sendCount[atc->nodeid] + nsend != x.ssize())
          {
-            gmx_fatal(FARGS, "%d particles communicated to PME rank %d are more than 2/3 times the cut-off out of the domain decomposition cell of their charge group in dimension %c.\n"
+            gmx_fatal(FARGS, "%zd particles communicated to PME rank %d are more than 2/3 times the cut-off out of the domain decomposition cell of their charge group in dimension %c.\n"
                        "This usually means that your system is not well equilibrated.",
-                      n - (atc->count[atc->nodeid] + nsend),
+                      x.ssize() - (sendCount[atc->nodeid] + nsend),
                        pme->nodeid, 'x'+atc->dimind);
          }
  
@@ -292,27 +322,28 @@ static void dd_pmeredist_pos_coeffs(struct gmx_pme_t *pme,
              srenew(pme->bufr, pme->buf_nalloc);
          }
  
-        atc->n = atc->count[atc->nodeid];
+        int numAtoms = sendCount[atc->nodeid];
          for (i = 0; i < nnodes_comm; i++)
          {
-            scount = atc->count[commnode[i]];
+            const int commnode = atc->slabCommSetup[i].node_dest;
+            int       scount   = sendCount[commnode];
              /* Communicate the count */
              if (debug)
              {
                  fprintf(debug, "dimind %d PME rank %d send to rank %d: %d\n",
-                        atc->dimind, atc->nodeid, commnode[i], scount);
+                        atc->dimind, atc->nodeid, commnode, scount);
              }
              pme_dd_sendrecv(atc, FALSE, i,
                              &scount, sizeof(int),
-                            &atc->rcount[i], sizeof(int));
-            atc->n += atc->rcount[i];
+                            &atc->slabCommSetup[i].rcount, sizeof(int));
+            numAtoms += atc->slabCommSetup[i].rcount;
          }
  
-        pme_realloc_atomcomm_things(atc);
+        atc->setNumAtoms(numAtoms);
      }
  
      local_pos = 0;
-    for (i = 0; i < n; i++)
+    for (gmx::index i = 0; i < x.ssize(); i++)
      {
          node = atc->pd[i];
          if (node == atc->nodeid)
@@ -320,28 +351,29 @@ static void dd_pmeredist_pos_coeffs(struct gmx_pme_t *pme,
              /* Copy direct to the receive buffer */
              if (bX)
              {
-                copy_rvec(x[i], atc->x[local_pos]);
+                copy_rvec(x[i], atc->xBuffer[local_pos]);
              }
-            atc->coefficient[local_pos] = data[i];
+            atc->coefficientBuffer[local_pos] = data[i];
              local_pos++;
          }
          else
          {
              /* Copy to the send buffer */
+            int &buf_index = atc->slabCommSetup[node].buf_index;
              if (bX)
              {
-                copy_rvec(x[i], pme->bufv[buf_index[node]]);
+                copy_rvec(x[i], pme->bufv[buf_index]);
              }
-            pme->bufr[buf_index[node]] = data[i];
-            buf_index[node]++;
+            pme->bufr[buf_index] = data[i];
+            buf_index++;
          }
      }
  
      buf_pos = 0;
      for (i = 0; i < nnodes_comm; i++)
      {
-        scount = atc->count[commnode[i]];
-        rcount = atc->rcount[i];
+        const int scount = atc->sendCount()[atc->slabCommSetup[i].node_dest];
+        const int rcount = atc->slabCommSetup[i].rcount;
          if (scount > 0 || rcount > 0)
          {
              if (bX)
@@ -349,36 +381,33 @@ static void dd_pmeredist_pos_coeffs(struct gmx_pme_t *pme,
                  /* Communicate the coordinates */
                  pme_dd_sendrecv(atc, FALSE, i,
                                  pme->bufv[buf_pos], scount*sizeof(rvec),
-                                atc->x[local_pos], rcount*sizeof(rvec));
+                                atc->xBuffer[local_pos], rcount*sizeof(rvec));
              }
              /* Communicate the coefficients */
              pme_dd_sendrecv(atc, FALSE, i,
                              pme->bufr+buf_pos, scount*sizeof(real),
-                            atc->coefficient+local_pos, rcount*sizeof(real));
+                            atc->coefficientBuffer.data() + local_pos, rcount*sizeof(real));
              buf_pos   += scount;
-            local_pos += atc->rcount[i];
+            local_pos += atc->slabCommSetup[i].rcount;
          }
      }
  }
  
-void dd_pmeredist_f(struct gmx_pme_t *pme, pme_atomcomm_t *atc,
-                    int n, rvec *f,
+void dd_pmeredist_f(struct gmx_pme_t *pme, PmeAtomComm *atc,
+                    gmx::ArrayRef<gmx::RVec> f,
                      gmx_bool bAddF)
  {
-    int *commnode, *buf_index;
-    int  nnodes_comm, local_pos, buf_pos, i, scount, rcount, node;
-
-    commnode  = atc->node_dest;
-    buf_index = atc->buf_index;
+    int  nnodes_comm, local_pos, buf_pos, i, node;
  
      nnodes_comm = std::min(2*atc->maxshift, atc->nslab-1);
  
-    local_pos = atc->count[atc->nodeid];
+    local_pos = atc->sendCount()[atc->nodeid];
      buf_pos   = 0;
      for (i = 0; i < nnodes_comm; i++)
      {
-        scount = atc->rcount[i];
-        rcount = atc->count[commnode[i]];
+        const int commnode = atc->slabCommSetup[i].node_dest;
+        const int scount   = atc->slabCommSetup[i].rcount;
+        const int rcount   = atc->sendCount()[commnode];
          if (scount > 0 || rcount > 0)
          {
              /* Communicate the forces */
@@ -387,14 +416,14 @@ void dd_pmeredist_f(struct gmx_pme_t *pme, pme_atomcomm_t *atc,
                              pme->bufv[buf_pos], rcount*sizeof(rvec));
              local_pos += scount;
          }
-        buf_index[commnode[i]] = buf_pos;
-        buf_pos               += rcount;
+        atc->slabCommSetup[commnode].buf_index  = buf_pos;
+        buf_pos                                += rcount;
      }
  
      local_pos = 0;
      if (bAddF)
      {
-        for (i = 0; i < n; i++)
+        for (gmx::index i = 0; i < f.ssize(); i++)
          {
              node = atc->pd[i];
              if (node == atc->nodeid)
@@ -406,14 +435,14 @@ void dd_pmeredist_f(struct gmx_pme_t *pme, pme_atomcomm_t *atc,
              else
              {
                  /* Add from the receive buffer */
-                rvec_inc(f[i], pme->bufv[buf_index[node]]);
-                buf_index[node]++;
+                rvec_inc(f[i], pme->bufv[atc->slabCommSetup[node].buf_index]);
+                atc->slabCommSetup[node].buf_index++;
              }
          }
      }
      else
      {
-        for (i = 0; i < n; i++)
+        for (gmx::index i = 0; i < f.ssize(); i++)
          {
              node = atc->pd[i];
              if (node == atc->nodeid)
@@ -425,51 +454,41 @@ void dd_pmeredist_f(struct gmx_pme_t *pme, pme_atomcomm_t *atc,
              else
              {
                  /* Copy from the receive buffer */
-                copy_rvec(pme->bufv[buf_index[node]], f[i]);
-                buf_index[node]++;
+                copy_rvec(pme->bufv[atc->slabCommSetup[node].buf_index], f[i]);
+                atc->slabCommSetup[node].buf_index++;
              }
          }
      }
  }
  
  void
-do_redist_pos_coeffs(struct gmx_pme_t *pme, const t_commrec *cr, int start, int homenr,
-                     gmx_bool bFirst, rvec x[], real *data)
+do_redist_pos_coeffs(struct gmx_pme_t *pme, const t_commrec *cr,
+                     gmx_bool bFirst, gmx::ArrayRef<const gmx::RVec> x, const real *data)
  {
-    int             d;
-    pme_atomcomm_t *atc;
-    atc = &pme->atc[0];
-
-    for (d = pme->ndecompdim - 1; d >= 0; d--)
+    for (int d = pme->ndecompdim - 1; d >= 0; d--)
      {
-        int             n_d;
-        rvec           *x_d;
-        real           *param_d;
-
+        gmx::ArrayRef<const gmx::RVec>  xRef;
+        const real                     *param_d;
          if (d == pme->ndecompdim - 1)
          {
-            n_d     = homenr;
-            x_d     = x + start;
+            /* Start out with the local coordinates and charges */
+            xRef    = x;
              param_d = data;
          }
          else
          {
-            n_d     = pme->atc[d + 1].n;
-            x_d     = atc->x;
-            param_d = atc->coefficient;
-        }
-        atc      = &pme->atc[d];
-        atc->npd = n_d;
-        if (atc->npd > atc->pd_nalloc)
-        {
-            atc->pd_nalloc = over_alloc_dd(atc->npd);
-            srenew(atc->pd, atc->pd_nalloc);
+            /* Redistribute the data collected along the previous dimension */
+            const PmeAtomComm &atc = pme->atc[d + 1];
+            xRef                   = atc.x;
+            param_d                = atc.coefficient.data();
          }
-        pme_calc_pidx_wrapper(n_d, pme->recipbox, x_d, atc);
+        PmeAtomComm &atc = pme->atc[d];
+        atc.pd.resize(xRef.size());
+        pme_calc_pidx_wrapper(xRef, pme->recipbox, &atc);
          /* Redistribute x (only once) and qA/c6A or qB/c6B */
          if (DOMAINDECOMP(cr))
          {
-            dd_pmeredist_pos_coeffs(pme, n_d, bFirst, x_d, param_d, atc);
+            dd_pmeredist_pos_coeffs(pme, bFirst, xRef, param_d, &atc);
          }
      }
  }
diff --git a/src/gromacs/ewald/pme_redistribute.h b/src/gromacs/ewald/pme_redistribute.h

index dc30592b844f63491d4808d7f8cb21cc5c415cd0..aa630b15b36646819ad6ca76e251864819015929 100644 (file)
--- a/src/gromacs/ewald/pme_redistribute.h
+++ b/src/gromacs/ewald/pme_redistribute.h
@@ -32,21 +32,29 @@
   * To help us fund GROMACS development, we humbly ask that you cite
   * the research papers on the package. Check out http://www.gromacs.org.
   */
+
+/*! \internal \file
+ *
+ * \brief Declares functions for redistributing atoms over the PME domains
+ *
+ * \author Berk Hess <hess@kth.se>
+ * \ingroup module_ewald
+ */
+
  #ifndef GMX_EWALD_PME_REDISTRIBUTE_H
  #define GMX_EWALD_PME_REDISTRIBUTE_H
  
  #include "pme_internal.h"
  
+//! Redistributes forces along the dimension gives by \p atc
  void
-pme_realloc_atomcomm_things(pme_atomcomm_t *atc);
-
-void
-dd_pmeredist_f(struct gmx_pme_t *pme, pme_atomcomm_t *atc,
-               int n, rvec *f,
+dd_pmeredist_f(struct gmx_pme_t *pme, PmeAtomComm *atc,
+               gmx::ArrayRef<gmx::RVec> f,
                 gmx_bool bAddF);
  
+//! Redistributes coefficients and when \p bFirst=true coordinates over MPI ranks
  void
-do_redist_pos_coeffs(struct gmx_pme_t *pme, const t_commrec *cr, int start, int homenr,
-                     gmx_bool bFirst, rvec x[], real *data);
+do_redist_pos_coeffs(struct gmx_pme_t *pme, const t_commrec *cr,
+                     gmx_bool bFirst, gmx::ArrayRef<const gmx::RVec> x, const real *data);
  
  #endif
diff --git a/src/gromacs/ewald/pme_spread.cpp b/src/gromacs/ewald/pme_spread.cpp

index deed63f025ac7a7f88167b58d17b3cae4d8fdd89..88cbad784d890b2ad511376b98ffe13a1fb5140e 100644 (file)
--- a/src/gromacs/ewald/pme_spread.cpp
+++ b/src/gromacs/ewald/pme_spread.cpp
@@ -59,18 +59,18 @@
  
  /* TODO consider split of pme-spline from this file */
  
-static void calc_interpolation_idx(const gmx_pme_t *pme, const pme_atomcomm_t *atc,
+static void calc_interpolation_idx(const gmx_pme_t *pme, PmeAtomComm *atc,
                                     int start, int grid_index, int end, int thread)
  {
      int             i;
      int            *idxptr, tix, tiy, tiz;
-    real           *xptr, *fptr, tx, ty, tz;
+    const real     *xptr;
+    real           *fptr, tx, ty, tz;
      real            rxx, ryx, ryy, rzx, rzy, rzz;
      int             nx, ny, nz;
      int            *g2tx, *g2ty, *g2tz;
      gmx_bool        bThreads;
      int            *thread_idx = nullptr;
-    thread_plist_t *tpl        = nullptr;
      int            *tpl_n      = nullptr;
      int             thread_i;
  
@@ -92,10 +92,9 @@ static void calc_interpolation_idx(const gmx_pme_t *pme, const pme_atomcomm_t *a
      bThreads = (atc->nthread > 1);
      if (bThreads)
      {
-        thread_idx = atc->thread_idx;
+        thread_idx = atc->thread_idx.data();
  
-        tpl   = &atc->thread_plist[thread];
-        tpl_n = tpl->n;
+        tpl_n = atc->threadMap[thread].n;
          for (i = 0; i < atc->nthread; i++)
          {
              tpl_n[i] = 0;
@@ -162,11 +161,8 @@ static void calc_interpolation_idx(const gmx_pme_t *pme, const pme_atomcomm_t *a
           * over the threads, so we could actually allocate for that
           * in pme_realloc_atomcomm_things.
           */
-        if (tpl_n[atc->nthread-1] > tpl->nalloc)
-        {
-            tpl->nalloc = over_alloc_large(tpl_n[atc->nthread-1]);
-            srenew(tpl->i, tpl->nalloc);
-        }
+        AtomToThreadMap &threadMap = atc->threadMap[thread];
+        threadMap.i.resize(tpl_n[atc->nthread - 1]);
          /* Set tpl_n to the cumulative start */
          for (i = atc->nthread-1; i >= 1; i--)
          {
@@ -177,17 +173,16 @@ static void calc_interpolation_idx(const gmx_pme_t *pme, const pme_atomcomm_t *a
          /* Fill our thread local array with indices sorted on thread */
          for (i = start; i < end; i++)
          {
-            tpl->i[tpl_n[atc->thread_idx[i]]++] = i;
+            threadMap.i[tpl_n[atc->thread_idx[i]]++] = i;
          }
          /* Now tpl_n contains the cummulative count again */
      }
  }
  
-static void make_thread_local_ind(const pme_atomcomm_t *atc,
+static void make_thread_local_ind(const PmeAtomComm *atc,
                                    int thread, splinedata_t *spline)
  {
      int             n, t, i, start, end;
-    thread_plist_t *tpl;
  
      /* Combine the indices made by each thread into one index */
  
@@ -195,16 +190,16 @@ static void make_thread_local_ind(const pme_atomcomm_t *atc,
      start = 0;
      for (t = 0; t < atc->nthread; t++)
      {
-        tpl = &atc->thread_plist[t];
+        const AtomToThreadMap &threadMap = atc->threadMap[t];
          /* Copy our part (start - end) from the list of thread t */
          if (thread > 0)
          {
-            start = tpl->n[thread-1];
+            start = threadMap.n[thread-1];
          }
-        end = tpl->n[thread];
+        end = threadMap.n[thread];
          for (i = start; i < end; i++)
          {
-            spline->ind[n++] = tpl->i[i];
+            spline->ind[n++] = threadMap.i[i];
          }
      }
  
@@ -313,7 +308,7 @@ static void make_bsplines(splinevec theta, splinevec dtheta, int order,
  
  
  static void spread_coefficients_bsplines_thread(const pmegrid_t                   *pmegrid,
-                                                const pme_atomcomm_t              *atc,
+                                                const PmeAtomComm                 *atc,
                                                  splinedata_t                      *spline,
                                                  struct pme_spline_work gmx_unused *work)
  {
@@ -321,7 +316,7 @@ static void spread_coefficients_bsplines_thread(const pmegrid_t
      /* spread coefficients from home atoms to local grid */
      real          *grid;
      int            i, nn, n, ithx, ithy, ithz, i0, j0, k0;
-    int       *    idxptr;
+    const int     *idxptr;
      int            order, norder, index_x, index_xy, index_xyz;
      real           valx, valxy, coefficient;
      real          *thx, *thy, *thz;
@@ -363,9 +358,9 @@ static void spread_coefficients_bsplines_thread(const pmegrid_t
              j0   = idxptr[YY] - offy;
              k0   = idxptr[ZZ] - offz;
  
-            thx = spline->theta[XX] + norder;
-            thy = spline->theta[YY] + norder;
-            thz = spline->theta[ZZ] + norder;
+            thx = spline->theta.coefficients[XX] + norder;
+            thy = spline->theta.coefficients[YY] + norder;
+            thz = spline->theta.coefficients[ZZ] + norder;
  
              switch (order)
              {
@@ -856,7 +851,7 @@ static void sum_fftgrid_dd(const gmx_pme_t *pme, real *fftgrid, int grid_index)
  }
  
  void spread_on_grid(const gmx_pme_t *pme,
-                    const pme_atomcomm_t *atc, const pmegrids_t *grids,
+                    PmeAtomComm *atc, const pmegrids_t *grids,
                      gmx_bool bCalcSplines, gmx_bool bSpread,
                      real *fftgrid, gmx_bool bDoSplines, int grid_index)
  {
@@ -883,8 +878,8 @@ void spread_on_grid(const gmx_pme_t *pme,
              {
                  int start, end;
  
-                start = atc->n* thread   /nthread;
-                end   = atc->n*(thread+1)/nthread;
+                start = atc->numAtoms()* thread   /nthread;
+                end   = atc->numAtoms()*(thread+1)/nthread;
  
                  /* Compute fftgrid index for all atoms,
                   * with help of some extra variables.
@@ -914,7 +909,7 @@ void spread_on_grid(const gmx_pme_t *pme,
              {
                  spline = &atc->spline[0];
  
-                spline->n = atc->n;
+                spline->n = atc->numAtoms();
              }
              else
              {
@@ -923,7 +918,7 @@ void spread_on_grid(const gmx_pme_t *pme,
                  if (grids->nthread == 1)
                  {
                      /* One thread, we operate on all coefficients */
-                    spline->n = atc->n;
+                    spline->n = atc->numAtoms();
                  }
                  else
                  {
@@ -934,8 +929,11 @@ void spread_on_grid(const gmx_pme_t *pme,
  
              if (bCalcSplines)
              {
-                make_bsplines(spline->theta, spline->dtheta, pme->pme_order,
-                              atc->fractx, spline->n, spline->ind, atc->coefficient, bDoSplines);
+                make_bsplines(spline->theta.coefficients,
+                              spline->dtheta.coefficients,
+                              pme->pme_order,
+                              as_rvec_array(atc->fractx.data()), spline->n, spline->ind.data(),
+                              atc->coefficient.data(), bDoSplines);
              }
  
              if (bSpread)
diff --git a/src/gromacs/ewald/pme_spread.h b/src/gromacs/ewald/pme_spread.h

index 23e07318533cdcfbe1a9df4070fbbf7159db5e39..5ecbf071072f1a749c3d6a6d3d0d72c1e7ab4eeb 100644 (file)
--- a/src/gromacs/ewald/pme_spread.h
+++ b/src/gromacs/ewald/pme_spread.h
@@ -43,7 +43,7 @@
  
  void
  spread_on_grid(const gmx_pme_t *pme,
-               const pme_atomcomm_t *atc, const pmegrids_t *grids,
+               PmeAtomComm *atc, const pmegrids_t *grids,
                 gmx_bool bCalcSplines, gmx_bool bSpread,
                 real *fftgrid, gmx_bool bDoSplines, int grid_index);
  
diff --git a/src/gromacs/ewald/tests/pmetestcommon.cpp b/src/gromacs/ewald/tests/pmetestcommon.cpp

index ad8905d0fdd27587f95514e1f21e07489f1e5adf..55515294d2816d13629553745c8e4ff748fd7737 100644 (file)
--- a/src/gromacs/ewald/tests/pmetestcommon.cpp
+++ b/src/gromacs/ewald/tests/pmetestcommon.cpp
@@ -52,6 +52,7 @@
  #include "gromacs/ewald/pme_gpu_internal.h"
  #include "gromacs/ewald/pme_grid.h"
  #include "gromacs/ewald/pme_internal.h"
+#include "gromacs/ewald/pme_redistribute.h"
  #include "gromacs/ewald/pme_solve.h"
  #include "gromacs/ewald/pme_spread.h"
  #include "gromacs/fft/parallel_3dfft.h"
@@ -110,7 +111,6 @@ static PmeSafePointer pmeInitInternal(const t_inputrec         *inputRec,
                                        CodePath                  mode,
                                        const gmx_device_info_t  *gpuInfo,
                                        PmeGpuProgramHandle       pmeGpuProgram,
-                                      size_t                    atomCount,
                                        const Matrix3x3          &box,
                                        real                      ewaldCoeff_q = 1.0f,
                                        real                      ewaldCoeff_lj = 1.0f
@@ -120,7 +120,7 @@ static PmeSafePointer pmeInitInternal(const t_inputrec         *inputRec,
      const auto     runMode       = (mode == CodePath::CPU) ? PmeRunMode::CPU : PmeRunMode::Mixed;
      t_commrec      dummyCommrec  = {0};
      NumPmeDomains  numPmeDomains = { 1, 1 };
-    gmx_pme_t     *pmeDataRaw    = gmx_pme_init(&dummyCommrec, numPmeDomains, inputRec, atomCount, false, false, true,
+    gmx_pme_t     *pmeDataRaw    = gmx_pme_init(&dummyCommrec, numPmeDomains, inputRec, false, false, true,
                                                  ewaldCoeff_q, ewaldCoeff_lj, 1, runMode, nullptr, gpuInfo, pmeGpuProgram, dummyLogger);
      PmeSafePointer pme(pmeDataRaw); // taking ownership
  
@@ -164,7 +164,7 @@ PmeSafePointer pmeInitEmpty(const t_inputrec         *inputRec,
                              real                      ewaldCoeff_lj
                              )
  {
-    return pmeInitInternal(inputRec, mode, gpuInfo, pmeGpuProgram, 0, box, ewaldCoeff_q, ewaldCoeff_lj);
+    return pmeInitInternal(inputRec, mode, gpuInfo, pmeGpuProgram, box, ewaldCoeff_q, ewaldCoeff_lj);
      // hiding the fact that PME actually needs to know the number of atoms in advance
  }
  
@@ -180,19 +180,24 @@ PmeSafePointer pmeInitAtoms(const t_inputrec         *inputRec,
  {
      const index     atomCount = coordinates.size();
      GMX_RELEASE_ASSERT(atomCount == charges.ssize(), "Mismatch in atom data");
-    PmeSafePointer  pmeSafe = pmeInitInternal(inputRec, mode, gpuInfo, pmeGpuProgram, atomCount, box);
-    pme_atomcomm_t *atc     = nullptr;
+    PmeSafePointer  pmeSafe = pmeInitInternal(inputRec, mode, gpuInfo, pmeGpuProgram, box);
+    PmeAtomComm    *atc     = nullptr;
  
      switch (mode)
      {
          case CodePath::CPU:
              atc              = &(pmeSafe->atc[0]);
-            atc->x           = const_cast<rvec *>(as_rvec_array(coordinates.data()));
-            atc->coefficient = const_cast<real *>(charges.data());
+            atc->x           = coordinates;
+            atc->coefficient = charges;
+            gmx_pme_reinit_atoms(pmeSafe.get(), atomCount, charges.data());
              /* With decomposition there would be more boilerplate atc code here, e.g. do_redist_pos_coeffs */
              break;
  
          case CodePath::GPU:
+            // TODO: Avoid use of atc in the GPU code path
+            atc              = &(pmeSafe->atc[0]);
+            // We need to set atc->n for passing the size in the tests
+            atc->setNumAtoms(atomCount);
              gmx_pme_reinit_atoms(pmeSafe.get(), atomCount, charges.data());
              pme_gpu_copy_input_coordinates(pmeSafe->gpu, as_rvec_array(coordinates.data()));
              break;
@@ -277,7 +282,7 @@ void pmePerformSplineAndSpread(gmx_pme_t *pme, CodePath mode, // TODO const qual
                                 bool computeSplines, bool spreadCharges)
  {
      GMX_RELEASE_ASSERT(pme != nullptr, "PME data is not initialized");
-    pme_atomcomm_t *atc                          = &(pme->atc[0]);
+    PmeAtomComm    *atc                          = &(pme->atc[0]);
      const size_t    gridIndex                    = 0;
      const bool      computeSplinesForZeroCharges = true;
      real           *fftgrid                      = spreadCharges ? pme->fftgrid[gridIndex] : nullptr;
@@ -308,17 +313,17 @@ void pmePerformSplineAndSpread(gmx_pme_t *pme, CodePath mode, // TODO const qual
  static real *pmeGetSplineDataInternal(const gmx_pme_t *pme, PmeSplineDataType type, int dimIndex)
  {
      GMX_ASSERT((0 <= dimIndex) && (dimIndex < DIM), "Invalid dimension index");
-    const pme_atomcomm_t *atc          = &(pme->atc[0]);
+    const PmeAtomComm    *atc          = &(pme->atc[0]);
      const size_t          threadIndex  = 0;
      real                 *splineBuffer = nullptr;
      switch (type)
      {
          case PmeSplineDataType::Values:
-            splineBuffer = atc->spline[threadIndex].theta[dimIndex];
+            splineBuffer = atc->spline[threadIndex].theta.coefficients[dimIndex];
              break;
  
          case PmeSplineDataType::Derivatives:
-            splineBuffer = atc->spline[threadIndex].dtheta[dimIndex];
+            splineBuffer = atc->spline[threadIndex].dtheta.coefficients[dimIndex];
              break;
  
          default:
@@ -380,8 +385,8 @@ void pmePerformSolve(const gmx_pme_t *pme, CodePath mode,
  void pmePerformGather(gmx_pme_t *pme, CodePath mode,
                        PmeForceOutputHandling inputTreatment, ForcesVector &forces)
  {
-    pme_atomcomm_t *atc                     = &(pme->atc[0]);
-    const index     atomCount               = atc->n;
+    PmeAtomComm    *atc                     = &(pme->atc[0]);
+    const index     atomCount               = atc->numAtoms();
      GMX_RELEASE_ASSERT(forces.ssize() == atomCount, "Invalid force buffer size");
      const bool      forceReductionWithInput = (inputTreatment == PmeForceOutputHandling::ReduceWithInput);
      const real      scale                   = 1.0;
@@ -393,7 +398,7 @@ void pmePerformGather(gmx_pme_t *pme, CodePath mode,
      switch (mode)
      {
          case CodePath::CPU:
-            atc->f = as_rvec_array(forces.begin());
+            atc->f = forces;
              if (atc->nthread == 1)
              {
                  // something which is normally done in serial spline computation (make_thread_local_ind())
@@ -444,8 +449,8 @@ void pmeFinalizeTest(const gmx_pme_t *pme, CodePath mode)
  void pmeSetSplineData(const gmx_pme_t *pme, CodePath mode,
                        const SplineParamsDimVector &splineValues, PmeSplineDataType type, int dimIndex)
  {
-    const pme_atomcomm_t *atc         = &(pme->atc[0]);
-    const index           atomCount   = atc->n;
+    const PmeAtomComm    *atc         = &(pme->atc[0]);
+    const index           atomCount   = atc->numAtoms();
      const index           pmeOrder    = pme->pme_order;
      const index           dimSize     = pmeOrder * atomCount;
      GMX_RELEASE_ASSERT(dimSize == splineValues.ssize(), "Mismatch in spline data");
@@ -468,11 +473,11 @@ void pmeSetSplineData(const gmx_pme_t *pme, CodePath mode,
  }
  
  //! Setting gridline indices to be used in spread/gather
-void pmeSetGridLineIndices(const gmx_pme_t *pme, CodePath mode,
+void pmeSetGridLineIndices(gmx_pme_t *pme, CodePath mode,
                             const GridLineIndicesVector &gridLineIndices)
  {
-    const pme_atomcomm_t       *atc         = &(pme->atc[0]);
-    const index                 atomCount   = atc->n;
+    PmeAtomComm                *atc         = &(pme->atc[0]);
+    const index                 atomCount   = atc->numAtoms();
      GMX_RELEASE_ASSERT(atomCount == gridLineIndices.ssize(), "Mismatch in gridline indices size");
  
      IVec paddedGridSizeUnused, gridSize(0, 0, 0);
@@ -493,11 +498,9 @@ void pmeSetGridLineIndices(const gmx_pme_t *pme, CodePath mode,
              break;
  
          case CodePath::CPU:
-            // incompatible IVec and ivec assignment?
-            //std::copy(gridLineIndices.begin(), gridLineIndices.end(), atc->idx);
-            memcpy(atc->idx, gridLineIndices.data(), atomCount * sizeof(gridLineIndices[0]));
+            atc->idx.resize(gridLineIndices.size());
+            std::copy(gridLineIndices.begin(), gridLineIndices.end(), atc->idx.begin());
              break;
-
          default:
              GMX_THROW(InternalError("Test not implemented for this mode"));
      }
@@ -574,8 +577,8 @@ SplineParamsDimVector pmeGetSplineData(const gmx_pme_t *pme, CodePath mode,
                                         PmeSplineDataType type, int dimIndex)
  {
      GMX_RELEASE_ASSERT(pme != nullptr, "PME data is not initialized");
-    const pme_atomcomm_t    *atc         = &(pme->atc[0]);
-    const size_t             atomCount   = atc->n;
+    const PmeAtomComm       *atc         = &(pme->atc[0]);
+    const size_t             atomCount   = atc->numAtoms();
      const size_t             pmeOrder    = pme->pme_order;
      const size_t             dimSize     = pmeOrder * atomCount;
  
@@ -602,8 +605,8 @@ SplineParamsDimVector pmeGetSplineData(const gmx_pme_t *pme, CodePath mode,
  GridLineIndicesVector pmeGetGridlineIndices(const gmx_pme_t *pme, CodePath mode)
  {
      GMX_RELEASE_ASSERT(pme != nullptr, "PME data is not initialized");
-    const pme_atomcomm_t *atc         = &(pme->atc[0]);
-    const size_t          atomCount   = atc->n;
+    const PmeAtomComm    *atc         = &(pme->atc[0]);
+    const size_t          atomCount   = atc->numAtoms();
  
      GridLineIndicesVector gridLineIndices;
      switch (mode)
@@ -613,7 +616,7 @@ GridLineIndicesVector pmeGetGridlineIndices(const gmx_pme_t *pme, CodePath mode)
              break;
  
          case CodePath::CPU:
-            gridLineIndices = arrayRefFromArray(reinterpret_cast<IVec *>(atc->idx), atomCount);
+            gridLineIndices = atc->idx;
              break;
  
          default:
diff --git a/src/gromacs/ewald/tests/pmetestcommon.h b/src/gromacs/ewald/tests/pmetestcommon.h

index 3b353b026ad1e3d05c5bc889d7fb3382cb91b3de..552f333abc4606f96a2688f2ea0242193a119454 100644 (file)
--- a/src/gromacs/ewald/tests/pmetestcommon.h
+++ b/src/gromacs/ewald/tests/pmetestcommon.h
@@ -152,7 +152,7 @@ void pmeFinalizeTest(const gmx_pme_t *pme, CodePath mode);
  void pmeSetSplineData(const gmx_pme_t *pme, CodePath mode,
                        const SplineParamsDimVector &splineValues, PmeSplineDataType type, int dimIndex);
  //! Setting gridline indices be used in spread/gather
-void pmeSetGridLineIndices(const gmx_pme_t *pme, CodePath mode,
+void pmeSetGridLineIndices(gmx_pme_t *pme, CodePath mode,
                             const GridLineIndicesVector &gridLineIndices);
  //! Setting real grid to be used in gather
  void pmeSetRealGrid(const gmx_pme_t *pme, CodePath mode,
diff --git a/src/gromacs/mdlib/force.cpp b/src/gromacs/mdlib/force.cpp

index 784e502aed0ec5b90a94a7b492affbba898240f8..ca8e80a582620871b37403edc27f211153c3d5a0 100644 (file)
--- a/src/gromacs/mdlib/force.cpp
+++ b/src/gromacs/mdlib/force.cpp
@@ -98,27 +98,31 @@ static void reduceEwaldThreadOuput(int nthreads, ewald_corr_thread_t *ewc_t)
      }
  }
  
-void do_force_lowlevel(t_forcerec                   *fr,
-                       const t_inputrec             *ir,
-                       const t_idef                 *idef,
-                       const t_commrec              *cr,
-                       const gmx_multisim_t         *ms,
-                       t_nrnb                       *nrnb,
-                       gmx_wallcycle_t               wcycle,
-                       const t_mdatoms              *md,
-                       rvec                         *x,
-                       history_t                    *hist,
-                       rvec                         *forceForUseWithShiftForces,
-                       gmx::ForceWithVirial         *forceWithVirial,
-                       gmx_enerdata_t               *enerd,
-                       t_fcdata                     *fcd,
-                       const matrix                  box,
-                       const real                   *lambda,
-                       const t_graph                *graph,
-                       const rvec                   *mu_tot,
-                       const int                     flags,
-                       const DDBalanceRegionHandler &ddBalanceRegionHandler)
+void
+do_force_lowlevel(t_forcerec                               *fr,
+                  const t_inputrec                         *ir,
+                  const t_idef                             *idef,
+                  const t_commrec                          *cr,
+                  const gmx_multisim_t                     *ms,
+                  t_nrnb                                   *nrnb,
+                  gmx_wallcycle_t                           wcycle,
+                  const t_mdatoms                          *md,
+                  gmx::ArrayRefWithPadding<gmx::RVec>       coordinates,
+                  history_t                                *hist,
+                  rvec                                     *forceForUseWithShiftForces,
+                  gmx::ForceWithVirial                     *forceWithVirial,
+                  gmx_enerdata_t                           *enerd,
+                  t_fcdata                                 *fcd,
+                  const matrix                              box,
+                  const real                               *lambda,
+                  const t_graph                            *graph,
+                  const rvec                               *mu_tot,
+                  const int                                 flags,
+                  const DDBalanceRegionHandler             &ddBalanceRegionHandler)
  {
+    // TODO: Replace all uses of x by const coordinates
+    rvec *x = as_rvec_array(coordinates.paddedArrayRef().data());
+
      /* do QMMM first if requested */
      if (fr->bQMMM)
      {
@@ -301,9 +305,8 @@ void do_force_lowlevel(t_forcerec                   *fr,
  
                      wallcycle_start(wcycle, ewcPMEMESH);
                      status = gmx_pme_do(fr->pmedata,
-                                        0, md->homenr - fr->n_tpi,
-                                        x,
-                                        as_rvec_array(forceWithVirial->force_.data()),
+                                        gmx::constArrayRefFromArray(coordinates.unpaddedConstArrayRef().data(), md->homenr - fr->n_tpi),
+                                        forceWithVirial->force_,
                                          md->chargeA, md->chargeB,
                                          md->sqrt_c6A, md->sqrt_c6B,
                                          md->sigmaA, md->sigmaB,
@@ -341,9 +344,9 @@ void do_force_lowlevel(t_forcerec                   *fr,
                      /* Determine the PME grid energy of the test molecule
                       * with the PME grid potential of the other charges.
                       */
-                    gmx_pme_calc_energy(fr->pmedata, fr->n_tpi,
-                                        x + md->homenr - fr->n_tpi,
-                                        md->chargeA + md->homenr - fr->n_tpi,
+                    gmx_pme_calc_energy(fr->pmedata,
+                                        coordinates.unpaddedConstArrayRef().subArray(md->homenr - fr->n_tpi, fr->n_tpi),
+                                        gmx::arrayRefFromArray(md->chargeA + md->homenr - fr->n_tpi, fr->n_tpi),
                                          &Vlr_q);
                  }
              }
diff --git a/src/gromacs/mdlib/force.h b/src/gromacs/mdlib/force.h

index ca68d909d7f250fe83630654db5fd7052b9afae7..8cf6c70fdfc8ab52a3592774860e823c78078b02 100644 (file)
--- a/src/gromacs/mdlib/force.h
+++ b/src/gromacs/mdlib/force.h
@@ -113,26 +113,26 @@ void do_force(FILE                                     *log,
  
  
  void
-do_force_lowlevel(t_forcerec                   *fr,
-                  const t_inputrec             *ir,
-                  const t_idef                 *idef,
-                  const t_commrec              *cr,
-                  const gmx_multisim_t         *ms,
-                  t_nrnb                       *nrnb,
-                  gmx_wallcycle                *wcycle,
-                  const t_mdatoms              *md,
-                  rvec                         *x,
-                  history_t                    *hist,
-                  rvec                         *f_shortrange,
-                  gmx::ForceWithVirial         *forceWithVirial,
-                  gmx_enerdata_t               *enerd,
-                  t_fcdata                     *fcd,
-                  const matrix                  box,
-                  const real                   *lambda,
-                  const t_graph                *graph,
-                  const rvec                   *mu_tot,
-                  int                           flags,
-                  const DDBalanceRegionHandler &ddBalanceRegionHandler);
+do_force_lowlevel(t_forcerec                               *fr,
+                  const t_inputrec                         *ir,
+                  const t_idef                             *idef,
+                  const t_commrec                          *cr,
+                  const gmx_multisim_t                     *ms,
+                  t_nrnb                                   *nrnb,
+                  gmx_wallcycle                            *wcycle,
+                  const t_mdatoms                          *md,
+                  gmx::ArrayRefWithPadding<gmx::RVec>       coordinates,
+                  history_t                                *hist,
+                  rvec                                     *f_shortrange,
+                  gmx::ForceWithVirial                     *forceWithVirial,
+                  gmx_enerdata_t                           *enerd,
+                  t_fcdata                                 *fcd,
+                  const matrix                              box,
+                  const real                               *lambda,
+                  const t_graph                            *graph,
+                  const rvec                               *mu_tot,
+                  int                                       flags,
+                  const DDBalanceRegionHandler             &ddBalanceRegionHandler);
  /* Call all the force routines */
  
  #endif
diff --git a/src/gromacs/mdlib/sim_util.cpp b/src/gromacs/mdlib/sim_util.cpp

index 84736d802d2d5abd9571b009fe75db5fe428237b..7e6304774668a6136c24e09f3d94efe9407b684e 100644 (file)
--- a/src/gromacs/mdlib/sim_util.cpp
+++ b/src/gromacs/mdlib/sim_util.cpp
@@ -1312,7 +1312,7 @@ void do_force(FILE                                     *fplog,
      /* Compute the bonded and non-bonded energies and optionally forces */
      do_force_lowlevel(fr, inputrec, &(top->idef),
                        cr, ms, nrnb, wcycle, mdatoms,
-                      as_rvec_array(x.unpaddedArrayRef().data()), hist, forceOut.f, &forceOut.forceWithVirial, enerd, fcd,
+                      x, hist, forceOut.f, &forceOut.forceWithVirial, enerd, fcd,
                        box, lambda.data(), graph, fr->mu_tot,
                        flags,
                        ddBalanceRegionHandler);
diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp

index 70acaf28fd1d3e4ab638f5eeeaa2224c456a38ef..b5419b0ac040914b618a5da76aa9d6c4d12f415e 100644 (file)
--- a/src/gromacs/mdrun/runner.cpp
+++ b/src/gromacs/mdrun/runner.cpp
@@ -1348,7 +1348,7 @@ int Mdrunner::mdrunner()
                  pmedata = gmx_pme_init(cr,
                                         getNumPmeDomains(cr->dd),
                                         inputrec,
-                                       mtop.natoms, nChargePerturbed != 0, nTypePerturbed != 0,
+                                       nChargePerturbed != 0, nTypePerturbed != 0,
                                         mdrunOptions.reproducible,
                                         ewaldcoeff_q, ewaldcoeff_lj,
                                         gmx_omp_nthreads_get(emntPME),
author	Berk Hess <hess@kth.se>
	Wed, 15 May 2019 11:04:44 +0000 (13:04 +0200)
committer	Mark Abraham <mark.j.abraham@gmail.com>
	Wed, 19 Jun 2019 19:52:59 +0000 (21:52 +0200)
src/external/thread_mpi/include/thread_mpi/tmpi.h		patch \| blob \| history
src/gromacs/domdec/mdsetup.cpp		patch \| blob \| history
src/gromacs/ewald/pme.cpp		patch \| blob \| history
src/gromacs/ewald/pme.h		patch \| blob \| history
src/gromacs/ewald/pme_gather.cpp		patch \| blob \| history
src/gromacs/ewald/pme_gather.h		patch \| blob \| history
src/gromacs/ewald/pme_gpu_internal.cpp		patch \| blob \| history
src/gromacs/ewald/pme_gpu_internal.h		patch \| blob \| history
src/gromacs/ewald/pme_internal.h		patch \| blob \| history
src/gromacs/ewald/pme_only.cpp		patch \| blob \| history
src/gromacs/ewald/pme_redistribute.cpp		patch \| blob \| history
src/gromacs/ewald/pme_redistribute.h		patch \| blob \| history
src/gromacs/ewald/pme_spread.cpp		patch \| blob \| history
src/gromacs/ewald/pme_spread.h		patch \| blob \| history
src/gromacs/ewald/tests/pmetestcommon.cpp		patch \| blob \| history
src/gromacs/ewald/tests/pmetestcommon.h		patch \| blob \| history
src/gromacs/mdlib/force.cpp		patch \| blob \| history
src/gromacs/mdlib/force.h		patch \| blob \| history
src/gromacs/mdlib/sim_util.cpp		patch \| blob \| history
src/gromacs/mdrun/runner.cpp		patch \| blob \| history