Got rid of manual memory management in the PME overlap structures
author    Aleksei Iupinov <a.yupinov@gmail.com>
Tue, 28 Nov 2017 14:14:40 +0000 (15:14 +0100)
committer Berk Hess <hess@kth.se>
Thu, 18 Jan 2018 21:16:31 +0000 (22:16 +0100)
Also moved the send_id and recv_id fields from pme_overlap_t into the
per-pulse pme_grid_comm_t structure, where they belong.

Change-Id: I372d82b703349c65fd86257795d958a8b38c98b8

src/gromacs/ewald/pme-grid.cpp
src/gromacs/ewald/pme-internal.h
src/gromacs/ewald/pme-spread.cpp
src/gromacs/ewald/pme.cpp
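
The pattern applied throughout this change is the usual one for retiring
snew()/sfree(): raw pointer members become std::vector members, the element
count is queried with size() instead of being tracked separately (the old
noverlap_nodes field), and the explicit destroy_overlap_comm() helper becomes
unnecessary because the compiler-generated destructor releases everything.
A minimal self-contained sketch of the idea, using simplified, hypothetical
type and field names rather than the actual GROMACS headers:

    #include <cstdio>
    #include <vector>

    // Stand-in for pme_grid_comm_t: per-pulse data, now carrying the rank ids.
    struct GridComm
    {
        int sendId   = 0;
        int recvId   = 0;
        int recvSize = 0;
    };

    // Stand-in for pme_overlap_t: every buffer is a std::vector, so there is
    // nothing left for a destroy function to do.
    struct Overlap
    {
        std::vector<int>      s2g0;      // local interpolation grid start per rank
        std::vector<int>      s2g1;      // local interpolation grid end per rank
        std::vector<GridComm> commData;  // one entry per communication pulse
        std::vector<float>    sendbuf;   // shared send buffer
        std::vector<float>    recvbuf;   // shared receive buffer
    };

    int main()
    {
        Overlap ol;
        ol.commData.resize(2);    // resize() replaces snew(); size() replaces noverlap_nodes
        ol.sendbuf.resize(1024);
        std::printf("pulses: %zu, sendbuf: %zu\n", ol.commData.size(), ol.sendbuf.size());
        return 0;
    }   // ol goes out of scope; all vectors free their memory automatically

The same reasoning is what lets gmx_pme_destroy() drop its
destroy_overlap_comm() calls in the pme.cpp hunk below.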

src/gromacs/ewald/pme-grid.cpp
index 937584ca6192a64b3cb28d7349db84a7ca9907eb..dcf2a06c3cc624a15b3e362bd18e82faf033580b 100644
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -73,22 +73,22 @@ void gmx_sum_qgrid_dd(struct gmx_pme_t *pme, real *grid, int direction)
     int            recv_index0, recv_nindex;
     MPI_Status     stat;
     int            i, j, k, ix, iy, iz, icnt;
-    int            ipulse, send_id, recv_id, datasize;
+    int            send_id, recv_id, datasize;
     real          *p;
     real          *sendptr, *recvptr;
 
     /* Start with minor-rank communication. This is a bit of a pain since it is not contiguous */
     overlap = &pme->overlap[1];
 
-    for (ipulse = 0; ipulse < overlap->noverlap_nodes; ipulse++)
+    for (size_t ipulse = 0; ipulse < overlap->comm_data.size(); ipulse++)
     {
         /* Since we have already (un)wrapped the overlap in the z-dimension,
          * we only have to communicate 0 to nkz (not pmegrid_nz).
          */
         if (direction == GMX_SUM_GRID_FORWARD)
         {
-            send_id       = overlap->send_id[ipulse];
-            recv_id       = overlap->recv_id[ipulse];
+            send_id       = overlap->comm_data[ipulse].send_id;
+            recv_id       = overlap->comm_data[ipulse].recv_id;
             send_index0   = overlap->comm_data[ipulse].send_index0;
             send_nindex   = overlap->comm_data[ipulse].send_nindex;
             recv_index0   = overlap->comm_data[ipulse].recv_index0;
@@ -96,8 +96,8 @@ void gmx_sum_qgrid_dd(struct gmx_pme_t *pme, real *grid, int direction)
         }
         else
         {
-            send_id       = overlap->recv_id[ipulse];
-            recv_id       = overlap->send_id[ipulse];
+            send_id       = overlap->comm_data[ipulse].recv_id;
+            recv_id       = overlap->comm_data[ipulse].send_id;
             send_index0   = overlap->comm_data[ipulse].recv_index0;
             send_nindex   = overlap->comm_data[ipulse].recv_nindex;
             recv_index0   = overlap->comm_data[ipulse].send_index0;
@@ -130,9 +130,9 @@ void gmx_sum_qgrid_dd(struct gmx_pme_t *pme, real *grid, int direction)
 
         datasize      = pme->pmegrid_nx * pme->nkz;
 
-        MPI_Sendrecv(overlap->sendbuf, send_nindex*datasize, GMX_MPI_REAL,
+        MPI_Sendrecv(overlap->sendbuf.data(), send_nindex*datasize, GMX_MPI_REAL,
                      send_id, ipulse,
-                     overlap->recvbuf, recv_nindex*datasize, GMX_MPI_REAL,
+                     overlap->recvbuf.data(), recv_nindex*datasize, GMX_MPI_REAL,
                      recv_id, ipulse,
                      overlap->mpi_comm, &stat);
 
@@ -175,22 +175,22 @@ void gmx_sum_qgrid_dd(struct gmx_pme_t *pme, real *grid, int direction)
      */
     overlap = &pme->overlap[0];
 
-    for (ipulse = 0; ipulse < overlap->noverlap_nodes; ipulse++)
+    for (size_t ipulse = 0; ipulse < overlap->comm_data.size(); ipulse++)
     {
         if (direction == GMX_SUM_GRID_FORWARD)
         {
-            send_id       = overlap->send_id[ipulse];
-            recv_id       = overlap->recv_id[ipulse];
+            send_id       = overlap->comm_data[ipulse].send_id;
+            recv_id       = overlap->comm_data[ipulse].recv_id;
             send_index0   = overlap->comm_data[ipulse].send_index0;
             send_nindex   = overlap->comm_data[ipulse].send_nindex;
             recv_index0   = overlap->comm_data[ipulse].recv_index0;
             recv_nindex   = overlap->comm_data[ipulse].recv_nindex;
-            recvptr       = overlap->recvbuf;
+            recvptr       = overlap->recvbuf.data();
         }
         else
         {
-            send_id       = overlap->recv_id[ipulse];
-            recv_id       = overlap->send_id[ipulse];
+            send_id       = overlap->comm_data[ipulse].recv_id;
+            recv_id       = overlap->comm_data[ipulse].send_id;
             send_index0   = overlap->comm_data[ipulse].recv_index0;
             send_nindex   = overlap->comm_data[ipulse].recv_nindex;
             recv_index0   = overlap->comm_data[ipulse].send_index0;
src/gromacs/ewald/pme-internal.h
index b7eac933689cb5f943bbfa4b58ad04d3390aa599..b8c6900d58a6a35f4cacbb03ced2cb886249d8cc 100644
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -115,29 +115,32 @@ void gmx_pme_reinit(struct gmx_pme_t **pmedata,
 //! @cond Doxygen_Suppress
 
 /*! \brief Data structure for grid communication */
-typedef struct {
+struct pme_grid_comm_t
+{
+    int send_id;         //!< Rank id to send to
     int send_index0;
     int send_nindex;
+    int recv_id;         //!< Rank id to receive from
     int recv_index0;
     int recv_nindex;
-    int recv_size;   /* Receive buffer width, used with OpenMP */
-} pme_grid_comm_t;
+    int recv_size = 0;   //!< Receive buffer width, used with OpenMP
+};
 
-/*! \brief Data structure for grid overlap communication */
-typedef struct {
+/*! \brief Data structure for grid overlap communication in a single dimension */
+struct pme_overlap_t
+{
 #if GMX_MPI
-    MPI_Comm         mpi_comm;
+    MPI_Comm                     mpi_comm;       //!< MPI communicator
 #endif
-    int              nnodes, nodeid;
-    int             *s2g0;
-    int             *s2g1;
-    int              noverlap_nodes;
-    int             *send_id, *recv_id;
-    int              send_size; /* Send buffer width, used with OpenMP */
-    pme_grid_comm_t *comm_data;
-    real            *sendbuf;
-    real            *recvbuf;
-} pme_overlap_t;
+    int                          nnodes;         //!< Number of ranks
+    int                          nodeid;         //!< Unique rank identifier
+    std::vector<int>             s2g0;           //!< The local interpolation grid start
+    std::vector<int>             s2g1;           //!< The local interpolation grid end
+    int                          send_size;      //!< Send buffer width, used with OpenMP
+    std::vector<pme_grid_comm_t> comm_data;      //!< All the individual communication data for each rank
+    std::vector<real>            sendbuf;        //!< Shared buffer for sending
+    std::vector<real>            recvbuf;        //!< Shared buffer for receiving
+};
 
 /*! \brief Data structure for organizing particle allocation to threads */
 typedef struct {
src/gromacs/ewald/pme-spread.cpp
index 3f70f80ed57be8b637a80afbce222c5b2ddd71f5..62c9862dd61ee2423bd7e0665bff854587bd4647 100644
@@ -710,8 +710,7 @@ static void sum_fftgrid_dd(const gmx_pme_t *pme, real *fftgrid, int grid_index)
     MPI_Status stat;
 #endif
     int  recv_size_y;
-    int  ipulse, size_yx;
-    real *sendptr, *recvptr;
+    int  size_yx;
     int  x, y, z, indg, indb;
 
     /* Note that this routine is only used for forward communication.
@@ -745,7 +744,7 @@ static void sum_fftgrid_dd(const gmx_pme_t *pme, real *fftgrid, int grid_index)
         int send_size_y = overlap->send_size;
 #endif
 
-        for (ipulse = 0; ipulse < overlap->noverlap_nodes; ipulse++)
+        for (size_t ipulse = 0; ipulse < overlap->comm_data.size(); ipulse++)
         {
             send_index0   =
                 overlap->comm_data[ipulse].send_index0 -
@@ -755,8 +754,8 @@ static void sum_fftgrid_dd(const gmx_pme_t *pme, real *fftgrid, int grid_index)
             recv_nindex   = overlap->comm_data[ipulse].recv_nindex;
             recv_size_y   = overlap->comm_data[ipulse].recv_size;
 
-            sendptr = overlap->sendbuf + send_index0*local_fft_ndata[ZZ];
-            recvptr = overlap->recvbuf;
+            auto *sendptr = const_cast<real *>(overlap->sendbuf.data()) + send_index0 * local_fft_ndata[ZZ];
+            auto *recvptr = const_cast<real *>(overlap->recvbuf.data());
 
             if (debug != nullptr)
             {
@@ -765,8 +764,8 @@ static void sum_fftgrid_dd(const gmx_pme_t *pme, real *fftgrid, int grid_index)
             }
 
 #if GMX_MPI
-            int send_id = overlap->send_id[ipulse];
-            int recv_id = overlap->recv_id[ipulse];
+            int send_id = overlap->comm_data[ipulse].send_id;
+            int recv_id = overlap->comm_data[ipulse].recv_id;
             MPI_Sendrecv(sendptr, send_size_y*datasize, GMX_MPI_REAL,
                          send_id, ipulse,
                          recvptr, recv_size_y*datasize, GMX_MPI_REAL,
@@ -790,7 +789,7 @@ static void sum_fftgrid_dd(const gmx_pme_t *pme, real *fftgrid, int grid_index)
             if (pme->nnodes_major > 1)
             {
                 /* Copy from the received buffer to the send buffer for dim 0 */
-                sendptr = pme->overlap[0].sendbuf;
+                sendptr = const_cast<real *>(pme->overlap[0].sendbuf.data());
                 for (x = 0; x < size_yx; x++)
                 {
                     for (y = 0; y < recv_nindex; y++)
@@ -816,14 +815,12 @@ static void sum_fftgrid_dd(const gmx_pme_t *pme, real *fftgrid, int grid_index)
         /* Major dimension */
         const pme_overlap_t *overlap = &pme->overlap[0];
 
-        ipulse = 0;
+        size_t ipulse = 0;
 
         send_nindex   = overlap->comm_data[ipulse].send_nindex;
         /* We don't use recv_index0, as we always receive starting at 0 */
         recv_nindex   = overlap->comm_data[ipulse].recv_nindex;
 
-        recvptr = overlap->recvbuf;
-
         if (debug != nullptr)
         {
             fprintf(debug, "PME fftgrid comm x %2d x %2d x %2d\n",
@@ -831,10 +828,11 @@ static void sum_fftgrid_dd(const gmx_pme_t *pme, real *fftgrid, int grid_index)
         }
 
 #if GMX_MPI
-        int datasize = local_fft_ndata[YY]*local_fft_ndata[ZZ];
-        int send_id  = overlap->send_id[ipulse];
-        int recv_id  = overlap->recv_id[ipulse];
-        sendptr      = overlap->sendbuf;
+        int datasize  = local_fft_ndata[YY]*local_fft_ndata[ZZ];
+        int send_id   = overlap->comm_data[ipulse].send_id;
+        int recv_id   = overlap->comm_data[ipulse].recv_id;
+        auto *sendptr = const_cast<real *>(overlap->sendbuf.data());
+        auto *recvptr = const_cast<real *>(overlap->recvbuf.data());
         MPI_Sendrecv(sendptr, send_nindex*datasize, GMX_MPI_REAL,
                      send_id, ipulse,
                      recvptr, recv_nindex*datasize, GMX_MPI_REAL,
@@ -850,7 +848,7 @@ static void sum_fftgrid_dd(const gmx_pme_t *pme, real *fftgrid, int grid_index)
                 indb = (x*local_fft_ndata[YY] + y)*local_fft_ndata[ZZ];
                 for (z = 0; z < local_fft_ndata[ZZ]; z++)
                 {
-                    fftgrid[indg+z] += recvptr[indb+z];
+                    fftgrid[indg + z] += overlap->recvbuf[indb + z];
                 }
             }
         }
@@ -979,8 +977,8 @@ void spread_on_grid(const gmx_pme_t *pme,
             {
                 reduce_threadgrid_overlap(pme, grids, thread,
                                           fftgrid,
-                                          pme->overlap[0].sendbuf,
-                                          pme->overlap[1].sendbuf,
+                                          const_cast<real *>(pme->overlap[0].sendbuf.data()),
+                                          const_cast<real *>(pme->overlap[1].sendbuf.data()),
                                           grid_index);
             }
             GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
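
A side effect visible in the pme-spread.cpp hunks above is the
const_cast<real *> around sendbuf.data() and recvbuf.data(): the overlap
structures are reached through a const gmx_pme_t * / const pme_overlap_t *,
so vector::data() returns a pointer to const even though these buffers are
scratch space that must be written. A simplified sketch of the situation,
with hypothetical names:

    #include <cstdio>
    #include <vector>

    using real = float;

    struct Overlap
    {
        std::vector<real> sendbuf;   // scratch buffer, logically mutable
    };

    // The caller only has const access to the surrounding structure, as in
    // sum_fftgrid_dd(const gmx_pme_t *pme, ...).
    void packSendBuffer(const Overlap *overlap)
    {
        // data() through a pointer-to-const yields const real*; writing needs a cast.
        real *sendptr = const_cast<real *>(overlap->sendbuf.data());
        sendptr[0] = 1.0f;   // well-defined because the Overlap object itself is not const
    }

    int main()
    {
        Overlap ol;
        ol.sendbuf.resize(16);    // buffer sized up front, as init_overlap_comm() does
        packSendBuffer(&ol);
        std::printf("sendbuf[0] = %g\n", ol.sendbuf[0]);
        return 0;
    }

Making the buffers mutable, or passing the structure non-const, would avoid
the casts; the casts used here keep the existing const gmx_pme_t * function
signatures unchanged.
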
src/gromacs/ewald/pme.cpp
index d669a50e5146cfc43c4216163585be36294b468e..fe2f007a0567634ecf0e56f54c03713a390e7c49 100644
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -298,10 +298,7 @@ init_overlap_comm(pme_overlap_t *  ol,
                   int              ndata,
                   int              commplainsize)
 {
-    int              b, i;
-    pme_grid_comm_t *pgc;
     gmx_bool         bCont;
-    int              fft_start, fft_end, send_index1, recv_index1;
 #if GMX_MPI
     MPI_Status       stat;
 
@@ -318,13 +315,13 @@ init_overlap_comm(pme_overlap_t *  ol,
      * that belong to higher nodes (modulo nnodes)
      */
 
-    snew(ol->s2g0, ol->nnodes+1);
-    snew(ol->s2g1, ol->nnodes);
+    ol->s2g0.resize(ol->nnodes + 1);
+    ol->s2g1.resize(ol->nnodes);
     if (debug)
     {
         fprintf(debug, "PME slab boundaries:");
     }
-    for (i = 0; i < nnodes; i++)
+    for (int i = 0; i < nnodes; i++)
     {
         /* s2g0 the local interpolation grid start.
          * s2g1 the local interpolation grid end.
@@ -332,8 +329,8 @@ init_overlap_comm(pme_overlap_t *  ol,
          * spatially uniform along dimension x or y, we need to round
          * s2g0 down and s2g1 up.
          */
-        ol->s2g0[i] = ( i   *ndata + 0       )/nnodes;
-        ol->s2g1[i] = ((i+1)*ndata + nnodes-1)/nnodes + norder - 1;
+        ol->s2g0[i] = (i * ndata + 0) / nnodes;
+        ol->s2g1[i] = ((i + 1) * ndata + nnodes - 1) / nnodes + norder - 1;
 
         if (debug)
         {
@@ -347,55 +344,50 @@ init_overlap_comm(pme_overlap_t *  ol,
     }
 
     /* Determine with how many nodes we need to communicate the grid overlap */
-    b = 0;
+    int testRankCount = 0;
     do
     {
-        b++;
+        testRankCount++;
         bCont = FALSE;
-        for (i = 0; i < nnodes; i++)
+        for (int i = 0; i < nnodes; i++)
         {
-            if ((i+b <  nnodes && ol->s2g1[i] > ol->s2g0[i+b]) ||
-                (i+b >= nnodes && ol->s2g1[i] > ol->s2g0[i+b-nnodes] + ndata))
+            if ((i + testRankCount <  nnodes && ol->s2g1[i] > ol->s2g0[i + testRankCount]) ||
+                (i + testRankCount >= nnodes && ol->s2g1[i] > ol->s2g0[i + testRankCount - nnodes] + ndata))
             {
                 bCont = TRUE;
             }
         }
     }
-    while (bCont && b < nnodes);
-    ol->noverlap_nodes = b - 1;
-
-    snew(ol->send_id, ol->noverlap_nodes);
-    snew(ol->recv_id, ol->noverlap_nodes);
-    for (b = 0; b < ol->noverlap_nodes; b++)
-    {
-        ol->send_id[b] = (ol->nodeid + (b + 1)) % ol->nnodes;
-        ol->recv_id[b] = (ol->nodeid - (b + 1) + ol->nnodes) % ol->nnodes;
-    }
-    snew(ol->comm_data, ol->noverlap_nodes);
+    while (bCont && testRankCount < nnodes);
 
+    ol->comm_data.resize(testRankCount - 1);
     ol->send_size = 0;
-    for (b = 0; b < ol->noverlap_nodes; b++)
+
+    for (size_t b = 0; b < ol->comm_data.size(); b++)
     {
-        pgc = &ol->comm_data[b];
+        pme_grid_comm_t *pgc = &ol->comm_data[b];
+
         /* Send */
-        fft_start        = ol->s2g0[ol->send_id[b]];
-        fft_end          = ol->s2g0[ol->send_id[b]+1];
-        if (ol->send_id[b] < nodeid)
+        pgc->send_id = (ol->nodeid + (b + 1)) % ol->nnodes;
+        int fft_start = ol->s2g0[pgc->send_id];
+        int fft_end   = ol->s2g0[pgc->send_id + 1];
+        if (pgc->send_id < nodeid)
         {
             fft_start += ndata;
             fft_end   += ndata;
         }
-        send_index1       = ol->s2g1[nodeid];
-        send_index1       = std::min(send_index1, fft_end);
-        pgc->send_index0  = fft_start;
-        pgc->send_nindex  = std::max(0, send_index1 - pgc->send_index0);
-        ol->send_size    += pgc->send_nindex;
+        int send_index1  = ol->s2g1[nodeid];
+        send_index1      = std::min(send_index1, fft_end);
+        pgc->send_index0 = fft_start;
+        pgc->send_nindex = std::max(0, send_index1 - pgc->send_index0);
+        ol->send_size   += pgc->send_nindex;
 
         /* We always start receiving to the first index of our slab */
+        pgc->recv_id     = (ol->nodeid - (b + 1) + ol->nnodes) % ol->nnodes;
         fft_start        = ol->s2g0[ol->nodeid];
-        fft_end          = ol->s2g0[ol->nodeid+1];
-        recv_index1      = ol->s2g1[ol->recv_id[b]];
-        if (ol->recv_id[b] > nodeid)
+        fft_end          = ol->s2g0[ol->nodeid + 1];
+        int recv_index1  = ol->s2g1[pgc->recv_id];
+        if (pgc->recv_id > nodeid)
         {
             recv_index1 -= ndata;
         }
@@ -406,30 +398,17 @@ init_overlap_comm(pme_overlap_t *  ol,
 
 #if GMX_MPI
     /* Communicate the buffer sizes to receive */
-    for (b = 0; b < ol->noverlap_nodes; b++)
+    for (size_t b = 0; b < ol->comm_data.size(); b++)
     {
-        MPI_Sendrecv(&ol->send_size, 1, MPI_INT, ol->send_id[b], b,
-                     &ol->comm_data[b].recv_size, 1, MPI_INT, ol->recv_id[b], b,
+        MPI_Sendrecv(&ol->send_size, 1, MPI_INT, ol->comm_data[b].send_id, b,
+                     &ol->comm_data[b].recv_size, 1, MPI_INT, ol->comm_data[b].recv_id, b,
                      ol->mpi_comm, &stat);
     }
 #endif
 
     /* For non-divisible grid we need pme_order iso pme_order-1 */
-    snew(ol->sendbuf, norder*commplainsize);
-    snew(ol->recvbuf, norder*commplainsize);
-}
-
-/*! \brief Destroy data structure for communication */
-static void
-destroy_overlap_comm(const pme_overlap_t *ol)
-{
-    sfree(ol->s2g0);
-    sfree(ol->s2g1);
-    sfree(ol->send_id);
-    sfree(ol->recv_id);
-    sfree(ol->comm_data);
-    sfree(ol->sendbuf);
-    sfree(ol->recvbuf);
+    ol->sendbuf.resize(norder * commplainsize);
+    ol->recvbuf.resize(norder * commplainsize);
 }
 
 int minimalPmeGridSize(int pmeOrder)
@@ -740,7 +719,7 @@ gmx_pme_t *gmx_pme_init(const t_commrec     *cr,
     /* Double-check for a limitation of the (current) sum_fftgrid_dd code.
      * Note that gmx_pme_check_restrictions checked for this already.
      */
-    if (pme->bUseThreads && pme->overlap[0].noverlap_nodes > 1)
+    if (pme->bUseThreads && (pme->overlap[0].comm_data.size() > 1))
     {
         gmx_incons("More than one communication pulse required for grid overlap communication along the major dimension while using threads");
     }
@@ -1730,9 +1709,6 @@ void gmx_pme_destroy(gmx_pme_t *pme)
         sfree(pme->bsp_mod[i]);
     }
 
-    destroy_overlap_comm(&pme->overlap[0]);
-    destroy_overlap_comm(&pme->overlap[1]);
-
     sfree(pme->lb_buf1);
     sfree(pme->lb_buf2);