Got rid of manual memory management in the PME overlap structures
author    Aleksei Iupinov <a.yupinov@gmail.com>
Tue, 28 Nov 2017 14:14:40 +0000 (15:14 +0100)
committer Berk Hess <hess@kth.se>
Thu, 18 Jan 2018 21:16:31 +0000 (22:16 +0100)
Also moved the send_id and recv_id fields from pme_overlap_t into the
per-pulse pme_grid_comm_t structure, where they belong.

Change-Id: I372d82b703349c65fd86257795d958a8b38c98b8

src/gromacs/ewald/pme-grid.cpp
src/gromacs/ewald/pme-internal.h
src/gromacs/ewald/pme-spread.cpp
src/gromacs/ewald/pme.cpp
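
The pattern applied throughout this change is the usual one for retiring
snew()/sfree(): raw pointer members become std::vector members, the element
count is queried with size() instead of being tracked separately (the old
noverlap_nodes field), and the explicit destroy_overlap_comm() helper becomes
unnecessary because the compiler-generated destructor releases everything.
A minimal self-contained sketch of the idea, using simplified, hypothetical
type and field names rather than the actual GROMACS headers:

    #include <cstdio>
    #include <vector>

    // Stand-in for pme_grid_comm_t: per-pulse data, now carrying the rank ids.
    struct GridComm
    {
        int sendId   = 0;
        int recvId   = 0;
        int recvSize = 0;
    };

    // Stand-in for pme_overlap_t: every buffer is a std::vector, so there is
    // nothing left for a destroy function to do.
    struct Overlap
    {
        std::vector<int>      s2g0;      // local interpolation grid start per rank
        std::vector<int>      s2g1;      // local interpolation grid end per rank
        std::vector<GridComm> commData;  // one entry per communication pulse
        std::vector<float>    sendbuf;   // shared send buffer
        std::vector<float>    recvbuf;   // shared receive buffer
    };

    int main()
    {
        Overlap ol;
        ol.commData.resize(2);    // resize() replaces snew(); size() replaces noverlap_nodes
        ol.sendbuf.resize(1024);
        std::printf("pulses: %zu, sendbuf: %zu\n", ol.commData.size(), ol.sendbuf.size());
        return 0;
    }   // ol goes out of scope; all vectors free their memory automatically

The same reasoning is what lets gmx_pme_destroy() drop its
destroy_overlap_comm() calls in the pme.cpp hunk below.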

src/gromacs/ewald/pme-grid.cpp
index 937584ca6192a64b3cb28d7349db84a7ca9907eb..dcf2a06c3cc624a15b3e362bd18e82faf033580b 100644
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -73,22 +73,22 @@ void gmx_sum_qgrid_dd(struct gmx_pme_t *pme, real *grid, int direction)
     int            recv_index0, recv_nindex;
     MPI_Status     stat;
     int            i, j, k, ix, iy, iz, icnt;
-    int            ipulse, send_id, recv_id, datasize;
+    int            send_id, recv_id, datasize;
     real          *p;
     real          *sendptr, *recvptr;
 
     /* Start with minor-rank communication. This is a bit of a pain since it is not contiguous */
     overlap = &pme->overlap[1];
 
-    for (ipulse = 0; ipulse < overlap->noverlap_nodes; ipulse++)
+    for (size_t ipulse = 0; ipulse < overlap->comm_data.size(); ipulse++)
     {
         /* Since we have already (un)wrapped the overlap in the z-dimension,
          * we only have to communicate 0 to nkz (not pmegrid_nz).
          */
         if (direction == GMX_SUM_GRID_FORWARD)
         {
-            send_id       = overlap->send_id[ipulse];
-            recv_id       = overlap->recv_id[ipulse];
+            send_id       = overlap->comm_data[ipulse].send_id;
+            recv_id       = overlap->comm_data[ipulse].recv_id;
             send_index0   = overlap->comm_data[ipulse].send_index0;
             send_nindex   = overlap->comm_data[ipulse].send_nindex;
             recv_index0   = overlap->comm_data[ipulse].recv_index0;
@@ -96,8 +96,8 @@ void gmx_sum_qgrid_dd(struct gmx_pme_t *pme, real *grid, int direction)
         }
         else
         {
-            send_id       = overlap->recv_id[ipulse];
-            recv_id       = overlap->send_id[ipulse];
+            send_id       = overlap->comm_data[ipulse].recv_id;
+            recv_id       = overlap->comm_data[ipulse].send_id;
             send_index0   = overlap->comm_data[ipulse].recv_index0;
             send_nindex   = overlap->comm_data[ipulse].recv_nindex;
             recv_index0   = overlap->comm_data[ipulse].send_index0;
@@ -130,9 +130,9 @@ void gmx_sum_qgrid_dd(struct gmx_pme_t *pme, real *grid, int direction)
 
         datasize      = pme->pmegrid_nx * pme->nkz;
 
-        MPI_Sendrecv(overlap->sendbuf, send_nindex*datasize, GMX_MPI_REAL,
+        MPI_Sendrecv(overlap->sendbuf.data(), send_nindex*datasize, GMX_MPI_REAL,
                      send_id, ipulse,
-                     overlap->recvbuf, recv_nindex*datasize, GMX_MPI_REAL,
+                     overlap->recvbuf.data(), recv_nindex*datasize, GMX_MPI_REAL,
                      recv_id, ipulse,
                      overlap->mpi_comm, &stat);
 
@@ -175,22 +175,22 @@ void gmx_sum_qgrid_dd(struct gmx_pme_t *pme, real *grid, int direction)
      */
     overlap = &pme->overlap[0];
 
-    for (ipulse = 0; ipulse < overlap->noverlap_nodes; ipulse++)
+    for (size_t ipulse = 0; ipulse < overlap->comm_data.size(); ipulse++)
     {
         if (direction == GMX_SUM_GRID_FORWARD)
         {
-            send_id       = overlap->send_id[ipulse];
-            recv_id       = overlap->recv_id[ipulse];
+            send_id       = overlap->comm_data[ipulse].send_id;
+            recv_id       = overlap->comm_data[ipulse].recv_id;
             send_index0   = overlap->comm_data[ipulse].send_index0;
             send_nindex   = overlap->comm_data[ipulse].send_nindex;
             recv_index0   = overlap->comm_data[ipulse].recv_index0;
             recv_nindex   = overlap->comm_data[ipulse].recv_nindex;
-            recvptr       = overlap->recvbuf;
+            recvptr       = overlap->recvbuf.data();
         }
         else
         {
-            send_id       = overlap->recv_id[ipulse];
-            recv_id       = overlap->send_id[ipulse];
+            send_id       = overlap->comm_data[ipulse].recv_id;
+            recv_id       = overlap->comm_data[ipulse].send_id;
             send_index0   = overlap->comm_data[ipulse].recv_index0;
             send_nindex   = overlap->comm_data[ipulse].recv_nindex;
             recv_index0   = overlap->comm_data[ipulse].send_index0;
src/gromacs/ewald/pme-internal.h
index b7eac933689cb5f943bbfa4b58ad04d3390aa599..b8c6900d58a6a35f4cacbb03ced2cb886249d8cc 100644
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -115,29 +115,32 @@ void gmx_pme_reinit(struct gmx_pme_t **pmedata,
 //! @cond Doxygen_Suppress
 
 /*! \brief Data structure for grid communication */
-typedef struct {
+struct pme_grid_comm_t
+{
+    int send_id;         //!< Rank id to send to
     int send_index0;
     int send_nindex;
+    int recv_id;         //!< Rank id to receive from
     int recv_index0;
     int recv_nindex;
-    int recv_size;   /* Receive buffer width, used with OpenMP */
-} pme_grid_comm_t;
+    int recv_size = 0;   //!< Receive buffer width, used with OpenMP
+};
 
-/*! \brief Data structure for grid overlap communication */
-typedef struct {
+/*! \brief Data structure for grid overlap communication in a single dimension */
+struct pme_overlap_t
+{
 #if GMX_MPI
-    MPI_Comm         mpi_comm;
+    MPI_Comm                     mpi_comm;       //!< MPI communicator
 #endif
-    int              nnodes, nodeid;
-    int             *s2g0;
-    int             *s2g1;
-    int              noverlap_nodes;
-    int             *send_id, *recv_id;
-    int              send_size; /* Send buffer width, used with OpenMP */
-    pme_grid_comm_t *comm_data;
-    real            *sendbuf;
-    real            *recvbuf;
-} pme_overlap_t;
+    int                          nnodes;         //!< Number of ranks
+    int                          nodeid;         //!< Unique rank identifier
+    std::vector<int>             s2g0;           //!< The local interpolation grid start
+    std::vector<int>             s2g1;           //!< The local interpolation grid end
+    int                          send_size;      //!< Send buffer width, used with OpenMP
+    std::vector<pme_grid_comm_t> comm_data;      //!< All the individual communication data for each rank
+    std::vector<real>            sendbuf;        //!< Shared buffer for sending
+    std::vector<real>            recvbuf;        //!< Shared buffer for receiving
+};
 
 /*! \brief Data structure for organizing particle allocation to threads */
 typedef struct {
src/gromacs/ewald/pme-spread.cpp
index 3f70f80ed57be8b637a80afbce222c5b2ddd71f5..62c9862dd61ee2423bd7e0665bff854587bd4647 100644
@@ -710,8 +710,7 @@ static void sum_fftgrid_dd(const gmx_pme_t *pme, real *fftgrid, int grid_index)
     MPI_Status stat;
 #endif
     int  recv_size_y;
-    int  ipulse, size_yx;
-    real *sendptr, *recvptr;
+    int  size_yx;
     int  x, y, z, indg, indb;
 
     /* Note that this routine is only used for forward communication.
@@ -745,7 +744,7 @@ static void sum_fftgrid_dd(const gmx_pme_t *pme, real *fftgrid, int grid_index)
         int send_size_y = overlap->send_size;
 #endif
 
-        for (ipulse = 0; ipulse < overlap->noverlap_nodes; ipulse++)
+        for (size_t ipulse = 0; ipulse < overlap->comm_data.size(); ipulse++)
         {
             send_index0   =
                 overlap->comm_data[ipulse].send_index0 -
@@ -755,8 +754,8 @@ static void sum_fftgrid_dd(const gmx_pme_t *pme, real *fftgrid, int grid_index)
             recv_nindex   = overlap->comm_data[ipulse].recv_nindex;
             recv_size_y   = overlap->comm_data[ipulse].recv_size;
 
-            sendptr = overlap->sendbuf + send_index0*local_fft_ndata[ZZ];
-            recvptr = overlap->recvbuf;
+            auto *sendptr = const_cast<real *>(overlap->sendbuf.data()) + send_index0 * local_fft_ndata[ZZ];
+            auto *recvptr = const_cast<real *>(overlap->recvbuf.data());
 
             if (debug != nullptr)
             {
@@ -765,8 +764,8 @@ static void sum_fftgrid_dd(const gmx_pme_t *pme, real *fftgrid, int grid_index)
             }
 
 #if GMX_MPI
-            int send_id = overlap->send_id[ipulse];
-            int recv_id = overlap->recv_id[ipulse];
+            int send_id = overlap->comm_data[ipulse].send_id;
+            int recv_id = overlap->comm_data[ipulse].recv_id;
             MPI_Sendrecv(sendptr, send_size_y*datasize, GMX_MPI_REAL,
                          send_id, ipulse,
                          recvptr, recv_size_y*datasize, GMX_MPI_REAL,
@@ -790,7 +789,7 @@ static void sum_fftgrid_dd(const gmx_pme_t *pme, real *fftgrid, int grid_index)
             if (pme->nnodes_major > 1)
             {
                 /* Copy from the received buffer to the send buffer for dim 0 */
-                sendptr = pme->overlap[0].sendbuf;
+                sendptr = const_cast<real *>(pme->overlap[0].sendbuf.data());
                 for (x = 0; x < size_yx; x++)
                 {
                     for (y = 0; y < recv_nindex; y++)
@@ -816,14 +815,12 @@ static void sum_fftgrid_dd(const gmx_pme_t *pme, real *fftgrid, int grid_index)
         /* Major dimension */
         const pme_overlap_t *overlap = &pme->overlap[0];
 
-        ipulse = 0;
+        size_t ipulse = 0;
 
         send_nindex   = overlap->comm_data[ipulse].send_nindex;
         /* We don't use recv_index0, as we always receive starting at 0 */
         recv_nindex   = overlap->comm_data[ipulse].recv_nindex;
 
-        recvptr = overlap->recvbuf;
-
         if (debug != nullptr)
         {
             fprintf(debug, "PME fftgrid comm x %2d x %2d x %2d\n",
@@ -831,10 +828,11 @@ static void sum_fftgrid_dd(const gmx_pme_t *pme, real *fftgrid, int grid_index)
         }
 
 #if GMX_MPI
-        int datasize = local_fft_ndata[YY]*local_fft_ndata[ZZ];
-        int send_id  = overlap->send_id[ipulse];
-        int recv_id  = overlap->recv_id[ipulse];
-        sendptr      = overlap->sendbuf;
+        int datasize  = local_fft_ndata[YY]*local_fft_ndata[ZZ];
+        int send_id   = overlap->comm_data[ipulse].send_id;
+        int recv_id   = overlap->comm_data[ipulse].recv_id;
+        auto *sendptr = const_cast<real *>(overlap->sendbuf.data());
+        auto *recvptr = const_cast<real *>(overlap->recvbuf.data());
         MPI_Sendrecv(sendptr, send_nindex*datasize, GMX_MPI_REAL,
                      send_id, ipulse,
                      recvptr, recv_nindex*datasize, GMX_MPI_REAL,
@@ -850,7 +848,7 @@ static void sum_fftgrid_dd(const gmx_pme_t *pme, real *fftgrid, int grid_index)
                 indb = (x*local_fft_ndata[YY] + y)*local_fft_ndata[ZZ];
                 for (z = 0; z < local_fft_ndata[ZZ]; z++)
                 {
-                    fftgrid[indg+z] += recvptr[indb+z];
+                    fftgrid[indg + z] += overlap->recvbuf[indb + z];
                 }
             }
         }
@@ -979,8 +977,8 @@ void spread_on_grid(const gmx_pme_t *pme,
             {
                 reduce_threadgrid_overlap(pme, grids, thread,
                                           fftgrid,
-                                          pme->overlap[0].sendbuf,
-                                          pme->overlap[1].sendbuf,
+                                          const_cast<real *>(pme->overlap[0].sendbuf.data()),
+                                          const_cast<real *>(pme->overlap[1].sendbuf.data()),
                                           grid_index);
             }
             GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
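
A side effect visible in the pme-spread.cpp hunks above is the
const_cast<real *> around sendbuf.data() and recvbuf.data(): the overlap
structures are reached through a const gmx_pme_t * / const pme_overlap_t *,
so vector::data() returns a pointer to const even though these buffers are
scratch space that must be written. A simplified sketch of the situation,
with hypothetical names:

    #include <cstdio>
    #include <vector>

    using real = float;

    struct Overlap
    {
        std::vector<real> sendbuf;   // scratch buffer, logically mutable
    };

    // The caller only has const access to the surrounding structure, as in
    // sum_fftgrid_dd(const gmx_pme_t *pme, ...).
    void packSendBuffer(const Overlap *overlap)
    {
        // data() through a pointer-to-const yields const real*; writing needs a cast.
        real *sendptr = const_cast<real *>(overlap->sendbuf.data());
        sendptr[0] = 1.0f;   // well-defined because the Overlap object itself is not const
    }

    int main()
    {
        Overlap ol;
        ol.sendbuf.resize(16);    // buffer sized up front, as init_overlap_comm() does
        packSendBuffer(&ol);
        std::printf("sendbuf[0] = %g\n", ol.sendbuf[0]);
        return 0;
    }

Making the buffers mutable, or passing the structure non-const, would avoid
the casts; the casts used here keep the existing const gmx_pme_t * function
signatures unchanged.
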
src/gromacs/ewald/pme.cpp
index d669a50e5146cfc43c4216163585be36294b468e..fe2f007a0567634ecf0e56f54c03713a390e7c49 100644
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -298,10 +298,7 @@ init_overlap_comm(pme_overlap_t *  ol,
                   int              ndata,
                   int              commplainsize)
 {
-    int              b, i;
-    pme_grid_comm_t *pgc;
     gmx_bool         bCont;
-    int              fft_start, fft_end, send_index1, recv_index1;
 #if GMX_MPI
     MPI_Status       stat;
 
@@ -318,13 +315,13 @@ init_overlap_comm(pme_overlap_t *  ol,
      * that belong to higher nodes (modulo nnodes)
      */
 
-    snew(ol->s2g0, ol->nnodes+1);
-    snew(ol->s2g1, ol->nnodes);
+    ol->s2g0.resize(ol->nnodes + 1);
+    ol->s2g1.resize(ol->nnodes);
     if (debug)
     {
         fprintf(debug, "PME slab boundaries:");
     }
-    for (i = 0; i < nnodes; i++)
+    for (int i = 0; i < nnodes; i++)
     {
         /* s2g0 the local interpolation grid start.
          * s2g1 the local interpolation grid end.
@@ -332,8 +329,8 @@ init_overlap_comm(pme_overlap_t *  ol,
          * spatially uniform along dimension x or y, we need to round
          * s2g0 down and s2g1 up.
          */
-        ol->s2g0[i] = ( i   *ndata + 0       )/nnodes;
-        ol->s2g1[i] = ((i+1)*ndata + nnodes-1)/nnodes + norder - 1;
+        ol->s2g0[i] = (i * ndata + 0) / nnodes;
+        ol->s2g1[i] = ((i + 1) * ndata + nnodes - 1) / nnodes + norder - 1;
 
         if (debug)
         {
@@ -347,55 +344,50 @@ init_overlap_comm(pme_overlap_t *  ol,
     }
 
     /* Determine with how many nodes we need to communicate the grid overlap */
-    b = 0;
+    int testRankCount = 0;
     do
     {
-        b++;
+        testRankCount++;
         bCont = FALSE;
-        for (i = 0; i < nnodes; i++)
+        for (int i = 0; i < nnodes; i++)
         {
-            if ((i+b <  nnodes && ol->s2g1[i] > ol->s2g0[i+b]) ||
-                (i+b >= nnodes && ol->s2g1[i] > ol->s2g0[i+b-nnodes] + ndata))
+            if ((i + testRankCount <  nnodes && ol->s2g1[i] > ol->s2g0[i + testRankCount]) ||
+                (i + testRankCount >= nnodes && ol->s2g1[i] > ol->s2g0[i + testRankCount - nnodes] + ndata))
             {
                 bCont = TRUE;
             }
         }
     }
-    while (bCont && b < nnodes);
-    ol->noverlap_nodes = b - 1;
-
-    snew(ol->send_id, ol->noverlap_nodes);
-    snew(ol->recv_id, ol->noverlap_nodes);
-    for (b = 0; b < ol->noverlap_nodes; b++)
-    {
-        ol->send_id[b] = (ol->nodeid + (b + 1)) % ol->nnodes;
-        ol->recv_id[b] = (ol->nodeid - (b + 1) + ol->nnodes) % ol->nnodes;
-    }
-    snew(ol->comm_data, ol->noverlap_nodes);
+    while (bCont && testRankCount < nnodes);
 
+    ol->comm_data.resize(testRankCount - 1);
     ol->send_size = 0;
-    for (b = 0; b < ol->noverlap_nodes; b++)
+
+    for (size_t b = 0; b < ol->comm_data.size(); b++)
     {
-        pgc = &ol->comm_data[b];
+        pme_grid_comm_t *pgc = &ol->comm_data[b];
+
         /* Send */
-        fft_start        = ol->s2g0[ol->send_id[b]];
-        fft_end          = ol->s2g0[ol->send_id[b]+1];
-        if (ol->send_id[b] < nodeid)
+        pgc->send_id = (ol->nodeid + (b + 1)) % ol->nnodes;
+        int fft_start = ol->s2g0[pgc->send_id];
+        int fft_end   = ol->s2g0[pgc->send_id + 1];
+        if (pgc->send_id < nodeid)
         {
             fft_start += ndata;
             fft_end   += ndata;
         }
-        send_index1       = ol->s2g1[nodeid];
-        send_index1       = std::min(send_index1, fft_end);
-        pgc->send_index0  = fft_start;
-        pgc->send_nindex  = std::max(0, send_index1 - pgc->send_index0);
-        ol->send_size    += pgc->send_nindex;
+        int send_index1  = ol->s2g1[nodeid];
+        send_index1      = std::min(send_index1, fft_end);
+        pgc->send_index0 = fft_start;
+        pgc->send_nindex = std::max(0, send_index1 - pgc->send_index0);
+        ol->send_size   += pgc->send_nindex;
 
         /* We always start receiving to the first index of our slab */
+        pgc->recv_id     = (ol->nodeid - (b + 1) + ol->nnodes) % ol->nnodes;
         fft_start        = ol->s2g0[ol->nodeid];
-        fft_end          = ol->s2g0[ol->nodeid+1];
-        recv_index1      = ol->s2g1[ol->recv_id[b]];
-        if (ol->recv_id[b] > nodeid)
+        fft_end          = ol->s2g0[ol->nodeid + 1];
+        int recv_index1  = ol->s2g1[pgc->recv_id];
+        if (pgc->recv_id > nodeid)
         {
             recv_index1 -= ndata;
         }
@@ -406,30 +398,17 @@ init_overlap_comm(pme_overlap_t *  ol,
 
 #if GMX_MPI
     /* Communicate the buffer sizes to receive */
-    for (b = 0; b < ol->noverlap_nodes; b++)
+    for (size_t b = 0; b < ol->comm_data.size(); b++)
     {
-        MPI_Sendrecv(&ol->send_size, 1, MPI_INT, ol->send_id[b], b,
-                     &ol->comm_data[b].recv_size, 1, MPI_INT, ol->recv_id[b], b,
+        MPI_Sendrecv(&ol->send_size, 1, MPI_INT, ol->comm_data[b].send_id, b,
+                     &ol->comm_data[b].recv_size, 1, MPI_INT, ol->comm_data[b].recv_id, b,
                      ol->mpi_comm, &stat);
     }
 #endif
 
     /* For non-divisible grid we need pme_order iso pme_order-1 */
-    snew(ol->sendbuf, norder*commplainsize);
-    snew(ol->recvbuf, norder*commplainsize);
-}
-
-/*! \brief Destroy data structure for communication */
-static void
-destroy_overlap_comm(const pme_overlap_t *ol)
-{
-    sfree(ol->s2g0);
-    sfree(ol->s2g1);
-    sfree(ol->send_id);
-    sfree(ol->recv_id);
-    sfree(ol->comm_data);
-    sfree(ol->sendbuf);
-    sfree(ol->recvbuf);
+    ol->sendbuf.resize(norder * commplainsize);
+    ol->recvbuf.resize(norder * commplainsize);
 }
 
 int minimalPmeGridSize(int pmeOrder)
@@ -740,7 +719,7 @@ gmx_pme_t *gmx_pme_init(const t_commrec     *cr,
     /* Double-check for a limitation of the (current) sum_fftgrid_dd code.
      * Note that gmx_pme_check_restrictions checked for this already.
      */
-    if (pme->bUseThreads && pme->overlap[0].noverlap_nodes > 1)
+    if (pme->bUseThreads && (pme->overlap[0].comm_data.size() > 1))
     {
         gmx_incons("More than one communication pulse required for grid overlap communication along the major dimension while using threads");
     }
@@ -1730,9 +1709,6 @@ void gmx_pme_destroy(gmx_pme_t *pme)
         sfree(pme->bsp_mod[i]);
     }
 
-    destroy_overlap_comm(&pme->overlap[0]);
-    destroy_overlap_comm(&pme->overlap[1]);
-
     sfree(pme->lb_buf1);
     sfree(pme->lb_buf2);