Reduced the cost of the pull communication
author    Berk Hess <hess@kth.se>
          Mon, 11 Aug 2014 16:11:17 +0000 (18:11 +0200)
committer Gerrit Code Review <gerrit@gerrit.gromacs.org>
          Thu, 18 Jun 2015 14:08:40 +0000 (16:08 +0200)
With more than 32 ranks, a sub-communicator will be used
for the pull communication. This significantly reduces the cost of
the pull communication with small pull groups. With large pull groups
the total simulation performance might not improve much, because ranks
that are not in the sub-communicator will later have to wait for the
pull ranks during the communication for the constraints.

Added a pull_comm_t struct to separate the data used for communication.

Change-Id: I92b64d098b508b11718ef3ae175b771032ad7be2
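
The idea, as a minimal standalone MPI sketch (the participation test and
all names here are illustrative, not from this commit): ranks that own no
pull atoms split off into a separate communicator, so the reduction for
pulling only involves the participating ranks.

    #include <mpi.h>
    #include <stdio.h>

    int main(int argc, char **argv)
    {
        int      rank, nranks, participates;
        MPI_Comm sub;

        MPI_Init(&argc, &argv);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &nranks);

        /* Hypothetical test: every fourth rank "owns" pull atoms;
         * in the real code this depends on the domain decomposition. */
        participates = (rank % 4 == 0);

        /* color 0 = pull ranks, color 1 = the rest; rank as key keeps order */
        MPI_Comm_split(MPI_COMM_WORLD, participates ? 0 : 1, rank, &sub);

        if (participates)
        {
            double local = 1.0, sum;

            /* This reduction involves only ~nranks/4 ranks instead of nranks */
            MPI_Allreduce(&local, &sum, 1, MPI_DOUBLE, MPI_SUM, sub);
            printf("rank %d: %g ranks participate in pulling\n", rank, sum);
        }

        MPI_Comm_free(&sub);
        MPI_Finalize();
        return 0;
    }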

src/gromacs/domdec/domdec.cpp
src/gromacs/pulling/pull.cpp
src/gromacs/pulling/pull.h
src/gromacs/pulling/pull_internal.h
src/gromacs/pulling/pullutil.cpp

index c61e2893bc7a38cb3852bd7423fe71c48b9c017b..2cb8e7a64d6122b6622367354a9565fe825f687b 100644 (file)
@@ -9858,7 +9858,7 @@ void dd_partition_system(FILE                *fplog,
     if (ir->bPull)
     {
         /* Update the local pull groups */
-        dd_make_local_pull_groups(dd, ir->pull_work, mdatoms);
+        dd_make_local_pull_groups(cr, ir->pull_work, mdatoms);
     }
 
     if (ir->bRot)
index 3c970da0e43bfa5edd4b00df9e7bf36376bbc3b3..5a4f8e0150ea849cd41d7326574c810a2fb14c88 100644 (file)
@@ -38,6 +38,8 @@
 
 #include "pull.h"
 
+#include "config.h"
+
 #include <assert.h>
 #include <math.h>
 #include <stdio.h>
@@ -1158,21 +1160,24 @@ real pull_potential(struct pull_t *pull, t_mdatoms *md, t_pbc *pbc,
                     t_commrec *cr, double t, real lambda,
                     rvec *x, rvec *f, tensor vir, real *dvdlambda)
 {
-    real V, dVdl;
+    real V = 0, dVdl;
 
     assert(pull != NULL);
 
-    pull_calc_coms(cr, pull, md, pbc, t, x, NULL);
+    if (pull->comm.bParticipate)
+    {
+        pull_calc_coms(cr, pull, md, pbc, t, x, NULL);
 
-    do_pull_pot(pull, pbc, t, lambda,
-                &V, MASTER(cr) ? vir : NULL, &dVdl);
+        do_pull_pot(pull, pbc, t, lambda,
+                    &V, MASTER(cr) ? vir : NULL, &dVdl);
 
-    /* Distribute forces over pulled groups */
-    apply_forces(pull, md, f);
+        /* Distribute forces over the pull groups */
+        apply_forces(pull, md, f);
 
-    if (MASTER(cr))
-    {
-        *dvdlambda += dVdl;
+        if (MASTER(cr))
+        {
+            *dvdlambda += dVdl;
+        }
     }
 
     return (MASTER(cr) ? V : 0.0);
@@ -1184,9 +1189,12 @@ void pull_constraint(struct pull_t *pull, t_mdatoms *md, t_pbc *pbc,
 {
     assert(pull != NULL);
 
-    pull_calc_coms(cr, pull, md, pbc, t, x, xp);
+    if (pull->comm.bParticipate)
+    {
+        pull_calc_coms(cr, pull, md, pbc, t, x, xp);
 
-    do_constraint(pull, pbc, xp, v, MASTER(cr), vir, dt, t);
+        do_constraint(pull, pbc, xp, v, MASTER(cr), vir, dt, t);
+    }
 }
 
 static void make_local_pull_group(gmx_ga2la_t ga2la,
@@ -1227,10 +1235,17 @@ static void make_local_pull_group(gmx_ga2la_t ga2la,
     }
 }
 
-void dd_make_local_pull_groups(gmx_domdec_t *dd, struct pull_t *pull, t_mdatoms *md)
+void dd_make_local_pull_groups(t_commrec *cr, struct pull_t *pull, t_mdatoms *md)
 {
-    gmx_ga2la_t ga2la;
-    int         g;
+    gmx_domdec_t *dd;
+    pull_comm_t  *comm;
+    gmx_ga2la_t   ga2la;
+    gmx_bool      bMustParticipate;
+    int           g;
+
+    dd = cr->dd;
+
+    comm = &pull->comm;
 
     if (dd)
     {
@@ -1241,10 +1256,103 @@ void dd_make_local_pull_groups(gmx_domdec_t *dd, struct pull_t *pull, t_mdatoms
         ga2la = NULL;
     }
 
+    /* We always make the master rank participate, so that it can do I/O
+     * and to simplify MC-type extensions people might have.
+     */
+    bMustParticipate = (comm->bParticipateAll || dd == NULL || DDMASTER(dd));
+
     for (g = 0; g < pull->ngroup; g++)
     {
+        int a;
+
         make_local_pull_group(ga2la, &pull->group[g],
                               0, md->homenr);
+
+        /* We should participate if we have pull or pbc atoms */
+        if (!bMustParticipate &&
+            (pull->group[g].nat_loc > 0 ||
+             (pull->group[g].params.pbcatom >= 0 &&
+              ga2la_get_home(dd->ga2la, pull->group[g].params.pbcatom, &a))))
+        {
+            bMustParticipate = TRUE;
+        }
+    }
+
+    if (!comm->bParticipateAll)
+    {
+        /* Keep ranks that are currently not required in the communicator
+         * if they were required up to 20 decompositions ago.
+         * This avoids frequent rebuilds when atoms jump back and forth
+         * between domains.
+         */
+        const gmx_int64_t history_count = 20;
+        gmx_bool          bWillParticipate;
+        int               count[2];
+
+        /* Increase the decomposition counter for the current call */
+        comm->setup_count++;
+
+        if (bMustParticipate)
+        {
+            comm->must_count = comm->setup_count;
+        }
+
+        bWillParticipate =
+            bMustParticipate ||
+            (comm->bParticipate &&
+             comm->must_count >= comm->setup_count - history_count);
+
+        if (debug && dd != NULL)
+        {
+            fprintf(debug, "Our DD rank (%3d) pull #atoms>0 or master: %d, will be part %d\n",
+                    dd->rank, bMustParticipate, bWillParticipate);
+        }
+
+        if (bWillParticipate)
+        {
+            /* Count the number of ranks that we want to have participating */
+            count[0] = 1;
+            /* Count the number of ranks that need to be added */
+            count[1] = comm->bParticipate ? 0 : 1;
+        }
+        else
+        {
+            count[0] = 0;
+            count[1] = 0;
+        }
+
+        /* The cost of this global operation will be less than the cost
+         * of the extra MPI_Comm_split calls that we can avoid.
+         */
+        gmx_sumi(2, count, cr);
+
+        /* If we are missing ranks, or if we have 20% more ranks than needed,
+         * we make a new sub-communicator.
+         */
+        if (count[1] > 0 || 6*count[0] < 5*comm->nparticipate)
+        {
+            if (debug)
+            {
+                fprintf(debug, "Creating new pull subcommunicator of size %d\n",
+                        count[0]);
+            }
+
+#ifdef GMX_MPI
+            if (comm->mpi_comm_com != MPI_COMM_NULL)
+            {
+                MPI_Comm_free(&comm->mpi_comm_com);
+            }
+
+            /* This might be an extremely expensive operation, so we try
+             * to avoid this splitting as much as possible.
+             */
+            assert(dd != NULL);
+            MPI_Comm_split(dd->mpi_comm_all, bWillParticipate ? 0 : 1, dd->rank,
+                           &comm->mpi_comm_com);
+#endif
+
+            comm->bParticipate = bWillParticipate;
+            comm->nparticipate = count[0];
+        }
     }
 
     /* Since the PBC of atoms might have changed, we need to update the PBC */
@@ -1437,6 +1545,7 @@ init_pull(FILE *fplog, const pull_params_t *pull_params, const t_inputrec *ir,
           gmx_bool bOutFile, unsigned long Flags)
 {
     struct pull_t *pull;
+    pull_comm_t   *comm;
     int            g, c, m;
 
     snew(pull, 1);
@@ -1604,9 +1713,6 @@ init_pull(FILE *fplog, const pull_params_t *pull_params, const t_inputrec *ir,
         }
     }
 
-    pull->rbuf     = NULL;
-    pull->dbuf     = NULL;
-    pull->dbuf_cyl = NULL;
     pull->bRefAt   = FALSE;
     pull->cosdim   = -1;
     for (g = 0; g < pull->ngroup; g++)
@@ -1724,6 +1830,35 @@ init_pull(FILE *fplog, const pull_params_t *pull_params, const t_inputrec *ir,
         }
     }
 
+    comm = &pull->comm;
+
+#ifdef GMX_MPI
+    /* Use a sub-communicator when we have more than 32 ranks */
+    comm->bParticipateAll = (cr == NULL || !DOMAINDECOMP(cr) ||
+                             cr->dd->nnodes <= 32 ||
+                             getenv("GMX_PULL_PARTICIPATE_ALL") != NULL);
+    /* This sub-communicator is not used with comm->bParticipateAll,
+     * so we can always initialize it to MPI_COMM_NULL.
+     */
+    comm->mpi_comm_com    = MPI_COMM_NULL;
+    comm->nparticipate    = 0;
+#else
+    /* Without MPI there is only one rank, so all ranks pull */
+    comm->bParticipateAll = TRUE;
+#endif
+    comm->bParticipate    = comm->bParticipateAll;
+    comm->setup_count     = 0;
+    comm->must_count      = 0;
+
+    if (!comm->bParticipateAll && fplog != NULL)
+    {
+        fprintf(fplog, "Will use a sub-communicator for pull communication\n");
+    }
+
+    comm->rbuf     = NULL;
+    comm->dbuf     = NULL;
+    comm->dbuf_cyl = NULL;
+
     /* We still need to initialize the PBC reference coordinates */
     pull->bSetPBCatoms = TRUE;
 
@@ -1783,9 +1918,15 @@ static void destroy_pull(struct pull_t *pull)
     sfree(pull->dyna);
     sfree(pull->coord);
 
-    sfree(pull->rbuf);
-    sfree(pull->dbuf);
-    sfree(pull->dbuf_cyl);
+#ifdef GMX_MPI
+    if (pull->comm.mpi_comm_com != MPI_COMM_NULL)
+    {
+        MPI_Comm_free(&pull->comm.mpi_comm_com);
+    }
+#endif
+    sfree(pull->comm.rbuf);
+    sfree(pull->comm.dbuf);
+    sfree(pull->comm.dbuf_cyl);
 
     sfree(pull);
 }
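
The rebuild heuristic in dd_make_local_pull_groups() above can be summarized
in a small sketch (hypothetical names, mirroring the logic of the change): a
rank stays in the sub-communicator for up to 20 decompositions after it was
last needed, and the communicator is only re-split when ranks must be added
or when more than 20% of its members have become superfluous.

    #include <stdbool.h>

    typedef struct {
        bool      bParticipate; /* Is our rank currently a member?         */
        long long setup_count;  /* Number of decomposition calls so far    */
        long long must_count;   /* Last count at which our rank was needed */
        int       nparticipate; /* Current size of the sub-communicator    */
    } comm_sketch_t;

    /* Returns true when the sub-communicator should be re-split.
     * bNeededNow: this rank has pull/pbc atoms (or is the master),
     * nWant:      global count of ranks that will participate,
     * nMissing:   global count of ranks that want to join but are not
     *             members (in the change both counts come from gmx_sumi()).
     */
    static bool need_new_subcomm(comm_sketch_t *c, bool bNeededNow,
                                 int nWant, int nMissing)
    {
        const long long history_count = 20;

        c->setup_count++;
        if (bNeededNow)
        {
            c->must_count = c->setup_count;
        }
        c->bParticipate = bNeededNow ||
            (c->bParticipate &&
             c->must_count >= c->setup_count - history_count);

        /* Re-split when ranks are missing, or when the communicator is
         * more than 20% too large: 6*nWant < 5*nparticipate. */
        return (nMissing > 0 || 6*nWant < 5*c->nparticipate);
    }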
index c73ca640dc7455ee6fb6536fcf2d9061e5d69a1d..2d6898c5f9d6fa79c1cef434e5539343a45c498b 100644 (file)
@@ -122,11 +122,11 @@ void pull_constraint(struct pull_t *pull, t_mdatoms *md, struct t_pbc *pbc,
 /*! \brief Make a selection of the home atoms for all pull groups.
  * Should be called at every domain decomposition.
  *
- * \param dd             Structure containing domain decomposition data.
+ * \param cr             Structure for communication info.
  * \param pull           The pull group.
  * \param md             All atoms.
  */
-void dd_make_local_pull_groups(gmx_domdec_t *dd,
+void dd_make_local_pull_groups(t_commrec *cr,
                                struct pull_t *pull, t_mdatoms *md);
 
 
index ad5c10f0e06185df5faac6b260a4f78d9baf04c7..75fbc2ac18e43f40529e1416becfe1c6134ee36e 100644 (file)
@@ -50,6 +50,8 @@
 #ifndef GMX_PULLING_PULL_INTERNAL_H
 #define GMX_PULLING_PULL_INTERNAL_H
 
+#include "config.h"
+
 #include "gromacs/legacyheaders/typedefs.h"
 
 #ifdef __cplusplus
@@ -98,6 +100,23 @@ typedef struct
 }
 pull_coord_work_t;
 
+typedef struct {
+    gmx_bool    bParticipateAll; /* Do all ranks always participate in pulling? */
+    gmx_bool    bParticipate;    /* Does our rank participate in pulling? */
+#ifdef GMX_MPI
+    MPI_Comm    mpi_comm_com;    /* Communicator for pulling */
+#endif
+    int         nparticipate;    /* The number of ranks participating */
+
+    gmx_int64_t setup_count;     /* The number of decomposition calls */
+    gmx_int64_t must_count;      /* The last setup count at which our rank was needed */
+
+    rvec       *rbuf;            /* COM calculation buffer */
+    dvec       *dbuf;            /* COM calculation buffer */
+    double     *dbuf_cyl;        /* cylinder ref. groups calculation buffer */
+}
+pull_comm_t;
+
 struct pull_t
 {
     pull_params_t      params;       /* The pull parameters, from inputrec */
@@ -117,11 +136,11 @@ struct pull_t
     pull_coord_work_t *coord;        /* The pull group param and work data */
 
     gmx_bool           bCylinder;    /* Is group 0 a cylinder group? */
+
     gmx_bool           bSetPBCatoms; /* Do we need to set x_pbc for the groups? */
 
-    rvec              *rbuf;         /* COM calculation buffer */
-    dvec              *dbuf;         /* COM calculation buffer */
-    double            *dbuf_cyl;     /* cylinder ref. groups COM calculation buffer */
+    pull_comm_t        comm;         /* Communication parameters, communicator and buffers */
+
     FILE              *out_x;        /* Output file for pull data */
     FILE              *out_f;        /* Output file for pull data */
 };
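
The pullutil.cpp changes below reduce over the sub-communicator using
MPI_IN_PLACE where the MPI library supports it: the send buffer argument is
replaced by MPI_IN_PLACE and the sum overwrites the input buffer directly.
A minimal sketch of the two variants (the MPI_IN_PLACE_EXISTS guard mirrors
the build-system define used below):

    #include <stdlib.h>

    #include <mpi.h>

    static void sum_doubles(double *data, int n, MPI_Comm comm)
    {
    #ifdef MPI_IN_PLACE_EXISTS
        /* Sum in place: no extra buffer needed */
        MPI_Allreduce(MPI_IN_PLACE, data, n, MPI_DOUBLE, MPI_SUM, comm);
    #else
        /* Fallback: reduce into a temporary buffer, then copy back */
        int     i;
        double *buf = (double *)malloc(n*sizeof(double));

        MPI_Allreduce(data, buf, n, MPI_DOUBLE, MPI_SUM, comm);
        for (i = 0; i < n; i++)
        {
            data[i] = buf[i];
        }
        free(buf);
    #endif
    }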
index 7345aad8570be6e214a9df46ebe952e8d655431b..29b2ae8cd01086dee756c54d34c22b7b49670623 100644 (file)
@@ -36,6 +36,8 @@
  */
 #include "gmxpre.h"
 
+#include "config.h"
+
 #include <assert.h>
 #include <stdlib.h>
 
 #include "gromacs/pulling/pull_internal.h"
 #include "gromacs/utility/fatalerror.h"
 #include "gromacs/utility/futil.h"
+#include "gromacs/utility/real.h"
 #include "gromacs/utility/smalloc.h"
 
+static void pull_reduce_real(t_commrec   *cr,
+                             pull_comm_t *comm,
+                             int          n,
+                             real        *data)
+{
+    if (cr != NULL && PAR(cr))
+    {
+        if (comm->bParticipateAll)
+        {
+            /* Sum the contributions over all DD ranks */
+            gmx_sum(n, data, cr);
+        }
+        else
+        {
+#ifdef GMX_MPI
+#ifdef MPI_IN_PLACE_EXISTS
+            MPI_Allreduce(MPI_IN_PLACE, data, n, GMX_MPI_REAL, MPI_SUM,
+                          comm->mpi_comm_com);
+#else
+            int   i;
+            real *buf;
+
+            snew(buf, n);
+
+            MPI_Allreduce(data, buf, n, GMX_MPI_REAL, MPI_SUM,
+                          comm->mpi_comm_com);
+
+            /* Copy the result from the buffer to the input/output data */
+            for (i = 0; i < nr; i++)
+            {
+                data[i] = buf[i];
+            }
+            sfree(buf);
+#endif
+#else
+            gmx_incons("comm->bParticipateAll=FALSE without GMX_MPI");
+#endif
+        }
+    }
+}
+
+static void pull_reduce_double(t_commrec   *cr,
+                               pull_comm_t *comm,
+                               int          n,
+                               double      *data)
+{
+    if (cr != NULL && PAR(cr))
+    {
+        if (comm->bParticipateAll)
+        {
+            /* Sum the contributions over all DD ranks */
+            gmx_sumd(n, data, cr);
+        }
+        else
+        {
+#ifdef GMX_MPI
+#ifdef MPI_IN_PLACE_EXISTS
+            MPI_Allreduce(MPI_IN_PLACE, data, n, MPI_DOUBLE, MPI_SUM,
+                          comm->mpi_comm_com);
+#else
+            int     i;
+            double *buf;
+
+            snew(buf, n);
+
+            MPI_Allreduce(data, buf, n, MPI_DOUBLE, MPI_SUM,
+                          comm->mpi_comm_com);
+
+            /* Copy the result from the buffer to the input/output data */
+            for (i = 0; i < nr; i++)
+            {
+                data[i] = buf[i];
+            }
+            sfree(buf);
+#endif
+#else
+            gmx_incons("comm->bParticipateAll=FALSE without GMX_MPI");
+#endif
+        }
+    }
+}
+
 static void pull_set_pbcatom(t_commrec *cr, pull_group_work_t *pgrp,
                              rvec *x,
                              rvec x_pbc)
@@ -99,8 +182,11 @@ static void pull_set_pbcatoms(t_commrec *cr, struct pull_t *pull,
 
     if (cr && PAR(cr) && n > 0)
     {
-        /* Sum over the nodes to get x_pbc from the home node of pbcatom */
-        gmx_sum(pull->ngroup*DIM, x_pbc[0], cr);
+        /* Sum over participating ranks to get x_pbc from the home ranks.
+         * This can be very expensive at high parallelization, so we only
+         * do this after each DD repartitioning.
+         */
+        pull_reduce_real(cr, &pull->comm, pull->ngroup*DIM, x_pbc[0]);
     }
 }
 
@@ -112,11 +198,14 @@ static void make_cyl_refgrps(t_commrec *cr, struct pull_t *pull, t_mdatoms *md,
     int           c, i, ii, m, start, end;
     rvec          g_x, dx, dir;
     double        inv_cyl_r2;
+    pull_comm_t  *comm;
     gmx_ga2la_t   ga2la = NULL;
 
-    if (pull->dbuf_cyl == NULL)
+    comm = &pull->comm;
+
+    if (comm->dbuf_cyl == NULL)
     {
-        snew(pull->dbuf_cyl, pull->ncoord*stride);
+        snew(comm->dbuf_cyl, pull->ncoord*stride);
     }
 
     if (cr && DOMAINDECOMP(cr))
@@ -243,21 +332,21 @@ static void make_cyl_refgrps(t_commrec *cr, struct pull_t *pull, t_mdatoms *md,
                 }
             }
         }
-        pull->dbuf_cyl[c*stride+0] = wmass;
-        pull->dbuf_cyl[c*stride+1] = wwmass;
-        pull->dbuf_cyl[c*stride+2] = sum_a;
-        pull->dbuf_cyl[c*stride+3] = radf_fac0[XX];
-        pull->dbuf_cyl[c*stride+4] = radf_fac0[YY];
-        pull->dbuf_cyl[c*stride+5] = radf_fac0[ZZ];
-        pull->dbuf_cyl[c*stride+6] = radf_fac1[XX];
-        pull->dbuf_cyl[c*stride+7] = radf_fac1[YY];
-        pull->dbuf_cyl[c*stride+8] = radf_fac1[ZZ];
+        comm->dbuf_cyl[c*stride+0] = wmass;
+        comm->dbuf_cyl[c*stride+1] = wwmass;
+        comm->dbuf_cyl[c*stride+2] = sum_a;
+        comm->dbuf_cyl[c*stride+3] = radf_fac0[XX];
+        comm->dbuf_cyl[c*stride+4] = radf_fac0[YY];
+        comm->dbuf_cyl[c*stride+5] = radf_fac0[ZZ];
+        comm->dbuf_cyl[c*stride+6] = radf_fac1[XX];
+        comm->dbuf_cyl[c*stride+7] = radf_fac1[YY];
+        comm->dbuf_cyl[c*stride+8] = radf_fac1[ZZ];
     }
 
     if (cr != NULL && PAR(cr))
     {
         /* Sum the contributions over the ranks */
-        gmx_sumd(pull->ncoord*stride, pull->dbuf_cyl, cr);
+        pull_reduce_double(cr, comm, pull->ncoord*stride, comm->dbuf_cyl);
     }
 
     for (c = 0; c < pull->ncoord; c++)
@@ -274,8 +363,8 @@ static void make_cyl_refgrps(t_commrec *cr, struct pull_t *pull, t_mdatoms *md,
             pdyna = &pull->dyna[c];
             pgrp  = &pull->group[pcrd->params.group[1]];
 
-            wmass          = pull->dbuf_cyl[c*stride+0];
-            wwmass         = pull->dbuf_cyl[c*stride+1];
+            wmass          = comm->dbuf_cyl[c*stride+0];
+            wwmass         = comm->dbuf_cyl[c*stride+1];
             pdyna->mwscale = 1.0/wmass;
             /* Cylinder pulling can't be used with constraints, but we set
              * wscale and invtm anyhow, in case someone would like to use them.
@@ -291,7 +380,7 @@ static void make_cyl_refgrps(t_commrec *cr, struct pull_t *pull, t_mdatoms *md,
             for (m = 0; m < DIM; m++)
             {
                 g_x[m]         = pgrp->x[m] - pcrd->vec[m]*pcrd->value_ref;
-                dist           = -pcrd->vec[m]*pull->dbuf_cyl[c*stride+2]*pdyna->mwscale;
+                dist           = -pcrd->vec[m]*comm->dbuf_cyl[c*stride+2]*pdyna->mwscale;
                 pdyna->x[m]    = g_x[m] - dist;
                 pcrd->cyl_dev += dist;
             }
@@ -302,8 +391,8 @@ static void make_cyl_refgrps(t_commrec *cr, struct pull_t *pull, t_mdatoms *md,
              */
             for (m = 0; m < DIM; m++)
             {
-                pcrd->ffrad[m] = (pull->dbuf_cyl[c*stride+6+m] +
-                                  pull->dbuf_cyl[c*stride+3+m]*pcrd->cyl_dev)/wmass;
+                pcrd->ffrad[m] = (comm->dbuf_cyl[c*stride+6+m] +
+                                  comm->dbuf_cyl[c*stride+3+m]*pcrd->cyl_dev)/wmass;
             }
 
             if (debug)
@@ -335,21 +424,24 @@ void pull_calc_coms(t_commrec *cr,
                     struct pull_t *pull, t_mdatoms *md, t_pbc *pbc, double t,
                     rvec x[], rvec *xp)
 {
-    int  g;
-    real twopi_box = 0;
+    int          g;
+    real         twopi_box = 0;
+    pull_comm_t *comm;
+
+    comm = &pull->comm;
 
-    if (pull->rbuf == NULL)
+    if (comm->rbuf == NULL)
     {
-        snew(pull->rbuf, pull->ngroup);
+        snew(comm->rbuf, pull->ngroup);
     }
-    if (pull->dbuf == NULL)
+    if (comm->dbuf == NULL)
     {
-        snew(pull->dbuf, 3*pull->ngroup);
+        snew(comm->dbuf, 3*pull->ngroup);
     }
 
     if (pull->bRefAt && pull->bSetPBCatoms)
     {
-        pull_set_pbcatoms(cr, pull, x, pull->rbuf);
+        pull_set_pbcatoms(cr, pull, x, comm->rbuf);
 
         if (cr != NULL && DOMAINDECOMP(cr))
         {
@@ -403,7 +495,7 @@ void pull_calc_coms(t_commrec *cr,
                 if (pgrp->epgrppbc == epgrppbcREFAT)
                 {
                     /* Set the pbc atom */
-                    copy_rvec(pull->rbuf[g], x_pbc);
+                    copy_rvec(comm->rbuf[g], x_pbc);
                 }
 
                 for (i = 0; i < pgrp->nat_loc; i++)
@@ -492,11 +584,11 @@ void pull_calc_coms(t_commrec *cr,
                 }
 
                 /* Copy local sums to a buffer for global summing */
-                copy_dvec(com, pull->dbuf[g*3]);
-                copy_dvec(comp, pull->dbuf[g*3+1]);
-                pull->dbuf[g*3+2][0] = wmass;
-                pull->dbuf[g*3+2][1] = wwmass;
-                pull->dbuf[g*3+2][2] = 0;
+                copy_dvec(com,  comm->dbuf[g*3]);
+                copy_dvec(comp, comm->dbuf[g*3 + 1]);
+                comm->dbuf[g*3 + 2][0] = wmass;
+                comm->dbuf[g*3 + 2][1] = wwmass;
+                comm->dbuf[g*3 + 2][2] = 0;
             }
             else
             {
@@ -538,24 +630,20 @@ void pull_calc_coms(t_commrec *cr,
                 }
 
                 /* Copy local sums to a buffer for global summing */
-                pull->dbuf[g*3  ][0] = cm;
-                pull->dbuf[g*3  ][1] = sm;
-                pull->dbuf[g*3  ][2] = 0;
-                pull->dbuf[g*3+1][0] = ccm;
-                pull->dbuf[g*3+1][1] = csm;
-                pull->dbuf[g*3+1][2] = ssm;
-                pull->dbuf[g*3+2][0] = cmp;
-                pull->dbuf[g*3+2][1] = smp;
-                pull->dbuf[g*3+2][2] = 0;
+                comm->dbuf[g*3  ][0] = cm;
+                comm->dbuf[g*3  ][1] = sm;
+                comm->dbuf[g*3  ][2] = 0;
+                comm->dbuf[g*3+1][0] = ccm;
+                comm->dbuf[g*3+1][1] = csm;
+                comm->dbuf[g*3+1][2] = ssm;
+                comm->dbuf[g*3+2][0] = cmp;
+                comm->dbuf[g*3+2][1] = smp;
+                comm->dbuf[g*3+2][2] = 0;
             }
         }
     }
 
-    if (cr && PAR(cr))
-    {
-        /* Sum the contributions over the nodes */
-        gmx_sumd(pull->ngroup*3*DIM, pull->dbuf[0], cr);
-    }
+    pull_reduce_double(cr, comm, pull->ngroup*3*DIM, comm->dbuf[0]);
 
     for (g = 0; g < pull->ngroup; g++)
     {
@@ -570,8 +658,8 @@ void pull_calc_coms(t_commrec *cr,
                 int    m;
 
                 /* Determine the inverse mass */
-                wmass             = pull->dbuf[g*3+2][0];
-                wwmass            = pull->dbuf[g*3+2][1];
+                wmass             = comm->dbuf[g*3+2][0];
+                wwmass            = comm->dbuf[g*3+2][1];
                 pgrp->mwscale     = 1.0/wmass;
                 /* invtm==0 signals a frozen group, so then we should keep it zero */
                 if (pgrp->invtm != 0)
@@ -582,17 +670,17 @@ void pull_calc_coms(t_commrec *cr,
                 /* Divide by the total mass */
                 for (m = 0; m < DIM; m++)
                 {
-                    pgrp->x[m]    = pull->dbuf[g*3  ][m]*pgrp->mwscale;
+                    pgrp->x[m]      = comm->dbuf[g*3  ][m]*pgrp->mwscale;
                     if (xp)
                     {
-                        pgrp->xp[m] = pull->dbuf[g*3+1][m]*pgrp->mwscale;
+                        pgrp->xp[m] = comm->dbuf[g*3+1][m]*pgrp->mwscale;
                     }
                     if (pgrp->epgrppbc == epgrppbcREFAT)
                     {
-                        pgrp->x[m]    += pull->rbuf[g][m];
+                        pgrp->x[m]      += comm->rbuf[g][m];
                         if (xp)
                         {
-                            pgrp->xp[m] += pull->rbuf[g][m];
+                            pgrp->xp[m] += comm->rbuf[g][m];
                         }
                     }
                 }
@@ -604,14 +692,14 @@ void pull_calc_coms(t_commrec *cr,
                 int    i, ii;
 
                 /* Determine the optimal location of the cosine weight */
-                csw                   = pull->dbuf[g*3][0];
-                snw                   = pull->dbuf[g*3][1];
+                csw                   = comm->dbuf[g*3][0];
+                snw                   = comm->dbuf[g*3][1];
                 pgrp->x[pull->cosdim] = atan2_0_2pi(snw, csw)/twopi_box;
                 /* Set the weights for the local atoms */
                 wmass  = sqrt(csw*csw + snw*snw);
-                wwmass = (pull->dbuf[g*3+1][0]*csw*csw +
-                          pull->dbuf[g*3+1][1]*csw*snw +
-                          pull->dbuf[g*3+1][2]*snw*snw)/(wmass*wmass);
+                wwmass = (comm->dbuf[g*3+1][0]*csw*csw +
+                          comm->dbuf[g*3+1][1]*csw*snw +
+                          comm->dbuf[g*3+1][2]*snw*snw)/(wmass*wmass);
 
                 pgrp->mwscale = 1.0/wmass;
                 pgrp->wscale  = wmass/wwmass;
@@ -627,8 +715,8 @@ void pull_calc_coms(t_commrec *cr,
                 }
                 if (xp)
                 {
-                    csw                    = pull->dbuf[g*3+2][0];
-                    snw                    = pull->dbuf[g*3+2][1];
+                    csw                    = comm->dbuf[g*3+2][0];
+                    snw                    = comm->dbuf[g*3+2][1];
                     pgrp->xp[pull->cosdim] = atan2_0_2pi(snw, csw)/twopi_box;
                 }
             }
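
For reference, the dbuf layout reduced in pull_calc_coms() above packs three
dvec entries per pull group, so a single reduction over ngroup*3*DIM doubles
covers the COM sums and the mass weights. A sketch of the packing for the
non-cosine case (illustrative helper, not part of the commit):

    typedef double dvec[3];

    /* For group g: dbuf[g*3] holds the weighted coordinate sum for x,
     * dbuf[g*3+1] the sum for xp, and dbuf[g*3+2] the scalar weights
     * (wmass, wwmass, 0). */
    static void pack_group_sums(dvec *dbuf, int g,
                                const dvec com, const dvec comp,
                                double wmass, double wwmass)
    {
        int d;

        for (d = 0; d < 3; d++)
        {
            dbuf[g*3    ][d] = com[d];
            dbuf[g*3 + 1][d] = comp[d];
        }
        dbuf[g*3 + 2][0] = wmass;
        dbuf[g*3 + 2][1] = wwmass;
        dbuf[g*3 + 2][2] = 0;
    }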