Store a physical node communicator in t_commrec

author Aleksei Iupinov <a.yupinov@gmail.com>

Thu, 11 May 2017 15:03:11 +0000 (17:03 +0200)

committer Mark Abraham <mark.j.abraham@gmail.com>

Mon, 12 Jun 2017 12:19:39 +0000 (14:19 +0200)
author Aleksei Iupinov <a.yupinov@gmail.com>
Thu, 11 May 2017 15:03:11 +0000 (17:03 +0200)
committer Mark Abraham <mark.j.abraham@gmail.com>
Mon, 12 Jun 2017 12:19:39 +0000 (14:19 +0200)
diff --git a/src/gromacs/gmxlib/network.cpp b/src/gromacs/gmxlib/network.cpp

index 920f3fd7e13873f5fcebde5880b983ea66fb7089..a68e54d0a5e6492a72997530a684f38dae84744e 100644 (file)
--- a/src/gromacs/gmxlib/network.cpp
+++ b/src/gromacs/gmxlib/network.cpp
@@ -69,6 +69,10 @@ void gmx_fill_commrec_from_mpi(t_commrec gmx_unused *cr)
  
      cr->nnodes           = gmx_node_num();
      cr->nodeid           = gmx_node_rank();
+    if (PAR(cr) || MULTISIM(cr))
+    {
+        MPI_Comm_split(MPI_COMM_WORLD, gmx_physicalnode_id_hash(), cr->nodeid, &cr->mpi_comm_physicalnode);
+    }
      cr->sim_nodeid       = cr->nodeid;
      cr->mpi_comm_mysim   = MPI_COMM_WORLD;
      cr->mpi_comm_mygroup = MPI_COMM_WORLD;
@@ -125,6 +129,12 @@ static void done_mpi_in_place_buf(mpi_in_place_buf_t *buf)
  
  void done_commrec(t_commrec *cr)
  {
+#if GMX_MPI
+    if (PAR(cr) || MULTISIM(cr))
+    {
+        MPI_Comm_free(&cr->mpi_comm_physicalnode);
+    }
+#endif
      if (nullptr != cr->dd)
      {
          // TODO: implement
@@ -347,6 +357,15 @@ void gmx_barrier(const t_commrec gmx_unused *cr)
  #endif
  }
  
+void gmx_barrier_physical_node(const t_commrec gmx_unused *cr)
+{
+#if !GMX_MPI
+    gmx_call("gmx_barrier_physical_node");
+#else
+    MPI_Barrier(cr->mpi_comm_physicalnode);
+#endif
+}
+
  void gmx_bcast(int gmx_unused nbytes, void gmx_unused *b, const t_commrec gmx_unused *cr)
  {
  #if !GMX_MPI
diff --git a/src/gromacs/gmxlib/network.h b/src/gromacs/gmxlib/network.h

index 78b637e76a9e6c8a5f96e1e2c836d90fd4662928..3297dc8d63eeef5d8b17a9c13b938a850edbe80e 100644 (file)
--- a/src/gromacs/gmxlib/network.h
+++ b/src/gromacs/gmxlib/network.h
@@ -3,7 +3,7 @@
   *
   * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -75,6 +75,9 @@ void gmx_init_intranode_counters(struct t_commrec *cr);
  void gmx_barrier(const struct t_commrec *cr);
  /* Wait till all processes in cr->mpi_comm_mygroup have reached the barrier */
  
+void gmx_barrier_physical_node(const struct t_commrec *cr);
+/* Wait till all processes in cr->mpi_comm_physical_node have reached the barrier */
+
  void gmx_bcast(int nbytes, void *b, const struct t_commrec *cr);
  /* Broadcast nbytes bytes from the master to cr->mpi_comm_mygroup */
  
diff --git a/src/gromacs/mdlib/forcerec.cpp b/src/gromacs/mdlib/forcerec.cpp

index 4815eb0a15c1dd283dd63ef6ad5dcb99079fc814..1c92cdf99d4eee805b3823f4b8167c2f43f9f508 100644 (file)
--- a/src/gromacs/mdlib/forcerec.cpp
+++ b/src/gromacs/mdlib/forcerec.cpp
@@ -3264,24 +3264,27 @@ void free_gpu_resources(const t_forcerec     *fr,
          nbnxn_gpu_free(fr->nbv->gpu_nbv);
          /* stop the GPU profiler (only CUDA) */
          stopGpuProfiler();
+    }
  
-        /* With tMPI we need to wait for all ranks to finish deallocation before
-         * destroying the CUDA context in free_gpu() as some tMPI ranks may be sharing
-         * GPU and context.
-         *
-         * This is not a concern in OpenCL where we use one context per rank which
-         * is freed in nbnxn_gpu_free().
-         *
-         * Note: as only PP ranks need to free GPU resources, so it is safe to
-         * not call the barrier on PME ranks.
-         */
+    /* With tMPI we need to wait for all ranks to finish deallocation before
+     * destroying the CUDA context in free_gpu() as some tMPI ranks may be sharing
+     * GPU and context.
+     *
+     * This is not a concern in OpenCL where we use one context per rank which
+     * is freed in nbnxn_gpu_free().
+     *
+     * Note: it is safe to not call the barrier on the ranks which do not use GPU,
+     * but it is easier and more futureproof to call it on the whole node.
+     */
  #if GMX_THREAD_MPI
-        if (PAR(cr))
-        {
-            gmx_barrier(cr);
-        }
+    if (PAR(cr) || MULTISIM(cr))
+    {
+        gmx_barrier_physical_node(cr);
+    }
  #endif  /* GMX_THREAD_MPI */
  
+    if (bIsPPrankUsingGPU)
+    {
          /* uninitialize GPU (by destroying the context) */
          if (!free_cuda_gpu(cr->rank_pp_intranode, gpu_err_str, gpu_info, gpu_opt))
          {
diff --git a/src/gromacs/mdtypes/commrec.h b/src/gromacs/mdtypes/commrec.h

index bbe97073c75761511ebd1696cff71a54daa5b03a..37cab123b418ddb1e488e11fb261f0b7422f5b57 100644 (file)
--- a/src/gromacs/mdtypes/commrec.h
+++ b/src/gromacs/mdtypes/commrec.h
@@ -3,7 +3,7 @@
   *
   * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2017, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -97,11 +97,14 @@ struct t_commrec {
      MPI_Comm mpi_comm_mysim;
      MPI_Comm mpi_comm_mygroup;
  
-    /* MPI ranks within a physical node for hardware access */
-    int            nrank_intranode;    /* nr of ranks on this physical node */
-    int            rank_intranode;     /* our rank on this physical node */
-    int            nrank_pp_intranode; /* as nrank_intranode, for particle-particle only */
-    int            rank_pp_intranode;  /* as rank_intranode, for particle-particle only */
+    /* MPI ranks and a communicator within a physical node for hardware access */
+    MPI_Comm       mpi_comm_physicalnode; /* communicator for all ranks of the physical node
+                                           * NOTE: this communicator should only be used during initialization and finalization, as it can contain ranks from PP, PME and multiple simulations with multisim
+                                           */
+    int            nrank_intranode;       /* nr of ranks on this physical node */
+    int            rank_intranode;        /* our rank on this physical node */
+    int            nrank_pp_intranode;    /* as nrank_intranode, for particle-particle only */
+    int            rank_pp_intranode;     /* as rank_intranode, for particle-particle only */
  
      gmx_nodecomm_t nc;
author	Aleksei Iupinov <a.yupinov@gmail.com>
	Thu, 11 May 2017 15:03:11 +0000 (17:03 +0200)
committer	Mark Abraham <mark.j.abraham@gmail.com>
	Mon, 12 Jun 2017 12:19:39 +0000 (14:19 +0200)
src/gromacs/gmxlib/network.cpp		patch \| blob \| history
src/gromacs/gmxlib/network.h		patch \| blob \| history
src/gromacs/mdlib/forcerec.cpp		patch \| blob \| history
src/gromacs/mdtypes/commrec.h		patch \| blob \| history