More consolidation of MPI initialization

author Mark Abraham <mark.j.abraham@gmail.com>

Sat, 28 Sep 2013 22:52:27 +0000 (00:52 +0200)

committer Gerrit Code Review <gerrit@gerrit.gromacs.org>

Fri, 4 Oct 2013 15:11:33 +0000 (17:11 +0200)
author Mark Abraham <mark.j.abraham@gmail.com>
Sat, 28 Sep 2013 22:52:27 +0000 (00:52 +0200)
committer Gerrit Code Review <gerrit@gerrit.gromacs.org>
Fri, 4 Oct 2013 15:11:33 +0000 (17:11 +0200)
diff --git a/cmake/legacy_and_external.supp b/cmake/legacy_and_external.supp

index 6ef044bf493244d1adde80d8f7f813da0485a0b5..6813d138630e0cc64e7a6c5ea120c090d8071b7b 100644 (file)
--- a/cmake/legacy_and_external.supp
+++ b/cmake/legacy_and_external.supp
@@ -23,7 +23,7 @@
     fun:malloc
     fun:ompi_proc_all
     ...
-   fun:gmx_finalize_mpi
+   fun:_ZN3gmx8finalizeEv
  }
  {
     MPI_Init/Addr4
diff --git a/src/gromacs/gmxana/gmx_pme_error.cpp b/src/gromacs/gmxana/gmx_pme_error.cpp

index e7ce33b23fbae04006c679d5efe0ab418587515a..e863973145049fd02a9a67b6f95b180b1d324a6e 100644 (file)
--- a/src/gromacs/gmxana/gmx_pme_error.cpp
+++ b/src/gromacs/gmxana/gmx_pme_error.cpp
@@ -1121,7 +1121,7 @@ int gmx_pme_error(int argc, char *argv[])
  
  #define NFILE asize(fnm)
  
-    cr = init_par();
+    cr = init_commrec();
  #ifdef GMX_LIB_MPI
      MPI_Barrier(MPI_COMM_WORLD);
  #endif
diff --git a/src/gromacs/gmxlib/main.cpp b/src/gromacs/gmxlib/main.cpp

index 616ce581f5858dd9168c7e001d6987307fd9e1eb..55f4c2525c35ebd365ff9d570473dda6fce072fe 100644 (file)
--- a/src/gromacs/gmxlib/main.cpp
+++ b/src/gromacs/gmxlib/main.cpp
@@ -472,84 +472,3 @@ void init_multisystem(t_commrec *cr, int nsim, char **multidirs,
          }
      }
  }
-
-t_commrec *init_par()
-{
-    t_commrec    *cr;
-
-    snew(cr, 1);
-
-#if defined GMX_MPI && !defined GMX_THREAD_MPI
-    if (!gmx_mpi_initialized())
-    {
-        gmx_comm("MPI has not been initialized properly");
-    }
-    cr->nnodes     = gmx_node_num();
-    cr->sim_nodeid = gmx_node_rank();
-    if (!PAR(cr) && (cr->sim_nodeid != 0))
-    {
-        gmx_comm("(!PAR(cr) && (cr->sim_nodeid != 0))");
-    }
-
-    cr->mpi_comm_mysim   = MPI_COMM_WORLD;
-    cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
-#else
-    /* These should never be accessed */
-    cr->mpi_comm_mysim   = NULL;
-    cr->mpi_comm_mygroup = NULL;
-    cr->nnodes           = 1;
-    cr->sim_nodeid       = 0;
-#endif
-
-    cr->nodeid = cr->sim_nodeid;
-
-    cr->duty = (DUTY_PP | DUTY_PME);
-
-#ifdef GMX_MPI
-#if !defined(GMX_THREAD_MPI) && !defined(MPI_IN_PLACE_EXISTS)
-    /* initialize the MPI_IN_PLACE replacement buffers */
-    snew(cr->mpb, 1);
-    cr->mpb->ibuf        = NULL;
-    cr->mpb->libuf       = NULL;
-    cr->mpb->fbuf        = NULL;
-    cr->mpb->dbuf        = NULL;
-    cr->mpb->ibuf_alloc  = 0;
-    cr->mpb->libuf_alloc = 0;
-    cr->mpb->fbuf_alloc  = 0;
-    cr->mpb->dbuf_alloc  = 0;
-#endif
-#endif
-
-    return cr;
-}
-
-t_commrec *init_par_threads(const t_commrec *cro)
-{
-#ifdef GMX_THREAD_MPI
-    int        initialized;
-    t_commrec *cr;
-
-    /* make a thread-specific commrec */
-    snew(cr, 1);
-    /* now copy the whole thing, so settings like the number of PME nodes
-       get propagated. */
-    *cr = *cro;
-
-    /* and we start setting our own thread-specific values for things */
-    MPI_Initialized(&initialized);
-    if (!initialized)
-    {
-        gmx_comm("Initializing threads without comm");
-    }
-
-    gmx_do_mpi_init(0, NULL);
-    gmx_fill_commrec_from_mpi(cr);
-
-    // TODO cr->duty should not be initialized here
-    cr->duty             = (DUTY_PP | DUTY_PME);
-
-    return cr;
-#else
-    return NULL;
-#endif
-}
diff --git a/src/gromacs/gmxlib/network.c b/src/gromacs/gmxlib/network.c

index 70a16c388590737f0759f5b3baf179b3646c9412..2fd91342df1ba336f07ae42ed53ef2115562e494 100644 (file)
--- a/src/gromacs/gmxlib/network.c
+++ b/src/gromacs/gmxlib/network.c
@@ -64,53 +64,89 @@ gmx_bool gmx_mpi_initialized(void)
  #endif
  }
  
-void gmx_do_mpi_init(int gmx_unused *argc, char gmx_unused ***argv)
+void gmx_fill_commrec_from_mpi(t_commrec *cr)
  {
  #ifndef GMX_MPI
-    gmx_call("gmx_do_mpi_init");
+    gmx_call("gmx_fill_commrec_from_mpi");
  #else
+    cr->nnodes           = gmx_node_num();
+    cr->nodeid           = gmx_node_rank();
+    cr->sim_nodeid       = cr->nodeid;
+    cr->mpi_comm_mysim   = MPI_COMM_WORLD;
+    cr->mpi_comm_mygroup = MPI_COMM_WORLD;
+
+#endif
+}
+
+t_commrec *init_commrec()
+{
+    t_commrec    *cr;
+
+    snew(cr, 1);
+
+#ifdef GMX_LIB_MPI
      if (!gmx_mpi_initialized())
      {
-#ifdef GMX_LIB_MPI
-#ifdef GMX_FAHCORE
-        (void) fah_MPI_Init(argc, argv);
+        gmx_comm("MPI has not been initialized properly");
+    }
+
+    gmx_fill_commrec_from_mpi(cr);
  #else
-        (void) MPI_Init(argc, argv);
+    /* These should never be accessed */
+    cr->mpi_comm_mysim   = NULL;
+    cr->mpi_comm_mygroup = NULL;
+    cr->nnodes           = 1;
+    cr->sim_nodeid       = 0;
+    cr->nodeid           = cr->sim_nodeid;
  #endif
+
+    // TODO this should be initialized elsewhere
+    cr->duty = (DUTY_PP | DUTY_PME);
+
+#if defined GMX_LIB_MPI && !defined MPI_IN_PLACE_EXISTS
+    /* initialize the MPI_IN_PLACE replacement buffers */
+    snew(cr->mpb, 1);
+    cr->mpb->ibuf        = NULL;
+    cr->mpb->libuf       = NULL;
+    cr->mpb->fbuf        = NULL;
+    cr->mpb->dbuf        = NULL;
+    cr->mpb->ibuf_alloc  = 0;
+    cr->mpb->libuf_alloc = 0;
+    cr->mpb->fbuf_alloc  = 0;
+    cr->mpb->dbuf_alloc  = 0;
  #endif
-    }
-#endif
+
+    return cr;
  }
  
-void gmx_fill_commrec_from_mpi(t_commrec *cr)
+t_commrec *init_par_threads(const t_commrec *cro)
  {
-#ifndef GMX_MPI
-    gmx_call("gmx_fill_commrec_from_mpi");
-#else
-    char   buf[256];
-    int    resultlen;             /* actual length of node name      */
-    int    i, flag;
-    int    mpi_num_nodes;
-    int    mpi_my_rank;
-    char   mpi_hostname[MPI_MAX_PROCESSOR_NAME];
+#ifdef GMX_THREAD_MPI
+    int        initialized;
+    t_commrec *cr;
  
-    mpi_num_nodes = gmx_node_num();
-    mpi_my_rank   = gmx_node_rank();
-    (void) MPI_Get_processor_name( mpi_hostname, &resultlen );
+    /* make a thread-specific commrec */
+    snew(cr, 1);
+    /* now copy the whole thing, so settings like the number of PME nodes
+       get propagated. */
+    *cr = *cro;
  
-#ifdef GMX_LIB_MPI
-    if (debug)
+    /* and we start setting our own thread-specific values for things */
+    MPI_Initialized(&initialized);
+    if (!initialized)
      {
-        fprintf(debug, "NNODES=%d, MYRANK=%d, HOSTNAME=%s\n",
-                mpi_num_nodes, mpi_my_rank, mpi_hostname);
+        gmx_comm("Initializing threads without comm");
      }
-#endif
  
-    cr->nnodes           = mpi_num_nodes;
-    cr->nodeid           = mpi_my_rank;
-    cr->sim_nodeid       = mpi_my_rank;
-    cr->mpi_comm_mysim   = MPI_COMM_WORLD;
-    cr->mpi_comm_mygroup = MPI_COMM_WORLD;
+    /* No need to do MPI_Init() for thread-MPI. */
+    gmx_fill_commrec_from_mpi(cr);
+
+    // TODO cr->duty should not be initialized here
+    cr->duty             = (DUTY_PP | DUTY_PME);
+
+    return cr;
+#else
+    return NULL;
  #endif
  }
  
@@ -778,55 +814,3 @@ void gmx_sumli_sim(int nr, gmx_large_int_t r[], const gmx_multisim_t *ms)
  #endif
  #endif
  }
-
-
-void gmx_finalize_mpi(void)
-{
-#ifndef GMX_MPI
-    /* Compiled without MPI, no MPI finalizing needed */
-    return;
-#else
-    int finalized;
-    int ret;
-
-    if (!gmx_mpi_initialized())
-    {
-        return;
-    }
-    /* just as a check; we don't want to finalize twice */
-    MPI_Finalized(&finalized);
-    if (finalized)
-    {
-        return;
-    }
-
-    /* We sync the processes here to try to avoid problems
-     * with buggy MPI implementations that could cause
-     * unfinished processes to terminate.
-     */
-    MPI_Barrier(MPI_COMM_WORLD);
-
-    /*
-       if (DOMAINDECOMP(cr)) {
-       if (cr->npmenodes > 0 || cr->dd->bCartesian)
-        MPI_Comm_free(&cr->mpi_comm_mygroup);
-       if (cr->dd->bCartesian)
-        MPI_Comm_free(&cr->mpi_comm_mysim);
-       }
-     */
-
-    /* Apparently certain mpich implementations cause problems
-     * with MPI_Finalize. In that case comment out MPI_Finalize.
-     */
-    if (debug)
-    {
-        fprintf(debug, "Will call MPI_Finalize now\n");
-    }
-
-    ret = MPI_Finalize();
-    if (debug)
-    {
-        fprintf(debug, "Return code from MPI_Finalize = %d\n", ret);
-    }
-#endif
-}
diff --git a/src/gromacs/legacyheaders/main.h b/src/gromacs/legacyheaders/main.h

index 4724aa611fcc4725d477243a194042d4aacdc62e..cce5d48b5b9fe524db41370b2203e8fc846d9245 100644 (file)
--- a/src/gromacs/legacyheaders/main.h
+++ b/src/gromacs/legacyheaders/main.h
@@ -79,18 +79,6 @@ void init_multisystem(t_commrec *cr, int nsim, char **multidirs,
   * If bParFn is set, the nodeid is appended to the tpx and each output file.
   */
  
-t_commrec *init_par(void);
-/* Initiate the parallel computer. Return the communication record
- * (see network.h).
- */
-
-t_commrec *init_par_threads(const t_commrec *cro);
-/* Initialize communication records for thread-parallel simulations.
-   Must be called on all threads before any communication takes place by
-   the individual threads. Copies the original commrec to
-   thread-local versions (a small memory leak results because we don't
-   deallocate the old shared version).  */
-
  #ifdef __cplusplus
  }
  #endif
diff --git a/src/gromacs/legacyheaders/network.h b/src/gromacs/legacyheaders/network.h

index 0d2d2735d1d9bdabdd48b612e0de05e03377b6f3..6dc09159f6594a00025d15d3299cab31b6c5be38 100644 (file)
--- a/src/gromacs/legacyheaders/network.h
+++ b/src/gromacs/legacyheaders/network.h
@@ -53,8 +53,15 @@
  extern "C" {
  #endif
  
-void gmx_do_mpi_init(int *argc, char ***argv);
-/* Initializes the MPI parallel communication */
+t_commrec *init_commrec(void);
+/* Allocate, initialize and return the commrec. */
+
+t_commrec *init_par_threads(const t_commrec *cro);
+/* Initialize communication records for thread-parallel simulations.
+   Must be called on all threads before any communication takes place by
+   the individual threads. Copies the original commrec to
+   thread-local versions (a small memory leak results because we don't
+   deallocate the old shared version).  */
  
  void gmx_fill_commrec_from_mpi(t_commrec *cr);
  /* Continues t_commrec construction */
@@ -124,9 +131,6 @@ void gmx_sumd_sim(int nr, double r[], const gmx_multisim_t *ms);
  void gmx_abort(int nodeid, int nnodes, int errorno);
  /* Abort the parallel run */
  
-void gmx_finalize_mpi(void);
-/* Finish the parallel run in an ordered manner */
-
  #ifdef GMX_DOUBLE
  #define gmx_sum_comm  gmx_sumd_comm
  #define gmx_sum       gmx_sumd
diff --git a/src/gromacs/utility/init.cpp b/src/gromacs/utility/init.cpp

index e5fe7494b622f88b9805194f017bed860e848ee9..7aee502cf6cc6a17d92e721e202fca36841361df 100644 (file)
--- a/src/gromacs/utility/init.cpp
+++ b/src/gromacs/utility/init.cpp
@@ -94,13 +94,19 @@ void broadcastArguments(const t_commrec *cr, int *argc, char ***argv)
  ProgramInfo &init(const char *realBinaryName, int *argc, char ***argv)
  {
  #ifdef GMX_LIB_MPI
+#ifdef GMX_FAHCORE
+    (void) fah_MPI_Init(argc, argv);
+#else
+    (void) MPI_Init(argc, argv);
+#endif
+
      // TODO: Rewrite this to not use t_commrec once there is clarity on
      // the approach for MPI in C++ code.
      // TODO: Consider whether the argument broadcast would better be done
      // in CommandLineModuleManager.
      t_commrec cr;
      std::memset(&cr, 0, sizeof(cr));
-    gmx_do_mpi_init(argc, argv);
+
      gmx_fill_commrec_from_mpi(&cr);
      if (PAR(&cr))
      {
@@ -117,7 +123,34 @@ ProgramInfo &init(int *argc, char ***argv)
  
  void finalize()
  {
-    gmx_finalize_mpi();
+#ifndef GMX_MPI
+    /* Compiled without MPI, no MPI finalizing needed */
+    return;
+#else
+    int finalized;
+
+    if (!gmx_mpi_initialized())
+    {
+        return;
+    }
+    /* just as a check; we don't want to finalize twice */
+    MPI_Finalized(&finalized);
+    if (finalized)
+    {
+        return;
+    }
+
+    /* We sync the processes here to try to avoid problems
+     * with buggy MPI implementations that could cause
+     * unfinished processes to terminate.
+     */
+    MPI_Barrier(MPI_COMM_WORLD);
+
+    /* Apparently certain mpich implementations cause problems
+     * with MPI_Finalize. In that case comment out MPI_Finalize.
+     */
+    MPI_Finalize();
+#endif
  }
  
  } // namespace gmx
diff --git a/src/programs/mdrun/mdrun.cpp b/src/programs/mdrun/mdrun.cpp

index 8eec306a393a98d8094c2b446b9beb51d9f6e736..cac5894f1968a2c4eb12ecf6e3efda2c757dec7c 100644 (file)
--- a/src/programs/mdrun/mdrun.cpp
+++ b/src/programs/mdrun/mdrun.cpp
@@ -545,7 +545,7 @@ int gmx_mdrun(int argc, char *argv[])
      char          **multidir = NULL;
  
  
-    cr = init_par();
+    cr = init_commrec();
  
      PCA_Flags = (PCA_CAN_SET_DEFFNM | (MASTER(cr) ? 0 : PCA_QUIET));
author	Mark Abraham <mark.j.abraham@gmail.com>
	Sat, 28 Sep 2013 22:52:27 +0000 (00:52 +0200)
committer	Gerrit Code Review <gerrit@gerrit.gromacs.org>
	Fri, 4 Oct 2013 15:11:33 +0000 (17:11 +0200)
cmake/legacy_and_external.supp		patch \| blob \| history
src/gromacs/gmxana/gmx_pme_error.cpp		patch \| blob \| history
src/gromacs/gmxlib/main.cpp		patch \| blob \| history
src/gromacs/gmxlib/network.c		patch \| blob \| history
src/gromacs/legacyheaders/main.h		patch \| blob \| history
src/gromacs/legacyheaders/network.h		patch \| blob \| history
src/gromacs/utility/init.cpp		patch \| blob \| history
src/programs/mdrun/mdrun.cpp		patch \| blob \| history