cr->nnodes = gmx_node_num();
cr->nodeid = gmx_node_rank();
+ if (PAR(cr) || MULTISIM(cr))
+ {
+ MPI_Comm_split(MPI_COMM_WORLD, gmx_physicalnode_id_hash(), cr->nodeid, &cr->mpi_comm_physicalnode);
+ }
cr->sim_nodeid = cr->nodeid;
cr->mpi_comm_mysim = MPI_COMM_WORLD;
cr->mpi_comm_mygroup = MPI_COMM_WORLD;
void done_commrec(t_commrec *cr)
{
+#if GMX_MPI
+ if (PAR(cr) || MULTISIM(cr))
+ {
+ MPI_Comm_free(&cr->mpi_comm_physicalnode);
+ }
+#endif
if (nullptr != cr->dd)
{
// TODO: implement
#endif
}
+/* Blocks until every rank on this rank's physical node has reached the
+ * barrier, via cr->mpi_comm_physicalnode.
+ *
+ * Without real MPI this is a fatal internal error (gmx_call), matching the
+ * convention of the other collectives in this file.
+ *
+ * NOTE(review): cr->mpi_comm_physicalnode is only created when
+ * PAR(cr) || MULTISIM(cr) (see the MPI_Comm_split in init and the matching
+ * MPI_Comm_free in done_commrec), so callers must check that condition
+ * before calling, or MPI_Barrier receives an uninitialized communicator —
+ * the GPU-teardown caller does guard this way; confirm any new callers do too.
+ */
+void gmx_barrier_physical_node(const t_commrec gmx_unused *cr)
+{
+#if !GMX_MPI
+    gmx_call("gmx_barrier_physical_node");
+#else
+    MPI_Barrier(cr->mpi_comm_physicalnode);
+#endif
+}
+
void gmx_bcast(int gmx_unused nbytes, void gmx_unused *b, const t_commrec gmx_unused *cr)
{
#if !GMX_MPI
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
void gmx_barrier(const struct t_commrec *cr);
/* Wait till all processes in cr->mpi_comm_mygroup have reached the barrier */
+void gmx_barrier_physical_node(const struct t_commrec *cr);
+/* Wait till all processes in cr->mpi_comm_physicalnode have reached the barrier */
+
void gmx_bcast(int nbytes, void *b, const struct t_commrec *cr);
/* Broadcast nbytes bytes from the master to cr->mpi_comm_mygroup */
nbnxn_gpu_free(fr->nbv->gpu_nbv);
/* stop the GPU profiler (only CUDA) */
stopGpuProfiler();
+ }
- /* With tMPI we need to wait for all ranks to finish deallocation before
- * destroying the CUDA context in free_gpu() as some tMPI ranks may be sharing
- * GPU and context.
- *
- * This is not a concern in OpenCL where we use one context per rank which
- * is freed in nbnxn_gpu_free().
- *
- * Note: as only PP ranks need to free GPU resources, so it is safe to
- * not call the barrier on PME ranks.
- */
+ /* With tMPI we need to wait for all ranks to finish deallocation before
+ * destroying the CUDA context in free_gpu() as some tMPI ranks may be sharing
+ * GPU and context.
+ *
+ * This is not a concern in OpenCL where we use one context per rank which
+ * is freed in nbnxn_gpu_free().
+ *
+ * Note: it is safe not to call the barrier on ranks that do not use a GPU,
+ * but it is simpler and more future-proof to call it on the whole node.
+ */
#if GMX_THREAD_MPI
- if (PAR(cr))
- {
- gmx_barrier(cr);
- }
+ if (PAR(cr) || MULTISIM(cr))
+ {
+ gmx_barrier_physical_node(cr);
+ }
#endif /* GMX_THREAD_MPI */
+ if (bIsPPrankUsingGPU)
+ {
/* uninitialize GPU (by destroying the context) */
if (!free_cuda_gpu(cr->rank_pp_intranode, gpu_err_str, gpu_info, gpu_opt))
{
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2017, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
MPI_Comm mpi_comm_mysim;
MPI_Comm mpi_comm_mygroup;
- /* MPI ranks within a physical node for hardware access */
- int nrank_intranode; /* nr of ranks on this physical node */
- int rank_intranode; /* our rank on this physical node */
- int nrank_pp_intranode; /* as nrank_intranode, for particle-particle only */
- int rank_pp_intranode; /* as rank_intranode, for particle-particle only */
+ /* MPI ranks and a communicator within a physical node for hardware access */
+ MPI_Comm mpi_comm_physicalnode; /* Communicator for all ranks on this
+                                  * physical node.
+                                  * NOTE: use only during initialization and
+                                  * finalization, because it can contain ranks
+                                  * from PP, PME and, with multisim, from
+                                  * multiple simulations.
+                                  */
+ int nrank_intranode; /* nr of ranks on this physical node */
+ int rank_intranode; /* our rank on this physical node */
+ int nrank_pp_intranode; /* as nrank_intranode, for particle-particle only */
+ int rank_pp_intranode; /* as rank_intranode, for particle-particle only */
gmx_nodecomm_t nc;