src/gromacs/gmxlib/network.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #include "gmxpre.h"
  38
  39 #include "network.h"
  40
  41 #include "config.h"
  42
  43 #include <cctype>
  44 #include <cstdarg>
  45 #include <cstdlib>
  46 #include <cstring>
  47
  48 #include "gromacs/commandline/filenm.h"
  49 #include "gromacs/mdtypes/commrec.h"
  50 #include "gromacs/utility/basenetwork.h"
  51 #include "gromacs/utility/cstringutil.h"
  52 #include "gromacs/utility/fatalerror.h"
  53 #include "gromacs/utility/futil.h"
  54 #include "gromacs/utility/gmxmpi.h"
  55 #include "gromacs/utility/smalloc.h"
  56
  57 /* The source code in this file should be thread-safe.
  58       Please keep it that way. */
  59
  60 void gmx_fill_commrec_from_mpi(t_commrec gmx_unused *cr)
  61 {
  62 #if !GMX_MPI
  63     gmx_call("gmx_fill_commrec_from_mpi");
  64 #else
  65     if (!gmx_mpi_initialized())
  66     {
  67         gmx_comm("MPI has not been initialized properly");
  68     }
  69
  70     cr->nnodes           = gmx_node_num();
  71     cr->nodeid           = gmx_node_rank();
  72     // TODO This communicator should be always available. Currently we
  73     // make it multiple times, and keep it only when relevant. But the
  74     // cost of an extra communicator is negligible in single-node
  75     // cases (both thread-MPI and real MPI) case, and we need it in
  76     // all multi-node MPI cases with more than one PP rank per node,
  77     // with and without GPUs. By always having it available, we also
  78     // don't need to protect calls to mpi_comm_physicalnode, etc.
  79     if (PAR(cr) || isMultiSim(cr->ms))
  80     {
  81         MPI_Comm_split(MPI_COMM_WORLD, gmx_physicalnode_id_hash(), cr->nodeid, &cr->mpi_comm_physicalnode);
  82     }
  83     cr->sim_nodeid       = cr->nodeid;
  84     cr->mpi_comm_mysim   = MPI_COMM_WORLD;
  85     cr->mpi_comm_mygroup = MPI_COMM_WORLD;
  86
  87 #endif
  88 }
  89
  90 t_commrec *init_commrec()
  91 {
  92     t_commrec    *cr;
  93
  94     snew(cr, 1);
  95
  96 #if GMX_LIB_MPI
  97     gmx_fill_commrec_from_mpi(cr);
  98 #else
  99     cr->mpi_comm_mysim   = nullptr;
 100     cr->mpi_comm_mygroup = nullptr;
 101     cr->nnodes           = 1;
 102     cr->sim_nodeid       = 0;
 103     cr->nodeid           = cr->sim_nodeid;
 104 #endif
 105
 106     // TODO cr->duty should not be initialized here
 107     cr->duty = (DUTY_PP | DUTY_PME);
 108
 109 #if GMX_MPI && !MPI_IN_PLACE_EXISTS
 110     /* initialize the MPI_IN_PLACE replacement buffers */
 111     snew(cr->mpb, 1);
 112     cr->mpb->ibuf        = NULL;
 113     cr->mpb->libuf       = NULL;
 114     cr->mpb->fbuf        = NULL;
 115     cr->mpb->dbuf        = NULL;
 116     cr->mpb->ibuf_alloc  = 0;
 117     cr->mpb->libuf_alloc = 0;
 118     cr->mpb->fbuf_alloc  = 0;
 119     cr->mpb->dbuf_alloc  = 0;
 120 #endif
 121
 122     return cr;
 123 }
 124
 125 static void done_mpi_in_place_buf(mpi_in_place_buf_t *buf)
 126 {
 127     if (nullptr != buf)
 128     {
 129         sfree(buf->ibuf);
 130         sfree(buf->libuf);
 131         sfree(buf->fbuf);
 132         sfree(buf->dbuf);
 133         sfree(buf);
 134     }
 135 }
 136
 137 void done_commrec(t_commrec *cr)
 138 {
 139 #if GMX_MPI
 140     if (PAR(cr) || isMultiSim(cr->ms))
 141     {
 142         MPI_Comm_free(&cr->mpi_comm_physicalnode);
 143     }
 144 #endif
 145     if (nullptr != cr->dd)
 146     {
 147         // TODO: implement
 148         // done_domdec(cr->dd);
 149     }
 150     if (nullptr != cr->ms)
 151     {
 152         done_mpi_in_place_buf(cr->ms->mpb);
 153         sfree(cr->ms);
 154     }
 155     done_mpi_in_place_buf(cr->mpb);
 156     sfree(cr);
 157 }
 158
 159 t_commrec *reinitialize_commrec_for_this_thread(const t_commrec gmx_unused *cro)
 160 {
 161 #if GMX_THREAD_MPI
 162     t_commrec *cr;
 163
 164     /* make a thread-specific commrec */
 165     snew(cr, 1);
 166     /* now copy the whole thing, so settings like the number of PME nodes
 167        get propagated. */
 168     *cr = *cro;
 169
 170     /* and we start setting our own thread-specific values for things */
 171     gmx_fill_commrec_from_mpi(cr);
 172
 173     // TODO cr->duty should not be initialized here
 174     cr->duty             = (DUTY_PP | DUTY_PME);
 175
 176     return cr;
 177 #else
 178     return nullptr;
 179 #endif
 180 }
 181
 182 void gmx_setup_nodecomm(FILE gmx_unused *fplog, t_commrec *cr)
 183 {
 184     gmx_nodecomm_t *nc;
 185
 186     /* Many MPI implementations do not optimize MPI_Allreduce
 187      * (and probably also other global communication calls)
 188      * for multi-core nodes connected by a network.
 189      * We can optimize such communication by using one MPI call
 190      * within each node and one between the nodes.
 191      * For MVAPICH2 and Intel MPI this reduces the time for
 192      * the global_stat communication by 25%
 193      * for 2x2-core 3 GHz Woodcrest connected by mixed DDR/SDR Infiniband.
 194      * B. Hess, November 2007
 195      */
 196
 197     nc = &cr->nc;
 198
 199     nc->bUse = FALSE;
 200 #if !GMX_THREAD_MPI
 201 #if GMX_MPI
 202     int n, rank;
 203
 204     MPI_Comm_size(cr->mpi_comm_mygroup, &n);
 205     MPI_Comm_rank(cr->mpi_comm_mygroup, &rank);
 206
 207     int nodehash = gmx_physicalnode_id_hash();
 208
 209     if (debug)
 210     {
 211         fprintf(debug, "In gmx_setup_nodecomm: splitting communicator of size %d\n", n);
 212     }
 213
 214
 215     /* The intra-node communicator, split on node number */
 216     MPI_Comm_split(cr->mpi_comm_mygroup, nodehash, rank, &nc->comm_intra);
 217     MPI_Comm_rank(nc->comm_intra, &nc->rank_intra);
 218     if (debug)
 219     {
 220         fprintf(debug, "In gmx_setup_nodecomm: node ID %d rank within node %d\n",
 221                 rank, nc->rank_intra);
 222     }
 223     /* The inter-node communicator, split on rank_intra.
 224      * We actually only need the one for rank=0,
 225      * but it is easier to create them all.
 226      */
 227     MPI_Comm_split(cr->mpi_comm_mygroup, nc->rank_intra, rank, &nc->comm_inter);
 228     /* Check if this really created two step communication */
 229     int ng, ni;
 230
 231     MPI_Comm_size(nc->comm_inter, &ng);
 232     MPI_Comm_size(nc->comm_intra, &ni);
 233     if (debug)
 234     {
 235         fprintf(debug, "In gmx_setup_nodecomm: groups %d, my group size %d\n",
 236                 ng, ni);
 237     }
 238
 239     if (getenv("GMX_NO_NODECOMM") == nullptr &&
 240         ((ng > 1 && ng < n) || (ni > 1 && ni < n)))
 241     {
 242         nc->bUse = TRUE;
 243         if (fplog)
 244         {
 245             fprintf(fplog, "Using two step summing over %d groups of on average %.1f ranks\n\n",
 246                     ng, (real)n/(real)ng);
 247         }
 248         if (nc->rank_intra > 0)
 249         {
 250             MPI_Comm_free(&nc->comm_inter);
 251         }
 252     }
 253     else
 254     {
 255         /* One group or all processes in a separate group, use normal summing */
 256         MPI_Comm_free(&nc->comm_inter);
 257         MPI_Comm_free(&nc->comm_intra);
 258         if (debug)
 259         {
 260             fprintf(debug, "In gmx_setup_nodecomm: not unsing separate inter- and intra-node communicators.\n");
 261         }
 262     }
 263 #endif
 264 #else
 265     /* tMPI runs only on a single node so just use the nodeid */
 266     nc->rank_intra = cr->nodeid;
 267 #endif
 268 }
 269
 270 void gmx_init_intranode_counters(t_commrec *cr)
 271 {
 272     /* counters for PP+PME and PP-only processes on my physical node */
 273     int nrank_intranode, rank_intranode;
 274     /* thread-MPI is not initialized when not running in parallel */
 275 #if GMX_MPI && !GMX_THREAD_MPI
 276     int nrank_world, rank_world;
 277     int i, myhash, *hash, *hash_s, *hash_pp, *hash_pp_s;
 278
 279     MPI_Comm_size(MPI_COMM_WORLD, &nrank_world);
 280     MPI_Comm_rank(MPI_COMM_WORLD, &rank_world);
 281
 282     /* Get a (hopefully unique) hash that identifies our physical node */
 283     myhash = gmx_physicalnode_id_hash();
 284
 285     /* We can't rely on MPI_IN_PLACE, so we need send and receive buffers */
 286     snew(hash,   nrank_world);
 287     snew(hash_s, nrank_world);
 288     snew(hash_pp,   nrank_world);
 289     snew(hash_pp_s, nrank_world);
 290
 291     hash_s[rank_world]    = myhash;
 292     hash_pp_s[rank_world] = thisRankHasDuty(cr, DUTY_PP) ? myhash : -1;
 293
 294     MPI_Allreduce(hash_s,    hash,    nrank_world, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
 295     MPI_Allreduce(hash_pp_s, hash_pp, nrank_world, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
 296
 297     nrank_intranode    = 0;
 298     rank_intranode     = 0;
 299     for (i = 0; i < nrank_world; i++)
 300     {
 301         if (hash[i] == myhash)
 302         {
 303             nrank_intranode++;
 304             if (i < rank_world)
 305             {
 306                 rank_intranode++;
 307             }
 308         }
 309     }
 310     sfree(hash);
 311     sfree(hash_s);
 312     sfree(hash_pp);
 313     sfree(hash_pp_s);
 314 #else
 315     /* Serial or thread-MPI code: we run within a single physical node */
 316     nrank_intranode    = cr->nnodes;
 317     rank_intranode     = cr->sim_nodeid;
 318 #endif
 319
 320     if (debug)
 321     {
 322         char sbuf[STRLEN];
 323         if (thisRankHasDuty(cr, DUTY_PP) && thisRankHasDuty(cr, DUTY_PME))
 324         {
 325             sprintf(sbuf, "PP+PME");
 326         }
 327         else
 328         {
 329             sprintf(sbuf, "%s", thisRankHasDuty(cr, DUTY_PP) ? "PP" : "PME");
 330         }
 331         fprintf(debug, "On %3s rank %d: nrank_intranode=%d, rank_intranode=%d\n",
 332                 sbuf, cr->sim_nodeid,
 333                 nrank_intranode, rank_intranode);
 334     }
 335
 336     cr->nrank_intranode    = nrank_intranode;
 337     cr->rank_intranode     = rank_intranode;
 338 }
 339
 340
 341 void gmx_barrier(const t_commrec gmx_unused *cr)
 342 {
 343 #if !GMX_MPI
 344     gmx_call("gmx_barrier");
 345 #else
 346     MPI_Barrier(cr->mpi_comm_mygroup);
 347 #endif
 348 }
 349
 350 void gmx_barrier_physical_node(const t_commrec gmx_unused *cr)
 351 {
 352 #if !GMX_MPI
 353     gmx_call("gmx_barrier_physical_node");
 354 #else
 355     MPI_Barrier(cr->mpi_comm_physicalnode);
 356 #endif
 357 }
 358
 359 void gmx_bcast(int gmx_unused nbytes, void gmx_unused *b, const t_commrec gmx_unused *cr)
 360 {
 361 #if !GMX_MPI
 362     gmx_call("gmx_bast");
 363 #else
 364     MPI_Bcast(b, nbytes, MPI_BYTE, MASTERRANK(cr), cr->mpi_comm_mygroup);
 365 #endif
 366 }
 367
 368 void gmx_bcast_sim(int gmx_unused nbytes, void gmx_unused *b, const t_commrec gmx_unused *cr)
 369 {
 370 #if !GMX_MPI
 371     gmx_call("gmx_bast");
 372 #else
 373     MPI_Bcast(b, nbytes, MPI_BYTE, MASTERRANK(cr), cr->mpi_comm_mysim);
 374 #endif
 375 }
 376
 377 void gmx_sumd(int gmx_unused nr, double gmx_unused r[], const t_commrec gmx_unused *cr)
 378 {
 379 #if !GMX_MPI
 380     gmx_call("gmx_sumd");
 381 #else
 382 #if MPI_IN_PLACE_EXISTS
 383     if (cr->nc.bUse)
 384     {
 385         if (cr->nc.rank_intra == 0)
 386         {
 387             /* Use two step summing. */
 388             MPI_Reduce(MPI_IN_PLACE, r, nr, MPI_DOUBLE, MPI_SUM, 0,
 389                        cr->nc.comm_intra);
 390             /* Sum the roots of the internal (intra) buffers. */
 391             MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_DOUBLE, MPI_SUM,
 392                           cr->nc.comm_inter);
 393         }
 394         else
 395         {
 396             /* This is here because of the silly MPI specification
 397                 that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
 398             MPI_Reduce(r, nullptr, nr, MPI_DOUBLE, MPI_SUM, 0, cr->nc.comm_intra);
 399         }
 400         MPI_Bcast(r, nr, MPI_DOUBLE, 0, cr->nc.comm_intra);
 401     }
 402     else
 403     {
 404         MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_DOUBLE, MPI_SUM,
 405                       cr->mpi_comm_mygroup);
 406     }
 407 #else
 408     int i;
 409
 410     if (nr > cr->mpb->dbuf_alloc)
 411     {
 412         cr->mpb->dbuf_alloc = nr;
 413         srenew(cr->mpb->dbuf, cr->mpb->dbuf_alloc);
 414     }
 415     if (cr->nc.bUse)
 416     {
 417         /* Use two step summing */
 418         MPI_Allreduce(r, cr->mpb->dbuf, nr, MPI_DOUBLE, MPI_SUM, cr->nc.comm_intra);
 419         if (cr->nc.rank_intra == 0)
 420         {
 421             /* Sum with the buffers reversed */
 422             MPI_Allreduce(cr->mpb->dbuf, r, nr, MPI_DOUBLE, MPI_SUM,
 423                           cr->nc.comm_inter);
 424         }
 425         MPI_Bcast(r, nr, MPI_DOUBLE, 0, cr->nc.comm_intra);
 426     }
 427     else
 428     {
 429         MPI_Allreduce(r, cr->mpb->dbuf, nr, MPI_DOUBLE, MPI_SUM,
 430                       cr->mpi_comm_mygroup);
 431         for (i = 0; i < nr; i++)
 432         {
 433             r[i] = cr->mpb->dbuf[i];
 434         }
 435     }
 436 #endif
 437 #endif
 438 }
 439
 440 void gmx_sumf(int gmx_unused nr, float gmx_unused r[], const t_commrec gmx_unused *cr)
 441 {
 442 #if !GMX_MPI
 443     gmx_call("gmx_sumf");
 444 #else
 445 #if MPI_IN_PLACE_EXISTS
 446     if (cr->nc.bUse)
 447     {
 448         /* Use two step summing.  */
 449         if (cr->nc.rank_intra == 0)
 450         {
 451             MPI_Reduce(MPI_IN_PLACE, r, nr, MPI_FLOAT, MPI_SUM, 0,
 452                        cr->nc.comm_intra);
 453             /* Sum the roots of the internal (intra) buffers */
 454             MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_FLOAT, MPI_SUM,
 455                           cr->nc.comm_inter);
 456         }
 457         else
 458         {
 459             /* This is here because of the silly MPI specification
 460                 that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
 461             MPI_Reduce(r, nullptr, nr, MPI_FLOAT, MPI_SUM, 0, cr->nc.comm_intra);
 462         }
 463         MPI_Bcast(r, nr, MPI_FLOAT, 0, cr->nc.comm_intra);
 464     }
 465     else
 466     {
 467         MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_FLOAT, MPI_SUM, cr->mpi_comm_mygroup);
 468     }
 469 #else
 470     int i;
 471
 472     if (nr > cr->mpb->fbuf_alloc)
 473     {
 474         cr->mpb->fbuf_alloc = nr;
 475         srenew(cr->mpb->fbuf, cr->mpb->fbuf_alloc);
 476     }
 477     if (cr->nc.bUse)
 478     {
 479         /* Use two step summing */
 480         MPI_Allreduce(r, cr->mpb->fbuf, nr, MPI_FLOAT, MPI_SUM, cr->nc.comm_intra);
 481         if (cr->nc.rank_intra == 0)
 482         {
 483             /* Sum with the buffers reversed */
 484             MPI_Allreduce(cr->mpb->fbuf, r, nr, MPI_FLOAT, MPI_SUM,
 485                           cr->nc.comm_inter);
 486         }
 487         MPI_Bcast(r, nr, MPI_FLOAT, 0, cr->nc.comm_intra);
 488     }
 489     else
 490     {
 491         MPI_Allreduce(r, cr->mpb->fbuf, nr, MPI_FLOAT, MPI_SUM,
 492                       cr->mpi_comm_mygroup);
 493         for (i = 0; i < nr; i++)
 494         {
 495             r[i] = cr->mpb->fbuf[i];
 496         }
 497     }
 498 #endif
 499 #endif
 500 }
 501
 502 void gmx_sumi(int gmx_unused nr, int gmx_unused r[], const t_commrec gmx_unused *cr)
 503 {
 504 #if !GMX_MPI
 505     gmx_call("gmx_sumi");
 506 #else
 507 #if MPI_IN_PLACE_EXISTS
 508     if (cr->nc.bUse)
 509     {
 510         /* Use two step summing */
 511         if (cr->nc.rank_intra == 0)
 512         {
 513             MPI_Reduce(MPI_IN_PLACE, r, nr, MPI_INT, MPI_SUM, 0, cr->nc.comm_intra);
 514             /* Sum with the buffers reversed */
 515             MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT, MPI_SUM, cr->nc.comm_inter);
 516         }
 517         else
 518         {
 519             /* This is here because of the silly MPI specification
 520                 that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
 521             MPI_Reduce(r, nullptr, nr, MPI_INT, MPI_SUM, 0, cr->nc.comm_intra);
 522         }
 523         MPI_Bcast(r, nr, MPI_INT, 0, cr->nc.comm_intra);
 524     }
 525     else
 526     {
 527         MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT, MPI_SUM, cr->mpi_comm_mygroup);
 528     }
 529 #else
 530     int i;
 531
 532     if (nr > cr->mpb->ibuf_alloc)
 533     {
 534         cr->mpb->ibuf_alloc = nr;
 535         srenew(cr->mpb->ibuf, cr->mpb->ibuf_alloc);
 536     }
 537     if (cr->nc.bUse)
 538     {
 539         /* Use two step summing */
 540         MPI_Allreduce(r, cr->mpb->ibuf, nr, MPI_INT, MPI_SUM, cr->nc.comm_intra);
 541         if (cr->nc.rank_intra == 0)
 542         {
 543             /* Sum with the buffers reversed */
 544             MPI_Allreduce(cr->mpb->ibuf, r, nr, MPI_INT, MPI_SUM, cr->nc.comm_inter);
 545         }
 546         MPI_Bcast(r, nr, MPI_INT, 0, cr->nc.comm_intra);
 547     }
 548     else
 549     {
 550         MPI_Allreduce(r, cr->mpb->ibuf, nr, MPI_INT, MPI_SUM, cr->mpi_comm_mygroup);
 551         for (i = 0; i < nr; i++)
 552         {
 553             r[i] = cr->mpb->ibuf[i];
 554         }
 555     }
 556 #endif
 557 #endif
 558 }
 559
 560 void gmx_sumli(int gmx_unused nr, gmx_int64_t gmx_unused r[], const t_commrec gmx_unused *cr)
 561 {
 562 #if !GMX_MPI
 563     gmx_call("gmx_sumli");
 564 #else
 565 #if MPI_IN_PLACE_EXISTS
 566     if (cr->nc.bUse)
 567     {
 568         /* Use two step summing */
 569         if (cr->nc.rank_intra == 0)
 570         {
 571             MPI_Reduce(MPI_IN_PLACE, r, nr, MPI_INT64_T, MPI_SUM, 0,
 572                        cr->nc.comm_intra);
 573             /* Sum with the buffers reversed */
 574             MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT64_T, MPI_SUM,
 575                           cr->nc.comm_inter);
 576         }
 577         else
 578         {
 579             /* This is here because of the silly MPI specification
 580                 that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
 581             MPI_Reduce(r, nullptr, nr, MPI_INT64_T, MPI_SUM, 0, cr->nc.comm_intra);
 582         }
 583         MPI_Bcast(r, nr, MPI_INT64_T, 0, cr->nc.comm_intra);
 584     }
 585     else
 586     {
 587         MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT64_T, MPI_SUM, cr->mpi_comm_mygroup);
 588     }
 589 #else
 590     int i;
 591
 592     if (nr > cr->mpb->libuf_alloc)
 593     {
 594         cr->mpb->libuf_alloc = nr;
 595         srenew(cr->mpb->libuf, cr->mpb->libuf_alloc);
 596     }
 597     if (cr->nc.bUse)
 598     {
 599         /* Use two step summing */
 600         MPI_Allreduce(r, cr->mpb->libuf, nr, MPI_INT64_T, MPI_SUM,
 601                       cr->nc.comm_intra);
 602         if (cr->nc.rank_intra == 0)
 603         {
 604             /* Sum with the buffers reversed */
 605             MPI_Allreduce(cr->mpb->libuf, r, nr, MPI_INT64_T, MPI_SUM,
 606                           cr->nc.comm_inter);
 607         }
 608         MPI_Bcast(r, nr, MPI_INT64_T, 0, cr->nc.comm_intra);
 609     }
 610     else
 611     {
 612         MPI_Allreduce(r, cr->mpb->libuf, nr, MPI_INT64_T, MPI_SUM,
 613                       cr->mpi_comm_mygroup);
 614         for (i = 0; i < nr; i++)
 615         {
 616             r[i] = cr->mpb->libuf[i];
 617         }
 618     }
 619 #endif
 620 #endif
 621 }
 622
 623
 624
 625 #if GMX_MPI
 626 static void gmx_sumd_comm(int nr, double r[], MPI_Comm mpi_comm)
 627 {
 628 #if MPI_IN_PLACE_EXISTS
 629     MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_DOUBLE, MPI_SUM, mpi_comm);
 630 #else
 631     /* this function is only used in code that is not performance critical,
 632        (during setup, when comm_rec is not the appropriate communication
 633        structure), so this isn't as bad as it looks. */
 634     double *buf;
 635     int     i;
 636
 637     snew(buf, nr);
 638     MPI_Allreduce(r, buf, nr, MPI_DOUBLE, MPI_SUM, mpi_comm);
 639     for (i = 0; i < nr; i++)
 640     {
 641         r[i] = buf[i];
 642     }
 643     sfree(buf);
 644 #endif
 645 }
 646 #endif
 647
 648 #if GMX_MPI
 649 static void gmx_sumf_comm(int nr, float r[], MPI_Comm mpi_comm)
 650 {
 651 #if MPI_IN_PLACE_EXISTS
 652     MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_FLOAT, MPI_SUM, mpi_comm);
 653 #else
 654     /* this function is only used in code that is not performance critical,
 655        (during setup, when comm_rec is not the appropriate communication
 656        structure), so this isn't as bad as it looks. */
 657     float *buf;
 658     int    i;
 659
 660     snew(buf, nr);
 661     MPI_Allreduce(r, buf, nr, MPI_FLOAT, MPI_SUM, mpi_comm);
 662     for (i = 0; i < nr; i++)
 663     {
 664         r[i] = buf[i];
 665     }
 666     sfree(buf);
 667 #endif
 668 }
 669 #endif
 670
 671 void gmx_sumd_sim(int gmx_unused nr, double gmx_unused r[], const gmx_multisim_t gmx_unused *ms)
 672 {
 673 #if !GMX_MPI
 674     gmx_call("gmx_sumd_sim");
 675 #else
 676     gmx_sumd_comm(nr, r, ms->mpi_comm_masters);
 677 #endif
 678 }
 679
 680 void gmx_sumf_sim(int gmx_unused nr, float gmx_unused r[], const gmx_multisim_t gmx_unused *ms)
 681 {
 682 #if !GMX_MPI
 683     gmx_call("gmx_sumf_sim");
 684 #else
 685     gmx_sumf_comm(nr, r, ms->mpi_comm_masters);
 686 #endif
 687 }
 688
 689 void gmx_sumi_sim(int gmx_unused nr, int gmx_unused r[], const gmx_multisim_t gmx_unused *ms)
 690 {
 691 #if !GMX_MPI
 692     gmx_call("gmx_sumi_sim");
 693 #else
 694 #if MPI_IN_PLACE_EXISTS
 695     MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT, MPI_SUM, ms->mpi_comm_masters);
 696 #else
 697     /* this is thread-unsafe, but it will do for now: */
 698     int i;
 699
 700     if (nr > ms->mpb->ibuf_alloc)
 701     {
 702         ms->mpb->ibuf_alloc = nr;
 703         srenew(ms->mpb->ibuf, ms->mpb->ibuf_alloc);
 704     }
 705     MPI_Allreduce(r, ms->mpb->ibuf, nr, MPI_INT, MPI_SUM, ms->mpi_comm_masters);
 706     for (i = 0; i < nr; i++)
 707     {
 708         r[i] = ms->mpb->ibuf[i];
 709     }
 710 #endif
 711 #endif
 712 }
 713
 714 void gmx_sumli_sim(int gmx_unused nr, gmx_int64_t gmx_unused r[], const gmx_multisim_t gmx_unused *ms)
 715 {
 716 #if !GMX_MPI
 717     gmx_call("gmx_sumli_sim");
 718 #else
 719 #if MPI_IN_PLACE_EXISTS
 720     MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT64_T, MPI_SUM,
 721                   ms->mpi_comm_masters);
 722 #else
 723     /* this is thread-unsafe, but it will do for now: */
 724     int i;
 725
 726     if (nr > ms->mpb->libuf_alloc)
 727     {
 728         ms->mpb->libuf_alloc = nr;
 729         srenew(ms->mpb->libuf, ms->mpb->libuf_alloc);
 730     }
 731     MPI_Allreduce(r, ms->mpb->libuf, nr, MPI_INT64_T, MPI_SUM,
 732                   ms->mpi_comm_masters);
 733     for (i = 0; i < nr; i++)
 734     {
 735         r[i] = ms->mpb->libuf[i];
 736     }
 737 #endif
 738 #endif
 739 }
 740
 741 const char *opt2fn_master(const char *opt, int nfile, const t_filenm fnm[],
 742                           t_commrec *cr)
 743 {
 744     return SIMMASTER(cr) ? opt2fn(opt, nfile, fnm) : nullptr;
 745 }
 746
 747 void gmx_fatal_collective(int f_errno, const char *file, int line,
 748                           MPI_Comm comm, gmx_bool bMaster,
 749                           const char *fmt, ...)
 750 {
 751     va_list  ap;
 752     gmx_bool bFinalize;
 753 #if GMX_MPI
 754     int      result;
 755     /* Check if we are calling on all processes in MPI_COMM_WORLD */
 756     MPI_Comm_compare(comm, MPI_COMM_WORLD, &result);
 757     /* Any result except MPI_UNEQUAL allows us to call MPI_Finalize */
 758     bFinalize = (result != MPI_UNEQUAL);
 759 #else
 760     GMX_UNUSED_VALUE(comm);
 761     bFinalize = TRUE;
 762 #endif
 763
 764     va_start(ap, fmt);
 765     gmx_fatal_mpi_va(f_errno, file, line, bMaster, bFinalize, fmt, ap);
 766     va_end(ap);
 767 }