src/gromacs/gmxlib/network.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #include "gmxpre.h"
  38
  39 #include "network.h"
  40
  41 #include "config.h"
  42
  43 #include <cctype>
  44 #include <cstdarg>
  45 #include <cstdlib>
  46 #include <cstring>
  47
  48 #include "gromacs/commandline/filenm.h"
  49 #include "gromacs/mdtypes/commrec.h"
  50 #include "gromacs/utility/basenetwork.h"
  51 #include "gromacs/utility/cstringutil.h"
  52 #include "gromacs/utility/fatalerror.h"
  53 #include "gromacs/utility/futil.h"
  54 #include "gromacs/utility/gmxmpi.h"
  55 #include "gromacs/utility/smalloc.h"
  56
  57 /* The source code in this file should be thread-safe.
  58       Please keep it that way. */
  59
  60 void gmx_fill_commrec_from_mpi(t_commrec            *cr,
  61                                const gmx_multisim_t *ms)
  62 {
  63 #if !GMX_MPI
  64     gmx_call("gmx_fill_commrec_from_mpi");
  65     GMX_UNUSED_VALUE(cr);
  66     GMX_UNUSED_VALUE(ms);
  67 #else
  68     if (!gmx_mpi_initialized())
  69     {
  70         gmx_comm("MPI has not been initialized properly");
  71     }
  72
  73     cr->nnodes           = gmx_node_num();
  74     cr->nodeid           = gmx_node_rank();
  75     // TODO This communicator should be always available. Currently we
  76     // make it multiple times, and keep it only when relevant. But the
  77     // cost of an extra communicator is negligible in single-node
  78     // cases (both thread-MPI and real MPI) case, and we need it in
  79     // all multi-node MPI cases with more than one PP rank per node,
  80     // with and without GPUs. By always having it available, we also
  81     // don't need to protect calls to mpi_comm_physicalnode, etc.
  82     if (PAR(cr) || isMultiSim(ms))
  83     {
  84         MPI_Comm_split(MPI_COMM_WORLD, gmx_physicalnode_id_hash(), cr->nodeid, &cr->mpi_comm_physicalnode);
  85     }
  86     cr->sim_nodeid       = cr->nodeid;
  87     cr->mpi_comm_mysim   = MPI_COMM_WORLD;
  88     cr->mpi_comm_mygroup = MPI_COMM_WORLD;
  89
  90 #endif
  91 }
  92
  93 t_commrec *init_commrec()
  94 {
  95     t_commrec    *cr;
  96
  97     snew(cr, 1);
  98
  99     cr->mpi_comm_physicalnode = MPI_COMM_NULL;
 100 #if GMX_LIB_MPI
 101     gmx_fill_commrec_from_mpi(cr, nullptr);
 102 #else
 103     cr->mpi_comm_mysim   = MPI_COMM_NULL;
 104     cr->mpi_comm_mygroup = MPI_COMM_NULL;
 105     cr->nnodes           = 1;
 106     cr->sim_nodeid       = 0;
 107     cr->nodeid           = cr->sim_nodeid;
 108 #endif
 109
 110     // TODO cr->duty should not be initialized here
 111     cr->duty = (DUTY_PP | DUTY_PME);
 112
 113 #if GMX_MPI && !MPI_IN_PLACE_EXISTS
 114     /* initialize the MPI_IN_PLACE replacement buffers */
 115     snew(cr->mpb, 1);
 116     cr->mpb->ibuf        = NULL;
 117     cr->mpb->libuf       = NULL;
 118     cr->mpb->fbuf        = NULL;
 119     cr->mpb->dbuf        = NULL;
 120     cr->mpb->ibuf_alloc  = 0;
 121     cr->mpb->libuf_alloc = 0;
 122     cr->mpb->fbuf_alloc  = 0;
 123     cr->mpb->dbuf_alloc  = 0;
 124 #endif
 125
 126     return cr;
 127 }
 128
 129 void done_mpi_in_place_buf(mpi_in_place_buf_t *buf)
 130 {
 131     if (nullptr != buf)
 132     {
 133         sfree(buf->ibuf);
 134         sfree(buf->libuf);
 135         sfree(buf->fbuf);
 136         sfree(buf->dbuf);
 137         sfree(buf);
 138     }
 139 }
 140
 141 void done_commrec(t_commrec *cr)
 142 {
 143 #if GMX_MPI
 144     if (cr->mpi_comm_physicalnode != MPI_COMM_NULL)
 145     {
 146         MPI_Comm_free(&cr->mpi_comm_physicalnode);
 147     }
 148 #endif
 149     if (nullptr != cr->dd)
 150     {
 151         // TODO: implement
 152         // done_domdec(cr->dd);
 153     }
 154     done_mpi_in_place_buf(cr->mpb);
 155     sfree(cr);
 156 }
 157
 158 t_commrec *reinitialize_commrec_for_this_thread(const t_commrec      *cro,
 159                                                 const gmx_multisim_t *ms)
 160 {
 161 #if GMX_THREAD_MPI
 162     t_commrec *cr;
 163
 164     /* make a thread-specific commrec */
 165     snew(cr, 1);
 166     /* now copy the whole thing, so settings like the number of PME nodes
 167        get propagated. */
 168     *cr = *cro;
 169
 170     /* and we start setting our own thread-specific values for things */
 171     gmx_fill_commrec_from_mpi(cr, ms);
 172
 173     // TODO cr->duty should not be initialized here
 174     cr->duty             = (DUTY_PP | DUTY_PME);
 175
 176     return cr;
 177 #else
 178     GMX_UNUSED_VALUE(cro);
 179     GMX_UNUSED_VALUE(ms);
 180     return nullptr;
 181 #endif
 182 }
 183
 184 void gmx_setup_nodecomm(FILE gmx_unused *fplog, t_commrec *cr)
 185 {
 186     gmx_nodecomm_t *nc;
 187
 188     /* Many MPI implementations do not optimize MPI_Allreduce
 189      * (and probably also other global communication calls)
 190      * for multi-core nodes connected by a network.
 191      * We can optimize such communication by using one MPI call
 192      * within each node and one between the nodes.
 193      * For MVAPICH2 and Intel MPI this reduces the time for
 194      * the global_stat communication by 25%
 195      * for 2x2-core 3 GHz Woodcrest connected by mixed DDR/SDR Infiniband.
 196      * B. Hess, November 2007
 197      */
 198
 199     nc = &cr->nc;
 200
 201     nc->bUse = FALSE;
 202 #if !GMX_THREAD_MPI
 203 #if GMX_MPI
 204     int n, rank;
 205
 206     MPI_Comm_size(cr->mpi_comm_mygroup, &n);
 207     MPI_Comm_rank(cr->mpi_comm_mygroup, &rank);
 208
 209     int nodehash = gmx_physicalnode_id_hash();
 210
 211     if (debug)
 212     {
 213         fprintf(debug, "In gmx_setup_nodecomm: splitting communicator of size %d\n", n);
 214     }
 215
 216
 217     /* The intra-node communicator, split on node number */
 218     MPI_Comm_split(cr->mpi_comm_mygroup, nodehash, rank, &nc->comm_intra);
 219     MPI_Comm_rank(nc->comm_intra, &nc->rank_intra);
 220     if (debug)
 221     {
 222         fprintf(debug, "In gmx_setup_nodecomm: node ID %d rank within node %d\n",
 223                 rank, nc->rank_intra);
 224     }
 225     /* The inter-node communicator, split on rank_intra.
 226      * We actually only need the one for rank=0,
 227      * but it is easier to create them all.
 228      */
 229     MPI_Comm_split(cr->mpi_comm_mygroup, nc->rank_intra, rank, &nc->comm_inter);
 230     /* Check if this really created two step communication */
 231     int ng, ni;
 232
 233     MPI_Comm_size(nc->comm_inter, &ng);
 234     MPI_Comm_size(nc->comm_intra, &ni);
 235     if (debug)
 236     {
 237         fprintf(debug, "In gmx_setup_nodecomm: groups %d, my group size %d\n",
 238                 ng, ni);
 239     }
 240
 241     if (getenv("GMX_NO_NODECOMM") == nullptr &&
 242         ((ng > 1 && ng < n) || (ni > 1 && ni < n)))
 243     {
 244         nc->bUse = TRUE;
 245         if (fplog)
 246         {
 247             fprintf(fplog, "Using two step summing over %d groups of on average %.1f ranks\n\n",
 248                     ng, (real)n/(real)ng);
 249         }
 250         if (nc->rank_intra > 0)
 251         {
 252             MPI_Comm_free(&nc->comm_inter);
 253         }
 254     }
 255     else
 256     {
 257         /* One group or all processes in a separate group, use normal summing */
 258         MPI_Comm_free(&nc->comm_inter);
 259         MPI_Comm_free(&nc->comm_intra);
 260         if (debug)
 261         {
 262             fprintf(debug, "In gmx_setup_nodecomm: not unsing separate inter- and intra-node communicators.\n");
 263         }
 264     }
 265 #endif
 266 #else
 267     /* tMPI runs only on a single node so just use the nodeid */
 268     nc->rank_intra = cr->nodeid;
 269 #endif
 270 }
 271
 272 void gmx_init_intranode_counters(t_commrec *cr)
 273 {
 274     /* counters for PP+PME and PP-only processes on my physical node */
 275     int nrank_intranode, rank_intranode;
 276     /* thread-MPI is not initialized when not running in parallel */
 277 #if GMX_MPI && !GMX_THREAD_MPI
 278     int nrank_world, rank_world;
 279     int i, myhash, *hash, *hash_s, *hash_pp, *hash_pp_s;
 280
 281     MPI_Comm_size(MPI_COMM_WORLD, &nrank_world);
 282     MPI_Comm_rank(MPI_COMM_WORLD, &rank_world);
 283
 284     /* Get a (hopefully unique) hash that identifies our physical node */
 285     myhash = gmx_physicalnode_id_hash();
 286
 287     /* We can't rely on MPI_IN_PLACE, so we need send and receive buffers */
 288     snew(hash,   nrank_world);
 289     snew(hash_s, nrank_world);
 290     snew(hash_pp,   nrank_world);
 291     snew(hash_pp_s, nrank_world);
 292
 293     hash_s[rank_world]    = myhash;
 294     hash_pp_s[rank_world] = thisRankHasDuty(cr, DUTY_PP) ? myhash : -1;
 295
 296     MPI_Allreduce(hash_s,    hash,    nrank_world, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
 297     MPI_Allreduce(hash_pp_s, hash_pp, nrank_world, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
 298
 299     nrank_intranode    = 0;
 300     rank_intranode     = 0;
 301     for (i = 0; i < nrank_world; i++)
 302     {
 303         if (hash[i] == myhash)
 304         {
 305             nrank_intranode++;
 306             if (i < rank_world)
 307             {
 308                 rank_intranode++;
 309             }
 310         }
 311     }
 312     sfree(hash);
 313     sfree(hash_s);
 314     sfree(hash_pp);
 315     sfree(hash_pp_s);
 316 #else
 317     /* Serial or thread-MPI code: we run within a single physical node */
 318     nrank_intranode    = cr->nnodes;
 319     rank_intranode     = cr->sim_nodeid;
 320 #endif
 321
 322     if (debug)
 323     {
 324         char sbuf[STRLEN];
 325         if (thisRankHasDuty(cr, DUTY_PP) && thisRankHasDuty(cr, DUTY_PME))
 326         {
 327             sprintf(sbuf, "PP+PME");
 328         }
 329         else
 330         {
 331             sprintf(sbuf, "%s", thisRankHasDuty(cr, DUTY_PP) ? "PP" : "PME");
 332         }
 333         fprintf(debug, "On %3s rank %d: nrank_intranode=%d, rank_intranode=%d\n",
 334                 sbuf, cr->sim_nodeid,
 335                 nrank_intranode, rank_intranode);
 336     }
 337
 338     cr->nrank_intranode    = nrank_intranode;
 339     cr->rank_intranode     = rank_intranode;
 340 }
 341
 342
 343 void gmx_barrier(const t_commrec gmx_unused *cr)
 344 {
 345 #if !GMX_MPI
 346     gmx_call("gmx_barrier");
 347 #else
 348     MPI_Barrier(cr->mpi_comm_mygroup);
 349 #endif
 350 }
 351
 352 void gmx_barrier_physical_node(const t_commrec gmx_unused *cr)
 353 {
 354 #if !GMX_MPI
 355     gmx_call("gmx_barrier_physical_node");
 356 #else
 357     MPI_Barrier(cr->mpi_comm_physicalnode);
 358 #endif
 359 }
 360
 361 void gmx_bcast(int gmx_unused nbytes, void gmx_unused *b, const t_commrec gmx_unused *cr)
 362 {
 363 #if !GMX_MPI
 364     gmx_call("gmx_bast");
 365 #else
 366     MPI_Bcast(b, nbytes, MPI_BYTE, MASTERRANK(cr), cr->mpi_comm_mygroup);
 367 #endif
 368 }
 369
 370 void gmx_bcast_sim(int gmx_unused nbytes, void gmx_unused *b, const t_commrec gmx_unused *cr)
 371 {
 372 #if !GMX_MPI
 373     gmx_call("gmx_bast");
 374 #else
 375     MPI_Bcast(b, nbytes, MPI_BYTE, MASTERRANK(cr), cr->mpi_comm_mysim);
 376 #endif
 377 }
 378
 379 void gmx_sumd(int gmx_unused nr, double gmx_unused r[], const t_commrec gmx_unused *cr)
 380 {
 381 #if !GMX_MPI
 382     gmx_call("gmx_sumd");
 383 #else
 384 #if MPI_IN_PLACE_EXISTS
 385     if (cr->nc.bUse)
 386     {
 387         if (cr->nc.rank_intra == 0)
 388         {
 389             /* Use two step summing. */
 390             MPI_Reduce(MPI_IN_PLACE, r, nr, MPI_DOUBLE, MPI_SUM, 0,
 391                        cr->nc.comm_intra);
 392             /* Sum the roots of the internal (intra) buffers. */
 393             MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_DOUBLE, MPI_SUM,
 394                           cr->nc.comm_inter);
 395         }
 396         else
 397         {
 398             /* This is here because of the silly MPI specification
 399                 that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
 400             MPI_Reduce(r, nullptr, nr, MPI_DOUBLE, MPI_SUM, 0, cr->nc.comm_intra);
 401         }
 402         MPI_Bcast(r, nr, MPI_DOUBLE, 0, cr->nc.comm_intra);
 403     }
 404     else
 405     {
 406         MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_DOUBLE, MPI_SUM,
 407                       cr->mpi_comm_mygroup);
 408     }
 409 #else
 410     int i;
 411
 412     if (nr > cr->mpb->dbuf_alloc)
 413     {
 414         cr->mpb->dbuf_alloc = nr;
 415         srenew(cr->mpb->dbuf, cr->mpb->dbuf_alloc);
 416     }
 417     if (cr->nc.bUse)
 418     {
 419         /* Use two step summing */
 420         MPI_Allreduce(r, cr->mpb->dbuf, nr, MPI_DOUBLE, MPI_SUM, cr->nc.comm_intra);
 421         if (cr->nc.rank_intra == 0)
 422         {
 423             /* Sum with the buffers reversed */
 424             MPI_Allreduce(cr->mpb->dbuf, r, nr, MPI_DOUBLE, MPI_SUM,
 425                           cr->nc.comm_inter);
 426         }
 427         MPI_Bcast(r, nr, MPI_DOUBLE, 0, cr->nc.comm_intra);
 428     }
 429     else
 430     {
 431         MPI_Allreduce(r, cr->mpb->dbuf, nr, MPI_DOUBLE, MPI_SUM,
 432                       cr->mpi_comm_mygroup);
 433         for (i = 0; i < nr; i++)
 434         {
 435             r[i] = cr->mpb->dbuf[i];
 436         }
 437     }
 438 #endif
 439 #endif
 440 }
 441
 442 void gmx_sumf(int gmx_unused nr, float gmx_unused r[], const t_commrec gmx_unused *cr)
 443 {
 444 #if !GMX_MPI
 445     gmx_call("gmx_sumf");
 446 #else
 447 #if MPI_IN_PLACE_EXISTS
 448     if (cr->nc.bUse)
 449     {
 450         /* Use two step summing.  */
 451         if (cr->nc.rank_intra == 0)
 452         {
 453             MPI_Reduce(MPI_IN_PLACE, r, nr, MPI_FLOAT, MPI_SUM, 0,
 454                        cr->nc.comm_intra);
 455             /* Sum the roots of the internal (intra) buffers */
 456             MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_FLOAT, MPI_SUM,
 457                           cr->nc.comm_inter);
 458         }
 459         else
 460         {
 461             /* This is here because of the silly MPI specification
 462                 that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
 463             MPI_Reduce(r, nullptr, nr, MPI_FLOAT, MPI_SUM, 0, cr->nc.comm_intra);
 464         }
 465         MPI_Bcast(r, nr, MPI_FLOAT, 0, cr->nc.comm_intra);
 466     }
 467     else
 468     {
 469         MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_FLOAT, MPI_SUM, cr->mpi_comm_mygroup);
 470     }
 471 #else
 472     int i;
 473
 474     if (nr > cr->mpb->fbuf_alloc)
 475     {
 476         cr->mpb->fbuf_alloc = nr;
 477         srenew(cr->mpb->fbuf, cr->mpb->fbuf_alloc);
 478     }
 479     if (cr->nc.bUse)
 480     {
 481         /* Use two step summing */
 482         MPI_Allreduce(r, cr->mpb->fbuf, nr, MPI_FLOAT, MPI_SUM, cr->nc.comm_intra);
 483         if (cr->nc.rank_intra == 0)
 484         {
 485             /* Sum with the buffers reversed */
 486             MPI_Allreduce(cr->mpb->fbuf, r, nr, MPI_FLOAT, MPI_SUM,
 487                           cr->nc.comm_inter);
 488         }
 489         MPI_Bcast(r, nr, MPI_FLOAT, 0, cr->nc.comm_intra);
 490     }
 491     else
 492     {
 493         MPI_Allreduce(r, cr->mpb->fbuf, nr, MPI_FLOAT, MPI_SUM,
 494                       cr->mpi_comm_mygroup);
 495         for (i = 0; i < nr; i++)
 496         {
 497             r[i] = cr->mpb->fbuf[i];
 498         }
 499     }
 500 #endif
 501 #endif
 502 }
 503
 504 void gmx_sumi(int gmx_unused nr, int gmx_unused r[], const t_commrec gmx_unused *cr)
 505 {
 506 #if !GMX_MPI
 507     gmx_call("gmx_sumi");
 508 #else
 509 #if MPI_IN_PLACE_EXISTS
 510     if (cr->nc.bUse)
 511     {
 512         /* Use two step summing */
 513         if (cr->nc.rank_intra == 0)
 514         {
 515             MPI_Reduce(MPI_IN_PLACE, r, nr, MPI_INT, MPI_SUM, 0, cr->nc.comm_intra);
 516             /* Sum with the buffers reversed */
 517             MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT, MPI_SUM, cr->nc.comm_inter);
 518         }
 519         else
 520         {
 521             /* This is here because of the silly MPI specification
 522                 that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
 523             MPI_Reduce(r, nullptr, nr, MPI_INT, MPI_SUM, 0, cr->nc.comm_intra);
 524         }
 525         MPI_Bcast(r, nr, MPI_INT, 0, cr->nc.comm_intra);
 526     }
 527     else
 528     {
 529         MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT, MPI_SUM, cr->mpi_comm_mygroup);
 530     }
 531 #else
 532     int i;
 533
 534     if (nr > cr->mpb->ibuf_alloc)
 535     {
 536         cr->mpb->ibuf_alloc = nr;
 537         srenew(cr->mpb->ibuf, cr->mpb->ibuf_alloc);
 538     }
 539     if (cr->nc.bUse)
 540     {
 541         /* Use two step summing */
 542         MPI_Allreduce(r, cr->mpb->ibuf, nr, MPI_INT, MPI_SUM, cr->nc.comm_intra);
 543         if (cr->nc.rank_intra == 0)
 544         {
 545             /* Sum with the buffers reversed */
 546             MPI_Allreduce(cr->mpb->ibuf, r, nr, MPI_INT, MPI_SUM, cr->nc.comm_inter);
 547         }
 548         MPI_Bcast(r, nr, MPI_INT, 0, cr->nc.comm_intra);
 549     }
 550     else
 551     {
 552         MPI_Allreduce(r, cr->mpb->ibuf, nr, MPI_INT, MPI_SUM, cr->mpi_comm_mygroup);
 553         for (i = 0; i < nr; i++)
 554         {
 555             r[i] = cr->mpb->ibuf[i];
 556         }
 557     }
 558 #endif
 559 #endif
 560 }
 561
 562 void gmx_sumli(int gmx_unused nr, gmx_int64_t gmx_unused r[], const t_commrec gmx_unused *cr)
 563 {
 564 #if !GMX_MPI
 565     gmx_call("gmx_sumli");
 566 #else
 567 #if MPI_IN_PLACE_EXISTS
 568     if (cr->nc.bUse)
 569     {
 570         /* Use two step summing */
 571         if (cr->nc.rank_intra == 0)
 572         {
 573             MPI_Reduce(MPI_IN_PLACE, r, nr, MPI_INT64_T, MPI_SUM, 0,
 574                        cr->nc.comm_intra);
 575             /* Sum with the buffers reversed */
 576             MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT64_T, MPI_SUM,
 577                           cr->nc.comm_inter);
 578         }
 579         else
 580         {
 581             /* This is here because of the silly MPI specification
 582                 that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
 583             MPI_Reduce(r, nullptr, nr, MPI_INT64_T, MPI_SUM, 0, cr->nc.comm_intra);
 584         }
 585         MPI_Bcast(r, nr, MPI_INT64_T, 0, cr->nc.comm_intra);
 586     }
 587     else
 588     {
 589         MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT64_T, MPI_SUM, cr->mpi_comm_mygroup);
 590     }
 591 #else
 592     int i;
 593
 594     if (nr > cr->mpb->libuf_alloc)
 595     {
 596         cr->mpb->libuf_alloc = nr;
 597         srenew(cr->mpb->libuf, cr->mpb->libuf_alloc);
 598     }
 599     if (cr->nc.bUse)
 600     {
 601         /* Use two step summing */
 602         MPI_Allreduce(r, cr->mpb->libuf, nr, MPI_INT64_T, MPI_SUM,
 603                       cr->nc.comm_intra);
 604         if (cr->nc.rank_intra == 0)
 605         {
 606             /* Sum with the buffers reversed */
 607             MPI_Allreduce(cr->mpb->libuf, r, nr, MPI_INT64_T, MPI_SUM,
 608                           cr->nc.comm_inter);
 609         }
 610         MPI_Bcast(r, nr, MPI_INT64_T, 0, cr->nc.comm_intra);
 611     }
 612     else
 613     {
 614         MPI_Allreduce(r, cr->mpb->libuf, nr, MPI_INT64_T, MPI_SUM,
 615                       cr->mpi_comm_mygroup);
 616         for (i = 0; i < nr; i++)
 617         {
 618             r[i] = cr->mpb->libuf[i];
 619         }
 620     }
 621 #endif
 622 #endif
 623 }
 624
 625
 626
 627 #if GMX_MPI
 628 static void gmx_sumd_comm(int nr, double r[], MPI_Comm mpi_comm)
 629 {
 630 #if MPI_IN_PLACE_EXISTS
 631     MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_DOUBLE, MPI_SUM, mpi_comm);
 632 #else
 633     /* this function is only used in code that is not performance critical,
 634        (during setup, when comm_rec is not the appropriate communication
 635        structure), so this isn't as bad as it looks. */
 636     double *buf;
 637     int     i;
 638
 639     snew(buf, nr);
 640     MPI_Allreduce(r, buf, nr, MPI_DOUBLE, MPI_SUM, mpi_comm);
 641     for (i = 0; i < nr; i++)
 642     {
 643         r[i] = buf[i];
 644     }
 645     sfree(buf);
 646 #endif
 647 }
 648 #endif
 649
 650 #if GMX_MPI
 651 static void gmx_sumf_comm(int nr, float r[], MPI_Comm mpi_comm)
 652 {
 653 #if MPI_IN_PLACE_EXISTS
 654     MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_FLOAT, MPI_SUM, mpi_comm);
 655 #else
 656     /* this function is only used in code that is not performance critical,
 657        (during setup, when comm_rec is not the appropriate communication
 658        structure), so this isn't as bad as it looks. */
 659     float *buf;
 660     int    i;
 661
 662     snew(buf, nr);
 663     MPI_Allreduce(r, buf, nr, MPI_FLOAT, MPI_SUM, mpi_comm);
 664     for (i = 0; i < nr; i++)
 665     {
 666         r[i] = buf[i];
 667     }
 668     sfree(buf);
 669 #endif
 670 }
 671 #endif
 672
 673 void gmx_sumd_sim(int gmx_unused nr, double gmx_unused r[], const gmx_multisim_t gmx_unused *ms)
 674 {
 675 #if !GMX_MPI
 676     gmx_call("gmx_sumd_sim");
 677 #else
 678     gmx_sumd_comm(nr, r, ms->mpi_comm_masters);
 679 #endif
 680 }
 681
 682 void gmx_sumf_sim(int gmx_unused nr, float gmx_unused r[], const gmx_multisim_t gmx_unused *ms)
 683 {
 684 #if !GMX_MPI
 685     gmx_call("gmx_sumf_sim");
 686 #else
 687     gmx_sumf_comm(nr, r, ms->mpi_comm_masters);
 688 #endif
 689 }
 690
 691 void gmx_sumi_sim(int gmx_unused nr, int gmx_unused r[], const gmx_multisim_t gmx_unused *ms)
 692 {
 693 #if !GMX_MPI
 694     gmx_call("gmx_sumi_sim");
 695 #else
 696 #if MPI_IN_PLACE_EXISTS
 697     MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT, MPI_SUM, ms->mpi_comm_masters);
 698 #else
 699     /* this is thread-unsafe, but it will do for now: */
 700     int i;
 701
 702     if (nr > ms->mpb->ibuf_alloc)
 703     {
 704         ms->mpb->ibuf_alloc = nr;
 705         srenew(ms->mpb->ibuf, ms->mpb->ibuf_alloc);
 706     }
 707     MPI_Allreduce(r, ms->mpb->ibuf, nr, MPI_INT, MPI_SUM, ms->mpi_comm_masters);
 708     for (i = 0; i < nr; i++)
 709     {
 710         r[i] = ms->mpb->ibuf[i];
 711     }
 712 #endif
 713 #endif
 714 }
 715
 716 void gmx_sumli_sim(int gmx_unused nr, gmx_int64_t gmx_unused r[], const gmx_multisim_t gmx_unused *ms)
 717 {
 718 #if !GMX_MPI
 719     gmx_call("gmx_sumli_sim");
 720 #else
 721 #if MPI_IN_PLACE_EXISTS
 722     MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT64_T, MPI_SUM,
 723                   ms->mpi_comm_masters);
 724 #else
 725     /* this is thread-unsafe, but it will do for now: */
 726     int i;
 727
 728     if (nr > ms->mpb->libuf_alloc)
 729     {
 730         ms->mpb->libuf_alloc = nr;
 731         srenew(ms->mpb->libuf, ms->mpb->libuf_alloc);
 732     }
 733     MPI_Allreduce(r, ms->mpb->libuf, nr, MPI_INT64_T, MPI_SUM,
 734                   ms->mpi_comm_masters);
 735     for (i = 0; i < nr; i++)
 736     {
 737         r[i] = ms->mpb->libuf[i];
 738     }
 739 #endif
 740 #endif
 741 }
 742
 743 const char *opt2fn_master(const char *opt, int nfile, const t_filenm fnm[],
 744                           t_commrec *cr)
 745 {
 746     return SIMMASTER(cr) ? opt2fn(opt, nfile, fnm) : nullptr;
 747 }
 748
 749 void gmx_fatal_collective(int f_errno, const char *file, int line,
 750                           MPI_Comm comm, gmx_bool bMaster,
 751                           const char *fmt, ...)
 752 {
 753     va_list  ap;
 754     gmx_bool bFinalize;
 755 #if GMX_MPI
 756     int      result;
 757     /* Check if we are calling on all processes in MPI_COMM_WORLD */
 758     MPI_Comm_compare(comm, MPI_COMM_WORLD, &result);
 759     /* Any result except MPI_UNEQUAL allows us to call MPI_Finalize */
 760     bFinalize = (result != MPI_UNEQUAL);
 761 #else
 762     GMX_UNUSED_VALUE(comm);
 763     bFinalize = TRUE;
 764 #endif
 765
 766     va_start(ap, fmt);
 767     gmx_fatal_mpi_va(f_errno, file, line, bMaster, bFinalize, fmt, ap);
 768     va_end(ap);
 769 }