src/gromacs/gmxlib/network.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #include "gmxpre.h"
  38
  39 #include "gromacs/legacyheaders/network.h"
  40
  41 #include "config.h"
  42
  43 #include <ctype.h>
  44 #include <stdarg.h>
  45 #include <stdlib.h>
  46 #include <string.h>
  47
  48 #include "gromacs/legacyheaders/copyrite.h"
  49 #include "gromacs/legacyheaders/macros.h"
  50 #include "gromacs/legacyheaders/types/commrec.h"
  51 #include "gromacs/utility/basenetwork.h"
  52 #include "gromacs/utility/cstringutil.h"
  53 #include "gromacs/utility/fatalerror.h"
  54 #include "gromacs/utility/futil.h"
  55 #include "gromacs/utility/gmxmpi.h"
  56 #include "gromacs/utility/smalloc.h"
  57
  58 /* The source code in this file should be thread-safe.
  59       Please keep it that way. */
  60
  61 void gmx_fill_commrec_from_mpi(t_commrec gmx_unused *cr)
  62 {
  63 #ifndef GMX_MPI
  64     gmx_call("gmx_fill_commrec_from_mpi");
  65 #else
  66     if (!gmx_mpi_initialized())
  67     {
  68         gmx_comm("MPI has not been initialized properly");
  69     }
  70
  71     cr->nnodes           = gmx_node_num();
  72     cr->nodeid           = gmx_node_rank();
  73     cr->sim_nodeid       = cr->nodeid;
  74     cr->mpi_comm_mysim   = MPI_COMM_WORLD;
  75     cr->mpi_comm_mygroup = MPI_COMM_WORLD;
  76
  77 #endif
  78 }
  79
  80 t_commrec *init_commrec()
  81 {
  82     t_commrec    *cr;
  83
  84     snew(cr, 1);
  85
  86 #ifdef GMX_LIB_MPI
  87     gmx_fill_commrec_from_mpi(cr);
  88 #else
  89     cr->mpi_comm_mysim   = NULL;
  90     cr->mpi_comm_mygroup = NULL;
  91     cr->nnodes           = 1;
  92     cr->sim_nodeid       = 0;
  93     cr->nodeid           = cr->sim_nodeid;
  94 #endif
  95
  96     // TODO cr->duty should not be initialized here
  97     cr->duty = (DUTY_PP | DUTY_PME);
  98
  99 #if defined GMX_MPI && !defined MPI_IN_PLACE_EXISTS
 100     /* initialize the MPI_IN_PLACE replacement buffers */
 101     snew(cr->mpb, 1);
 102     cr->mpb->ibuf        = NULL;
 103     cr->mpb->libuf       = NULL;
 104     cr->mpb->fbuf        = NULL;
 105     cr->mpb->dbuf        = NULL;
 106     cr->mpb->ibuf_alloc  = 0;
 107     cr->mpb->libuf_alloc = 0;
 108     cr->mpb->fbuf_alloc  = 0;
 109     cr->mpb->dbuf_alloc  = 0;
 110 #endif
 111
 112     return cr;
 113 }
 114
 115 t_commrec *reinitialize_commrec_for_this_thread(const t_commrec gmx_unused *cro)
 116 {
 117 #ifdef GMX_THREAD_MPI
 118     t_commrec *cr;
 119
 120     /* make a thread-specific commrec */
 121     snew(cr, 1);
 122     /* now copy the whole thing, so settings like the number of PME nodes
 123        get propagated. */
 124     *cr = *cro;
 125
 126     /* and we start setting our own thread-specific values for things */
 127     gmx_fill_commrec_from_mpi(cr);
 128
 129     // TODO cr->duty should not be initialized here
 130     cr->duty             = (DUTY_PP | DUTY_PME);
 131
 132     return cr;
 133 #else
 134     return NULL;
 135 #endif
 136 }
 137
 138 void gmx_setup_nodecomm(FILE gmx_unused *fplog, t_commrec *cr)
 139 {
 140     gmx_nodecomm_t *nc;
 141     int             n, rank, nodehash, ng, ni;
 142
 143     /* Many MPI implementations do not optimize MPI_Allreduce
 144      * (and probably also other global communication calls)
 145      * for multi-core nodes connected by a network.
 146      * We can optimize such communication by using one MPI call
 147      * within each node and one between the nodes.
 148      * For MVAPICH2 and Intel MPI this reduces the time for
 149      * the global_stat communication by 25%
 150      * for 2x2-core 3 GHz Woodcrest connected by mixed DDR/SDR Infiniband.
 151      * B. Hess, November 2007
 152      */
 153
 154     nc = &cr->nc;
 155
 156     nc->bUse = FALSE;
 157 #ifndef GMX_THREAD_MPI
 158 #ifdef GMX_MPI
 159     MPI_Comm_size(cr->mpi_comm_mygroup, &n);
 160     MPI_Comm_rank(cr->mpi_comm_mygroup, &rank);
 161
 162     nodehash = gmx_physicalnode_id_hash();
 163
 164     if (debug)
 165     {
 166         fprintf(debug, "In gmx_setup_nodecomm: splitting communicator of size %d\n", n);
 167     }
 168
 169
 170     /* The intra-node communicator, split on node number */
 171     MPI_Comm_split(cr->mpi_comm_mygroup, nodehash, rank, &nc->comm_intra);
 172     MPI_Comm_rank(nc->comm_intra, &nc->rank_intra);
 173     if (debug)
 174     {
 175         fprintf(debug, "In gmx_setup_nodecomm: node ID %d rank within node %d\n",
 176                 rank, nc->rank_intra);
 177     }
 178     /* The inter-node communicator, split on rank_intra.
 179      * We actually only need the one for rank=0,
 180      * but it is easier to create them all.
 181      */
 182     MPI_Comm_split(cr->mpi_comm_mygroup, nc->rank_intra, rank, &nc->comm_inter);
 183     /* Check if this really created two step communication */
 184     MPI_Comm_size(nc->comm_inter, &ng);
 185     MPI_Comm_size(nc->comm_intra, &ni);
 186     if (debug)
 187     {
 188         fprintf(debug, "In gmx_setup_nodecomm: groups %d, my group size %d\n",
 189                 ng, ni);
 190     }
 191
 192     if (getenv("GMX_NO_NODECOMM") == NULL &&
 193         ((ng > 1 && ng < n) || (ni > 1 && ni < n)))
 194     {
 195         nc->bUse = TRUE;
 196         if (fplog)
 197         {
 198             fprintf(fplog, "Using two step summing over %d groups of on average %.1f ranks\n\n",
 199                     ng, (real)n/(real)ng);
 200         }
 201         if (nc->rank_intra > 0)
 202         {
 203             MPI_Comm_free(&nc->comm_inter);
 204         }
 205     }
 206     else
 207     {
 208         /* One group or all processes in a separate group, use normal summing */
 209         MPI_Comm_free(&nc->comm_inter);
 210         MPI_Comm_free(&nc->comm_intra);
 211         if (debug)
 212         {
 213             fprintf(debug, "In gmx_setup_nodecomm: not unsing separate inter- and intra-node communicators.\n");
 214         }
 215     }
 216 #endif
 217 #else
 218     /* tMPI runs only on a single node so just use the nodeid */
 219     nc->rank_intra = cr->nodeid;
 220 #endif
 221 }
 222
 223 void gmx_init_intranode_counters(t_commrec *cr)
 224 {
 225     /* counters for PP+PME and PP-only processes on my physical node */
 226     int nrank_intranode, rank_intranode;
 227     int nrank_pp_intranode, rank_pp_intranode;
 228     /* thread-MPI is not initialized when not running in parallel */
 229 #if defined GMX_MPI && !defined GMX_THREAD_MPI
 230     int nrank_world, rank_world;
 231     int i, myhash, *hash, *hash_s, *hash_pp, *hash_pp_s;
 232
 233     MPI_Comm_size(MPI_COMM_WORLD, &nrank_world);
 234     MPI_Comm_rank(MPI_COMM_WORLD, &rank_world);
 235
 236     /* Get a (hopefully unique) hash that identifies our physical node */
 237     myhash = gmx_physicalnode_id_hash();
 238
 239     /* We can't rely on MPI_IN_PLACE, so we need send and receive buffers */
 240     snew(hash,   nrank_world);
 241     snew(hash_s, nrank_world);
 242     snew(hash_pp,   nrank_world);
 243     snew(hash_pp_s, nrank_world);
 244
 245     hash_s[rank_world]    = myhash;
 246     hash_pp_s[rank_world] = (cr->duty & DUTY_PP) ? myhash : -1;
 247
 248     MPI_Allreduce(hash_s,    hash,    nrank_world, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
 249     MPI_Allreduce(hash_pp_s, hash_pp, nrank_world, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
 250
 251     nrank_intranode    = 0;
 252     rank_intranode     = 0;
 253     nrank_pp_intranode = 0;
 254     rank_pp_intranode  = 0;
 255     for (i = 0; i < nrank_world; i++)
 256     {
 257         if (hash[i] == myhash)
 258         {
 259             nrank_intranode++;
 260             if (i < rank_world)
 261             {
 262                 rank_intranode++;
 263             }
 264         }
 265         if (hash_pp[i] == myhash)
 266         {
 267             nrank_pp_intranode++;
 268             if ((cr->duty & DUTY_PP) && i < rank_world)
 269             {
 270                 rank_pp_intranode++;
 271             }
 272         }
 273     }
 274     sfree(hash);
 275     sfree(hash_s);
 276     sfree(hash_pp);
 277     sfree(hash_pp_s);
 278 #else
 279     /* Serial or thread-MPI code: we run within a single physical node */
 280     nrank_intranode    = cr->nnodes;
 281     rank_intranode     = cr->sim_nodeid;
 282     nrank_pp_intranode = cr->nnodes - cr->npmenodes;
 283     rank_pp_intranode  = cr->nodeid;
 284 #endif
 285
 286     if (debug)
 287     {
 288         char sbuf[STRLEN];
 289         if (cr->duty & DUTY_PP && cr->duty & DUTY_PME)
 290         {
 291             sprintf(sbuf, "PP+PME");
 292         }
 293         else
 294         {
 295             sprintf(sbuf, "%s", cr->duty & DUTY_PP ? "PP" : "PME");
 296         }
 297         fprintf(debug, "On %3s rank %d: nrank_intranode=%d, rank_intranode=%d, "
 298                 "nrank_pp_intranode=%d, rank_pp_intranode=%d\n",
 299                 sbuf, cr->sim_nodeid,
 300                 nrank_intranode, rank_intranode,
 301                 nrank_pp_intranode, rank_pp_intranode);
 302     }
 303
 304     cr->nrank_intranode    = nrank_intranode;
 305     cr->rank_intranode     = rank_intranode;
 306     cr->nrank_pp_intranode = nrank_pp_intranode;
 307     cr->rank_pp_intranode  = rank_pp_intranode;
 308 }
 309
 310
 311 void gmx_barrier(const t_commrec gmx_unused *cr)
 312 {
 313 #ifndef GMX_MPI
 314     gmx_call("gmx_barrier");
 315 #else
 316     MPI_Barrier(cr->mpi_comm_mygroup);
 317 #endif
 318 }
 319
 320 void gmx_bcast(int gmx_unused nbytes, void gmx_unused *b, const t_commrec gmx_unused *cr)
 321 {
 322 #ifndef GMX_MPI
 323     gmx_call("gmx_bast");
 324 #else
 325     MPI_Bcast(b, nbytes, MPI_BYTE, MASTERRANK(cr), cr->mpi_comm_mygroup);
 326 #endif
 327 }
 328
 329 void gmx_bcast_sim(int gmx_unused nbytes, void gmx_unused *b, const t_commrec gmx_unused *cr)
 330 {
 331 #ifndef GMX_MPI
 332     gmx_call("gmx_bast");
 333 #else
 334     MPI_Bcast(b, nbytes, MPI_BYTE, MASTERRANK(cr), cr->mpi_comm_mysim);
 335 #endif
 336 }
 337
 338 void gmx_sumd(int gmx_unused nr, double gmx_unused r[], const t_commrec gmx_unused *cr)
 339 {
 340 #ifndef GMX_MPI
 341     gmx_call("gmx_sumd");
 342 #else
 343 #if defined(MPI_IN_PLACE_EXISTS)
 344     if (cr->nc.bUse)
 345     {
 346         if (cr->nc.rank_intra == 0)
 347         {
 348             /* Use two step summing. */
 349             MPI_Reduce(MPI_IN_PLACE, r, nr, MPI_DOUBLE, MPI_SUM, 0,
 350                        cr->nc.comm_intra);
 351             /* Sum the roots of the internal (intra) buffers. */
 352             MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_DOUBLE, MPI_SUM,
 353                           cr->nc.comm_inter);
 354         }
 355         else
 356         {
 357             /* This is here because of the silly MPI specification
 358                 that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
 359             MPI_Reduce(r, NULL, nr, MPI_DOUBLE, MPI_SUM, 0, cr->nc.comm_intra);
 360         }
 361         MPI_Bcast(r, nr, MPI_DOUBLE, 0, cr->nc.comm_intra);
 362     }
 363     else
 364     {
 365         MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_DOUBLE, MPI_SUM,
 366                       cr->mpi_comm_mygroup);
 367     }
 368 #else
 369     int i;
 370
 371     if (nr > cr->mpb->dbuf_alloc)
 372     {
 373         cr->mpb->dbuf_alloc = nr;
 374         srenew(cr->mpb->dbuf, cr->mpb->dbuf_alloc);
 375     }
 376     if (cr->nc.bUse)
 377     {
 378         /* Use two step summing */
 379         MPI_Allreduce(r, cr->mpb->dbuf, nr, MPI_DOUBLE, MPI_SUM, cr->nc.comm_intra);
 380         if (cr->nc.rank_intra == 0)
 381         {
 382             /* Sum with the buffers reversed */
 383             MPI_Allreduce(cr->mpb->dbuf, r, nr, MPI_DOUBLE, MPI_SUM,
 384                           cr->nc.comm_inter);
 385         }
 386         MPI_Bcast(r, nr, MPI_DOUBLE, 0, cr->nc.comm_intra);
 387     }
 388     else
 389     {
 390         MPI_Allreduce(r, cr->mpb->dbuf, nr, MPI_DOUBLE, MPI_SUM,
 391                       cr->mpi_comm_mygroup);
 392         for (i = 0; i < nr; i++)
 393         {
 394             r[i] = cr->mpb->dbuf[i];
 395         }
 396     }
 397 #endif
 398 #endif
 399 }
 400
 401 void gmx_sumf(int gmx_unused nr, float gmx_unused r[], const t_commrec gmx_unused *cr)
 402 {
 403 #ifndef GMX_MPI
 404     gmx_call("gmx_sumf");
 405 #else
 406 #if defined(MPI_IN_PLACE_EXISTS)
 407     if (cr->nc.bUse)
 408     {
 409         /* Use two step summing.  */
 410         if (cr->nc.rank_intra == 0)
 411         {
 412             MPI_Reduce(MPI_IN_PLACE, r, nr, MPI_FLOAT, MPI_SUM, 0,
 413                        cr->nc.comm_intra);
 414             /* Sum the roots of the internal (intra) buffers */
 415             MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_FLOAT, MPI_SUM,
 416                           cr->nc.comm_inter);
 417         }
 418         else
 419         {
 420             /* This is here because of the silly MPI specification
 421                 that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
 422             MPI_Reduce(r, NULL, nr, MPI_FLOAT, MPI_SUM, 0, cr->nc.comm_intra);
 423         }
 424         MPI_Bcast(r, nr, MPI_FLOAT, 0, cr->nc.comm_intra);
 425     }
 426     else
 427     {
 428         MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_FLOAT, MPI_SUM, cr->mpi_comm_mygroup);
 429     }
 430 #else
 431     int i;
 432
 433     if (nr > cr->mpb->fbuf_alloc)
 434     {
 435         cr->mpb->fbuf_alloc = nr;
 436         srenew(cr->mpb->fbuf, cr->mpb->fbuf_alloc);
 437     }
 438     if (cr->nc.bUse)
 439     {
 440         /* Use two step summing */
 441         MPI_Allreduce(r, cr->mpb->fbuf, nr, MPI_FLOAT, MPI_SUM, cr->nc.comm_intra);
 442         if (cr->nc.rank_intra == 0)
 443         {
 444             /* Sum with the buffers reversed */
 445             MPI_Allreduce(cr->mpb->fbuf, r, nr, MPI_FLOAT, MPI_SUM,
 446                           cr->nc.comm_inter);
 447         }
 448         MPI_Bcast(r, nr, MPI_FLOAT, 0, cr->nc.comm_intra);
 449     }
 450     else
 451     {
 452         MPI_Allreduce(r, cr->mpb->fbuf, nr, MPI_FLOAT, MPI_SUM,
 453                       cr->mpi_comm_mygroup);
 454         for (i = 0; i < nr; i++)
 455         {
 456             r[i] = cr->mpb->fbuf[i];
 457         }
 458     }
 459 #endif
 460 #endif
 461 }
 462
 463 void gmx_sumi(int gmx_unused nr, int gmx_unused r[], const t_commrec gmx_unused *cr)
 464 {
 465 #ifndef GMX_MPI
 466     gmx_call("gmx_sumi");
 467 #else
 468 #if defined(MPI_IN_PLACE_EXISTS)
 469     if (cr->nc.bUse)
 470     {
 471         /* Use two step summing */
 472         if (cr->nc.rank_intra == 0)
 473         {
 474             MPI_Reduce(MPI_IN_PLACE, r, nr, MPI_INT, MPI_SUM, 0, cr->nc.comm_intra);
 475             /* Sum with the buffers reversed */
 476             MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT, MPI_SUM, cr->nc.comm_inter);
 477         }
 478         else
 479         {
 480             /* This is here because of the silly MPI specification
 481                 that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
 482             MPI_Reduce(r, NULL, nr, MPI_INT, MPI_SUM, 0, cr->nc.comm_intra);
 483         }
 484         MPI_Bcast(r, nr, MPI_INT, 0, cr->nc.comm_intra);
 485     }
 486     else
 487     {
 488         MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT, MPI_SUM, cr->mpi_comm_mygroup);
 489     }
 490 #else
 491     int i;
 492
 493     if (nr > cr->mpb->ibuf_alloc)
 494     {
 495         cr->mpb->ibuf_alloc = nr;
 496         srenew(cr->mpb->ibuf, cr->mpb->ibuf_alloc);
 497     }
 498     if (cr->nc.bUse)
 499     {
 500         /* Use two step summing */
 501         MPI_Allreduce(r, cr->mpb->ibuf, nr, MPI_INT, MPI_SUM, cr->nc.comm_intra);
 502         if (cr->nc.rank_intra == 0)
 503         {
 504             /* Sum with the buffers reversed */
 505             MPI_Allreduce(cr->mpb->ibuf, r, nr, MPI_INT, MPI_SUM, cr->nc.comm_inter);
 506         }
 507         MPI_Bcast(r, nr, MPI_INT, 0, cr->nc.comm_intra);
 508     }
 509     else
 510     {
 511         MPI_Allreduce(r, cr->mpb->ibuf, nr, MPI_INT, MPI_SUM, cr->mpi_comm_mygroup);
 512         for (i = 0; i < nr; i++)
 513         {
 514             r[i] = cr->mpb->ibuf[i];
 515         }
 516     }
 517 #endif
 518 #endif
 519 }
 520
 521 void gmx_sumli(int gmx_unused nr, gmx_int64_t gmx_unused r[], const t_commrec gmx_unused *cr)
 522 {
 523 #ifndef GMX_MPI
 524     gmx_call("gmx_sumli");
 525 #else
 526 #if defined(MPI_IN_PLACE_EXISTS)
 527     if (cr->nc.bUse)
 528     {
 529         /* Use two step summing */
 530         if (cr->nc.rank_intra == 0)
 531         {
 532             MPI_Reduce(MPI_IN_PLACE, r, nr, MPI_INT64_T, MPI_SUM, 0,
 533                        cr->nc.comm_intra);
 534             /* Sum with the buffers reversed */
 535             MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT64_T, MPI_SUM,
 536                           cr->nc.comm_inter);
 537         }
 538         else
 539         {
 540             /* This is here because of the silly MPI specification
 541                 that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
 542             MPI_Reduce(r, NULL, nr, MPI_INT64_T, MPI_SUM, 0, cr->nc.comm_intra);
 543         }
 544         MPI_Bcast(r, nr, MPI_INT64_T, 0, cr->nc.comm_intra);
 545     }
 546     else
 547     {
 548         MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT64_T, MPI_SUM, cr->mpi_comm_mygroup);
 549     }
 550 #else
 551     int i;
 552
 553     if (nr > cr->mpb->libuf_alloc)
 554     {
 555         cr->mpb->libuf_alloc = nr;
 556         srenew(cr->mpb->libuf, cr->mpb->libuf_alloc);
 557     }
 558     if (cr->nc.bUse)
 559     {
 560         /* Use two step summing */
 561         MPI_Allreduce(r, cr->mpb->libuf, nr, MPI_INT64_T, MPI_SUM,
 562                       cr->nc.comm_intra);
 563         if (cr->nc.rank_intra == 0)
 564         {
 565             /* Sum with the buffers reversed */
 566             MPI_Allreduce(cr->mpb->libuf, r, nr, MPI_INT64_T, MPI_SUM,
 567                           cr->nc.comm_inter);
 568         }
 569         MPI_Bcast(r, nr, MPI_INT64_T, 0, cr->nc.comm_intra);
 570     }
 571     else
 572     {
 573         MPI_Allreduce(r, cr->mpb->libuf, nr, MPI_INT64_T, MPI_SUM,
 574                       cr->mpi_comm_mygroup);
 575         for (i = 0; i < nr; i++)
 576         {
 577             r[i] = cr->mpb->libuf[i];
 578         }
 579     }
 580 #endif
 581 #endif
 582 }
 583
 584
 585
 586 #ifdef GMX_MPI
 587 static void gmx_sumd_comm(int nr, double r[], MPI_Comm mpi_comm)
 588 {
 589 #if defined(MPI_IN_PLACE_EXISTS)
 590     MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_DOUBLE, MPI_SUM, mpi_comm);
 591 #else
 592     /* this function is only used in code that is not performance critical,
 593        (during setup, when comm_rec is not the appropriate communication
 594        structure), so this isn't as bad as it looks. */
 595     double *buf;
 596     int     i;
 597
 598     snew(buf, nr);
 599     MPI_Allreduce(r, buf, nr, MPI_DOUBLE, MPI_SUM, mpi_comm);
 600     for (i = 0; i < nr; i++)
 601     {
 602         r[i] = buf[i];
 603     }
 604     sfree(buf);
 605 #endif
 606 }
 607 #endif
 608
 609 #ifdef GMX_MPI
 610 static void gmx_sumf_comm(int nr, float r[], MPI_Comm mpi_comm)
 611 {
 612 #if defined(MPI_IN_PLACE_EXISTS)
 613     MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_FLOAT, MPI_SUM, mpi_comm);
 614 #else
 615     /* this function is only used in code that is not performance critical,
 616        (during setup, when comm_rec is not the appropriate communication
 617        structure), so this isn't as bad as it looks. */
 618     float *buf;
 619     int    i;
 620
 621     snew(buf, nr);
 622     MPI_Allreduce(r, buf, nr, MPI_FLOAT, MPI_SUM, mpi_comm);
 623     for (i = 0; i < nr; i++)
 624     {
 625         r[i] = buf[i];
 626     }
 627     sfree(buf);
 628 #endif
 629 }
 630 #endif
 631
 632 void gmx_sumd_sim(int gmx_unused nr, double gmx_unused r[], const gmx_multisim_t gmx_unused *ms)
 633 {
 634 #ifndef GMX_MPI
 635     gmx_call("gmx_sumd_sim");
 636 #else
 637     gmx_sumd_comm(nr, r, ms->mpi_comm_masters);
 638 #endif
 639 }
 640
 641 void gmx_sumf_sim(int gmx_unused nr, float gmx_unused r[], const gmx_multisim_t gmx_unused *ms)
 642 {
 643 #ifndef GMX_MPI
 644     gmx_call("gmx_sumf_sim");
 645 #else
 646     gmx_sumf_comm(nr, r, ms->mpi_comm_masters);
 647 #endif
 648 }
 649
 650 void gmx_sumi_sim(int gmx_unused nr, int gmx_unused r[], const gmx_multisim_t gmx_unused *ms)
 651 {
 652 #ifndef GMX_MPI
 653     gmx_call("gmx_sumi_sim");
 654 #else
 655 #if defined(MPI_IN_PLACE_EXISTS)
 656     MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT, MPI_SUM, ms->mpi_comm_masters);
 657 #else
 658     /* this is thread-unsafe, but it will do for now: */
 659     int i;
 660
 661     if (nr > ms->mpb->ibuf_alloc)
 662     {
 663         ms->mpb->ibuf_alloc = nr;
 664         srenew(ms->mpb->ibuf, ms->mpb->ibuf_alloc);
 665     }
 666     MPI_Allreduce(r, ms->mpb->ibuf, nr, MPI_INT, MPI_SUM, ms->mpi_comm_masters);
 667     for (i = 0; i < nr; i++)
 668     {
 669         r[i] = ms->mpb->ibuf[i];
 670     }
 671 #endif
 672 #endif
 673 }
 674
 675 void gmx_sumli_sim(int gmx_unused nr, gmx_int64_t gmx_unused r[], const gmx_multisim_t gmx_unused *ms)
 676 {
 677 #ifndef GMX_MPI
 678     gmx_call("gmx_sumli_sim");
 679 #else
 680 #if defined(MPI_IN_PLACE_EXISTS)
 681     MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT64_T, MPI_SUM,
 682                   ms->mpi_comm_masters);
 683 #else
 684     /* this is thread-unsafe, but it will do for now: */
 685     int i;
 686
 687     if (nr > ms->mpb->libuf_alloc)
 688     {
 689         ms->mpb->libuf_alloc = nr;
 690         srenew(ms->mpb->libuf, ms->mpb->libuf_alloc);
 691     }
 692     MPI_Allreduce(r, ms->mpb->libuf, nr, MPI_INT64_T, MPI_SUM,
 693                   ms->mpi_comm_masters);
 694     for (i = 0; i < nr; i++)
 695     {
 696         r[i] = ms->mpb->libuf[i];
 697     }
 698 #endif
 699 #endif
 700 }
 701
 702 gmx_bool gmx_fexist_master(const char *fname, t_commrec *cr)
 703 {
 704     gmx_bool bExist;
 705
 706     if (SIMMASTER(cr))
 707     {
 708         bExist = gmx_fexist(fname);
 709     }
 710     if (PAR(cr))
 711     {
 712         gmx_bcast(sizeof(bExist), &bExist, cr);
 713     }
 714     return bExist;
 715 }
 716
 717 void gmx_fatal_collective(int f_errno, const char *file, int line,
 718                           const t_commrec *cr, gmx_domdec_t *dd,
 719                           const char *fmt, ...)
 720 {
 721     va_list  ap;
 722     gmx_bool bMaster, bFinalize;
 723 #ifdef GMX_MPI
 724     int      result;
 725     /* Check if we are calling on all processes in MPI_COMM_WORLD */
 726     if (cr != NULL)
 727     {
 728         MPI_Comm_compare(cr->mpi_comm_mysim, MPI_COMM_WORLD, &result);
 729     }
 730     else
 731     {
 732         MPI_Comm_compare(dd->mpi_comm_all, MPI_COMM_WORLD, &result);
 733     }
 734     /* Any result except MPI_UNEQUAL allows us to call MPI_Finalize */
 735     bFinalize = (result != MPI_UNEQUAL);
 736 #else
 737     bFinalize = TRUE;
 738 #endif
 739     bMaster = (cr != NULL && MASTER(cr)) || (dd != NULL && DDMASTER(dd));
 740
 741     va_start(ap, fmt);
 742     gmx_fatal_mpi_va(f_errno, file, line, bMaster, bFinalize, fmt, ap);
 743     va_end(ap);
 744 }