src/gromacs/gmxlib/network.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #include "gmxpre.h"
  38
  39 #include "gromacs/legacyheaders/network.h"
  40
  41 #include "config.h"
  42
  43 #include <ctype.h>
  44 #include <stdarg.h>
  45 #include <stdlib.h>
  46 #include <string.h>
  47
  48 #include "gromacs/legacyheaders/types/commrec.h"
  49 #include "gromacs/legacyheaders/copyrite.h"
  50 #include "gromacs/legacyheaders/macros.h"
  51
  52 #include "gromacs/utility/basenetwork.h"
  53 #include "gromacs/utility/cstringutil.h"
  54 #include "gromacs/utility/fatalerror.h"
  55 #include "gromacs/utility/futil.h"
  56 #include "gromacs/utility/gmxmpi.h"
  57 #include "gromacs/utility/smalloc.h"
  58
  59 /* The source code in this file should be thread-safe.
  60       Please keep it that way. */
  61
  62 void gmx_fill_commrec_from_mpi(t_commrec gmx_unused *cr)
  63 {
  64 #ifndef GMX_MPI
  65     gmx_call("gmx_fill_commrec_from_mpi");
  66 #else
  67     if (!gmx_mpi_initialized())
  68     {
  69         gmx_comm("MPI has not been initialized properly");
  70     }
  71
  72     cr->nnodes           = gmx_node_num();
  73     cr->nodeid           = gmx_node_rank();
  74     cr->sim_nodeid       = cr->nodeid;
  75     cr->mpi_comm_mysim   = MPI_COMM_WORLD;
  76     cr->mpi_comm_mygroup = MPI_COMM_WORLD;
  77
  78 #endif
  79 }
  80
  81 t_commrec *init_commrec()
  82 {
  83     t_commrec    *cr;
  84
  85     snew(cr, 1);
  86
  87 #ifdef GMX_LIB_MPI
  88     gmx_fill_commrec_from_mpi(cr);
  89 #else
  90     cr->mpi_comm_mysim   = NULL;
  91     cr->mpi_comm_mygroup = NULL;
  92     cr->nnodes           = 1;
  93     cr->sim_nodeid       = 0;
  94     cr->nodeid           = cr->sim_nodeid;
  95 #endif
  96
  97     // TODO cr->duty should not be initialized here
  98     cr->duty = (DUTY_PP | DUTY_PME);
  99
 100 #if defined GMX_MPI && !defined MPI_IN_PLACE_EXISTS
 101     /* initialize the MPI_IN_PLACE replacement buffers */
 102     snew(cr->mpb, 1);
 103     cr->mpb->ibuf        = NULL;
 104     cr->mpb->libuf       = NULL;
 105     cr->mpb->fbuf        = NULL;
 106     cr->mpb->dbuf        = NULL;
 107     cr->mpb->ibuf_alloc  = 0;
 108     cr->mpb->libuf_alloc = 0;
 109     cr->mpb->fbuf_alloc  = 0;
 110     cr->mpb->dbuf_alloc  = 0;
 111 #endif
 112
 113     return cr;
 114 }
 115
 116 t_commrec *reinitialize_commrec_for_this_thread(const t_commrec gmx_unused *cro)
 117 {
 118 #ifdef GMX_THREAD_MPI
 119     t_commrec *cr;
 120
 121     /* make a thread-specific commrec */
 122     snew(cr, 1);
 123     /* now copy the whole thing, so settings like the number of PME nodes
 124        get propagated. */
 125     *cr = *cro;
 126
 127     /* and we start setting our own thread-specific values for things */
 128     gmx_fill_commrec_from_mpi(cr);
 129
 130     // TODO cr->duty should not be initialized here
 131     cr->duty             = (DUTY_PP | DUTY_PME);
 132
 133     return cr;
 134 #else
 135     return NULL;
 136 #endif
 137 }
 138
 139 void gmx_setup_nodecomm(FILE gmx_unused *fplog, t_commrec *cr)
 140 {
 141     gmx_nodecomm_t *nc;
 142     int             n, rank, nodehash, ng, ni;
 143
 144     /* Many MPI implementations do not optimize MPI_Allreduce
 145      * (and probably also other global communication calls)
 146      * for multi-core nodes connected by a network.
 147      * We can optimize such communication by using one MPI call
 148      * within each node and one between the nodes.
 149      * For MVAPICH2 and Intel MPI this reduces the time for
 150      * the global_stat communication by 25%
 151      * for 2x2-core 3 GHz Woodcrest connected by mixed DDR/SDR Infiniband.
 152      * B. Hess, November 2007
 153      */
 154
 155     nc = &cr->nc;
 156
 157     nc->bUse = FALSE;
 158 #ifndef GMX_THREAD_MPI
 159 #ifdef GMX_MPI
 160     MPI_Comm_size(cr->mpi_comm_mygroup, &n);
 161     MPI_Comm_rank(cr->mpi_comm_mygroup, &rank);
 162
 163     nodehash = gmx_physicalnode_id_hash();
 164
 165     if (debug)
 166     {
 167         fprintf(debug, "In gmx_setup_nodecomm: splitting communicator of size %d\n", n);
 168     }
 169
 170
 171     /* The intra-node communicator, split on node number */
 172     MPI_Comm_split(cr->mpi_comm_mygroup, nodehash, rank, &nc->comm_intra);
 173     MPI_Comm_rank(nc->comm_intra, &nc->rank_intra);
 174     if (debug)
 175     {
 176         fprintf(debug, "In gmx_setup_nodecomm: node ID %d rank within node %d\n",
 177                 rank, nc->rank_intra);
 178     }
 179     /* The inter-node communicator, split on rank_intra.
 180      * We actually only need the one for rank=0,
 181      * but it is easier to create them all.
 182      */
 183     MPI_Comm_split(cr->mpi_comm_mygroup, nc->rank_intra, rank, &nc->comm_inter);
 184     /* Check if this really created two step communication */
 185     MPI_Comm_size(nc->comm_inter, &ng);
 186     MPI_Comm_size(nc->comm_intra, &ni);
 187     if (debug)
 188     {
 189         fprintf(debug, "In gmx_setup_nodecomm: groups %d, my group size %d\n",
 190                 ng, ni);
 191     }
 192
 193     if (getenv("GMX_NO_NODECOMM") == NULL &&
 194         ((ng > 1 && ng < n) || (ni > 1 && ni < n)))
 195     {
 196         nc->bUse = TRUE;
 197         if (fplog)
 198         {
 199             fprintf(fplog, "Using two step summing over %d groups of on average %.1f ranks\n\n",
 200                     ng, (real)n/(real)ng);
 201         }
 202         if (nc->rank_intra > 0)
 203         {
 204             MPI_Comm_free(&nc->comm_inter);
 205         }
 206     }
 207     else
 208     {
 209         /* One group or all processes in a separate group, use normal summing */
 210         MPI_Comm_free(&nc->comm_inter);
 211         MPI_Comm_free(&nc->comm_intra);
 212         if (debug)
 213         {
 214             fprintf(debug, "In gmx_setup_nodecomm: not unsing separate inter- and intra-node communicators.\n");
 215         }
 216     }
 217 #endif
 218 #else
 219     /* tMPI runs only on a single node so just use the nodeid */
 220     nc->rank_intra = cr->nodeid;
 221 #endif
 222 }
 223
 224 void gmx_init_intranode_counters(t_commrec *cr)
 225 {
 226     /* counters for PP+PME and PP-only processes on my physical node */
 227     int nrank_intranode, rank_intranode;
 228     int nrank_pp_intranode, rank_pp_intranode;
 229     /* thread-MPI is not initialized when not running in parallel */
 230 #if defined GMX_MPI && !defined GMX_THREAD_MPI
 231     int nrank_world, rank_world;
 232     int i, myhash, *hash, *hash_s, *hash_pp, *hash_pp_s;
 233
 234     MPI_Comm_size(MPI_COMM_WORLD, &nrank_world);
 235     MPI_Comm_rank(MPI_COMM_WORLD, &rank_world);
 236
 237     /* Get a (hopefully unique) hash that identifies our physical node */
 238     myhash = gmx_physicalnode_id_hash();
 239
 240     /* We can't rely on MPI_IN_PLACE, so we need send and receive buffers */
 241     snew(hash,   nrank_world);
 242     snew(hash_s, nrank_world);
 243     snew(hash_pp,   nrank_world);
 244     snew(hash_pp_s, nrank_world);
 245
 246     hash_s[rank_world]    = myhash;
 247     hash_pp_s[rank_world] = (cr->duty & DUTY_PP) ? myhash : -1;
 248
 249     MPI_Allreduce(hash_s,    hash,    nrank_world, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
 250     MPI_Allreduce(hash_pp_s, hash_pp, nrank_world, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
 251
 252     nrank_intranode    = 0;
 253     rank_intranode     = 0;
 254     nrank_pp_intranode = 0;
 255     rank_pp_intranode  = 0;
 256     for (i = 0; i < nrank_world; i++)
 257     {
 258         if (hash[i] == myhash)
 259         {
 260             nrank_intranode++;
 261             if (i < rank_world)
 262             {
 263                 rank_intranode++;
 264             }
 265         }
 266         if (hash_pp[i] == myhash)
 267         {
 268             nrank_pp_intranode++;
 269             if ((cr->duty & DUTY_PP) && i < rank_world)
 270             {
 271                 rank_pp_intranode++;
 272             }
 273         }
 274     }
 275     sfree(hash);
 276     sfree(hash_s);
 277     sfree(hash_pp);
 278     sfree(hash_pp_s);
 279 #else
 280     /* Serial or thread-MPI code: we run within a single physical node */
 281     nrank_intranode    = cr->nnodes;
 282     rank_intranode     = cr->sim_nodeid;
 283     nrank_pp_intranode = cr->nnodes - cr->npmenodes;
 284     rank_pp_intranode  = cr->nodeid;
 285 #endif
 286
 287     if (debug)
 288     {
 289         char sbuf[STRLEN];
 290         if (cr->duty & DUTY_PP && cr->duty & DUTY_PME)
 291         {
 292             sprintf(sbuf, "PP+PME");
 293         }
 294         else
 295         {
 296             sprintf(sbuf, "%s", cr->duty & DUTY_PP ? "PP" : "PME");
 297         }
 298         fprintf(debug, "On %3s rank %d: nrank_intranode=%d, rank_intranode=%d, "
 299                 "nrank_pp_intranode=%d, rank_pp_intranode=%d\n",
 300                 sbuf, cr->sim_nodeid,
 301                 nrank_intranode, rank_intranode,
 302                 nrank_pp_intranode, rank_pp_intranode);
 303     }
 304
 305     cr->nrank_intranode    = nrank_intranode;
 306     cr->rank_intranode     = rank_intranode;
 307     cr->nrank_pp_intranode = nrank_pp_intranode;
 308     cr->rank_pp_intranode  = rank_pp_intranode;
 309 }
 310
 311
 312 void gmx_barrier(const t_commrec gmx_unused *cr)
 313 {
 314 #ifndef GMX_MPI
 315     gmx_call("gmx_barrier");
 316 #else
 317     MPI_Barrier(cr->mpi_comm_mygroup);
 318 #endif
 319 }
 320
 321 void gmx_bcast(int gmx_unused nbytes, void gmx_unused *b, const t_commrec gmx_unused *cr)
 322 {
 323 #ifndef GMX_MPI
 324     gmx_call("gmx_bast");
 325 #else
 326     MPI_Bcast(b, nbytes, MPI_BYTE, MASTERRANK(cr), cr->mpi_comm_mygroup);
 327 #endif
 328 }
 329
 330 void gmx_bcast_sim(int gmx_unused nbytes, void gmx_unused *b, const t_commrec gmx_unused *cr)
 331 {
 332 #ifndef GMX_MPI
 333     gmx_call("gmx_bast");
 334 #else
 335     MPI_Bcast(b, nbytes, MPI_BYTE, MASTERRANK(cr), cr->mpi_comm_mysim);
 336 #endif
 337 }
 338
 339 void gmx_sumd(int gmx_unused nr, double gmx_unused r[], const t_commrec gmx_unused *cr)
 340 {
 341 #ifndef GMX_MPI
 342     gmx_call("gmx_sumd");
 343 #else
 344 #if defined(MPI_IN_PLACE_EXISTS)
 345     if (cr->nc.bUse)
 346     {
 347         if (cr->nc.rank_intra == 0)
 348         {
 349             /* Use two step summing. */
 350             MPI_Reduce(MPI_IN_PLACE, r, nr, MPI_DOUBLE, MPI_SUM, 0,
 351                        cr->nc.comm_intra);
 352             /* Sum the roots of the internal (intra) buffers. */
 353             MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_DOUBLE, MPI_SUM,
 354                           cr->nc.comm_inter);
 355         }
 356         else
 357         {
 358             /* This is here because of the silly MPI specification
 359                 that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
 360             MPI_Reduce(r, NULL, nr, MPI_DOUBLE, MPI_SUM, 0, cr->nc.comm_intra);
 361         }
 362         MPI_Bcast(r, nr, MPI_DOUBLE, 0, cr->nc.comm_intra);
 363     }
 364     else
 365     {
 366         MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_DOUBLE, MPI_SUM,
 367                       cr->mpi_comm_mygroup);
 368     }
 369 #else
 370     int i;
 371
 372     if (nr > cr->mpb->dbuf_alloc)
 373     {
 374         cr->mpb->dbuf_alloc = nr;
 375         srenew(cr->mpb->dbuf, cr->mpb->dbuf_alloc);
 376     }
 377     if (cr->nc.bUse)
 378     {
 379         /* Use two step summing */
 380         MPI_Allreduce(r, cr->mpb->dbuf, nr, MPI_DOUBLE, MPI_SUM, cr->nc.comm_intra);
 381         if (cr->nc.rank_intra == 0)
 382         {
 383             /* Sum with the buffers reversed */
 384             MPI_Allreduce(cr->mpb->dbuf, r, nr, MPI_DOUBLE, MPI_SUM,
 385                           cr->nc.comm_inter);
 386         }
 387         MPI_Bcast(r, nr, MPI_DOUBLE, 0, cr->nc.comm_intra);
 388     }
 389     else
 390     {
 391         MPI_Allreduce(r, cr->mpb->dbuf, nr, MPI_DOUBLE, MPI_SUM,
 392                       cr->mpi_comm_mygroup);
 393         for (i = 0; i < nr; i++)
 394         {
 395             r[i] = cr->mpb->dbuf[i];
 396         }
 397     }
 398 #endif
 399 #endif
 400 }
 401
 402 void gmx_sumf(int gmx_unused nr, float gmx_unused r[], const t_commrec gmx_unused *cr)
 403 {
 404 #ifndef GMX_MPI
 405     gmx_call("gmx_sumf");
 406 #else
 407 #if defined(MPI_IN_PLACE_EXISTS)
 408     if (cr->nc.bUse)
 409     {
 410         /* Use two step summing.  */
 411         if (cr->nc.rank_intra == 0)
 412         {
 413             MPI_Reduce(MPI_IN_PLACE, r, nr, MPI_FLOAT, MPI_SUM, 0,
 414                        cr->nc.comm_intra);
 415             /* Sum the roots of the internal (intra) buffers */
 416             MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_FLOAT, MPI_SUM,
 417                           cr->nc.comm_inter);
 418         }
 419         else
 420         {
 421             /* This is here because of the silly MPI specification
 422                 that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
 423             MPI_Reduce(r, NULL, nr, MPI_FLOAT, MPI_SUM, 0, cr->nc.comm_intra);
 424         }
 425         MPI_Bcast(r, nr, MPI_FLOAT, 0, cr->nc.comm_intra);
 426     }
 427     else
 428     {
 429         MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_FLOAT, MPI_SUM, cr->mpi_comm_mygroup);
 430     }
 431 #else
 432     int i;
 433
 434     if (nr > cr->mpb->fbuf_alloc)
 435     {
 436         cr->mpb->fbuf_alloc = nr;
 437         srenew(cr->mpb->fbuf, cr->mpb->fbuf_alloc);
 438     }
 439     if (cr->nc.bUse)
 440     {
 441         /* Use two step summing */
 442         MPI_Allreduce(r, cr->mpb->fbuf, nr, MPI_FLOAT, MPI_SUM, cr->nc.comm_intra);
 443         if (cr->nc.rank_intra == 0)
 444         {
 445             /* Sum with the buffers reversed */
 446             MPI_Allreduce(cr->mpb->fbuf, r, nr, MPI_FLOAT, MPI_SUM,
 447                           cr->nc.comm_inter);
 448         }
 449         MPI_Bcast(r, nr, MPI_FLOAT, 0, cr->nc.comm_intra);
 450     }
 451     else
 452     {
 453         MPI_Allreduce(r, cr->mpb->fbuf, nr, MPI_FLOAT, MPI_SUM,
 454                       cr->mpi_comm_mygroup);
 455         for (i = 0; i < nr; i++)
 456         {
 457             r[i] = cr->mpb->fbuf[i];
 458         }
 459     }
 460 #endif
 461 #endif
 462 }
 463
 464 void gmx_sumi(int gmx_unused nr, int gmx_unused r[], const t_commrec gmx_unused *cr)
 465 {
 466 #ifndef GMX_MPI
 467     gmx_call("gmx_sumi");
 468 #else
 469 #if defined(MPI_IN_PLACE_EXISTS)
 470     if (cr->nc.bUse)
 471     {
 472         /* Use two step summing */
 473         if (cr->nc.rank_intra == 0)
 474         {
 475             MPI_Reduce(MPI_IN_PLACE, r, nr, MPI_INT, MPI_SUM, 0, cr->nc.comm_intra);
 476             /* Sum with the buffers reversed */
 477             MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT, MPI_SUM, cr->nc.comm_inter);
 478         }
 479         else
 480         {
 481             /* This is here because of the silly MPI specification
 482                 that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
 483             MPI_Reduce(r, NULL, nr, MPI_INT, MPI_SUM, 0, cr->nc.comm_intra);
 484         }
 485         MPI_Bcast(r, nr, MPI_INT, 0, cr->nc.comm_intra);
 486     }
 487     else
 488     {
 489         MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT, MPI_SUM, cr->mpi_comm_mygroup);
 490     }
 491 #else
 492     int i;
 493
 494     if (nr > cr->mpb->ibuf_alloc)
 495     {
 496         cr->mpb->ibuf_alloc = nr;
 497         srenew(cr->mpb->ibuf, cr->mpb->ibuf_alloc);
 498     }
 499     if (cr->nc.bUse)
 500     {
 501         /* Use two step summing */
 502         MPI_Allreduce(r, cr->mpb->ibuf, nr, MPI_INT, MPI_SUM, cr->nc.comm_intra);
 503         if (cr->nc.rank_intra == 0)
 504         {
 505             /* Sum with the buffers reversed */
 506             MPI_Allreduce(cr->mpb->ibuf, r, nr, MPI_INT, MPI_SUM, cr->nc.comm_inter);
 507         }
 508         MPI_Bcast(r, nr, MPI_INT, 0, cr->nc.comm_intra);
 509     }
 510     else
 511     {
 512         MPI_Allreduce(r, cr->mpb->ibuf, nr, MPI_INT, MPI_SUM, cr->mpi_comm_mygroup);
 513         for (i = 0; i < nr; i++)
 514         {
 515             r[i] = cr->mpb->ibuf[i];
 516         }
 517     }
 518 #endif
 519 #endif
 520 }
 521
 522 void gmx_sumli(int gmx_unused nr, gmx_int64_t gmx_unused r[], const t_commrec gmx_unused *cr)
 523 {
 524 #ifndef GMX_MPI
 525     gmx_call("gmx_sumli");
 526 #else
 527 #if defined(MPI_IN_PLACE_EXISTS)
 528     if (cr->nc.bUse)
 529     {
 530         /* Use two step summing */
 531         if (cr->nc.rank_intra == 0)
 532         {
 533             MPI_Reduce(MPI_IN_PLACE, r, nr, MPI_INT64_T, MPI_SUM, 0,
 534                        cr->nc.comm_intra);
 535             /* Sum with the buffers reversed */
 536             MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT64_T, MPI_SUM,
 537                           cr->nc.comm_inter);
 538         }
 539         else
 540         {
 541             /* This is here because of the silly MPI specification
 542                 that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
 543             MPI_Reduce(r, NULL, nr, MPI_INT64_T, MPI_SUM, 0, cr->nc.comm_intra);
 544         }
 545         MPI_Bcast(r, nr, MPI_INT64_T, 0, cr->nc.comm_intra);
 546     }
 547     else
 548     {
 549         MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT64_T, MPI_SUM, cr->mpi_comm_mygroup);
 550     }
 551 #else
 552     int i;
 553
 554     if (nr > cr->mpb->libuf_alloc)
 555     {
 556         cr->mpb->libuf_alloc = nr;
 557         srenew(cr->mpb->libuf, cr->mpb->libuf_alloc);
 558     }
 559     if (cr->nc.bUse)
 560     {
 561         /* Use two step summing */
 562         MPI_Allreduce(r, cr->mpb->libuf, nr, MPI_INT64_T, MPI_SUM,
 563                       cr->nc.comm_intra);
 564         if (cr->nc.rank_intra == 0)
 565         {
 566             /* Sum with the buffers reversed */
 567             MPI_Allreduce(cr->mpb->libuf, r, nr, MPI_INT64_T, MPI_SUM,
 568                           cr->nc.comm_inter);
 569         }
 570         MPI_Bcast(r, nr, MPI_INT64_T, 0, cr->nc.comm_intra);
 571     }
 572     else
 573     {
 574         MPI_Allreduce(r, cr->mpb->libuf, nr, MPI_INT64_T, MPI_SUM,
 575                       cr->mpi_comm_mygroup);
 576         for (i = 0; i < nr; i++)
 577         {
 578             r[i] = cr->mpb->libuf[i];
 579         }
 580     }
 581 #endif
 582 #endif
 583 }
 584
 585
 586
 587 #ifdef GMX_MPI
 588 static void gmx_sumd_comm(int nr, double r[], MPI_Comm mpi_comm)
 589 {
 590 #if defined(MPI_IN_PLACE_EXISTS)
 591     MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_DOUBLE, MPI_SUM, mpi_comm);
 592 #else
 593     /* this function is only used in code that is not performance critical,
 594        (during setup, when comm_rec is not the appropriate communication
 595        structure), so this isn't as bad as it looks. */
 596     double *buf;
 597     int     i;
 598
 599     snew(buf, nr);
 600     MPI_Allreduce(r, buf, nr, MPI_DOUBLE, MPI_SUM, mpi_comm);
 601     for (i = 0; i < nr; i++)
 602     {
 603         r[i] = buf[i];
 604     }
 605     sfree(buf);
 606 #endif
 607 }
 608 #endif
 609
 610 #ifdef GMX_MPI
 611 static void gmx_sumf_comm(int nr, float r[], MPI_Comm mpi_comm)
 612 {
 613 #if defined(MPI_IN_PLACE_EXISTS)
 614     MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_FLOAT, MPI_SUM, mpi_comm);
 615 #else
 616     /* this function is only used in code that is not performance critical,
 617        (during setup, when comm_rec is not the appropriate communication
 618        structure), so this isn't as bad as it looks. */
 619     float *buf;
 620     int    i;
 621
 622     snew(buf, nr);
 623     MPI_Allreduce(r, buf, nr, MPI_FLOAT, MPI_SUM, mpi_comm);
 624     for (i = 0; i < nr; i++)
 625     {
 626         r[i] = buf[i];
 627     }
 628     sfree(buf);
 629 #endif
 630 }
 631 #endif
 632
 633 void gmx_sumd_sim(int gmx_unused nr, double gmx_unused r[], const gmx_multisim_t gmx_unused *ms)
 634 {
 635 #ifndef GMX_MPI
 636     gmx_call("gmx_sumd_sim");
 637 #else
 638     gmx_sumd_comm(nr, r, ms->mpi_comm_masters);
 639 #endif
 640 }
 641
 642 void gmx_sumf_sim(int gmx_unused nr, float gmx_unused r[], const gmx_multisim_t gmx_unused *ms)
 643 {
 644 #ifndef GMX_MPI
 645     gmx_call("gmx_sumf_sim");
 646 #else
 647     gmx_sumf_comm(nr, r, ms->mpi_comm_masters);
 648 #endif
 649 }
 650
 651 void gmx_sumi_sim(int gmx_unused nr, int gmx_unused r[], const gmx_multisim_t gmx_unused *ms)
 652 {
 653 #ifndef GMX_MPI
 654     gmx_call("gmx_sumi_sim");
 655 #else
 656 #if defined(MPI_IN_PLACE_EXISTS)
 657     MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT, MPI_SUM, ms->mpi_comm_masters);
 658 #else
 659     /* this is thread-unsafe, but it will do for now: */
 660     int i;
 661
 662     if (nr > ms->mpb->ibuf_alloc)
 663     {
 664         ms->mpb->ibuf_alloc = nr;
 665         srenew(ms->mpb->ibuf, ms->mpb->ibuf_alloc);
 666     }
 667     MPI_Allreduce(r, ms->mpb->ibuf, nr, MPI_INT, MPI_SUM, ms->mpi_comm_masters);
 668     for (i = 0; i < nr; i++)
 669     {
 670         r[i] = ms->mpb->ibuf[i];
 671     }
 672 #endif
 673 #endif
 674 }
 675
 676 void gmx_sumli_sim(int gmx_unused nr, gmx_int64_t gmx_unused r[], const gmx_multisim_t gmx_unused *ms)
 677 {
 678 #ifndef GMX_MPI
 679     gmx_call("gmx_sumli_sim");
 680 #else
 681 #if defined(MPI_IN_PLACE_EXISTS)
 682     MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT64_T, MPI_SUM,
 683                   ms->mpi_comm_masters);
 684 #else
 685     /* this is thread-unsafe, but it will do for now: */
 686     int i;
 687
 688     if (nr > ms->mpb->libuf_alloc)
 689     {
 690         ms->mpb->libuf_alloc = nr;
 691         srenew(ms->mpb->libuf, ms->mpb->libuf_alloc);
 692     }
 693     MPI_Allreduce(r, ms->mpb->libuf, nr, MPI_INT64_T, MPI_SUM,
 694                   ms->mpi_comm_masters);
 695     for (i = 0; i < nr; i++)
 696     {
 697         r[i] = ms->mpb->libuf[i];
 698     }
 699 #endif
 700 #endif
 701 }
 702
 703 gmx_bool gmx_fexist_master(const char *fname, t_commrec *cr)
 704 {
 705     gmx_bool bExist;
 706
 707     if (SIMMASTER(cr))
 708     {
 709         bExist = gmx_fexist(fname);
 710     }
 711     if (PAR(cr))
 712     {
 713         gmx_bcast(sizeof(bExist), &bExist, cr);
 714     }
 715     return bExist;
 716 }
 717
 718 void gmx_fatal_collective(int f_errno, const char *file, int line,
 719                           const t_commrec *cr, gmx_domdec_t *dd,
 720                           const char *fmt, ...)
 721 {
 722     va_list  ap;
 723     gmx_bool bMaster, bFinalize;
 724 #ifdef GMX_MPI
 725     int      result;
 726     /* Check if we are calling on all processes in MPI_COMM_WORLD */
 727     if (cr != NULL)
 728     {
 729         MPI_Comm_compare(cr->mpi_comm_mysim, MPI_COMM_WORLD, &result);
 730     }
 731     else
 732     {
 733         MPI_Comm_compare(dd->mpi_comm_all, MPI_COMM_WORLD, &result);
 734     }
 735     /* Any result except MPI_UNEQUAL allows us to call MPI_Finalize */
 736     bFinalize = (result != MPI_UNEQUAL);
 737 #else
 738     bFinalize = TRUE;
 739 #endif
 740     bMaster = (cr != NULL && MASTER(cr)) || (dd != NULL && DDMASTER(dd));
 741
 742     va_start(ap, fmt);
 743     gmx_fatal_mpi_va(f_errno, file, line, bMaster, bFinalize, fmt, ap);
 744     va_end(ap);
 745 }