src/gmxlib/network.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team,
   6  * check out http://www.gromacs.org for more information.
   7  * Copyright (c) 2012,2013, by the GROMACS development team, led by
   8  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
   9  * others, as listed in the AUTHORS file in the top-level source
  10  * directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 #ifdef HAVE_CONFIG_H
  39 #include <config.h>
  40 #endif
  41
  42 #include <string.h>
  43 #include "gmx_fatal.h"
  44 #include "main.h"
  45 #include "smalloc.h"
  46 #include "network.h"
  47 #include "copyrite.h"
  48 #include "statutil.h"
  49 #include <ctype.h>
  50 #include "macros.h"
  51 #include "string2.h"
  52
  53 #ifdef GMX_LIB_MPI
  54 #include <mpi.h>
  55 #endif
  56
  57 #ifdef GMX_THREAD_MPI
  58 #include "tmpi.h"
  59 #endif
  60
  61 #include "mpelogging.h"
  62
  63 /* The source code in this file should be thread-safe.
  64       Please keep it that way. */
  65
  66 gmx_bool gmx_mpi_initialized(void)
  67 {
  68     int n;
  69 #ifndef GMX_MPI
  70     return 0;
  71 #else
  72     MPI_Initialized(&n);
  73
  74     return n;
  75 #endif
  76 }
  77
  78 int gmx_setup(int *argc, char **argv, int *nnodes)
  79 {
  80 #ifndef GMX_MPI
  81     gmx_call("gmx_setup");
  82     return 0;
  83 #else
  84     char   buf[256];
  85     int    resultlen;             /* actual length of node name      */
  86     int    i, flag;
  87     int    mpi_num_nodes;
  88     int    mpi_my_rank;
  89     char   mpi_hostname[MPI_MAX_PROCESSOR_NAME];
  90
  91     /* Call the MPI routines */
  92 #ifdef GMX_LIB_MPI
  93 #ifdef GMX_FAHCORE
  94     (void) fah_MPI_Init(argc, &argv);
  95 #else
  96     (void) MPI_Init(argc, &argv);
  97 #endif
  98 #endif
  99     (void) MPI_Comm_size( MPI_COMM_WORLD, &mpi_num_nodes );
 100     (void) MPI_Comm_rank( MPI_COMM_WORLD, &mpi_my_rank );
 101     (void) MPI_Get_processor_name( mpi_hostname, &resultlen );
 102
 103
 104 #ifdef USE_MPE
 105     /* MPE logging routines. Get event IDs from MPE: */
 106     /* General events */
 107     ev_timestep1               = MPE_Log_get_event_number( );
 108     ev_timestep2               = MPE_Log_get_event_number( );
 109     ev_force_start             = MPE_Log_get_event_number( );
 110     ev_force_finish            = MPE_Log_get_event_number( );
 111     ev_do_fnbf_start           = MPE_Log_get_event_number( );
 112     ev_do_fnbf_finish          = MPE_Log_get_event_number( );
 113     ev_ns_start                = MPE_Log_get_event_number( );
 114     ev_ns_finish               = MPE_Log_get_event_number( );
 115     ev_calc_bonds_start        = MPE_Log_get_event_number( );
 116     ev_calc_bonds_finish       = MPE_Log_get_event_number( );
 117     ev_global_stat_start       = MPE_Log_get_event_number( );
 118     ev_global_stat_finish      = MPE_Log_get_event_number( );
 119     ev_virial_start            = MPE_Log_get_event_number( );
 120     ev_virial_finish           = MPE_Log_get_event_number( );
 121
 122     /* Shift related events */
 123     ev_shift_start             = MPE_Log_get_event_number( );
 124     ev_shift_finish            = MPE_Log_get_event_number( );
 125     ev_unshift_start           = MPE_Log_get_event_number( );
 126     ev_unshift_finish          = MPE_Log_get_event_number( );
 127     ev_mk_mshift_start         = MPE_Log_get_event_number( );
 128     ev_mk_mshift_finish        = MPE_Log_get_event_number( );
 129
 130     /* PME related events */
 131     ev_pme_start                = MPE_Log_get_event_number( );
 132     ev_pme_finish               = MPE_Log_get_event_number( );
 133     ev_spread_on_grid_start     = MPE_Log_get_event_number( );
 134     ev_spread_on_grid_finish    = MPE_Log_get_event_number( );
 135     ev_sum_qgrid_start          = MPE_Log_get_event_number( );
 136     ev_sum_qgrid_finish         = MPE_Log_get_event_number( );
 137     ev_gmxfft3d_start           = MPE_Log_get_event_number( );
 138     ev_gmxfft3d_finish          = MPE_Log_get_event_number( );
 139     ev_solve_pme_start          = MPE_Log_get_event_number( );
 140     ev_solve_pme_finish         = MPE_Log_get_event_number( );
 141     ev_gather_f_bsplines_start  = MPE_Log_get_event_number( );
 142     ev_gather_f_bsplines_finish = MPE_Log_get_event_number( );
 143     ev_reduce_start             = MPE_Log_get_event_number( );
 144     ev_reduce_finish            = MPE_Log_get_event_number( );
 145     ev_rscatter_start           = MPE_Log_get_event_number( );
 146     ev_rscatter_finish          = MPE_Log_get_event_number( );
 147     ev_alltoall_start           = MPE_Log_get_event_number( );
 148     ev_alltoall_finish          = MPE_Log_get_event_number( );
 149     ev_pmeredist_start          = MPE_Log_get_event_number( );
 150     ev_pmeredist_finish         = MPE_Log_get_event_number( );
 151     ev_init_pme_start           = MPE_Log_get_event_number( );
 152     ev_init_pme_finish          = MPE_Log_get_event_number( );
 153     ev_send_coordinates_start   = MPE_Log_get_event_number( );
 154     ev_send_coordinates_finish  = MPE_Log_get_event_number( );
 155     ev_update_fr_start          = MPE_Log_get_event_number( );
 156     ev_update_fr_finish         = MPE_Log_get_event_number( );
 157     ev_clear_rvecs_start        = MPE_Log_get_event_number( );
 158     ev_clear_rvecs_finish       = MPE_Log_get_event_number( );
 159     ev_update_start             = MPE_Log_get_event_number( );
 160     ev_update_finish            = MPE_Log_get_event_number( );
 161     ev_output_start             = MPE_Log_get_event_number( );
 162     ev_output_finish            = MPE_Log_get_event_number( );
 163     ev_sum_lrforces_start       = MPE_Log_get_event_number( );
 164     ev_sum_lrforces_finish      = MPE_Log_get_event_number( );
 165     ev_sort_start               = MPE_Log_get_event_number( );
 166     ev_sort_finish              = MPE_Log_get_event_number( );
 167     ev_sum_qgrid_start          = MPE_Log_get_event_number( );
 168     ev_sum_qgrid_finish         = MPE_Log_get_event_number( );
 169
 170     /* Essential dynamics related events */
 171     ev_edsam_start             = MPE_Log_get_event_number( );
 172     ev_edsam_finish            = MPE_Log_get_event_number( );
 173     ev_get_coords_start        = MPE_Log_get_event_number( );
 174     ev_get_coords_finish       = MPE_Log_get_event_number( );
 175     ev_ed_apply_cons_start     = MPE_Log_get_event_number( );
 176     ev_ed_apply_cons_finish    = MPE_Log_get_event_number( );
 177     ev_fit_to_reference_start  = MPE_Log_get_event_number( );
 178     ev_fit_to_reference_finish = MPE_Log_get_event_number( );
 179
 180     /* describe events: */
 181     if (mpi_my_rank == 0)
 182     {
 183         /* General events */
 184         MPE_Describe_state(ev_timestep1,               ev_timestep2,                "timestep START",  "magenta" );
 185         MPE_Describe_state(ev_force_start,             ev_force_finish,             "force",           "cornflower blue" );
 186         MPE_Describe_state(ev_do_fnbf_start,           ev_do_fnbf_finish,           "do_fnbf",         "navy" );
 187         MPE_Describe_state(ev_ns_start,                ev_ns_finish,                "neighbor search", "tomato" );
 188         MPE_Describe_state(ev_calc_bonds_start,        ev_calc_bonds_finish,        "bonded forces",   "slate blue" );
 189         MPE_Describe_state(ev_global_stat_start,       ev_global_stat_finish,       "global stat",     "firebrick3");
 190         MPE_Describe_state(ev_update_fr_start,         ev_update_fr_finish,         "update forcerec", "goldenrod");
 191         MPE_Describe_state(ev_clear_rvecs_start,       ev_clear_rvecs_finish,       "clear rvecs",     "bisque");
 192         MPE_Describe_state(ev_update_start,            ev_update_finish,            "update",          "cornsilk");
 193         MPE_Describe_state(ev_output_start,            ev_output_finish,            "output",          "black");
 194         MPE_Describe_state(ev_virial_start,            ev_virial_finish,            "calc_virial",     "thistle4");
 195
 196         /* PME related events */
 197         MPE_Describe_state(ev_pme_start,               ev_pme_finish,               "doing PME",       "grey" );
 198         MPE_Describe_state(ev_spread_on_grid_start,    ev_spread_on_grid_finish,    "spread",          "dark orange" );
 199         MPE_Describe_state(ev_sum_qgrid_start,         ev_sum_qgrid_finish,         "sum qgrid",       "slate blue");
 200         MPE_Describe_state(ev_gmxfft3d_start,          ev_gmxfft3d_finish,          "fft3d",           "snow2" );
 201         MPE_Describe_state(ev_solve_pme_start,         ev_solve_pme_finish,         "solve PME",       "indian red" );
 202         MPE_Describe_state(ev_gather_f_bsplines_start, ev_gather_f_bsplines_finish, "bsplines",        "light sea green" );
 203         MPE_Describe_state(ev_reduce_start,            ev_reduce_finish,            "reduce",          "cyan1" );
 204         MPE_Describe_state(ev_rscatter_start,          ev_rscatter_finish,          "rscatter",        "cyan3" );
 205         MPE_Describe_state(ev_alltoall_start,          ev_alltoall_finish,          "alltoall",        "LightCyan4" );
 206         MPE_Describe_state(ev_pmeredist_start,         ev_pmeredist_finish,         "pmeredist",       "thistle" );
 207         MPE_Describe_state(ev_init_pme_start,          ev_init_pme_finish,          "init PME",        "snow4");
 208         MPE_Describe_state(ev_send_coordinates_start,  ev_send_coordinates_finish,  "send_coordinates", "blue");
 209         MPE_Describe_state(ev_sum_lrforces_start,      ev_sum_lrforces_finish,      "sum_LRforces",    "lime green");
 210         MPE_Describe_state(ev_sort_start,              ev_sort_finish,              "sort pme atoms",  "brown");
 211         MPE_Describe_state(ev_sum_qgrid_start,         ev_sum_qgrid_finish,         "sum charge grid", "medium orchid");
 212
 213         /* Shift related events */
 214         MPE_Describe_state(ev_shift_start,             ev_shift_finish,             "shift",           "orange");
 215         MPE_Describe_state(ev_unshift_start,           ev_unshift_finish,           "unshift",         "dark orange");
 216         MPE_Describe_state(ev_mk_mshift_start,         ev_mk_mshift_finish,         "mk_mshift",       "maroon");
 217
 218         /* Essential dynamics related events */
 219         MPE_Describe_state(ev_edsam_start,             ev_edsam_finish,             "EDSAM",           "deep sky blue");
 220         MPE_Describe_state(ev_get_coords_start,        ev_get_coords_finish,        "ED get coords",   "steel blue");
 221         MPE_Describe_state(ev_ed_apply_cons_start,     ev_ed_apply_cons_finish,     "ED apply constr", "forest green");
 222         MPE_Describe_state(ev_fit_to_reference_start,  ev_fit_to_reference_finish,  "ED fit to ref",   "lavender");
 223
 224     }
 225     MPE_Init_log();
 226 #endif
 227
 228 #ifdef GMX_LIB_MPI
 229     if (debug)
 230     {
 231         fprintf(debug, "NNODES=%d, MYRANK=%d, HOSTNAME=%s\n",
 232                 mpi_num_nodes, mpi_my_rank, mpi_hostname);
 233     }
 234 #endif
 235
 236     *nnodes = mpi_num_nodes;
 237
 238     return mpi_my_rank;
 239 #endif
 240 }
 241
 242 int  gmx_node_num(void)
 243 {
 244 #ifndef GMX_MPI
 245     return 1;
 246 #else
 247     int i;
 248     (void) MPI_Comm_size(MPI_COMM_WORLD, &i);
 249     return i;
 250 #endif
 251 }
 252
 253 int gmx_node_rank(void)
 254 {
 255 #ifndef GMX_MPI
 256     return 0;
 257 #else
 258     int i;
 259     (void) MPI_Comm_rank(MPI_COMM_WORLD, &i);
 260     return i;
 261 #endif
 262 }
 263
 264 #if defined GMX_LIB_MPI && defined GMX_TARGET_BGQ
 265 #include <spi/include/kernel/location.h>
 266 #endif
 267
 268 int gmx_physicalnode_id_hash(void)
 269 {
 270     int hash_int;
 271
 272 #ifndef GMX_LIB_MPI
 273     /* We have a single physical node */
 274     hash_int = 0;
 275 #else
 276     int  resultlen;
 277     char mpi_hostname[MPI_MAX_PROCESSOR_NAME];
 278
 279     /* This procedure can only differentiate nodes with different names.
 280      * Architectures where different physical nodes have identical names,
 281      * such as IBM Blue Gene, should use an architecture specific solution.
 282      */
 283     MPI_Get_processor_name(mpi_hostname, &resultlen);
 284
 285     /* The string hash function returns an unsigned int. We cast to an int.
 286      * Negative numbers are converted to positive by setting the sign bit to 0.
 287      * This makes the hash one bit smaller.
 288      * A 63-bit hash (with 64-bit int) should be enough for unique node hashes,
 289      * even on a million node machine. 31 bits might not be enough though!
 290      */
 291     hash_int =
 292         (int)gmx_string_fullhash_func(mpi_hostname, gmx_string_hash_init);
 293     if (hash_int < 0)
 294     {
 295         hash_int -= INT_MIN;
 296     }
 297 #endif
 298
 299     return hash_int;
 300 }
 301
 302 /* TODO: this function should be fully replaced by gmx_physicalnode_id_hash */
 303 int gmx_hostname_num()
 304 {
 305 #ifndef GMX_MPI
 306     return 0;
 307 #else
 308 #ifdef GMX_THREAD_MPI
 309     /* thread-MPI currently puts the thread number in the process name,
 310      * we might want to change this, as this is inconsistent with what
 311      * most MPI implementations would do when running on a single node.
 312      */
 313     return 0;
 314 #else
 315     int  resultlen, hostnum, i, j;
 316     char mpi_hostname[MPI_MAX_PROCESSOR_NAME], hostnum_str[MPI_MAX_PROCESSOR_NAME];
 317
 318     MPI_Get_processor_name(mpi_hostname, &resultlen);
 319 #ifdef GMX_TARGET_BGQ
 320     Personality_t personality;
 321     Kernel_GetPersonality(&personality, sizeof(personality));
 322     /* Each MPI rank has a unique coordinate in a 6-dimensional space
 323        (A,B,C,D,E,T), with dimensions A-E corresponding to different
 324        physical nodes, and T within each node. Each node has sixteen
 325        physical cores, each of which can have up to four hardware
 326        threads, so 0 <= T <= 63 (but the maximum value of T depends on
 327        the confituration of ranks and OpenMP threads per
 328        node). However, T is irrelevant for computing a suitable return
 329        value for gmx_hostname_num().
 330      */
 331     hostnum  = personality.Network_Config.Acoord;
 332     hostnum *= personality.Network_Config.Bnodes;
 333     hostnum += personality.Network_Config.Bcoord;
 334     hostnum *= personality.Network_Config.Cnodes;
 335     hostnum += personality.Network_Config.Ccoord;
 336     hostnum *= personality.Network_Config.Dnodes;
 337     hostnum += personality.Network_Config.Dcoord;
 338     hostnum *= personality.Network_Config.Enodes;
 339     hostnum += personality.Network_Config.Ecoord;
 340 #else
 341     /* This procedure can only differentiate nodes with host names
 342      * that end on unique numbers.
 343      */
 344     i = 0;
 345     j = 0;
 346     /* Only parse the host name up to the first dot */
 347     while (i < resultlen && mpi_hostname[i] != '.')
 348     {
 349         if (isdigit(mpi_hostname[i]))
 350         {
 351             hostnum_str[j++] = mpi_hostname[i];
 352         }
 353         i++;
 354     }
 355     hostnum_str[j] = '\0';
 356     if (j == 0)
 357     {
 358         hostnum = 0;
 359     }
 360     else
 361     {
 362         /* Use only the last 9 decimals, so we don't overflow an int */
 363         hostnum = strtol(hostnum_str + max(0, j-9), NULL, 10);
 364     }
 365 #endif
 366
 367     if (debug)
 368     {
 369         fprintf(debug, "In gmx_hostname_num: hostname '%s', hostnum %d\n",
 370                 mpi_hostname, hostnum);
 371 #ifdef GMX_TARGET_BGQ
 372         fprintf(debug,
 373                 "Torus ID A: %d / %d B: %d / %d C: %d / %d D: %d / %d E: %d / %d\nNode ID T: %d / %d core: %d / %d hardware thread: %d / %d\n",
 374                 personality.Network_Config.Acoord,
 375                 personality.Network_Config.Anodes,
 376                 personality.Network_Config.Bcoord,
 377                 personality.Network_Config.Bnodes,
 378                 personality.Network_Config.Ccoord,
 379                 personality.Network_Config.Cnodes,
 380                 personality.Network_Config.Dcoord,
 381                 personality.Network_Config.Dnodes,
 382                 personality.Network_Config.Ecoord,
 383                 personality.Network_Config.Enodes,
 384                 Kernel_ProcessorCoreID(),
 385                 16,
 386                 Kernel_ProcessorID(),
 387                 64,
 388                 Kernel_ProcessorThreadID(),
 389                 4);
 390 #endif
 391     }
 392     return hostnum;
 393 #endif
 394 #endif
 395 }
 396
 397 void gmx_setup_nodecomm(FILE *fplog, t_commrec *cr)
 398 {
 399     gmx_nodecomm_t *nc;
 400     int             n, rank, hostnum, ng, ni;
 401
 402     /* Many MPI implementations do not optimize MPI_Allreduce
 403      * (and probably also other global communication calls)
 404      * for multi-core nodes connected by a network.
 405      * We can optimize such communication by using one MPI call
 406      * within each node and one between the nodes.
 407      * For MVAPICH2 and Intel MPI this reduces the time for
 408      * the global_stat communication by 25%
 409      * for 2x2-core 3 GHz Woodcrest connected by mixed DDR/SDR Infiniband.
 410      * B. Hess, November 2007
 411      */
 412
 413     nc = &cr->nc;
 414
 415     nc->bUse = FALSE;
 416 #ifndef GMX_THREAD_MPI
 417 #ifdef GMX_MPI
 418     MPI_Comm_size(cr->mpi_comm_mygroup, &n);
 419     MPI_Comm_rank(cr->mpi_comm_mygroup, &rank);
 420
 421     hostnum = gmx_hostname_num();
 422
 423     if (debug)
 424     {
 425         fprintf(debug, "In gmx_setup_nodecomm: splitting communicator of size %d\n", n);
 426     }
 427
 428
 429     /* The intra-node communicator, split on node number */
 430     MPI_Comm_split(cr->mpi_comm_mygroup, hostnum, rank, &nc->comm_intra);
 431     MPI_Comm_rank(nc->comm_intra, &nc->rank_intra);
 432     if (debug)
 433     {
 434         fprintf(debug, "In gmx_setup_nodecomm: node rank %d rank_intra %d\n",
 435                 rank, nc->rank_intra);
 436     }
 437     /* The inter-node communicator, split on rank_intra.
 438      * We actually only need the one for rank=0,
 439      * but it is easier to create them all.
 440      */
 441     MPI_Comm_split(cr->mpi_comm_mygroup, nc->rank_intra, rank, &nc->comm_inter);
 442     /* Check if this really created two step communication */
 443     MPI_Comm_size(nc->comm_inter, &ng);
 444     MPI_Comm_size(nc->comm_intra, &ni);
 445     if (debug)
 446     {
 447         fprintf(debug, "In gmx_setup_nodecomm: groups %d, my group size %d\n",
 448                 ng, ni);
 449     }
 450
 451     if (getenv("GMX_NO_NODECOMM") == NULL &&
 452         ((ng > 1 && ng < n) || (ni > 1 && ni < n)))
 453     {
 454         nc->bUse = TRUE;
 455         if (fplog)
 456         {
 457             fprintf(fplog, "Using two step summing over %d groups of on average %.1f processes\n\n",
 458                     ng, (real)n/(real)ng);
 459         }
 460         if (nc->rank_intra > 0)
 461         {
 462             MPI_Comm_free(&nc->comm_inter);
 463         }
 464     }
 465     else
 466     {
 467         /* One group or all processes in a separate group, use normal summing */
 468         MPI_Comm_free(&nc->comm_inter);
 469         MPI_Comm_free(&nc->comm_intra);
 470         if (debug)
 471         {
 472             fprintf(debug, "In gmx_setup_nodecomm: not unsing separate inter- and intra-node communicators.\n");
 473         }
 474     }
 475 #endif
 476 #else
 477     /* tMPI runs only on a single node so just use the nodeid */
 478     nc->rank_intra = cr->nodeid;
 479 #endif
 480 }
 481
 482 void gmx_init_intranode_counters(t_commrec *cr)
 483 {
 484     /* counters for PP+PME and PP-only processes on my physical node */
 485     int nrank_intranode, rank_intranode;
 486     int nrank_pp_intranode, rank_pp_intranode;
 487     /* thread-MPI is not initialized when not running in parallel */
 488 #if defined GMX_MPI && !defined GMX_THREAD_MPI
 489     int nrank_world, rank_world;
 490     int i, mynum, *num, *num_s, *num_pp, *num_pp_s;
 491
 492     MPI_Comm_size(MPI_COMM_WORLD, &nrank_world);
 493     MPI_Comm_rank(MPI_COMM_WORLD, &rank_world);
 494
 495     /* Get the node number from the hostname to identify the nodes */
 496     mynum = gmx_hostname_num();
 497
 498     /* We can't rely on MPI_IN_PLACE, so we need send and receive buffers */
 499     snew(num,   nrank_world);
 500     snew(num_s, nrank_world);
 501     snew(num_pp,   nrank_world);
 502     snew(num_pp_s, nrank_world);
 503
 504     num_s[rank_world]    = mynum;
 505     num_pp_s[rank_world] = (cr->duty & DUTY_PP) ? mynum : -1;
 506
 507     MPI_Allreduce(num_s,    num,    nrank_world, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
 508     MPI_Allreduce(num_pp_s, num_pp, nrank_world, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
 509
 510     nrank_intranode    = 0;
 511     rank_intranode     = 0;
 512     nrank_pp_intranode = 0;
 513     rank_pp_intranode  = 0;
 514     for (i = 0; i < nrank_world; i++)
 515     {
 516         if (num[i] == mynum)
 517         {
 518             nrank_intranode++;
 519             if (i < rank_world)
 520             {
 521                 rank_intranode++;
 522             }
 523         }
 524         if ((cr->duty & DUTY_PP) && num_pp[i] == mynum)
 525         {
 526             nrank_pp_intranode++;
 527             if (i < rank_world)
 528             {
 529                 rank_pp_intranode++;
 530             }
 531         }
 532     }
 533     sfree(num);
 534     sfree(num_s);
 535     sfree(num_pp);
 536     sfree(num_pp_s);
 537 #else
 538     /* Serial or thread-MPI code: we run within a single physical node */
 539     nrank_intranode    = cr->nnodes;
 540     rank_intranode     = cr->sim_nodeid;
 541     nrank_pp_intranode = cr->nnodes - cr->npmenodes;
 542     rank_pp_intranode  = cr->nodeid;
 543 #endif
 544
 545     if (debug)
 546     {
 547         char sbuf[STRLEN];
 548         if (cr->duty & DUTY_PP && cr->duty & DUTY_PME)
 549         {
 550             sprintf(sbuf, "PP+PME");
 551         }
 552         else
 553         {
 554             sprintf(sbuf, "%s", cr->duty & DUTY_PP ? "PP" : "PME");
 555         }
 556         fprintf(debug, "On %3s node %d: nrank_intranode=%d, rank_intranode=%d, "
 557                 "nrank_pp_intranode=%d, rank_pp_intranode=%d\n",
 558                 sbuf, cr->sim_nodeid,
 559                 nrank_intranode, rank_intranode,
 560                 nrank_pp_intranode, rank_pp_intranode);
 561     }
 562
 563     cr->nrank_intranode    = nrank_intranode;
 564     cr->rank_intranode     = rank_intranode;
 565     cr->nrank_pp_intranode = nrank_pp_intranode;
 566     cr->rank_pp_intranode  = rank_pp_intranode;
 567 }
 568
 569
 570 void gmx_barrier(const t_commrec *cr)
 571 {
 572 #ifndef GMX_MPI
 573     gmx_call("gmx_barrier");
 574 #else
 575     MPI_Barrier(cr->mpi_comm_mygroup);
 576 #endif
 577 }
 578
 579 void gmx_abort(int noderank, int nnodes, int errorno)
 580 {
 581 #ifndef GMX_MPI
 582     gmx_call("gmx_abort");
 583 #else
 584 #ifdef GMX_THREAD_MPI
 585     fprintf(stderr, "Halting program %s\n", ShortProgram());
 586     thanx(stderr);
 587     exit(1);
 588 #else
 589     if (nnodes > 1)
 590     {
 591         fprintf(stderr, "Halting parallel program %s on CPU %d out of %d\n",
 592                 ShortProgram(), noderank, nnodes);
 593     }
 594     else
 595     {
 596         fprintf(stderr, "Halting program %s\n", ShortProgram());
 597     }
 598
 599     thanx(stderr);
 600     MPI_Abort(MPI_COMM_WORLD, errorno);
 601     exit(1);
 602 #endif
 603 #endif
 604 }
 605
 606 void gmx_bcast(int nbytes, void *b, const t_commrec *cr)
 607 {
 608 #ifndef GMX_MPI
 609     gmx_call("gmx_bast");
 610 #else
 611     MPI_Bcast(b, nbytes, MPI_BYTE, MASTERRANK(cr), cr->mpi_comm_mygroup);
 612 #endif
 613 }
 614
 615 void gmx_bcast_sim(int nbytes, void *b, const t_commrec *cr)
 616 {
 617 #ifndef GMX_MPI
 618     gmx_call("gmx_bast");
 619 #else
 620     MPI_Bcast(b, nbytes, MPI_BYTE, MASTERRANK(cr), cr->mpi_comm_mysim);
 621 #endif
 622 }
 623
 624 void gmx_sumd(int nr, double r[], const t_commrec *cr)
 625 {
 626 #ifndef GMX_MPI
 627     gmx_call("gmx_sumd");
 628 #else
 629 #if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREAD_MPI)
 630     if (cr->nc.bUse)
 631     {
 632         if (cr->nc.rank_intra == 0)
 633         {
 634             /* Use two step summing. */
 635             MPI_Reduce(MPI_IN_PLACE, r, nr, MPI_DOUBLE, MPI_SUM, 0,
 636                        cr->nc.comm_intra);
 637             /* Sum the roots of the internal (intra) buffers. */
 638             MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_DOUBLE, MPI_SUM,
 639                           cr->nc.comm_inter);
 640         }
 641         else
 642         {
 643             /* This is here because of the silly MPI specification
 644                 that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
 645             MPI_Reduce(r, NULL, nr, MPI_DOUBLE, MPI_SUM, 0, cr->nc.comm_intra);
 646         }
 647         MPI_Bcast(r, nr, MPI_DOUBLE, 0, cr->nc.comm_intra);
 648     }
 649     else
 650     {
 651         MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_DOUBLE, MPI_SUM,
 652                       cr->mpi_comm_mygroup);
 653     }
 654 #else
 655     int i;
 656
 657     if (nr > cr->mpb->dbuf_alloc)
 658     {
 659         cr->mpb->dbuf_alloc = nr;
 660         srenew(cr->mpb->dbuf, cr->mpb->dbuf_alloc);
 661     }
 662     if (cr->nc.bUse)
 663     {
 664         /* Use two step summing */
 665         MPI_Allreduce(r, cr->mpb->dbuf, nr, MPI_DOUBLE, MPI_SUM, cr->nc.comm_intra);
 666         if (cr->nc.rank_intra == 0)
 667         {
 668             /* Sum with the buffers reversed */
 669             MPI_Allreduce(cr->mpb->dbuf, r, nr, MPI_DOUBLE, MPI_SUM,
 670                           cr->nc.comm_inter);
 671         }
 672         MPI_Bcast(r, nr, MPI_DOUBLE, 0, cr->nc.comm_intra);
 673     }
 674     else
 675     {
 676         MPI_Allreduce(r, cr->mpb->dbuf, nr, MPI_DOUBLE, MPI_SUM,
 677                       cr->mpi_comm_mygroup);
 678         for (i = 0; i < nr; i++)
 679         {
 680             r[i] = cr->mpb->dbuf[i];
 681         }
 682     }
 683 #endif
 684 #endif
 685 }
 686
 687 void gmx_sumf(int nr, float r[], const t_commrec *cr)
 688 {
 689 #ifndef GMX_MPI
 690     gmx_call("gmx_sumf");
 691 #else
 692 #if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREAD_MPI)
 693     if (cr->nc.bUse)
 694     {
 695         /* Use two step summing.  */
 696         if (cr->nc.rank_intra == 0)
 697         {
 698             MPI_Reduce(MPI_IN_PLACE, r, nr, MPI_FLOAT, MPI_SUM, 0,
 699                        cr->nc.comm_intra);
 700             /* Sum the roots of the internal (intra) buffers */
 701             MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_FLOAT, MPI_SUM,
 702                           cr->nc.comm_inter);
 703         }
 704         else
 705         {
 706             /* This is here because of the silly MPI specification
 707                 that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
 708             MPI_Reduce(r, NULL, nr, MPI_FLOAT, MPI_SUM, 0, cr->nc.comm_intra);
 709         }
 710         MPI_Bcast(r, nr, MPI_FLOAT, 0, cr->nc.comm_intra);
 711     }
 712     else
 713     {
 714         MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_FLOAT, MPI_SUM, cr->mpi_comm_mygroup);
 715     }
 716 #else
 717     int i;
 718
 719     if (nr > cr->mpb->fbuf_alloc)
 720     {
 721         cr->mpb->fbuf_alloc = nr;
 722         srenew(cr->mpb->fbuf, cr->mpb->fbuf_alloc);
 723     }
 724     if (cr->nc.bUse)
 725     {
 726         /* Use two step summing */
 727         MPI_Allreduce(r, cr->mpb->fbuf, nr, MPI_FLOAT, MPI_SUM, cr->nc.comm_intra);
 728         if (cr->nc.rank_intra == 0)
 729         {
 730             /* Sum with the buffers reversed */
 731             MPI_Allreduce(cr->mpb->fbuf, r, nr, MPI_FLOAT, MPI_SUM,
 732                           cr->nc.comm_inter);
 733         }
 734         MPI_Bcast(r, nr, MPI_FLOAT, 0, cr->nc.comm_intra);
 735     }
 736     else
 737     {
 738         MPI_Allreduce(r, cr->mpb->fbuf, nr, MPI_FLOAT, MPI_SUM,
 739                       cr->mpi_comm_mygroup);
 740         for (i = 0; i < nr; i++)
 741         {
 742             r[i] = cr->mpb->fbuf[i];
 743         }
 744     }
 745 #endif
 746 #endif
 747 }
 748
 749 void gmx_sumi(int nr, int r[], const t_commrec *cr)
 750 {
 751 #ifndef GMX_MPI
 752     gmx_call("gmx_sumi");
 753 #else
 754 #if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREAD_MPI)
 755     if (cr->nc.bUse)
 756     {
 757         /* Use two step summing */
 758         if (cr->nc.rank_intra == 0)
 759         {
 760             MPI_Reduce(MPI_IN_PLACE, r, nr, MPI_INT, MPI_SUM, 0, cr->nc.comm_intra);
 761             /* Sum with the buffers reversed */
 762             MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT, MPI_SUM, cr->nc.comm_inter);
 763         }
 764         else
 765         {
 766             /* This is here because of the silly MPI specification
 767                 that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
 768             MPI_Reduce(r, NULL, nr, MPI_INT, MPI_SUM, 0, cr->nc.comm_intra);
 769         }
 770         MPI_Bcast(r, nr, MPI_INT, 0, cr->nc.comm_intra);
 771     }
 772     else
 773     {
 774         MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT, MPI_SUM, cr->mpi_comm_mygroup);
 775     }
 776 #else
 777     int i;
 778
 779     if (nr > cr->mpb->ibuf_alloc)
 780     {
 781         cr->mpb->ibuf_alloc = nr;
 782         srenew(cr->mpb->ibuf, cr->mpb->ibuf_alloc);
 783     }
 784     if (cr->nc.bUse)
 785     {
 786         /* Use two step summing */
 787         MPI_Allreduce(r, cr->mpb->ibuf, nr, MPI_INT, MPI_SUM, cr->nc.comm_intra);
 788         if (cr->nc.rank_intra == 0)
 789         {
 790             /* Sum with the buffers reversed */
 791             MPI_Allreduce(cr->mpb->ibuf, r, nr, MPI_INT, MPI_SUM, cr->nc.comm_inter);
 792         }
 793         MPI_Bcast(r, nr, MPI_INT, 0, cr->nc.comm_intra);
 794     }
 795     else
 796     {
 797         MPI_Allreduce(r, cr->mpb->ibuf, nr, MPI_INT, MPI_SUM, cr->mpi_comm_mygroup);
 798         for (i = 0; i < nr; i++)
 799         {
 800             r[i] = cr->mpb->ibuf[i];
 801         }
 802     }
 803 #endif
 804 #endif
 805 }
 806
 807 void gmx_sumli(int nr, gmx_large_int_t r[], const t_commrec *cr)
 808 {
 809 #ifndef GMX_MPI
 810     gmx_call("gmx_sumli");
 811 #else
 812 #if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREAD_MPI)
 813     if (cr->nc.bUse)
 814     {
 815         /* Use two step summing */
 816         if (cr->nc.rank_intra == 0)
 817         {
 818             MPI_Reduce(MPI_IN_PLACE, r, nr, GMX_MPI_LARGE_INT, MPI_SUM, 0,
 819                        cr->nc.comm_intra);
 820             /* Sum with the buffers reversed */
 821             MPI_Allreduce(MPI_IN_PLACE, r, nr, GMX_MPI_LARGE_INT, MPI_SUM,
 822                           cr->nc.comm_inter);
 823         }
 824         else
 825         {
 826             /* This is here because of the silly MPI specification
 827                 that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
 828             MPI_Reduce(r, NULL, nr, GMX_MPI_LARGE_INT, MPI_SUM, 0, cr->nc.comm_intra);
 829         }
 830         MPI_Bcast(r, nr, GMX_MPI_LARGE_INT, 0, cr->nc.comm_intra);
 831     }
 832     else
 833     {
 834         MPI_Allreduce(MPI_IN_PLACE, r, nr, GMX_MPI_LARGE_INT, MPI_SUM, cr->mpi_comm_mygroup);
 835     }
 836 #else
 837     int i;
 838
 839     if (nr > cr->mpb->libuf_alloc)
 840     {
 841         cr->mpb->libuf_alloc = nr;
 842         srenew(cr->mpb->libuf, cr->mpb->libuf_alloc);
 843     }
 844     if (cr->nc.bUse)
 845     {
 846         /* Use two step summing */
 847         MPI_Allreduce(r, cr->mpb->libuf, nr, GMX_MPI_LARGE_INT, MPI_SUM,
 848                       cr->nc.comm_intra);
 849         if (cr->nc.rank_intra == 0)
 850         {
 851             /* Sum with the buffers reversed */
 852             MPI_Allreduce(cr->mpb->libuf, r, nr, GMX_MPI_LARGE_INT, MPI_SUM,
 853                           cr->nc.comm_inter);
 854         }
 855         MPI_Bcast(r, nr, GMX_MPI_LARGE_INT, 0, cr->nc.comm_intra);
 856     }
 857     else
 858     {
 859         MPI_Allreduce(r, cr->mpb->libuf, nr, GMX_MPI_LARGE_INT, MPI_SUM,
 860                       cr->mpi_comm_mygroup);
 861         for (i = 0; i < nr; i++)
 862         {
 863             r[i] = cr->mpb->libuf[i];
 864         }
 865     }
 866 #endif
 867 #endif
 868 }
 869
 870
 871
 872 #ifdef GMX_MPI
 873 void gmx_sumd_comm(int nr, double r[], MPI_Comm mpi_comm)
 874 {
 875 #if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREAD_MPI)
 876     MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_DOUBLE, MPI_SUM, mpi_comm);
 877 #else
 878     /* this function is only used in code that is not performance critical,
 879        (during setup, when comm_rec is not the appropriate communication
 880        structure), so this isn't as bad as it looks. */
 881     double *buf;
 882     int     i;
 883
 884     snew(buf, nr);
 885     MPI_Allreduce(r, buf, nr, MPI_DOUBLE, MPI_SUM, mpi_comm);
 886     for (i = 0; i < nr; i++)
 887     {
 888         r[i] = buf[i];
 889     }
 890     sfree(buf);
 891 #endif
 892 }
 893 #endif
 894
 895 #ifdef GMX_MPI
 896 void gmx_sumf_comm(int nr, float r[], MPI_Comm mpi_comm)
 897 {
 898 #if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREAD_MPI)
 899     MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_FLOAT, MPI_SUM, mpi_comm);
 900 #else
 901     /* this function is only used in code that is not performance critical,
 902        (during setup, when comm_rec is not the appropriate communication
 903        structure), so this isn't as bad as it looks. */
 904     float *buf;
 905     int    i;
 906
 907     snew(buf, nr);
 908     MPI_Allreduce(r, buf, nr, MPI_FLOAT, MPI_SUM, mpi_comm);
 909     for (i = 0; i < nr; i++)
 910     {
 911         r[i] = buf[i];
 912     }
 913     sfree(buf);
 914 #endif
 915 }
 916 #endif
 917
 918 void gmx_sumd_sim(int nr, double r[], const gmx_multisim_t *ms)
 919 {
 920 #ifndef GMX_MPI
 921     gmx_call("gmx_sumd_sim");
 922 #else
 923     gmx_sumd_comm(nr, r, ms->mpi_comm_masters);
 924 #endif
 925 }
 926
 927 void gmx_sumf_sim(int nr, float r[], const gmx_multisim_t *ms)
 928 {
 929 #ifndef GMX_MPI
 930     gmx_call("gmx_sumf_sim");
 931 #else
 932     gmx_sumf_comm(nr, r, ms->mpi_comm_masters);
 933 #endif
 934 }
 935
 936 void gmx_sumi_sim(int nr, int r[], const gmx_multisim_t *ms)
 937 {
 938 #ifndef GMX_MPI
 939     gmx_call("gmx_sumi_sim");
 940 #else
 941 #if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREAD_MPI)
 942     MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT, MPI_SUM, ms->mpi_comm_masters);
 943 #else
 944     /* this is thread-unsafe, but it will do for now: */
 945     int i;
 946
 947     if (nr > ms->mpb->ibuf_alloc)
 948     {
 949         ms->mpb->ibuf_alloc = nr;
 950         srenew(ms->mpb->ibuf, ms->mpb->ibuf_alloc);
 951     }
 952     MPI_Allreduce(r, ms->mpb->ibuf, nr, MPI_INT, MPI_SUM, ms->mpi_comm_masters);
 953     for (i = 0; i < nr; i++)
 954     {
 955         r[i] = ms->mpb->ibuf[i];
 956     }
 957 #endif
 958 #endif
 959 }
 960
 961 void gmx_sumli_sim(int nr, gmx_large_int_t r[], const gmx_multisim_t *ms)
 962 {
 963 #ifndef GMX_MPI
 964     gmx_call("gmx_sumli_sim");
 965 #else
 966 #if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREAD_MPI)
 967     MPI_Allreduce(MPI_IN_PLACE, r, nr, GMX_MPI_LARGE_INT, MPI_SUM,
 968                   ms->mpi_comm_masters);
 969 #else
 970     /* this is thread-unsafe, but it will do for now: */
 971     int i;
 972
 973     if (nr > ms->mpb->libuf_alloc)
 974     {
 975         ms->mpb->libuf_alloc = nr;
 976         srenew(ms->mpb->libuf, ms->mpb->libuf_alloc);
 977     }
 978     MPI_Allreduce(r, ms->mpb->libuf, nr, GMX_MPI_LARGE_INT, MPI_SUM,
 979                   ms->mpi_comm_masters);
 980     for (i = 0; i < nr; i++)
 981     {
 982         r[i] = ms->mpb->libuf[i];
 983     }
 984 #endif
 985 #endif
 986 }
 987
 988
 989 void gmx_finalize_par(void)
 990 {
 991 #ifndef GMX_MPI
 992     /* Compiled without MPI, no MPI finalizing needed */
 993     return;
 994 #else
 995     int initialized, finalized;
 996     int ret;
 997
 998     MPI_Initialized(&initialized);
 999     if (!initialized)
1000     {
1001         return;
1002     }
1003     /* just as a check; we don't want to finalize twice */
1004     MPI_Finalized(&finalized);
1005     if (finalized)
1006     {
1007         return;
1008     }
1009
1010     /* We sync the processes here to try to avoid problems
1011      * with buggy MPI implementations that could cause
1012      * unfinished processes to terminate.
1013      */
1014     MPI_Barrier(MPI_COMM_WORLD);
1015
1016     /*
1017        if (DOMAINDECOMP(cr)) {
1018        if (cr->npmenodes > 0 || cr->dd->bCartesian)
1019         MPI_Comm_free(&cr->mpi_comm_mygroup);
1020        if (cr->dd->bCartesian)
1021         MPI_Comm_free(&cr->mpi_comm_mysim);
1022        }
1023      */
1024
1025     /* Apparently certain mpich implementations cause problems
1026      * with MPI_Finalize. In that case comment out MPI_Finalize.
1027      */
1028     if (debug)
1029     {
1030         fprintf(debug, "Will call MPI_Finalize now\n");
1031     }
1032
1033     ret = MPI_Finalize();
1034     if (debug)
1035     {
1036         fprintf(debug, "Return code from MPI_Finalize = %d\n", ret);
1037     }
1038 #endif
1039 }