src/gromacs/gmxlib/network.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #include "gmxpre.h"
  38
  39 #include "network.h"
  40
  41 #include "config.h"
  42
  43 #include <cctype>
  44 #include <cstdarg>
  45 #include <cstdlib>
  46 #include <cstring>
  47
  48 #include "gromacs/commandline/filenm.h"
  49 #include "gromacs/mdtypes/commrec.h"
  50 #include "gromacs/utility/basenetwork.h"
  51 #include "gromacs/utility/cstringutil.h"
  52 #include "gromacs/utility/fatalerror.h"
  53 #include "gromacs/utility/futil.h"
  54 #include "gromacs/utility/gmxmpi.h"
  55 #include "gromacs/utility/mpiinplacebuffers.h"
  56 #include "gromacs/utility/real.h"
  57 #include "gromacs/utility/smalloc.h"
  58
  59 /* The source code in this file should be thread-safe.
  60       Please keep it that way. */
  61
  62 void gmx_fill_commrec_from_mpi(t_commrec *cr)
  63 {
  64 #if !GMX_MPI
  65     gmx_call("gmx_fill_commrec_from_mpi");
  66     GMX_UNUSED_VALUE(cr);
  67 #else
  68     if (!gmx_mpi_initialized())
  69     {
  70         gmx_comm("MPI has not been initialized properly");
  71     }
  72
  73     cr->nnodes           = gmx_node_num();
  74     cr->nodeid           = gmx_node_rank();
  75     cr->sim_nodeid       = cr->nodeid;
  76     cr->mpi_comm_mysim   = MPI_COMM_WORLD;
  77     cr->mpi_comm_mygroup = MPI_COMM_WORLD;
  78 #endif
  79 }
  80
  81 CommrecHandle init_commrec()
  82 {
  83     CommrecHandle handle;
  84     t_commrec    *cr;
  85
  86     snew(cr, 1);
  87     handle.reset(cr);
  88
  89 #if GMX_LIB_MPI
  90     gmx_fill_commrec_from_mpi(cr);
  91 #else
  92     cr->mpi_comm_mysim   = MPI_COMM_NULL;
  93     cr->mpi_comm_mygroup = MPI_COMM_NULL;
  94     cr->nnodes           = 1;
  95     cr->sim_nodeid       = 0;
  96     cr->nodeid           = cr->sim_nodeid;
  97 #endif
  98
  99     // TODO cr->duty should not be initialized here
 100     cr->duty = (DUTY_PP | DUTY_PME);
 101
 102 #if GMX_MPI && !MPI_IN_PLACE_EXISTS
 103     /* initialize the MPI_IN_PLACE replacement buffers */
 104     snew(cr->mpb, 1);
 105     cr->mpb->ibuf        = nullptr;
 106     cr->mpb->libuf       = nullptr;
 107     cr->mpb->fbuf        = nullptr;
 108     cr->mpb->dbuf        = nullptr;
 109     cr->mpb->ibuf_alloc  = 0;
 110     cr->mpb->libuf_alloc = 0;
 111     cr->mpb->fbuf_alloc  = 0;
 112     cr->mpb->dbuf_alloc  = 0;
 113 #endif
 114
 115     return handle;
 116 }
 117
 118 void done_commrec(t_commrec *cr)
 119 {
 120     if (MASTER(cr))
 121     {
 122         if (nullptr != cr->dd)
 123         {
 124             // TODO: implement
 125             // done_domdec(cr->dd);
 126         }
 127         done_mpi_in_place_buf(cr->mpb);
 128     }
 129 #if GMX_MPI
 130     // TODO We need to be able to free communicators, but the
 131     // structure of the commrec and domdec initialization code makes
 132     // it hard to avoid both leaks and double frees.
 133     bool mySimIsMyGroup = (cr->mpi_comm_mysim == cr->mpi_comm_mygroup);
 134     if (cr->mpi_comm_mysim != MPI_COMM_NULL &&
 135         cr->mpi_comm_mysim != MPI_COMM_WORLD)
 136     {
 137         // TODO see above
 138         // MPI_Comm_free(&cr->mpi_comm_mysim);
 139     }
 140     if (!mySimIsMyGroup &&
 141         cr->mpi_comm_mygroup != MPI_COMM_NULL &&
 142         cr->mpi_comm_mygroup != MPI_COMM_WORLD)
 143     {
 144         // TODO see above
 145         // MPI_Comm_free(&cr->mpi_comm_mygroup);
 146     }
 147 #endif
 148     sfree(cr);
 149 }
 150
 151 t_commrec *reinitialize_commrec_for_this_thread(const t_commrec *cro)
 152 {
 153 #if GMX_THREAD_MPI
 154     t_commrec *cr;
 155
 156     /* make a thread-specific commrec */
 157     snew(cr, 1);
 158     /* now copy the whole thing, so settings like the number of PME nodes
 159        get propagated. */
 160     *cr = *cro;
 161
 162     /* and we start setting our own thread-specific values for things */
 163     gmx_fill_commrec_from_mpi(cr);
 164
 165     // TODO cr->duty should not be initialized here
 166     cr->duty             = (DUTY_PP | DUTY_PME);
 167
 168     return cr;
 169 #else
 170     GMX_UNUSED_VALUE(cro);
 171     return nullptr;
 172 #endif
 173 }
 174
 175 void gmx_setup_nodecomm(FILE gmx_unused *fplog, t_commrec *cr)
 176 {
 177     gmx_nodecomm_t *nc;
 178
 179     /* Many MPI implementations do not optimize MPI_Allreduce
 180      * (and probably also other global communication calls)
 181      * for multi-core nodes connected by a network.
 182      * We can optimize such communication by using one MPI call
 183      * within each node and one between the nodes.
 184      * For MVAPICH2 and Intel MPI this reduces the time for
 185      * the global_stat communication by 25%
 186      * for 2x2-core 3 GHz Woodcrest connected by mixed DDR/SDR Infiniband.
 187      * B. Hess, November 2007
 188      */
 189
 190     nc = &cr->nc;
 191
 192     nc->bUse = FALSE;
 193 #if !GMX_THREAD_MPI
 194 #if GMX_MPI
 195     int n, rank;
 196
 197     // TODO PhysicalNodeCommunicator could be extended/used to handle
 198     // the need for per-node per-group communicators.
 199     MPI_Comm_size(cr->mpi_comm_mygroup, &n);
 200     MPI_Comm_rank(cr->mpi_comm_mygroup, &rank);
 201
 202     int nodehash = gmx_physicalnode_id_hash();
 203
 204     if (debug)
 205     {
 206         fprintf(debug, "In gmx_setup_nodecomm: splitting communicator of size %d\n", n);
 207     }
 208
 209
 210     /* The intra-node communicator, split on node number */
 211     MPI_Comm_split(cr->mpi_comm_mygroup, nodehash, rank, &nc->comm_intra);
 212     MPI_Comm_rank(nc->comm_intra, &nc->rank_intra);
 213     if (debug)
 214     {
 215         fprintf(debug, "In gmx_setup_nodecomm: node ID %d rank within node %d\n",
 216                 rank, nc->rank_intra);
 217     }
 218     /* The inter-node communicator, split on rank_intra.
 219      * We actually only need the one for rank=0,
 220      * but it is easier to create them all.
 221      */
 222     MPI_Comm_split(cr->mpi_comm_mygroup, nc->rank_intra, rank, &nc->comm_inter);
 223     /* Check if this really created two step communication */
 224     int ng, ni;
 225
 226     MPI_Comm_size(nc->comm_inter, &ng);
 227     MPI_Comm_size(nc->comm_intra, &ni);
 228     if (debug)
 229     {
 230         fprintf(debug, "In gmx_setup_nodecomm: groups %d, my group size %d\n",
 231                 ng, ni);
 232     }
 233
 234     if (getenv("GMX_NO_NODECOMM") == nullptr &&
 235         ((ng > 1 && ng < n) || (ni > 1 && ni < n)))
 236     {
 237         nc->bUse = TRUE;
 238         if (fplog)
 239         {
 240             fprintf(fplog, "Using two step summing over %d groups of on average %.1f ranks\n\n",
 241                     ng, (real)n/(real)ng);
 242         }
 243         if (nc->rank_intra > 0)
 244         {
 245             MPI_Comm_free(&nc->comm_inter);
 246         }
 247     }
 248     else
 249     {
 250         /* One group or all processes in a separate group, use normal summing */
 251         MPI_Comm_free(&nc->comm_inter);
 252         MPI_Comm_free(&nc->comm_intra);
 253         if (debug)
 254         {
 255             fprintf(debug, "In gmx_setup_nodecomm: not unsing separate inter- and intra-node communicators.\n");
 256         }
 257     }
 258 #endif
 259 #else
 260     /* tMPI runs only on a single node so just use the nodeid */
 261     nc->rank_intra = cr->nodeid;
 262 #endif
 263 }
 264
 265 void gmx_barrier(const t_commrec gmx_unused *cr)
 266 {
 267 #if !GMX_MPI
 268     gmx_call("gmx_barrier");
 269 #else
 270     MPI_Barrier(cr->mpi_comm_mygroup);
 271 #endif
 272 }
 273
 274 void gmx_bcast(int gmx_unused nbytes, void gmx_unused *b, const t_commrec gmx_unused *cr)
 275 {
 276 #if !GMX_MPI
 277     gmx_call("gmx_bast");
 278 #else
 279     MPI_Bcast(b, nbytes, MPI_BYTE, MASTERRANK(cr), cr->mpi_comm_mygroup);
 280 #endif
 281 }
 282
 283 void gmx_bcast_sim(int gmx_unused nbytes, void gmx_unused *b, const t_commrec gmx_unused *cr)
 284 {
 285 #if !GMX_MPI
 286     gmx_call("gmx_bast");
 287 #else
 288     MPI_Bcast(b, nbytes, MPI_BYTE, MASTERRANK(cr), cr->mpi_comm_mysim);
 289 #endif
 290 }
 291
 292 void gmx_sumd(int gmx_unused nr, double gmx_unused r[], const t_commrec gmx_unused *cr)
 293 {
 294 #if !GMX_MPI
 295     gmx_call("gmx_sumd");
 296 #else
 297 #if MPI_IN_PLACE_EXISTS
 298     if (cr->nc.bUse)
 299     {
 300         if (cr->nc.rank_intra == 0)
 301         {
 302             /* Use two step summing. */
 303             MPI_Reduce(MPI_IN_PLACE, r, nr, MPI_DOUBLE, MPI_SUM, 0,
 304                        cr->nc.comm_intra);
 305             /* Sum the roots of the internal (intra) buffers. */
 306             MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_DOUBLE, MPI_SUM,
 307                           cr->nc.comm_inter);
 308         }
 309         else
 310         {
 311             /* This is here because of the silly MPI specification
 312                 that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
 313             MPI_Reduce(r, nullptr, nr, MPI_DOUBLE, MPI_SUM, 0, cr->nc.comm_intra);
 314         }
 315         MPI_Bcast(r, nr, MPI_DOUBLE, 0, cr->nc.comm_intra);
 316     }
 317     else
 318     {
 319         MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_DOUBLE, MPI_SUM,
 320                       cr->mpi_comm_mygroup);
 321     }
 322 #else
 323     int i;
 324
 325     if (nr > cr->mpb->dbuf_alloc)
 326     {
 327         cr->mpb->dbuf_alloc = nr;
 328         srenew(cr->mpb->dbuf, cr->mpb->dbuf_alloc);
 329     }
 330     if (cr->nc.bUse)
 331     {
 332         /* Use two step summing */
 333         MPI_Allreduce(r, cr->mpb->dbuf, nr, MPI_DOUBLE, MPI_SUM, cr->nc.comm_intra);
 334         if (cr->nc.rank_intra == 0)
 335         {
 336             /* Sum with the buffers reversed */
 337             MPI_Allreduce(cr->mpb->dbuf, r, nr, MPI_DOUBLE, MPI_SUM,
 338                           cr->nc.comm_inter);
 339         }
 340         MPI_Bcast(r, nr, MPI_DOUBLE, 0, cr->nc.comm_intra);
 341     }
 342     else
 343     {
 344         MPI_Allreduce(r, cr->mpb->dbuf, nr, MPI_DOUBLE, MPI_SUM,
 345                       cr->mpi_comm_mygroup);
 346         for (i = 0; i < nr; i++)
 347         {
 348             r[i] = cr->mpb->dbuf[i];
 349         }
 350     }
 351 #endif
 352 #endif
 353 }
 354
 355 void gmx_sumf(int gmx_unused nr, float gmx_unused r[], const t_commrec gmx_unused *cr)
 356 {
 357 #if !GMX_MPI
 358     gmx_call("gmx_sumf");
 359 #else
 360 #if MPI_IN_PLACE_EXISTS
 361     if (cr->nc.bUse)
 362     {
 363         /* Use two step summing.  */
 364         if (cr->nc.rank_intra == 0)
 365         {
 366             MPI_Reduce(MPI_IN_PLACE, r, nr, MPI_FLOAT, MPI_SUM, 0,
 367                        cr->nc.comm_intra);
 368             /* Sum the roots of the internal (intra) buffers */
 369             MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_FLOAT, MPI_SUM,
 370                           cr->nc.comm_inter);
 371         }
 372         else
 373         {
 374             /* This is here because of the silly MPI specification
 375                 that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
 376             MPI_Reduce(r, nullptr, nr, MPI_FLOAT, MPI_SUM, 0, cr->nc.comm_intra);
 377         }
 378         MPI_Bcast(r, nr, MPI_FLOAT, 0, cr->nc.comm_intra);
 379     }
 380     else
 381     {
 382         MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_FLOAT, MPI_SUM, cr->mpi_comm_mygroup);
 383     }
 384 #else
 385     int i;
 386
 387     if (nr > cr->mpb->fbuf_alloc)
 388     {
 389         cr->mpb->fbuf_alloc = nr;
 390         srenew(cr->mpb->fbuf, cr->mpb->fbuf_alloc);
 391     }
 392     if (cr->nc.bUse)
 393     {
 394         /* Use two step summing */
 395         MPI_Allreduce(r, cr->mpb->fbuf, nr, MPI_FLOAT, MPI_SUM, cr->nc.comm_intra);
 396         if (cr->nc.rank_intra == 0)
 397         {
 398             /* Sum with the buffers reversed */
 399             MPI_Allreduce(cr->mpb->fbuf, r, nr, MPI_FLOAT, MPI_SUM,
 400                           cr->nc.comm_inter);
 401         }
 402         MPI_Bcast(r, nr, MPI_FLOAT, 0, cr->nc.comm_intra);
 403     }
 404     else
 405     {
 406         MPI_Allreduce(r, cr->mpb->fbuf, nr, MPI_FLOAT, MPI_SUM,
 407                       cr->mpi_comm_mygroup);
 408         for (i = 0; i < nr; i++)
 409         {
 410             r[i] = cr->mpb->fbuf[i];
 411         }
 412     }
 413 #endif
 414 #endif
 415 }
 416
 417 void gmx_sumi(int gmx_unused nr, int gmx_unused r[], const t_commrec gmx_unused *cr)
 418 {
 419 #if !GMX_MPI
 420     gmx_call("gmx_sumi");
 421 #else
 422 #if MPI_IN_PLACE_EXISTS
 423     if (cr->nc.bUse)
 424     {
 425         /* Use two step summing */
 426         if (cr->nc.rank_intra == 0)
 427         {
 428             MPI_Reduce(MPI_IN_PLACE, r, nr, MPI_INT, MPI_SUM, 0, cr->nc.comm_intra);
 429             /* Sum with the buffers reversed */
 430             MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT, MPI_SUM, cr->nc.comm_inter);
 431         }
 432         else
 433         {
 434             /* This is here because of the silly MPI specification
 435                 that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
 436             MPI_Reduce(r, nullptr, nr, MPI_INT, MPI_SUM, 0, cr->nc.comm_intra);
 437         }
 438         MPI_Bcast(r, nr, MPI_INT, 0, cr->nc.comm_intra);
 439     }
 440     else
 441     {
 442         MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT, MPI_SUM, cr->mpi_comm_mygroup);
 443     }
 444 #else
 445     int i;
 446
 447     if (nr > cr->mpb->ibuf_alloc)
 448     {
 449         cr->mpb->ibuf_alloc = nr;
 450         srenew(cr->mpb->ibuf, cr->mpb->ibuf_alloc);
 451     }
 452     if (cr->nc.bUse)
 453     {
 454         /* Use two step summing */
 455         MPI_Allreduce(r, cr->mpb->ibuf, nr, MPI_INT, MPI_SUM, cr->nc.comm_intra);
 456         if (cr->nc.rank_intra == 0)
 457         {
 458             /* Sum with the buffers reversed */
 459             MPI_Allreduce(cr->mpb->ibuf, r, nr, MPI_INT, MPI_SUM, cr->nc.comm_inter);
 460         }
 461         MPI_Bcast(r, nr, MPI_INT, 0, cr->nc.comm_intra);
 462     }
 463     else
 464     {
 465         MPI_Allreduce(r, cr->mpb->ibuf, nr, MPI_INT, MPI_SUM, cr->mpi_comm_mygroup);
 466         for (i = 0; i < nr; i++)
 467         {
 468             r[i] = cr->mpb->ibuf[i];
 469         }
 470     }
 471 #endif
 472 #endif
 473 }
 474
 475 void gmx_sumli(int gmx_unused nr, int64_t gmx_unused r[], const t_commrec gmx_unused *cr)
 476 {
 477 #if !GMX_MPI
 478     gmx_call("gmx_sumli");
 479 #else
 480 #if MPI_IN_PLACE_EXISTS
 481     if (cr->nc.bUse)
 482     {
 483         /* Use two step summing */
 484         if (cr->nc.rank_intra == 0)
 485         {
 486             MPI_Reduce(MPI_IN_PLACE, r, nr, MPI_INT64_T, MPI_SUM, 0,
 487                        cr->nc.comm_intra);
 488             /* Sum with the buffers reversed */
 489             MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT64_T, MPI_SUM,
 490                           cr->nc.comm_inter);
 491         }
 492         else
 493         {
 494             /* This is here because of the silly MPI specification
 495                 that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
 496             MPI_Reduce(r, nullptr, nr, MPI_INT64_T, MPI_SUM, 0, cr->nc.comm_intra);
 497         }
 498         MPI_Bcast(r, nr, MPI_INT64_T, 0, cr->nc.comm_intra);
 499     }
 500     else
 501     {
 502         MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT64_T, MPI_SUM, cr->mpi_comm_mygroup);
 503     }
 504 #else
 505     int i;
 506
 507     if (nr > cr->mpb->libuf_alloc)
 508     {
 509         cr->mpb->libuf_alloc = nr;
 510         srenew(cr->mpb->libuf, cr->mpb->libuf_alloc);
 511     }
 512     if (cr->nc.bUse)
 513     {
 514         /* Use two step summing */
 515         MPI_Allreduce(r, cr->mpb->libuf, nr, MPI_INT64_T, MPI_SUM,
 516                       cr->nc.comm_intra);
 517         if (cr->nc.rank_intra == 0)
 518         {
 519             /* Sum with the buffers reversed */
 520             MPI_Allreduce(cr->mpb->libuf, r, nr, MPI_INT64_T, MPI_SUM,
 521                           cr->nc.comm_inter);
 522         }
 523         MPI_Bcast(r, nr, MPI_INT64_T, 0, cr->nc.comm_intra);
 524     }
 525     else
 526     {
 527         MPI_Allreduce(r, cr->mpb->libuf, nr, MPI_INT64_T, MPI_SUM,
 528                       cr->mpi_comm_mygroup);
 529         for (i = 0; i < nr; i++)
 530         {
 531             r[i] = cr->mpb->libuf[i];
 532         }
 533     }
 534 #endif
 535 #endif
 536 }
 537
 538 const char *opt2fn_master(const char *opt, int nfile, const t_filenm fnm[],
 539                           t_commrec *cr)
 540 {
 541     return SIMMASTER(cr) ? opt2fn(opt, nfile, fnm) : nullptr;
 542 }
 543
 544 void gmx_fatal_collective(int f_errno, const char *file, int line,
 545                           MPI_Comm comm, gmx_bool bMaster,
 546                           gmx_fmtstr const char *fmt, ...)
 547 {
 548     va_list  ap;
 549     gmx_bool bFinalize;
 550 #if GMX_MPI
 551     int      result;
 552     /* Check if we are calling on all processes in MPI_COMM_WORLD */
 553     MPI_Comm_compare(comm, MPI_COMM_WORLD, &result);
 554     /* Any result except MPI_UNEQUAL allows us to call MPI_Finalize */
 555     bFinalize = (result != MPI_UNEQUAL);
 556 #else
 557     GMX_UNUSED_VALUE(comm);
 558     bFinalize = TRUE;
 559 #endif
 560
 561     va_start(ap, fmt);
 562     gmx_fatal_mpi_va(f_errno, file, line, bMaster, bFinalize, fmt, ap);
 563     va_end(ap);
 564 }
 565
 566 void simulationBarrier(const t_commrec *cr)
 567 {
 568     if (PAR(cr))
 569     {
 570 #if GMX_MPI
 571         MPI_Barrier(cr->mpi_comm_mysim);
 572 #endif
 573     }
 574 }