Merge "Merge release-4-6 into master"
src/gromacs/mdlib/pme_pp.c
/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 *
 *
 *                This source code is part of
 *
 *                 G   R   O   M   A   C   S
 *
 *          GROningen MAchine for Chemical Simulations
 *
 *                        VERSION 3.2.0
 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2004, The GROMACS development team,
 * check out http://www.gromacs.org for more information.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * If you want to redistribute modifications, please consider that
 * scientific software is very special. Version control is crucial -
 * bugs must be traceable. We will be happy to consider code for
 * inclusion in the official distribution, but derived work must not
 * be called official GROMACS. Details are found in the README & COPYING
 * files - if they are missing, get the official version at www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the papers on the package - you can find them in the top README file.
 *
 * For more info, check our website at http://www.gromacs.org
 *
 * And Hey:
 * GROwing Monsters And Cloning Shrimps
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif


#include <stdio.h>
#include <string.h>
#include <math.h>
#include "typedefs.h"
#include "smalloc.h"
#include "gmx_fatal.h"
#include "vec.h"
#include "pme.h"
#include "network.h"
#include "domdec.h"
#include "sighandler.h"

#ifdef GMX_LIB_MPI
#include <mpi.h>
#endif
#ifdef GMX_THREAD_MPI
#include "tmpi.h"
#endif

#define PP_PME_CHARGE         (1<<0)
#define PP_PME_CHARGEB        (1<<1)
#define PP_PME_COORD          (1<<2)
#define PP_PME_FEP            (1<<3)
#define PP_PME_ENER_VIR       (1<<4)
#define PP_PME_FINISH         (1<<5)
#define PP_PME_SWITCHGRID     (1<<6)
#define PP_PME_RESETCOUNTERS  (1<<7)

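/* Added note: these flag bits are OR-ed into a single integer per PP->PME
 * message and tested individually on the receiving side. For example, a
 * coordinate message for a free-energy step that also needs the energy and
 * virial carries
 *
 *     flags = PP_PME_COORD | PP_PME_FEP | PP_PME_ENER_VIR;
 *
 * as assembled in gmx_pme_send_x() below.
 */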

#define PME_PP_SIGSTOP        (1<<0)
#define PME_PP_SIGSTOPNSS     (1<<1)

typedef struct gmx_pme_pp {
#ifdef GMX_MPI
    MPI_Comm     mpi_comm_mysim;
#endif
    int          nnode;        /* The number of PP nodes to communicate with */
    int         *node;         /* The PP node ranks                          */
    int          node_peer;    /* The peer PP node rank                      */
    int         *nat;          /* The number of atoms for each PP node       */
    int          flags_charge; /* The flags sent along with the last charges */
    real        *chargeA;
    real        *chargeB;
    rvec        *x;
    rvec        *f;
    int          nalloc;
#ifdef GMX_MPI
    MPI_Request *req;
    MPI_Status  *stat;
#endif
} t_gmx_pme_pp;

typedef struct gmx_pme_comm_n_box {
    int             natoms;
    matrix          box;
    int             maxshift_x;
    int             maxshift_y;
    real            lambda;
    int             flags;
    gmx_large_int_t step;
    ivec            grid_size;  /* For PME grid tuning */
    real            ewaldcoeff; /* For PME grid tuning */
} gmx_pme_comm_n_box_t;

typedef struct {
    matrix          vir;
    real            energy;
    real            dvdlambda;
    float           cycles;
    gmx_stop_cond_t stop_cond;
} gmx_pme_comm_vir_ene_t;


gmx_pme_pp_t gmx_pme_pp_init(t_commrec *cr)
{
    struct gmx_pme_pp *pme_pp;
    int                rank;

    snew(pme_pp, 1);

#ifdef GMX_MPI
    pme_pp->mpi_comm_mysim = cr->mpi_comm_mysim;
    MPI_Comm_rank(cr->mpi_comm_mygroup, &rank);
    get_pme_ddnodes(cr, rank, &pme_pp->nnode, &pme_pp->node, &pme_pp->node_peer);
    snew(pme_pp->nat, pme_pp->nnode);
    snew(pme_pp->req, 2*pme_pp->nnode);
    snew(pme_pp->stat, 2*pme_pp->nnode);
    pme_pp->nalloc       = 0;
    pme_pp->flags_charge = 0;
#endif

    return pme_pp;
}

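/* Added usage note: a PME-only rank creates this handle once, e.g.
 *
 *     gmx_pme_pp_t pme_pp = gmx_pme_pp_init(cr);
 *
 * The chargeA/chargeB/x/f buffers inside it start out unallocated (the
 * struct is zeroed by snew) and are grown on demand by gmx_pme_recv_q_x()
 * when charge messages arrive.
 */
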
/* This should be faster with a real non-blocking MPI implementation */
/* #define GMX_PME_DELAYED_WAIT */

static void gmx_pme_send_q_x_wait(gmx_domdec_t *dd)
{
#ifdef GMX_MPI
    if (dd->nreq_pme)
    {
        MPI_Waitall(dd->nreq_pme, dd->req_pme, MPI_STATUSES_IGNORE);
        dd->nreq_pme = 0;
    }
#endif
}

static void gmx_pme_send_q_x(t_commrec *cr, int flags,
                             real *chargeA, real *chargeB,
                             matrix box, rvec *x,
                             real lambda,
                             int maxshift_x, int maxshift_y,
                             gmx_large_int_t step)
{
    gmx_domdec_t         *dd;
    gmx_pme_comm_n_box_t *cnb;
    int                   n;

    dd = cr->dd;
    n  = dd->nat_home;

    if (debug)
    {
        fprintf(debug, "PP node %d sending to PME node %d: %d%s%s\n",
                cr->sim_nodeid, dd->pme_nodeid, n,
                flags & PP_PME_CHARGE ? " charges" : "",
                flags & PP_PME_COORD  ? " coordinates" : "");
    }

#ifdef GMX_PME_DELAYED_WAIT
    /* We cannot use cnb until the pending communication has finished */
    gmx_pme_send_q_x_wait(dd);
#endif

    if (dd->pme_receive_vir_ener)
    {
        /* Peer PP node: communicate all data */
        if (dd->cnb == NULL)
        {
            snew(dd->cnb, 1);
        }
        cnb = dd->cnb;

        cnb->flags      = flags;
        cnb->natoms     = n;
        cnb->maxshift_x = maxshift_x;
        cnb->maxshift_y = maxshift_y;
        cnb->lambda     = lambda;
        cnb->step       = step;
        if (flags & PP_PME_COORD)
        {
            copy_mat(box, cnb->box);
        }
#ifdef GMX_MPI
        MPI_Isend(cnb, sizeof(*cnb), MPI_BYTE,
                  dd->pme_nodeid, 0, cr->mpi_comm_mysim,
                  &dd->req_pme[dd->nreq_pme++]);
#endif
    }
    else if (flags & PP_PME_CHARGE)
    {
#ifdef GMX_MPI
        /* Communicate only the number of atoms */
        MPI_Isend(&n, sizeof(n), MPI_BYTE,
                  dd->pme_nodeid, 0, cr->mpi_comm_mysim,
                  &dd->req_pme[dd->nreq_pme++]);
#endif
    }

#ifdef GMX_MPI
    if (n > 0)
    {
        if (flags & PP_PME_CHARGE)
        {
            MPI_Isend(chargeA, n*sizeof(real), MPI_BYTE,
                      dd->pme_nodeid, 1, cr->mpi_comm_mysim,
                      &dd->req_pme[dd->nreq_pme++]);
        }
        if (flags & PP_PME_CHARGEB)
        {
            MPI_Isend(chargeB, n*sizeof(real), MPI_BYTE,
                      dd->pme_nodeid, 2, cr->mpi_comm_mysim,
                      &dd->req_pme[dd->nreq_pme++]);
        }
        if (flags & PP_PME_COORD)
        {
            MPI_Isend(x[0], n*sizeof(rvec), MPI_BYTE,
                      dd->pme_nodeid, 3, cr->mpi_comm_mysim,
                      &dd->req_pme[dd->nreq_pme++]);
        }
    }

#ifndef GMX_PME_DELAYED_WAIT
    /* Wait for the sends to complete. With GMX_PME_DELAYED_WAIT this wait
     * could be skipped, since x and q will not be modified before the next
     * call to gmx_pme_send_q_x() or gmx_pme_receive_f().
     */
    gmx_pme_send_q_x_wait(dd);
#endif
#endif
}
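
/* Added summary of the MPI tags used by this file, as can be read off the
 * send/receive calls above and below:
 *
 *   PP -> PME:  tag 0  header (gmx_pme_comm_n_box_t, or just the atom count)
 *               tag 1  A-state charges
 *               tag 2  B-state charges
 *               tag 3  coordinates
 *   PME -> PP:  tag 0  forces
 *               tag 1  virial/energy/cycles (gmx_pme_comm_vir_ene_t)
 */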

void gmx_pme_send_q(t_commrec *cr,
                    gmx_bool bFreeEnergy, real *chargeA, real *chargeB,
                    int maxshift_x, int maxshift_y)
{
    int flags;

    flags = PP_PME_CHARGE;
    if (bFreeEnergy)
    {
        flags |= PP_PME_CHARGEB;
    }

    gmx_pme_send_q_x(cr, flags,
                     chargeA, chargeB, NULL, NULL, 0, maxshift_x, maxshift_y, -1);
}

void gmx_pme_send_x(t_commrec *cr, matrix box, rvec *x,
                    gmx_bool bFreeEnergy, real lambda,
                    gmx_bool bEnerVir,
                    gmx_large_int_t step)
{
    int flags;

    flags = PP_PME_COORD;
    if (bFreeEnergy)
    {
        flags |= PP_PME_FEP;
    }
    if (bEnerVir)
    {
        flags |= PP_PME_ENER_VIR;
    }

    gmx_pme_send_q_x(cr, flags, NULL, NULL, box, x, lambda, 0, 0, step);
}
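
/* Illustrative sketch (not part of the original code): on a PP rank a
 * coordinate send is normally paired with a force receive later in the same
 * MD step; the variable names here are placeholders.
 *
 *     gmx_pme_send_x(cr, box, x, bFreeEnergy, lambda, bEnerVir, step);
 *     ... the PP rank does its non-PME force work here ...
 *     gmx_pme_receive_f(cr, f, vir, &energy, &dvdlambda, &pme_cycles);
 */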

void gmx_pme_send_finish(t_commrec *cr)
{
    int flags;

    flags = PP_PME_FINISH;

    gmx_pme_send_q_x(cr, flags, NULL, NULL, NULL, NULL, 0, 0, 0, -1);
}

void gmx_pme_send_switchgrid(t_commrec *cr, ivec grid_size, real ewaldcoeff)
{
#ifdef GMX_MPI
    gmx_pme_comm_n_box_t cnb;

    /* Only let one PP node signal each PME node */
    if (cr->dd->pme_receive_vir_ener)
    {
        cnb.flags = PP_PME_SWITCHGRID;
        copy_ivec(grid_size, cnb.grid_size);
        cnb.ewaldcoeff = ewaldcoeff;

        /* We send this uncommon message with a blocking send to keep the code simple */
        MPI_Send(&cnb, sizeof(cnb), MPI_BYTE,
                 cr->dd->pme_nodeid, 0, cr->mpi_comm_mysim);
    }
#endif
}

void gmx_pme_send_resetcounters(t_commrec *cr, gmx_large_int_t step)
{
#ifdef GMX_MPI
    gmx_pme_comm_n_box_t cnb;

    /* Only let one PP node signal each PME node */
    if (cr->dd->pme_receive_vir_ener)
    {
        cnb.flags = PP_PME_RESETCOUNTERS;
        cnb.step  = step;

        /* We send this uncommon message with a blocking send to keep the code simple */
        MPI_Send(&cnb, sizeof(cnb), MPI_BYTE,
                 cr->dd->pme_nodeid, 0, cr->mpi_comm_mysim);
    }
#endif
}
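
/* Added note: the two special messages above reuse the tag-0 header path.
 * On the PME side they surface as the pmerecvqxSWITCHGRID and
 * pmerecvqxRESETCOUNTERS return values of gmx_pme_recv_q_x(); see the usage
 * sketch after that function below.
 */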

int gmx_pme_recv_q_x(struct gmx_pme_pp *pme_pp,
                     int *natoms,
                     real **chargeA, real **chargeB,
                     matrix box, rvec **x, rvec **f,
                     int *maxshift_x, int *maxshift_y,
                     gmx_bool *bFreeEnergy, real *lambda,
                     gmx_bool *bEnerVir,
                     gmx_large_int_t *step,
                     ivec grid_size, real *ewaldcoeff)
{
    gmx_pme_comm_n_box_t cnb;
    int                  nat = 0, q, messages, sender;
    real                *charge_pp;

    messages = 0;

    /* avoid compiler warning about unused variable without MPI support */
    cnb.flags = 0;
#ifdef GMX_MPI
    do
    {
        /* Receive the send count, box and time step from the peer PP node */
        MPI_Recv(&cnb, sizeof(cnb), MPI_BYTE,
                 pme_pp->node_peer, 0,
                 pme_pp->mpi_comm_mysim, MPI_STATUS_IGNORE);

        if (debug)
        {
            fprintf(debug, "PME only node receiving:%s%s%s%s%s\n",
                    (cnb.flags & PP_PME_CHARGE)        ? " charges" : "",
                    (cnb.flags & PP_PME_COORD )        ? " coordinates" : "",
                    (cnb.flags & PP_PME_FINISH)        ? " finish" : "",
                    (cnb.flags & PP_PME_SWITCHGRID)    ? " switch grid" : "",
                    (cnb.flags & PP_PME_RESETCOUNTERS) ? " reset counters" : "");
        }

        if (cnb.flags & PP_PME_SWITCHGRID)
        {
            /* Special case, receive the new parameters and return */
            copy_ivec(cnb.grid_size, grid_size);
            *ewaldcoeff = cnb.ewaldcoeff;

            return pmerecvqxSWITCHGRID;
        }

        if (cnb.flags & PP_PME_RESETCOUNTERS)
        {
            /* Special case, receive the step and return */
            *step = cnb.step;

            return pmerecvqxRESETCOUNTERS;
        }

        if (cnb.flags & PP_PME_CHARGE)
        {
            /* Receive the send counts from the other PP nodes */
            for (sender = 0; sender < pme_pp->nnode; sender++)
            {
                if (pme_pp->node[sender] == pme_pp->node_peer)
                {
                    pme_pp->nat[sender] = cnb.natoms;
                }
                else
                {
                    MPI_Irecv(&(pme_pp->nat[sender]), sizeof(pme_pp->nat[0]),
                              MPI_BYTE,
                              pme_pp->node[sender], 0,
                              pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]);
                }
            }
            MPI_Waitall(messages, pme_pp->req, pme_pp->stat);
            messages = 0;

            nat = 0;
            for (sender = 0; sender < pme_pp->nnode; sender++)
            {
                nat += pme_pp->nat[sender];
            }

            if (nat > pme_pp->nalloc)
            {
                pme_pp->nalloc = over_alloc_dd(nat);
                srenew(pme_pp->chargeA, pme_pp->nalloc);
                if (cnb.flags & PP_PME_CHARGEB)
                {
                    srenew(pme_pp->chargeB, pme_pp->nalloc);
                }
                srenew(pme_pp->x, pme_pp->nalloc);
                srenew(pme_pp->f, pme_pp->nalloc);
            }

            /* maxshift is sent when the charges are sent */
            *maxshift_x = cnb.maxshift_x;
            *maxshift_y = cnb.maxshift_y;

            /* Receive the charges in place */
            for (q = 0; q < ((cnb.flags & PP_PME_CHARGEB) ? 2 : 1); q++)
            {
                if (q == 0)
                {
                    charge_pp = pme_pp->chargeA;
                }
                else
                {
                    charge_pp = pme_pp->chargeB;
                }
                nat = 0;
                for (sender = 0; sender < pme_pp->nnode; sender++)
                {
                    if (pme_pp->nat[sender] > 0)
                    {
                        MPI_Irecv(charge_pp+nat,
                                  pme_pp->nat[sender]*sizeof(real),
                                  MPI_BYTE,
                                  pme_pp->node[sender], 1+q,
                                  pme_pp->mpi_comm_mysim,
                                  &pme_pp->req[messages++]);
                        nat += pme_pp->nat[sender];
                        if (debug)
                        {
                            fprintf(debug, "Received from PP node %d: %d "
                                    "charges\n",
                                    pme_pp->node[sender], pme_pp->nat[sender]);
                        }
                    }
                }
            }

            pme_pp->flags_charge = cnb.flags;
        }

        if (cnb.flags & PP_PME_COORD)
        {
            if (!(pme_pp->flags_charge & PP_PME_CHARGE))
            {
                gmx_incons("PME-only node received coordinates before charges");
            }

            /* The box, FE flag and lambda are sent along with the coordinates */
            copy_mat(cnb.box, box);
            *bFreeEnergy = (cnb.flags & PP_PME_FEP);
            *lambda      = cnb.lambda;
            *bEnerVir    = (cnb.flags & PP_PME_ENER_VIR);

            if (*bFreeEnergy && !(pme_pp->flags_charge & PP_PME_CHARGEB))
            {
                gmx_incons("PME-only node received free energy request, but "
                           "did not receive B-state charges");
            }

            /* Receive the coordinates in place */
            nat = 0;
            for (sender = 0; sender < pme_pp->nnode; sender++)
            {
                if (pme_pp->nat[sender] > 0)
                {
                    MPI_Irecv(pme_pp->x[nat], pme_pp->nat[sender]*sizeof(rvec),
                              MPI_BYTE,
                              pme_pp->node[sender], 3,
                              pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]);
                    nat += pme_pp->nat[sender];
                    if (debug)
                    {
                        fprintf(debug, "Received from PP node %d: %d "
                                "coordinates\n",
                                pme_pp->node[sender], pme_pp->nat[sender]);
                    }
                }
            }
        }

        /* Wait for the coordinates and/or charges to arrive */
        MPI_Waitall(messages, pme_pp->req, pme_pp->stat);
        messages = 0;
    }
    while (!(cnb.flags & (PP_PME_COORD | PP_PME_FINISH)));

    *step = cnb.step;
#endif

    *natoms  = nat;
    *chargeA = pme_pp->chargeA;
    *chargeB = pme_pp->chargeB;
    *x       = pme_pp->x;
    *f       = pme_pp->f;

    return ((cnb.flags & PP_PME_FINISH) ? pmerecvqxFINISH : pmerecvqxX);
}

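/* A minimal sketch (not part of the original code) of the receive/dispatch
 * loop a PME-only rank is expected to run around gmx_pme_recv_q_x(). The
 * do_pme_work() call and the local variables are placeholders for whatever
 * the caller does with the received data.
 *
 *     for (;;)
 *     {
 *         ret = gmx_pme_recv_q_x(pme_pp, &natoms, &chargeA, &chargeB,
 *                                box, &x, &f, &maxshift_x, &maxshift_y,
 *                                &bFreeEnergy, &lambda, &bEnerVir, &step,
 *                                grid_size, &ewaldcoeff);
 *         if (ret == pmerecvqxSWITCHGRID)
 *         {
 *             // switch to the new grid_size/ewaldcoeff and continue
 *         }
 *         else if (ret == pmerecvqxRESETCOUNTERS)
 *         {
 *             // reset the cycle counters at the received step and continue
 *         }
 *         else if (ret == pmerecvqxFINISH)
 *         {
 *             break;
 *         }
 *         else  // pmerecvqxX: coordinates (and possibly new charges) received
 *         {
 *             do_pme_work(...);
 *             gmx_pme_send_force_vir_ener(pme_pp, f, vir,
 *                                         energy, dvdlambda, cycles);
 *         }
 *     }
 */
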
static void receive_virial_energy(t_commrec *cr,
                                  matrix vir, real *energy, real *dvdlambda,
                                  float *pme_cycles)
{
    gmx_pme_comm_vir_ene_t cve;

    if (cr->dd->pme_receive_vir_ener)
    {
        if (debug)
        {
            fprintf(debug,
                    "PP node %d receiving from PME node %d: virial and energy\n",
                    cr->sim_nodeid, cr->dd->pme_nodeid);
        }
#ifdef GMX_MPI
        MPI_Recv(&cve, sizeof(cve), MPI_BYTE, cr->dd->pme_nodeid, 1, cr->mpi_comm_mysim,
                 MPI_STATUS_IGNORE);
#else
        memset(&cve, 0, sizeof(cve));
#endif

        m_add(vir, cve.vir, vir);
        *energy     = cve.energy;
        *dvdlambda += cve.dvdlambda;
        *pme_cycles = cve.cycles;

        if (cve.stop_cond != gmx_stop_cond_none)
        {
            gmx_set_stop_condition(cve.stop_cond);
        }
    }
    else
    {
        *energy     = 0;
        *pme_cycles = 0;
    }
}

void gmx_pme_receive_f(t_commrec *cr,
                       rvec f[], matrix vir,
                       real *energy, real *dvdlambda,
                       float *pme_cycles)
{
    int natoms, i;

#ifdef GMX_PME_DELAYED_WAIT
    /* Wait for the x request to finish */
    gmx_pme_send_q_x_wait(cr->dd);
#endif

    natoms = cr->dd->nat_home;

    if (natoms > cr->dd->pme_recv_f_alloc)
    {
        cr->dd->pme_recv_f_alloc = over_alloc_dd(natoms);
        srenew(cr->dd->pme_recv_f_buf, cr->dd->pme_recv_f_alloc);
    }

#ifdef GMX_MPI
    MPI_Recv(cr->dd->pme_recv_f_buf[0],
             natoms*sizeof(rvec), MPI_BYTE,
             cr->dd->pme_nodeid, 0, cr->mpi_comm_mysim,
             MPI_STATUS_IGNORE);
#endif

    for (i = 0; i < natoms; i++)
    {
        rvec_inc(f[i], cr->dd->pme_recv_f_buf[i]);
    }

    receive_virial_energy(cr, vir, energy, dvdlambda, pme_cycles);
}

void gmx_pme_send_force_vir_ener(struct gmx_pme_pp *pme_pp,
                                 rvec *f, matrix vir,
                                 real energy, real dvdlambda,
                                 float cycles)
{
    gmx_pme_comm_vir_ene_t cve;
    int                    messages, ind_start, ind_end, receiver;

    cve.cycles = cycles;

    /* Now the evaluated forces have to be transferred to the PP nodes */
    messages = 0;
    ind_end  = 0;
    for (receiver = 0; receiver < pme_pp->nnode; receiver++)
    {
        ind_start = ind_end;
        ind_end   = ind_start + pme_pp->nat[receiver];
#ifdef GMX_MPI
        if (MPI_Isend(f[ind_start], (ind_end-ind_start)*sizeof(rvec), MPI_BYTE,
                      pme_pp->node[receiver], 0,
                      pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]) != 0)
        {
            gmx_comm("MPI_Isend failed in do_pmeonly");
        }
#endif
    }

    /* Send the virial and energy to our peer PP node */
    copy_mat(vir, cve.vir);
    cve.energy    = energy;
    cve.dvdlambda = dvdlambda;
    /* check for the signals to send back to a PP node */
    cve.stop_cond = gmx_get_stop_condition();

    cve.cycles = cycles;

    if (debug)
    {
        fprintf(debug, "PME node sending to PP node %d: virial and energy\n",
                pme_pp->node_peer);
    }
#ifdef GMX_MPI
    MPI_Isend(&cve, sizeof(cve), MPI_BYTE,
              pme_pp->node_peer, 1,
              pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]);

    /* Wait for the force and energy sends to complete */
    MPI_Waitall(messages, pme_pp->req, pme_pp->stat);
#endif
}