src/gromacs/mdlib/partdec.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 /* This file is completely threadsafe - keep it that way! */
  38 #ifdef HAVE_CONFIG_H
  39 #include <config.h>
  40 #endif
  41
  42 #include <math.h>
  43
  44 #include "sysstuff.h"
  45 #include "typedefs.h"
  46 #include "smalloc.h"
  47 #include "invblock.h"
  48 #include "macros.h"
  49 #include "main.h"
  50 #include "ns.h"
  51 #include "partdec.h"
  52 #include "splitter.h"
  53 #include "gromacs/random/random.h"
  54 #include "mtop_util.h"
  55 #include "mvdata.h"
  56 #include "vec.h"
  57
  58 typedef struct gmx_partdec_constraint
  59 {
  60     int                  left_range_receive;
  61     int                  right_range_receive;
  62     int                  left_range_send;
  63     int                  right_range_send;
  64     int                  nconstraints;
  65     int *                nlocalatoms;
  66     rvec *               sendbuf;
  67     rvec *               recvbuf;
  68 }
  69 gmx_partdec_constraint_t;
  70
  71
  72 typedef struct gmx_partdec {
  73     int   neighbor[2];                         /* The nodeids of left and right neighb */
  74     int  *cgindex;                             /* The charge group boundaries,         */
  75                                                /* size nnodes+1,                       */
  76                                                /* only allocated with particle decomp. */
  77     int  *index;                               /* The home particle boundaries,        */
  78                                                /* size nnodes+1,                       */
  79                                                /* only allocated with particle decomp. */
  80     int  shift, bshift;                        /* Coordinates are shifted left for     */
  81                                                /* 'shift' systolic pulses, and right   */
  82                                                /* for 'bshift' pulses. Forces are      */
  83                                                /* shifted right for 'shift' pulses     */
  84                                                /* and left for 'bshift' pulses         */
  85                                                /* This way is not necessary to shift   */
  86                                                /* the coordinates over the entire ring */
  87     rvec                          *vbuf;       /* Buffer for summing the forces        */
  88 #ifdef GMX_MPI
  89     MPI_Request                    mpi_req_rx; /* MPI reqs for async transfers */
  90     MPI_Request                    mpi_req_tx;
  91 #endif
  92     gmx_partdec_constraint_t *     constraints;
  93 } gmx_partdec_t;
  94
  95
  96 void gmx_tx(const t_commrec gmx_unused *cr, int gmx_unused dir, void gmx_unused *buf, int gmx_unused bufsize)
  97 {
  98 #ifndef GMX_MPI
  99     gmx_call("gmx_tx");
 100 #else
 101     int        nodeid;
 102     int        tag, flag;
 103     MPI_Status status;
 104
 105     nodeid = cr->pd->neighbor[dir];
 106
 107 #ifdef DEBUG
 108     fprintf(stderr, "gmx_tx: nodeid=%d, buf=%x, bufsize=%d\n",
 109             nodeid, buf, bufsize);
 110 #endif
 111 #ifdef MPI_TEST
 112     /* workaround for crashes encountered with MPI on IRIX 6.5 */
 113     if (cr->pd->mpi_req_tx != MPI_REQUEST_NULL)
 114     {
 115         MPI_Test(&cr->pd->mpi_req_tx, &flag, &status);
 116         if (flag == FALSE)
 117         {
 118             fprintf(stdlog, "gmx_tx called before previous send was complete: nodeid=%d, buf=%x, bufsize=%d\n",
 119                     nodeid, buf, bufsize);
 120             gmx_tx_wait(nodeid);
 121         }
 122     }
 123 #endif
 124     tag = 0;
 125     if (MPI_Isend(buf, bufsize, MPI_BYTE, RANK(cr, nodeid), tag, cr->mpi_comm_mygroup, &cr->pd->mpi_req_tx) != 0)
 126     {
 127         gmx_comm("MPI_Isend Failed");
 128     }
 129 #endif
 130 }
 131
 132 void gmx_tx_wait(const t_commrec gmx_unused *cr)
 133 {
 134 #ifndef GMX_MPI
 135     gmx_call("gmx_tx_wait");
 136 #else
 137     MPI_Status  status;
 138     int         mpi_result;
 139
 140     if ((mpi_result = MPI_Wait(&cr->pd->mpi_req_tx, &status)) != 0)
 141     {
 142         gmx_fatal(FARGS, "MPI_Wait: result=%d", mpi_result);
 143     }
 144 #endif
 145 }
 146
 147 void gmx_rx(const t_commrec gmx_unused *cr, int gmx_unused dir, void gmx_unused *buf, int gmx_unused bufsize)
 148 {
 149 #ifndef GMX_MPI
 150     gmx_call("gmx_rx");
 151 #else
 152     int nodeid;
 153     int tag;
 154
 155     nodeid = cr->pd->neighbor[dir];
 156 #ifdef DEBUG
 157     fprintf(stderr, "gmx_rx: nodeid=%d, buf=%x, bufsize=%d\n",
 158             nodeid, buf, bufsize);
 159 #endif
 160     tag = 0;
 161     if (MPI_Irecv( buf, bufsize, MPI_BYTE, RANK(cr, nodeid), tag, cr->mpi_comm_mygroup, &cr->pd->mpi_req_rx) != 0)
 162     {
 163         gmx_comm("MPI_Recv Failed");
 164     }
 165 #endif
 166 }
 167
 168 void gmx_rx_wait(const t_commrec gmx_unused *cr)
 169 {
 170 #ifndef GMX_MPI
 171     gmx_call("gmx_rx_wait");
 172 #else
 173     MPI_Status  status;
 174     int         mpi_result;
 175
 176     if ((mpi_result = MPI_Wait(&cr->pd->mpi_req_rx, &status)) != 0)
 177     {
 178         gmx_fatal(FARGS, "MPI_Wait: result=%d", mpi_result);
 179     }
 180 #endif
 181 }
 182
 183 void gmx_tx_rx_real(const t_commrec gmx_unused *cr,
 184                     int gmx_unused send_dir, real gmx_unused *send_buf, int gmx_unused send_bufsize,
 185                     int gmx_unused recv_dir, real gmx_unused *recv_buf, int gmx_unused recv_bufsize)
 186 {
 187 #ifndef GMX_MPI
 188     gmx_call("gmx_tx_rx_real");
 189 #else
 190     int        send_nodeid, recv_nodeid;
 191     int        tx_tag = 0, rx_tag = 0;
 192     MPI_Status stat;
 193
 194     send_nodeid = cr->pd->neighbor[send_dir];
 195     recv_nodeid = cr->pd->neighbor[recv_dir];
 196
 197 #ifdef GMX_DOUBLE
 198 #define mpi_type MPI_DOUBLE
 199 #else
 200 #define mpi_type MPI_FLOAT
 201 #endif
 202
 203     if (send_bufsize > 0 && recv_bufsize > 0)
 204     {
 205         MPI_Sendrecv(send_buf, send_bufsize, mpi_type, RANK(cr, send_nodeid), tx_tag,
 206                      recv_buf, recv_bufsize, mpi_type, RANK(cr, recv_nodeid), rx_tag,
 207                      cr->mpi_comm_mygroup, &stat);
 208     }
 209     else if (send_bufsize > 0)
 210     {
 211         MPI_Send(send_buf, send_bufsize, mpi_type, RANK(cr, send_nodeid), tx_tag,
 212                  cr->mpi_comm_mygroup);
 213     }
 214     else if (recv_bufsize > 0)
 215     {
 216         MPI_Recv(recv_buf, recv_bufsize, mpi_type, RANK(cr, recv_nodeid), rx_tag,
 217                  cr->mpi_comm_mygroup, &stat);
 218     }
 219 #undef mpi_type
 220 #endif
 221 }
 222
 223
 224 void gmx_tx_rx_void(const t_commrec gmx_unused *cr,
 225                     int gmx_unused send_dir, void gmx_unused *send_buf, int gmx_unused send_bufsize,
 226                     int gmx_unused recv_dir, void gmx_unused *recv_buf, int gmx_unused recv_bufsize)
 227 {
 228 #ifndef GMX_MPI
 229     gmx_call("gmx_tx_rx_void");
 230 #else
 231     int        send_nodeid, recv_nodeid;
 232     int        tx_tag = 0, rx_tag = 0;
 233     MPI_Status stat;
 234
 235     send_nodeid = cr->pd->neighbor[send_dir];
 236     recv_nodeid = cr->pd->neighbor[recv_dir];
 237
 238
 239     MPI_Sendrecv(send_buf, send_bufsize, MPI_BYTE, RANK(cr, send_nodeid), tx_tag,
 240                  recv_buf, recv_bufsize, MPI_BYTE, RANK(cr, recv_nodeid), rx_tag,
 241                  cr->mpi_comm_mygroup, &stat);
 242
 243 #endif
 244 }
 245
 246
 247 /*void gmx_wait(int dir_send,int dir_recv)*/
 248
 249 void gmx_wait(const t_commrec gmx_unused *cr)
 250 {
 251 #ifndef GMX_MPI
 252     gmx_call("gmx_wait");
 253 #else
 254     gmx_tx_wait(cr);
 255     gmx_rx_wait(cr);
 256 #endif
 257 }
 258
 259 static void set_left_right(t_commrec *cr)
 260 {
 261     cr->pd->neighbor[GMX_LEFT]  = (cr->nnodes + cr->nodeid - 1) % cr->nnodes;
 262     cr->pd->neighbor[GMX_RIGHT] = (cr->nodeid + 1) % cr->nnodes;
 263 }
 264
 265 void pd_move_f(const t_commrec *cr, rvec f[], t_nrnb *nrnb)
 266 {
 267     move_f(cr, f, cr->pd->vbuf, nrnb);
 268 }
 269
 270 int *pd_cgindex(const t_commrec *cr)
 271 {
 272     return cr->pd->cgindex;
 273 }
 274
 275 int *pd_index(const t_commrec *cr)
 276 {
 277     return cr->pd->index;
 278 }
 279
 280 int pd_shift(const t_commrec *cr)
 281 {
 282     return cr->pd->shift;
 283 }
 284
 285 int pd_bshift(const t_commrec *cr)
 286 {
 287     return cr->pd->bshift;
 288 }
 289
 290 void pd_cg_range(const t_commrec *cr, int *cg0, int *cg1)
 291 {
 292     *cg0 = cr->pd->cgindex[cr->nodeid];
 293     *cg1 = cr->pd->cgindex[cr->nodeid+1];
 294 }
 295
 296 void pd_at_range(const t_commrec *cr, int *at0, int *at1)
 297 {
 298     *at0 = cr->pd->index[cr->nodeid];
 299     *at1 = cr->pd->index[cr->nodeid+1];
 300 }
 301
 302 void
 303 pd_get_constraint_range(gmx_partdec_p_t pd, int *start, int *natoms)
 304 {
 305     *start  = pd->constraints->left_range_receive;
 306     *natoms = pd->constraints->right_range_receive-pd->constraints->left_range_receive;
 307 }
 308
 309 int *
 310 pd_constraints_nlocalatoms(gmx_partdec_p_t pd)
 311 {
 312     int *rc;
 313
 314     if (NULL != pd && NULL != pd->constraints)
 315     {
 316         rc = pd->constraints->nlocalatoms;
 317     }
 318     else
 319     {
 320         rc = NULL;
 321     }
 322     return rc;
 323 }
 324
 325
 326
 327
 328 /* This routine is used to communicate the non-home-atoms needed for constrains.
 329  * We have already calculated this range of atoms during setup, and stored in the
 330  * partdec constraints structure.
 331  *
 332  * When called, we send/receive left_range_send/receive atoms to our left (lower)
 333  * node neighbor, and similar to the right (higher) node.
 334  *
 335  * This has not been tested for periodic molecules...
 336  */
 337 void
 338 pd_move_x_constraints(t_commrec gmx_unused *cr,
 339                       rvec      gmx_unused *x0,
 340                       rvec      gmx_unused *x1)
 341 {
 342 #ifdef GMX_MPI
 343     gmx_partdec_t            *pd;
 344     gmx_partdec_constraint_t *pdc;
 345
 346     rvec                *     sendptr;
 347     rvec                *     recvptr;
 348     int                       thisnode;
 349     int                       i;
 350     int                       cnt;
 351     int                       sendcnt, recvcnt;
 352
 353     pd  = cr->pd;
 354     pdc = pd->constraints;
 355
 356     if (pdc == NULL)
 357     {
 358         return;
 359     }
 360
 361     thisnode  = cr->nodeid;
 362
 363     /* First pulse to right */
 364
 365     recvcnt = 3*(pd->index[thisnode]-pdc->left_range_receive);
 366     sendcnt = 3*(cr->pd->index[thisnode+1]-cr->pd->constraints->right_range_send);
 367
 368     if (x1 != NULL)
 369     {
 370         /* Assemble temporary array with both x0 & x1 */
 371         recvptr = pdc->recvbuf;
 372         sendptr = pdc->sendbuf;
 373
 374         cnt = 0;
 375         for (i = pdc->right_range_send; i < pd->index[thisnode+1]; i++)
 376         {
 377             copy_rvec(x0[i], sendptr[cnt++]);
 378         }
 379         for (i = pdc->right_range_send; i < pd->index[thisnode+1]; i++)
 380         {
 381             copy_rvec(x1[i], sendptr[cnt++]);
 382         }
 383         recvcnt *= 2;
 384         sendcnt *= 2;
 385     }
 386     else
 387     {
 388         recvptr = x0 + pdc->left_range_receive;
 389         sendptr = x0 + pdc->right_range_send;
 390     }
 391
 392     gmx_tx_rx_real(cr,
 393                    GMX_RIGHT, (real *)sendptr, sendcnt,
 394                    GMX_LEFT, (real *)recvptr, recvcnt);
 395
 396     if (x1 != NULL)
 397     {
 398         /* copy back to x0/x1 */
 399         cnt = 0;
 400         for (i = pdc->left_range_receive; i < pd->index[thisnode]; i++)
 401         {
 402             copy_rvec(recvptr[cnt++], x0[i]);
 403         }
 404         for (i = pdc->left_range_receive; i < pd->index[thisnode]; i++)
 405         {
 406             copy_rvec(recvptr[cnt++], x1[i]);
 407         }
 408     }
 409
 410     /* And pulse to left */
 411     sendcnt = 3*(pdc->left_range_send-pd->index[thisnode]);
 412     recvcnt = 3*(pdc->right_range_receive-pd->index[thisnode+1]);
 413
 414     if (x1 != NULL)
 415     {
 416         cnt = 0;
 417         for (i = cr->pd->index[thisnode]; i < pdc->left_range_send; i++)
 418         {
 419             copy_rvec(x0[i], sendptr[cnt++]);
 420         }
 421         for (i = cr->pd->index[thisnode]; i < pdc->left_range_send; i++)
 422         {
 423             copy_rvec(x1[i], sendptr[cnt++]);
 424         }
 425         recvcnt *= 2;
 426         sendcnt *= 2;
 427     }
 428     else
 429     {
 430         sendptr = x0 + pd->index[thisnode];
 431         recvptr = x0 + pd->index[thisnode+1];
 432     }
 433
 434     gmx_tx_rx_real(cr,
 435                    GMX_LEFT, (real *)sendptr, sendcnt,
 436                    GMX_RIGHT, (real *)recvptr, recvcnt);
 437
 438     /* Final copy back from buffers */
 439     if (x1 != NULL)
 440     {
 441         /* First copy received data back into x0 & x1 */
 442         cnt = 0;
 443         for (i = pd->index[thisnode+1]; i < pdc->right_range_receive; i++)
 444         {
 445             copy_rvec(recvptr[cnt++], x0[i]);
 446         }
 447         for (i = pd->index[thisnode+1]; i < pdc->right_range_receive; i++)
 448         {
 449             copy_rvec(recvptr[cnt++], x1[i]);
 450         }
 451     }
 452 #endif
 453 }
 454
 455 static int home_cpu(int nnodes, gmx_partdec_t *pd, int atomid)
 456 {
 457     int k;
 458
 459     for (k = 0; (k < nnodes); k++)
 460     {
 461         if (atomid < pd->index[k+1])
 462         {
 463             return k;
 464         }
 465     }
 466     gmx_fatal(FARGS, "Atomid %d is larger than number of atoms (%d)",
 467               atomid+1, pd->index[nnodes]+1);
 468
 469     return -1;
 470 }
 471
 472 static void calc_nsbshift(FILE *fp, int nnodes, gmx_partdec_t *pd, t_idef *idef)
 473 {
 474     int i, j, k;
 475     int lastcg, targetcg, nshift, naaj;
 476     int homeid[32];
 477
 478     pd->bshift = 0;
 479     for (i = 1; (i < nnodes); i++)
 480     {
 481         targetcg = pd->cgindex[i];
 482         for (nshift = i; (nshift > 0) && (pd->cgindex[nshift] > targetcg); nshift--)
 483         {
 484             ;
 485         }
 486         pd->bshift = max(pd->bshift, i-nshift);
 487     }
 488
 489     pd->shift = (nnodes + 1)/2;
 490     for (i = 0; (i < nnodes); i++)
 491     {
 492         lastcg   = pd->cgindex[i+1]-1;
 493         naaj     = calc_naaj(lastcg, pd->cgindex[nnodes]);
 494         targetcg = (lastcg+naaj) % pd->cgindex[nnodes];
 495
 496         /* Search until we find the target charge group */
 497         for (nshift = 0;
 498              (nshift < nnodes) && (targetcg > pd->cgindex[nshift+1]);
 499              nshift++)
 500         {
 501             ;
 502         }
 503         /* Now compute the shift, that is the difference in node index */
 504         nshift = ((nshift - i + nnodes) % nnodes);
 505
 506         if (fp)
 507         {
 508             fprintf(fp, "CPU=%3d, lastcg=%5d, targetcg=%5d, myshift=%5d\n",
 509                     i, lastcg, targetcg, nshift);
 510         }
 511
 512         /* It's the largest shift that matters */
 513         pd->shift = max(nshift, pd->shift);
 514     }
 515     /* Now for the bonded forces */
 516     for (i = 0; (i < F_NRE); i++)
 517     {
 518         if (interaction_function[i].flags & IF_BOND)
 519         {
 520             int nratoms = interaction_function[i].nratoms;
 521             for (j = 0; (j < idef->il[i].nr); j += nratoms+1)
 522             {
 523                 for (k = 1; (k <= nratoms); k++)
 524                 {
 525                     homeid[k-1] = home_cpu(nnodes, pd, idef->il[i].iatoms[j+k]);
 526                 }
 527                 for (k = 1; (k < nratoms); k++)
 528                 {
 529                     pd->shift = max(pd->shift, abs(homeid[k]-homeid[0]));
 530                 }
 531             }
 532         }
 533     }
 534     if (fp)
 535     {
 536         fprintf(fp, "pd->shift = %3d, pd->bshift=%3d\n",
 537                 pd->shift, pd->bshift);
 538     }
 539 }
 540
 541
 542 static void
 543 init_partdec_constraint(t_commrec *cr,
 544                         t_idef *   idef,
 545                         int       *left_range,
 546                         int       *right_range)
 547 {
 548     gmx_partdec_t *            pd = cr->pd;
 549     gmx_partdec_constraint_t  *pdc;
 550     int i, cnt, k;
 551     int ai, aj, nodei, nodej;
 552     int nratoms;
 553     int nodeid;
 554
 555     snew(pdc, 1);
 556     cr->pd->constraints = pdc;
 557
 558
 559     /* Who am I? */
 560     nodeid = cr->nodeid;
 561
 562     /* Setup LINCS communication ranges */
 563     pdc->left_range_receive   = left_range[nodeid];
 564     pdc->right_range_receive  = right_range[nodeid]+1;
 565     pdc->left_range_send      = (nodeid > 0) ? right_range[nodeid-1]+1 : 0;
 566     pdc->right_range_send     = (nodeid < cr->nnodes-1) ? left_range[nodeid+1] : pd->index[cr->nnodes];
 567
 568     snew(pdc->nlocalatoms, idef->il[F_CONSTR].nr);
 569     nratoms = interaction_function[F_CONSTR].nratoms;
 570
 571     for (i = 0, cnt = 0; i < idef->il[F_CONSTR].nr; i += nratoms+1, cnt++)
 572     {
 573         ai    = idef->il[F_CONSTR].iatoms[i+1];
 574         aj    = idef->il[F_CONSTR].iatoms[i+2];
 575         nodei = 0;
 576         while (ai >= pd->index[nodei+1])
 577         {
 578             nodei++;
 579         }
 580         nodej = 0;
 581         while (aj >= pd->index[nodej+1])
 582         {
 583             nodej++;
 584         }
 585         pdc->nlocalatoms[cnt] = 0;
 586         if (nodei == nodeid)
 587         {
 588             pdc->nlocalatoms[cnt]++;
 589         }
 590         if (nodej == nodeid)
 591         {
 592             pdc->nlocalatoms[cnt]++;
 593         }
 594     }
 595     pdc->nconstraints = cnt;
 596
 597     snew(pdc->sendbuf, max(6*(pd->index[cr->nodeid+1]-pd->constraints->right_range_send), 6*(pdc->left_range_send-pd->index[cr->nodeid])));
 598     snew(pdc->recvbuf, max(6*(pd->index[cr->nodeid]-pdc->left_range_receive), 6*(pdc->right_range_receive-pd->index[cr->nodeid+1])));
 599
 600 }
 601
 602 static void init_partdec(FILE *fp, t_commrec *cr, t_block *cgs, int *multinr,
 603                          t_idef *idef)
 604 {
 605     int            i, nodeid;
 606     gmx_partdec_t *pd;
 607
 608     snew(pd, 1);
 609     cr->pd = pd;
 610
 611     set_left_right(cr);
 612
 613     if (cr->nnodes > 1)
 614     {
 615         if (multinr == NULL)
 616         {
 617             gmx_fatal(FARGS, "Internal error in init_partdec: multinr = NULL");
 618         }
 619         snew(pd->index, cr->nnodes+1);
 620         snew(pd->cgindex, cr->nnodes+1);
 621         pd->cgindex[0] = 0;
 622         pd->index[0]   = 0;
 623         for (i = 0; (i < cr->nnodes); i++)
 624         {
 625             pd->cgindex[i+1] = multinr[i];
 626             pd->index[i+1]   = cgs->index[multinr[i]];
 627         }
 628         calc_nsbshift(fp, cr->nnodes, pd, idef);
 629         /* This is a hack to do with bugzilla 148 */
 630         /*pd->shift = cr->nnodes-1;
 631            pd->bshift = 0;*/
 632
 633         /* Allocate a buffer of size natoms of the whole system
 634          * for summing the forces over the nodes.
 635          */
 636         snew(pd->vbuf, cgs->index[cgs->nr]);
 637         pd->constraints = NULL;
 638     }
 639 #ifdef GMX_MPI
 640     pd->mpi_req_tx = MPI_REQUEST_NULL;
 641     pd->mpi_req_rx = MPI_REQUEST_NULL;
 642 #endif
 643 }
 644
 645 static void print_partdec(FILE *fp, const char *title,
 646                           int nnodes, gmx_partdec_t *pd)
 647 {
 648     int i;
 649
 650     fprintf(fp, "%s\n", title);
 651     fprintf(fp, "nnodes:       %5d\n", nnodes);
 652     fprintf(fp, "pd->shift:    %5d\n", pd->shift);
 653     fprintf(fp, "pd->bshift:   %5d\n", pd->bshift);
 654
 655     fprintf(fp, "Nodeid   atom0   #atom     cg0       #cg\n");
 656     for (i = 0; (i < nnodes); i++)
 657     {
 658         fprintf(fp, "%6d%8d%8d%8d%10d\n",
 659                 i,
 660                 pd->index[i], pd->index[i+1]-pd->index[i],
 661                 pd->cgindex[i], pd->cgindex[i+1]-pd->cgindex[i]);
 662     }
 663     fprintf(fp, "\n");
 664 }
 665
 666 static void pr_idef_division(FILE *fp, t_idef *idef, int nnodes, int **multinr)
 667 {
 668     int i, ftype, nr, nra, m0, m1;
 669
 670     fprintf(fp, "Division of bonded forces over processors\n");
 671     fprintf(fp, "%-12s", "CPU");
 672     for (i = 0; (i < nnodes); i++)
 673     {
 674         fprintf(fp, " %5d", i);
 675     }
 676     fprintf(fp, "\n");
 677
 678     for (ftype = 0; (ftype < F_NRE); ftype++)
 679     {
 680         if (idef->il[ftype].nr > 0)
 681         {
 682             nr  = idef->il[ftype].nr;
 683             nra = 1+interaction_function[ftype].nratoms;
 684             fprintf(fp, "%-12s", interaction_function[ftype].name);
 685             /* Loop over processors */
 686             for (i = 0; (i < nnodes); i++)
 687             {
 688                 m0 = (i == 0) ? 0 : multinr[ftype][i-1]/nra;
 689                 m1 = multinr[ftype][i]/nra;
 690                 fprintf(fp, " %5d", m1-m0);
 691             }
 692             fprintf(fp, "\n");
 693         }
 694     }
 695 }
 696
 697 static void select_my_ilist(t_ilist *il, int *multinr, t_commrec *cr)
 698 {
 699     t_iatom *ia;
 700     int      i, start, end, nr;
 701
 702     if (cr->nodeid == 0)
 703     {
 704         start = 0;
 705     }
 706     else
 707     {
 708         start = multinr[cr->nodeid-1];
 709     }
 710     end = multinr[cr->nodeid];
 711
 712     nr = end-start;
 713     if (nr < 0)
 714     {
 715         gmx_fatal(FARGS, "Negative number of atoms (%d) on node %d\n"
 716                   "You have probably not used the same value for -np with grompp"
 717                   " and mdrun",
 718                   nr, cr->nodeid);
 719     }
 720     snew(ia, nr);
 721
 722     for (i = 0; (i < nr); i++)
 723     {
 724         ia[i] = il->iatoms[start+i];
 725     }
 726
 727     sfree(il->iatoms);
 728     il->iatoms = ia;
 729
 730     il->nr = nr;
 731 }
 732
 733 static void select_my_idef(t_idef *idef, int **multinr, t_commrec *cr)
 734 {
 735     int i;
 736
 737     for (i = 0; (i < F_NRE); i++)
 738     {
 739         select_my_ilist(&idef->il[i], multinr[i], cr);
 740     }
 741 }
 742
 743 gmx_localtop_t *split_system(FILE *log,
 744                              gmx_mtop_t *mtop, t_inputrec *inputrec,
 745                              t_commrec *cr)
 746 {
 747     int             i, npp, n, nn;
 748     real           *capacity;
 749     double          tcap = 0, cap;
 750     int            *multinr_cgs, **multinr_nre;
 751     char           *cap_env;
 752     gmx_localtop_t *top;
 753     int            *left_range;
 754     int            *right_range;
 755
 756     /* Time to setup the division of charge groups over processors */
 757     npp = cr->nnodes-cr->npmenodes;
 758     snew(capacity, npp);
 759     cap_env = getenv("GMX_CAPACITY");
 760     if (cap_env == NULL)
 761     {
 762         for (i = 0; (i < npp-1); i++)
 763         {
 764             capacity[i] = 1.0/(double)npp;
 765             tcap       += capacity[i];
 766         }
 767         /* Take care that the sum of capacities is 1.0 */
 768         capacity[npp-1] = 1.0 - tcap;
 769     }
 770     else
 771     {
 772         tcap = 0;
 773         nn   = 0;
 774         for (i = 0; i < npp; i++)
 775         {
 776             cap = 0;
 777             sscanf(cap_env+nn, "%lf%n", &cap, &n);
 778             if (cap == 0)
 779             {
 780                 gmx_fatal(FARGS, "Incorrect entry or number of entries in GMX_CAPACITY='%s'", cap_env);
 781             }
 782             capacity[i] = cap;
 783             tcap       += cap;
 784             nn         += n;
 785         }
 786         for (i = 0; i < npp; i++)
 787         {
 788             capacity[i] /= tcap;
 789         }
 790     }
 791
 792     /* Convert the molecular topology to a fully listed topology */
 793     top = gmx_mtop_generate_local_top(mtop, inputrec);
 794
 795     snew(multinr_cgs, npp);
 796     snew(multinr_nre, F_NRE);
 797     for (i = 0; i < F_NRE; i++)
 798     {
 799         snew(multinr_nre[i], npp);
 800     }
 801
 802
 803     snew(left_range, cr->nnodes);
 804     snew(right_range, cr->nnodes);
 805
 806     /* This computes which entities can be placed on processors */
 807     split_top(log, npp, top, inputrec, &mtop->mols, capacity, multinr_cgs, multinr_nre, left_range, right_range);
 808
 809     sfree(capacity);
 810     init_partdec(log, cr, &(top->cgs), multinr_cgs, &(top->idef));
 811
 812     /* This should be fine */
 813     /*split_idef(&(top->idef),cr->nnodes-cr->npmenodes);*/
 814
 815     select_my_idef(&(top->idef), multinr_nre, cr);
 816
 817     if (top->idef.il[F_CONSTR].nr > 0)
 818     {
 819         init_partdec_constraint(cr, &(top->idef), left_range, right_range);
 820     }
 821
 822     if (log)
 823     {
 824         pr_idef_division(log, &(top->idef), npp, multinr_nre);
 825     }
 826
 827     for (i = 0; i < F_NRE; i++)
 828     {
 829         sfree(multinr_nre[i]);
 830     }
 831     sfree(multinr_nre);
 832     sfree(multinr_cgs);
 833
 834     sfree(left_range);
 835     sfree(right_range);
 836
 837     if (log)
 838     {
 839         print_partdec(log, "Workload division", cr->nnodes, cr->pd);
 840     }
 841
 842     return top;
 843 }
 844
 845 static void
 846 add_to_vsitelist(int **list, int *nitem, int *nalloc, int newitem)
 847 {
 848     int      i, idx;
 849     gmx_bool found;
 850
 851     found = FALSE;
 852     idx   = *nitem;
 853     for (i = 0; i < idx && !found; i++)
 854     {
 855         found = (newitem == (*list)[i]);
 856     }
 857     if (!found)
 858     {
 859         *nalloc += 100;
 860         srenew(*list, *nalloc);
 861         (*list)[idx++] = newitem;
 862         *nitem         = idx;
 863     }
 864 }
 865
 866 gmx_bool setup_parallel_vsites(t_idef *idef, t_commrec *cr,
 867                                t_comm_vsites *vsitecomm)
 868 {
 869     int            i, j, ftype;
 870     int            nra;
 871     gmx_bool       do_comm;
 872     t_iatom       *ia;
 873     gmx_partdec_t *pd;
 874     int            iconstruct;
 875     int            i0, i1;
 876     int            nalloc_left_construct, nalloc_right_construct;
 877     int            sendbuf[2], recvbuf[2];
 878     int            bufsize, leftbuf, rightbuf;
 879
 880     pd = cr->pd;
 881
 882     i0 = pd->index[cr->nodeid];
 883     i1 = pd->index[cr->nodeid+1];
 884
 885     vsitecomm->left_import_construct  = NULL;
 886     vsitecomm->left_import_nconstruct = 0;
 887     nalloc_left_construct             = 0;
 888
 889     vsitecomm->right_import_construct  = NULL;
 890     vsitecomm->right_import_nconstruct = 0;
 891     nalloc_right_construct             = 0;
 892
 893     for (ftype = 0; (ftype < F_NRE); ftype++)
 894     {
 895         if (!(interaction_function[ftype].flags & IF_VSITE) )
 896         {
 897             continue;
 898         }
 899
 900         nra    = interaction_function[ftype].nratoms;
 901         ia     = idef->il[ftype].iatoms;
 902
 903         for (i = 0; i < idef->il[ftype].nr; i += nra+1)
 904         {
 905             for (j = 2; j < 1+nra; j++)
 906             {
 907                 iconstruct = ia[i+j];
 908                 if (iconstruct < i0)
 909                 {
 910                     add_to_vsitelist(&vsitecomm->left_import_construct,
 911                                      &vsitecomm->left_import_nconstruct,
 912                                      &nalloc_left_construct, iconstruct);
 913                 }
 914                 else if (iconstruct >= i1)
 915                 {
 916                     add_to_vsitelist(&vsitecomm->right_import_construct,
 917                                      &vsitecomm->right_import_nconstruct,
 918                                      &nalloc_right_construct, iconstruct);
 919                 }
 920             }
 921         }
 922     }
 923
 924     /* Pre-communicate the array lengths */
 925     gmx_tx_rx_void(cr,
 926                    GMX_RIGHT, (void *)&vsitecomm->right_import_nconstruct, sizeof(int),
 927                    GMX_LEFT, (void *)&vsitecomm->left_export_nconstruct, sizeof(int));
 928     gmx_tx_rx_void(cr,
 929                    GMX_LEFT, (void *)&vsitecomm->left_import_nconstruct, sizeof(int),
 930                    GMX_RIGHT, (void *)&vsitecomm->right_export_nconstruct, sizeof(int));
 931
 932     snew(vsitecomm->left_export_construct, vsitecomm->left_export_nconstruct);
 933     snew(vsitecomm->right_export_construct, vsitecomm->right_export_nconstruct);
 934
 935     /* Communicate the construcing atom index arrays */
 936     gmx_tx_rx_void(cr,
 937                    GMX_RIGHT, (void *)vsitecomm->right_import_construct, vsitecomm->right_import_nconstruct*sizeof(int),
 938                    GMX_LEFT, (void *)vsitecomm->left_export_construct, vsitecomm->left_export_nconstruct*sizeof(int));
 939
 940     /* Communicate the construcing atom index arrays */
 941     gmx_tx_rx_void(cr,
 942                    GMX_LEFT, (void *)vsitecomm->left_import_construct, vsitecomm->left_import_nconstruct*sizeof(int),
 943                    GMX_RIGHT, (void *)vsitecomm->right_export_construct, vsitecomm->right_export_nconstruct*sizeof(int));
 944
 945     leftbuf  = max(vsitecomm->left_export_nconstruct, vsitecomm->left_import_nconstruct);
 946     rightbuf = max(vsitecomm->right_export_nconstruct, vsitecomm->right_import_nconstruct);
 947
 948     bufsize  = max(leftbuf, rightbuf);
 949
 950     do_comm = (bufsize > 0);
 951
 952     snew(vsitecomm->send_buf, 2*bufsize);
 953     snew(vsitecomm->recv_buf, 2*bufsize);
 954
 955     return do_comm;
 956 }
 957
 958 t_state *partdec_init_local_state(t_commrec gmx_unused *cr, t_state *state_global)
 959 {
 960     int      i;
 961     t_state *state_local;
 962
 963     snew(state_local, 1);
 964
 965     /* Copy all the contents */
 966     *state_local = *state_global;
 967     snew(state_local->lambda, efptNR);
 968     /* local storage for lambda */
 969     for (i = 0; i < efptNR; i++)
 970     {
 971         state_local->lambda[i] = state_global->lambda[i];
 972     }
 973     if (state_global->nrngi > 1)
 974     {
 975         /* With stochastic dynamics we need local storage for the random state */
 976         if (state_local->flags & (1<<estLD_RNG))
 977         {
 978             state_local->nrng = gmx_rng_n();
 979             snew(state_local->ld_rng, state_local->nrng);
 980 #ifdef GMX_MPI
 981             if (PAR(cr))
 982             {
 983                 MPI_Scatter(state_global->ld_rng,
 984                             state_local->nrng*sizeof(state_local->ld_rng[0]), MPI_BYTE,
 985                             state_local->ld_rng,
 986                             state_local->nrng*sizeof(state_local->ld_rng[0]), MPI_BYTE,
 987                             MASTERRANK(cr), cr->mpi_comm_mygroup);
 988             }
 989 #endif
 990         }
 991         if (state_local->flags & (1<<estLD_RNGI))
 992         {
 993             snew(state_local->ld_rngi, 1);
 994 #ifdef GMX_MPI
 995             if (PAR(cr))
 996             {
 997                 MPI_Scatter(state_global->ld_rngi,
 998                             sizeof(state_local->ld_rngi[0]), MPI_BYTE,
 999                             state_local->ld_rngi,
1000                             sizeof(state_local->ld_rngi[0]), MPI_BYTE,
1001                             MASTERRANK(cr), cr->mpi_comm_mygroup);
1002             }
1003 #endif
1004         }
1005     }
1006
1007     return state_local;
1008 }