src/gromacs/mdlib/partdec.c

   1 /*
   2  *
   3  *                This source code is part of
   4  *
   5  *                 G   R   O   M   A   C   S
   6  *
   7  *          GROningen MAchine for Chemical Simulations
   8  *
   9  *                        VERSION 3.2.0
  10  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  11  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  12  * Copyright (c) 2001-2004, The GROMACS development team,
  13  * check out http://www.gromacs.org for more information.
  14  *
  15  * This program is free software; you can redistribute it and/or
  16  * modify it under the terms of the GNU General Public License
  17  * as published by the Free Software Foundation; either version 2
  18  * of the License, or (at your option) any later version.
  19  *
  20  * If you want to redistribute modifications, please consider that
  21  * scientific software is very special. Version control is crucial -
  22  * bugs must be traceable. We will be happy to consider code for
  23  * inclusion in the official distribution, but derived work must not
  24  * be called official GROMACS. Details are found in the README & COPYING
  25  * files - if they are missing, get the official version at www.gromacs.org.
  26  *
  27  * To help us fund GROMACS development, we humbly ask that you cite
  28  * the papers on the package - you can find them in the top README file.
  29  *
  30  * For more info, check our website at http://www.gromacs.org
  31  *
  32  * And Hey:
  33  * GROwing Monsters And Cloning Shrimps
  34  */
  35 /* This file is completely threadsafe - keep it that way! */
  36 #ifdef HAVE_CONFIG_H
  37 #include <config.h>
  38 #endif
  39
  40 #include <math.h>
  41
  42 #include "sysstuff.h"
  43 #include "typedefs.h"
  44 #include "smalloc.h"
  45 #include "invblock.h"
  46 #include "macros.h"
  47 #include "main.h"
  48 #include "ns.h"
  49 #include "partdec.h"
  50 #include "splitter.h"
  51 #include "gmx_random.h"
  52 #include "mtop_util.h"
  53 #include "mvdata.h"
  54 #include "vec.h"
  55
  56 typedef struct gmx_partdec_constraint
  57 {
  58     int                  left_range_receive;
  59     int                  right_range_receive;
  60     int                  left_range_send;
  61     int                  right_range_send;
  62     int                  nconstraints;
  63     int *                nlocalatoms;
  64     rvec *               sendbuf;
  65     rvec *               recvbuf;
  66 }
  67 gmx_partdec_constraint_t;
  68
  69
  70 typedef struct gmx_partdec {
  71     int   neighbor[2];                         /* The nodeids of left and right neighb */
  72     int  *cgindex;                             /* The charge group boundaries,         */
  73                                                /* size nnodes+1,                       */
  74                                                /* only allocated with particle decomp. */
  75     int  *index;                               /* The home particle boundaries,        */
  76                                                /* size nnodes+1,                       */
  77                                                /* only allocated with particle decomp. */
  78     int  shift, bshift;                        /* Coordinates are shifted left for     */
  79                                                /* 'shift' systolic pulses, and right   */
  80                                                /* for 'bshift' pulses. Forces are      */
  81                                                /* shifted right for 'shift' pulses     */
  82                                                /* and left for 'bshift' pulses         */
  83                                                /* This way is not necessary to shift   */
  84                                                /* the coordinates over the entire ring */
  85     rvec                          *vbuf;       /* Buffer for summing the forces        */
  86 #ifdef GMX_MPI
  87     MPI_Request                    mpi_req_rx; /* MPI reqs for async transfers */
  88     MPI_Request                    mpi_req_tx;
  89 #endif
  90     gmx_partdec_constraint_t *     constraints;
  91 } gmx_partdec_t;
  92
  93
  94 void gmx_tx(const t_commrec *cr, int dir, void *buf, int bufsize)
  95 {
  96 #ifndef GMX_MPI
  97     gmx_call("gmx_tx");
  98 #else
  99     int        nodeid;
 100     int        tag, flag;
 101     MPI_Status status;
 102
 103     nodeid = cr->pd->neighbor[dir];
 104
 105 #ifdef DEBUG
 106     fprintf(stderr, "gmx_tx: nodeid=%d, buf=%x, bufsize=%d\n",
 107             nodeid, buf, bufsize);
 108 #endif
 109 #ifdef MPI_TEST
 110     /* workaround for crashes encountered with MPI on IRIX 6.5 */
 111     if (cr->pd->mpi_req_tx != MPI_REQUEST_NULL)
 112     {
 113         MPI_Test(&cr->pd->mpi_req_tx, &flag, &status);
 114         if (flag == FALSE)
 115         {
 116             fprintf(stdlog, "gmx_tx called before previous send was complete: nodeid=%d, buf=%x, bufsize=%d\n",
 117                     nodeid, buf, bufsize);
 118             gmx_tx_wait(nodeid);
 119         }
 120     }
 121 #endif
 122     tag = 0;
 123     if (MPI_Isend(buf, bufsize, MPI_BYTE, RANK(cr, nodeid), tag, cr->mpi_comm_mygroup, &cr->pd->mpi_req_tx) != 0)
 124     {
 125         gmx_comm("MPI_Isend Failed");
 126     }
 127 #endif
 128 }
 129
 130 void gmx_tx_wait(const t_commrec *cr, int dir)
 131 {
 132 #ifndef GMX_MPI
 133     gmx_call("gmx_tx_wait");
 134 #else
 135     MPI_Status  status;
 136     int         mpi_result;
 137
 138     if ((mpi_result = MPI_Wait(&cr->pd->mpi_req_tx, &status)) != 0)
 139     {
 140         gmx_fatal(FARGS, "MPI_Wait: result=%d", mpi_result);
 141     }
 142 #endif
 143 }
 144
 145 void gmx_rx(const t_commrec *cr, int dir, void *buf, int bufsize)
 146 {
 147 #ifndef GMX_MPI
 148     gmx_call("gmx_rx");
 149 #else
 150     int nodeid;
 151     int tag;
 152
 153     nodeid = cr->pd->neighbor[dir];
 154 #ifdef DEBUG
 155     fprintf(stderr, "gmx_rx: nodeid=%d, buf=%x, bufsize=%d\n",
 156             nodeid, buf, bufsize);
 157 #endif
 158     tag = 0;
 159     if (MPI_Irecv( buf, bufsize, MPI_BYTE, RANK(cr, nodeid), tag, cr->mpi_comm_mygroup, &cr->pd->mpi_req_rx) != 0)
 160     {
 161         gmx_comm("MPI_Recv Failed");
 162     }
 163 #endif
 164 }
 165
 166 void gmx_rx_wait(const t_commrec *cr, int nodeid)
 167 {
 168 #ifndef GMX_MPI
 169     gmx_call("gmx_rx_wait");
 170 #else
 171     MPI_Status  status;
 172     int         mpi_result;
 173
 174     if ((mpi_result = MPI_Wait(&cr->pd->mpi_req_rx, &status)) != 0)
 175     {
 176         gmx_fatal(FARGS, "MPI_Wait: result=%d", mpi_result);
 177     }
 178 #endif
 179 }
 180
 181 void gmx_tx_rx_real(const t_commrec *cr,
 182                     int send_dir, real *send_buf, int send_bufsize,
 183                     int recv_dir, real *recv_buf, int recv_bufsize)
 184 {
 185 #ifndef GMX_MPI
 186     gmx_call("gmx_tx_rx_real");
 187 #else
 188     int        send_nodeid, recv_nodeid;
 189     int        tx_tag = 0, rx_tag = 0;
 190     MPI_Status stat;
 191
 192     send_nodeid = cr->pd->neighbor[send_dir];
 193     recv_nodeid = cr->pd->neighbor[recv_dir];
 194
 195 #ifdef GMX_DOUBLE
 196 #define mpi_type MPI_DOUBLE
 197 #else
 198 #define mpi_type MPI_FLOAT
 199 #endif
 200
 201     if (send_bufsize > 0 && recv_bufsize > 0)
 202     {
 203         MPI_Sendrecv(send_buf, send_bufsize, mpi_type, RANK(cr, send_nodeid), tx_tag,
 204                      recv_buf, recv_bufsize, mpi_type, RANK(cr, recv_nodeid), rx_tag,
 205                      cr->mpi_comm_mygroup, &stat);
 206     }
 207     else if (send_bufsize > 0)
 208     {
 209         MPI_Send(send_buf, send_bufsize, mpi_type, RANK(cr, send_nodeid), tx_tag,
 210                  cr->mpi_comm_mygroup);
 211     }
 212     else if (recv_bufsize > 0)
 213     {
 214         MPI_Recv(recv_buf, recv_bufsize, mpi_type, RANK(cr, recv_nodeid), rx_tag,
 215                  cr->mpi_comm_mygroup, &stat);
 216     }
 217 #undef mpi_type
 218 #endif
 219 }
 220
 221
 222 void gmx_tx_rx_void(const t_commrec *cr,
 223                     int send_dir, void *send_buf, int send_bufsize,
 224                     int recv_dir, void *recv_buf, int recv_bufsize)
 225 {
 226 #ifndef GMX_MPI
 227     gmx_call("gmx_tx_rx_void");
 228 #else
 229     int        send_nodeid, recv_nodeid;
 230     int        tx_tag = 0, rx_tag = 0;
 231     MPI_Status stat;
 232
 233     send_nodeid = cr->pd->neighbor[send_dir];
 234     recv_nodeid = cr->pd->neighbor[recv_dir];
 235
 236
 237     MPI_Sendrecv(send_buf, send_bufsize, MPI_BYTE, RANK(cr, send_nodeid), tx_tag,
 238                  recv_buf, recv_bufsize, MPI_BYTE, RANK(cr, recv_nodeid), rx_tag,
 239                  cr->mpi_comm_mygroup, &stat);
 240
 241 #endif
 242 }
 243
 244
 245 /*void gmx_wait(int dir_send,int dir_recv)*/
 246
 247 void gmx_wait(const t_commrec *cr, int dir_send, int dir_recv)
 248 {
 249 #ifndef GMX_MPI
 250     gmx_call("gmx_wait");
 251 #else
 252     gmx_tx_wait(cr, dir_send);
 253     gmx_rx_wait(cr, dir_recv);
 254 #endif
 255 }
 256
 257 static void set_left_right(t_commrec *cr)
 258 {
 259     cr->pd->neighbor[GMX_LEFT]  = (cr->nnodes + cr->nodeid - 1) % cr->nnodes;
 260     cr->pd->neighbor[GMX_RIGHT] = (cr->nodeid + 1) % cr->nnodes;
 261 }
 262
 263 void pd_move_f(const t_commrec *cr, rvec f[], t_nrnb *nrnb)
 264 {
 265     move_f(NULL, cr, GMX_LEFT, GMX_RIGHT, f, cr->pd->vbuf, nrnb);
 266 }
 267
 268 int *pd_cgindex(const t_commrec *cr)
 269 {
 270     return cr->pd->cgindex;
 271 }
 272
 273 int *pd_index(const t_commrec *cr)
 274 {
 275     return cr->pd->index;
 276 }
 277
 278 int pd_shift(const t_commrec *cr)
 279 {
 280     return cr->pd->shift;
 281 }
 282
 283 int pd_bshift(const t_commrec *cr)
 284 {
 285     return cr->pd->bshift;
 286 }
 287
 288 void pd_cg_range(const t_commrec *cr, int *cg0, int *cg1)
 289 {
 290     *cg0 = cr->pd->cgindex[cr->nodeid];
 291     *cg1 = cr->pd->cgindex[cr->nodeid+1];
 292 }
 293
 294 void pd_at_range(const t_commrec *cr, int *at0, int *at1)
 295 {
 296     *at0 = cr->pd->index[cr->nodeid];
 297     *at1 = cr->pd->index[cr->nodeid+1];
 298 }
 299
 300 void
 301 pd_get_constraint_range(gmx_partdec_p_t pd, int *start, int *natoms)
 302 {
 303     *start  = pd->constraints->left_range_receive;
 304     *natoms = pd->constraints->right_range_receive-pd->constraints->left_range_receive;
 305 }
 306
 307 int *
 308 pd_constraints_nlocalatoms(gmx_partdec_p_t pd)
 309 {
 310     int *rc;
 311
 312     if (NULL != pd && NULL != pd->constraints)
 313     {
 314         rc = pd->constraints->nlocalatoms;
 315     }
 316     else
 317     {
 318         rc = NULL;
 319     }
 320     return rc;
 321 }
 322
 323
 324
 325
 326 /* This routine is used to communicate the non-home-atoms needed for constrains.
 327  * We have already calculated this range of atoms during setup, and stored in the
 328  * partdec constraints structure.
 329  *
 330  * When called, we send/receive left_range_send/receive atoms to our left (lower)
 331  * node neighbor, and similar to the right (higher) node.
 332  *
 333  * This has not been tested for periodic molecules...
 334  */
 335 void
 336 pd_move_x_constraints(t_commrec *  cr,
 337                       rvec *       x0,
 338                       rvec *       x1)
 339 {
 340 #ifdef GMX_MPI
 341     gmx_partdec_t            *pd;
 342     gmx_partdec_constraint_t *pdc;
 343
 344     rvec                *     sendptr;
 345     rvec                *     recvptr;
 346     int                       thisnode;
 347     int                       i;
 348     int                       cnt;
 349     int                       sendcnt, recvcnt;
 350
 351     pd  = cr->pd;
 352     pdc = pd->constraints;
 353
 354     if (pdc == NULL)
 355     {
 356         return;
 357     }
 358
 359     thisnode  = cr->nodeid;
 360
 361     /* First pulse to right */
 362
 363     recvcnt = 3*(pd->index[thisnode]-pdc->left_range_receive);
 364     sendcnt = 3*(cr->pd->index[thisnode+1]-cr->pd->constraints->right_range_send);
 365
 366     if (x1 != NULL)
 367     {
 368         /* Assemble temporary array with both x0 & x1 */
 369         recvptr = pdc->recvbuf;
 370         sendptr = pdc->sendbuf;
 371
 372         cnt = 0;
 373         for (i = pdc->right_range_send; i < pd->index[thisnode+1]; i++)
 374         {
 375             copy_rvec(x0[i], sendptr[cnt++]);
 376         }
 377         for (i = pdc->right_range_send; i < pd->index[thisnode+1]; i++)
 378         {
 379             copy_rvec(x1[i], sendptr[cnt++]);
 380         }
 381         recvcnt *= 2;
 382         sendcnt *= 2;
 383     }
 384     else
 385     {
 386         recvptr = x0 + pdc->left_range_receive;
 387         sendptr = x0 + pdc->right_range_send;
 388     }
 389
 390     gmx_tx_rx_real(cr,
 391                    GMX_RIGHT, (real *)sendptr, sendcnt,
 392                    GMX_LEFT, (real *)recvptr, recvcnt);
 393
 394     if (x1 != NULL)
 395     {
 396         /* copy back to x0/x1 */
 397         cnt = 0;
 398         for (i = pdc->left_range_receive; i < pd->index[thisnode]; i++)
 399         {
 400             copy_rvec(recvptr[cnt++], x0[i]);
 401         }
 402         for (i = pdc->left_range_receive; i < pd->index[thisnode]; i++)
 403         {
 404             copy_rvec(recvptr[cnt++], x1[i]);
 405         }
 406     }
 407
 408     /* And pulse to left */
 409     sendcnt = 3*(pdc->left_range_send-pd->index[thisnode]);
 410     recvcnt = 3*(pdc->right_range_receive-pd->index[thisnode+1]);
 411
 412     if (x1 != NULL)
 413     {
 414         cnt = 0;
 415         for (i = cr->pd->index[thisnode]; i < pdc->left_range_send; i++)
 416         {
 417             copy_rvec(x0[i], sendptr[cnt++]);
 418         }
 419         for (i = cr->pd->index[thisnode]; i < pdc->left_range_send; i++)
 420         {
 421             copy_rvec(x1[i], sendptr[cnt++]);
 422         }
 423         recvcnt *= 2;
 424         sendcnt *= 2;
 425     }
 426     else
 427     {
 428         sendptr = x0 + pd->index[thisnode];
 429         recvptr = x0 + pd->index[thisnode+1];
 430     }
 431
 432     gmx_tx_rx_real(cr,
 433                    GMX_LEFT, (real *)sendptr, sendcnt,
 434                    GMX_RIGHT, (real *)recvptr, recvcnt);
 435
 436     /* Final copy back from buffers */
 437     if (x1 != NULL)
 438     {
 439         /* First copy received data back into x0 & x1 */
 440         cnt = 0;
 441         for (i = pd->index[thisnode+1]; i < pdc->right_range_receive; i++)
 442         {
 443             copy_rvec(recvptr[cnt++], x0[i]);
 444         }
 445         for (i = pd->index[thisnode+1]; i < pdc->right_range_receive; i++)
 446         {
 447             copy_rvec(recvptr[cnt++], x1[i]);
 448         }
 449     }
 450 #endif
 451 }
 452
 453 static int home_cpu(int nnodes, gmx_partdec_t *pd, int atomid)
 454 {
 455     int k;
 456
 457     for (k = 0; (k < nnodes); k++)
 458     {
 459         if (atomid < pd->index[k+1])
 460         {
 461             return k;
 462         }
 463     }
 464     gmx_fatal(FARGS, "Atomid %d is larger than number of atoms (%d)",
 465               atomid+1, pd->index[nnodes]+1);
 466
 467     return -1;
 468 }
 469
 470 static void calc_nsbshift(FILE *fp, int nnodes, gmx_partdec_t *pd, t_idef *idef)
 471 {
 472     int i, j, k;
 473     int lastcg, targetcg, nshift, naaj;
 474     int homeid[32];
 475
 476     pd->bshift = 0;
 477     for (i = 1; (i < nnodes); i++)
 478     {
 479         targetcg = pd->cgindex[i];
 480         for (nshift = i; (nshift > 0) && (pd->cgindex[nshift] > targetcg); nshift--)
 481         {
 482             ;
 483         }
 484         pd->bshift = max(pd->bshift, i-nshift);
 485     }
 486
 487     pd->shift = (nnodes + 1)/2;
 488     for (i = 0; (i < nnodes); i++)
 489     {
 490         lastcg   = pd->cgindex[i+1]-1;
 491         naaj     = calc_naaj(lastcg, pd->cgindex[nnodes]);
 492         targetcg = (lastcg+naaj) % pd->cgindex[nnodes];
 493
 494         /* Search until we find the target charge group */
 495         for (nshift = 0;
 496              (nshift < nnodes) && (targetcg > pd->cgindex[nshift+1]);
 497              nshift++)
 498         {
 499             ;
 500         }
 501         /* Now compute the shift, that is the difference in node index */
 502         nshift = ((nshift - i + nnodes) % nnodes);
 503
 504         if (fp)
 505         {
 506             fprintf(fp, "CPU=%3d, lastcg=%5d, targetcg=%5d, myshift=%5d\n",
 507                     i, lastcg, targetcg, nshift);
 508         }
 509
 510         /* It's the largest shift that matters */
 511         pd->shift = max(nshift, pd->shift);
 512     }
 513     /* Now for the bonded forces */
 514     for (i = 0; (i < F_NRE); i++)
 515     {
 516         if (interaction_function[i].flags & IF_BOND)
 517         {
 518             int nratoms = interaction_function[i].nratoms;
 519             for (j = 0; (j < idef->il[i].nr); j += nratoms+1)
 520             {
 521                 for (k = 1; (k <= nratoms); k++)
 522                 {
 523                     homeid[k-1] = home_cpu(nnodes, pd, idef->il[i].iatoms[j+k]);
 524                 }
 525                 for (k = 1; (k < nratoms); k++)
 526                 {
 527                     pd->shift = max(pd->shift, abs(homeid[k]-homeid[0]));
 528                 }
 529             }
 530         }
 531     }
 532     if (fp)
 533     {
 534         fprintf(fp, "pd->shift = %3d, pd->bshift=%3d\n",
 535                 pd->shift, pd->bshift);
 536     }
 537 }
 538
 539
 540 static void
 541 init_partdec_constraint(t_commrec *cr,
 542                         t_idef *   idef,
 543                         int       *left_range,
 544                         int       *right_range)
 545 {
 546     gmx_partdec_t *            pd = cr->pd;
 547     gmx_partdec_constraint_t  *pdc;
 548     int i, cnt, k;
 549     int ai, aj, nodei, nodej;
 550     int nratoms;
 551     int nodeid;
 552
 553     snew(pdc, 1);
 554     cr->pd->constraints = pdc;
 555
 556
 557     /* Who am I? */
 558     nodeid = cr->nodeid;
 559
 560     /* Setup LINCS communication ranges */
 561     pdc->left_range_receive   = left_range[nodeid];
 562     pdc->right_range_receive  = right_range[nodeid]+1;
 563     pdc->left_range_send      = (nodeid > 0) ? right_range[nodeid-1]+1 : 0;
 564     pdc->right_range_send     = (nodeid < cr->nnodes-1) ? left_range[nodeid+1] : pd->index[cr->nnodes];
 565
 566     snew(pdc->nlocalatoms, idef->il[F_CONSTR].nr);
 567     nratoms = interaction_function[F_CONSTR].nratoms;
 568
 569     for (i = 0, cnt = 0; i < idef->il[F_CONSTR].nr; i += nratoms+1, cnt++)
 570     {
 571         ai    = idef->il[F_CONSTR].iatoms[i+1];
 572         aj    = idef->il[F_CONSTR].iatoms[i+2];
 573         nodei = 0;
 574         while (ai >= pd->index[nodei+1])
 575         {
 576             nodei++;
 577         }
 578         nodej = 0;
 579         while (aj >= pd->index[nodej+1])
 580         {
 581             nodej++;
 582         }
 583         pdc->nlocalatoms[cnt] = 0;
 584         if (nodei == nodeid)
 585         {
 586             pdc->nlocalatoms[cnt]++;
 587         }
 588         if (nodej == nodeid)
 589         {
 590             pdc->nlocalatoms[cnt]++;
 591         }
 592     }
 593     pdc->nconstraints = cnt;
 594
 595     snew(pdc->sendbuf, max(6*(pd->index[cr->nodeid+1]-pd->constraints->right_range_send), 6*(pdc->left_range_send-pd->index[cr->nodeid])));
 596     snew(pdc->recvbuf, max(6*(pd->index[cr->nodeid]-pdc->left_range_receive), 6*(pdc->right_range_receive-pd->index[cr->nodeid+1])));
 597
 598 }
 599
 600 static void init_partdec(FILE *fp, t_commrec *cr, t_block *cgs, int *multinr,
 601                          t_idef *idef)
 602 {
 603     int            i, nodeid;
 604     gmx_partdec_t *pd;
 605
 606     snew(pd, 1);
 607     cr->pd = pd;
 608
 609     set_left_right(cr);
 610
 611     if (cr->nnodes > 1)
 612     {
 613         if (multinr == NULL)
 614         {
 615             gmx_fatal(FARGS, "Internal error in init_partdec: multinr = NULL");
 616         }
 617         snew(pd->index, cr->nnodes+1);
 618         snew(pd->cgindex, cr->nnodes+1);
 619         pd->cgindex[0] = 0;
 620         pd->index[0]   = 0;
 621         for (i = 0; (i < cr->nnodes); i++)
 622         {
 623             pd->cgindex[i+1] = multinr[i];
 624             pd->index[i+1]   = cgs->index[multinr[i]];
 625         }
 626         calc_nsbshift(fp, cr->nnodes, pd, idef);
 627         /* This is a hack to do with bugzilla 148 */
 628         /*pd->shift = cr->nnodes-1;
 629            pd->bshift = 0;*/
 630
 631         /* Allocate a buffer of size natoms of the whole system
 632          * for summing the forces over the nodes.
 633          */
 634         snew(pd->vbuf, cgs->index[cgs->nr]);
 635         pd->constraints = NULL;
 636     }
 637 #ifdef GMX_MPI
 638     pd->mpi_req_tx = MPI_REQUEST_NULL;
 639     pd->mpi_req_rx = MPI_REQUEST_NULL;
 640 #endif
 641 }
 642
 643 static void print_partdec(FILE *fp, const char *title,
 644                           int nnodes, gmx_partdec_t *pd)
 645 {
 646     int i;
 647
 648     fprintf(fp, "%s\n", title);
 649     fprintf(fp, "nnodes:       %5d\n", nnodes);
 650     fprintf(fp, "pd->shift:    %5d\n", pd->shift);
 651     fprintf(fp, "pd->bshift:   %5d\n", pd->bshift);
 652
 653     fprintf(fp, "Nodeid   atom0   #atom     cg0       #cg\n");
 654     for (i = 0; (i < nnodes); i++)
 655     {
 656         fprintf(fp, "%6d%8d%8d%8d%10d\n",
 657                 i,
 658                 pd->index[i], pd->index[i+1]-pd->index[i],
 659                 pd->cgindex[i], pd->cgindex[i+1]-pd->cgindex[i]);
 660     }
 661     fprintf(fp, "\n");
 662 }
 663
 664 static void pr_idef_division(FILE *fp, t_idef *idef, int nnodes, int **multinr)
 665 {
 666     int i, ftype, nr, nra, m0, m1;
 667
 668     fprintf(fp, "Division of bonded forces over processors\n");
 669     fprintf(fp, "%-12s", "CPU");
 670     for (i = 0; (i < nnodes); i++)
 671     {
 672         fprintf(fp, " %5d", i);
 673     }
 674     fprintf(fp, "\n");
 675
 676     for (ftype = 0; (ftype < F_NRE); ftype++)
 677     {
 678         if (idef->il[ftype].nr > 0)
 679         {
 680             nr  = idef->il[ftype].nr;
 681             nra = 1+interaction_function[ftype].nratoms;
 682             fprintf(fp, "%-12s", interaction_function[ftype].name);
 683             /* Loop over processors */
 684             for (i = 0; (i < nnodes); i++)
 685             {
 686                 m0 = (i == 0) ? 0 : multinr[ftype][i-1]/nra;
 687                 m1 = multinr[ftype][i]/nra;
 688                 fprintf(fp, " %5d", m1-m0);
 689             }
 690             fprintf(fp, "\n");
 691         }
 692     }
 693 }
 694
 695 static void select_my_ilist(FILE *log, t_ilist *il, int *multinr, t_commrec *cr)
 696 {
 697     t_iatom *ia;
 698     int      i, start, end, nr;
 699
 700     if (cr->nodeid == 0)
 701     {
 702         start = 0;
 703     }
 704     else
 705     {
 706         start = multinr[cr->nodeid-1];
 707     }
 708     end = multinr[cr->nodeid];
 709
 710     nr = end-start;
 711     if (nr < 0)
 712     {
 713         gmx_fatal(FARGS, "Negative number of atoms (%d) on node %d\n"
 714                   "You have probably not used the same value for -np with grompp"
 715                   " and mdrun",
 716                   nr, cr->nodeid);
 717     }
 718     snew(ia, nr);
 719
 720     for (i = 0; (i < nr); i++)
 721     {
 722         ia[i] = il->iatoms[start+i];
 723     }
 724
 725     sfree(il->iatoms);
 726     il->iatoms = ia;
 727
 728     il->nr = nr;
 729 }
 730
 731 static void select_my_idef(FILE *log, t_idef *idef, int **multinr, t_commrec *cr)
 732 {
 733     int i;
 734
 735     for (i = 0; (i < F_NRE); i++)
 736     {
 737         select_my_ilist(log, &idef->il[i], multinr[i], cr);
 738     }
 739 }
 740
 741 gmx_localtop_t *split_system(FILE *log,
 742                              gmx_mtop_t *mtop, t_inputrec *inputrec,
 743                              t_commrec *cr)
 744 {
 745     int             i, npp, n, nn;
 746     real           *capacity;
 747     double          tcap = 0, cap;
 748     int            *multinr_cgs, **multinr_nre;
 749     char           *cap_env;
 750     gmx_localtop_t *top;
 751     int            *left_range;
 752     int            *right_range;
 753
 754     /* Time to setup the division of charge groups over processors */
 755     npp = cr->nnodes-cr->npmenodes;
 756     snew(capacity, npp);
 757     cap_env = getenv("GMX_CAPACITY");
 758     if (cap_env == NULL)
 759     {
 760         for (i = 0; (i < npp-1); i++)
 761         {
 762             capacity[i] = 1.0/(double)npp;
 763             tcap       += capacity[i];
 764         }
 765         /* Take care that the sum of capacities is 1.0 */
 766         capacity[npp-1] = 1.0 - tcap;
 767     }
 768     else
 769     {
 770         tcap = 0;
 771         nn   = 0;
 772         for (i = 0; i < npp; i++)
 773         {
 774             cap = 0;
 775             sscanf(cap_env+nn, "%lf%n", &cap, &n);
 776             if (cap == 0)
 777             {
 778                 gmx_fatal(FARGS, "Incorrect entry or number of entries in GMX_CAPACITY='%s'", cap_env);
 779             }
 780             capacity[i] = cap;
 781             tcap       += cap;
 782             nn         += n;
 783         }
 784         for (i = 0; i < npp; i++)
 785         {
 786             capacity[i] /= tcap;
 787         }
 788     }
 789
 790     /* Convert the molecular topology to a fully listed topology */
 791     top = gmx_mtop_generate_local_top(mtop, inputrec);
 792
 793     snew(multinr_cgs, npp);
 794     snew(multinr_nre, F_NRE);
 795     for (i = 0; i < F_NRE; i++)
 796     {
 797         snew(multinr_nre[i], npp);
 798     }
 799
 800
 801     snew(left_range, cr->nnodes);
 802     snew(right_range, cr->nnodes);
 803
 804     /* This computes which entities can be placed on processors */
 805     split_top(log, npp, top, inputrec, &mtop->mols, capacity, multinr_cgs, multinr_nre, left_range, right_range);
 806
 807     sfree(capacity);
 808     init_partdec(log, cr, &(top->cgs), multinr_cgs, &(top->idef));
 809
 810     /* This should be fine */
 811     /*split_idef(&(top->idef),cr->nnodes-cr->npmenodes);*/
 812
 813     select_my_idef(log, &(top->idef), multinr_nre, cr);
 814
 815     if (top->idef.il[F_CONSTR].nr > 0)
 816     {
 817         init_partdec_constraint(cr, &(top->idef), left_range, right_range);
 818     }
 819
 820     if (log)
 821     {
 822         pr_idef_division(log, &(top->idef), npp, multinr_nre);
 823     }
 824
 825     for (i = 0; i < F_NRE; i++)
 826     {
 827         sfree(multinr_nre[i]);
 828     }
 829     sfree(multinr_nre);
 830     sfree(multinr_cgs);
 831
 832     sfree(left_range);
 833     sfree(right_range);
 834
 835     if (log)
 836     {
 837         print_partdec(log, "Workload division", cr->nnodes, cr->pd);
 838     }
 839
 840     return top;
 841 }
 842
 843 static void
 844 add_to_vsitelist(int **list, int *nitem, int *nalloc, int newitem)
 845 {
 846     int      i, idx;
 847     gmx_bool found;
 848
 849     found = FALSE;
 850     idx   = *nitem;
 851     for (i = 0; i < idx && !found; i++)
 852     {
 853         found = (newitem == (*list)[i]);
 854     }
 855     if (!found)
 856     {
 857         *nalloc += 100;
 858         srenew(*list, *nalloc);
 859         (*list)[idx++] = newitem;
 860         *nitem         = idx;
 861     }
 862 }
 863
 864 gmx_bool setup_parallel_vsites(t_idef *idef, t_commrec *cr,
 865                                t_comm_vsites *vsitecomm)
 866 {
 867     int            i, j, ftype;
 868     int            nra;
 869     gmx_bool       do_comm;
 870     t_iatom       *ia;
 871     gmx_partdec_t *pd;
 872     int            iconstruct;
 873     int            i0, i1;
 874     int            nalloc_left_construct, nalloc_right_construct;
 875     int            sendbuf[2], recvbuf[2];
 876     int            bufsize, leftbuf, rightbuf;
 877
 878     pd = cr->pd;
 879
 880     i0 = pd->index[cr->nodeid];
 881     i1 = pd->index[cr->nodeid+1];
 882
 883     vsitecomm->left_import_construct  = NULL;
 884     vsitecomm->left_import_nconstruct = 0;
 885     nalloc_left_construct             = 0;
 886
 887     vsitecomm->right_import_construct  = NULL;
 888     vsitecomm->right_import_nconstruct = 0;
 889     nalloc_right_construct             = 0;
 890
 891     for (ftype = 0; (ftype < F_NRE); ftype++)
 892     {
 893         if (!(interaction_function[ftype].flags & IF_VSITE) )
 894         {
 895             continue;
 896         }
 897
 898         nra    = interaction_function[ftype].nratoms;
 899         ia     = idef->il[ftype].iatoms;
 900
 901         for (i = 0; i < idef->il[ftype].nr; i += nra+1)
 902         {
 903             for (j = 2; j < 1+nra; j++)
 904             {
 905                 iconstruct = ia[i+j];
 906                 if (iconstruct < i0)
 907                 {
 908                     add_to_vsitelist(&vsitecomm->left_import_construct,
 909                                      &vsitecomm->left_import_nconstruct,
 910                                      &nalloc_left_construct, iconstruct);
 911                 }
 912                 else if (iconstruct >= i1)
 913                 {
 914                     add_to_vsitelist(&vsitecomm->right_import_construct,
 915                                      &vsitecomm->right_import_nconstruct,
 916                                      &nalloc_right_construct, iconstruct);
 917                 }
 918             }
 919         }
 920     }
 921
 922     /* Pre-communicate the array lengths */
 923     gmx_tx_rx_void(cr,
 924                    GMX_RIGHT, (void *)&vsitecomm->right_import_nconstruct, sizeof(int),
 925                    GMX_LEFT, (void *)&vsitecomm->left_export_nconstruct, sizeof(int));
 926     gmx_tx_rx_void(cr,
 927                    GMX_LEFT, (void *)&vsitecomm->left_import_nconstruct, sizeof(int),
 928                    GMX_RIGHT, (void *)&vsitecomm->right_export_nconstruct, sizeof(int));
 929
 930     snew(vsitecomm->left_export_construct, vsitecomm->left_export_nconstruct);
 931     snew(vsitecomm->right_export_construct, vsitecomm->right_export_nconstruct);
 932
 933     /* Communicate the construcing atom index arrays */
 934     gmx_tx_rx_void(cr,
 935                    GMX_RIGHT, (void *)vsitecomm->right_import_construct, vsitecomm->right_import_nconstruct*sizeof(int),
 936                    GMX_LEFT, (void *)vsitecomm->left_export_construct, vsitecomm->left_export_nconstruct*sizeof(int));
 937
 938     /* Communicate the construcing atom index arrays */
 939     gmx_tx_rx_void(cr,
 940                    GMX_LEFT, (void *)vsitecomm->left_import_construct, vsitecomm->left_import_nconstruct*sizeof(int),
 941                    GMX_RIGHT, (void *)vsitecomm->right_export_construct, vsitecomm->right_export_nconstruct*sizeof(int));
 942
 943     leftbuf  = max(vsitecomm->left_export_nconstruct, vsitecomm->left_import_nconstruct);
 944     rightbuf = max(vsitecomm->right_export_nconstruct, vsitecomm->right_import_nconstruct);
 945
 946     bufsize  = max(leftbuf, rightbuf);
 947
 948     do_comm = (bufsize > 0);
 949
 950     snew(vsitecomm->send_buf, 2*bufsize);
 951     snew(vsitecomm->recv_buf, 2*bufsize);
 952
 953     return do_comm;
 954 }
 955
 956 t_state *partdec_init_local_state(t_commrec *cr, t_state *state_global)
 957 {
 958     int      i;
 959     t_state *state_local;
 960
 961     snew(state_local, 1);
 962
 963     /* Copy all the contents */
 964     *state_local = *state_global;
 965     snew(state_local->lambda, efptNR);
 966     /* local storage for lambda */
 967     for (i = 0; i < efptNR; i++)
 968     {
 969         state_local->lambda[i] = state_global->lambda[i];
 970     }
 971     if (state_global->nrngi > 1)
 972     {
 973         /* With stochastic dynamics we need local storage for the random state */
 974         if (state_local->flags & (1<<estLD_RNG))
 975         {
 976             state_local->nrng = gmx_rng_n();
 977             snew(state_local->ld_rng, state_local->nrng);
 978 #ifdef GMX_MPI
 979             if (PAR(cr))
 980             {
 981                 MPI_Scatter(state_global->ld_rng,
 982                             state_local->nrng*sizeof(state_local->ld_rng[0]), MPI_BYTE,
 983                             state_local->ld_rng,
 984                             state_local->nrng*sizeof(state_local->ld_rng[0]), MPI_BYTE,
 985                             MASTERRANK(cr), cr->mpi_comm_mygroup);
 986             }
 987 #endif
 988         }
 989         if (state_local->flags & (1<<estLD_RNGI))
 990         {
 991             snew(state_local->ld_rngi, 1);
 992 #ifdef GMX_MPI
 993             if (PAR(cr))
 994             {
 995                 MPI_Scatter(state_global->ld_rngi,
 996                             sizeof(state_local->ld_rngi[0]), MPI_BYTE,
 997                             state_local->ld_rngi,
 998                             sizeof(state_local->ld_rngi[0]), MPI_BYTE,
 999                             MASTERRANK(cr), cr->mpi_comm_mygroup);
1000             }
1001 #endif
1002         }
1003     }
1004
1005     return state_local;
1006 }