src/mdlib/partdec.c

   1 /*
   2  *
   3  *                This source code is part of
   4  *
   5  *                 G   R   O   M   A   C   S
   6  *
   7  *          GROningen MAchine for Chemical Simulations
   8  *
   9  *                        VERSION 3.2.0
  10  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  11  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  12  * Copyright (c) 2001-2004, The GROMACS development team,
  13  * check out http://www.gromacs.org for more information.
  14  *
  15  * This program is free software; you can redistribute it and/or
  16  * modify it under the terms of the GNU General Public License
  17  * as published by the Free Software Foundation; either version 2
  18  * of the License, or (at your option) any later version.
  19  *
  20  * If you want to redistribute modifications, please consider that
  21  * scientific software is very special. Version control is crucial -
  22  * bugs must be traceable. We will be happy to consider code for
  23  * inclusion in the official distribution, but derived work must not
  24  * be called official GROMACS. Details are found in the README & COPYING
  25  * files - if they are missing, get the official version at www.gromacs.org.
  26  *
  27  * To help us fund GROMACS development, we humbly ask that you cite
  28  * the papers on the package - you can find them in the top README file.
  29  *
  30  * For more info, check our website at http://www.gromacs.org
  31  *
  32  * And Hey:
  33  * GROwing Monsters And Cloning Shrimps
  34  */
  35 /* This file is completely threadsafe - keep it that way! */
  36 #ifdef HAVE_CONFIG_H
  37 #include <config.h>
  38 #endif
  39
  40 #include <math.h>
  41
  42 #include "sysstuff.h"
  43 #include "typedefs.h"
  44 #include "smalloc.h"
  45 #include "invblock.h"
  46 #include "macros.h"
  47 #include "main.h"
  48 #include "ns.h"
  49 #include "partdec.h"
  50 #include "splitter.h"
  51 #include "gmx_random.h"
  52 #include "mtop_util.h"
  53 #include "mvdata.h"
  54 #include "vec.h"
  55
  56 typedef struct gmx_partdec_constraint
  57 {
  58         int                  left_range_receive;
  59         int                  right_range_receive;
  60         int                  left_range_send;
  61         int                  right_range_send;
  62         int                  nconstraints;
  63         int *                nlocalatoms;
  64         rvec *               sendbuf;
  65         rvec *               recvbuf;
  66 }
  67 gmx_partdec_constraint_t;
  68
  69
  70 typedef struct gmx_partdec {
  71   int  neighbor[2];             /* The nodeids of left and right neighb */
  72   int  *cgindex;                /* The charge group boundaries,         */
  73                                 /* size nnodes+1,                       */
  74                                 /* only allocated with particle decomp. */
  75   int  *index;                  /* The home particle boundaries,        */
  76                                 /* size nnodes+1,                       */
  77                                 /* only allocated with particle decomp. */
  78   int  shift,bshift;            /* Coordinates are shifted left for     */
  79                                 /* 'shift' systolic pulses, and right   */
  80                                 /* for 'bshift' pulses. Forces are      */
  81                                 /* shifted right for 'shift' pulses     */
  82                                 /* and left for 'bshift' pulses         */
  83                                 /* This way is not necessary to shift   */
  84                                 /* the coordinates over the entire ring */
  85   rvec *vbuf;                   /* Buffer for summing the forces        */
  86 #ifdef GMX_MPI
  87   MPI_Request mpi_req_rx;       /* MPI reqs for async transfers */
  88   MPI_Request mpi_req_tx;
  89 #endif
  90  gmx_partdec_constraint_t *     constraints;
  91 } gmx_partdec_t;
  92
  93
  94 void gmx_tx(const t_commrec *cr,int dir,void *buf,int bufsize)
  95 {
  96 #ifndef GMX_MPI
  97   gmx_call("gmx_tx");
  98 #else
  99   int        nodeid;
 100   int        tag,flag;
 101   MPI_Status status;
 102
 103   nodeid = cr->pd->neighbor[dir];
 104
 105 #ifdef DEBUG
 106   fprintf(stderr,"gmx_tx: nodeid=%d, buf=%x, bufsize=%d\n",
 107           nodeid,buf,bufsize);
 108 #endif
 109 #ifdef MPI_TEST
 110   /* workaround for crashes encountered with MPI on IRIX 6.5 */
 111   if (cr->pd->mpi_req_tx != MPI_REQUEST_NULL) {
 112     MPI_Test(&cr->pd->mpi_req_tx,&flag,&status);
 113     if (flag==FALSE) {
 114       fprintf(stdlog,"gmx_tx called before previous send was complete: nodeid=%d, buf=%x, bufsize=%d\n",
 115               nodeid,buf,bufsize);
 116       gmx_tx_wait(nodeid);
 117     }
 118   }
 119 #endif
 120   tag = 0;
 121   if (MPI_Isend(buf,bufsize,MPI_BYTE,RANK(cr,nodeid),tag,cr->mpi_comm_mygroup,&cr->pd->mpi_req_tx) != 0)
 122     gmx_comm("MPI_Isend Failed");
 123 #endif
 124 }
 125
 126 void gmx_tx_wait(const t_commrec *cr, int dir)
 127 {
 128 #ifndef GMX_MPI
 129   gmx_call("gmx_tx_wait");
 130 #else
 131   MPI_Status  status;
 132   int mpi_result;
 133
 134   if ((mpi_result=MPI_Wait(&cr->pd->mpi_req_tx,&status)) != 0)
 135     gmx_fatal(FARGS,"MPI_Wait: result=%d",mpi_result);
 136 #endif
 137 }
 138
 139 void gmx_rx(const t_commrec *cr,int dir,void *buf,int bufsize)
 140 {
 141 #ifndef GMX_MPI
 142   gmx_call("gmx_rx");
 143 #else
 144   int nodeid;
 145   int tag;
 146
 147   nodeid = cr->pd->neighbor[dir];
 148 #ifdef DEBUG
 149   fprintf(stderr,"gmx_rx: nodeid=%d, buf=%x, bufsize=%d\n",
 150           nodeid,buf,bufsize);
 151 #endif
 152   tag = 0;
 153   if (MPI_Irecv( buf, bufsize, MPI_BYTE, RANK(cr,nodeid), tag, cr->mpi_comm_mygroup, &cr->pd->mpi_req_rx) != 0 )
 154     gmx_comm("MPI_Recv Failed");
 155 #endif
 156 }
 157
 158 void gmx_rx_wait(const t_commrec *cr, int nodeid)
 159 {
 160 #ifndef GMX_MPI
 161   gmx_call("gmx_rx_wait");
 162 #else
 163   MPI_Status  status;
 164   int mpi_result;
 165
 166   if ((mpi_result=MPI_Wait(&cr->pd->mpi_req_rx,&status)) != 0)
 167     gmx_fatal(FARGS,"MPI_Wait: result=%d",mpi_result);
 168 #endif
 169 }
 170
 171 void gmx_tx_rx_real(const t_commrec *cr,
 172                     int send_dir,real *send_buf,int send_bufsize,
 173                     int recv_dir,real *recv_buf,int recv_bufsize)
 174 {
 175 #ifndef GMX_MPI
 176   gmx_call("gmx_tx_rx_real");
 177 #else
 178   int send_nodeid,recv_nodeid;
 179   int tx_tag = 0,rx_tag = 0;
 180   MPI_Status stat;
 181
 182   send_nodeid = cr->pd->neighbor[send_dir];
 183   recv_nodeid = cr->pd->neighbor[recv_dir];
 184
 185 #ifdef GMX_DOUBLE
 186 #define mpi_type MPI_DOUBLE
 187 #else
 188 #define mpi_type MPI_FLOAT
 189 #endif
 190
 191   if (send_bufsize > 0 && recv_bufsize > 0) {
 192     MPI_Sendrecv(send_buf,send_bufsize,mpi_type,RANK(cr,send_nodeid),tx_tag,
 193                  recv_buf,recv_bufsize,mpi_type,RANK(cr,recv_nodeid),rx_tag,
 194                  cr->mpi_comm_mygroup,&stat);
 195   } else if (send_bufsize > 0) {
 196     MPI_Send(send_buf,send_bufsize,mpi_type,RANK(cr,send_nodeid),tx_tag,
 197              cr->mpi_comm_mygroup);
 198   } else if (recv_bufsize > 0) {
 199     MPI_Recv(recv_buf,recv_bufsize,mpi_type,RANK(cr,recv_nodeid),rx_tag,
 200              cr->mpi_comm_mygroup,&stat);
 201   }
 202 #undef mpi_type
 203 #endif
 204 }
 205
 206
 207 void gmx_tx_rx_void(const t_commrec *cr,
 208                     int send_dir,void *send_buf,int send_bufsize,
 209                     int recv_dir,void *recv_buf,int recv_bufsize)
 210 {
 211 #ifndef GMX_MPI
 212         gmx_call("gmx_tx_rx_void");
 213 #else
 214         int send_nodeid,recv_nodeid;
 215         int tx_tag = 0,rx_tag = 0;
 216         MPI_Status stat;
 217
 218         send_nodeid = cr->pd->neighbor[send_dir];
 219         recv_nodeid = cr->pd->neighbor[recv_dir];
 220
 221
 222         MPI_Sendrecv(send_buf,send_bufsize,MPI_BYTE,RANK(cr,send_nodeid),tx_tag,
 223                                  recv_buf,recv_bufsize,MPI_BYTE,RANK(cr,recv_nodeid),rx_tag,
 224                                  cr->mpi_comm_mygroup,&stat);
 225
 226 #endif
 227 }
 228
 229
 230 /*void gmx_wait(int dir_send,int dir_recv)*/
 231
 232 void gmx_wait(const t_commrec *cr, int dir_send,int dir_recv)
 233 {
 234 #ifndef GMX_MPI
 235   gmx_call("gmx_wait");
 236 #else
 237   gmx_tx_wait(cr, dir_send);
 238   gmx_rx_wait(cr, dir_recv);
 239 #endif
 240 }
 241
 242 static void set_left_right(t_commrec *cr)
 243 {
 244   cr->pd->neighbor[GMX_LEFT]  = (cr->nnodes + cr->nodeid - 1) % cr->nnodes;
 245   cr->pd->neighbor[GMX_RIGHT] = (cr->nodeid + 1) % cr->nnodes;
 246 }
 247
 248 void pd_move_f(const t_commrec *cr,rvec f[],t_nrnb *nrnb)
 249 {
 250   move_f(NULL,cr,GMX_LEFT,GMX_RIGHT,f,cr->pd->vbuf,nrnb);
 251 }
 252
 253 int *pd_cgindex(const t_commrec *cr)
 254 {
 255   return cr->pd->cgindex;
 256 }
 257
 258 int *pd_index(const t_commrec *cr)
 259 {
 260   return cr->pd->index;
 261 }
 262
 263 int pd_shift(const t_commrec *cr)
 264 {
 265   return cr->pd->shift;
 266 }
 267
 268 int pd_bshift(const t_commrec *cr)
 269 {
 270   return cr->pd->bshift;
 271 }
 272
 273 void pd_cg_range(const t_commrec *cr,int *cg0,int *cg1)
 274 {
 275   *cg0 = cr->pd->cgindex[cr->nodeid];
 276   *cg1 = cr->pd->cgindex[cr->nodeid+1];
 277 }
 278
 279 void pd_at_range(const t_commrec *cr,int *at0,int *at1)
 280 {
 281   *at0 = cr->pd->index[cr->nodeid];
 282   *at1 = cr->pd->index[cr->nodeid+1];
 283 }
 284
 285 void
 286 pd_get_constraint_range(gmx_partdec_p_t pd,int *start,int *natoms)
 287 {
 288         *start  = pd->constraints->left_range_receive;
 289         *natoms = pd->constraints->right_range_receive-pd->constraints->left_range_receive;
 290 }
 291
 292 int *
 293 pd_constraints_nlocalatoms(gmx_partdec_p_t pd)
 294 {
 295         int *rc;
 296
 297         if(NULL != pd && NULL != pd->constraints)
 298         {
 299                 rc = pd->constraints->nlocalatoms;
 300         }
 301         else
 302         {
 303                 rc = NULL;
 304         }
 305         return rc;
 306 }
 307
 308
 309
 310
 311 /* This routine is used to communicate the non-home-atoms needed for constrains.
 312  * We have already calculated this range of atoms during setup, and stored in the
 313  * partdec constraints structure.
 314  *
 315  * When called, we send/receive left_range_send/receive atoms to our left (lower)
 316  * node neighbor, and similar to the right (higher) node.
 317  *
 318  * This has not been tested for periodic molecules...
 319  */
 320 void
 321 pd_move_x_constraints(t_commrec *  cr,
 322                                           rvec *       x0,
 323                                           rvec *       x1)
 324 {
 325 #ifdef GMX_MPI
 326         gmx_partdec_t *pd;
 327         gmx_partdec_constraint_t *pdc;
 328
 329         rvec *     sendptr;
 330         rvec *     recvptr;
 331         int        thisnode;
 332         int        i;
 333         int        cnt;
 334         int        sendcnt,recvcnt;
 335
 336         pd  = cr->pd;
 337         pdc = pd->constraints;
 338
 339         thisnode  = cr->nodeid;
 340
 341         /* First pulse to right */
 342
 343         recvcnt = 3*(pd->index[thisnode]-pdc->left_range_receive);
 344         sendcnt = 3*(cr->pd->index[thisnode+1]-cr->pd->constraints->right_range_send);
 345
 346         if(x1!=NULL)
 347         {
 348                 /* Assemble temporary array with both x0 & x1 */
 349                 recvptr = pdc->recvbuf;
 350                 sendptr = pdc->sendbuf;
 351
 352                 cnt = 0;
 353                 for(i=pdc->right_range_send;i<pd->index[thisnode+1];i++)
 354                 {
 355                         copy_rvec(x0[i],sendptr[cnt++]);
 356                 }
 357                 for(i=pdc->right_range_send;i<pd->index[thisnode+1];i++)
 358                 {
 359                         copy_rvec(x1[i],sendptr[cnt++]);
 360                 }
 361                 recvcnt *= 2;
 362                 sendcnt *= 2;
 363         }
 364         else
 365         {
 366                 recvptr = x0 + pdc->left_range_receive;
 367                 sendptr = x0 + pdc->right_range_send;
 368         }
 369
 370         gmx_tx_rx_real(cr,
 371                                    GMX_RIGHT,(real *)sendptr,sendcnt,
 372                                    GMX_LEFT, (real *)recvptr,recvcnt);
 373
 374         if(x1 != NULL)
 375         {
 376                 /* copy back to x0/x1 */
 377                 cnt = 0;
 378                 for(i=pdc->left_range_receive;i<pd->index[thisnode];i++)
 379                 {
 380                         copy_rvec(recvptr[cnt++],x0[i]);
 381                 }
 382                 for(i=pdc->left_range_receive;i<pd->index[thisnode];i++)
 383                 {
 384                         copy_rvec(recvptr[cnt++],x1[i]);
 385                 }
 386         }
 387
 388         /* And pulse to left */
 389         sendcnt = 3*(pdc->left_range_send-pd->index[thisnode]);
 390         recvcnt = 3*(pdc->right_range_receive-pd->index[thisnode+1]);
 391
 392         if(x1 != NULL)
 393         {
 394                 cnt = 0;
 395                 for(i=cr->pd->index[thisnode];i<pdc->left_range_send;i++)
 396                 {
 397                         copy_rvec(x0[i],sendptr[cnt++]);
 398                 }
 399                 for(i=cr->pd->index[thisnode];i<pdc->left_range_send;i++)
 400                 {
 401                         copy_rvec(x1[i],sendptr[cnt++]);
 402                 }
 403                 recvcnt *= 2;
 404                 sendcnt *= 2;
 405         }
 406         else
 407         {
 408                 sendptr = x0 + pd->index[thisnode];
 409                 recvptr = x0 + pd->index[thisnode+1];
 410         }
 411
 412         gmx_tx_rx_real(cr,
 413                                    GMX_LEFT ,(real *)sendptr,sendcnt,
 414                                    GMX_RIGHT,(real *)recvptr,recvcnt);
 415
 416         /* Final copy back from buffers */
 417         if(x1 != NULL)
 418         {
 419                 /* First copy received data back into x0 & x1 */
 420                 cnt = 0;
 421                 for(i=pd->index[thisnode+1];i<pdc->right_range_receive;i++)
 422                 {
 423                         copy_rvec(recvptr[cnt++],x0[i]);
 424                 }
 425                 for(i=pd->index[thisnode+1];i<pdc->right_range_receive;i++)
 426                 {
 427                         copy_rvec(recvptr[cnt++],x1[i]);
 428                 }
 429         }
 430 #endif
 431 }
 432
 433 static int home_cpu(int nnodes,gmx_partdec_t *pd,int atomid)
 434 {
 435   int k;
 436
 437   for(k=0; (k<nnodes); k++) {
 438     if (atomid<pd->index[k+1])
 439       return k;
 440   }
 441   gmx_fatal(FARGS,"Atomid %d is larger than number of atoms (%d)",
 442             atomid+1,pd->index[nnodes]+1);
 443
 444   return -1;
 445 }
 446
 447 static void calc_nsbshift(FILE *fp,int nnodes,gmx_partdec_t *pd,t_idef *idef)
 448 {
 449   int i,j,k;
 450   int lastcg,targetcg,nshift,naaj;
 451   int homeid[32];
 452
 453   pd->bshift=0;
 454   for(i=1; (i<nnodes); i++) {
 455     targetcg = pd->cgindex[i];
 456     for(nshift=i; (nshift > 0) && (pd->cgindex[nshift] > targetcg); nshift--)
 457       ;
 458     pd->bshift=max(pd->bshift,i-nshift);
 459   }
 460
 461   pd->shift = (nnodes + 1)/2;
 462   for(i=0; (i<nnodes); i++) {
 463     lastcg = pd->cgindex[i+1]-1;
 464     naaj = calc_naaj(lastcg,pd->cgindex[nnodes]);
 465     targetcg = (lastcg+naaj) % pd->cgindex[nnodes];
 466
 467     /* Search until we find the target charge group */
 468     for(nshift=0;
 469         (nshift < nnodes) && (targetcg > pd->cgindex[nshift+1]);
 470         nshift++)
 471       ;
 472     /* Now compute the shift, that is the difference in node index */
 473     nshift=((nshift - i + nnodes) % nnodes);
 474
 475     if (fp)
 476       fprintf(fp,"CPU=%3d, lastcg=%5d, targetcg=%5d, myshift=%5d\n",
 477               i,lastcg,targetcg,nshift);
 478
 479     /* It's the largest shift that matters */
 480     pd->shift=max(nshift,pd->shift);
 481   }
 482   /* Now for the bonded forces */
 483   for(i=0; (i<F_NRE); i++) {
 484     if (interaction_function[i].flags & IF_BOND) {
 485       int nratoms = interaction_function[i].nratoms;
 486       for(j=0; (j<idef->il[i].nr); j+=nratoms+1) {
 487         for(k=1; (k<=nratoms); k++)
 488           homeid[k-1] = home_cpu(nnodes,pd,idef->il[i].iatoms[j+k]);
 489         for(k=1; (k<nratoms); k++)
 490           pd->shift = max(pd->shift,abs(homeid[k]-homeid[0]));
 491       }
 492     }
 493   }
 494   if (fp)
 495     fprintf(fp,"pd->shift = %3d, pd->bshift=%3d\n",
 496             pd->shift,pd->bshift);
 497 }
 498
 499
 500 static void
 501 init_partdec_constraint(t_commrec *cr,
 502                         t_idef *   idef,
 503                                                 int *left_range,
 504                                                 int *right_range)
 505 {
 506         gmx_partdec_t *            pd = cr->pd;
 507         gmx_partdec_constraint_t *pdc;
 508         int i,cnt,k;
 509         int ai,aj,nodei,nodej;
 510         int nratoms;
 511         int nodeid;
 512
 513         snew(pdc,1);
 514         cr->pd->constraints = pdc;
 515
 516
 517         /* Who am I? */
 518     nodeid = cr->nodeid;
 519
 520     /* Setup LINCS communication ranges */
 521     pdc->left_range_receive   = left_range[nodeid];
 522     pdc->right_range_receive  = right_range[nodeid]+1;
 523     pdc->left_range_send      = (nodeid > 0) ? right_range[nodeid-1]+1 : 0;
 524     pdc->right_range_send     = (nodeid < cr->nnodes-1) ? left_range[nodeid+1] : pd->index[cr->nnodes];
 525
 526         snew(pdc->nlocalatoms,idef->il[F_CONSTR].nr);
 527         nratoms = interaction_function[F_CONSTR].nratoms;
 528
 529         for(i=0,cnt=0;i<idef->il[F_CONSTR].nr;i+=nratoms+1,cnt++)
 530         {
 531                 ai = idef->il[F_CONSTR].iatoms[i+1];
 532                 aj = idef->il[F_CONSTR].iatoms[i+2];
 533                 nodei = 0;
 534                 while(ai>=pd->index[nodei+1])
 535                 {
 536                         nodei++;
 537                 }
 538                 nodej = 0;
 539                 while(aj>=pd->index[nodej+1])
 540                 {
 541                         nodej++;
 542                 }
 543                 pdc->nlocalatoms[cnt] = 0;
 544                 if(nodei==nodeid)
 545                 {
 546                         pdc->nlocalatoms[cnt]++;
 547                 }
 548                 if(nodej==nodeid)
 549                 {
 550                         pdc->nlocalatoms[cnt]++;
 551                 }
 552         }
 553         pdc->nconstraints = cnt;
 554
 555         /* This should really be calculated, but 1000 is a _lot_ for overlapping constraints... */
 556         snew(pdc->sendbuf,1000);
 557         snew(pdc->recvbuf,1000);
 558
 559 }
 560
 561 static void init_partdec(FILE *fp,t_commrec *cr,t_block *cgs,int *multinr,
 562                                                  t_idef *idef)
 563 {
 564   int  i,nodeid;
 565   gmx_partdec_t *pd;
 566
 567   snew(pd,1);
 568   cr->pd = pd;
 569
 570   set_left_right(cr);
 571
 572   if (cr->nnodes > 1) {
 573     if (multinr == NULL)
 574       gmx_fatal(FARGS,"Internal error in init_partdec: multinr = NULL");
 575     snew(pd->index,cr->nnodes+1);
 576     snew(pd->cgindex,cr->nnodes+1);
 577     pd->cgindex[0] = 0;
 578     pd->index[0]   = 0;
 579     for(i=0; (i < cr->nnodes); i++) {
 580       pd->cgindex[i+1] = multinr[i];
 581       pd->index[i+1]   = cgs->index[multinr[i]];
 582     }
 583     calc_nsbshift(fp,cr->nnodes,pd,idef);
 584     /* This is a hack to do with bugzilla 148 */
 585     /*pd->shift = cr->nnodes-1;
 586       pd->bshift = 0;*/
 587
 588     /* Allocate a buffer of size natoms of the whole system
 589      * for summing the forces over the nodes.
 590      */
 591     snew(pd->vbuf,cgs->index[cgs->nr]);
 592     pd->constraints = NULL;
 593   }
 594 #ifdef GMX_MPI
 595   pd->mpi_req_tx=MPI_REQUEST_NULL;
 596   pd->mpi_req_rx=MPI_REQUEST_NULL;
 597 #endif
 598 }
 599
 600 static void print_partdec(FILE *fp,const char *title,
 601                           int nnodes,gmx_partdec_t *pd)
 602 {
 603   int i;
 604
 605   fprintf(fp,"%s\n",title);
 606   fprintf(fp,"nnodes:       %5d\n",nnodes);
 607   fprintf(fp,"pd->shift:    %5d\n",pd->shift);
 608   fprintf(fp,"pd->bshift:   %5d\n",pd->bshift);
 609
 610   fprintf(fp,"Nodeid   atom0   #atom     cg0       #cg\n");
 611   for(i=0; (i<nnodes); i++)
 612     fprintf(fp,"%6d%8d%8d%8d%10d\n",
 613             i,
 614             pd->index[i],pd->index[i+1]-pd->index[i],
 615             pd->cgindex[i],pd->cgindex[i+1]-pd->cgindex[i]);
 616   fprintf(fp,"\n");
 617 }
 618
 619 static void pr_idef_division(FILE *fp,t_idef *idef,int nnodes,int **multinr)
 620 {
 621   int i,ftype,nr,nra,m0,m1;
 622
 623   fprintf(fp,"Division of bonded forces over processors\n");
 624   fprintf(fp,"%-12s","CPU");
 625   for(i=0; (i<nnodes); i++)
 626     fprintf(fp," %5d",i);
 627   fprintf(fp,"\n");
 628
 629   for(ftype=0; (ftype<F_NRE); ftype++) {
 630     if (idef->il[ftype].nr > 0) {
 631       nr  = idef->il[ftype].nr;
 632       nra = 1+interaction_function[ftype].nratoms;
 633       fprintf(fp,"%-12s", interaction_function[ftype].name);
 634       /* Loop over processors */
 635       for(i=0; (i<nnodes); i++) {
 636         m0 = (i == 0) ? 0 : multinr[ftype][i-1]/nra;
 637         m1 = multinr[ftype][i]/nra;
 638         fprintf(fp," %5d",m1-m0);
 639       }
 640       fprintf(fp,"\n");
 641     }
 642   }
 643 }
 644
 645 static void select_my_ilist(FILE *log,t_ilist *il,int *multinr,t_commrec *cr)
 646 {
 647   t_iatom *ia;
 648   int     i,start,end,nr;
 649
 650   if (cr->nodeid == 0)
 651     start=0;
 652   else
 653     start=multinr[cr->nodeid-1];
 654   end=multinr[cr->nodeid];
 655
 656   nr=end-start;
 657   if (nr < 0)
 658     gmx_fatal(FARGS,"Negative number of atoms (%d) on node %d\n"
 659                 "You have probably not used the same value for -np with grompp"
 660                 " and mdrun",
 661                 nr,cr->nodeid);
 662   snew(ia,nr);
 663
 664   for(i=0; (i<nr); i++)
 665     ia[i]=il->iatoms[start+i];
 666
 667   sfree(il->iatoms);
 668   il->iatoms=ia;
 669
 670   il->nr=nr;
 671 }
 672
 673 static void select_my_idef(FILE *log,t_idef *idef,int **multinr,t_commrec *cr)
 674 {
 675   int i;
 676
 677   for(i=0; (i<F_NRE); i++)
 678     select_my_ilist(log,&idef->il[i],multinr[i],cr);
 679 }
 680
 681 gmx_localtop_t *split_system(FILE *log,
 682                              gmx_mtop_t *mtop,t_inputrec *inputrec,
 683                              t_commrec *cr)
 684 {
 685   int    i,npp,n,nn;
 686   real   *capacity;
 687   double tcap = 0,cap;
 688   int    *multinr_cgs,**multinr_nre;
 689   char   *cap_env;
 690   gmx_localtop_t *top;
 691   int    *left_range;
 692   int    *right_range;
 693
 694   /* Time to setup the division of charge groups over processors */
 695   npp = cr->nnodes-cr->npmenodes;
 696   snew(capacity,npp);
 697   cap_env = getenv("GMX_CAPACITY");
 698   if (cap_env == NULL) {
 699     for(i=0; (i<npp-1); i++) {
 700       capacity[i] = 1.0/(double)npp;
 701       tcap += capacity[i];
 702     }
 703     /* Take care that the sum of capacities is 1.0 */
 704     capacity[npp-1] = 1.0 - tcap;
 705   } else {
 706     tcap = 0;
 707     nn = 0;
 708     for(i=0; i<npp; i++) {
 709       cap = 0;
 710       sscanf(cap_env+nn,"%lf%n",&cap,&n);
 711       if (cap == 0)
 712         gmx_fatal(FARGS,"Incorrect entry or number of entries in GMX_CAPACITY='%s'",cap_env);
 713       capacity[i] = cap;
 714       tcap += cap;
 715       nn += n;
 716     }
 717     for(i=0; i<npp; i++)
 718       capacity[i] /= tcap;
 719   }
 720
 721   /* Convert the molecular topology to a fully listed topology */
 722   top = gmx_mtop_generate_local_top(mtop,inputrec);
 723
 724   snew(multinr_cgs,npp);
 725   snew(multinr_nre,F_NRE);
 726   for(i=0; i<F_NRE; i++)
 727     snew(multinr_nre[i],npp);
 728
 729
 730   snew(left_range,cr->nnodes);
 731   snew(right_range,cr->nnodes);
 732
 733   /* This computes which entities can be placed on processors */
 734   split_top(log,npp,top,inputrec,&mtop->mols,capacity,multinr_cgs,multinr_nre,left_range,right_range);
 735
 736   sfree(capacity);
 737   init_partdec(log,cr,&(top->cgs),multinr_cgs,&(top->idef));
 738
 739   /* This should be fine */
 740   /*split_idef(&(top->idef),cr->nnodes-cr->npmenodes);*/
 741
 742   select_my_idef(log,&(top->idef),multinr_nre,cr);
 743
 744   if(top->idef.il[F_CONSTR].nr>0)
 745   {
 746           init_partdec_constraint(cr,&(top->idef),left_range,right_range);
 747   }
 748
 749   if (log)
 750     pr_idef_division(log,&(top->idef),npp,multinr_nre);
 751
 752   for(i=0; i<F_NRE; i++)
 753     sfree(multinr_nre[i]);
 754   sfree(multinr_nre);
 755   sfree(multinr_cgs);
 756
 757   sfree(left_range);
 758   sfree(right_range);
 759
 760   if (log)
 761     print_partdec(log,"Workload division",cr->nnodes,cr->pd);
 762
 763   return top;
 764 }
 765
 766 static void create_vsitelist(int nindex, int *list,
 767                              int *targetn, int **listptr)
 768 {
 769   int i,j,k,inr;
 770   int minidx;
 771   int *newlist;
 772
 773   /* remove duplicates */
 774   for(i=0;i<nindex;i++) {
 775     inr=list[i];
 776     for(j=i+1;j<nindex;j++) {
 777       if(list[j]==inr) {
 778         for(k=j;k<nindex-1;k++)
 779           list[k]=list[k+1];
 780         nindex--;
 781       }
 782     }
 783   }
 784
 785   *targetn=nindex;
 786   snew(newlist,nindex);
 787
 788   /* sort into the new array */
 789   for(i=0;i<nindex;i++) {
 790     inr=-1;
 791     for(j=0;j<nindex;j++)
 792       if(list[j]>0 && (inr==-1 || list[j]<list[inr]))
 793         inr=j; /* smallest so far */
 794     newlist[i]=list[inr];
 795     list[inr]=-1;
 796   }
 797   *listptr=newlist;
 798 }
 799
 800 static void
 801 add_to_vsitelist(int **list, int *nitem, int *nalloc,int newitem)
 802 {
 803         int  i,idx;
 804         gmx_bool found;
 805
 806         found = FALSE;
 807         idx = *nitem;
 808         for(i=0; i<idx && !found;i++)
 809         {
 810                 found = (newitem ==(*list)[i]);
 811         }
 812         if(!found)
 813         {
 814                 *nalloc+=100;
 815                 srenew(*list,*nalloc);
 816                 (*list)[idx++] = newitem;
 817                 *nitem = idx;
 818         }
 819 }
 820
 821 gmx_bool setup_parallel_vsites(t_idef *idef,t_commrec *cr,
 822                                                    t_comm_vsites *vsitecomm)
 823 {
 824         int i,j,ftype;
 825         int nra;
 826         gmx_bool do_comm;
 827         t_iatom   *ia;
 828         gmx_partdec_t *pd;
 829         int  iconstruct;
 830         int  i0,i1;
 831         int  nalloc_left_construct,nalloc_right_construct;
 832         int  sendbuf[2],recvbuf[2];
 833         int  bufsize,leftbuf,rightbuf;
 834
 835         pd = cr->pd;
 836
 837         i0 = pd->index[cr->nodeid];
 838         i1 = pd->index[cr->nodeid+1];
 839
 840         vsitecomm->left_import_construct  = NULL;
 841         vsitecomm->left_import_nconstruct = 0;
 842         nalloc_left_construct      = 0;
 843
 844         vsitecomm->right_import_construct  = NULL;
 845         vsitecomm->right_import_nconstruct = 0;
 846         nalloc_right_construct      = 0;
 847
 848         for(ftype=0; (ftype<F_NRE); ftype++)
 849         {
 850                 if ( !(interaction_function[ftype].flags & IF_VSITE) )
 851                 {
 852                         continue;
 853                 }
 854
 855                 nra    = interaction_function[ftype].nratoms;
 856                 ia     = idef->il[ftype].iatoms;
 857
 858                 for(i=0; i<idef->il[ftype].nr;i+=nra+1)
 859                 {
 860                         for(j=2;j<1+nra;j++)
 861                         {
 862                                 iconstruct = ia[i+j];
 863                                 if(iconstruct<i0)
 864                                 {
 865                                         add_to_vsitelist(&vsitecomm->left_import_construct,
 866                                                                          &vsitecomm->left_import_nconstruct,
 867                                                                          &nalloc_left_construct,iconstruct);
 868                                 }
 869                                 else if(iconstruct>=i1)
 870                                 {
 871                                         add_to_vsitelist(&vsitecomm->right_import_construct,
 872                                                                          &vsitecomm->right_import_nconstruct,
 873                                                                          &nalloc_right_construct,iconstruct);
 874                                 }
 875                         }
 876                 }
 877     }
 878
 879         /* Pre-communicate the array lengths */
 880         gmx_tx_rx_void(cr,
 881                                    GMX_RIGHT,(void *)&vsitecomm->right_import_nconstruct,sizeof(int),
 882                                    GMX_LEFT, (void *)&vsitecomm->left_export_nconstruct, sizeof(int));
 883         gmx_tx_rx_void(cr,
 884                                    GMX_LEFT, (void *)&vsitecomm->left_import_nconstruct, sizeof(int),
 885                                    GMX_RIGHT,(void *)&vsitecomm->right_export_nconstruct,sizeof(int));
 886
 887         snew(vsitecomm->left_export_construct, vsitecomm->left_export_nconstruct);
 888         snew(vsitecomm->right_export_construct,vsitecomm->right_export_nconstruct);
 889
 890         /* Communicate the construcing atom index arrays */
 891         gmx_tx_rx_void(cr,
 892                                    GMX_RIGHT,(void *)vsitecomm->right_import_construct,vsitecomm->right_import_nconstruct*sizeof(int),
 893                                    GMX_LEFT, (void *)vsitecomm->left_export_construct, vsitecomm->left_export_nconstruct*sizeof(int));
 894
 895         /* Communicate the construcing atom index arrays */
 896         gmx_tx_rx_void(cr,
 897                                    GMX_LEFT ,(void *)vsitecomm->left_import_construct, vsitecomm->left_import_nconstruct*sizeof(int),
 898                                    GMX_RIGHT,(void *)vsitecomm->right_export_construct,vsitecomm->right_export_nconstruct*sizeof(int));
 899
 900         leftbuf  = max(vsitecomm->left_export_nconstruct,vsitecomm->left_import_nconstruct);
 901         rightbuf = max(vsitecomm->right_export_nconstruct,vsitecomm->right_import_nconstruct);
 902
 903         bufsize  = max(leftbuf,rightbuf);
 904
 905         do_comm = (bufsize>0);
 906
 907         snew(vsitecomm->send_buf,2*bufsize);
 908         snew(vsitecomm->recv_buf,2*bufsize);
 909
 910         return do_comm;
 911 }
 912
 913 t_state *partdec_init_local_state(t_commrec *cr,t_state *state_global)
 914 {
 915   t_state *state_local;
 916
 917   snew(state_local,1);
 918
 919   /* Copy all the contents */
 920   *state_local = *state_global;
 921
 922   if (state_global->nrngi > 1) {
 923     /* With stochastic dynamics we need local storage for the random state */
 924     if (state_local->flags & (1<<estLD_RNG)) {
 925       state_local->nrng = gmx_rng_n();
 926       snew(state_local->ld_rng,state_local->nrng);
 927 #ifdef GMX_MPI
 928       if (PAR(cr)) {
 929         MPI_Scatter(state_global->ld_rng,
 930                     state_local->nrng*sizeof(state_local->ld_rng[0]),MPI_BYTE,
 931                     state_local->ld_rng ,
 932                     state_local->nrng*sizeof(state_local->ld_rng[0]),MPI_BYTE,
 933                     MASTERRANK(cr),cr->mpi_comm_mygroup);
 934       }
 935 #endif
 936     }
 937     if (state_local->flags & (1<<estLD_RNGI)) {
 938       snew(state_local->ld_rngi,1);
 939 #ifdef GMX_MPI
 940       if (PAR(cr)) {
 941         MPI_Scatter(state_global->ld_rngi,
 942                     sizeof(state_local->ld_rngi[0]),MPI_BYTE,
 943                     state_local->ld_rngi,
 944                     sizeof(state_local->ld_rngi[0]),MPI_BYTE,
 945                     MASTERRANK(cr),cr->mpi_comm_mygroup);
 946       }
 947 #endif
 948     }
 949   }
 950
 951   return state_local;
 952 }