Update copyright statements and change license to LGPL
[alexxy/gromacs.git] / src / mdlib / partdec.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team,
6  * check out http://www.gromacs.org for more information.
7  * Copyright (c) 2012, by the GROMACS development team, led by
8  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
9  * others, as listed in the AUTHORS file in the top-level source
10  * directory and at http://www.gromacs.org.
11  *
12  * GROMACS is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public License
14  * as published by the Free Software Foundation; either version 2.1
15  * of the License, or (at your option) any later version.
16  *
17  * GROMACS is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with GROMACS; if not, see
24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
26  *
27  * If you want to redistribute modifications to GROMACS, please
28  * consider that scientific software is very special. Version
29  * control is crucial - bugs must be traceable. We will be happy to
30  * consider code for inclusion in the official distribution, but
31  * derived work must not be called official GROMACS. Details are found
32  * in the README & COPYING files - if they are missing, get the
33  * official version at http://www.gromacs.org.
34  *
35  * To help us fund GROMACS development, we humbly ask that you cite
36  * the research papers on the package. Check out http://www.gromacs.org.
37  */
38 /* This file is completely threadsafe - keep it that way! */
39 #ifdef HAVE_CONFIG_H
40 #include <config.h>
41 #endif
42
43 #include <math.h>
44
45 #include "sysstuff.h"
46 #include "typedefs.h"
47 #include "smalloc.h"
48 #include "invblock.h"
49 #include "macros.h"
50 #include "main.h"
51 #include "ns.h"
52 #include "partdec.h"
53 #include "splitter.h"
54 #include "gmx_random.h"
55 #include "mtop_util.h"
56 #include "mvdata.h"
57 #include "vec.h"
58
59 typedef struct gmx_partdec_constraint
60 {
61         int                  left_range_receive;
62         int                  right_range_receive;
63         int                  left_range_send;
64         int                  right_range_send;
65         int                  nconstraints;
66         int *                nlocalatoms;
67         rvec *               sendbuf;
68         rvec *               recvbuf;
69
70 } gmx_partdec_constraint_t;
71
72
73 typedef struct gmx_partdec {
74   int  neighbor[2];             /* The nodeids of left and right neighb */
75   int  *cgindex;                /* The charge group boundaries,         */
76                                 /* size nnodes+1,                       */
77                                 /* only allocated with particle decomp. */
78   int  *index;                  /* The home particle boundaries,        */
79                                 /* size nnodes+1,                       */
80                                 /* only allocated with particle decomp. */
81   int  shift,bshift;            /* Coordinates are shifted left for     */
82                                 /* 'shift' systolic pulses, and right   */
83                                 /* for 'bshift' pulses. Forces are      */
84                                 /* shifted right for 'shift' pulses     */
85                                 /* and left for 'bshift' pulses         */
86                                 /* This way it is not necessary to shift */
87                                 /* the coordinates over the entire ring  */
88   rvec *vbuf;                   /* Buffer for summing the forces        */
89 #ifdef GMX_MPI
90   MPI_Request mpi_req_rx;       /* MPI reqs for async transfers */
91   MPI_Request mpi_req_tx;
92 #endif
93  gmx_partdec_constraint_t *     constraints;    
94 } gmx_partdec_t;
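
/*
 * Illustrative sketch (not part of the build): how the shift/bshift counts
 * above are typically consumed.  Coordinates make pd->shift systolic pulses
 * to the left (and pd->bshift to the right); forces travel the opposite way.
 * The buffer names and byte counts below are hypothetical.
 *
 *   int pulse;
 *   for (pulse = 0; pulse < cr->pd->shift; pulse++)
 *   {
 *       gmx_tx(cr, GMX_LEFT,  (void *)x_send, nbytes);  // post send to the left neighbor
 *       gmx_rx(cr, GMX_RIGHT, (void *)x_recv, nbytes);  // post receive from the right
 *       gmx_wait(cr, GMX_LEFT, GMX_RIGHT);              // complete both before reusing buffers
 *   }
 */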
95
96
97 void gmx_tx(const t_commrec *cr,int dir,void *buf,int bufsize)
98 {
99 #ifndef GMX_MPI
100   gmx_call("gmx_tx"); 
101 #else
102   int        nodeid;
103   int        tag,flag;
104   MPI_Status status;
105
106   nodeid = cr->pd->neighbor[dir];
107   
108 #ifdef DEBUG
109   fprintf(stderr,"gmx_tx: nodeid=%d, buf=%x, bufsize=%d\n",
110           nodeid,buf,bufsize);
111 #endif
112 #ifdef MPI_TEST
113   /* workaround for crashes encountered with MPI on IRIX 6.5 */
114   if (cr->pd->mpi_req_tx != MPI_REQUEST_NULL) {
115     MPI_Test(&cr->pd->mpi_req_tx,&flag,&status);
116     if (flag==FALSE) {
117       fprintf(stdlog,"gmx_tx called before previous send was complete: nodeid=%d, buf=%x, bufsize=%d\n",
118               nodeid,buf,bufsize);
119       gmx_tx_wait(nodeid);
120     }
121   }
122 #endif
123   tag = 0;
124   if (MPI_Isend(buf,bufsize,MPI_BYTE,RANK(cr,nodeid),tag,cr->mpi_comm_mygroup,&cr->pd->mpi_req_tx) != 0)
125     gmx_comm("MPI_Isend Failed");
126 #endif
127 }
128
129 void gmx_tx_wait(const t_commrec *cr, int dir)
130 {
131 #ifndef GMX_MPI
132   gmx_call("gmx_tx_wait");
133 #else
134   MPI_Status  status;
135   int mpi_result;
136
137   if ((mpi_result=MPI_Wait(&cr->pd->mpi_req_tx,&status)) != 0)
138     gmx_fatal(FARGS,"MPI_Wait: result=%d",mpi_result);
139 #endif
140 }
141
142 void gmx_rx(const t_commrec *cr,int dir,void *buf,int bufsize)
143 {
144 #ifndef GMX_MPI
145   gmx_call("gmx_rx");
146 #else
147   int nodeid;
148   int tag;
149
150   nodeid = cr->pd->neighbor[dir];
151 #ifdef DEBUG
152   fprintf(stderr,"gmx_rx: nodeid=%d, buf=%x, bufsize=%d\n",
153           nodeid,buf,bufsize);
154 #endif
155   tag = 0;
156   if (MPI_Irecv( buf, bufsize, MPI_BYTE, RANK(cr,nodeid), tag, cr->mpi_comm_mygroup, &cr->pd->mpi_req_rx) != 0 )
157     gmx_comm("MPI_Irecv Failed");
158 #endif
159 }
160
161 void gmx_rx_wait(const t_commrec *cr, int nodeid)
162 {
163 #ifndef GMX_MPI
164   gmx_call("gmx_rx_wait");
165 #else
166   MPI_Status  status;
167   int mpi_result;
168   
169   if ((mpi_result=MPI_Wait(&cr->pd->mpi_req_rx,&status)) != 0)
170     gmx_fatal(FARGS,"MPI_Wait: result=%d",mpi_result);
171 #endif
172 }
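
/*
 * Usage sketch (illustrative only): gmx_tx/gmx_rx post non-blocking
 * transfers and gmx_tx_wait/gmx_rx_wait (or gmx_wait below) complete them,
 * so a caller can overlap independent work with the communication:
 *
 *   gmx_tx(cr, GMX_LEFT,  (void *)sendbuf, nbytes);   // MPI_Isend to the left neighbor
 *   gmx_rx(cr, GMX_RIGHT, (void *)recvbuf, nbytes);   // MPI_Irecv from the right neighbor
 *   ...                                               // independent work can go here
 *   gmx_wait(cr, GMX_LEFT, GMX_RIGHT);                // wait for both requests
 */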
173
174 void gmx_tx_rx_real(const t_commrec *cr,
175                     int send_dir,real *send_buf,int send_bufsize,
176                     int recv_dir,real *recv_buf,int recv_bufsize)
177 {
178 #ifndef GMX_MPI
179   gmx_call("gmx_tx_rx_real");
180 #else
181   int send_nodeid,recv_nodeid;
182   int tx_tag = 0,rx_tag = 0;
183   MPI_Status stat;
184
185   send_nodeid = cr->pd->neighbor[send_dir];
186   recv_nodeid = cr->pd->neighbor[recv_dir];
187                    
188 #ifdef GMX_DOUBLE
189 #define mpi_type MPI_DOUBLE
190 #else
191 #define mpi_type MPI_FLOAT
192 #endif
193
194   if (send_bufsize > 0 && recv_bufsize > 0) {
195     MPI_Sendrecv(send_buf,send_bufsize,mpi_type,RANK(cr,send_nodeid),tx_tag,
196                  recv_buf,recv_bufsize,mpi_type,RANK(cr,recv_nodeid),rx_tag,
197                  cr->mpi_comm_mygroup,&stat);
198   } else if (send_bufsize > 0) {
199     MPI_Send(send_buf,send_bufsize,mpi_type,RANK(cr,send_nodeid),tx_tag,
200              cr->mpi_comm_mygroup);
201   } else if (recv_bufsize > 0) {
202     MPI_Recv(recv_buf,recv_bufsize,mpi_type,RANK(cr,recv_nodeid),rx_tag,
203              cr->mpi_comm_mygroup,&stat);
204   }
205 #undef mpi_type
206 #endif
207 }
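
/*
 * Illustrative example (hypothetical buffers): exchanging coordinate blocks
 * with both ring neighbors in a single call, as pd_move_x_constraints() does
 * further down:
 *
 *   gmx_tx_rx_real(cr,
 *                  GMX_RIGHT, (real *)x_send, 3*nat_send,
 *                  GMX_LEFT,  (real *)x_recv, 3*nat_recv);
 *
 * Because the body maps onto MPI_Sendrecv (or a plain MPI_Send/MPI_Recv when
 * one of the counts is zero), no separate wait call is needed afterwards.
 */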
208
209
210 void gmx_tx_rx_void(const t_commrec *cr,
211                     int send_dir,void *send_buf,int send_bufsize,
212                     int recv_dir,void *recv_buf,int recv_bufsize)
213 {
214 #ifndef GMX_MPI
215         gmx_call("gmx_tx_rx_void");
216 #else
217         int send_nodeid,recv_nodeid;
218         int tx_tag = 0,rx_tag = 0;
219         MPI_Status stat;
220         
221         send_nodeid = cr->pd->neighbor[send_dir];
222         recv_nodeid = cr->pd->neighbor[recv_dir];
223         
224         
225         MPI_Sendrecv(send_buf,send_bufsize,MPI_BYTE,RANK(cr,send_nodeid),tx_tag,
226                                  recv_buf,recv_bufsize,MPI_BYTE,RANK(cr,recv_nodeid),rx_tag,
227                                  cr->mpi_comm_mygroup,&stat);
228
229 #endif
230 }
231
232
233 /*void gmx_wait(int dir_send,int dir_recv)*/
234                 
235 void gmx_wait(const t_commrec *cr, int dir_send,int dir_recv)
236 {
237 #ifndef GMX_MPI
238   gmx_call("gmx_wait");
239 #else
240   gmx_tx_wait(cr, dir_send);
241   gmx_rx_wait(cr, dir_recv);
242 #endif
243 }
244
245 static void set_left_right(t_commrec *cr)
246 {
247   cr->pd->neighbor[GMX_LEFT]  = (cr->nnodes + cr->nodeid - 1) % cr->nnodes;
248   cr->pd->neighbor[GMX_RIGHT] = (cr->nodeid + 1) % cr->nnodes;
249 }
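
/*
 * Example: with cr->nnodes = 4 the ring neighbors work out as
 *
 *   nodeid    : 0 1 2 3
 *   GMX_LEFT  : 3 0 1 2
 *   GMX_RIGHT : 1 2 3 0
 *
 * i.e. the modulo arithmetic simply closes the ring at both ends.
 */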
250
251 void pd_move_f(const t_commrec *cr,rvec f[],t_nrnb *nrnb)
252 {
253   move_f(NULL,cr,GMX_LEFT,GMX_RIGHT,f,cr->pd->vbuf,nrnb);
254 }
255
256 int *pd_cgindex(const t_commrec *cr)
257 {
258   return cr->pd->cgindex;
259 }
260
261 int *pd_index(const t_commrec *cr)
262 {
263   return cr->pd->index;
264 }
265
266 int pd_shift(const t_commrec *cr)
267 {
268   return cr->pd->shift;
269 }
270
271 int pd_bshift(const t_commrec *cr)
272 {
273   return cr->pd->bshift;
274 }
275
276 void pd_cg_range(const t_commrec *cr,int *cg0,int *cg1)
277 {
278   *cg0 = cr->pd->cgindex[cr->nodeid];
279   *cg1 = cr->pd->cgindex[cr->nodeid+1];
280 }
281
282 void pd_at_range(const t_commrec *cr,int *at0,int *at1)
283 {
284   *at0 = cr->pd->index[cr->nodeid];
285   *at1 = cr->pd->index[cr->nodeid+1];
286 }
287
288 void
289 pd_get_constraint_range(gmx_partdec_p_t pd,int *start,int *natoms)
290 {
291         *start  = pd->constraints->left_range_receive;
292         *natoms = pd->constraints->right_range_receive-pd->constraints->left_range_receive;
293 }
294
295 int *
296 pd_constraints_nlocalatoms(gmx_partdec_p_t pd)
297 {
298         int *rc;
299         
300         if(NULL != pd && NULL != pd->constraints)
301         {
302                 rc = pd->constraints->nlocalatoms;
303         }
304         else
305         {
306                 rc = NULL;
307         }
308         return rc;
309 }
310
311
312
313
314 /* This routine is used to communicate the non-home atoms needed for constraints.
315  * We have already calculated this range of atoms during setup, and stored it in the
316  * partdec constraints structure.
317  *
318  * When called, we send/receive left_range_send/receive atoms to our left (lower)
319  * node neighbor, and similarly to the right (higher) node.
320  *
321  * This has not been tested for periodic molecules...
322  */
323 void
324 pd_move_x_constraints(t_commrec *  cr,
325                                           rvec *       x0,
326                                           rvec *       x1)
327 {
328 #ifdef GMX_MPI
329         gmx_partdec_t *pd;
330         gmx_partdec_constraint_t *pdc;
331         
332         rvec *     sendptr;
333         rvec *     recvptr;
334         int        thisnode;
335         int        i;
336         int        cnt;
337         int        sendcnt,recvcnt;
338         
339         pd  = cr->pd;
340         pdc = pd->constraints;
341         
342         if (pdc == NULL)
343         {
344                 return;
345         }
346
347         thisnode  = cr->nodeid;
348         
349         /* First pulse to right */
350         
351         recvcnt = 3*(pd->index[thisnode]-pdc->left_range_receive);
352         sendcnt = 3*(pd->index[thisnode+1]-pdc->right_range_send);
353
354         if(x1!=NULL)
355         {
356                 /* Assemble temporary array with both x0 & x1 */
357                 recvptr = pdc->recvbuf;
358                 sendptr = pdc->sendbuf;
359                 
360                 cnt = 0;
361                 for(i=pdc->right_range_send;i<pd->index[thisnode+1];i++)
362                 {
363                         copy_rvec(x0[i],sendptr[cnt++]);
364                 }
365                 for(i=pdc->right_range_send;i<pd->index[thisnode+1];i++)
366                 {
367                         copy_rvec(x1[i],sendptr[cnt++]);
368                 }
369                 recvcnt *= 2;
370                 sendcnt *= 2;           
371         }
372         else
373         {
374                 recvptr = x0 + pdc->left_range_receive;
375                 sendptr = x0 + pdc->right_range_send;
376         }
377                 
378         gmx_tx_rx_real(cr,
379                                    GMX_RIGHT,(real *)sendptr,sendcnt,
380                                    GMX_LEFT, (real *)recvptr,recvcnt);
381                                 
382         if(x1 != NULL)
383         {
384                 /* copy back to x0/x1 */
385                 cnt = 0;
386                 for(i=pdc->left_range_receive;i<pd->index[thisnode];i++)
387                 {
388                         copy_rvec(recvptr[cnt++],x0[i]);
389                 }
390                 for(i=pdc->left_range_receive;i<pd->index[thisnode];i++)
391                 {
392                         copy_rvec(recvptr[cnt++],x1[i]);
393                 }
394         }
395         
396         /* And pulse to left */
397         sendcnt = 3*(pdc->left_range_send-pd->index[thisnode]); 
398         recvcnt = 3*(pdc->right_range_receive-pd->index[thisnode+1]);
399
400         if(x1 != NULL)
401         {       
402                 cnt = 0;
403                 for(i=cr->pd->index[thisnode];i<pdc->left_range_send;i++)
404                 {
405                         copy_rvec(x0[i],sendptr[cnt++]);
406                 }
407                 for(i=cr->pd->index[thisnode];i<pdc->left_range_send;i++)
408                 {
409                         copy_rvec(x1[i],sendptr[cnt++]);
410                 }
411                 recvcnt *= 2;
412                 sendcnt *= 2;           
413         }
414         else
415         {
416                 sendptr = x0 + pd->index[thisnode];
417                 recvptr = x0 + pd->index[thisnode+1];
418         }
419         
420         gmx_tx_rx_real(cr,
421                                    GMX_LEFT ,(real *)sendptr,sendcnt,
422                                    GMX_RIGHT,(real *)recvptr,recvcnt);
423
424         /* Final copy back from buffers */
425         if(x1 != NULL)
426         {
427                 /* First copy received data back into x0 & x1 */
428                 cnt = 0;
429                 for(i=pd->index[thisnode+1];i<pdc->right_range_receive;i++)
430                 {
431                         copy_rvec(recvptr[cnt++],x0[i]);
432                 }
433                 for(i=pd->index[thisnode+1];i<pdc->right_range_receive;i++)
434                 {
435                         copy_rvec(recvptr[cnt++],x1[i]);
436                 }
437         }               
438 #endif
439 }
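
/*
 * Usage sketch (hypothetical caller): a constraint solver that works on the
 * home atoms would fetch the halo once per application of the constraints:
 *
 *   pd_move_x_constraints(cr, x, NULL);      // communicate one coordinate array
 *   pd_move_x_constraints(cr, x, xprime);    // or pack and move two arrays at once
 *
 * Passing a non-NULL second array makes the routine pack x0 and x1 into the
 * shared send/receive buffers, so both are moved with the same two pulses.
 */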
440
441 static int home_cpu(int nnodes,gmx_partdec_t *pd,int atomid)
442 {
443   int k;
444  
445   for(k=0; (k<nnodes); k++) {
446     if (atomid<pd->index[k+1])
447       return k;
448   }
449   gmx_fatal(FARGS,"Atomid %d is larger than number of atoms (%d)",
450             atomid+1,pd->index[nnodes]+1);
451             
452   return -1;
453 }
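
/*
 * Example: with nnodes = 3 and pd->index = {0, 100, 200, 300}, atom 150
 * fails the test against pd->index[1] = 100 but passes against
 * pd->index[2] = 200, so home_cpu() returns 1.
 */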
454
455 static void calc_nsbshift(FILE *fp,int nnodes,gmx_partdec_t *pd,t_idef *idef)
456 {
457   int i,j,k;
458   int lastcg,targetcg,nshift,naaj;
459   int homeid[32];
460       
461   pd->bshift=0;
462   for(i=1; (i<nnodes); i++) {
463     targetcg = pd->cgindex[i];
464     for(nshift=i; (nshift > 0) && (pd->cgindex[nshift] > targetcg); nshift--)
465       ;
466     pd->bshift=max(pd->bshift,i-nshift);
467   }
468
469   pd->shift = (nnodes + 1)/2;
470   for(i=0; (i<nnodes); i++) {
471     lastcg = pd->cgindex[i+1]-1;
472     naaj = calc_naaj(lastcg,pd->cgindex[nnodes]);
473     targetcg = (lastcg+naaj) % pd->cgindex[nnodes];
474     
475     /* Search until we find the target charge group */
476     for(nshift=0;
477         (nshift < nnodes) && (targetcg > pd->cgindex[nshift+1]);
478         nshift++)
479       ;
480     /* Now compute the shift, that is the difference in node index */
481     nshift=((nshift - i + nnodes) % nnodes);
482     
483     if (fp)
484       fprintf(fp,"CPU=%3d, lastcg=%5d, targetcg=%5d, myshift=%5d\n",
485               i,lastcg,targetcg,nshift);
486             
487     /* It's the largest shift that matters */
488     pd->shift=max(nshift,pd->shift);
489   }
490   /* Now for the bonded forces */
491   for(i=0; (i<F_NRE); i++) {
492     if (interaction_function[i].flags & IF_BOND) {
493       int nratoms = interaction_function[i].nratoms;
494       for(j=0; (j<idef->il[i].nr); j+=nratoms+1) {
495         for(k=1; (k<=nratoms); k++)
496           homeid[k-1] = home_cpu(nnodes,pd,idef->il[i].iatoms[j+k]);
497         for(k=1; (k<nratoms); k++)
498           pd->shift = max(pd->shift,abs(homeid[k]-homeid[0]));
499       }
500     }
501   }
502   if (fp)
503     fprintf(fp,"pd->shift = %3d, pd->bshift=%3d\n",
504             pd->shift,pd->bshift);
505 }
506
507
508 static void
509 init_partdec_constraint(t_commrec *cr,
510                         t_idef *   idef,
511                                                 int *left_range, 
512                                                 int *right_range)
513 {
514         gmx_partdec_t *            pd = cr->pd;
515         gmx_partdec_constraint_t *pdc;
516         int i,cnt,k;
517         int ai,aj,nodei,nodej;
518         int nratoms;
519         int nodeid;
520         
521         snew(pdc,1);
522         cr->pd->constraints = pdc;
523
524         
525         /* Who am I? */
526     nodeid = cr->nodeid;
527         
528     /* Setup LINCS communication ranges */
529     pdc->left_range_receive   = left_range[nodeid];
530     pdc->right_range_receive  = right_range[nodeid]+1;
531     pdc->left_range_send      = (nodeid > 0) ? right_range[nodeid-1]+1 : 0;
532     pdc->right_range_send     = (nodeid < cr->nnodes-1) ? left_range[nodeid+1] : pd->index[cr->nnodes];
533         
534         snew(pdc->nlocalatoms,idef->il[F_CONSTR].nr);
535         nratoms = interaction_function[F_CONSTR].nratoms;
536
537         for(i=0,cnt=0;i<idef->il[F_CONSTR].nr;i+=nratoms+1,cnt++)
538         {
539                 ai = idef->il[F_CONSTR].iatoms[i+1];
540                 aj = idef->il[F_CONSTR].iatoms[i+2];
541                 nodei = 0;
542                 while(ai>=pd->index[nodei+1]) 
543                 {
544                         nodei++;
545                 }
546                 nodej = 0;
547                 while(aj>=pd->index[nodej+1]) 
548                 {
549                         nodej++;
550                 }
551                 pdc->nlocalatoms[cnt] = 0;
552                 if(nodei==nodeid)
553                 {
554                         pdc->nlocalatoms[cnt]++;
555                 }
556                 if(nodej==nodeid)
557                 {
558                         pdc->nlocalatoms[cnt]++;
559                 }
560         }
561         pdc->nconstraints = cnt;
562         
563         snew(pdc->sendbuf,max(6*(pd->index[cr->nodeid+1]-pd->constraints->right_range_send),6*(pdc->left_range_send-pd->index[cr->nodeid])));
564         snew(pdc->recvbuf,max(6*(pd->index[cr->nodeid]-pdc->left_range_receive),6*(pdc->right_range_receive-pd->index[cr->nodeid+1])));
565         
566 }
567
568 static void init_partdec(FILE *fp,t_commrec *cr,t_block *cgs,int *multinr,
569                                                  t_idef *idef)
570 {
571   int  i,nodeid;
572   gmx_partdec_t *pd;
573         
574   snew(pd,1);
575   cr->pd = pd;
576
577   set_left_right(cr);
578   
579   if (cr->nnodes > 1) {
580     if (multinr == NULL)
581       gmx_fatal(FARGS,"Internal error in init_partdec: multinr = NULL");
582     snew(pd->index,cr->nnodes+1);
583     snew(pd->cgindex,cr->nnodes+1);
584     pd->cgindex[0] = 0;
585     pd->index[0]   = 0;
586     for(i=0; (i < cr->nnodes); i++) {
587       pd->cgindex[i+1] = multinr[i];
588       pd->index[i+1]   = cgs->index[multinr[i]];
589     }
590     calc_nsbshift(fp,cr->nnodes,pd,idef);
591     /* This is a hack related to bugzilla 148 */
592     /*pd->shift = cr->nnodes-1;
593       pd->bshift = 0;*/
594
595     /* Allocate a buffer of size natoms of the whole system
596      * for summing the forces over the nodes.
597      */
598     snew(pd->vbuf,cgs->index[cgs->nr]);
599     pd->constraints = NULL;
600   }
601 #ifdef GMX_MPI
602   pd->mpi_req_tx=MPI_REQUEST_NULL;
603   pd->mpi_req_rx=MPI_REQUEST_NULL;
604 #endif
605 }
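
/*
 * Example (hypothetical numbers): with cr->nnodes = 2 and multinr = {5, 9},
 * init_partdec() produces pd->cgindex = {0, 5, 9} and
 * pd->index = {0, cgs->index[5], cgs->index[9]}, i.e. node 0 owns charge
 * groups [0,5) and node 1 owns [5,9), with the atom boundaries taken from
 * the charge group index.
 */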
606
607 static void print_partdec(FILE *fp,const char *title,
608                           int nnodes,gmx_partdec_t *pd)
609 {
610   int i;
611
612   fprintf(fp,"%s\n",title);
613   fprintf(fp,"nnodes:       %5d\n",nnodes);
614   fprintf(fp,"pd->shift:    %5d\n",pd->shift);
615   fprintf(fp,"pd->bshift:   %5d\n",pd->bshift);
616   
617   fprintf(fp,"Nodeid   atom0   #atom     cg0       #cg\n");
618   for(i=0; (i<nnodes); i++)
619     fprintf(fp,"%6d%8d%8d%8d%10d\n",
620             i,
621             pd->index[i],pd->index[i+1]-pd->index[i],
622             pd->cgindex[i],pd->cgindex[i+1]-pd->cgindex[i]);
623   fprintf(fp,"\n");
624 }
625
626 static void pr_idef_division(FILE *fp,t_idef *idef,int nnodes,int **multinr)
627 {
628   int i,ftype,nr,nra,m0,m1;
629
630   fprintf(fp,"Division of bonded forces over processors\n");
631   fprintf(fp,"%-12s","CPU");
632   for(i=0; (i<nnodes); i++) 
633     fprintf(fp," %5d",i);
634   fprintf(fp,"\n");
635   
636   for(ftype=0; (ftype<F_NRE); ftype++) {
637     if (idef->il[ftype].nr > 0) {
638       nr  = idef->il[ftype].nr;
639       nra = 1+interaction_function[ftype].nratoms;
640       fprintf(fp,"%-12s", interaction_function[ftype].name);
641       /* Loop over processors */
642       for(i=0; (i<nnodes); i++) {
643         m0 = (i == 0) ? 0 : multinr[ftype][i-1]/nra;
644         m1 = multinr[ftype][i]/nra;
645         fprintf(fp," %5d",m1-m0);
646       }
647       fprintf(fp,"\n");
648     }
649   }
650 }
651
652 static void select_my_ilist(FILE *log,t_ilist *il,int *multinr,t_commrec *cr)
653 {
654   t_iatom *ia;
655   int     i,start,end,nr;
656   
657   if (cr->nodeid == 0)
658     start=0;
659   else
660     start=multinr[cr->nodeid-1];
661   end=multinr[cr->nodeid];
662   
663   nr=end-start;
664   if (nr < 0)
665     gmx_fatal(FARGS,"Negative number of atoms (%d) on node %d\n"
666                 "You have probably not used the same value for -np with grompp"
667                 " and mdrun",
668                 nr,cr->nodeid);
669   snew(ia,nr);
670
671   for(i=0; (i<nr); i++)
672     ia[i]=il->iatoms[start+i];
673
674   sfree(il->iatoms);
675   il->iatoms=ia;
676   
677   il->nr=nr;
678 }
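
/*
 * Example (hypothetical numbers): with multinr = {6, 12, 18} for three
 * nodes, node 1 gets start = 6 and end = 12, so it keeps il->iatoms[6..11]
 * and ends up with il->nr = 6; node 0 starts at 0 because it has no left
 * neighbor entry.
 */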
679
680 static void select_my_idef(FILE *log,t_idef *idef,int **multinr,t_commrec *cr)
681 {
682   int i;
683   
684   for(i=0; (i<F_NRE); i++)
685     select_my_ilist(log,&idef->il[i],multinr[i],cr);
686 }
687
688 gmx_localtop_t *split_system(FILE *log,
689                              gmx_mtop_t *mtop,t_inputrec *inputrec,
690                              t_commrec *cr)
691 {
692   int    i,npp,n,nn;
693   real   *capacity;
694   double tcap = 0,cap;
695   int    *multinr_cgs,**multinr_nre;
696   char   *cap_env;
697   gmx_localtop_t *top;
698   int    *left_range;
699   int    *right_range;
700         
701   /* Time to set up the division of charge groups over processors */
702   npp = cr->nnodes-cr->npmenodes;
703   snew(capacity,npp);
704   cap_env = getenv("GMX_CAPACITY");
705   if (cap_env == NULL) {
706     for(i=0; (i<npp-1); i++) {
707       capacity[i] = 1.0/(double)npp;
708       tcap += capacity[i];
709     }
710     /* Take care that the sum of capacities is 1.0 */
711     capacity[npp-1] = 1.0 - tcap;
712   } else {
713     tcap = 0;
714     nn = 0;
715     for(i=0; i<npp; i++) {
716       cap = 0;
717       sscanf(cap_env+nn,"%lf%n",&cap,&n);
718       if (cap == 0)
719         gmx_fatal(FARGS,"Incorrect entry or number of entries in GMX_CAPACITY='%s'",cap_env);
720       capacity[i] = cap;
721       tcap += cap;
722       nn += n;
723     }
724     for(i=0; i<npp; i++)
725       capacity[i] /= tcap;
726   }
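
  /*
   * Example (hypothetical values): setting GMX_CAPACITY="2 1 1" for three
   * particle-particle nodes is parsed as 2, 1 and 1 with sscanf("%lf%n") and
   * then normalised to 0.5, 0.25 and 0.25, so the first node is assigned
   * roughly half of the charge groups.
   */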
727
728   /* Convert the molecular topology to a fully listed topology */
729   top = gmx_mtop_generate_local_top(mtop,inputrec);
730
731   snew(multinr_cgs,npp);
732   snew(multinr_nre,F_NRE);
733   for(i=0; i<F_NRE; i++)
734     snew(multinr_nre[i],npp);
735   
736
737   snew(left_range,cr->nnodes);
738   snew(right_range,cr->nnodes);
739         
740   /* This computes which entities can be placed on processors */
741   split_top(log,npp,top,inputrec,&mtop->mols,capacity,multinr_cgs,multinr_nre,left_range,right_range);
742         
743   sfree(capacity);
744   init_partdec(log,cr,&(top->cgs),multinr_cgs,&(top->idef));
745         
746   /* This should be fine */
747   /*split_idef(&(top->idef),cr->nnodes-cr->npmenodes);*/
748   
749   select_my_idef(log,&(top->idef),multinr_nre,cr);
750
751   if(top->idef.il[F_CONSTR].nr>0)
752   {
753           init_partdec_constraint(cr,&(top->idef),left_range,right_range);
754   }
755
756   if (log)
757     pr_idef_division(log,&(top->idef),npp,multinr_nre);
758
759   for(i=0; i<F_NRE; i++)
760     sfree(multinr_nre[i]);
761   sfree(multinr_nre);
762   sfree(multinr_cgs);
763
764   sfree(left_range);
765   sfree(right_range);
766         
767   if (log)
768     print_partdec(log,"Workload division",cr->nnodes,cr->pd);
769
770   return top;
771 }
772
773 static void
774 add_to_vsitelist(int **list, int *nitem, int *nalloc,int newitem)
775 {
776         int  i,idx;
777         gmx_bool found;
778         
779         found = FALSE;
780         idx = *nitem;
781         for(i=0; i<idx && !found;i++)
782         {
783                 found = (newitem ==(*list)[i]);
784         }
785         if(!found)
786         {
787                 *nalloc+=100;
788                 srenew(*list,*nalloc);
789                 (*list)[idx++] = newitem;
790                 *nitem = idx;
791         }
792 }
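
/*
 * Example: adding the construction atoms 12, 7 and then 12 again leaves the
 * list as {12, 7} with *nitem = 2; duplicates are filtered by the linear
 * search, and *nalloc is padded by another 100 entries on every insertion.
 */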
793
794 gmx_bool setup_parallel_vsites(t_idef *idef,t_commrec *cr,
795                                                    t_comm_vsites *vsitecomm)
796 {
797         int i,j,ftype;
798         int nra;
799         gmx_bool do_comm;
800         t_iatom   *ia;
801         gmx_partdec_t *pd;
802         int  iconstruct;
803         int  i0,i1;
804         int  nalloc_left_construct,nalloc_right_construct;
805         int  sendbuf[2],recvbuf[2];
806         int  bufsize,leftbuf,rightbuf;
807         
808         pd = cr->pd;
809         
810         i0 = pd->index[cr->nodeid];
811         i1 = pd->index[cr->nodeid+1];
812         
813         vsitecomm->left_import_construct  = NULL;       
814         vsitecomm->left_import_nconstruct = 0;
815         nalloc_left_construct      = 0;
816
817         vsitecomm->right_import_construct  = NULL;      
818         vsitecomm->right_import_nconstruct = 0;
819         nalloc_right_construct      = 0;
820         
821         for(ftype=0; (ftype<F_NRE); ftype++) 
822         {
823                 if ( !(interaction_function[ftype].flags & IF_VSITE) )
824                 {
825                         continue;
826                 }
827                 
828                 nra    = interaction_function[ftype].nratoms;
829                 ia     = idef->il[ftype].iatoms;
830                                 
831                 for(i=0; i<idef->il[ftype].nr;i+=nra+1) 
832                 {                               
833                         for(j=2;j<1+nra;j++)
834                         {
835                                 iconstruct = ia[i+j];
836                                 if(iconstruct<i0)
837                                 {
838                                         add_to_vsitelist(&vsitecomm->left_import_construct,
839                                                                          &vsitecomm->left_import_nconstruct,
840                                                                          &nalloc_left_construct,iconstruct);
841                                 }
842                                 else if(iconstruct>=i1)
843                                 {
844                                         add_to_vsitelist(&vsitecomm->right_import_construct,
845                                                                          &vsitecomm->right_import_nconstruct,
846                                                                          &nalloc_right_construct,iconstruct);
847                                 }                               
848                         }
849                 }
850     }
851         
852         /* Pre-communicate the array lengths */
853         gmx_tx_rx_void(cr,
854                                    GMX_RIGHT,(void *)&vsitecomm->right_import_nconstruct,sizeof(int),
855                                    GMX_LEFT, (void *)&vsitecomm->left_export_nconstruct, sizeof(int));
856         gmx_tx_rx_void(cr,
857                                    GMX_LEFT, (void *)&vsitecomm->left_import_nconstruct, sizeof(int),
858                                    GMX_RIGHT,(void *)&vsitecomm->right_export_nconstruct,sizeof(int));
859         
860         snew(vsitecomm->left_export_construct, vsitecomm->left_export_nconstruct);
861         snew(vsitecomm->right_export_construct,vsitecomm->right_export_nconstruct);
862         
863         /* Communicate the constructing atom index arrays */
864         gmx_tx_rx_void(cr,
865                                    GMX_RIGHT,(void *)vsitecomm->right_import_construct,vsitecomm->right_import_nconstruct*sizeof(int),
866                                    GMX_LEFT, (void *)vsitecomm->left_export_construct, vsitecomm->left_export_nconstruct*sizeof(int));
867         
868         /* Communicate the constructing atom index arrays in the opposite direction */
869         gmx_tx_rx_void(cr,
870                                    GMX_LEFT ,(void *)vsitecomm->left_import_construct, vsitecomm->left_import_nconstruct*sizeof(int),
871                                    GMX_RIGHT,(void *)vsitecomm->right_export_construct,vsitecomm->right_export_nconstruct*sizeof(int));
872
873         leftbuf  = max(vsitecomm->left_export_nconstruct,vsitecomm->left_import_nconstruct);
874         rightbuf = max(vsitecomm->right_export_nconstruct,vsitecomm->right_import_nconstruct);
875         
876         bufsize  = max(leftbuf,rightbuf);
877         
878         do_comm = (bufsize>0);
879                                 
880         snew(vsitecomm->send_buf,2*bufsize);
881         snew(vsitecomm->recv_buf,2*bufsize);
882
883         return do_comm;
884 }
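
/*
 * Usage sketch (hypothetical caller): the return value tells whether any
 * constructing atoms live on a neighboring node at all:
 *
 *   bDoVsiteComm = setup_parallel_vsites(&top->idef, cr, &vsitecomm);
 *   if (bDoVsiteComm)
 *   {
 *       // exchange constructing-atom coordinates before building the vsites
 *   }
 */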
885
886 t_state *partdec_init_local_state(t_commrec *cr,t_state *state_global)
887 {
888   int i;
889   t_state *state_local;
890
891   snew(state_local,1);
892
893   /* Copy all the contents */
894   *state_local = *state_global;
895   snew(state_local->lambda,efptNR);
896   /* local storage for lambda */
897   for (i=0;i<efptNR;i++)
898     {
899       state_local->lambda[i] = state_global->lambda[i];
900     }
901   if (state_global->nrngi > 1) {
902     /* With stochastic dynamics we need local storage for the random state */
903     if (state_local->flags & (1<<estLD_RNG)) {
904       state_local->nrng = gmx_rng_n();
905       snew(state_local->ld_rng,state_local->nrng);
906 #ifdef GMX_MPI
907       if (PAR(cr)) {
908         MPI_Scatter(state_global->ld_rng,
909                     state_local->nrng*sizeof(state_local->ld_rng[0]),MPI_BYTE,
910                     state_local->ld_rng ,
911                     state_local->nrng*sizeof(state_local->ld_rng[0]),MPI_BYTE,
912                     MASTERRANK(cr),cr->mpi_comm_mygroup);
913       }
914 #endif
915     }
916     if (state_local->flags & (1<<estLD_RNGI)) {
917       snew(state_local->ld_rngi,1);
918 #ifdef GMX_MPI
919       if (PAR(cr)) {
920         MPI_Scatter(state_global->ld_rngi,
921                     sizeof(state_local->ld_rngi[0]),MPI_BYTE,
922                     state_local->ld_rngi,
923                     sizeof(state_local->ld_rngi[0]),MPI_BYTE,
924                     MASTERRANK(cr),cr->mpi_comm_mygroup);
925       }
926 #endif
927     }
928   }
929
930   return state_local;
931 }