Update copyright statements and change license to LGPL
[alexxy/gromacs.git] / src / mdlib / partdec.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team,
6  * check out http://www.gromacs.org for more information.
7  * Copyright (c) 2012, by the GROMACS development team, led by
8  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
9  * others, as listed in the AUTHORS file in the top-level source
10  * directory and at http://www.gromacs.org.
11  *
12  * GROMACS is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public License
14  * as published by the Free Software Foundation; either version 2.1
15  * of the License, or (at your option) any later version.
16  *
17  * GROMACS is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with GROMACS; if not, see
24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
26  *
27  * If you want to redistribute modifications to GROMACS, please
28  * consider that scientific software is very special. Version
29  * control is crucial - bugs must be traceable. We will be happy to
30  * consider code for inclusion in the official distribution, but
31  * derived work must not be called official GROMACS. Details are found
32  * in the README & COPYING files - if they are missing, get the
33  * official version at http://www.gromacs.org.
34  *
35  * To help us fund GROMACS development, we humbly ask that you cite
36  * the research papers on the package. Check out http://www.gromacs.org.
37  */
38 /* This file is completely threadsafe - keep it that way! */
39 #ifdef HAVE_CONFIG_H
40 #include <config.h>
41 #endif
42
43 #include <math.h>
44
45 #include "sysstuff.h"
46 #include "typedefs.h"
47 #include "smalloc.h"
48 #include "invblock.h"
49 #include "macros.h"
50 #include "main.h"
51 #include "ns.h"
52 #include "partdec.h"
53 #include "splitter.h"
54 #include "gmx_random.h"
55 #include "mtop_util.h"
56 #include "mvdata.h"
57 #include "vec.h"
58
59 typedef struct gmx_partdec_constraint
60 {
61         int                  left_range_receive;
62         int                  right_range_receive;
63         int                  left_range_send;
64         int                  right_range_send;
65         int                  nconstraints;
66         int *                nlocalatoms;
67         rvec *               sendbuf;
68         rvec *               recvbuf;
69
70 } gmx_partdec_constraint_t;
71
72
73 typedef struct gmx_partdec {
74   int  neighbor[2];             /* The nodeids of left and right neighb */
75   int  *cgindex;                /* The charge group boundaries,         */
76                                 /* size nnodes+1,                       */
77                                 /* only allocated with particle decomp. */
78   int  *index;                  /* The home particle boundaries,        */
79                                 /* size nnodes+1,                       */
80                                 /* only allocated with particle decomp. */
81   int  shift,bshift;            /* Coordinates are shifted left for     */
82                                 /* 'shift' systolic pulses, and right   */
83                                 /* for 'bshift' pulses. Forces are      */
84                                 /* shifted right for 'shift' pulses     */
85                                 /* and left for 'bshift' pulses         */
86                                 /* This way it is not necessary to shift */
87                                 /* the coordinates over the entire ring  */
88   rvec *vbuf;                   /* Buffer for summing the forces        */
89 #ifdef GMX_MPI
90   MPI_Request mpi_req_rx;       /* MPI reqs for async transfers */
91   MPI_Request mpi_req_tx;
92 #endif
93  gmx_partdec_constraint_t *     constraints;    
94 } gmx_partdec_t;
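
/*
 * Illustrative sketch (not part of the build): how the shift/bshift counts
 * above are typically consumed.  Coordinates make pd->shift systolic pulses
 * to the left (and pd->bshift to the right); forces travel the opposite way.
 * The buffer names and byte counts below are hypothetical.
 *
 *   int pulse;
 *   for (pulse = 0; pulse < cr->pd->shift; pulse++)
 *   {
 *       gmx_tx(cr, GMX_LEFT,  (void *)x_send, nbytes);  // post send to the left neighbor
 *       gmx_rx(cr, GMX_RIGHT, (void *)x_recv, nbytes);  // post receive from the right
 *       gmx_wait(cr, GMX_LEFT, GMX_RIGHT);              // complete both before reusing buffers
 *   }
 */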
95
96
97 void gmx_tx(const t_commrec *cr,int dir,void *buf,int bufsize)
98 {
99 #ifndef GMX_MPI
100   gmx_call("gmx_tx"); 
101 #else
102   int        nodeid;
103   int        tag,flag;
104   MPI_Status status;
105
106   nodeid = cr->pd->neighbor[dir];
107   
108 #ifdef DEBUG
109   fprintf(stderr,"gmx_tx: nodeid=%d, buf=%x, bufsize=%d\n",
110           nodeid,buf,bufsize);
111 #endif
112 #ifdef MPI_TEST
113   /* workaround for crashes encountered with MPI on IRIX 6.5 */
114   if (cr->pd->mpi_req_tx != MPI_REQUEST_NULL) {
115     MPI_Test(&cr->pd->mpi_req_tx,&flag,&status);
116     if (flag==FALSE) {
117       fprintf(stdlog,"gmx_tx called before previous send was complete: nodeid=%d, buf=%x, bufsize=%d\n",
118               nodeid,buf,bufsize);
119       gmx_tx_wait(nodeid);
120     }
121   }
122 #endif
123   tag = 0;
124   if (MPI_Isend(buf,bufsize,MPI_BYTE,RANK(cr,nodeid),tag,cr->mpi_comm_mygroup,&cr->pd->mpi_req_tx) != 0)
125     gmx_comm("MPI_Isend Failed");
126 #endif
127 }
128
129 void gmx_tx_wait(const t_commrec *cr, int dir)
130 {
131 #ifndef GMX_MPI
132   gmx_call("gmx_tx_wait");
133 #else
134   MPI_Status  status;
135   int mpi_result;
136
137   if ((mpi_result=MPI_Wait(&cr->pd->mpi_req_tx,&status)) != 0)
138     gmx_fatal(FARGS,"MPI_Wait: result=%d",mpi_result);
139 #endif
140 }
141
142 void gmx_rx(const t_commrec *cr,int dir,void *buf,int bufsize)
143 {
144 #ifndef GMX_MPI
145   gmx_call("gmx_rx");
146 #else
147   int nodeid;
148   int tag;
149
150   nodeid = cr->pd->neighbor[dir];
151 #ifdef DEBUG
152   fprintf(stderr,"gmx_rx: nodeid=%d, buf=%x, bufsize=%d\n",
153           nodeid,buf,bufsize);
154 #endif
155   tag = 0;
156   if (MPI_Irecv( buf, bufsize, MPI_BYTE, RANK(cr,nodeid), tag, cr->mpi_comm_mygroup, &cr->pd->mpi_req_rx) != 0 )
157     gmx_comm("MPI_Irecv Failed");
158 #endif
159 }
160
161 void gmx_rx_wait(const t_commrec *cr, int nodeid)
162 {
163 #ifndef GMX_MPI
164   gmx_call("gmx_rx_wait");
165 #else
166   MPI_Status  status;
167   int mpi_result;
168   
169   if ((mpi_result=MPI_Wait(&cr->pd->mpi_req_rx,&status)) != 0)
170     gmx_fatal(FARGS,"MPI_Wait: result=%d",mpi_result);
171 #endif
172 }
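
/*
 * Usage sketch (illustrative only): gmx_tx/gmx_rx post non-blocking
 * transfers and gmx_tx_wait/gmx_rx_wait (or gmx_wait below) complete them,
 * so a caller can overlap independent work with the communication:
 *
 *   gmx_tx(cr, GMX_LEFT,  (void *)sendbuf, nbytes);   // MPI_Isend to the left neighbor
 *   gmx_rx(cr, GMX_RIGHT, (void *)recvbuf, nbytes);   // MPI_Irecv from the right neighbor
 *   ...                                               // independent work can go here
 *   gmx_wait(cr, GMX_LEFT, GMX_RIGHT);                // wait for both requests
 */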
173
174 void gmx_tx_rx_real(const t_commrec *cr,
175                     int send_dir,real *send_buf,int send_bufsize,
176                     int recv_dir,real *recv_buf,int recv_bufsize)
177 {
178 #ifndef GMX_MPI
179   gmx_call("gmx_tx_rx_real");
180 #else
181   int send_nodeid,recv_nodeid;
182   int tx_tag = 0,rx_tag = 0;
183   MPI_Status stat;
184
185   send_nodeid = cr->pd->neighbor[send_dir];
186   recv_nodeid = cr->pd->neighbor[recv_dir];
187                    
188 #ifdef GMX_DOUBLE
189 #define mpi_type MPI_DOUBLE
190 #else
191 #define mpi_type MPI_FLOAT
192 #endif
193
194   if (send_bufsize > 0 && recv_bufsize > 0) {
195     MPI_Sendrecv(send_buf,send_bufsize,mpi_type,RANK(cr,send_nodeid),tx_tag,
196                  recv_buf,recv_bufsize,mpi_type,RANK(cr,recv_nodeid),rx_tag,
197                  cr->mpi_comm_mygroup,&stat);
198   } else if (send_bufsize > 0) {
199     MPI_Send(send_buf,send_bufsize,mpi_type,RANK(cr,send_nodeid),tx_tag,
200              cr->mpi_comm_mygroup);
201   } else if (recv_bufsize > 0) {
202     MPI_Recv(recv_buf,recv_bufsize,mpi_type,RANK(cr,recv_nodeid),rx_tag,
203              cr->mpi_comm_mygroup,&stat);
204   }
205 #undef mpi_type
206 #endif
207 }
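
/*
 * Illustrative example (hypothetical buffers): exchanging coordinate blocks
 * with both ring neighbors in a single call, as pd_move_x_constraints() does
 * further down:
 *
 *   gmx_tx_rx_real(cr,
 *                  GMX_RIGHT, (real *)x_send, 3*nat_send,
 *                  GMX_LEFT,  (real *)x_recv, 3*nat_recv);
 *
 * Because the body maps onto MPI_Sendrecv (or a plain MPI_Send/MPI_Recv when
 * one of the counts is zero), no separate wait call is needed afterwards.
 */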
208
209
210 void gmx_tx_rx_void(const t_commrec *cr,
211                     int send_dir,void *send_buf,int send_bufsize,
212                     int recv_dir,void *recv_buf,int recv_bufsize)
213 {
214 #ifndef GMX_MPI
215         gmx_call("gmx_tx_rx_void");
216 #else
217         int send_nodeid,recv_nodeid;
218         int tx_tag = 0,rx_tag = 0;
219         MPI_Status stat;
220         
221         send_nodeid = cr->pd->neighbor[send_dir];
222         recv_nodeid = cr->pd->neighbor[recv_dir];
223         
224         
225         MPI_Sendrecv(send_buf,send_bufsize,MPI_BYTE,RANK(cr,send_nodeid),tx_tag,
226                                  recv_buf,recv_bufsize,MPI_BYTE,RANK(cr,recv_nodeid),rx_tag,
227                                  cr->mpi_comm_mygroup,&stat);
228
229 #endif
230 }
231
232
233 /*void gmx_wait(int dir_send,int dir_recv)*/
234                 
235 void gmx_wait(const t_commrec *cr, int dir_send,int dir_recv)
236 {
237 #ifndef GMX_MPI
238   gmx_call("gmx_wait");
239 #else
240   gmx_tx_wait(cr, dir_send);
241   gmx_rx_wait(cr, dir_recv);
242 #endif
243 }
244
245 static void set_left_right(t_commrec *cr)
246 {
247   cr->pd->neighbor[GMX_LEFT]  = (cr->nnodes + cr->nodeid - 1) % cr->nnodes;
248   cr->pd->neighbor[GMX_RIGHT] = (cr->nodeid + 1) % cr->nnodes;
249 }
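
/*
 * Example: with cr->nnodes = 4 the ring neighbors work out as
 *
 *   nodeid    : 0 1 2 3
 *   GMX_LEFT  : 3 0 1 2
 *   GMX_RIGHT : 1 2 3 0
 *
 * i.e. the modulo arithmetic simply closes the ring at both ends.
 */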
250
251 void pd_move_f(const t_commrec *cr,rvec f[],t_nrnb *nrnb)
252 {
253   move_f(NULL,cr,GMX_LEFT,GMX_RIGHT,f,cr->pd->vbuf,nrnb);
254 }
255
256 int *pd_cgindex(const t_commrec *cr)
257 {
258   return cr->pd->cgindex;
259 }
260
261 int *pd_index(const t_commrec *cr)
262 {
263   return cr->pd->index;
264 }
265
266 int pd_shift(const t_commrec *cr)
267 {
268   return cr->pd->shift;
269 }
270
271 int pd_bshift(const t_commrec *cr)
272 {
273   return cr->pd->bshift;
274 }
275
276 void pd_cg_range(const t_commrec *cr,int *cg0,int *cg1)
277 {
278   *cg0 = cr->pd->cgindex[cr->nodeid];
279   *cg1 = cr->pd->cgindex[cr->nodeid+1];
280 }
281
282 void pd_at_range(const t_commrec *cr,int *at0,int *at1)
283 {
284   *at0 = cr->pd->index[cr->nodeid];
285   *at1 = cr->pd->index[cr->nodeid+1];
286 }
287
288 void
289 pd_get_constraint_range(gmx_partdec_p_t pd,int *start,int *natoms)
290 {
291         *start  = pd->constraints->left_range_receive;
292         *natoms = pd->constraints->right_range_receive-pd->constraints->left_range_receive;
293 }
294
295 int *
296 pd_constraints_nlocalatoms(gmx_partdec_p_t pd)
297 {
298         int *rc;
299         
300         if(NULL != pd && NULL != pd->constraints)
301         {
302                 rc = pd->constraints->nlocalatoms;
303         }
304         else
305         {
306                 rc = NULL;
307         }
308         return rc;
309 }
310
311
312
313
314 /* This routine is used to communicate the non-home atoms needed for constraints.
315  * We have already calculated this range of atoms during setup, and stored it in the
316  * partdec constraints structure.
317  *
318  * When called, we send/receive left_range_send/receive atoms to our left (lower)
319  * node neighbor, and similarly to the right (higher) node.
320  *
321  * This has not been tested for periodic molecules...
322  */
323 void
324 pd_move_x_constraints(t_commrec *  cr,
325                                           rvec *       x0,
326                                           rvec *       x1)
327 {
328 #ifdef GMX_MPI
329         gmx_partdec_t *pd;
330         gmx_partdec_constraint_t *pdc;
331         
332         rvec *     sendptr;
333         rvec *     recvptr;
334         int        thisnode;
335         int        i;
336         int        cnt;
337         int        sendcnt,recvcnt;
338         
339         pd  = cr->pd;
340         pdc = pd->constraints;
341         
342         if (pdc == NULL)
343         {
344                 return;
345         }
346
347         thisnode  = cr->nodeid;
348         
349         /* First pulse to right */
350         
351         recvcnt = 3*(pd->index[thisnode]-pdc->left_range_receive);
352         sendcnt = 3*(pd->index[thisnode+1]-pdc->right_range_send);
353
354         if(x1!=NULL)
355         {
356                 /* Assemble temporary array with both x0 & x1 */
357                 recvptr = pdc->recvbuf;
358                 sendptr = pdc->sendbuf;
359                 
360                 cnt = 0;
361                 for(i=pdc->right_range_send;i<pd->index[thisnode+1];i++)
362                 {
363                         copy_rvec(x0[i],sendptr[cnt++]);
364                 }
365                 for(i=pdc->right_range_send;i<pd->index[thisnode+1];i++)
366                 {
367                         copy_rvec(x1[i],sendptr[cnt++]);
368                 }
369                 recvcnt *= 2;
370                 sendcnt *= 2;           
371         }
372         else
373         {
374                 recvptr = x0 + pdc->left_range_receive;
375                 sendptr = x0 + pdc->right_range_send;
376         }
377                 
378         gmx_tx_rx_real(cr,
379                                    GMX_RIGHT,(real *)sendptr,sendcnt,
380                                    GMX_LEFT, (real *)recvptr,recvcnt);
381                                 
382         if(x1 != NULL)
383         {
384                 /* copy back to x0/x1 */
385                 cnt = 0;
386                 for(i=pdc->left_range_receive;i<pd->index[thisnode];i++)
387                 {
388                         copy_rvec(recvptr[cnt++],x0[i]);
389                 }
390                 for(i=pdc->left_range_receive;i<pd->index[thisnode];i++)
391                 {
392                         copy_rvec(recvptr[cnt++],x1[i]);
393                 }
394         }
395         
396         /* And pulse to left */
397         sendcnt = 3*(pdc->left_range_send-pd->index[thisnode]); 
398         recvcnt = 3*(pdc->right_range_receive-pd->index[thisnode+1]);
399
400         if(x1 != NULL)
401         {       
402                 cnt = 0;
403                 for(i=cr->pd->index[thisnode];i<pdc->left_range_send;i++)
404                 {
405                         copy_rvec(x0[i],sendptr[cnt++]);
406                 }
407                 for(i=cr->pd->index[thisnode];i<pdc->left_range_send;i++)
408                 {
409                         copy_rvec(x1[i],sendptr[cnt++]);
410                 }
411                 recvcnt *= 2;
412                 sendcnt *= 2;           
413         }
414         else
415         {
416                 sendptr = x0 + pd->index[thisnode];
417                 recvptr = x0 + pd->index[thisnode+1];
418         }
419         
420         gmx_tx_rx_real(cr,
421                                    GMX_LEFT ,(real *)sendptr,sendcnt,
422                                    GMX_RIGHT,(real *)recvptr,recvcnt);
423
424         /* Final copy back from buffers */
425         if(x1 != NULL)
426         {
427                 /* First copy received data back into x0 & x1 */
428                 cnt = 0;
429                 for(i=pd->index[thisnode+1];i<pdc->right_range_receive;i++)
430                 {
431                         copy_rvec(recvptr[cnt++],x0[i]);
432                 }
433                 for(i=pd->index[thisnode+1];i<pdc->right_range_receive;i++)
434                 {
435                         copy_rvec(recvptr[cnt++],x1[i]);
436                 }
437         }               
438 #endif
439 }
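
/*
 * Usage sketch (hypothetical caller): a constraint solver that works on the
 * home atoms would fetch the halo once per application of the constraints:
 *
 *   pd_move_x_constraints(cr, x, NULL);      // communicate one coordinate array
 *   pd_move_x_constraints(cr, x, xprime);    // or pack and move two arrays at once
 *
 * Passing a non-NULL second array makes the routine pack x0 and x1 into the
 * shared send/receive buffers, so both are moved with the same two pulses.
 */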
440
441 static int home_cpu(int nnodes,gmx_partdec_t *pd,int atomid)
442 {
443   int k;
444  
445   for(k=0; (k<nnodes); k++) {
446     if (atomid<pd->index[k+1])
447       return k;
448   }
449   gmx_fatal(FARGS,"Atomid %d is larger than number of atoms (%d)",
450             atomid+1,pd->index[nnodes]+1);
451             
452   return -1;
453 }
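
/*
 * Example: with nnodes = 3 and pd->index = {0, 100, 200, 300}, atom 150
 * fails the test against pd->index[1] = 100 but passes against
 * pd->index[2] = 200, so home_cpu() returns 1.
 */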
454
455 static void calc_nsbshift(FILE *fp,int nnodes,gmx_partdec_t *pd,t_idef *idef)
456 {
457   int i,j,k;
458   int lastcg,targetcg,nshift,naaj;
459   int homeid[32];
460       
461   pd->bshift=0;
462   for(i=1; (i<nnodes); i++) {
463     targetcg = pd->cgindex[i];
464     for(nshift=i; (nshift > 0) && (pd->cgindex[nshift] > targetcg); nshift--)
465       ;
466     pd->bshift=max(pd->bshift,i-nshift);
467   }
468
469   pd->shift = (nnodes + 1)/2;
470   for(i=0; (i<nnodes); i++) {
471     lastcg = pd->cgindex[i+1]-1;
472     naaj = calc_naaj(lastcg,pd->cgindex[nnodes]);
473     targetcg = (lastcg+naaj) % pd->cgindex[nnodes];
474     
475     /* Search until we find the target charge group */
476     for(nshift=0;
477         (nshift < nnodes) && (targetcg > pd->cgindex[nshift+1]);
478         nshift++)
479       ;
480     /* Now compute the shift, that is the difference in node index */
481     nshift=((nshift - i + nnodes) % nnodes);
482     
483     if (fp)
484       fprintf(fp,"CPU=%3d, lastcg=%5d, targetcg=%5d, myshift=%5d\n",
485               i,lastcg,targetcg,nshift);
486             
487     /* It's the largest shift that matters */
488     pd->shift=max(nshift,pd->shift);
489   }
490   /* Now for the bonded forces */
491   for(i=0; (i<F_NRE); i++) {
492     if (interaction_function[i].flags & IF_BOND) {
493       int nratoms = interaction_function[i].nratoms;
494       for(j=0; (j<idef->il[i].nr); j+=nratoms+1) {
495         for(k=1; (k<=nratoms); k++)
496           homeid[k-1] = home_cpu(nnodes,pd,idef->il[i].iatoms[j+k]);
497         for(k=1; (k<nratoms); k++)
498           pd->shift = max(pd->shift,abs(homeid[k]-homeid[0]));
499       }
500     }
501   }
502   if (fp)
503     fprintf(fp,"pd->shift = %3d, pd->bshift=%3d\n",
504             pd->shift,pd->bshift);
505 }
506
507
508 static void
509 init_partdec_constraint(t_commrec *cr,
510                         t_idef *   idef,
511                                                 int *left_range, 
512                                                 int *right_range)
513 {
514         gmx_partdec_t *            pd = cr->pd;
515         gmx_partdec_constraint_t *pdc;
516         int i,cnt,k;
517         int ai,aj,nodei,nodej;
518         int nratoms;
519         int nodeid;
520         
521         snew(pdc,1);
522         cr->pd->constraints = pdc;
523
524         
525         /* Who am I? */
526     nodeid = cr->nodeid;
527         
528     /* Setup LINCS communication ranges */
529     pdc->left_range_receive   = left_range[nodeid];
530     pdc->right_range_receive  = right_range[nodeid]+1;
531     pdc->left_range_send      = (nodeid > 0) ? right_range[nodeid-1]+1 : 0;
532     pdc->right_range_send     = (nodeid < cr->nnodes-1) ? left_range[nodeid+1] : pd->index[cr->nnodes];
533         
534         snew(pdc->nlocalatoms,idef->il[F_CONSTR].nr);
535         nratoms = interaction_function[F_CONSTR].nratoms;
536
537         for(i=0,cnt=0;i<idef->il[F_CONSTR].nr;i+=nratoms+1,cnt++)
538         {
539                 ai = idef->il[F_CONSTR].iatoms[i+1];
540                 aj = idef->il[F_CONSTR].iatoms[i+2];
541                 nodei = 0;
542                 while(ai>=pd->index[nodei+1]) 
543                 {
544                         nodei++;
545                 }
546                 nodej = 0;
547                 while(aj>=pd->index[nodej+1]) 
548                 {
549                         nodej++;
550                 }
551                 pdc->nlocalatoms[cnt] = 0;
552                 if(nodei==nodeid)
553                 {
554                         pdc->nlocalatoms[cnt]++;
555                 }
556                 if(nodej==nodeid)
557                 {
558                         pdc->nlocalatoms[cnt]++;
559                 }
560         }
561         pdc->nconstraints = cnt;
562         
563         snew(pdc->sendbuf,max(6*(pd->index[cr->nodeid+1]-pd->constraints->right_range_send),6*(pdc->left_range_send-pd->index[cr->nodeid])));
564         snew(pdc->recvbuf,max(6*(pd->index[cr->nodeid]-pdc->left_range_receive),6*(pdc->right_range_receive-pd->index[cr->nodeid+1])));
565         
566 }
567
568 static void init_partdec(FILE *fp,t_commrec *cr,t_block *cgs,int *multinr,
569                                                  t_idef *idef)
570 {
571   int  i,nodeid;
572   gmx_partdec_t *pd;
573         
574   snew(pd,1);
575   cr->pd = pd;
576
577   set_left_right(cr);
578   
579   if (cr->nnodes > 1) {
580     if (multinr == NULL)
581       gmx_fatal(FARGS,"Internal error in init_partdec: multinr = NULL");
582     snew(pd->index,cr->nnodes+1);
583     snew(pd->cgindex,cr->nnodes+1);
584     pd->cgindex[0] = 0;
585     pd->index[0]   = 0;
586     for(i=0; (i < cr->nnodes); i++) {
587       pd->cgindex[i+1] = multinr[i];
588       pd->index[i+1]   = cgs->index[multinr[i]];
589     }
590     calc_nsbshift(fp,cr->nnodes,pd,idef);
591     /* This is a hack related to bugzilla 148 */
592     /*pd->shift = cr->nnodes-1;
593       pd->bshift = 0;*/
594
595     /* Allocate a buffer of size natoms of the whole system
596      * for summing the forces over the nodes.
597      */
598     snew(pd->vbuf,cgs->index[cgs->nr]);
599     pd->constraints = NULL;
600   }
601 #ifdef GMX_MPI
602   pd->mpi_req_tx=MPI_REQUEST_NULL;
603   pd->mpi_req_rx=MPI_REQUEST_NULL;
604 #endif
605 }
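
/*
 * Example (hypothetical numbers): with cr->nnodes = 2 and multinr = {5, 9},
 * init_partdec() produces pd->cgindex = {0, 5, 9} and
 * pd->index = {0, cgs->index[5], cgs->index[9]}, i.e. node 0 owns charge
 * groups [0,5) and node 1 owns [5,9), with the atom boundaries taken from
 * the charge group index.
 */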
606
607 static void print_partdec(FILE *fp,const char *title,
608                           int nnodes,gmx_partdec_t *pd)
609 {
610   int i;
611
612   fprintf(fp,"%s\n",title);
613   fprintf(fp,"nnodes:       %5d\n",nnodes);
614   fprintf(fp,"pd->shift:    %5d\n",pd->shift);
615   fprintf(fp,"pd->bshift:   %5d\n",pd->bshift);
616   
617   fprintf(fp,"Nodeid   atom0   #atom     cg0       #cg\n");
618   for(i=0; (i<nnodes); i++)
619     fprintf(fp,"%6d%8d%8d%8d%10d\n",
620             i,
621             pd->index[i],pd->index[i+1]-pd->index[i],
622             pd->cgindex[i],pd->cgindex[i+1]-pd->cgindex[i]);
623   fprintf(fp,"\n");
624 }
625
626 static void pr_idef_division(FILE *fp,t_idef *idef,int nnodes,int **multinr)
627 {
628   int i,ftype,nr,nra,m0,m1;
629
630   fprintf(fp,"Division of bonded forces over processors\n");
631   fprintf(fp,"%-12s","CPU");
632   for(i=0; (i<nnodes); i++) 
633     fprintf(fp," %5d",i);
634   fprintf(fp,"\n");
635   
636   for(ftype=0; (ftype<F_NRE); ftype++) {
637     if (idef->il[ftype].nr > 0) {
638       nr  = idef->il[ftype].nr;
639       nra = 1+interaction_function[ftype].nratoms;
640       fprintf(fp,"%-12s", interaction_function[ftype].name);
641       /* Loop over processors */
642       for(i=0; (i<nnodes); i++) {
643         m0 = (i == 0) ? 0 : multinr[ftype][i-1]/nra;
644         m1 = multinr[ftype][i]/nra;
645         fprintf(fp," %5d",m1-m0);
646       }
647       fprintf(fp,"\n");
648     }
649   }
650 }
651
652 static void select_my_ilist(FILE *log,t_ilist *il,int *multinr,t_commrec *cr)
653 {
654   t_iatom *ia;
655   int     i,start,end,nr;
656   
657   if (cr->nodeid == 0)
658     start=0;
659   else
660     start=multinr[cr->nodeid-1];
661   end=multinr[cr->nodeid];
662   
663   nr=end-start;
664   if (nr < 0)
665     gmx_fatal(FARGS,"Negative number of atoms (%d) on node %d\n"
666                 "You have probably not used the same value for -np with grompp"
667                 " and mdrun",
668                 nr,cr->nodeid);
669   snew(ia,nr);
670
671   for(i=0; (i<nr); i++)
672     ia[i]=il->iatoms[start+i];
673
674   sfree(il->iatoms);
675   il->iatoms=ia;
676   
677   il->nr=nr;
678 }
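
/*
 * Example (hypothetical numbers): with multinr = {6, 12, 18} for three
 * nodes, node 1 gets start = 6 and end = 12, so it keeps il->iatoms[6..11]
 * and ends up with il->nr = 6; node 0 starts at 0 because it has no left
 * neighbor entry.
 */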
679
680 static void select_my_idef(FILE *log,t_idef *idef,int **multinr,t_commrec *cr)
681 {
682   int i;
683   
684   for(i=0; (i<F_NRE); i++)
685     select_my_ilist(log,&idef->il[i],multinr[i],cr);
686 }
687
688 gmx_localtop_t *split_system(FILE *log,
689                              gmx_mtop_t *mtop,t_inputrec *inputrec,
690                              t_commrec *cr)
691 {
692   int    i,npp,n,nn;
693   real   *capacity;
694   double tcap = 0,cap;
695   int    *multinr_cgs,**multinr_nre;
696   char   *cap_env;
697   gmx_localtop_t *top;
698   int    *left_range;
699   int    *right_range;
700         
701   /* Time to set up the division of charge groups over processors */
702   npp = cr->nnodes-cr->npmenodes;
703   snew(capacity,npp);
704   cap_env = getenv("GMX_CAPACITY");
705   if (cap_env == NULL) {
706     for(i=0; (i<npp-1); i++) {
707       capacity[i] = 1.0/(double)npp;
708       tcap += capacity[i];
709     }
710     /* Take care that the sum of capacities is 1.0 */
711     capacity[npp-1] = 1.0 - tcap;
712   } else {
713     tcap = 0;
714     nn = 0;
715     for(i=0; i<npp; i++) {
716       cap = 0;
717       sscanf(cap_env+nn,"%lf%n",&cap,&n);
718       if (cap == 0)
719         gmx_fatal(FARGS,"Incorrect entry or number of entries in GMX_CAPACITY='%s'",cap_env);
720       capacity[i] = cap;
721       tcap += cap;
722       nn += n;
723     }
724     for(i=0; i<npp; i++)
725       capacity[i] /= tcap;
726   }
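
  /*
   * Example (hypothetical values): setting GMX_CAPACITY="2 1 1" for three
   * particle-particle nodes is parsed as 2, 1 and 1 with sscanf("%lf%n") and
   * then normalised to 0.5, 0.25 and 0.25, so the first node is assigned
   * roughly half of the charge groups.
   */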
727
728   /* Convert the molecular topology to a fully listed topology */
729   top = gmx_mtop_generate_local_top(mtop,inputrec);
730
731   snew(multinr_cgs,npp);
732   snew(multinr_nre,F_NRE);
733   for(i=0; i<F_NRE; i++)
734     snew(multinr_nre[i],npp);
735   
736
737   snew(left_range,cr->nnodes);
738   snew(right_range,cr->nnodes);
739         
740   /* This computes which entities can be placed on processors */
741   split_top(log,npp,top,inputrec,&mtop->mols,capacity,multinr_cgs,multinr_nre,left_range,right_range);
742         
743   sfree(capacity);
744   init_partdec(log,cr,&(top->cgs),multinr_cgs,&(top->idef));
745         
746   /* This should be fine */
747   /*split_idef(&(top->idef),cr->nnodes-cr->npmenodes);*/
748   
749   select_my_idef(log,&(top->idef),multinr_nre,cr);
750
751   if(top->idef.il[F_CONSTR].nr>0)
752   {
753           init_partdec_constraint(cr,&(top->idef),left_range,right_range);
754   }
755
756   if (log)
757     pr_idef_division(log,&(top->idef),npp,multinr_nre);
758
759   for(i=0; i<F_NRE; i++)
760     sfree(multinr_nre[i]);
761   sfree(multinr_nre);
762   sfree(multinr_cgs);
763
764   sfree(left_range);
765   sfree(right_range);
766         
767   if (log)
768     print_partdec(log,"Workload division",cr->nnodes,cr->pd);
769
770   return top;
771 }
772
773 static void
774 add_to_vsitelist(int **list, int *nitem, int *nalloc,int newitem)
775 {
776         int  i,idx;
777         gmx_bool found;
778         
779         found = FALSE;
780         idx = *nitem;
781         for(i=0; i<idx && !found;i++)
782         {
783                 found = (newitem ==(*list)[i]);
784         }
785         if(!found)
786         {
787                 *nalloc+=100;
788                 srenew(*list,*nalloc);
789                 (*list)[idx++] = newitem;
790                 *nitem = idx;
791         }
792 }
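
/*
 * Example: adding the construction atoms 12, 7 and then 12 again leaves the
 * list as {12, 7} with *nitem = 2; duplicates are filtered by the linear
 * search, and *nalloc is padded by another 100 entries on every insertion.
 */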
793
794 gmx_bool setup_parallel_vsites(t_idef *idef,t_commrec *cr,
795                                                    t_comm_vsites *vsitecomm)
796 {
797         int i,j,ftype;
798         int nra;
799         gmx_bool do_comm;
800         t_iatom   *ia;
801         gmx_partdec_t *pd;
802         int  iconstruct;
803         int  i0,i1;
804         int  nalloc_left_construct,nalloc_right_construct;
805         int  sendbuf[2],recvbuf[2];
806         int  bufsize,leftbuf,rightbuf;
807         
808         pd = cr->pd;
809         
810         i0 = pd->index[cr->nodeid];
811         i1 = pd->index[cr->nodeid+1];
812         
813         vsitecomm->left_import_construct  = NULL;       
814         vsitecomm->left_import_nconstruct = 0;
815         nalloc_left_construct      = 0;
816
817         vsitecomm->right_import_construct  = NULL;      
818         vsitecomm->right_import_nconstruct = 0;
819         nalloc_right_construct      = 0;
820         
821         for(ftype=0; (ftype<F_NRE); ftype++) 
822         {
823                 if ( !(interaction_function[ftype].flags & IF_VSITE) )
824                 {
825                         continue;
826                 }
827                 
828                 nra    = interaction_function[ftype].nratoms;
829                 ia     = idef->il[ftype].iatoms;
830                                 
831                 for(i=0; i<idef->il[ftype].nr;i+=nra+1) 
832                 {                               
833                         for(j=2;j<1+nra;j++)
834                         {
835                                 iconstruct = ia[i+j];
836                                 if(iconstruct<i0)
837                                 {
838                                         add_to_vsitelist(&vsitecomm->left_import_construct,
839                                                                          &vsitecomm->left_import_nconstruct,
840                                                                          &nalloc_left_construct,iconstruct);
841                                 }
842                                 else if(iconstruct>=i1)
843                                 {
844                                         add_to_vsitelist(&vsitecomm->right_import_construct,
845                                                                          &vsitecomm->right_import_nconstruct,
846                                                                          &nalloc_right_construct,iconstruct);
847                                 }                               
848                         }
849                 }
850     }
851         
852         /* Pre-communicate the array lengths */
853         gmx_tx_rx_void(cr,
854                                    GMX_RIGHT,(void *)&vsitecomm->right_import_nconstruct,sizeof(int),
855                                    GMX_LEFT, (void *)&vsitecomm->left_export_nconstruct, sizeof(int));
856         gmx_tx_rx_void(cr,
857                                    GMX_LEFT, (void *)&vsitecomm->left_import_nconstruct, sizeof(int),
858                                    GMX_RIGHT,(void *)&vsitecomm->right_export_nconstruct,sizeof(int));
859         
860         snew(vsitecomm->left_export_construct, vsitecomm->left_export_nconstruct);
861         snew(vsitecomm->right_export_construct,vsitecomm->right_export_nconstruct);
862         
863         /* Communicate the constructing atom index arrays */
864         gmx_tx_rx_void(cr,
865                                    GMX_RIGHT,(void *)vsitecomm->right_import_construct,vsitecomm->right_import_nconstruct*sizeof(int),
866                                    GMX_LEFT, (void *)vsitecomm->left_export_construct, vsitecomm->left_export_nconstruct*sizeof(int));
867         
868         /* Communicate the constructing atom index arrays in the opposite direction */
869         gmx_tx_rx_void(cr,
870                                    GMX_LEFT ,(void *)vsitecomm->left_import_construct, vsitecomm->left_import_nconstruct*sizeof(int),
871                                    GMX_RIGHT,(void *)vsitecomm->right_export_construct,vsitecomm->right_export_nconstruct*sizeof(int));
872
873         leftbuf  = max(vsitecomm->left_export_nconstruct,vsitecomm->left_import_nconstruct);
874         rightbuf = max(vsitecomm->right_export_nconstruct,vsitecomm->right_import_nconstruct);
875         
876         bufsize  = max(leftbuf,rightbuf);
877         
878         do_comm = (bufsize>0);
879                                 
880         snew(vsitecomm->send_buf,2*bufsize);
881         snew(vsitecomm->recv_buf,2*bufsize);
882
883         return do_comm;
884 }
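
/*
 * Usage sketch (hypothetical caller): the return value tells whether any
 * constructing atoms live on a neighboring node at all:
 *
 *   bDoVsiteComm = setup_parallel_vsites(&top->idef, cr, &vsitecomm);
 *   if (bDoVsiteComm)
 *   {
 *       // exchange constructing-atom coordinates before building the vsites
 *   }
 */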
885
886 t_state *partdec_init_local_state(t_commrec *cr,t_state *state_global)
887 {
888   int i;
889   t_state *state_local;
890
891   snew(state_local,1);
892
893   /* Copy all the contents */
894   *state_local = *state_global;
895   snew(state_local->lambda,efptNR);
896   /* local storage for lambda */
897   for (i=0;i<efptNR;i++)
898     {
899       state_local->lambda[i] = state_global->lambda[i];
900     }
901   if (state_global->nrngi > 1) {
902     /* With stochastic dynamics we need local storage for the random state */
903     if (state_local->flags & (1<<estLD_RNG)) {
904       state_local->nrng = gmx_rng_n();
905       snew(state_local->ld_rng,state_local->nrng);
906 #ifdef GMX_MPI
907       if (PAR(cr)) {
908         MPI_Scatter(state_global->ld_rng,
909                     state_local->nrng*sizeof(state_local->ld_rng[0]),MPI_BYTE,
910                     state_local->ld_rng ,
911                     state_local->nrng*sizeof(state_local->ld_rng[0]),MPI_BYTE,
912                     MASTERRANK(cr),cr->mpi_comm_mygroup);
913       }
914 #endif
915     }
916     if (state_local->flags & (1<<estLD_RNGI)) {
917       snew(state_local->ld_rngi,1);
918 #ifdef GMX_MPI
919       if (PAR(cr)) {
920         MPI_Scatter(state_global->ld_rngi,
921                     sizeof(state_local->ld_rngi[0]),MPI_BYTE,
922                     state_local->ld_rngi,
923                     sizeof(state_local->ld_rngi[0]),MPI_BYTE,
924                     MASTERRANK(cr),cr->mpi_comm_mygroup);
925       }
926 #endif
927     }
928   }
929
930   return state_local;
931 }