1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
52 #include "gmx_fatal.h"
60 static t_sf *init_sf(int nr)
66 for(i=0; (i<nr); i++) {
74 static void done_sf(int nr,t_sf *sf)
78 for(i=0; (i<nr); i++) {
86 static void push_sf(t_sf *sf,int nr,t_iatom ia[])
90 srenew(sf->ia,sf->nr+nr);
92 sf->ia[sf->nr+i]=ia[i];
96 static int min_nodeid(int nr,atom_id list[],int hid[])
98 int i,nodeid,minnodeid;
101 gmx_incons("Invalid node number");
102 minnodeid=hid[list[0]];
103 for (i=1; (i<nr); i++)
104 if ((nodeid=hid[list[i]]) < minnodeid)
111 static void split_force2(t_inputrec *ir, int nnodes,int hid[],int ftype,t_ilist *ilist,
113 int *constr_min_nodeid,int * constr_max_nodeid,
114 int *left_range, int *right_range)
116 int i,j,k,type,nodeid,nratoms,tnr;
119 int node_low_ai,node_low_aj,node_high_ai,node_high_aj;
120 int node_low,node_high;
125 sf = init_sf(nnodes);
127 node_high = node_low = 0;
130 /* Walk along all the bonded forces, find the appropriate node
131 * to calc it on, and add it to that nodes list.
133 for (i=0; i<ilist->nr; i+=(1+nratoms))
135 type = ilist->iatoms[i];
136 nratoms = interaction_function[ftype].nratoms;
138 if (ftype == F_CONSTR)
140 ai = ilist->iatoms[i+1];
141 aj = ilist->iatoms[i+2];
147 if(ir->eConstrAlg == econtLINCS)
149 node_low_ai = constr_min_nodeid[ai];
150 node_low_aj = constr_min_nodeid[aj];
151 node_high_ai = constr_max_nodeid[ai];
152 node_high_aj = constr_max_nodeid[aj];
154 node_low = min(node_low_ai,node_low_aj);
155 node_high = max(node_high_ai,node_high_aj);
157 if (node_high-nodei > 1 || nodei-node_low > 1 ||
158 node_high-nodej > 1 || nodej-node_low > 1 )
160 gmx_fatal(FARGS,"Constraint dependencies further away than next-neighbor\n"
161 "in particle decomposition. Constraint between atoms %d--%d evaluated\n"
162 "on node %d and %d, but atom %d has connections within %d bonds (lincs_order)\n"
163 "of node %d, and atom %d has connections within %d bonds of node %d.\n"
164 "Reduce the # nodes, lincs_order, or\n"
165 "try domain decomposition.",ai,aj,nodei,nodej,ai,ir->nProjOrder,node_low,aj,ir->nProjOrder,node_high);
168 if (node_low < nodei || node_low < nodej)
170 right_range[node_low] = max(right_range[node_low],aj);
172 if (node_high > nodei || node_high > nodej)
174 left_range[node_high] = min(left_range[node_high],ai);
180 if (hid[ilist->iatoms[i+2]] != nodei)
181 gmx_fatal(FARGS,"Shake block crossing node boundaries\n"
182 "constraint between atoms (%d,%d) (try LINCS instead!)",
183 ilist->iatoms[i+1]+1,ilist->iatoms[i+2]+1);
186 else if (ftype == F_SETTLE)
188 /* Only the first particle is stored for settles ... */
189 ai=ilist->iatoms[i+1];
191 if (nodeid != hid[ilist->iatoms[i+2]] ||
192 nodeid != hid[ilist->iatoms[i+3]])
193 gmx_fatal(FARGS,"Settle block crossing node boundaries\n"
194 "constraint between atoms %d, %d, %d)",
195 ai,ilist->iatoms[i+2],ilist->iatoms[i+3]);
197 else if(interaction_function[ftype].flags & IF_VSITE)
199 /* Virtual sites are special, since we need to pre-communicate
200 * their coordinates to construct vsites before then main
201 * coordinate communication.
202 * Vsites can have constructing atoms both larger and smaller than themselves.
203 * To minimize communication and book-keeping, each vsite is constructed on
204 * the home node of the atomnr of the vsite.
205 * Since the vsite coordinates too have to be communicated to the next node,
208 * 1. Pre-communicate coordinates of constructing atoms
209 * 2. Construct the vsite
210 * 3. Perform main coordinate communication
212 * Note that this has change from gromacs 4.0 and earlier, where the vsite
213 * was constructed on the home node of the lowest index of any of the constructing
214 * atoms and the vsite itself.
219 else if(ftype==F_VSITE4FD || ftype==F_VSITE4FDN)
224 /* Vsites are constructed on the home node of the actual site to save communication
225 * and simplify the book-keeping.
227 nodeid=hid[ilist->iatoms[i+1]];
229 for(k=2;k<nvsite_constr+2;k++)
231 if(hid[ilist->iatoms[i+k]]<(nodeid-1) ||
232 hid[ilist->iatoms[i+k]]>(nodeid+1))
233 gmx_fatal(FARGS,"Virtual site %d and its constructing"
234 " atoms are not on the same or adjacent\n"
235 " nodes. This is necessary to avoid a lot\n"
236 " of extra communication. The easiest way"
237 " to ensure this is to place virtual sites\n"
238 " close to the constructing atoms.\n"
239 " Sorry, but you will have to rework your topology!\n",
245 nodeid=min_nodeid(nratoms,&ilist->iatoms[i+1],hid);
248 if (ftype == F_CONSTR && ir->eConstrAlg == econtLINCS)
250 push_sf(&(sf[nodeid]),nratoms+1,&(ilist->iatoms[i]));
254 push_sf(&(sf[node_low]),nratoms+1,&(ilist->iatoms[i]));
259 push_sf(&(sf[node_high]),nratoms+1,&(ilist->iatoms[i]));
265 push_sf(&(sf[nodeid]),nratoms+1,&(ilist->iatoms[i]));
272 srenew(ilist->iatoms,ilist->nr);
276 for(nodeid=0; (nodeid<nnodes); nodeid++) {
277 for (i=0; (i<sf[nodeid].nr); i++)
278 ilist->iatoms[tnr++]=sf[nodeid].ia[i];
280 multinr[nodeid]=(nodeid==0) ? 0 : multinr[nodeid-1];
281 multinr[nodeid]+=sf[nodeid].nr;
284 if (tnr != ilist->nr)
285 gmx_incons("Splitting forces over processors");
290 static int *home_index(int nnodes,t_block *cgs,int *multinr)
292 /* This routine determines the node id for each particle */
294 int nodeid,j0,j1,j,k;
296 snew(hid,cgs->index[cgs->nr]);
297 /* Initiate to -1 to make it possible to check afterwards,
298 * all hid's should be set in the loop below
300 for(k=0; (k<cgs->index[cgs->nr]); k++)
303 /* loop over nodes */
304 for(nodeid=0; (nodeid<nnodes); nodeid++) {
305 j0 = (nodeid==0) ? 0 : multinr[nodeid-1];
306 j1 = multinr[nodeid];
308 /* j0 and j1 are the boundariesin the index array */
309 for(j=j0; (j<j1); j++) {
310 for(k=cgs->index[j]; (k<cgs->index[j+1]); k++) {
315 /* Now verify that all hid's are not -1 */
316 for(k=0; (k<cgs->index[cgs->nr]); k++)
318 gmx_fatal(FARGS,"hid[%d] = -1, cgs->nr = %d, natoms = %d",
319 k,cgs->nr,cgs->index[cgs->nr]);
328 void set_bor(t_border *b,int atom,int ic,int is)
331 fprintf(debug,"border @ atom %5d [ ic = %5d, is = %5d ]\n",atom,ic,is);
337 static gmx_bool is_bor(atom_id ai[],int i)
339 return ((ai[i] != ai[i-1]) || ((ai[i] == NO_ATID) && (ai[i-1] == NO_ATID)));
342 static t_border *mk_border(FILE *fp,int natom,atom_id *invcgs,
343 atom_id *invshk,int *nb)
347 int i,j,is,ic,ns,nc,nbor;
350 for(i=0; (i<natom); i++) {
351 fprintf(debug,"atom: %6d cgindex: %6d shkindex: %6d\n",
352 i, invcgs[i], invshk[i]);
359 for(i=1; (i<natom); i++) {
360 if (is_bor(invcgs,i))
362 if (is_bor(invshk,i))
369 fprintf(fp,"There are %d charge group borders",nc);
372 fprintf(fp," and %d shake borders",ns);
376 snew(bor,max(nc,ns));
378 while ((ic < nc) || (is < ns)) {
379 if (sbor[is] == cbor[ic]) {
380 set_bor(&(bor[nbor]),cbor[ic],ic,is);
385 else if (cbor[ic] > sbor[is]) {
387 set_bor(&(bor[nbor]),cbor[ic],ic,is);
397 is++;/*gmx_fatal(FARGS,"Can't happen is=%d, ic=%d (%s, %d)",
398 is,ic,__FILE__,__LINE__);*/
401 fprintf(fp,"There are %d total borders\n",nbor);
404 fprintf(debug,"There are %d actual bor entries\n",nbor);
405 for(i=0; (i<nbor); i++)
406 fprintf(debug,"bor[%5d] = atom: %d ic: %d is: %d\n",i,
407 bor[i].atom,bor[i].ic,bor[i].is);
415 static void split_blocks(FILE *fp,t_inputrec *ir, int nnodes,
416 t_block *cgs,t_blocka *sblock,real capacity[],
421 int nodeid,last_shk,nbor;
426 atom_id *shknum,*cgsnum;
428 natoms = cgs->index[cgs->nr];
431 pr_block(debug,0,"cgs",cgs,TRUE);
432 pr_blocka(debug,0,"sblock",sblock,TRUE);
436 cgsnum = make_invblock(cgs,natoms+1);
437 shknum = make_invblocka(sblock,natoms+1);
438 border = mk_border(fp,natoms,cgsnum,shknum,&nbor);
440 snew(maxatom,nnodes);
441 tload = capacity[0]*natoms;
444 /* Start at bor is 1, to force the first block on the first processor */
445 for(i=0; (i<nbor) && (tload < natoms); i++) {
453 if ((fabs(b0-tload)<fabs(b1-tload))) {
454 /* New nodeid time */
455 multinr_cgs[nodeid] = border[i].ic;
456 maxatom[nodeid] = b0;
457 tcap -= capacity[nodeid];
460 /* Recompute target load */
461 tload = b0 + (natoms-b0)*capacity[nodeid]/tcap;
464 printf("tload: %g tcap: %g nodeid: %d\n",tload,tcap,nodeid);
467 /* Now the last one... */
468 while (nodeid < nnodes) {
469 multinr_cgs[nodeid] = cgs->nr;
470 /* Store atom number, see above */
471 maxatom[nodeid] = natoms;
474 if (nodeid != nnodes) {
475 gmx_fatal(FARGS,"nodeid = %d, nnodes = %d, file %s, line %d",
476 nodeid,nnodes,__FILE__,__LINE__);
479 for(i=nnodes-1; (i>0); i--)
480 maxatom[i]-=maxatom[i-1];
483 fprintf(fp,"Division over nodes in atoms:\n");
484 for(i=0; (i<nnodes); i++)
485 fprintf(fp," %7d",maxatom[i]);
499 static int sid_comp(const void *a,const void *b)
509 return (sa->atom-sb->atom);
514 static int mk_grey(int nnodes,egCol egc[],t_graph *g,int *AtomI,
515 int maxsid,t_sid sid[])
523 /* Loop over all the bonds */
524 for(j=0; (j<g->nedge[ai]); j++) {
525 aj=g->edge[ai][j]-g0;
526 /* If there is a white one, make it gray and set pbc */
527 if (egc[aj] == egcolWhite) {
532 /* Check whether this one has been set before... */
533 range_check(aj+g0,0,maxsid);
534 range_check(ai+g0,0,maxsid);
535 if (sid[aj+g0].sid != -1)
536 gmx_fatal(FARGS,"sid[%d]=%d, sid[%d]=%d, file %s, line %d",
537 ai,sid[ai+g0].sid,aj,sid[aj+g0].sid,__FILE__,__LINE__);
539 sid[aj+g0].sid = sid[ai+g0].sid;
540 sid[aj+g0].atom = aj+g0;
548 static int first_colour(int fC,egCol Col,t_graph *g,egCol egc[])
549 /* Return the first node with colour Col starting at fC.
550 * return -1 if none found.
555 for(i=fC; (i<g->nnodes); i++)
556 if ((g->nedge[i] > 0) && (egc[i]==Col))
562 static int mk_sblocks(FILE *fp,t_graph *g,int maxsid,t_sid sid[])
565 int nW,nG,nB; /* Number of Grey, Black, White */
566 int fW,fG; /* First of each category */
567 egCol *egc=NULL; /* The colour of each node */
585 /* We even have a loop invariant:
586 * nW+nG+nB == g->nbound
590 fprintf(fp,"Walking down the molecule graph to make constraint-blocks\n");
593 /* Find the first white, this will allways be a larger
594 * number than before, because no nodes are made white
597 if ((fW=first_colour(fW,egcolWhite,g,egc)) == -1)
598 gmx_fatal(FARGS,"No WHITE nodes found while nW=%d\n",nW);
600 /* Make the first white node grey, and set the block number */
602 range_check(fW+g0,0,maxsid);
603 sid[fW+g0].sid = nblock++;
607 /* Initial value for the first grey */
611 fprintf(debug,"Starting G loop (nW=%d, nG=%d, nB=%d, total %d)\n",
615 if ((fG=first_colour(fG,egcolGrey,g,egc)) == -1)
616 gmx_fatal(FARGS,"No GREY nodes found while nG=%d\n",nG);
618 /* Make the first grey node black */
623 /* Make all the neighbours of this black node grey
624 * and set their block number
626 ng=mk_grey(nnodes,egc,g,&fG,maxsid,sid);
627 /* ng is the number of white nodes made grey */
635 fprintf(debug,"Found %d shake blocks\n",nblock);
645 static int ms_comp(const void *a, const void *b)
647 t_merge_sid *ma = (t_merge_sid *)a;
648 t_merge_sid *mb = (t_merge_sid *)b;
651 d = ma->first-mb->first;
653 return ma->last-mb->last;
658 static int merge_sid(int i0,int at_start,int at_end,int nsid,t_sid sid[],
661 int i,j,k,n,isid,ndel;
665 /* We try to remdy the following problem:
666 * Atom: 1 2 3 4 5 6 7 8 9 10
667 * Sid: 0 -1 1 0 -1 1 1 1 1 1
670 /* Determine first and last atom in each shake ID */
673 for(k=0; (k<nsid); k++) {
674 ms[k].first = at_end+1;
678 for(i=at_start; (i<at_end); i++) {
680 range_check(isid,-1,nsid);
682 ms[isid].first = min(ms[isid].first,sid[i].atom);
683 ms[isid].last = max(ms[isid].last,sid[i].atom);
686 qsort(ms,nsid,sizeof(ms[0]),ms_comp);
688 /* Now merge the overlapping ones */
690 for(k=0; (k<nsid); ) {
691 for(j=k+1; (j<nsid); ) {
692 if (ms[j].first <= ms[k].last) {
693 ms[k].last = max(ms[k].last,ms[j].last);
694 ms[k].first = min(ms[k].first,ms[j].first);
707 for(k=0; (k<nsid); k++) {
708 while ((k < nsid-1) && (ms[k].sid == -1)) {
709 for(j=k+1; (j<nsid); j++) {
710 memcpy(&(ms[j-1]),&(ms[j]),sizeof(ms[0]));
716 for(k=at_start; (k<at_end); k++) {
721 sblock->nalloc_index = sblock->nr+1;
722 snew(sblock->index,sblock->nalloc_index);
723 sblock->nra = at_end - at_start;
724 sblock->nalloc_a = sblock->nra;
725 snew(sblock->a,sblock->nalloc_a);
726 sblock->index[0] = 0;
727 for(k=n=0; (k<nsid); k++) {
728 sblock->index[k+1] = sblock->index[k] + ms[k].last - ms[k].first+1;
729 for(j=ms[k].first; (j<=ms[k].last); j++) {
730 range_check(n,0,sblock->nra);
732 range_check(j,0,at_end);
733 if (sid[j].sid == -1)
736 fprintf(stderr,"Double sids (%d, %d) for atom %d\n",sid[j].sid,k,j);
740 /* Removed 2007-09-04
741 sblock->index[k+1] = natoms;
742 for(k=0; (k<natoms); k++)
743 if (sid[k].sid == -1)
748 assert(sblock->index[k] == sblock->nra);
754 void gen_sblocks(FILE *fp,int at_start,int at_end,
755 t_idef *idef,t_blocka *sblock,
759 int i,i0,j,k,istart,n;
763 g=mk_graph(NULL,idef,at_start,at_end,TRUE,bSettle);
765 p_graph(debug,"Graaf Dracula",g);
767 for(i=at_start; (i<at_end); i++) {
771 nsid=mk_sblocks(fp,g,at_end,sid);
776 /* Now sort the shake blocks... */
777 qsort(sid+at_start,at_end-at_start,(size_t)sizeof(sid[0]),sid_comp);
780 fprintf(debug,"Sorted shake block\n");
781 for(i=at_start; (i<at_end); i++)
782 fprintf(debug,"sid[%5d] = atom:%5d sid:%5d\n",i,sid[i].atom,sid[i].sid);
784 /* Now check how many are NOT -1, i.e. how many have to be shaken */
785 for(i0=at_start; (i0<at_end); i0++)
786 if (sid[i0].sid > -1)
789 /* Now we have the sids that have to be shaken. We'll check the min and
790 * max atom numbers and this determines the shake block. DvdS 2007-07-19.
791 * For the purpose of making boundaries all atoms in between need to be
792 * part of the shake block too. There may be cases where blocks overlap
793 * and they will have to be merged.
795 nsid = merge_sid(i0,at_start,at_end,nsid,sid,sblock);
796 /* Now sort the shake blocks again... */
797 /*qsort(sid,natoms,(size_t)sizeof(sid[0]),sid_comp);*/
799 /* Fill the sblock struct */
800 /* sblock->nr = nsid;
801 sblock->nra = natoms;
802 srenew(sblock->a,sblock->nra);
803 srenew(sblock->index,sblock->nr+1);
808 sblock->index[n++]=k;
810 istart = sid[i].atom;
811 while ((i<natoms-1) && (sid[i+1].sid == isid))
813 /* After while: we found a new block, or are thru with the atoms */
814 /* for(j=istart; (j<=sid[i].atom); j++,k++)
816 sblock->index[n] = k;
820 gmx_fatal(FARGS,"Death Horror: nsid = %d, n= %d",nsid,n);
826 /* Due to unknown reason this free generates a problem sometimes */
830 fprintf(debug,"Done gen_sblocks\n");
833 static t_blocka block2blocka(t_block *block)
838 blocka.nr = block->nr;
839 blocka.nalloc_index = blocka.nr + 1;
840 snew(blocka.index,blocka.nalloc_index);
841 for(i=0; i<=block->nr; i++)
842 blocka.index[i] = block->index[i];
843 blocka.nra = block->index[block->nr];
844 blocka.nalloc_a = blocka.nra;
845 snew(blocka.a,blocka.nalloc_a);
846 for(i=0; i<blocka.nra; i++)
856 } pd_constraintlist_t;
860 find_constraint_range_recursive(pd_constraintlist_t * constraintlist,
869 for(i=0;i<constraintlist[thisatom].nconstr;i++)
871 j = constraintlist[thisatom].index[i];
873 *min_atomid = (j<*min_atomid) ? j : *min_atomid;
874 *max_atomid = (j>*max_atomid) ? j : *max_atomid;
878 find_constraint_range_recursive(constraintlist,j,depth-1,min_atomid,max_atomid);
884 pd_determine_constraints_range(t_inputrec * ir,
895 int min_atomid,max_atomid;
896 pd_constraintlist_t *constraintlist;
898 nratoms = interaction_function[F_CONSTR].nratoms;
899 depth = ir->nProjOrder;
901 snew(constraintlist,natoms);
903 /* Make a list of all the connections */
904 for(i=0;i<ilist->nr;i+=nratoms+1)
906 ai=ilist->iatoms[i+1];
907 aj=ilist->iatoms[i+2];
908 constraintlist[ai].index[constraintlist[ai].nconstr++]=aj;
909 constraintlist[aj].index[constraintlist[aj].nconstr++]=ai;
912 for(i=0;i<natoms;i++)
917 find_constraint_range_recursive(constraintlist,i,depth,&min_atomid,&max_atomid);
919 min_nodeid[i] = hid[min_atomid];
920 max_nodeid[i] = hid[max_atomid];
922 sfree(constraintlist);
926 void split_top(FILE *fp,int nnodes,gmx_localtop_t *top,t_inputrec *ir,t_block *mols,
927 real *capacity,int *multinr_cgs,int **multinr_nre, int *left_range, int * right_range)
929 int natoms,i,j,k,mj,atom,maxatom,sstart,send,bstart,nodeid;
932 int ftype,nvsite_constr,nra,nrd;
934 int minhome,ihome,minidx;
935 int *constr_min_nodeid;
936 int *constr_max_nodeid;
941 natoms = mols->index[mols->nr];
944 fprintf(fp,"splitting topology...\n");
946 /*#define MOL_BORDER*/
947 /*Removed the above to allow splitting molecules with h-bond constraints
948 over processors. The results in DP are the same. */
949 init_blocka(&sblock);
950 if(ir->eConstrAlg != econtLINCS)
953 /* Make a special shake block that includes settles */
954 gen_sblocks(fp,0,natoms,&top->idef,&sblock,TRUE);
956 sblock = block2blocka(mols);
960 split_blocks(fp,ir,nnodes,&top->cgs,&sblock,capacity,multinr_cgs);
962 homeind=home_index(nnodes,&top->cgs,multinr_cgs);
964 snew(constr_min_nodeid,natoms);
965 snew(constr_max_nodeid,natoms);
967 if(top->idef.il[F_CONSTR].nr>0)
969 pd_determine_constraints_range(ir,natoms,&top->idef.il[F_CONSTR],homeind,constr_min_nodeid,constr_max_nodeid);
973 /* Not 100% necessary, but it is a bad habit to have uninitialized arrays around... */
974 for(i=0;i<natoms;i++)
976 constr_min_nodeid[i] = constr_max_nodeid[i] = homeind[i];
980 /* Default limits (no communication) for PD constraints */
982 for(i=1;i<nnodes;i++)
984 left_range[i] = top->cgs.index[multinr_cgs[i-1]];
985 right_range[i-1] = left_range[i]-1;
987 right_range[nnodes-1] = top->cgs.index[multinr_cgs[nnodes-1]]-1;
989 for(j=0; (j<F_NRE); j++)
990 split_force2(ir, nnodes,homeind,j,&top->idef.il[j],multinr_nre[j],constr_min_nodeid,constr_max_nodeid,
991 left_range,right_range);
993 sfree(constr_min_nodeid);
994 sfree(constr_max_nodeid);
997 done_blocka(&sblock);