2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team,
6 * check out http://www.gromacs.org for more information.
7 * Copyright (c) 2012, by the GROMACS development team, led by
8 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
9 * others, as listed in the AUTHORS file in the top-level source
10 * directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
54 #include "gmx_fatal.h"
62 static t_sf *init_sf(int nr)
68 for(i=0; (i<nr); i++) {
76 static void done_sf(int nr,t_sf *sf)
80 for(i=0; (i<nr); i++) {
88 static void push_sf(t_sf *sf,int nr,t_iatom ia[])
92 srenew(sf->ia,sf->nr+nr);
94 sf->ia[sf->nr+i]=ia[i];
98 static int min_nodeid(int nr,atom_id list[],int hid[])
100 int i,nodeid,minnodeid;
103 gmx_incons("Invalid node number");
104 minnodeid=hid[list[0]];
105 for (i=1; (i<nr); i++)
106 if ((nodeid=hid[list[i]]) < minnodeid)
113 static void split_force2(t_inputrec *ir, int nnodes,int hid[],int ftype,t_ilist *ilist,
115 int *constr_min_nodeid,int * constr_max_nodeid,
116 int *left_range, int *right_range)
118 int i,j,k,type,nodeid,nratoms,tnr;
121 int node_low_ai,node_low_aj,node_high_ai,node_high_aj;
122 int node_low,node_high;
127 sf = init_sf(nnodes);
129 node_high = node_low = 0;
132 /* Walk along all the bonded forces, find the appropriate node
133 * to calc it on, and add it to that nodes list.
135 for (i=0; i<ilist->nr; i+=(1+nratoms))
137 type = ilist->iatoms[i];
138 nratoms = interaction_function[ftype].nratoms;
140 if (ftype == F_CONSTR)
142 ai = ilist->iatoms[i+1];
143 aj = ilist->iatoms[i+2];
149 if(ir->eConstrAlg == econtLINCS)
151 node_low_ai = constr_min_nodeid[ai];
152 node_low_aj = constr_min_nodeid[aj];
153 node_high_ai = constr_max_nodeid[ai];
154 node_high_aj = constr_max_nodeid[aj];
156 node_low = min(node_low_ai,node_low_aj);
157 node_high = max(node_high_ai,node_high_aj);
159 if (node_high-nodei > 1 || nodei-node_low > 1 ||
160 node_high-nodej > 1 || nodej-node_low > 1 )
162 gmx_fatal(FARGS,"Constraint dependencies further away than next-neighbor\n"
163 "in particle decomposition. Constraint between atoms %d--%d evaluated\n"
164 "on node %d and %d, but atom %d has connections within %d bonds (lincs_order)\n"
165 "of node %d, and atom %d has connections within %d bonds of node %d.\n"
166 "Reduce the # nodes, lincs_order, or\n"
167 "try domain decomposition.",ai,aj,nodei,nodej,ai,ir->nProjOrder,node_low,aj,ir->nProjOrder,node_high);
170 if (node_low < nodei || node_low < nodej)
172 right_range[node_low] = max(right_range[node_low],aj);
174 if (node_high > nodei || node_high > nodej)
176 left_range[node_high] = min(left_range[node_high],ai);
182 if (hid[ilist->iatoms[i+2]] != nodei)
183 gmx_fatal(FARGS,"Shake block crossing node boundaries\n"
184 "constraint between atoms (%d,%d) (try LINCS instead!)",
185 ilist->iatoms[i+1]+1,ilist->iatoms[i+2]+1);
188 else if (ftype == F_SETTLE)
190 /* Only the first particle is stored for settles ... */
191 ai=ilist->iatoms[i+1];
193 if (nodeid != hid[ilist->iatoms[i+2]] ||
194 nodeid != hid[ilist->iatoms[i+3]])
195 gmx_fatal(FARGS,"Settle block crossing node boundaries\n"
196 "constraint between atoms %d, %d, %d)",
197 ai,ilist->iatoms[i+2],ilist->iatoms[i+3]);
199 else if(interaction_function[ftype].flags & IF_VSITE)
201 /* Virtual sites are special, since we need to pre-communicate
202 * their coordinates to construct vsites before then main
203 * coordinate communication.
204 * Vsites can have constructing atoms both larger and smaller than themselves.
205 * To minimize communication and book-keeping, each vsite is constructed on
206 * the home node of the atomnr of the vsite.
207 * Since the vsite coordinates too have to be communicated to the next node,
210 * 1. Pre-communicate coordinates of constructing atoms
211 * 2. Construct the vsite
212 * 3. Perform main coordinate communication
214 * Note that this has change from gromacs 4.0 and earlier, where the vsite
215 * was constructed on the home node of the lowest index of any of the constructing
216 * atoms and the vsite itself.
221 else if(ftype==F_VSITE4FD || ftype==F_VSITE4FDN)
226 /* Vsites are constructed on the home node of the actual site to save communication
227 * and simplify the book-keeping.
229 nodeid=hid[ilist->iatoms[i+1]];
231 for(k=2;k<nvsite_constr+2;k++)
233 if(hid[ilist->iatoms[i+k]]<(nodeid-1) ||
234 hid[ilist->iatoms[i+k]]>(nodeid+1))
235 gmx_fatal(FARGS,"Virtual site %d and its constructing"
236 " atoms are not on the same or adjacent\n"
237 " nodes. This is necessary to avoid a lot\n"
238 " of extra communication. The easiest way"
239 " to ensure this is to place virtual sites\n"
240 " close to the constructing atoms.\n"
241 " Sorry, but you will have to rework your topology!\n",
247 nodeid=min_nodeid(nratoms,&ilist->iatoms[i+1],hid);
250 if (ftype == F_CONSTR && ir->eConstrAlg == econtLINCS)
252 push_sf(&(sf[nodeid]),nratoms+1,&(ilist->iatoms[i]));
256 push_sf(&(sf[node_low]),nratoms+1,&(ilist->iatoms[i]));
261 push_sf(&(sf[node_high]),nratoms+1,&(ilist->iatoms[i]));
267 push_sf(&(sf[nodeid]),nratoms+1,&(ilist->iatoms[i]));
274 srenew(ilist->iatoms,ilist->nr);
278 for(nodeid=0; (nodeid<nnodes); nodeid++) {
279 for (i=0; (i<sf[nodeid].nr); i++)
280 ilist->iatoms[tnr++]=sf[nodeid].ia[i];
282 multinr[nodeid]=(nodeid==0) ? 0 : multinr[nodeid-1];
283 multinr[nodeid]+=sf[nodeid].nr;
286 if (tnr != ilist->nr)
287 gmx_incons("Splitting forces over processors");
292 static int *home_index(int nnodes,t_block *cgs,int *multinr)
294 /* This routine determines the node id for each particle */
296 int nodeid,j0,j1,j,k;
298 snew(hid,cgs->index[cgs->nr]);
299 /* Initiate to -1 to make it possible to check afterwards,
300 * all hid's should be set in the loop below
302 for(k=0; (k<cgs->index[cgs->nr]); k++)
305 /* loop over nodes */
306 for(nodeid=0; (nodeid<nnodes); nodeid++) {
307 j0 = (nodeid==0) ? 0 : multinr[nodeid-1];
308 j1 = multinr[nodeid];
310 /* j0 and j1 are the boundariesin the index array */
311 for(j=j0; (j<j1); j++) {
312 for(k=cgs->index[j]; (k<cgs->index[j+1]); k++) {
317 /* Now verify that all hid's are not -1 */
318 for(k=0; (k<cgs->index[cgs->nr]); k++)
320 gmx_fatal(FARGS,"hid[%d] = -1, cgs->nr = %d, natoms = %d",
321 k,cgs->nr,cgs->index[cgs->nr]);
330 void set_bor(t_border *b,int atom,int ic,int is)
333 fprintf(debug,"border @ atom %5d [ ic = %5d, is = %5d ]\n",atom,ic,is);
339 static gmx_bool is_bor(atom_id ai[],int i)
341 return ((ai[i] != ai[i-1]) || ((ai[i] == NO_ATID) && (ai[i-1] == NO_ATID)));
344 static t_border *mk_border(FILE *fp,int natom,atom_id *invcgs,
345 atom_id *invshk,int *nb)
349 int i,j,is,ic,ns,nc,nbor;
352 for(i=0; (i<natom); i++) {
353 fprintf(debug,"atom: %6d cgindex: %6d shkindex: %6d\n",
354 i, invcgs[i], invshk[i]);
361 for(i=1; (i<natom); i++) {
362 if (is_bor(invcgs,i))
364 if (is_bor(invshk,i))
371 fprintf(fp,"There are %d charge group borders",nc);
374 fprintf(fp," and %d shake borders",ns);
378 snew(bor,max(nc,ns));
380 while ((ic < nc) || (is < ns)) {
381 if (sbor[is] == cbor[ic]) {
382 set_bor(&(bor[nbor]),cbor[ic],ic,is);
387 else if (cbor[ic] > sbor[is]) {
389 set_bor(&(bor[nbor]),cbor[ic],ic,is);
399 is++;/*gmx_fatal(FARGS,"Can't happen is=%d, ic=%d (%s, %d)",
400 is,ic,__FILE__,__LINE__);*/
403 fprintf(fp,"There are %d total borders\n",nbor);
406 fprintf(debug,"There are %d actual bor entries\n",nbor);
407 for(i=0; (i<nbor); i++)
408 fprintf(debug,"bor[%5d] = atom: %d ic: %d is: %d\n",i,
409 bor[i].atom,bor[i].ic,bor[i].is);
417 static void split_blocks(FILE *fp,t_inputrec *ir, int nnodes,
418 t_block *cgs,t_blocka *sblock,real capacity[],
423 int nodeid,last_shk,nbor;
428 atom_id *shknum,*cgsnum;
430 natoms = cgs->index[cgs->nr];
433 pr_block(debug,0,"cgs",cgs,TRUE);
434 pr_blocka(debug,0,"sblock",sblock,TRUE);
438 cgsnum = make_invblock(cgs,natoms+1);
439 shknum = make_invblocka(sblock,natoms+1);
440 border = mk_border(fp,natoms,cgsnum,shknum,&nbor);
442 snew(maxatom,nnodes);
443 tload = capacity[0]*natoms;
446 /* Start at bor is 1, to force the first block on the first processor */
447 for(i=0; (i<nbor) && (tload < natoms); i++) {
455 if ((fabs(b0-tload)<fabs(b1-tload))) {
456 /* New nodeid time */
457 multinr_cgs[nodeid] = border[i].ic;
458 maxatom[nodeid] = b0;
459 tcap -= capacity[nodeid];
462 /* Recompute target load */
463 tload = b0 + (natoms-b0)*capacity[nodeid]/tcap;
466 printf("tload: %g tcap: %g nodeid: %d\n",tload,tcap,nodeid);
469 /* Now the last one... */
470 while (nodeid < nnodes) {
471 multinr_cgs[nodeid] = cgs->nr;
472 /* Store atom number, see above */
473 maxatom[nodeid] = natoms;
476 if (nodeid != nnodes) {
477 gmx_fatal(FARGS,"nodeid = %d, nnodes = %d, file %s, line %d",
478 nodeid,nnodes,__FILE__,__LINE__);
481 for(i=nnodes-1; (i>0); i--)
482 maxatom[i]-=maxatom[i-1];
485 fprintf(fp,"Division over nodes in atoms:\n");
486 for(i=0; (i<nnodes); i++)
487 fprintf(fp," %7d",maxatom[i]);
501 static int sid_comp(const void *a,const void *b)
511 return (sa->atom-sb->atom);
516 static int mk_grey(int nnodes,egCol egc[],t_graph *g,int *AtomI,
517 int maxsid,t_sid sid[])
525 /* Loop over all the bonds */
526 for(j=0; (j<g->nedge[ai]); j++) {
527 aj=g->edge[ai][j]-g0;
528 /* If there is a white one, make it gray and set pbc */
529 if (egc[aj] == egcolWhite) {
534 /* Check whether this one has been set before... */
535 range_check(aj+g0,0,maxsid);
536 range_check(ai+g0,0,maxsid);
537 if (sid[aj+g0].sid != -1)
538 gmx_fatal(FARGS,"sid[%d]=%d, sid[%d]=%d, file %s, line %d",
539 ai,sid[ai+g0].sid,aj,sid[aj+g0].sid,__FILE__,__LINE__);
541 sid[aj+g0].sid = sid[ai+g0].sid;
542 sid[aj+g0].atom = aj+g0;
550 static int first_colour(int fC,egCol Col,t_graph *g,egCol egc[])
551 /* Return the first node with colour Col starting at fC.
552 * return -1 if none found.
557 for(i=fC; (i<g->nnodes); i++)
558 if ((g->nedge[i] > 0) && (egc[i]==Col))
564 static int mk_sblocks(FILE *fp,t_graph *g,int maxsid,t_sid sid[])
567 int nW,nG,nB; /* Number of Grey, Black, White */
568 int fW,fG; /* First of each category */
569 egCol *egc=NULL; /* The colour of each node */
587 /* We even have a loop invariant:
588 * nW+nG+nB == g->nbound
592 fprintf(fp,"Walking down the molecule graph to make constraint-blocks\n");
595 /* Find the first white, this will allways be a larger
596 * number than before, because no nodes are made white
599 if ((fW=first_colour(fW,egcolWhite,g,egc)) == -1)
600 gmx_fatal(FARGS,"No WHITE nodes found while nW=%d\n",nW);
602 /* Make the first white node grey, and set the block number */
604 range_check(fW+g0,0,maxsid);
605 sid[fW+g0].sid = nblock++;
609 /* Initial value for the first grey */
613 fprintf(debug,"Starting G loop (nW=%d, nG=%d, nB=%d, total %d)\n",
617 if ((fG=first_colour(fG,egcolGrey,g,egc)) == -1)
618 gmx_fatal(FARGS,"No GREY nodes found while nG=%d\n",nG);
620 /* Make the first grey node black */
625 /* Make all the neighbours of this black node grey
626 * and set their block number
628 ng=mk_grey(nnodes,egc,g,&fG,maxsid,sid);
629 /* ng is the number of white nodes made grey */
637 fprintf(debug,"Found %d shake blocks\n",nblock);
647 static int ms_comp(const void *a, const void *b)
649 t_merge_sid *ma = (t_merge_sid *)a;
650 t_merge_sid *mb = (t_merge_sid *)b;
653 d = ma->first-mb->first;
655 return ma->last-mb->last;
660 static int merge_sid(int i0,int at_start,int at_end,int nsid,t_sid sid[],
663 int i,j,k,n,isid,ndel;
667 /* We try to remdy the following problem:
668 * Atom: 1 2 3 4 5 6 7 8 9 10
669 * Sid: 0 -1 1 0 -1 1 1 1 1 1
672 /* Determine first and last atom in each shake ID */
675 for(k=0; (k<nsid); k++) {
676 ms[k].first = at_end+1;
680 for(i=at_start; (i<at_end); i++) {
682 range_check(isid,-1,nsid);
684 ms[isid].first = min(ms[isid].first,sid[i].atom);
685 ms[isid].last = max(ms[isid].last,sid[i].atom);
688 qsort(ms,nsid,sizeof(ms[0]),ms_comp);
690 /* Now merge the overlapping ones */
692 for(k=0; (k<nsid); ) {
693 for(j=k+1; (j<nsid); ) {
694 if (ms[j].first <= ms[k].last) {
695 ms[k].last = max(ms[k].last,ms[j].last);
696 ms[k].first = min(ms[k].first,ms[j].first);
709 for(k=0; (k<nsid); k++) {
710 while ((k < nsid-1) && (ms[k].sid == -1)) {
711 for(j=k+1; (j<nsid); j++) {
712 memcpy(&(ms[j-1]),&(ms[j]),sizeof(ms[0]));
718 for(k=at_start; (k<at_end); k++) {
723 sblock->nalloc_index = sblock->nr+1;
724 snew(sblock->index,sblock->nalloc_index);
725 sblock->nra = at_end - at_start;
726 sblock->nalloc_a = sblock->nra;
727 snew(sblock->a,sblock->nalloc_a);
728 sblock->index[0] = 0;
729 for(k=n=0; (k<nsid); k++) {
730 sblock->index[k+1] = sblock->index[k] + ms[k].last - ms[k].first+1;
731 for(j=ms[k].first; (j<=ms[k].last); j++) {
732 range_check(n,0,sblock->nra);
734 range_check(j,0,at_end);
735 if (sid[j].sid == -1)
738 fprintf(stderr,"Double sids (%d, %d) for atom %d\n",sid[j].sid,k,j);
742 /* Removed 2007-09-04
743 sblock->index[k+1] = natoms;
744 for(k=0; (k<natoms); k++)
745 if (sid[k].sid == -1)
750 assert(sblock->index[k] == sblock->nra);
756 void gen_sblocks(FILE *fp,int at_start,int at_end,
757 t_idef *idef,t_blocka *sblock,
761 int i,i0,j,k,istart,n;
765 g=mk_graph(NULL,idef,at_start,at_end,TRUE,bSettle);
767 p_graph(debug,"Graaf Dracula",g);
769 for(i=at_start; (i<at_end); i++) {
773 nsid=mk_sblocks(fp,g,at_end,sid);
778 /* Now sort the shake blocks... */
779 qsort(sid+at_start,at_end-at_start,(size_t)sizeof(sid[0]),sid_comp);
782 fprintf(debug,"Sorted shake block\n");
783 for(i=at_start; (i<at_end); i++)
784 fprintf(debug,"sid[%5d] = atom:%5d sid:%5d\n",i,sid[i].atom,sid[i].sid);
786 /* Now check how many are NOT -1, i.e. how many have to be shaken */
787 for(i0=at_start; (i0<at_end); i0++)
788 if (sid[i0].sid > -1)
791 /* Now we have the sids that have to be shaken. We'll check the min and
792 * max atom numbers and this determines the shake block. DvdS 2007-07-19.
793 * For the purpose of making boundaries all atoms in between need to be
794 * part of the shake block too. There may be cases where blocks overlap
795 * and they will have to be merged.
797 nsid = merge_sid(i0,at_start,at_end,nsid,sid,sblock);
798 /* Now sort the shake blocks again... */
799 /*qsort(sid,natoms,(size_t)sizeof(sid[0]),sid_comp);*/
801 /* Fill the sblock struct */
802 /* sblock->nr = nsid;
803 sblock->nra = natoms;
804 srenew(sblock->a,sblock->nra);
805 srenew(sblock->index,sblock->nr+1);
810 sblock->index[n++]=k;
812 istart = sid[i].atom;
813 while ((i<natoms-1) && (sid[i+1].sid == isid))
815 /* After while: we found a new block, or are thru with the atoms */
816 /* for(j=istart; (j<=sid[i].atom); j++,k++)
818 sblock->index[n] = k;
822 gmx_fatal(FARGS,"Death Horror: nsid = %d, n= %d",nsid,n);
828 /* Due to unknown reason this free generates a problem sometimes */
832 fprintf(debug,"Done gen_sblocks\n");
835 static t_blocka block2blocka(t_block *block)
840 blocka.nr = block->nr;
841 blocka.nalloc_index = blocka.nr + 1;
842 snew(blocka.index,blocka.nalloc_index);
843 for(i=0; i<=block->nr; i++)
844 blocka.index[i] = block->index[i];
845 blocka.nra = block->index[block->nr];
846 blocka.nalloc_a = blocka.nra;
847 snew(blocka.a,blocka.nalloc_a);
848 for(i=0; i<blocka.nra; i++)
858 } pd_constraintlist_t;
862 find_constraint_range_recursive(pd_constraintlist_t * constraintlist,
871 for(i=0;i<constraintlist[thisatom].nconstr;i++)
873 j = constraintlist[thisatom].index[i];
875 *min_atomid = (j<*min_atomid) ? j : *min_atomid;
876 *max_atomid = (j>*max_atomid) ? j : *max_atomid;
880 find_constraint_range_recursive(constraintlist,j,depth-1,min_atomid,max_atomid);
886 pd_determine_constraints_range(t_inputrec * ir,
897 int min_atomid,max_atomid;
898 pd_constraintlist_t *constraintlist;
900 nratoms = interaction_function[F_CONSTR].nratoms;
901 depth = ir->nProjOrder;
903 snew(constraintlist,natoms);
905 /* Make a list of all the connections */
906 for(i=0;i<ilist->nr;i+=nratoms+1)
908 ai=ilist->iatoms[i+1];
909 aj=ilist->iatoms[i+2];
910 constraintlist[ai].index[constraintlist[ai].nconstr++]=aj;
911 constraintlist[aj].index[constraintlist[aj].nconstr++]=ai;
914 for(i=0;i<natoms;i++)
919 find_constraint_range_recursive(constraintlist,i,depth,&min_atomid,&max_atomid);
921 min_nodeid[i] = hid[min_atomid];
922 max_nodeid[i] = hid[max_atomid];
924 sfree(constraintlist);
928 void split_top(FILE *fp,int nnodes,gmx_localtop_t *top,t_inputrec *ir,t_block *mols,
929 real *capacity,int *multinr_cgs,int **multinr_nre, int *left_range, int * right_range)
931 int natoms,i,j,k,mj,atom,maxatom,sstart,send,bstart,nodeid;
934 int ftype,nvsite_constr,nra,nrd;
936 int minhome,ihome,minidx;
937 int *constr_min_nodeid;
938 int *constr_max_nodeid;
943 natoms = mols->index[mols->nr];
946 fprintf(fp,"splitting topology...\n");
948 /*#define MOL_BORDER*/
949 /*Removed the above to allow splitting molecules with h-bond constraints
950 over processors. The results in DP are the same. */
951 init_blocka(&sblock);
952 if(ir->eConstrAlg != econtLINCS)
955 /* Make a special shake block that includes settles */
956 gen_sblocks(fp,0,natoms,&top->idef,&sblock,TRUE);
958 sblock = block2blocka(mols);
962 split_blocks(fp,ir,nnodes,&top->cgs,&sblock,capacity,multinr_cgs);
964 homeind=home_index(nnodes,&top->cgs,multinr_cgs);
966 snew(constr_min_nodeid,natoms);
967 snew(constr_max_nodeid,natoms);
969 if(top->idef.il[F_CONSTR].nr>0)
971 pd_determine_constraints_range(ir,natoms,&top->idef.il[F_CONSTR],homeind,constr_min_nodeid,constr_max_nodeid);
975 /* Not 100% necessary, but it is a bad habit to have uninitialized arrays around... */
976 for(i=0;i<natoms;i++)
978 constr_min_nodeid[i] = constr_max_nodeid[i] = homeind[i];
982 /* Default limits (no communication) for PD constraints */
984 for(i=1;i<nnodes;i++)
986 left_range[i] = top->cgs.index[multinr_cgs[i-1]];
987 right_range[i-1] = left_range[i]-1;
989 right_range[nnodes-1] = top->cgs.index[multinr_cgs[nnodes-1]]-1;
991 for(j=0; (j<F_NRE); j++)
992 split_force2(ir, nnodes,homeind,j,&top->idef.il[j],multinr_nre[j],constr_min_nodeid,constr_max_nodeid,
993 left_range,right_range);
995 sfree(constr_min_nodeid);
996 sfree(constr_max_nodeid);
999 done_blocka(&sblock);