1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
52 #include "gmx_fatal.h"
60 static t_sf *init_sf(int nr)
66 for (i = 0; (i < nr); i++)
75 static void done_sf(int nr, t_sf *sf)
79 for (i = 0; (i < nr); i++)
88 static void push_sf(t_sf *sf, int nr, t_iatom ia[])
92 srenew(sf->ia, sf->nr+nr);
93 for (i = 0; (i < nr); i++)
95 sf->ia[sf->nr+i] = ia[i];
100 static int min_nodeid(int nr, atom_id list[], int hid[])
102 int i, nodeid, minnodeid;
106 gmx_incons("Invalid node number");
108 minnodeid = hid[list[0]];
109 for (i = 1; (i < nr); i++)
111 if ((nodeid = hid[list[i]]) < minnodeid)
121 static void split_force2(t_inputrec *ir, int nnodes, int hid[], int ftype, t_ilist *ilist,
123 int *constr_min_nodeid, int * constr_max_nodeid,
124 int *left_range, int *right_range)
126 int i, j, k, type, nodeid, nratoms, tnr;
129 int node_low_ai, node_low_aj, node_high_ai, node_high_aj;
130 int node_low, node_high;
135 sf = init_sf(nnodes);
137 node_high = node_low = 0;
140 /* Walk along all the bonded forces, find the appropriate node
141 * to calc it on, and add it to that nodes list.
143 for (i = 0; i < ilist->nr; i += (1+nratoms))
145 type = ilist->iatoms[i];
146 nratoms = interaction_function[ftype].nratoms;
148 if (ftype == F_CONSTR)
150 ai = ilist->iatoms[i+1];
151 aj = ilist->iatoms[i+2];
157 if (ir->eConstrAlg == econtLINCS)
159 node_low_ai = constr_min_nodeid[ai];
160 node_low_aj = constr_min_nodeid[aj];
161 node_high_ai = constr_max_nodeid[ai];
162 node_high_aj = constr_max_nodeid[aj];
164 node_low = min(node_low_ai, node_low_aj);
165 node_high = max(node_high_ai, node_high_aj);
167 if (node_high-nodei > 1 || nodei-node_low > 1 ||
168 node_high-nodej > 1 || nodej-node_low > 1)
170 gmx_fatal(FARGS, "Constraint dependencies further away than next-neighbor\n"
171 "in particle decomposition. Constraint between atoms %d--%d evaluated\n"
172 "on node %d and %d, but atom %d has connections within %d bonds (lincs_order)\n"
173 "of node %d, and atom %d has connections within %d bonds of node %d.\n"
174 "Reduce the # nodes, lincs_order, or\n"
175 "try domain decomposition.", ai, aj, nodei, nodej, ai, ir->nProjOrder, node_low, aj, ir->nProjOrder, node_high);
178 if (node_low < nodei || node_low < nodej)
180 right_range[node_low] = max(right_range[node_low], aj);
182 if (node_high > nodei || node_high > nodej)
184 left_range[node_high] = min(left_range[node_high], ai);
190 if (hid[ilist->iatoms[i+2]] != nodei)
192 gmx_fatal(FARGS, "Shake block crossing node boundaries\n"
193 "constraint between atoms (%d,%d) (try LINCS instead!)",
194 ilist->iatoms[i+1]+1, ilist->iatoms[i+2]+1);
198 else if (ftype == F_SETTLE)
200 /* Only the first particle is stored for settles ... */
201 ai = ilist->iatoms[i+1];
203 if (nodeid != hid[ilist->iatoms[i+2]] ||
204 nodeid != hid[ilist->iatoms[i+3]])
206 gmx_fatal(FARGS, "Settle block crossing node boundaries\n"
207 "constraint between atoms %d, %d, %d)",
208 ai, ilist->iatoms[i+2], ilist->iatoms[i+3]);
211 else if (interaction_function[ftype].flags & IF_VSITE)
213 /* Virtual sites are special, since we need to pre-communicate
214 * their coordinates to construct vsites before then main
215 * coordinate communication.
216 * Vsites can have constructing atoms both larger and smaller than themselves.
217 * To minimize communication and book-keeping, each vsite is constructed on
218 * the home node of the atomnr of the vsite.
219 * Since the vsite coordinates too have to be communicated to the next node,
222 * 1. Pre-communicate coordinates of constructing atoms
223 * 2. Construct the vsite
224 * 3. Perform main coordinate communication
226 * Note that this has change from gromacs 4.0 and earlier, where the vsite
227 * was constructed on the home node of the lowest index of any of the constructing
228 * atoms and the vsite itself.
231 if (ftype == F_VSITE2)
235 else if (ftype == F_VSITE4FD || ftype == F_VSITE4FDN)
244 /* Vsites are constructed on the home node of the actual site to save communication
245 * and simplify the book-keeping.
247 nodeid = hid[ilist->iatoms[i+1]];
249 for (k = 2; k < nvsite_constr+2; k++)
251 if (hid[ilist->iatoms[i+k]] < (nodeid-1) ||
252 hid[ilist->iatoms[i+k]] > (nodeid+1))
254 gmx_fatal(FARGS, "Virtual site %d and its constructing"
255 " atoms are not on the same or adjacent\n"
256 " nodes. This is necessary to avoid a lot\n"
257 " of extra communication. The easiest way"
258 " to ensure this is to place virtual sites\n"
259 " close to the constructing atoms.\n"
260 " Sorry, but you will have to rework your topology!\n",
267 nodeid = min_nodeid(nratoms, &ilist->iatoms[i+1], hid);
270 if (ftype == F_CONSTR && ir->eConstrAlg == econtLINCS)
272 push_sf(&(sf[nodeid]), nratoms+1, &(ilist->iatoms[i]));
274 if (node_low < nodeid)
276 push_sf(&(sf[node_low]), nratoms+1, &(ilist->iatoms[i]));
279 if (node_high > nodeid)
281 push_sf(&(sf[node_high]), nratoms+1, &(ilist->iatoms[i]));
287 push_sf(&(sf[nodeid]), nratoms+1, &(ilist->iatoms[i]));
294 srenew(ilist->iatoms, ilist->nr);
298 for (nodeid = 0; (nodeid < nnodes); nodeid++)
300 for (i = 0; (i < sf[nodeid].nr); i++)
302 ilist->iatoms[tnr++] = sf[nodeid].ia[i];
305 multinr[nodeid] = (nodeid == 0) ? 0 : multinr[nodeid-1];
306 multinr[nodeid] += sf[nodeid].nr;
309 if (tnr != ilist->nr)
311 gmx_incons("Splitting forces over processors");
317 static int *home_index(int nnodes, t_block *cgs, int *multinr)
319 /* This routine determines the node id for each particle */
321 int nodeid, j0, j1, j, k;
323 snew(hid, cgs->index[cgs->nr]);
324 /* Initiate to -1 to make it possible to check afterwards,
325 * all hid's should be set in the loop below
327 for (k = 0; (k < cgs->index[cgs->nr]); k++)
332 /* loop over nodes */
333 for (nodeid = 0; (nodeid < nnodes); nodeid++)
335 j0 = (nodeid == 0) ? 0 : multinr[nodeid-1];
336 j1 = multinr[nodeid];
338 /* j0 and j1 are the boundariesin the index array */
339 for (j = j0; (j < j1); j++)
341 for (k = cgs->index[j]; (k < cgs->index[j+1]); k++)
347 /* Now verify that all hid's are not -1 */
348 for (k = 0; (k < cgs->index[cgs->nr]); k++)
352 gmx_fatal(FARGS, "hid[%d] = -1, cgs->nr = %d, natoms = %d",
353 k, cgs->nr, cgs->index[cgs->nr]);
364 void set_bor(t_border *b, int atom, int ic, int is)
368 fprintf(debug, "border @ atom %5d [ ic = %5d, is = %5d ]\n", atom, ic, is);
375 static gmx_bool is_bor(atom_id ai[], int i)
377 return ((ai[i] != ai[i-1]) || ((ai[i] == NO_ATID) && (ai[i-1] == NO_ATID)));
380 static t_border *mk_border(FILE *fp, int natom, atom_id *invcgs,
381 atom_id *invshk, int *nb)
384 atom_id *sbor, *cbor;
385 int i, j, is, ic, ns, nc, nbor;
389 for (i = 0; (i < natom); i++)
391 fprintf(debug, "atom: %6d cgindex: %6d shkindex: %6d\n",
392 i, invcgs[i], invshk[i]);
399 for (i = 1; (i < natom); i++)
401 if (is_bor(invcgs, i))
405 if (is_bor(invshk, i))
414 fprintf(fp, "There are %d charge group borders", nc);
417 fprintf(fp, " and %d shake borders", ns);
421 snew(bor, max(nc, ns));
423 while ((ic < nc) || (is < ns))
425 if (sbor[is] == cbor[ic])
427 set_bor(&(bor[nbor]), cbor[ic], ic, is);
438 else if (cbor[ic] > sbor[is])
442 set_bor(&(bor[nbor]), cbor[ic], ic, is);
460 is++; /*gmx_fatal(FARGS,"Can't happen is=%d, ic=%d (%s, %d)",
461 is,ic,__FILE__,__LINE__);*/
466 fprintf(fp, "There are %d total borders\n", nbor);
471 fprintf(debug, "There are %d actual bor entries\n", nbor);
472 for (i = 0; (i < nbor); i++)
474 fprintf(debug, "bor[%5d] = atom: %d ic: %d is: %d\n", i,
475 bor[i].atom, bor[i].ic, bor[i].is);
484 static void split_blocks(FILE *fp, int nnodes,
485 t_block *cgs, t_blocka *sblock, real capacity[],
488 int natoms, *maxatom;
489 int i, ii, ai, b0, b1;
490 int nodeid, last_shk, nbor;
495 atom_id *shknum, *cgsnum;
497 natoms = cgs->index[cgs->nr];
501 pr_block(debug, 0, "cgs", cgs, TRUE);
502 pr_blocka(debug, 0, "sblock", sblock, TRUE);
506 cgsnum = make_invblock(cgs, natoms+1);
507 shknum = make_invblocka(sblock, natoms+1);
508 border = mk_border(fp, natoms, cgsnum, shknum, &nbor);
510 snew(maxatom, nnodes);
511 tload = capacity[0]*natoms;
514 /* Start at bor is 1, to force the first block on the first processor */
515 for (i = 0; (i < nbor) && (tload < natoms); i++)
519 b1 = border[i+1].atom;
528 if ((fabs(b0-tload) < fabs(b1-tload)))
530 /* New nodeid time */
531 multinr_cgs[nodeid] = border[i].ic;
532 maxatom[nodeid] = b0;
533 tcap -= capacity[nodeid];
536 /* Recompute target load */
537 tload = b0 + (natoms-b0)*capacity[nodeid]/tcap;
541 printf("tload: %g tcap: %g nodeid: %d\n", tload, tcap, nodeid);
545 /* Now the last one... */
546 while (nodeid < nnodes)
548 multinr_cgs[nodeid] = cgs->nr;
549 /* Store atom number, see above */
550 maxatom[nodeid] = natoms;
553 if (nodeid != nnodes)
555 gmx_fatal(FARGS, "nodeid = %d, nnodes = %d, file %s, line %d",
556 nodeid, nnodes, __FILE__, __LINE__);
559 for (i = nnodes-1; (i > 0); i--)
561 maxatom[i] -= maxatom[i-1];
566 fprintf(fp, "Division over nodes in atoms:\n");
567 for (i = 0; (i < nnodes); i++)
569 fprintf(fp, " %7d", maxatom[i]);
584 static int sid_comp(const void *a, const void *b)
592 dd = sa->sid-sb->sid;
595 return (sa->atom-sb->atom);
603 static int mk_grey(egCol egc[], t_graph *g, int *AtomI,
604 int maxsid, t_sid sid[])
606 int j, ng, ai, aj, g0;
612 /* Loop over all the bonds */
613 for (j = 0; (j < g->nedge[ai]); j++)
615 aj = g->edge[ai][j]-g0;
616 /* If there is a white one, make it gray and set pbc */
617 if (egc[aj] == egcolWhite)
625 /* Check whether this one has been set before... */
626 range_check(aj+g0, 0, maxsid);
627 range_check(ai+g0, 0, maxsid);
628 if (sid[aj+g0].sid != -1)
630 gmx_fatal(FARGS, "sid[%d]=%d, sid[%d]=%d, file %s, line %d",
631 ai, sid[ai+g0].sid, aj, sid[aj+g0].sid, __FILE__, __LINE__);
635 sid[aj+g0].sid = sid[ai+g0].sid;
636 sid[aj+g0].atom = aj+g0;
644 static int first_colour(int fC, egCol Col, t_graph *g, egCol egc[])
645 /* Return the first node with colour Col starting at fC.
646 * return -1 if none found.
651 for (i = fC; (i < g->nnodes); i++)
653 if ((g->nedge[i] > 0) && (egc[i] == Col))
662 static int mk_sblocks(FILE *fp, t_graph *g, int maxsid, t_sid sid[])
665 int nW, nG, nB; /* Number of Grey, Black, White */
666 int fW, fG; /* First of each category */
667 egCol *egc = NULL; /* The colour of each node */
687 /* We even have a loop invariant:
688 * nW+nG+nB == g->nbound
693 fprintf(fp, "Walking down the molecule graph to make constraint-blocks\n");
698 /* Find the first white, this will allways be a larger
699 * number than before, because no nodes are made white
702 if ((fW = first_colour(fW, egcolWhite, g, egc)) == -1)
704 gmx_fatal(FARGS, "No WHITE nodes found while nW=%d\n", nW);
707 /* Make the first white node grey, and set the block number */
709 range_check(fW+g0, 0, maxsid);
710 sid[fW+g0].sid = nblock++;
714 /* Initial value for the first grey */
719 fprintf(debug, "Starting G loop (nW=%d, nG=%d, nB=%d, total %d)\n",
720 nW, nG, nB, nW+nG+nB);
725 if ((fG = first_colour(fG, egcolGrey, g, egc)) == -1)
727 gmx_fatal(FARGS, "No GREY nodes found while nG=%d\n", nG);
730 /* Make the first grey node black */
731 egc[fG] = egcolBlack;
735 /* Make all the neighbours of this black node grey
736 * and set their block number
738 ng = mk_grey(egc, g, &fG, maxsid, sid);
739 /* ng is the number of white nodes made grey */
748 fprintf(debug, "Found %d shake blocks\n", nblock);
756 int first, last, sid;
759 static int ms_comp(const void *a, const void *b)
761 t_merge_sid *ma = (t_merge_sid *)a;
762 t_merge_sid *mb = (t_merge_sid *)b;
765 d = ma->first-mb->first;
768 return ma->last-mb->last;
776 static int merge_sid(int at_start, int at_end, int nsid, t_sid sid[],
779 int i, j, k, n, isid, ndel;
783 /* We try to remdy the following problem:
784 * Atom: 1 2 3 4 5 6 7 8 9 10
785 * Sid: 0 -1 1 0 -1 1 1 1 1 1
788 /* Determine first and last atom in each shake ID */
791 for (k = 0; (k < nsid); k++)
793 ms[k].first = at_end+1;
797 for (i = at_start; (i < at_end); i++)
800 range_check(isid, -1, nsid);
803 ms[isid].first = min(ms[isid].first, sid[i].atom);
804 ms[isid].last = max(ms[isid].last, sid[i].atom);
807 qsort(ms, nsid, sizeof(ms[0]), ms_comp);
809 /* Now merge the overlapping ones */
811 for (k = 0; (k < nsid); )
813 for (j = k+1; (j < nsid); )
815 if (ms[j].first <= ms[k].last)
817 ms[k].last = max(ms[k].last, ms[j].last);
818 ms[k].first = min(ms[k].first, ms[j].first);
834 for (k = 0; (k < nsid); k++)
836 while ((k < nsid-1) && (ms[k].sid == -1))
838 for (j = k+1; (j < nsid); j++)
840 memcpy(&(ms[j-1]), &(ms[j]), sizeof(ms[0]));
846 for (k = at_start; (k < at_end); k++)
852 sblock->nalloc_index = sblock->nr+1;
853 snew(sblock->index, sblock->nalloc_index);
854 sblock->nra = at_end - at_start;
855 sblock->nalloc_a = sblock->nra;
856 snew(sblock->a, sblock->nalloc_a);
857 sblock->index[0] = 0;
858 for (k = n = 0; (k < nsid); k++)
860 sblock->index[k+1] = sblock->index[k] + ms[k].last - ms[k].first+1;
861 for (j = ms[k].first; (j <= ms[k].last); j++)
863 range_check(n, 0, sblock->nra);
865 range_check(j, 0, at_end);
866 if (sid[j].sid == -1)
872 fprintf(stderr, "Double sids (%d, %d) for atom %d\n", sid[j].sid, k, j);
877 /* Removed 2007-09-04
878 sblock->index[k+1] = natoms;
879 for(k=0; (k<natoms); k++)
880 if (sid[k].sid == -1)
885 assert(sblock->index[k] == sblock->nra);
891 void gen_sblocks(FILE *fp, int at_start, int at_end,
892 t_idef *idef, t_blocka *sblock,
896 int i, i0, j, k, istart, n;
900 g = mk_graph(NULL, idef, at_start, at_end, TRUE, bSettle);
903 p_graph(debug, "Graaf Dracula", g);
906 for (i = at_start; (i < at_end); i++)
911 nsid = mk_sblocks(fp, g, at_end, sid);
918 /* Now sort the shake blocks... */
919 qsort(sid+at_start, at_end-at_start, (size_t)sizeof(sid[0]), sid_comp);
923 fprintf(debug, "Sorted shake block\n");
924 for (i = at_start; (i < at_end); i++)
926 fprintf(debug, "sid[%5d] = atom:%5d sid:%5d\n", i, sid[i].atom, sid[i].sid);
929 /* Now check how many are NOT -1, i.e. how many have to be shaken */
930 for (i0 = at_start; (i0 < at_end); i0++)
932 if (sid[i0].sid > -1)
938 /* Now we have the sids that have to be shaken. We'll check the min and
939 * max atom numbers and this determines the shake block. DvdS 2007-07-19.
940 * For the purpose of making boundaries all atoms in between need to be
941 * part of the shake block too. There may be cases where blocks overlap
942 * and they will have to be merged.
944 nsid = merge_sid(at_start, at_end, nsid, sid, sblock);
945 /* Now sort the shake blocks again... */
946 /*qsort(sid,natoms,(size_t)sizeof(sid[0]),sid_comp);*/
948 /* Fill the sblock struct */
949 /* sblock->nr = nsid;
950 sblock->nra = natoms;
951 srenew(sblock->a,sblock->nra);
952 srenew(sblock->index,sblock->nr+1);
957 sblock->index[n++]=k;
959 istart = sid[i].atom;
960 while ((i<natoms-1) && (sid[i+1].sid == isid))
962 /* After while: we found a new block, or are thru with the atoms */
963 /* for(j=istart; (j<=sid[i].atom); j++,k++)
965 sblock->index[n] = k;
969 gmx_fatal(FARGS,"Death Horror: nsid = %d, n= %d",nsid,n);
975 /* Due to unknown reason this free generates a problem sometimes */
980 fprintf(debug, "Done gen_sblocks\n");
984 static t_blocka block2blocka(t_block *block)
989 blocka.nr = block->nr;
990 blocka.nalloc_index = blocka.nr + 1;
991 snew(blocka.index, blocka.nalloc_index);
992 for (i = 0; i <= block->nr; i++)
994 blocka.index[i] = block->index[i];
996 blocka.nra = block->index[block->nr];
997 blocka.nalloc_a = blocka.nra;
998 snew(blocka.a, blocka.nalloc_a);
999 for (i = 0; i < blocka.nra; i++)
1011 } pd_constraintlist_t;
1015 find_constraint_range_recursive(pd_constraintlist_t * constraintlist,
1024 for (i = 0; i < constraintlist[thisatom].nconstr; i++)
1026 j = constraintlist[thisatom].index[i];
1028 *min_atomid = (j < *min_atomid) ? j : *min_atomid;
1029 *max_atomid = (j > *max_atomid) ? j : *max_atomid;
1033 find_constraint_range_recursive(constraintlist, j, depth-1, min_atomid, max_atomid);
1039 pd_determine_constraints_range(t_inputrec * ir,
1050 int min_atomid, max_atomid;
1051 pd_constraintlist_t *constraintlist;
1053 nratoms = interaction_function[F_CONSTR].nratoms;
1054 depth = ir->nProjOrder;
1056 snew(constraintlist, natoms);
1058 /* Make a list of all the connections */
1059 for (i = 0; i < ilist->nr; i += nratoms+1)
1061 ai = ilist->iatoms[i+1];
1062 aj = ilist->iatoms[i+2];
1063 constraintlist[ai].index[constraintlist[ai].nconstr++] = aj;
1064 constraintlist[aj].index[constraintlist[aj].nconstr++] = ai;
1067 for (i = 0; i < natoms; i++)
1072 find_constraint_range_recursive(constraintlist, i, depth, &min_atomid, &max_atomid);
1074 min_nodeid[i] = hid[min_atomid];
1075 max_nodeid[i] = hid[max_atomid];
1077 sfree(constraintlist);
1081 void split_top(FILE *fp, int nnodes, gmx_localtop_t *top, t_inputrec *ir, t_block *mols,
1082 real *capacity, int *multinr_cgs, int **multinr_nre, int *left_range, int * right_range)
1084 int natoms, i, j, k, mj, atom, maxatom, sstart, send, bstart, nodeid;
1087 int ftype, nvsite_constr, nra, nrd;
1089 int minhome, ihome, minidx;
1090 int *constr_min_nodeid;
1091 int *constr_max_nodeid;
1098 natoms = mols->index[mols->nr];
1102 fprintf(fp, "splitting topology...\n");
1105 /*#define MOL_BORDER*/
1106 /*Removed the above to allow splitting molecules with h-bond constraints
1107 over processors. The results in DP are the same. */
1108 init_blocka(&sblock);
1109 if (ir->eConstrAlg != econtLINCS)
1112 /* Make a special shake block that includes settles */
1113 gen_sblocks(fp, 0, natoms, &top->idef, &sblock, TRUE);
1115 sblock = block2blocka(mols);
1119 split_blocks(fp, nnodes, &top->cgs, &sblock, capacity, multinr_cgs);
1121 homeind = home_index(nnodes, &top->cgs, multinr_cgs);
1123 snew(constr_min_nodeid, natoms);
1124 snew(constr_max_nodeid, natoms);
1126 if (top->idef.il[F_CONSTR].nr > 0)
1128 pd_determine_constraints_range(ir, natoms, &top->idef.il[F_CONSTR], homeind, constr_min_nodeid, constr_max_nodeid);
1132 /* Not 100% necessary, but it is a bad habit to have uninitialized arrays around... */
1133 for (i = 0; i < natoms; i++)
1135 constr_min_nodeid[i] = constr_max_nodeid[i] = homeind[i];
1139 /* Default limits (no communication) for PD constraints */
1141 for (i = 1; i < nnodes; i++)
1143 left_range[i] = top->cgs.index[multinr_cgs[i-1]];
1144 right_range[i-1] = left_range[i]-1;
1146 right_range[nnodes-1] = top->cgs.index[multinr_cgs[nnodes-1]]-1;
1148 for (j = 0; (j < F_NRE); j++)
1150 split_force2(ir, nnodes, homeind, j, &top->idef.il[j], multinr_nre[j], constr_min_nodeid, constr_max_nodeid,
1151 left_range, right_range);
1154 sfree(constr_min_nodeid);
1155 sfree(constr_max_nodeid);
1158 done_blocka(&sblock);