3 * This source code is part of
7 * GROningen MAchine for Chemical Simulations
10 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
11 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
12 * Copyright (c) 2001-2004, The GROMACS development team,
13 * check out http://www.gromacs.org for more information.
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * If you want to redistribute modifications, please consider that
21 * scientific software is very special. Version control is crucial -
22 * bugs must be traceable. We will be happy to consider code for
23 * inclusion in the official distribution, but derived work must not
24 * be called official GROMACS. Details are found in the README & COPYING
25 * files - if they are missing, get the official version at www.gromacs.org.
27 * To help us fund GROMACS development, we humbly ask that you cite
28 * the papers on the package - you can find them in the top README file.
30 * For more info, check our website at http://www.gromacs.org
33 * GROningen Mixture of Alchemy and Childrens' Stories
40 #include "types/commrec.h"
42 #include "gmx_fatal.h"
56 static const t_nrnb_data nbdata[eNRNB] = {
57 { "LJ", 33 }, /* nb_kernel010 */
58 { "Buckingham", 61 }, /* nb_kernel020 */
59 { "VdW(T)", 54 }, /* nb_kernel030 */
60 { "Coulomb", 27 }, /* nb_kernel100 */
61 { "Coulomb [W3]", 80 }, /* nb_kernel101 */
62 { "Coulomb [W3-W3]", 234 }, /* nb_kernel102 */
63 { "Coulomb [W4]", 80 }, /* nb_kernel103 */
64 { "Coulomb [W4-W4]", 234 }, /* nb_kernel104 */
65 { "Coulomb + LJ", 38 }, /* nb_kernel110 */
66 { "Coulomb + LJ [W3]", 91 }, /* nb_kernel111 */
67 { "Coulomb + LJ [W3-W3]", 245 }, /* nb_kernel112 */
68 { "Coulomb + LJ [W4]", 113 }, /* nb_kernel113 */
69 { "Coulomb + LJ [W4-W4]", 267 }, /* nb_kernel114 */
70 { "Coulomb + Bham ", 64 }, /* nb_kernel120 */
71 { "Coulomb + Bham [W3]", 117 }, /* nb_kernel121 */
72 { "Coulomb + Bham [W3-W3]", 271 }, /* nb_kernel122 */
73 { "Coulomb + Bham [W4]", 141 }, /* nb_kernel123 */
74 { "Coulomb + Bham [W4-W4]", 295 }, /* nb_kernel124 */
75 { "Coulomb + VdW(T) ", 59 }, /* nb_kernel130 */
76 { "Coulomb + VdW(T) [W3]", 112 }, /* nb_kernel131 */
77 { "Coulomb + VdW(T) [W3-W3]", 266 }, /* nb_kernel132 */
78 { "Coulomb + VdW(T) [W4]", 134 }, /* nb_kernel133 */
79 { "Coulomb + VdW(T) [W4-W4]", 288 }, /* nb_kernel134 */
80 { "RF Coul", 33 }, /* nb_kernel200 */
81 { "RF Coul [W3]", 98 }, /* nb_kernel201 */
82 { "RF Coul [W3-W3]", 288 }, /* nb_kernel202 */
83 { "RF Coul [W4]", 98 }, /* nb_kernel203 */
84 { "RF Coul [W4-W4]", 288 }, /* nb_kernel204 */
85 { "RF Coul + LJ", 44 }, /* nb_kernel210 */
86 { "RF Coul + LJ [W3]", 109 }, /* nb_kernel211 */
87 { "RF Coul + LJ [W3-W3]", 299 }, /* nb_kernel212 */
88 { "RF Coul + LJ [W4]", 131 }, /* nb_kernel213 */
89 { "RF Coul + LJ [W4-W4]", 321 }, /* nb_kernel214 */
90 { "RF Coul + Bham ", 70 }, /* nb_kernel220 */
91 { "RF Coul + Bham [W3]", 135 }, /* nb_kernel221 */
92 { "RF Coul + Bham [W3-W3]", 325 }, /* nb_kernel222 */
93 { "RF Coul + Bham [W4]", 159 }, /* nb_kernel223 */
94 { "RF Coul + Bham [W4-W4]", 349 }, /* nb_kernel224 */
95 { "RF Coul + VdW(T) ", 65 }, /* nb_kernel230 */
96 { "RF Coul + VdW(T) [W3]", 130 }, /* nb_kernel231 */
97 { "RF Coul + VdW(T) [W3-W3]", 320 }, /* nb_kernel232 */
98 { "RF Coul + VdW(T) [W4]", 152 }, /* nb_kernel233 */
99 { "RF Coul + VdW(T) [W4-W4]", 342 }, /* nb_kernel234 */
100 { "Coul(T)", 42 }, /* nb_kernel300 */
101 { "Coul(T) [W3]", 125 }, /* nb_kernel301 */
102 { "Coul(T) [W3-W3]", 369 }, /* nb_kernel302 */
103 { "Coul(T) [W4]", 125 }, /* nb_kernel303 */
104 { "Coul(T) [W4-W4]", 369 }, /* nb_kernel304 */
105 { "Coul(T) + LJ", 55 }, /* nb_kernel310 */
106 { "Coul(T) + LJ [W3]", 138 }, /* nb_kernel311 */
107 { "Coul(T) + LJ [W3-W3]", 382 }, /* nb_kernel312 */
108 { "Coul(T) + LJ [W4]", 158 }, /* nb_kernel313 */
109 { "Coul(T) + LJ [W4-W4]", 402 }, /* nb_kernel314 */
110 { "Coul(T) + Bham", 81 }, /* nb_kernel320 */
111 { "Coul(T) + Bham [W3]", 164 }, /* nb_kernel321 */
112 { "Coul(T) + Bham [W3-W3]", 408 }, /* nb_kernel322 */
113 { "Coul(T) + Bham [W4]", 186 }, /* nb_kernel323 */
114 { "Coul(T) + Bham [W4-W4]", 430 }, /* nb_kernel324 */
115 { "Coul(T) + VdW(T)", 68 }, /* nb_kernel330 */
116 { "Coul(T) + VdW(T) [W3]", 151 }, /* nb_kernel331 */
117 { "Coul(T) + VdW(T) [W3-W3]", 395 }, /* nb_kernel332 */
118 { "Coul(T) + VdW(T) [W4]", 179 }, /* nb_kernel333 */
119 { "Coul(T) + VdW(T) [W4-W4]", 423 }, /* nb_kernel334 */
120 { "Generalized Born Coulomb", 48 }, /* nb_kernel400 */
121 { "GB Coulomb + LJ", 61 }, /* nb_kernel410 */
122 { "GB Coulomb + VdW(T)", 79 }, /* nb_kernel430 */
123 { "LJ NF", 19 }, /* nb_kernel010nf */
124 { "Buckingham NF", 48 }, /* nb_kernel020nf */
125 { "VdW(T) NF", 33 }, /* nb_kernel030nf */
126 { "Coulomb NF", 16 }, /* nb_kernel100nf */
127 { "Coulomb [W3] NF", 47 }, /* nb_kernel101nf */
128 { "Coulomb [W3-W3] NF", 135 }, /* nb_kernel102nf */
129 { "Coulomb [W4] NF", 47 }, /* nb_kernel103nf */
130 { "Coulomb [W4-W4] NF", 135 }, /* nb_kernel104nf */
131 { "Coulomb + LJ NF", 24 }, /* nb_kernel110nf */
132 { "Coulomb + LJ [W3] NF", 55 }, /* nb_kernel111nf */
133 { "Coulomb + LJ [W3-W3] NF", 143 }, /* nb_kernel112nf */
134 { "Coulomb + LJ [W4] NF", 66 }, /* nb_kernel113nf */
135 { "Coulomb + LJ [W4-W4] NF", 154 }, /* nb_kernel114nf */
136 { "Coulomb + Bham NF", 51 }, /* nb_kernel120nf */
137 { "Coulomb + Bham [W3] NF", 82 }, /* nb_kernel121nf */
138 { "Coulomb + Bham [W3-W3] NF", 170 }, /* nb_kernel122nf */
139 { "Coulomb + Bham [W4] NF", 95 }, /* nb_kernel123nf */
140 { "Coulomb + Bham [W4-W4] NF", 183 }, /* nb_kernel124nf */
141 { "Coulomb + VdW(T) NF", 36 }, /* nb_kernel130nf */
142 { "Coulomb + VdW(T) [W3] NF", 67 }, /* nb_kernel131nf */
143 { "Coulomb + VdW(T) [W3-W3] NF", 155 }, /* nb_kernel132nf */
144 { "Coulomb + VdW(T) [W4] NF", 80 }, /* nb_kernel133nf */
145 { "Coulomb + VdW(T) [W4-W4] NF", 168 }, /* nb_kernel134nf */
146 { "RF Coul NF", 19 }, /* nb_kernel200nf */
147 { "RF Coul [W3] NF", 56 }, /* nb_kernel201nf */
148 { "RF Coul [W3-W3] NF", 162 }, /* nb_kernel202nf */
149 { "RF Coul [W4] NF", 56 }, /* nb_kernel203nf */
150 { "RF Coul [W4-W4] NF", 162 }, /* nb_kernel204nf */
151 { "RF Coul + LJ NF", 27 }, /* nb_kernel210nf */
152 { "RF Coul + LJ [W3] NF", 64 }, /* nb_kernel211nf */
153 { "RF Coul + LJ [W3-W3] NF", 170 }, /* nb_kernel212nf */
154 { "RF Coul + LJ [W4] NF", 75 }, /* nb_kernel213nf */
155 { "RF Coul + LJ [W4-W4] NF", 181 }, /* nb_kernel214nf */
156 { "RF Coul + Bham NF", 54 }, /* nb_kernel220nf */
157 { "RF Coul + Bham [W3] NF", 91 }, /* nb_kernel221nf */
158 { "RF Coul + Bham [W3-W3] NF", 197 }, /* nb_kernel222nf */
159 { "RF Coul + Bham [W4] NF", 104 }, /* nb_kernel223nf */
160 { "RF Coul + Bham [W4-W4] NF", 210 }, /* nb_kernel224nf */
161 { "RF Coul + VdW(T) NF", 39 }, /* nb_kernel230nf */
162 { "RF Coul + VdW(T) [W3] NF", 76 }, /* nb_kernel231nf */
163 { "RF Coul + VdW(T) [W3-W3] NF", 182 }, /* nb_kernel232nf */
164 { "RF Coul + VdW(T) [W4] NF", 89 }, /* nb_kernel233nf */
165 { "RF Coul + VdW(T) [W4-W4] NF", 195 }, /* nb_kernel234nf */
166 { "Coul(T) NF", 26 }, /* nb_kernel300nf */
167 { "Coul(T) [W3] NF", 77 }, /* nb_kernel301nf */
168 { "Coul(T) [W3-W3] NF", 225 }, /* nb_kernel302nf */
169 { "Coul(T) [W4] NF", 77 }, /* nb_kernel303nf */
170 { "Coul(T) [W4-W4] NF", 225 }, /* nb_kernel304nf */
171 { "Coul(T) + LJ NF", 34 }, /* nb_kernel310nf */
172 { "Coul(T) + LJ [W3] NF", 85 }, /* nb_kernel311nf */
173 { "Coul(T) + LJ [W3-W3] NF", 233 }, /* nb_kernel312nf */
174 { "Coul(T) + LJ [W4] NF", 96 }, /* nb_kernel313nf */
175 { "Coul(T) + LJ [W4-W4] NF", 244 }, /* nb_kernel314nf */
176 { "Coul(T) + Bham NF", 61 }, /* nb_kernel320nf */
177 { "Coul(T) + Bham [W3] NF", 112 }, /* nb_kernel321nf */
178 { "Coul(T) + Bham [W3-W3] NF", 260 }, /* nb_kernel322nf */
179 { "Coul(T) + Bham [W4] NF", 125 }, /* nb_kernel323nf */
180 { "Coul(T) + Bham [W4-W4] NF", 273 }, /* nb_kernel324nf */
181 { "Coul(T) + VdW(T) NF", 42 }, /* nb_kernel330nf */
182 { "Coul(T) + VdW(T) [W3] NF", 93 }, /* nb_kernel331nf */
183 { "Coul(T) + VdW(T) [W3-W3] NF", 241 }, /* nb_kernel332nf */
184 { "Coul(T) + VdW(T) [W4] NF", 110 }, /* nb_kernel333nf */
185 { "Coul(T) + VdW(T) [W4-W4] NF", 258 }, /* nb_kernel334nf */
186 { "Generalized Born Coulomb NF", 29 }, /* nb_kernel400nf */
187 { "GB Coulomb + LJ NF", 37 }, /* nb_kernel410nf */
188 { "GB Coulomb + VdW(T) NF", 49 }, /* nb_kernel430nf */
189 { "Free energy innerloop", 150 }, /* free energy, estimate */
190 { "All-vs-All, Coul + LJ", 38 },
191 { "All-vs-All, GB + LJ", 61 },
192 { "Outer nonbonded loop", 10 },
193 { "Pair Search distance check", 9 }, /* nbnxn pair dist. check */
194 /* nbnxn kernel flops are based on inner-loops without exclusion checks.
195 * Plain Coulomb runs through the RF kernels, except with CUDA.
196 * invsqrt is counted as 6 flops: 1 for _mm_rsqt_ps + 5 for iteration.
197 * The flops are equal for plain-C, x86 SIMD and CUDA, except for:
198 * - plain-C kernel uses one flop more for Coulomb-only (F) than listed
199 * - x86 SIMD LJ geom-comb.rule kernels (fastest) use 2 more flops
200 * - x86 SIMD LJ LB-comb.rule kernels (fast) use 3 (8 for F+E) more flops
201 * - GPU always does exclusions, which requires 2-4 flops, but as invsqrt
202 * is always counted as 6 flops, this roughly compensates.
204 { "LJ + Coulomb RF (F)", 38 }, /* nbnxn kernel LJ+RF, no ener */
205 { "LJ + Coulomb RF (F+E)", 54 },
206 { "LJ + Coulomb tabulated (F)", 41 }, /* nbnxn kernel LJ+tab, no en */
207 { "LJ + Coulomb tabulated (F+E)", 59 },
208 { "LJ (F)", 33 }, /* nbnxn kernel LJ, no ener */
210 { "Coulomb RF (F)", 31 }, /* nbnxn kernel RF, no ener */
211 { "Coulomb RF (F+E)", 36 },
212 { "Coulomb tabulated (F)", 34 }, /* nbnxn kernel tab, no ener */
213 { "Coulomb tabulated (F+E)", 41 },
214 { "1,4 nonbonded interactions", 90 },
215 { "Born radii (Still)", 47 },
216 { "Born radii (HCT/OBC)", 183 },
217 { "Born force chain rule", 15 },
218 { "All-vs-All Still radii", 47 },
219 { "All-vs-All HCT/OBC radii", 183 },
220 { "All-vs-All Born chain rule", 15 },
221 { "Calc Weights", 36 },
223 { "Spread Q Bspline", 2 },
225 { "Gather F Bspline", 6 },
227 { "Convolution", 4 },
230 { "Reset In Box", 3 },
236 { "FENE Bonds", 58 },
237 { "Tab. Bonds", 62 },
238 { "Restraint Potential", 86 },
239 { "Linear Angles", 57 },
241 { "G96Angles", 150 },
242 { "Quartic Angles", 160 },
243 { "Tab. Angles", 169 },
245 { "Impropers", 208 },
246 { "RB-Dihedrals", 247 },
247 { "Four. Dihedrals", 247 },
248 { "Tab. Dihedrals", 227 },
249 { "Dist. Restr.", 200 },
250 { "Orient. Restr.", 200 },
251 { "Dihedral Restr.", 200 },
252 { "Pos. Restr.", 50 },
253 { "Angle Restr.", 191 },
254 { "Angle Restr. Z", 164 },
255 { "Morse Potent.", 83 },
256 { "Cubic Bonds", 54 },
258 { "Polarization", 59 },
259 { "Anharmonic Polarization", 72 },
260 { "Water Pol.", 62 },
261 { "Thole Pol.", 296 },
264 { "Ext.ens. Update", 54 },
271 { "Constraint-V", 8 },
272 { "Shake-Init", 10 },
273 { "Constraint-Vir", 24 },
275 { "Virtual Site 2", 23 },
276 { "Virtual Site 3", 37 },
277 { "Virtual Site 3fd", 95 },
278 { "Virtual Site 3fad", 176 },
279 { "Virtual Site 3out", 87 },
280 { "Virtual Site 4fd", 110 },
281 { "Virtual Site 4fdn", 254 },
282 { "Virtual Site N", 15 },
283 { "Mixed Generalized Born stuff", 10 }
287 void init_nrnb(t_nrnb *nrnb)
291 for(i=0; (i<eNRNB); i++)
295 void cp_nrnb(t_nrnb *dest, t_nrnb *src)
299 for(i=0; (i<eNRNB); i++)
300 dest->n[i]=src->n[i];
303 void add_nrnb(t_nrnb *dest, t_nrnb *s1, t_nrnb *s2)
307 for(i=0; (i<eNRNB); i++)
308 dest->n[i]=s1->n[i]+s2->n[i];
311 void print_nrnb(FILE *out, t_nrnb *nrnb)
315 for(i=0; (i<eNRNB); i++)
317 fprintf(out," %-26s %10.0f.\n",nbdata[i].name,nrnb->n[i]);
320 void _inc_nrnb(t_nrnb *nrnb,int enr,int inc,char *file,int line)
324 printf("nrnb %15s(%2d) incremented with %8d from file %s line %d\n",
325 nbdata[enr].name,enr,inc,file,line);
329 void print_flop(FILE *out,t_nrnb *nrnb,double *nbfs,double *mflop)
332 double mni,frac,tfrac,tflop;
333 const char *myline = "-----------------------------------------------------------------------------";
336 for(i=0; (i<eNR_NBKERNEL_NR); i++) {
337 if (strstr(nbdata[i].name,"W3-W3") != NULL)
338 *nbfs += 9e-6*nrnb->n[i];
339 else if (strstr(nbdata[i].name,"W3") != NULL)
340 *nbfs += 3e-6*nrnb->n[i];
341 else if (strstr(nbdata[i].name,"W4-W4") != NULL)
342 *nbfs += 10e-6*nrnb->n[i];
343 else if (strstr(nbdata[i].name,"W4") != NULL)
344 *nbfs += 4e-6*nrnb->n[i];
346 *nbfs += 1e-6*nrnb->n[i];
349 for(i=0; (i<eNRNB); i++)
350 tflop+=1e-6*nrnb->n[i]*nbdata[i].flop;
353 fprintf(out,"No MEGA Flopsen this time\n");
357 fprintf(out,"\n\tM E G A - F L O P S A C C O U N T I N G\n\n");
361 fprintf(out," RF=Reaction-Field FE=Free Energy SCFE=Soft-Core/Free Energy\n");
362 fprintf(out," T=Tabulated W3=SPC/TIP3p W4=TIP4p (single or pairs)\n");
363 fprintf(out," NF=No Forces\n\n");
365 fprintf(out," %-32s %16s %15s %7s\n",
366 "Computing:","M-Number","M-Flops","% Flops");
367 fprintf(out,"%s\n",myline);
371 for(i=0; (i<eNRNB); i++) {
372 mni = 1e-6*nrnb->n[i];
373 *mflop += mni*nbdata[i].flop;
374 frac = 100.0*mni*nbdata[i].flop/tflop;
377 fprintf(out," %-32s %16.6f %15.3f %6.1f\n",
378 nbdata[i].name,mni,mni*nbdata[i].flop,frac);
381 fprintf(out,"%s\n",myline);
382 fprintf(out," %-32s %16s %15.3f %6.1f\n",
383 "Total","",*mflop,tfrac);
384 fprintf(out,"%s\n\n",myline);
388 void print_perf(FILE *out,double nodetime,double realtime,int nprocs,
389 gmx_large_int_t nsteps,real delta_t,
390 double nbfs,double mflop,
399 fprintf(out,"%12s %12s %12s %10s\n","","Core t (s)","Wall t (s)","(%)");
400 fprintf(out,"%12s %12.3f %12.3f %10.1f\n","Time:",
401 nodetime, realtime, 100.0*nodetime/realtime);
402 /* only print day-hour-sec format if realtime is more than 30 min */
403 if (realtime > 30*60)
405 fprintf(out,"%12s %12s","","");
406 pr_difftime(out,realtime);
410 mflop = mflop/realtime;
411 runtime = nsteps*delta_t;
413 if (getenv("GMX_DETAILED_PERF_STATS") == NULL)
415 fprintf(out,"%12s %12s %12s\n",
416 "","(ns/day)","(hour/ns)");
417 fprintf(out,"%12s %12.3f %12.3f\n","Performance:",
418 runtime*24*3.6/realtime,1000*realtime/(3600*runtime));
422 fprintf(out,"%12s %12s %12s %12s %12s\n",
423 "","(Mnbf/s)",(mflop > 1000) ? "(GFlops)" : "(MFlops)",
424 "(ns/day)","(hour/ns)");
425 fprintf(out,"%12s %12.3f %12.3f %12.3f %12.3f\n","Performance:",
426 nbfs/realtime,(mflop > 1000) ? (mflop/1000) : mflop,
427 runtime*24*3.6/realtime,1000*realtime/(3600*runtime));
432 if (getenv("GMX_DETAILED_PERF_STATS") == NULL)
434 fprintf(out,"%12s %14s\n",
436 fprintf(out,"%12s %14.1f\n","Performance:",
437 nsteps*3600.0/realtime);
441 fprintf(out,"%12s %12s %12s %14s\n",
442 "","(Mnbf/s)",(mflop > 1000) ? "(GFlops)" : "(MFlops)",
444 fprintf(out,"%12s %12.3f %12.3f %14.1f\n","Performance:",
445 nbfs/realtime,(mflop > 1000) ? (mflop/1000) : mflop,
446 nsteps*3600.0/realtime);
452 int cost_nrnb(int enr)
454 return nbdata[enr].flop;
457 const char *nrnb_str(int enr)
459 return nbdata[enr].name;
462 static const int force_index[]={
463 eNR_BONDS, eNR_ANGLES, eNR_PROPER, eNR_IMPROPER,
464 eNR_RB, eNR_DISRES, eNR_ORIRES, eNR_POSRES,
465 eNR_NS, eNR_NBKERNEL_OUTER
467 #define NFORCE_INDEX asize(force_index)
469 static const int constr_index[]={
470 eNR_SHAKE, eNR_SHAKE_RIJ, eNR_SETTLE, eNR_UPDATE, eNR_PCOUPL,
471 eNR_CONSTR_VIR,eNR_CONSTR_V
473 #define NCONSTR_INDEX asize(constr_index)
475 static double pr_av(FILE *log,t_commrec *cr,
476 double fav,double ftot[],const char *title)
483 fav /= cr->nnodes - cr->npmenodes;
484 fprintf(log,"\n %-26s",title);
485 for(i=0; (i<cr->nnodes); i++) {
486 dperc=(100.0*ftot[i])/fav;
489 fprintf(log,"%3d ",perc);
493 fprintf(log,"%6d%%\n\n",perc);
501 void pr_load(FILE *log,t_commrec *cr,t_nrnb nrnb[])
504 double dperc,unb,uf,us;
510 snew(ftot,cr->nnodes);
511 snew(stot,cr->nnodes);
513 for(i=0; (i<cr->nnodes); i++) {
514 add_nrnb(av,av,&(nrnb[i]));
515 /* Cost due to forces */
516 for(j=0; (j<eNR_NBKERNEL_NR); j++)
517 ftot[i]+=nrnb[i].n[j]*cost_nrnb(j);
518 for(j=0; (j<NFORCE_INDEX); j++)
519 ftot[i]+=nrnb[i].n[force_index[j]]*cost_nrnb(force_index[j]);
521 for(j=0; (j<NCONSTR_INDEX); j++) {
522 stot[i]+=nrnb[i].n[constr_index[j]]*cost_nrnb(constr_index[j]);
525 for(j=0; (j<eNRNB); j++)
526 av->n[j]=av->n[j]/(double)(cr->nnodes - cr->npmenodes);
528 fprintf(log,"\nDetailed load balancing info in percentage of average\n");
530 fprintf(log," Type NODE:");
531 for(i=0; (i<cr->nnodes); i++)
532 fprintf(log,"%3d ",i);
533 fprintf(log,"Scaling\n");
534 fprintf(log,"---------------------------");
535 for(i=0; (i<cr->nnodes); i++)
537 fprintf(log,"-------\n");
539 for(j=0; (j<eNRNB); j++) {
542 fprintf(log," %-26s",nrnb_str(j));
543 for(i=0; (i<cr->nnodes); i++) {
544 dperc=(100.0*nrnb[i].n[j])/av->n[j];
547 fprintf(log,"%3d ",perc);
551 fprintf(log,"%6d%%\n",perc);
558 for(i=0; (i<cr->nnodes); i++) {
562 uf=pr_av(log,cr,fav,ftot,"Total Force");
563 us=pr_av(log,cr,sav,stot,"Total Constr.");
565 unb=(uf*fav+us*sav)/(fav+sav);
568 fprintf(log,"\nTotal Scaling: %.0f%% of max performance\n\n",unb);