3 * This source code is part of
7 * GROningen MAchine for Chemical Simulations
9 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
10 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
11 * Copyright (c) 2001-2008, The GROMACS development team,
12 * check out http://www.gromacs.org for more information.
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version 2
17 * of the License, or (at your option) any later version.
19 * If you want to redistribute modifications, please consider that
20 * scientific software is very special. Version control is crucial -
21 * bugs must be traceable. We will be happy to consider code for
22 * inclusion in the official distribution, but derived work must not
23 * be called official GROMACS. Details are found in the README & COPYING
24 * files - if they are missing, get the official version at www.gromacs.org.
26 * To help us fund GROMACS development, we humbly ask that you cite
27 * the papers on the package - you can find them in the top README file.
29 * For more info, check our website at http://www.gromacs.org
32 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
40 #ifdef HAVE_SYS_TIME_H
56 #include "checkpoint.h"
63 /* Enum for situations that can occur during log file parsing, the
64 * corresponding string entries can be found in do_the_tests() in
65 * const char* ParseLog[] */
71 eParselogResetProblem,
82 int nPMEnodes; /* number of PME-only nodes used in this test */
83 int nx, ny, nz; /* DD grid */
84 int guessPME; /* if nPMEnodes == -1, this is the guessed number of PME nodes */
85 double *Gcycles; /* This can contain more than one value if doing multiple tests */
89 float *PME_f_load; /* PME mesh/force load average*/
90 float PME_f_load_Av; /* Average average ;) ... */
91 char *mdrun_cmd_line; /* Mdrun command line used for this test */
97 int nr_inputfiles; /* The number of tpr and mdp input files */
98 gmx_large_int_t orig_sim_steps; /* Number of steps to be done in the real simulation */
99 real *rcoulomb; /* The coulomb radii [0...nr_inputfiles] */
100 real *rvdw; /* The vdW radii */
101 real *rlist; /* Neighbourlist cutoff radius */
103 int *nkx, *nky, *nkz;
104 real *fsx, *fsy, *fsz; /* Fourierspacing in x,y,z dimension */
108 static void sep_line(FILE *fp)
110 fprintf(fp, "\n------------------------------------------------------------\n");
114 /* Wrapper for system calls */
115 static int gmx_system_call(char *command)
118 gmx_fatal(FARGS,"No calls to system(3) supported on this platform. Attempted to call:\n'%s'\n",command);
120 return ( system(command) );
125 /* Check if string starts with substring */
126 static gmx_bool str_starts(const char *string, const char *substring)
128 return ( strncmp(string, substring, strlen(substring)) == 0);
132 static void cleandata(t_perf *perfdata, int test_nr)
134 perfdata->Gcycles[test_nr] = 0.0;
135 perfdata->ns_per_day[test_nr] = 0.0;
136 perfdata->PME_f_load[test_nr] = 0.0;
142 static gmx_bool is_equal(real a, real b)
144 real diff, eps=1.0e-7;
149 if (diff < 0.0) diff = -diff;
158 static void finalize(const char *fn_out)
164 fp = fopen(fn_out,"r");
165 fprintf(stdout,"\n\n");
167 while( fgets(buf,STRLEN-1,fp) != NULL )
169 fprintf(stdout,"%s",buf);
172 fprintf(stdout,"\n\n");
176 enum {eFoundNothing, eFoundDDStr, eFoundAccountingStr, eFoundCycleStr};
178 static int parse_logfile(const char *logfile, const char *errfile,
179 t_perf *perfdata, int test_nr, int presteps, gmx_large_int_t cpt_steps,
183 char line[STRLEN], dumstring[STRLEN], dumstring2[STRLEN];
184 const char matchstrdd[]="Domain decomposition grid";
185 const char matchstrcr[]="resetting all time and cycle counters";
186 const char matchstrbal[]="Average PME mesh/force load:";
187 const char matchstring[]="R E A L C Y C L E A N D T I M E A C C O U N T I N G";
188 const char errSIG[]="signal, stopping at the next";
191 float dum1,dum2,dum3;
193 gmx_large_int_t resetsteps=-1;
194 gmx_bool bFoundResetStr = FALSE;
195 gmx_bool bResetChecked = FALSE;
198 if (!gmx_fexist(logfile))
200 fprintf(stderr, "WARNING: Could not find logfile %s.\n", logfile);
201 cleandata(perfdata, test_nr);
202 return eParselogNotFound;
205 fp = fopen(logfile, "r");
206 perfdata->PME_f_load[test_nr] = -1.0;
207 perfdata->guessPME = -1;
209 iFound = eFoundNothing;
211 iFound = eFoundDDStr; /* Skip some case statements */
213 while (fgets(line, STRLEN, fp) != NULL)
215 /* Remove leading spaces */
218 /* Check for TERM and INT signals from user: */
219 if ( strstr(line, errSIG) != NULL )
222 cleandata(perfdata, test_nr);
223 return eParselogTerm;
226 /* Check whether cycle resetting worked */
227 if (presteps > 0 && !bFoundResetStr)
229 if (strstr(line, matchstrcr) != NULL)
231 sprintf(dumstring, "Step %s", gmx_large_int_pfmt);
232 sscanf(line, dumstring, &resetsteps);
233 bFoundResetStr = TRUE;
234 if (resetsteps == presteps+cpt_steps)
236 bResetChecked = TRUE;
240 sprintf(dumstring , gmx_large_int_pfmt, resetsteps);
241 sprintf(dumstring2, gmx_large_int_pfmt, presteps+cpt_steps);
242 fprintf(stderr, "WARNING: Time step counters were reset at step %s,\n"
243 " though they were supposed to be reset at step %s!\n",
244 dumstring, dumstring2);
249 /* Look for strings that appear in a certain order in the log file: */
253 /* Look for domain decomp grid and separate PME nodes: */
254 if (str_starts(line, matchstrdd))
256 sscanf(line, "Domain decomposition grid %d x %d x %d, separate PME nodes %d",
257 &(perfdata->nx), &(perfdata->ny), &(perfdata->nz), &npme);
258 if (perfdata->nPMEnodes == -1)
259 perfdata->guessPME = npme;
260 else if (perfdata->nPMEnodes != npme)
261 gmx_fatal(FARGS, "PME nodes from command line and output file are not identical");
262 iFound = eFoundDDStr;
264 /* Catch a few errors that might have occured: */
265 else if (str_starts(line, "There is no domain decomposition for"))
268 return eParselogNoDDGrid;
270 else if (str_starts(line, "reading tpx file"))
273 return eParselogTPXVersion;
275 else if (str_starts(line, "The -dd or -npme option request a parallel simulation"))
278 return eParselogNotParallel;
282 /* Look for PME mesh/force balance (not necessarily present, though) */
283 if (str_starts(line, matchstrbal))
284 sscanf(&line[strlen(matchstrbal)], "%f", &(perfdata->PME_f_load[test_nr]));
285 /* Look for matchstring */
286 if (str_starts(line, matchstring))
287 iFound = eFoundAccountingStr;
289 case eFoundAccountingStr:
290 /* Already found matchstring - look for cycle data */
291 if (str_starts(line, "Total "))
293 sscanf(line,"Total %d %lf",&procs,&(perfdata->Gcycles[test_nr]));
294 iFound = eFoundCycleStr;
298 /* Already found cycle data - look for remaining performance info and return */
299 if (str_starts(line, "Performance:"))
301 sscanf(line,"%s %f %f %f %f", dumstring, &dum1, &dum2, &(perfdata->ns_per_day[test_nr]), &dum3);
303 if (bResetChecked || presteps == 0)
306 return eParselogResetProblem;
312 /* Close the log file */
315 /* Check why there is no performance data in the log file.
316 * Did a fatal errors occur? */
317 if (gmx_fexist(errfile))
319 fp = fopen(errfile, "r");
320 while (fgets(line, STRLEN, fp) != NULL)
322 if ( str_starts(line, "Fatal error:") )
324 if (fgets(line, STRLEN, fp) != NULL)
325 fprintf(stderr, "\nWARNING: An error occured during this benchmark:\n"
328 cleandata(perfdata, test_nr);
329 return eParselogFatal;
336 fprintf(stderr, "WARNING: Could not find stderr file %s.\n", errfile);
339 /* Giving up ... we could not find out why there is no performance data in
341 fprintf(stdout, "No performance data in log file.\n");
342 cleandata(perfdata, test_nr);
344 return eParselogNoPerfData;
348 static gmx_bool analyze_data(
357 int *index_tpr, /* OUT: Nr of mdp file with best settings */
358 int *npme_optimal) /* OUT: Optimal number of PME nodes */
361 int line=0, line_win=-1;
362 int k_win=-1, i_win=-1, winPME;
363 double s=0.0; /* standard deviation */
366 char str_PME_f_load[13];
367 gmx_bool bCanUseOrigTPR;
368 gmx_bool bRefinedCoul, bRefinedVdW, bRefinedGrid;
374 fprintf(fp, "Summary of successful runs:\n");
375 fprintf(fp, "Line tpr PME nodes Gcycles Av. Std.dev. ns/day PME/f");
377 fprintf(fp, " DD grid");
382 for (k=0; k<ntprs; k++)
384 for (i=0; i<ntests; i++)
386 /* Select the right dataset: */
387 pd = &(perfdata[k][i]);
389 pd->Gcycles_Av = 0.0;
390 pd->PME_f_load_Av = 0.0;
391 pd->ns_per_day_Av = 0.0;
393 if (pd->nPMEnodes == -1)
394 sprintf(strbuf, "(%3d)", pd->guessPME);
396 sprintf(strbuf, " ");
398 /* Get the average run time of a setting */
399 for (j=0; j<nrepeats; j++)
401 pd->Gcycles_Av += pd->Gcycles[j];
402 pd->PME_f_load_Av += pd->PME_f_load[j];
404 pd->Gcycles_Av /= nrepeats;
405 pd->PME_f_load_Av /= nrepeats;
407 for (j=0; j<nrepeats; j++)
409 if (pd->ns_per_day[j] > 0.0)
410 pd->ns_per_day_Av += pd->ns_per_day[j];
413 /* Somehow the performance number was not aquired for this run,
414 * therefor set the average to some negative value: */
415 pd->ns_per_day_Av = -1.0f*nrepeats;
419 pd->ns_per_day_Av /= nrepeats;
422 if (pd->PME_f_load_Av > 0.0)
423 sprintf(str_PME_f_load, "%12.3f", pd->PME_f_load_Av);
425 sprintf(str_PME_f_load, "%s", " - ");
428 /* We assume we had a successful run if both averages are positive */
429 if (pd->Gcycles_Av > 0.0 && pd->ns_per_day_Av > 0.0)
431 /* Output statistics if repeats were done */
434 /* Calculate the standard deviation */
436 for (j=0; j<nrepeats; j++)
437 s += pow( pd->Gcycles[j] - pd->Gcycles_Av, 2 );
441 fprintf(fp, "%4d %3d %4d%s %12.3f %12.3f %12.3f %s",
442 line, k, pd->nPMEnodes, strbuf, pd->Gcycles_Av, s,
443 pd->ns_per_day_Av, str_PME_f_load);
445 fprintf(fp, " %3d %3d %3d", pd->nx, pd->ny, pd->nz);
448 /* Store the index of the best run found so far in 'winner': */
449 if ( (k_win == -1) || (pd->Gcycles_Av < perfdata[k_win][i_win].Gcycles_Av) )
461 gmx_fatal(FARGS, "None of the runs was successful! Check %s for problems.", fn);
465 winPME = perfdata[k_win][i_win].nPMEnodes;
469 /* We stuck to a fixed number of PME-only nodes */
470 sprintf(strbuf, "settings No. %d", k_win);
474 /* We have optimized the number of PME-only nodes */
476 sprintf(strbuf, "%s", "the automatic number of PME nodes");
478 sprintf(strbuf, "%d PME nodes", winPME);
480 fprintf(fp, "Best performance was achieved with %s", strbuf);
481 if ((nrepeats > 1) && (ntests > 1))
482 fprintf(fp, " (see line %d)", line_win);
485 /* Only mention settings if they were modified: */
486 bRefinedCoul = !is_equal(info->rcoulomb[k_win], info->rcoulomb[0]);
487 bRefinedVdW = !is_equal(info->rvdw[k_win] , info->rvdw[0] );
488 bRefinedGrid = !(info->nkx[k_win] == info->nkx[0] &&
489 info->nky[k_win] == info->nky[0] &&
490 info->nkz[k_win] == info->nkz[0]);
492 if (bRefinedCoul || bRefinedVdW || bRefinedGrid)
494 fprintf(fp, "Optimized PME settings:\n");
495 bCanUseOrigTPR = FALSE;
499 bCanUseOrigTPR = TRUE;
503 fprintf(fp, " New Coulomb radius: %f nm (was %f nm)\n", info->rcoulomb[k_win], info->rcoulomb[0]);
506 fprintf(fp, " New Van der Waals radius: %f nm (was %f nm)\n", info->rvdw[k_win], info->rvdw[0]);
509 fprintf(fp, " New Fourier grid xyz: %d %d %d (was %d %d %d)\n", info->nkx[k_win], info->nky[k_win], info->nkz[k_win],
510 info->nkx[0], info->nky[0], info->nkz[0]);
512 if (bCanUseOrigTPR && ntprs > 1)
513 fprintf(fp, "and original PME settings.\n");
517 /* Return the index of the mdp file that showed the highest performance
518 * and the optimal number of PME nodes */
520 *npme_optimal = winPME;
522 return bCanUseOrigTPR;
526 /* Get the commands we need to set up the runs from environment variables */
527 static void get_program_paths(gmx_bool bThreads, char *cmd_mpirun[], char cmd_np[],
528 char *cmd_mdrun[], int repeats)
535 const char def_mpirun[] = "mpirun";
536 const char def_mdrun[] = "mdrun";
537 const char filename[] = "benchtest.log";
538 const char match_mpi[] = "NNODES=";
539 const char match_mdrun[]= "Program: ";
540 const char empty_mpirun[] = "";
541 gmx_bool bMdrun = FALSE;
542 gmx_bool bMPI = FALSE;
545 /* Get the commands we need to set up the runs from environment variables */
548 if ( (cp = getenv("MPIRUN")) != NULL)
549 *cmd_mpirun = strdup(cp);
551 *cmd_mpirun = strdup(def_mpirun);
555 *cmd_mpirun = strdup(empty_mpirun);
558 if ( (cp = getenv("MDRUN" )) != NULL )
559 *cmd_mdrun = strdup(cp);
561 *cmd_mdrun = strdup(def_mdrun);
564 /* If no simulations have to be performed, we are done here */
568 /* Run a small test to see whether mpirun + mdrun work */
569 fprintf(stdout, "Making sure that mdrun can be executed. ");
572 snew(command, strlen(*cmd_mdrun) + strlen(cmd_np) + strlen(filename) + 50);
573 sprintf(command, "%s%s-version -maxh 0.001 1> %s 2>&1", *cmd_mdrun, cmd_np, filename);
577 snew(command, strlen(*cmd_mpirun) + strlen(cmd_np) + strlen(*cmd_mdrun) + strlen(filename) + 50);
578 sprintf(command, "%s%s%s -version -maxh 0.001 1> %s 2>&1", *cmd_mpirun, cmd_np, *cmd_mdrun, filename);
580 fprintf(stdout, "Trying '%s' ... ", command);
581 make_backup(filename);
582 gmx_system_call(command);
584 /* Check if we find the characteristic string in the output: */
585 if (!gmx_fexist(filename))
586 gmx_fatal(FARGS, "Output from test run could not be found.");
588 fp = fopen(filename, "r");
589 /* We need to scan the whole output file, since sometimes the queuing system
590 * also writes stuff to stdout/err */
593 cp2=fgets(line, STRLEN, fp);
596 if ( str_starts(line, match_mdrun) )
598 if ( str_starts(line, match_mpi) )
608 gmx_fatal(FARGS, "Need a threaded version of mdrun. This one\n"
610 "seems to have been compiled with MPI instead.",
618 gmx_fatal(FARGS, "Need an MPI-enabled version of mdrun. This one\n"
620 "seems to have been compiled without MPI support.",
627 gmx_fatal(FARGS, "Cannot execute mdrun. Please check %s for problems!",
631 fprintf(stdout, "passed.\n");
639 static void launch_simulation(
640 gmx_bool bLaunch, /* Should the simulation be launched? */
641 FILE *fp, /* General log file */
642 gmx_bool bThreads, /* whether to use threads */
643 char *cmd_mpirun, /* Command for mpirun */
644 char *cmd_np, /* Switch for -np or -nt or empty */
645 char *cmd_mdrun, /* Command for mdrun */
646 char *args_for_mdrun, /* Arguments for mdrun */
647 const char *simulation_tpr, /* This tpr will be simulated */
648 int nnodes, /* Number of nodes to run on */
649 int nPMEnodes) /* Number of PME nodes to use */
654 /* Make enough space for the system call command,
655 * (100 extra chars for -npme ... etc. options should suffice): */
656 snew(command, strlen(cmd_mpirun)+strlen(cmd_mdrun)+strlen(cmd_np)+strlen(args_for_mdrun)+strlen(simulation_tpr)+100);
658 /* Note that the -passall options requires args_for_mdrun to be at the end
659 * of the command line string */
662 sprintf(command, "%s%s-npme %d -s %s %s",
663 cmd_mdrun, cmd_np, nPMEnodes, simulation_tpr, args_for_mdrun);
667 sprintf(command, "%s%s%s -npme %d -s %s %s",
668 cmd_mpirun, cmd_np, cmd_mdrun, nPMEnodes, simulation_tpr, args_for_mdrun);
671 fprintf(fp, "%s this command line to launch the simulation:\n\n%s", bLaunch? "Using":"Please use", command);
675 /* Now the real thing! */
678 fprintf(stdout, "\nLaunching simulation with best parameters now.\nExecuting '%s'", command);
681 gmx_system_call(command);
686 static void modify_PMEsettings(
687 gmx_large_int_t simsteps, /* Set this value as number of time steps */
688 const char *fn_best_tpr, /* tpr file with the best performance */
689 const char *fn_sim_tpr) /* name of tpr file to be launched */
697 read_tpx_state(fn_best_tpr,ir,&state,NULL,&mtop);
699 /* Set nsteps to the right value */
700 ir->nsteps = simsteps;
702 /* Write the tpr file which will be launched */
703 sprintf(buf, "Writing optimized simulation file %s with nsteps=%s.\n", fn_sim_tpr, gmx_large_int_pfmt);
704 fprintf(stdout,buf,ir->nsteps);
706 write_tpx_state(fn_sim_tpr,ir,&state,&mtop);
712 #define EPME_SWITCHED(e) ((e) == eelPMESWITCH || (e) == eelPMEUSERSWITCH)
714 /* Make additional TPR files with more computational load for the
715 * direct space processors: */
716 static void make_benchmark_tprs(
717 const char *fn_sim_tpr, /* READ : User-provided tpr file */
718 char *fn_bench_tprs[], /* WRITE: Names of benchmark tpr files */
719 gmx_large_int_t benchsteps, /* Number of time steps for benchmark runs */
720 gmx_large_int_t statesteps, /* Step counter in checkpoint file */
721 real rmin, /* Minimal Coulomb radius */
722 real rmax, /* Maximal Coulomb radius */
723 int *ntprs, /* No. of TPRs to write, each with a different
724 rcoulomb and fourierspacing */
725 t_inputinfo *info, /* Contains information about mdp file options */
726 FILE *fp) /* Write the output here */
732 real nlist_buffer; /* Thickness of the buffer regions for PME-switch potentials */
735 gmx_bool bNote = FALSE;
736 real add; /* Add this to rcoul for the next test */
737 real fac = 1.0; /* Scaling factor for Coulomb radius */
738 real fourierspacing; /* Basic fourierspacing from tpr */
741 sprintf(buf, "Making benchmark tpr file%s with %s time step%s",
742 *ntprs > 1? "s":"", gmx_large_int_pfmt, benchsteps>1?"s":"");
743 fprintf(stdout, buf, benchsteps);
746 sprintf(buf, " (adding %s steps from checkpoint file)", gmx_large_int_pfmt);
747 fprintf(stdout, buf, statesteps);
748 benchsteps += statesteps;
750 fprintf(stdout, ".\n");
754 read_tpx_state(fn_sim_tpr,ir,&state,NULL,&mtop);
756 /* Check if some kind of PME was chosen */
757 if (EEL_PME(ir->coulombtype) == FALSE)
758 gmx_fatal(FARGS, "Can only do optimizations for simulations with %s electrostatics.",
761 /* Check if rcoulomb == rlist, which is necessary for plain PME. */
762 if ( (eelPME == ir->coulombtype) && !(ir->rcoulomb == ir->rlist) )
764 gmx_fatal(FARGS, "%s requires rcoulomb (%f) to be equal to rlist (%f).",
765 EELTYPE(eelPME), ir->rcoulomb, ir->rlist);
767 /* For other PME types, rcoulomb is allowed to be smaller than rlist */
768 else if (ir->rcoulomb > ir->rlist)
770 gmx_fatal(FARGS, "%s requires rcoulomb (%f) to be equal to or smaller than rlist (%f)",
771 EELTYPE(ir->coulombtype), ir->rcoulomb, ir->rlist);
774 /* Reduce the number of steps for the benchmarks */
775 info->orig_sim_steps = ir->nsteps;
776 ir->nsteps = benchsteps;
778 /* For PME-switch potentials, keep the radial distance of the buffer region */
779 nlist_buffer = ir->rlist - ir->rcoulomb;
781 /* Determine length of triclinic box vectors */
786 box_size[d] += state.box[d][i]*state.box[d][i];
787 box_size[d] = sqrt(box_size[d]);
790 /* Reconstruct fourierspacing per dimension from the number of grid points and box size */
791 info->fsx[0] = box_size[XX]/ir->nkx;
792 info->fsy[0] = box_size[YY]/ir->nky;
793 info->fsz[0] = box_size[ZZ]/ir->nkz;
795 /* Reconstruct the fourierspacing from the number of PME grid points found in the tpr */
796 fourierspacing = max(box_size[ZZ]/ir->nkz, max(box_size[XX]/ir->nkx, box_size[YY]/ir->nky));
798 fprintf(stdout, "Calculating PME grid points on the basis of a fourierspacing of %f nm\n", fourierspacing);
800 /* For performance comparisons the number of particles is useful to have */
801 fprintf(fp, " Number of particles : %d\n", mtop.natoms);
803 /* Print information about settings of which some are potentially modified: */
804 fprintf(fp, " Coulomb type : %s\n", EELTYPE(ir->coulombtype));
805 fprintf(fp, " Grid spacing x y z : %f %f %f\n",
806 box_size[XX]/ir->nkx, box_size[YY]/ir->nky, box_size[ZZ]/ir->nkz);
807 fprintf(fp, " Van der Waals type : %s\n", EVDWTYPE(ir->vdwtype));
808 if (EVDW_SWITCHED(ir->vdwtype))
809 fprintf(fp, " rvdw_switch : %f nm\n", ir->rvdw_switch);
810 if (EPME_SWITCHED(ir->coulombtype))
811 fprintf(fp, " rlist : %f nm\n", ir->rlist);
812 if (ir->rlistlong != max_cutoff(ir->rvdw,ir->rcoulomb))
813 fprintf(fp, " rlistlong : %f nm\n", ir->rlistlong);
815 /* Print a descriptive line about the tpr settings tested */
816 fprintf(fp, "\nWill try these real/reciprocal workload settings:\n");
817 fprintf(fp, " No. scaling rcoulomb");
818 fprintf(fp, " nkx nky nkz");
819 fprintf(fp, " spacing");
820 if (evdwCUT == ir->vdwtype)
821 fprintf(fp, " rvdw");
822 if (EPME_SWITCHED(ir->coulombtype))
823 fprintf(fp, " rlist");
824 if ( ir->rlistlong != max_cutoff(ir->rlist,max_cutoff(ir->rvdw,ir->rcoulomb)) )
825 fprintf(fp, " rlistlong");
826 fprintf(fp, " tpr file\n");
828 /* Loop to create the requested number of tpr input files */
829 for (j = 0; j < *ntprs; j++)
831 /* The first .tpr is the provided one, just need to modify nsteps,
832 * so skip the following block */
835 /* Determine which Coulomb radii rc to use in the benchmarks */
836 add = (rmax-rmin)/(*ntprs-1);
837 if (is_equal(rmin,info->rcoulomb[0]))
839 ir->rcoulomb = rmin + j*add;
841 else if (is_equal(rmax,info->rcoulomb[0]))
843 ir->rcoulomb = rmin + (j-1)*add;
847 /* rmin != rcoul != rmax, ergo test between rmin and rmax */
848 add = (rmax-rmin)/(*ntprs-2);
849 ir->rcoulomb = rmin + (j-1)*add;
852 /* Determine the scaling factor fac */
853 fac = ir->rcoulomb/info->rcoulomb[0];
855 /* Scale the Fourier grid spacing */
856 ir->nkx = ir->nky = ir->nkz = 0;
857 calc_grid(NULL,state.box,fourierspacing*fac,&ir->nkx,&ir->nky,&ir->nkz);
859 /* Adjust other radii since various conditions neet to be fulfilled */
860 if (eelPME == ir->coulombtype)
862 /* plain PME, rcoulomb must be equal to rlist */
863 ir->rlist = ir->rcoulomb;
867 /* rlist must be >= rcoulomb, we keep the size of the buffer region */
868 ir->rlist = ir->rcoulomb + nlist_buffer;
871 if (evdwCUT == ir->vdwtype)
873 /* For vdw cutoff, rvdw >= rlist */
874 ir->rvdw = max(info->rvdw[0], ir->rlist);
877 ir->rlistlong = max_cutoff(ir->rlist,max_cutoff(ir->rvdw,ir->rcoulomb));
879 } /* end of "if (j != 0)" */
881 /* for j==0: Save the original settings
882 * for j >0: Save modified radii and Fourier grids */
883 info->rcoulomb[j] = ir->rcoulomb;
884 info->rvdw[j] = ir->rvdw;
885 info->nkx[j] = ir->nkx;
886 info->nky[j] = ir->nky;
887 info->nkz[j] = ir->nkz;
888 info->rlist[j] = ir->rlist;
889 info->rlistlong[j] = ir->rlistlong;
890 info->fsx[j] = fac*fourierspacing;
891 info->fsy[j] = fac*fourierspacing;
892 info->fsz[j] = fac*fourierspacing;
894 /* Write the benchmark tpr file */
895 strncpy(fn_bench_tprs[j],fn_sim_tpr,strlen(fn_sim_tpr)-strlen(".tpr"));
896 sprintf(buf, "_bench%.2d.tpr", j);
897 strcat(fn_bench_tprs[j], buf);
898 fprintf(stdout,"Writing benchmark tpr %s with nsteps=", fn_bench_tprs[j]);
899 fprintf(stdout, gmx_large_int_pfmt, ir->nsteps);
901 fprintf(stdout,", scaling factor %f\n", fac);
903 fprintf(stdout,", unmodified settings\n");
905 write_tpx_state(fn_bench_tprs[j],ir,&state,&mtop);
907 /* Write information about modified tpr settings to log file */
908 fprintf(fp, "%4d%10f%10f", j, fac, ir->rcoulomb);
909 fprintf(fp, "%5d%5d%5d", ir->nkx, ir->nky, ir->nkz);
910 fprintf(fp, " %9f ", info->fsx[j]);
911 if (evdwCUT == ir->vdwtype)
912 fprintf(fp, "%10f", ir->rvdw);
913 if (EPME_SWITCHED(ir->coulombtype))
914 fprintf(fp, "%10f", ir->rlist);
915 if ( info->rlistlong[0] != max_cutoff(info->rlist[0],max_cutoff(info->rvdw[0],info->rcoulomb[0])) )
916 fprintf(fp, "%10f", ir->rlistlong);
917 fprintf(fp, " %-14s\n",fn_bench_tprs[j]);
919 /* Make it clear to the user that some additional settings were modified */
920 if ( !is_equal(ir->rvdw , info->rvdw[0])
921 || !is_equal(ir->rlistlong, info->rlistlong[0]) )
927 fprintf(fp, "\nNote that in addition to the Coulomb radius and the Fourier grid\n"
928 "other input settings were also changed (see table above).\n"
929 "Please check if the modified settings are appropriate.\n");
936 /* Rename the files we want to keep to some meaningful filename and
938 static void cleanup(const t_filenm *fnm, int nfile, int k, int nnodes,
939 int nPMEnodes, int nr, gmx_bool bKeepStderr)
941 char numstring[STRLEN];
942 char newfilename[STRLEN];
948 fprintf(stdout, "Cleaning up, deleting benchmark temp files ...\n");
950 for (i=0; i<nfile; i++)
952 opt = (char *)fnm[i].opt;
953 if ( strcmp(opt, "-p") == 0 )
955 /* do nothing; keep this file */
958 else if (strcmp(opt, "-bg") == 0)
960 /* Give the log file a nice name so one can later see which parameters were used */
963 sprintf(numstring, "_%d", nr);
964 sprintf(newfilename, "%s_no%d_np%d_npme%d%s", opt2fn("-bg",nfile,fnm), k, nnodes, nPMEnodes, numstring);
965 if (gmx_fexist(opt2fn("-bg",nfile,fnm)))
967 fprintf(stdout, "renaming log file to %s\n", newfilename);
968 make_backup(newfilename);
969 rename(opt2fn("-bg",nfile,fnm), newfilename);
972 else if (strcmp(opt, "-err") == 0)
974 /* This file contains the output of stderr. We want to keep it in
975 * cases where there have been problems. */
976 fn = opt2fn(opt, nfile, fnm);
979 sprintf(numstring, "_%d", nr);
980 sprintf(newfilename, "%s_no%d_np%d_npme%d%s", fn, k, nnodes, nPMEnodes, numstring);
985 fprintf(stdout, "Saving stderr output in %s\n", newfilename);
986 make_backup(newfilename);
987 rename(fn, newfilename);
991 fprintf(stdout, "Deleting %s\n", fn);
996 /* Delete the files which are created for each benchmark run: (options -b*) */
997 else if ( (0 == strncmp(opt, "-b", 2)) && (opt2bSet(opt,nfile,fnm) || !is_optional(&fnm[i])) )
999 fn = opt2fn(opt, nfile, fnm);
1002 fprintf(stdout, "Deleting %s\n", fn);
1010 /* Returns the largest common factor of n1 and n2 */
1011 static int largest_common_factor(int n1, int n2)
1016 for (factor=nmax; factor > 0; factor--)
1018 if ( 0==(n1 % factor) && 0==(n2 % factor) )
1023 return 0; /* one for the compiler */
1026 enum {eNpmeAuto, eNpmeAll, eNpmeReduced, eNpmeSubset, eNpmeNr};
1028 /* Create a list of numbers of PME nodes to test */
1029 static void make_npme_list(
1030 const char *npmevalues_opt, /* Make a complete list with all
1031 * possibilities or a short list that keeps only
1032 * reasonable numbers of PME nodes */
1033 int *nentries, /* Number of entries we put in the nPMEnodes list */
1034 int *nPMEnodes[], /* Each entry contains the value for -npme */
1035 int nnodes, /* Total number of nodes to do the tests on */
1036 int minPMEnodes, /* Minimum number of PME nodes */
1037 int maxPMEnodes) /* Maximum number of PME nodes */
1040 int min_factor=1; /* We request that npp and npme have this minimal
1041 * largest common factor (depends on npp) */
1042 int nlistmax; /* Max. list size */
1043 int nlist; /* Actual number of entries in list */
1047 /* Do we need to check all possible values for -npme or is a reduced list enough? */
1048 if ( 0 == strcmp(npmevalues_opt, "all") )
1052 else if ( 0 == strcmp(npmevalues_opt, "subset") )
1054 eNPME = eNpmeSubset;
1056 else /* "auto" or "range" */
1060 else if (nnodes < 128)
1061 eNPME = eNpmeReduced;
1063 eNPME = eNpmeSubset;
1066 /* Calculate how many entries we could possibly have (in case of -npme all) */
1069 nlistmax = maxPMEnodes - minPMEnodes + 3;
1070 if (0 == minPMEnodes)
1076 /* Now make the actual list which is at most of size nlist */
1077 snew(*nPMEnodes, nlistmax);
1078 nlist = 0; /* start counting again, now the real entries in the list */
1079 for (i = 0; i < nlistmax - 2; i++)
1081 npme = maxPMEnodes - i;
1092 /* For 2d PME we want a common largest factor of at least the cube
1093 * root of the number of PP nodes */
1094 min_factor = (int) pow(npp, 1.0/3.0);
1097 gmx_fatal(FARGS, "Unknown option for eNPME in make_npme_list");
1100 if (largest_common_factor(npp, npme) >= min_factor)
1102 (*nPMEnodes)[nlist] = npme;
1106 /* We always test 0 PME nodes and the automatic number */
1107 *nentries = nlist + 2;
1108 (*nPMEnodes)[nlist ] = 0;
1109 (*nPMEnodes)[nlist+1] = -1;
1111 fprintf(stderr, "Will try the following %d different values for -npme:\n", *nentries);
1112 for (i=0; i<*nentries-1; i++)
1113 fprintf(stderr, "%d, ", (*nPMEnodes)[i]);
1114 fprintf(stderr, "and %d (auto).\n", (*nPMEnodes)[*nentries-1]);
1118 /* Allocate memory to store the performance data */
1119 static void init_perfdata(t_perf *perfdata[], int ntprs, int datasets, int repeats)
1124 for (k=0; k<ntprs; k++)
1126 snew(perfdata[k], datasets);
1127 for (i=0; i<datasets; i++)
1129 for (j=0; j<repeats; j++)
1131 snew(perfdata[k][i].Gcycles , repeats);
1132 snew(perfdata[k][i].ns_per_day, repeats);
1133 snew(perfdata[k][i].PME_f_load, repeats);
1140 /* Check for errors on mdrun -h */
1141 static void make_sure_it_runs(char *mdrun_cmd_line, int length, FILE *fp)
1143 char *command, *msg;
1147 snew(command, length + 15);
1148 snew(msg , length + 500);
1150 fprintf(stdout, "Making shure the benchmarks can be executed ...\n");
1151 sprintf(command, "%s-h -quiet", mdrun_cmd_line);
1152 ret = gmx_system_call(command);
1156 /* To prevent confusion, do not again issue a gmx_fatal here since we already
1157 * get the error message from mdrun itself */
1158 sprintf(msg, "Cannot run the benchmark simulations! Please check the error message of\n"
1159 "mdrun for the source of the problem. Did you provide a command line\n"
1160 "argument that neither g_tune_pme nor mdrun understands? Offending command:\n"
1161 "\n%s\n\n", command);
1163 fprintf(stderr, "%s", msg);
1165 fprintf(fp , "%s", msg);
1175 static void do_the_tests(
1176 FILE *fp, /* General g_tune_pme output file */
1177 char **tpr_names, /* Filenames of the input files to test */
1178 int maxPMEnodes, /* Max fraction of nodes to use for PME */
1179 int minPMEnodes, /* Min fraction of nodes to use for PME */
1180 int npme_fixed, /* If >= -1, test fixed number of PME
1182 const char *npmevalues_opt, /* Which -npme values should be tested */
1183 t_perf **perfdata, /* Here the performace data is stored */
1184 int *pmeentries, /* Entries in the nPMEnodes list */
1185 int repeats, /* Repeat each test this often */
1186 int nnodes, /* Total number of nodes = nPP + nPME */
1187 int nr_tprs, /* Total number of tpr files to test */
1188 gmx_bool bThreads, /* Threads or MPI? */
1189 char *cmd_mpirun, /* mpirun command string */
1190 char *cmd_np, /* "-np", "-n", whatever mpirun needs */
1191 char *cmd_mdrun, /* mdrun command string */
1192 char *cmd_args_bench, /* arguments for mdrun in a string */
1193 const t_filenm *fnm, /* List of filenames from command line */
1194 int nfile, /* Number of files specified on the cmdl. */
1195 int sim_part, /* For checkpointing */
1196 int presteps, /* DLB equilibration steps, is checked */
1197 gmx_large_int_t cpt_steps) /* Time step counter in the checkpoint */
1199 int i,nr,k,ret,count=0,totaltests;
1200 int *nPMEnodes=NULL;
1203 char *command, *cmd_stub;
1205 gmx_bool bResetProblem=FALSE;
1206 gmx_bool bFirst=TRUE;
1209 /* This string array corresponds to the eParselog enum type at the start
1211 const char* ParseLog[] = {"OK.",
1212 "Logfile not found!",
1213 "No timings, logfile truncated?",
1214 "Run was terminated.",
1215 "Counters were not reset properly.",
1216 "No DD grid found for these settings.",
1217 "TPX version conflict!",
1218 "mdrun was not started in parallel!",
1219 "An error occured." };
1220 char str_PME_f_load[13];
1223 /* Allocate space for the mdrun command line. 100 extra characters should
1224 be more than enough for the -npme etcetera arguments */
1225 cmdline_length = strlen(cmd_mpirun)
1228 + strlen(cmd_args_bench)
1229 + strlen(tpr_names[0]) + 100;
1230 snew(command , cmdline_length);
1231 snew(cmd_stub, cmdline_length);
1233 /* Construct the part of the command line that stays the same for all tests: */
1236 sprintf(cmd_stub, "%s%s", cmd_mdrun, cmd_np);
1240 sprintf(cmd_stub, "%s%s%s ", cmd_mpirun, cmd_np, cmd_mdrun);
1243 /* Create a list of numbers of PME nodes to test */
1244 if (npme_fixed < -1)
1246 make_npme_list(npmevalues_opt, pmeentries, &nPMEnodes,
1247 nnodes, minPMEnodes, maxPMEnodes);
1253 nPMEnodes[0] = npme_fixed;
1254 fprintf(stderr, "Will use a fixed number of %d PME-only nodes.\n", nPMEnodes[0]);
1259 fprintf(fp, "\nNo benchmarks done since number of repeats (-r) is 0.\n");
1261 finalize(opt2fn("-p", nfile, fnm));
1265 /* Allocate one dataset for each tpr input file: */
1266 init_perfdata(perfdata, nr_tprs, *pmeentries, repeats);
1268 /*****************************************/
1269 /* Main loop over all tpr files to test: */
1270 /*****************************************/
1271 totaltests = nr_tprs*(*pmeentries)*repeats;
1272 for (k=0; k<nr_tprs;k++)
1274 fprintf(fp, "\nIndividual timings for input file %d (%s):\n", k, tpr_names[k]);
1275 fprintf(fp, "PME nodes Gcycles ns/day PME/f Remark\n");
1276 /* Loop over various numbers of PME nodes: */
1277 for (i = 0; i < *pmeentries; i++)
1279 pd = &perfdata[k][i];
1281 /* Loop over the repeats for each scenario: */
1282 for (nr = 0; nr < repeats; nr++)
1284 pd->nPMEnodes = nPMEnodes[i];
1286 /* Add -npme and -s to the command line and save it. Note that
1287 * the -passall (if set) options requires cmd_args_bench to be
1288 * at the end of the command line string */
1289 snew(pd->mdrun_cmd_line, cmdline_length);
1290 sprintf(pd->mdrun_cmd_line, "%s-npme %d -s %s %s",
1291 cmd_stub, pd->nPMEnodes, tpr_names[k], cmd_args_bench);
1293 /* To prevent that all benchmarks fail due to a show-stopper argument
1294 * on the mdrun command line, we make a quick check with mdrun -h first */
1296 make_sure_it_runs(pd->mdrun_cmd_line, cmdline_length, fp);
1299 /* Do a benchmark simulation: */
1301 sprintf(buf, ", pass %d/%d", nr+1, repeats);
1304 fprintf(stdout, "\n=== Progress %2.0f%%, tpr %d/%d, run %d/%d%s:\n",
1305 (100.0*count)/totaltests,
1306 k+1, nr_tprs, i+1, *pmeentries, buf);
1307 make_backup(opt2fn("-err",nfile,fnm));
1308 sprintf(command, "%s 1> /dev/null 2>%s", pd->mdrun_cmd_line, opt2fn("-err",nfile,fnm));
1309 fprintf(stdout, "%s\n", pd->mdrun_cmd_line);
1310 gmx_system_call(command);
1312 /* Collect the performance data from the log file; also check stderr
1313 * for fatal errors */
1314 ret = parse_logfile(opt2fn("-bg",nfile,fnm), opt2fn("-err",nfile,fnm),
1315 pd, nr, presteps, cpt_steps, nnodes);
1316 if ((presteps > 0) && (ret == eParselogResetProblem))
1317 bResetProblem = TRUE;
1319 if (-1 == pd->nPMEnodes)
1320 sprintf(buf, "(%3d)", pd->guessPME);
1325 if (pd->PME_f_load[nr] > 0.0)
1326 sprintf(str_PME_f_load, "%12.3f", pd->PME_f_load[nr]);
1328 sprintf(str_PME_f_load, "%s", " - ");
1330 /* Write the data we got to disk */
1331 fprintf(fp, "%4d%s %12.3f %12.3f %s %s", pd->nPMEnodes,
1332 buf, pd->Gcycles[nr], pd->ns_per_day[nr], str_PME_f_load, ParseLog[ret]);
1333 if (! (ret==eParselogOK || ret==eParselogNoDDGrid || ret==eParselogNotFound) )
1334 fprintf(fp, " Check %s file for problems.", ret==eParselogFatal? "err":"log");
1339 /* Do some cleaning up and delete the files we do not need any more */
1340 cleanup(fnm, nfile, k, nnodes, pd->nPMEnodes, nr, ret==eParselogFatal);
1342 /* If the first run with this number of processors already failed, do not try again: */
1343 if (pd->Gcycles[0] <= 0.0 && repeats > 1)
1345 fprintf(stdout, "Skipping remaining passes of unsuccessful setting, see log file for details.\n");
1346 count += repeats-(nr+1);
1349 } /* end of repeats loop */
1350 } /* end of -npme loop */
1351 } /* end of tpr file loop */
1356 fprintf(fp, "WARNING: The cycle and time step counters could not be reset\n"
1357 "properly. The reason could be that mpirun did not manage to\n"
1358 "export the environment variable GMX_RESET_COUNTER. You might\n"
1359 "have to give a special switch to mpirun for that.\n"
1360 "Alternatively, you can manually set GMX_RESET_COUNTER to the\n"
1361 "value normally provided by -presteps.");
1369 static void check_input(
1376 real maxPMEfraction,
1377 real minPMEfraction,
1379 gmx_large_int_t bench_nsteps,
1380 const t_filenm *fnm,
1390 /* Make sure the input file exists */
1391 if (!gmx_fexist(opt2fn("-s",nfile,fnm)))
1392 gmx_fatal(FARGS, "File %s not found.", opt2fn("-s",nfile,fnm));
1394 /* Make sure that the checkpoint file is not overwritten during benchmarking */
1395 if ( (0 == strcmp(opt2fn("-cpi",nfile,fnm), opt2fn("-bcpo",nfile,fnm)) ) && (sim_part > 1) )
1396 gmx_fatal(FARGS, "Checkpoint input (-cpi) and benchmark checkpoint output (-bcpo) files must not be identical.\n"
1397 "The checkpoint input file must not be overwritten during the benchmarks.\n");
1399 /* Make sure that repeats is >= 0 (if == 0, only write tpr files) */
1401 gmx_fatal(FARGS, "Number of repeats < 0!");
1403 /* Check number of nodes */
1405 gmx_fatal(FARGS, "Number of nodes/threads must be a positive integer.");
1407 /* Automatically choose -ntpr if not set */
1415 /* Set a reasonable scaling factor for rcoulomb */
1417 *rmax = rcoulomb * 1.2;
1419 fprintf(stderr, "Will test %d tpr file%s.\n", *ntprs, *ntprs==1?"":"s");
1424 fprintf(stderr, "Note: Choose ntpr>1 to shift PME load between real and reciprocal space.\n");
1427 /* Make shure that rmin <= rcoulomb <= rmax */
1428 if (*rmin <= 0) *rmin = rcoulomb;
1429 if (*rmax <= 0) *rmax = rcoulomb;
1430 if ( !(*rmin <= *rmax) )
1432 gmx_fatal(FARGS, "Please choose the Coulomb radii such that rmin <= rmax.\n"
1433 "rmin = %g, rmax = %g, actual rcoul from .tpr file = %g\n", *rmin, *rmax, rcoulomb);
1435 /* Add test scenarios if rmin or rmax were set */
1438 if ( !is_equal(*rmin, rcoulomb) && (*ntprs == 1) )
1441 fprintf(stderr, "NOTE: Setting -rmin to %g changed -ntpr to %d\n",
1444 if (!is_equal(*rmax, rcoulomb) && (*ntprs == 1) )
1447 fprintf(stderr, "NOTE: Setting -rmax to %g changed -ntpr to %d\n",
1452 /* If one of rmin, rmax is set, we need 2 tpr files at minimum */
1453 if ( !is_equal(*rmax, rcoulomb) || !is_equal(*rmin, rcoulomb) )
1454 *ntprs = max(*ntprs, 2);
1456 /* If both rmin, rmax are set, we need 3 tpr files at minimum */
1457 if ( !is_equal(*rmax, rcoulomb) && !is_equal(*rmin, rcoulomb) )
1458 *ntprs = max(*ntprs, 3);
1462 fprintf(stderr, "NOTE: Your rmin, rmax setting changed -ntpr to %d\n", *ntprs);
1467 if (is_equal(*rmin,rcoulomb) && is_equal(rcoulomb,*rmax)) /* We have just a single rc */
1469 fprintf(stderr, "WARNING: Resetting -ntpr to 1 since no Coulomb radius scaling is requested.\n"
1470 "Please set rmin < rmax to test Coulomb radii in the [rmin, rmax] interval\n"
1471 "with correspondingly adjusted PME grid settings\n");
1476 /* Check whether max and min fraction are within required values */
1477 if (maxPMEfraction > 0.5 || maxPMEfraction < 0)
1478 gmx_fatal(FARGS, "-max must be between 0 and 0.5");
1479 if (minPMEfraction > 0.5 || minPMEfraction < 0)
1480 gmx_fatal(FARGS, "-min must be between 0 and 0.5");
1481 if (maxPMEfraction < minPMEfraction)
1482 gmx_fatal(FARGS, "-max must be larger or equal to -min");
1484 /* Check whether the number of steps - if it was set - has a reasonable value */
1485 if (bench_nsteps < 0)
1486 gmx_fatal(FARGS, "Number of steps must be positive.");
1488 if (bench_nsteps > 10000 || bench_nsteps < 100)
1490 fprintf(stderr, "WARNING: steps=");
1491 fprintf(stderr, gmx_large_int_pfmt, bench_nsteps);
1492 fprintf(stderr, ". Are you sure you want to perform so %s steps for each benchmark?\n", (bench_nsteps < 100)? "few" : "many");
1497 gmx_fatal(FARGS, "Cannot have a negative number of presteps.\n");
1500 /* Check for rcoulomb scaling if more than one .tpr file is tested */
1503 if (*rmin/rcoulomb < 0.75 || *rmax/rcoulomb > 1.25)
1504 fprintf(stderr, "WARNING: Applying extreme scaling factor. I hope you know what you are doing.\n");
1507 /* If a fixed number of PME nodes is set we do rcoulomb and PME gird tuning
1508 * only. We need to check whether the requested number of PME-only nodes
1510 if (npme_fixed > -1)
1512 /* No more than 50% of all nodes can be assigned as PME-only nodes. */
1513 if (2*npme_fixed > nnodes)
1515 gmx_fatal(FARGS, "Cannot have more than %d PME-only nodes for a total of %d nodes (you chose %d).\n",
1516 nnodes/2, nnodes, npme_fixed);
1518 if ((npme_fixed > 0) && (5*npme_fixed < nnodes))
1520 fprintf(stderr, "WARNING: Only %g percent of the nodes are assigned as PME-only nodes.\n",
1521 100.0*((real)npme_fixed / (real)nnodes));
1523 if (opt2parg_bSet("-min",npargs,pa) || opt2parg_bSet("-max",npargs,pa))
1525 fprintf(stderr, "NOTE: The -min, -max, and -npme options have no effect when a\n"
1526 " fixed number of PME-only nodes is requested with -fix.\n");
1532 /* Returns TRUE when "opt" is needed at launch time */
1533 static gmx_bool is_launch_file(char *opt, gmx_bool bSet)
1535 /* Apart from the input .tpr we need all options that were set
1536 * on the command line and that do not start with -b */
1537 if (0 == strncmp(opt,"-b", 2) || 0 == strncmp(opt,"-s", 2))
1547 /* Returns TRUE when "opt" defines a file which is needed for the benchmarks runs */
1548 static gmx_bool is_bench_file(char *opt, gmx_bool bSet, gmx_bool bOptional, gmx_bool bIsOutput)
1550 /* Apart from the input .tpr, all files starting with "-b" are for
1551 * _b_enchmark files exclusively */
1552 if (0 == strncmp(opt,"-s", 2)) return FALSE;
1553 if (0 == strncmp(opt,"-b", 2) || 0 == strncmp(opt,"-s", 2))
1555 if (!bOptional || bSet)
1565 if (bSet) /* These are additional input files like -cpi -ei */
1573 /* Adds 'buf' to 'str' */
1574 static void add_to_string(char **str, char *buf)
1579 len = strlen(*str) + strlen(buf) + 1;
1585 /* Create the command line for the benchmark as well as for the real run */
1586 static void create_command_line_snippets(
1588 gmx_bool bAppendFiles,
1589 gmx_bool bKeepAndNumCPT,
1590 gmx_bool bResetHWay,
1596 const char *procstring, /* How to pass the number of processors to $MPIRUN */
1597 char *cmd_np[], /* Actual command line snippet, e.g. '-np <N>' */
1598 char *cmd_args_bench[], /* command line arguments for benchmark runs */
1599 char *cmd_args_launch[], /* command line arguments for simulation run */
1600 char extra_args[]) /* Add this to the end of the command line */
1605 char strbuf[STRLEN];
1608 /* strlen needs at least '\0' as a string: */
1609 snew(*cmd_args_bench ,1);
1610 snew(*cmd_args_launch,1);
1611 *cmd_args_launch[0]='\0';
1612 *cmd_args_bench[0] ='\0';
1615 /*******************************************/
1616 /* 1. Process other command line arguments */
1617 /*******************************************/
1620 /* Add equilibration steps to benchmark options */
1621 sprintf(strbuf, "-resetstep %d ", presteps);
1622 add_to_string(cmd_args_bench, strbuf);
1624 /* These switches take effect only at launch time */
1625 if (FALSE == bAppendFiles)
1627 add_to_string(cmd_args_launch, "-noappend ");
1631 add_to_string(cmd_args_launch, "-cpnum ");
1635 add_to_string(cmd_args_launch, "-resethway ");
1638 /********************/
1639 /* 2. Process files */
1640 /********************/
1641 for (i=0; i<nfile; i++)
1643 opt = (char *)fnm[i].opt;
1644 name = opt2fn(opt,nfile,fnm);
1646 /* Strbuf contains the options, now let's sort out where we need that */
1647 sprintf(strbuf, "%s %s ", opt, name);
1649 if ( is_bench_file(opt, opt2bSet(opt,nfile,fnm), is_optional(&fnm[i]), is_output(&fnm[i])) )
1651 /* All options starting with -b* need the 'b' removed,
1652 * therefore overwrite strbuf */
1653 if (0 == strncmp(opt, "-b", 2))
1654 sprintf(strbuf, "-%s %s ", &opt[2], name);
1656 add_to_string(cmd_args_bench, strbuf);
1659 if ( is_launch_file(opt,opt2bSet(opt,nfile,fnm)) )
1660 add_to_string(cmd_args_launch, strbuf);
1663 add_to_string(cmd_args_bench , extra_args);
1664 add_to_string(cmd_args_launch, extra_args);
1668 /* Set option opt */
1669 static void setopt(const char *opt,int nfile,t_filenm fnm[])
1673 for(i=0; (i<nfile); i++)
1674 if (strcmp(opt,fnm[i].opt)==0)
1675 fnm[i].flag |= ffSET;
1679 /* This routine inspects the tpr file and ...
1680 * 1. checks for output files that get triggered by a tpr option. These output
1681 * files are marked as 'set' to allow for a proper cleanup after each
1683 * 2. returns the PME:PP load ratio
1684 * 3. returns rcoulomb from the tpr */
1685 static float inspect_tpr(int nfile, t_filenm fnm[], real *rcoulomb)
1687 gmx_bool bPull; /* Is pulling requested in .tpr file? */
1688 gmx_bool bTpi; /* Is test particle insertion requested? */
1689 gmx_bool bFree; /* Is a free energy simulation requested? */
1690 gmx_bool bNM; /* Is a normal mode analysis requested? */
1696 /* Check tpr file for options that trigger extra output files */
1697 read_tpx_state(opt2fn("-s",nfile,fnm),&ir,&state,NULL,&mtop);
1698 bPull = (epullNO != ir.ePull);
1699 bFree = (efepNO != ir.efep );
1700 bNM = (eiNM == ir.eI );
1701 bTpi = EI_TPI(ir.eI);
1703 /* Set these output files on the tuning command-line */
1706 setopt("-pf" , nfile, fnm);
1707 setopt("-px" , nfile, fnm);
1711 setopt("-dhdl", nfile, fnm);
1715 setopt("-tpi" , nfile, fnm);
1716 setopt("-tpid", nfile, fnm);
1720 setopt("-mtx" , nfile, fnm);
1723 *rcoulomb = ir.rcoulomb;
1725 /* Return the estimate for the number of PME nodes */
1726 return pme_load_estimate(&mtop,&ir,state.box);
1730 static void couple_files_options(int nfile, t_filenm fnm[])
1733 gmx_bool bSet,bBench;
1738 for (i=0; i<nfile; i++)
1740 opt = (char *)fnm[i].opt;
1741 bSet = ((fnm[i].flag & ffSET) != 0);
1742 bBench = (0 == strncmp(opt,"-b", 2));
1744 /* Check optional files */
1745 /* If e.g. -eo is set, then -beo also needs to be set */
1746 if (is_optional(&fnm[i]) && bSet && !bBench)
1748 sprintf(buf, "-b%s", &opt[1]);
1749 setopt(buf,nfile,fnm);
1751 /* If -beo is set, then -eo also needs to be! */
1752 if (is_optional(&fnm[i]) && bSet && bBench)
1754 sprintf(buf, "-%s", &opt[2]);
1755 setopt(buf,nfile,fnm);
1761 static double gettime()
1763 #ifdef HAVE_GETTIMEOFDAY
1767 gettimeofday(&t,NULL);
1769 seconds = (double) t.tv_sec + 1e-6*(double)t.tv_usec;
1775 seconds = time(NULL);
1782 #define BENCHSTEPS (1000)
1784 int gmx_tune_pme(int argc,char *argv[])
1786 const char *desc[] = {
1787 "For a given number [TT]-np[tt] or [TT]-nt[tt] of processors/threads, this program systematically",
1788 "times [TT]mdrun[tt] with various numbers of PME-only nodes and determines",
1789 "which setting is fastest. It will also test whether performance can",
1790 "be enhanced by shifting load from the reciprocal to the real space",
1791 "part of the Ewald sum. ",
1792 "Simply pass your [TT].tpr[tt] file to [TT]g_tune_pme[tt] together with other options",
1793 "for [TT]mdrun[tt] as needed.[PAR]",
1794 "Which executables are used can be set in the environment variables",
1795 "MPIRUN and MDRUN. If these are not present, 'mpirun' and 'mdrun'",
1796 "will be used as defaults. Note that for certain MPI frameworks you",
1797 "need to provide a machine- or hostfile. This can also be passed",
1798 "via the MPIRUN variable, e.g.[PAR]",
1799 "[TT]export MPIRUN=\"/usr/local/mpirun -machinefile hosts\"[tt][PAR]",
1800 "Please call [TT]g_tune_pme[tt] with the normal options you would pass to",
1801 "[TT]mdrun[tt] and add [TT]-np[tt] for the number of processors to perform the",
1802 "tests on, or [TT]-nt[tt] for the number of threads. You can also add [TT]-r[tt]",
1803 "to repeat each test several times to get better statistics. [PAR]",
1804 "[TT]g_tune_pme[tt] can test various real space / reciprocal space workloads",
1805 "for you. With [TT]-ntpr[tt] you control how many extra [TT].tpr[tt] files will be",
1806 "written with enlarged cutoffs and smaller Fourier grids respectively.",
1807 "Typically, the first test (number 0) will be with the settings from the input",
1808 "[TT].tpr[tt] file; the last test (number [TT]ntpr[tt]) will have the Coulomb cutoff",
1809 "specified by [TT]-rmax[tt] with a somwhat smaller PME grid at the same time. ",
1810 "In this last test, the Fourier spacing is multiplied with [TT]rmax[tt]/rcoulomb. ",
1811 "The remaining [TT].tpr[tt] files will have equally-spaced Coulomb radii (and Fourier "
1812 "spacings) between these extremes. [BB]Note[bb] that you can set [TT]-ntpr[tt] to 1",
1813 "if you just seek the optimal number of PME-only nodes; in that case",
1814 "your input [TT].tpr[tt] file will remain unchanged.[PAR]",
1815 "For the benchmark runs, the default of 1000 time steps should suffice for most",
1816 "MD systems. The dynamic load balancing needs about 100 time steps",
1817 "to adapt to local load imbalances, therefore the time step counters",
1818 "are by default reset after 100 steps. For large systems (>1M atoms), as well as ",
1819 "for a higher accuarcy of the measurements, you should set [TT]-resetstep[tt] to a higher value.",
1820 "From the 'DD' load imbalance entries in the md.log output file you",
1821 "can tell after how many steps the load is sufficiently balanced. Example call:[PAR]"
1822 "[TT]g_tune_pme -np 64 -s protein.tpr -launch[tt][PAR]",
1823 "After calling [TT]mdrun[tt] several times, detailed performance information",
1824 "is available in the output file [TT]perf.out.[tt] ",
1825 "[BB]Note[bb] that during the benchmarks, a couple of temporary files are written",
1826 "(options [TT]-b[tt]*), these will be automatically deleted after each test.[PAR]",
1827 "If you want the simulation to be started automatically with the",
1828 "optimized parameters, use the command line option [TT]-launch[tt].[PAR]",
1833 int pmeentries=0; /* How many values for -npme do we actually test for each tpr file */
1834 real maxPMEfraction=0.50;
1835 real minPMEfraction=0.25;
1836 int maxPMEnodes, minPMEnodes;
1837 float guessPMEratio; /* guessed PME:PP ratio based on the tpr file */
1838 float guessPMEnodes;
1839 int npme_fixed=-2; /* If >= -1, use only this number
1840 * of PME-only nodes */
1842 real rmin=0.0,rmax=0.0; /* min and max value for rcoulomb if scaling is requested */
1843 real rcoulomb=-1.0; /* Coulomb radius as set in .tpr file */
1844 gmx_large_int_t bench_nsteps=BENCHSTEPS;
1845 gmx_large_int_t new_sim_nsteps=-1; /* -1 indicates: not set by the user */
1846 gmx_large_int_t cpt_steps=0; /* Step counter in .cpt input file */
1847 int presteps=100; /* Do a full cycle reset after presteps steps */
1848 gmx_bool bOverwrite=FALSE, bKeepTPR;
1849 gmx_bool bLaunch=FALSE;
1850 char *ExtraArgs=NULL;
1851 char **tpr_names=NULL;
1852 const char *simulation_tpr=NULL;
1853 int best_npme, best_tpr;
1854 int sim_part = 1; /* For benchmarks with checkpoint files */
1857 /* Default program names if nothing else is found */
1858 char *cmd_mpirun=NULL, *cmd_mdrun=NULL;
1859 char *cmd_args_bench, *cmd_args_launch;
1862 t_perf **perfdata=NULL;
1868 /* Print out how long the tuning took */
1871 static t_filenm fnm[] = {
1873 { efOUT, "-p", "perf", ffWRITE },
1874 { efLOG, "-err", "bencherr", ffWRITE },
1875 { efTPX, "-so", "tuned", ffWRITE },
1877 { efTPX, NULL, NULL, ffREAD },
1878 { efTRN, "-o", NULL, ffWRITE },
1879 { efXTC, "-x", NULL, ffOPTWR },
1880 { efCPT, "-cpi", NULL, ffOPTRD },
1881 { efCPT, "-cpo", NULL, ffOPTWR },
1882 { efSTO, "-c", "confout", ffWRITE },
1883 { efEDR, "-e", "ener", ffWRITE },
1884 { efLOG, "-g", "md", ffWRITE },
1885 { efXVG, "-dhdl", "dhdl", ffOPTWR },
1886 { efXVG, "-field", "field", ffOPTWR },
1887 { efXVG, "-table", "table", ffOPTRD },
1888 { efXVG, "-tabletf", "tabletf", ffOPTRD },
1889 { efXVG, "-tablep", "tablep", ffOPTRD },
1890 { efXVG, "-tableb", "table", ffOPTRD },
1891 { efTRX, "-rerun", "rerun", ffOPTRD },
1892 { efXVG, "-tpi", "tpi", ffOPTWR },
1893 { efXVG, "-tpid", "tpidist", ffOPTWR },
1894 { efEDI, "-ei", "sam", ffOPTRD },
1895 { efEDO, "-eo", "sam", ffOPTWR },
1896 { efGCT, "-j", "wham", ffOPTRD },
1897 { efGCT, "-jo", "bam", ffOPTWR },
1898 { efXVG, "-ffout", "gct", ffOPTWR },
1899 { efXVG, "-devout", "deviatie", ffOPTWR },
1900 { efXVG, "-runav", "runaver", ffOPTWR },
1901 { efXVG, "-px", "pullx", ffOPTWR },
1902 { efXVG, "-pf", "pullf", ffOPTWR },
1903 { efXVG, "-ro", "rotation", ffOPTWR },
1904 { efLOG, "-ra", "rotangles",ffOPTWR },
1905 { efLOG, "-rs", "rotslabs", ffOPTWR },
1906 { efLOG, "-rt", "rottorque",ffOPTWR },
1907 { efMTX, "-mtx", "nm", ffOPTWR },
1908 { efNDX, "-dn", "dipole", ffOPTWR },
1909 /* Output files that are deleted after each benchmark run */
1910 { efTRN, "-bo", "bench", ffWRITE },
1911 { efXTC, "-bx", "bench", ffWRITE },
1912 { efCPT, "-bcpo", "bench", ffWRITE },
1913 { efSTO, "-bc", "bench", ffWRITE },
1914 { efEDR, "-be", "bench", ffWRITE },
1915 { efLOG, "-bg", "bench", ffWRITE },
1916 { efEDO, "-beo", "bench", ffOPTWR },
1917 { efXVG, "-bdhdl", "benchdhdl",ffOPTWR },
1918 { efXVG, "-bfield", "benchfld" ,ffOPTWR },
1919 { efXVG, "-btpi", "benchtpi", ffOPTWR },
1920 { efXVG, "-btpid", "benchtpid",ffOPTWR },
1921 { efGCT, "-bjo", "bench", ffOPTWR },
1922 { efXVG, "-bffout", "benchgct", ffOPTWR },
1923 { efXVG, "-bdevout","benchdev", ffOPTWR },
1924 { efXVG, "-brunav", "benchrnav",ffOPTWR },
1925 { efXVG, "-bpx", "benchpx", ffOPTWR },
1926 { efXVG, "-bpf", "benchpf", ffOPTWR },
1927 { efXVG, "-bro", "benchrot", ffOPTWR },
1928 { efLOG, "-bra", "benchrota",ffOPTWR },
1929 { efLOG, "-brs", "benchrots",ffOPTWR },
1930 { efLOG, "-brt", "benchrott",ffOPTWR },
1931 { efMTX, "-bmtx", "benchn", ffOPTWR },
1932 { efNDX, "-bdn", "bench", ffOPTWR }
1935 gmx_bool bThreads = FALSE;
1939 const char *procstring[] =
1940 { NULL, "-np", "-n", "none", NULL };
1941 const char *npmevalues_opt[] =
1942 { NULL, "auto", "all", "subset", NULL };
1944 gmx_bool bAppendFiles=TRUE;
1945 gmx_bool bKeepAndNumCPT=FALSE;
1946 gmx_bool bResetCountersHalfWay=FALSE;
1947 gmx_bool bBenchmark=TRUE;
1949 output_env_t oenv=NULL;
1952 /***********************/
1953 /* g_tune_pme options: */
1954 /***********************/
1955 { "-np", FALSE, etINT, {&nnodes},
1956 "Number of nodes to run the tests on (must be > 2 for separate PME nodes)" },
1957 { "-npstring", FALSE, etENUM, {procstring},
1958 "Specify the number of processors to [TT]$MPIRUN[tt] using this string"},
1959 { "-nt", FALSE, etINT, {&nthreads},
1960 "Number of threads to run the tests on (turns MPI & mpirun off)"},
1961 { "-r", FALSE, etINT, {&repeats},
1962 "Repeat each test this often" },
1963 { "-max", FALSE, etREAL, {&maxPMEfraction},
1964 "Max fraction of PME nodes to test with" },
1965 { "-min", FALSE, etREAL, {&minPMEfraction},
1966 "Min fraction of PME nodes to test with" },
1967 { "-npme", FALSE, etENUM, {npmevalues_opt},
1968 "Within -min and -max, benchmark all possible values for [TT]-npme[tt], or just a reasonable subset. "
1969 "Auto neglects -min and -max and chooses reasonable values around a guess for npme derived from the .tpr"},
1970 { "-fix", FALSE, etINT, {&npme_fixed},
1971 "If >= -1, do not vary the number of PME-only nodes, instead use this fixed value and only vary rcoulomb and the PME grid spacing."},
1972 { "-rmax", FALSE, etREAL, {&rmax},
1973 "If >0, maximal rcoulomb for -ntpr>1 (rcoulomb upscaling results in fourier grid downscaling)" },
1974 { "-rmin", FALSE, etREAL, {&rmin},
1975 "If >0, minimal rcoulomb for -ntpr>1" },
1976 { "-ntpr", FALSE, etINT, {&ntprs},
1977 "Number of [TT].tpr[tt] files to benchmark. Create this many files with different rcoulomb scaling factors depending on -rmin and -rmax. "
1978 "If < 1, automatically choose the number of [TT].tpr[tt] files to test" },
1979 { "-steps", FALSE, etGMX_LARGE_INT, {&bench_nsteps},
1980 "Take timings for this many steps in the benchmark runs" },
1981 { "-resetstep",FALSE, etINT, {&presteps},
1982 "Let dlb equilibrate this many steps before timings are taken (reset cycle counters after this many steps)" },
1983 { "-simsteps", FALSE, etGMX_LARGE_INT, {&new_sim_nsteps},
1984 "If non-negative, perform this many steps in the real run (overwrites nsteps from [TT].tpr[tt], add [TT].cpt[tt] steps)" },
1985 { "-launch", FALSE, etBOOL, {&bLaunch},
1986 "Launch the real simulation after optimization" },
1987 { "-bench", FALSE, etBOOL, {&bBenchmark},
1988 "Run the benchmarks or just create the input [TT].tpr[tt] files?" },
1989 /******************/
1990 /* mdrun options: */
1991 /******************/
1992 /* We let g_tune_pme parse and understand these options, because we need to
1993 * prevent that they appear on the mdrun command line for the benchmarks */
1994 { "-append", FALSE, etBOOL, {&bAppendFiles},
1995 "Append to previous output files when continuing from checkpoint instead of adding the simulation part number to all file names (for launch only)" },
1996 { "-cpnum", FALSE, etBOOL, {&bKeepAndNumCPT},
1997 "Keep and number checkpoint files (launch only)" },
1998 { "-resethway", FALSE, etBOOL, {&bResetCountersHalfWay},
1999 "HIDDENReset the cycle counters after half the number of steps or halfway [TT]-maxh[tt] (launch only)" }
2003 #define NFILE asize(fnm)
2005 CopyRight(stderr,argv[0]);
2007 seconds = gettime();
2009 parse_common_args(&argc,argv,PCA_NOEXIT_ON_ARGS,
2010 NFILE,fnm,asize(pa),pa,asize(desc),desc,
2013 /* Store the remaining unparsed command line entries in a string which
2014 * is then attached to the mdrun command line */
2016 ExtraArgs[0] = '\0';
2017 for (i=1; i<argc; i++) /* argc will now be 1 if everything was understood */
2019 add_to_string(&ExtraArgs, argv[i]);
2020 add_to_string(&ExtraArgs, " ");
2023 if (opt2parg_bSet("-nt",asize(pa),pa))
2026 if (opt2parg_bSet("-npstring",asize(pa),pa))
2027 fprintf(stderr, "WARNING: -npstring has no effect when using threads.\n");
2030 gmx_fatal(FARGS, "Can't run multi-threaded MPI simulation yet!");
2031 /* and now we just set this; a bit of an ugly hack*/
2034 /* Check for PME:PP ratio and whether tpr triggers additional output files */
2035 guessPMEratio = inspect_tpr(NFILE,fnm,&rcoulomb);
2037 /* Automatically set -beo options if -eo is set etc. */
2038 couple_files_options(NFILE,fnm);
2040 /* Construct the command line arguments for benchmark runs
2041 * as well as for the simulation run */
2043 sprintf(bbuf," -nt %d ", nthreads);
2045 sprintf(bbuf," -np %d ", nnodes);
2049 create_command_line_snippets(bThreads,bAppendFiles,bKeepAndNumCPT,bResetCountersHalfWay,presteps,
2050 NFILE,fnm,asize(pa),pa,procstring[0],
2051 &cmd_np, &cmd_args_bench, &cmd_args_launch,
2054 /* Read in checkpoint file if requested */
2056 if(opt2bSet("-cpi",NFILE,fnm))
2059 cr->duty=DUTY_PP; /* makes the following routine happy */
2060 read_checkpoint_simulation_part(opt2fn("-cpi",NFILE,fnm),
2061 &sim_part,&cpt_steps,cr,
2062 FALSE,NFILE,fnm,NULL,NULL);
2065 /* sim_part will now be 1 if no checkpoint file was found */
2067 gmx_fatal(FARGS, "Checkpoint file %s not found!", opt2fn("-cpi",NFILE,fnm));
2070 /* Open performance output file and write header info */
2071 fp = ffopen(opt2fn("-p",NFILE,fnm),"w");
2073 /* Make a quick consistency check of command line parameters */
2074 check_input(nnodes, repeats, &ntprs, &rmin, rcoulomb, &rmax,
2075 maxPMEfraction, minPMEfraction, npme_fixed,
2076 bench_nsteps, fnm, NFILE, sim_part, presteps,
2079 /* Determine the maximum and minimum number of PME nodes to test,
2080 * the actual list of settings is build in do_the_tests(). */
2081 if ((nnodes > 2) && (npme_fixed < -1))
2083 if (0 == strcmp(npmevalues_opt[0], "auto"))
2085 /* Determine the npme range automatically based on the PME:PP load guess */
2086 if (guessPMEratio > 1.0)
2088 /* More PME than PP work, probably we do not need separate PME nodes at all! */
2089 maxPMEnodes=nnodes/2;
2090 minPMEnodes=nnodes/2;
2094 /* PME : PP load is in the range 0..1, let's test around the guess */
2095 guessPMEnodes = nnodes/(1.0 + 1.0/guessPMEratio);
2096 minPMEnodes = floor(0.7*guessPMEnodes);
2097 maxPMEnodes = ceil(1.6*guessPMEnodes);
2098 maxPMEnodes = min(maxPMEnodes, nnodes/2);
2103 /* Determine the npme range based on user input */
2104 maxPMEnodes = floor(maxPMEfraction*nnodes);
2105 minPMEnodes = max(floor(minPMEfraction*nnodes), 0);
2106 fprintf(stdout, "Will try runs with %d ", minPMEnodes);
2107 if (maxPMEnodes != minPMEnodes)
2108 fprintf(stdout, "- %d ", maxPMEnodes);
2109 fprintf(stdout, "PME-only nodes.\n Note that the automatic number of PME-only nodes and no separate PME nodes are always tested.\n");
2118 /* Get the commands we need to set up the runs from environment variables */
2119 get_program_paths(bThreads, &cmd_mpirun, cmd_np, &cmd_mdrun, repeats);
2121 /* Print some header info to file */
2123 fprintf(fp, "\n P E R F O R M A N C E R E S U L T S\n");
2125 fprintf(fp, "%s for Gromacs %s\n", ShortProgram(),GromacsVersion());
2128 fprintf(fp, "Number of nodes : %d\n", nnodes);
2129 fprintf(fp, "The mpirun command is : %s\n", cmd_mpirun);
2130 if ( strcmp(procstring[0], "none") != 0)
2131 fprintf(fp, "Passing # of nodes via : %s\n", procstring[0]);
2133 fprintf(fp, "Not setting number of nodes in system call\n");
2136 fprintf(fp, "Number of threads : %d\n", nnodes);
2138 fprintf(fp, "The mdrun command is : %s\n", cmd_mdrun);
2139 fprintf(fp, "mdrun args benchmarks : %s\n", cmd_args_bench);
2140 fprintf(fp, "Benchmark steps : ");
2141 fprintf(fp, gmx_large_int_pfmt, bench_nsteps);
2143 fprintf(fp, "dlb equilibration steps : %d\n", presteps);
2146 fprintf(fp, "Checkpoint time step : ");
2147 fprintf(fp, gmx_large_int_pfmt, cpt_steps);
2150 fprintf(fp, "mdrun args at launchtime: %s\n", cmd_args_launch);
2152 if (new_sim_nsteps >= 0)
2155 fprintf(stderr, "Note: Simulation input file %s will have ", opt2fn("-so",NFILE,fnm));
2156 fprintf(stderr, gmx_large_int_pfmt, new_sim_nsteps+cpt_steps);
2157 fprintf(stderr, " steps.\n");
2158 fprintf(fp, "Simulation steps : ");
2159 fprintf(fp, gmx_large_int_pfmt, new_sim_nsteps);
2163 fprintf(fp, "Repeats for each test : %d\n", repeats);
2165 if (npme_fixed >= -1)
2167 fprintf(fp, "Fixing -npme at : %d\n", npme_fixed);
2170 fprintf(fp, "Input file : %s\n", opt2fn("-s",NFILE,fnm));
2171 fprintf(fp, " PME/PP load estimate : %g\n", guessPMEratio);
2173 /* Allocate memory for the inputinfo struct: */
2175 info->nr_inputfiles = ntprs;
2176 for (i=0; i<ntprs; i++)
2178 snew(info->rcoulomb , ntprs);
2179 snew(info->rvdw , ntprs);
2180 snew(info->rlist , ntprs);
2181 snew(info->rlistlong, ntprs);
2182 snew(info->nkx , ntprs);
2183 snew(info->nky , ntprs);
2184 snew(info->nkz , ntprs);
2185 snew(info->fsx , ntprs);
2186 snew(info->fsy , ntprs);
2187 snew(info->fsz , ntprs);
2189 /* Make alternative tpr files to test: */
2190 snew(tpr_names, ntprs);
2191 for (i=0; i<ntprs; i++)
2192 snew(tpr_names[i], STRLEN);
2194 /* It can be that ntprs is reduced by make_benchmark_tprs if not enough
2195 * different grids could be found. */
2196 make_benchmark_tprs(opt2fn("-s",NFILE,fnm), tpr_names, bench_nsteps+presteps,
2197 cpt_steps, rmin, rmax, &ntprs, info, fp);
2199 /********************************************************************************/
2200 /* Main loop over all scenarios we need to test: tpr files, PME nodes, repeats */
2201 /********************************************************************************/
2202 snew(perfdata, ntprs);
2205 do_the_tests(fp, tpr_names, maxPMEnodes, minPMEnodes, npme_fixed, npmevalues_opt[0], perfdata, &pmeentries,
2206 repeats, nnodes, ntprs, bThreads, cmd_mpirun, cmd_np, cmd_mdrun,
2207 cmd_args_bench, fnm, NFILE, sim_part, presteps, cpt_steps);
2209 fprintf(fp, "\nTuning took%8.1f minutes.\n", (gettime()-seconds)/60.0);
2211 /* Analyse the results and give a suggestion for optimal settings: */
2212 bKeepTPR = analyze_data(fp, opt2fn("-p", NFILE, fnm), perfdata, nnodes, ntprs, pmeentries,
2213 repeats, info, &best_tpr, &best_npme);
2215 /* Take the best-performing tpr file and enlarge nsteps to original value */
2216 if ( bKeepTPR && !bOverwrite )
2218 simulation_tpr = opt2fn("-s",NFILE,fnm);
2222 simulation_tpr = opt2fn("-so",NFILE,fnm);
2223 modify_PMEsettings(bOverwrite? (new_sim_nsteps+cpt_steps) :
2224 info->orig_sim_steps, tpr_names[best_tpr],
2228 /* Let's get rid of the temporary benchmark input files */
2229 for (i=0; i<ntprs; i++)
2231 fprintf(stdout, "Deleting temporary benchmark input file %s\n", tpr_names[i]);
2232 remove(tpr_names[i]);
2235 /* Now start the real simulation if the user requested it ... */
2236 launch_simulation(bLaunch, fp, bThreads, cmd_mpirun, cmd_np, cmd_mdrun,
2237 cmd_args_launch, simulation_tpr, nnodes, best_npme);
2241 /* ... or simply print the performance results to screen: */
2243 finalize(opt2fn("-p", NFILE, fnm));