2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2009,2010,2011,2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
41 #ifdef HAVE_SYS_TIME_H
47 #include "gromacs/commandline/pargs.h"
52 #include "gromacs/fileio/tpxio.h"
56 #include "checkpoint.h"
61 #include "gromacs/timing/walltime_accounting.h"
64 /* Enum for situations that can occur during log file parsing, the
65 * corresponding string entries can be found in do_the_tests() in
66 * const char* ParseLog[] */
72 eParselogResetProblem,
76 eParselogLargePrimeFactor,
84 int nPMEnodes; /* number of PME-only nodes used in this test */
85 int nx, ny, nz; /* DD grid */
86 int guessPME; /* if nPMEnodes == -1, this is the guessed number of PME nodes */
87 double *Gcycles; /* This can contain more than one value if doing multiple tests */
91 float *PME_f_load; /* PME mesh/force load average*/
92 float PME_f_load_Av; /* Average average ;) ... */
93 char *mdrun_cmd_line; /* Mdrun command line used for this test */
99 int nr_inputfiles; /* The number of tpr and mdp input files */
100 gmx_int64_t orig_sim_steps; /* Number of steps to be done in the real simulation */
101 gmx_int64_t orig_init_step; /* Init step for the real simulation */
102 real *rcoulomb; /* The coulomb radii [0...nr_inputfiles] */
103 real *rvdw; /* The vdW radii */
104 real *rlist; /* Neighbourlist cutoff radius */
106 int *nkx, *nky, *nkz;
107 real *fsx, *fsy, *fsz; /* Fourierspacing in x,y,z dimension */
111 static void sep_line(FILE *fp)
113 fprintf(fp, "\n------------------------------------------------------------\n");
117 /* Wrapper for system calls */
118 static int gmx_system_call(char *command)
121 gmx_fatal(FARGS, "No calls to system(3) supported on this platform. Attempted to call:\n'%s'\n", command);
123 return ( system(command) );
128 /* Check if string starts with substring */
129 static gmx_bool str_starts(const char *string, const char *substring)
131 return ( strncmp(string, substring, strlen(substring)) == 0);
135 static void cleandata(t_perf *perfdata, int test_nr)
137 perfdata->Gcycles[test_nr] = 0.0;
138 perfdata->ns_per_day[test_nr] = 0.0;
139 perfdata->PME_f_load[test_nr] = 0.0;
145 static gmx_bool is_equal(real a, real b)
147 real diff, eps = 1.0e-7;
168 static void finalize(const char *fn_out)
174 fp = fopen(fn_out, "r");
175 fprintf(stdout, "\n\n");
177 while (fgets(buf, STRLEN-1, fp) != NULL)
179 fprintf(stdout, "%s", buf);
182 fprintf(stdout, "\n\n");
187 eFoundNothing, eFoundDDStr, eFoundAccountingStr, eFoundCycleStr
190 static int parse_logfile(const char *logfile, const char *errfile,
191 t_perf *perfdata, int test_nr, int presteps, gmx_int64_t cpt_steps,
195 char line[STRLEN], dumstring[STRLEN], dumstring2[STRLEN];
196 const char matchstrdd[] = "Domain decomposition grid";
197 const char matchstrcr[] = "resetting all time and cycle counters";
198 const char matchstrbal[] = "Average PME mesh/force load:";
199 const char matchstring[] = "R E A L C Y C L E A N D T I M E A C C O U N T I N G";
200 const char errSIG[] = "signal, stopping at the next";
203 float dum1, dum2, dum3, dum4;
206 gmx_int64_t resetsteps = -1;
207 gmx_bool bFoundResetStr = FALSE;
208 gmx_bool bResetChecked = FALSE;
211 if (!gmx_fexist(logfile))
213 fprintf(stderr, "WARNING: Could not find logfile %s.\n", logfile);
214 cleandata(perfdata, test_nr);
215 return eParselogNotFound;
218 fp = fopen(logfile, "r");
219 perfdata->PME_f_load[test_nr] = -1.0;
220 perfdata->guessPME = -1;
222 iFound = eFoundNothing;
225 iFound = eFoundDDStr; /* Skip some case statements */
228 while (fgets(line, STRLEN, fp) != NULL)
230 /* Remove leading spaces */
233 /* Check for TERM and INT signals from user: */
234 if (strstr(line, errSIG) != NULL)
237 cleandata(perfdata, test_nr);
238 return eParselogTerm;
241 /* Check whether cycle resetting worked */
242 if (presteps > 0 && !bFoundResetStr)
244 if (strstr(line, matchstrcr) != NULL)
246 sprintf(dumstring, "step %s", "%"GMX_SCNd64);
247 sscanf(line, dumstring, &resetsteps);
248 bFoundResetStr = TRUE;
249 if (resetsteps == presteps+cpt_steps)
251 bResetChecked = TRUE;
255 sprintf(dumstring, "%"GMX_PRId64, resetsteps);
256 sprintf(dumstring2, "%"GMX_PRId64, presteps+cpt_steps);
257 fprintf(stderr, "WARNING: Time step counters were reset at step %s,\n"
258 " though they were supposed to be reset at step %s!\n",
259 dumstring, dumstring2);
264 /* Look for strings that appear in a certain order in the log file: */
268 /* Look for domain decomp grid and separate PME nodes: */
269 if (str_starts(line, matchstrdd))
271 sscanf(line, "Domain decomposition grid %d x %d x %d, separate PME nodes %d",
272 &(perfdata->nx), &(perfdata->ny), &(perfdata->nz), &npme);
273 if (perfdata->nPMEnodes == -1)
275 perfdata->guessPME = npme;
277 else if (perfdata->nPMEnodes != npme)
279 gmx_fatal(FARGS, "PME nodes from command line and output file are not identical");
281 iFound = eFoundDDStr;
283 /* Catch a few errors that might have occured: */
284 else if (str_starts(line, "There is no domain decomposition for"))
287 return eParselogNoDDGrid;
289 else if (str_starts(line, "The number of nodes you selected"))
292 return eParselogLargePrimeFactor;
294 else if (str_starts(line, "reading tpx file"))
297 return eParselogTPXVersion;
299 else if (str_starts(line, "The -dd or -npme option request a parallel simulation"))
302 return eParselogNotParallel;
306 /* Look for PME mesh/force balance (not necessarily present, though) */
307 if (str_starts(line, matchstrbal))
309 sscanf(&line[strlen(matchstrbal)], "%f", &(perfdata->PME_f_load[test_nr]));
311 /* Look for matchstring */
312 if (str_starts(line, matchstring))
314 iFound = eFoundAccountingStr;
317 case eFoundAccountingStr:
318 /* Already found matchstring - look for cycle data */
319 if (str_starts(line, "Total "))
321 sscanf(line, "Total %d %lf", &procs, &(perfdata->Gcycles[test_nr]));
322 iFound = eFoundCycleStr;
326 /* Already found cycle data - look for remaining performance info and return */
327 if (str_starts(line, "Performance:"))
329 ndum = sscanf(line, "%s %f %f %f %f", dumstring, &dum1, &dum2, &dum3, &dum4);
330 /* (ns/day) is the second last entry, depending on whether GMX_DETAILED_PERF_STATS was set in print_perf(), nrnb.c */
331 perfdata->ns_per_day[test_nr] = (ndum == 5) ? dum3 : dum1;
333 if (bResetChecked || presteps == 0)
339 return eParselogResetProblem;
346 /* Close the log file */
349 /* Check why there is no performance data in the log file.
350 * Did a fatal errors occur? */
351 if (gmx_fexist(errfile))
353 fp = fopen(errfile, "r");
354 while (fgets(line, STRLEN, fp) != NULL)
356 if (str_starts(line, "Fatal error:") )
358 if (fgets(line, STRLEN, fp) != NULL)
360 fprintf(stderr, "\nWARNING: An error occured during this benchmark:\n"
364 cleandata(perfdata, test_nr);
365 return eParselogFatal;
372 fprintf(stderr, "WARNING: Could not find stderr file %s.\n", errfile);
375 /* Giving up ... we could not find out why there is no performance data in
377 fprintf(stdout, "No performance data in log file.\n");
378 cleandata(perfdata, test_nr);
380 return eParselogNoPerfData;
384 static gmx_bool analyze_data(
393 int *index_tpr, /* OUT: Nr of mdp file with best settings */
394 int *npme_optimal) /* OUT: Optimal number of PME nodes */
397 int line = 0, line_win = -1;
398 int k_win = -1, i_win = -1, winPME;
399 double s = 0.0; /* standard deviation */
402 char str_PME_f_load[13];
403 gmx_bool bCanUseOrigTPR;
404 gmx_bool bRefinedCoul, bRefinedVdW, bRefinedGrid;
410 fprintf(fp, "Summary of successful runs:\n");
411 fprintf(fp, "Line tpr PME nodes Gcycles Av. Std.dev. ns/day PME/f");
414 fprintf(fp, " DD grid");
420 for (k = 0; k < ntprs; k++)
422 for (i = 0; i < ntests; i++)
424 /* Select the right dataset: */
425 pd = &(perfdata[k][i]);
427 pd->Gcycles_Av = 0.0;
428 pd->PME_f_load_Av = 0.0;
429 pd->ns_per_day_Av = 0.0;
431 if (pd->nPMEnodes == -1)
433 sprintf(strbuf, "(%3d)", pd->guessPME);
437 sprintf(strbuf, " ");
440 /* Get the average run time of a setting */
441 for (j = 0; j < nrepeats; j++)
443 pd->Gcycles_Av += pd->Gcycles[j];
444 pd->PME_f_load_Av += pd->PME_f_load[j];
446 pd->Gcycles_Av /= nrepeats;
447 pd->PME_f_load_Av /= nrepeats;
449 for (j = 0; j < nrepeats; j++)
451 if (pd->ns_per_day[j] > 0.0)
453 pd->ns_per_day_Av += pd->ns_per_day[j];
457 /* Somehow the performance number was not aquired for this run,
458 * therefor set the average to some negative value: */
459 pd->ns_per_day_Av = -1.0f*nrepeats;
463 pd->ns_per_day_Av /= nrepeats;
466 if (pd->PME_f_load_Av > 0.0)
468 sprintf(str_PME_f_load, "%12.3f", pd->PME_f_load_Av);
472 sprintf(str_PME_f_load, "%s", " - ");
476 /* We assume we had a successful run if both averages are positive */
477 if (pd->Gcycles_Av > 0.0 && pd->ns_per_day_Av > 0.0)
479 /* Output statistics if repeats were done */
482 /* Calculate the standard deviation */
484 for (j = 0; j < nrepeats; j++)
486 s += pow( pd->Gcycles[j] - pd->Gcycles_Av, 2 );
491 fprintf(fp, "%4d %3d %4d%s %12.3f %12.3f %12.3f %s",
492 line, k, pd->nPMEnodes, strbuf, pd->Gcycles_Av, s,
493 pd->ns_per_day_Av, str_PME_f_load);
496 fprintf(fp, " %3d %3d %3d", pd->nx, pd->ny, pd->nz);
500 /* Store the index of the best run found so far in 'winner': */
501 if ( (k_win == -1) || (pd->Gcycles_Av < perfdata[k_win][i_win].Gcycles_Av) )
514 gmx_fatal(FARGS, "None of the runs was successful! Check %s for problems.", fn);
519 winPME = perfdata[k_win][i_win].nPMEnodes;
523 /* We stuck to a fixed number of PME-only nodes */
524 sprintf(strbuf, "settings No. %d", k_win);
528 /* We have optimized the number of PME-only nodes */
531 sprintf(strbuf, "%s", "the automatic number of PME nodes");
535 sprintf(strbuf, "%d PME nodes", winPME);
538 fprintf(fp, "Best performance was achieved with %s", strbuf);
539 if ((nrepeats > 1) && (ntests > 1))
541 fprintf(fp, " (see line %d)", line_win);
545 /* Only mention settings if they were modified: */
546 bRefinedCoul = !is_equal(info->rcoulomb[k_win], info->rcoulomb[0]);
547 bRefinedVdW = !is_equal(info->rvdw[k_win], info->rvdw[0] );
548 bRefinedGrid = !(info->nkx[k_win] == info->nkx[0] &&
549 info->nky[k_win] == info->nky[0] &&
550 info->nkz[k_win] == info->nkz[0]);
552 if (bRefinedCoul || bRefinedVdW || bRefinedGrid)
554 fprintf(fp, "Optimized PME settings:\n");
555 bCanUseOrigTPR = FALSE;
559 bCanUseOrigTPR = TRUE;
564 fprintf(fp, " New Coulomb radius: %f nm (was %f nm)\n", info->rcoulomb[k_win], info->rcoulomb[0]);
569 fprintf(fp, " New Van der Waals radius: %f nm (was %f nm)\n", info->rvdw[k_win], info->rvdw[0]);
574 fprintf(fp, " New Fourier grid xyz: %d %d %d (was %d %d %d)\n", info->nkx[k_win], info->nky[k_win], info->nkz[k_win],
575 info->nkx[0], info->nky[0], info->nkz[0]);
578 if (bCanUseOrigTPR && ntprs > 1)
580 fprintf(fp, "and original PME settings.\n");
585 /* Return the index of the mdp file that showed the highest performance
586 * and the optimal number of PME nodes */
588 *npme_optimal = winPME;
590 return bCanUseOrigTPR;
594 /* Get the commands we need to set up the runs from environment variables */
595 static void get_program_paths(gmx_bool bThreads, char *cmd_mpirun[], char *cmd_mdrun[])
599 const char def_mpirun[] = "mpirun";
600 const char def_mdrun[] = "mdrun";
602 const char empty_mpirun[] = "";
604 /* Get the commands we need to set up the runs from environment variables */
607 if ( (cp = getenv("MPIRUN")) != NULL)
609 *cmd_mpirun = strdup(cp);
613 *cmd_mpirun = strdup(def_mpirun);
618 *cmd_mpirun = strdup(empty_mpirun);
621 if ( (cp = getenv("MDRUN" )) != NULL)
623 *cmd_mdrun = strdup(cp);
627 *cmd_mdrun = strdup(def_mdrun);
631 /* Check that the commands will run mdrun (perhaps via mpirun) by
632 * running a very quick test simulation. Requires MPI environment to
633 * be available if applicable. */
634 static void check_mdrun_works(gmx_bool bThreads,
635 const char *cmd_mpirun,
637 const char *cmd_mdrun)
639 char *command = NULL;
643 const char filename[] = "benchtest.log";
645 /* This string should always be identical to the one in copyrite.c,
646 * gmx_print_version_info() in the defined(GMX_MPI) section */
647 const char match_mpi[] = "MPI library: MPI";
648 const char match_mdrun[] = "Program: ";
649 gmx_bool bMdrun = FALSE;
650 gmx_bool bMPI = FALSE;
652 /* Run a small test to see whether mpirun + mdrun work */
653 fprintf(stdout, "Making sure that mdrun can be executed. ");
656 snew(command, strlen(cmd_mdrun) + strlen(cmd_np) + strlen(filename) + 50);
657 sprintf(command, "%s%s-version -maxh 0.001 1> %s 2>&1", cmd_mdrun, cmd_np, filename);
661 snew(command, strlen(cmd_mpirun) + strlen(cmd_np) + strlen(cmd_mdrun) + strlen(filename) + 50);
662 sprintf(command, "%s%s%s -version -maxh 0.001 1> %s 2>&1", cmd_mpirun, cmd_np, cmd_mdrun, filename);
664 fprintf(stdout, "Trying '%s' ... ", command);
665 make_backup(filename);
666 gmx_system_call(command);
668 /* Check if we find the characteristic string in the output: */
669 if (!gmx_fexist(filename))
671 gmx_fatal(FARGS, "Output from test run could not be found.");
674 fp = fopen(filename, "r");
675 /* We need to scan the whole output file, since sometimes the queuing system
676 * also writes stuff to stdout/err */
679 cp = fgets(line, STRLEN, fp);
682 if (str_starts(line, match_mdrun) )
686 if (str_starts(line, match_mpi) )
698 gmx_fatal(FARGS, "Need a threaded version of mdrun. This one\n"
700 "seems to have been compiled with MPI instead.",
708 gmx_fatal(FARGS, "Need an MPI-enabled version of mdrun. This one\n"
710 "seems to have been compiled without MPI support.",
717 gmx_fatal(FARGS, "Cannot execute mdrun. Please check %s for problems!",
721 fprintf(stdout, "passed.\n");
729 static void launch_simulation(
730 gmx_bool bLaunch, /* Should the simulation be launched? */
731 FILE *fp, /* General log file */
732 gmx_bool bThreads, /* whether to use threads */
733 char *cmd_mpirun, /* Command for mpirun */
734 char *cmd_np, /* Switch for -np or -ntmpi or empty */
735 char *cmd_mdrun, /* Command for mdrun */
736 char *args_for_mdrun, /* Arguments for mdrun */
737 const char *simulation_tpr, /* This tpr will be simulated */
738 int nPMEnodes) /* Number of PME nodes to use */
743 /* Make enough space for the system call command,
744 * (100 extra chars for -npme ... etc. options should suffice): */
745 snew(command, strlen(cmd_mpirun)+strlen(cmd_mdrun)+strlen(cmd_np)+strlen(args_for_mdrun)+strlen(simulation_tpr)+100);
747 /* Note that the -passall options requires args_for_mdrun to be at the end
748 * of the command line string */
751 sprintf(command, "%s%s-npme %d -s %s %s",
752 cmd_mdrun, cmd_np, nPMEnodes, simulation_tpr, args_for_mdrun);
756 sprintf(command, "%s%s%s -npme %d -s %s %s",
757 cmd_mpirun, cmd_np, cmd_mdrun, nPMEnodes, simulation_tpr, args_for_mdrun);
760 fprintf(fp, "%s this command line to launch the simulation:\n\n%s", bLaunch ? "Using" : "Please use", command);
764 /* Now the real thing! */
767 fprintf(stdout, "\nLaunching simulation with best parameters now.\nExecuting '%s'", command);
770 gmx_system_call(command);
775 static void modify_PMEsettings(
776 gmx_int64_t simsteps, /* Set this value as number of time steps */
777 gmx_int64_t init_step, /* Set this value as init_step */
778 const char *fn_best_tpr, /* tpr file with the best performance */
779 const char *fn_sim_tpr) /* name of tpr file to be launched */
787 read_tpx_state(fn_best_tpr, ir, &state, NULL, &mtop);
789 /* Reset nsteps and init_step to the value of the input .tpr file */
790 ir->nsteps = simsteps;
791 ir->init_step = init_step;
793 /* Write the tpr file which will be launched */
794 sprintf(buf, "Writing optimized simulation file %s with nsteps=%s.\n", fn_sim_tpr, "%"GMX_PRId64);
795 fprintf(stdout, buf, ir->nsteps);
797 write_tpx_state(fn_sim_tpr, ir, &state, &mtop);
803 #define EPME_SWITCHED(e) ((e) == eelPMESWITCH || (e) == eelPMEUSERSWITCH)
805 /* Make additional TPR files with more computational load for the
806 * direct space processors: */
807 static void make_benchmark_tprs(
808 const char *fn_sim_tpr, /* READ : User-provided tpr file */
809 char *fn_bench_tprs[], /* WRITE: Names of benchmark tpr files */
810 gmx_int64_t benchsteps, /* Number of time steps for benchmark runs */
811 gmx_int64_t statesteps, /* Step counter in checkpoint file */
812 real rmin, /* Minimal Coulomb radius */
813 real rmax, /* Maximal Coulomb radius */
814 real bScaleRvdw, /* Scale rvdw along with rcoulomb */
815 int *ntprs, /* No. of TPRs to write, each with a different
816 rcoulomb and fourierspacing */
817 t_inputinfo *info, /* Contains information about mdp file options */
818 FILE *fp) /* Write the output here */
824 real nlist_buffer; /* Thickness of the buffer regions for PME-switch potentials */
827 gmx_bool bNote = FALSE;
828 real add; /* Add this to rcoul for the next test */
829 real fac = 1.0; /* Scaling factor for Coulomb radius */
830 real fourierspacing; /* Basic fourierspacing from tpr */
833 sprintf(buf, "Making benchmark tpr file%s with %s time step%s",
834 *ntprs > 1 ? "s" : "", "%"GMX_PRId64, benchsteps > 1 ? "s" : "");
835 fprintf(stdout, buf, benchsteps);
838 sprintf(buf, " (adding %s steps from checkpoint file)", "%"GMX_PRId64);
839 fprintf(stdout, buf, statesteps);
840 benchsteps += statesteps;
842 fprintf(stdout, ".\n");
846 read_tpx_state(fn_sim_tpr, ir, &state, NULL, &mtop);
848 /* Check if some kind of PME was chosen */
849 if (EEL_PME(ir->coulombtype) == FALSE)
851 gmx_fatal(FARGS, "Can only do optimizations for simulations with %s electrostatics.",
855 /* Check if rcoulomb == rlist, which is necessary for plain PME. */
856 if ( (ir->cutoff_scheme != ecutsVERLET) &&
857 (eelPME == ir->coulombtype) && !(ir->rcoulomb == ir->rlist))
859 gmx_fatal(FARGS, "%s requires rcoulomb (%f) to be equal to rlist (%f).",
860 EELTYPE(eelPME), ir->rcoulomb, ir->rlist);
862 /* For other PME types, rcoulomb is allowed to be smaller than rlist */
863 else if (ir->rcoulomb > ir->rlist)
865 gmx_fatal(FARGS, "%s requires rcoulomb (%f) to be equal to or smaller than rlist (%f)",
866 EELTYPE(ir->coulombtype), ir->rcoulomb, ir->rlist);
869 if (bScaleRvdw && ir->rvdw != ir->rcoulomb)
871 fprintf(stdout, "NOTE: input rvdw != rcoulomb, will not scale rvdw\n");
875 /* Reduce the number of steps for the benchmarks */
876 info->orig_sim_steps = ir->nsteps;
877 ir->nsteps = benchsteps;
878 /* We must not use init_step from the input tpr file for the benchmarks */
879 info->orig_init_step = ir->init_step;
882 /* For PME-switch potentials, keep the radial distance of the buffer region */
883 nlist_buffer = ir->rlist - ir->rcoulomb;
885 /* Determine length of triclinic box vectors */
886 for (d = 0; d < DIM; d++)
889 for (i = 0; i < DIM; i++)
891 box_size[d] += state.box[d][i]*state.box[d][i];
893 box_size[d] = sqrt(box_size[d]);
896 if (ir->fourier_spacing > 0)
898 info->fsx[0] = ir->fourier_spacing;
899 info->fsy[0] = ir->fourier_spacing;
900 info->fsz[0] = ir->fourier_spacing;
904 /* Reconstruct fourierspacing per dimension from the number of grid points and box size */
905 info->fsx[0] = box_size[XX]/ir->nkx;
906 info->fsy[0] = box_size[YY]/ir->nky;
907 info->fsz[0] = box_size[ZZ]/ir->nkz;
910 /* If no value for the fourierspacing was provided on the command line, we
911 * use the reconstruction from the tpr file */
912 if (ir->fourier_spacing > 0)
914 /* Use the spacing from the tpr */
915 fourierspacing = ir->fourier_spacing;
919 /* Use the maximum observed spacing */
920 fourierspacing = max(max(info->fsx[0], info->fsy[0]), info->fsz[0]);
923 fprintf(stdout, "Calculating PME grid points on the basis of a fourierspacing of %f nm\n", fourierspacing);
925 /* For performance comparisons the number of particles is useful to have */
926 fprintf(fp, " Number of particles : %d\n", mtop.natoms);
928 /* Print information about settings of which some are potentially modified: */
929 fprintf(fp, " Coulomb type : %s\n", EELTYPE(ir->coulombtype));
930 fprintf(fp, " Grid spacing x y z : %f %f %f\n",
931 box_size[XX]/ir->nkx, box_size[YY]/ir->nky, box_size[ZZ]/ir->nkz);
932 fprintf(fp, " Van der Waals type : %s\n", EVDWTYPE(ir->vdwtype));
933 if (EVDW_SWITCHED(ir->vdwtype))
935 fprintf(fp, " rvdw_switch : %f nm\n", ir->rvdw_switch);
937 if (EPME_SWITCHED(ir->coulombtype))
939 fprintf(fp, " rlist : %f nm\n", ir->rlist);
941 if (ir->rlistlong != max_cutoff(ir->rvdw, ir->rcoulomb))
943 fprintf(fp, " rlistlong : %f nm\n", ir->rlistlong);
946 /* Print a descriptive line about the tpr settings tested */
947 fprintf(fp, "\nWill try these real/reciprocal workload settings:\n");
948 fprintf(fp, " No. scaling rcoulomb");
949 fprintf(fp, " nkx nky nkz");
950 fprintf(fp, " spacing");
951 if (evdwCUT == ir->vdwtype)
953 fprintf(fp, " rvdw");
955 if (EPME_SWITCHED(ir->coulombtype))
957 fprintf(fp, " rlist");
959 if (ir->rlistlong != max_cutoff(ir->rlist, max_cutoff(ir->rvdw, ir->rcoulomb)) )
961 fprintf(fp, " rlistlong");
963 fprintf(fp, " tpr file\n");
965 /* Loop to create the requested number of tpr input files */
966 for (j = 0; j < *ntprs; j++)
968 /* The first .tpr is the provided one, just need to modify nsteps,
969 * so skip the following block */
972 /* Determine which Coulomb radii rc to use in the benchmarks */
973 add = (rmax-rmin)/(*ntprs-1);
974 if (is_equal(rmin, info->rcoulomb[0]))
976 ir->rcoulomb = rmin + j*add;
978 else if (is_equal(rmax, info->rcoulomb[0]))
980 ir->rcoulomb = rmin + (j-1)*add;
984 /* rmin != rcoul != rmax, ergo test between rmin and rmax */
985 add = (rmax-rmin)/(*ntprs-2);
986 ir->rcoulomb = rmin + (j-1)*add;
989 /* Determine the scaling factor fac */
990 fac = ir->rcoulomb/info->rcoulomb[0];
992 /* Scale the Fourier grid spacing */
993 ir->nkx = ir->nky = ir->nkz = 0;
994 calc_grid(NULL, state.box, fourierspacing*fac, &ir->nkx, &ir->nky, &ir->nkz);
996 /* Adjust other radii since various conditions neet to be fulfilled */
997 if (eelPME == ir->coulombtype)
999 /* plain PME, rcoulomb must be equal to rlist */
1000 ir->rlist = ir->rcoulomb;
1004 /* rlist must be >= rcoulomb, we keep the size of the buffer region */
1005 ir->rlist = ir->rcoulomb + nlist_buffer;
1008 if (bScaleRvdw && evdwCUT == ir->vdwtype)
1010 /* For vdw cutoff, rvdw >= rlist */
1011 ir->rvdw = max(info->rvdw[0], ir->rlist);
1014 ir->rlistlong = max_cutoff(ir->rlist, max_cutoff(ir->rvdw, ir->rcoulomb));
1016 } /* end of "if (j != 0)" */
1018 /* for j==0: Save the original settings
1019 * for j >0: Save modified radii and Fourier grids */
1020 info->rcoulomb[j] = ir->rcoulomb;
1021 info->rvdw[j] = ir->rvdw;
1022 info->nkx[j] = ir->nkx;
1023 info->nky[j] = ir->nky;
1024 info->nkz[j] = ir->nkz;
1025 info->rlist[j] = ir->rlist;
1026 info->rlistlong[j] = ir->rlistlong;
1027 info->fsx[j] = fac*fourierspacing;
1028 info->fsy[j] = fac*fourierspacing;
1029 info->fsz[j] = fac*fourierspacing;
1031 /* Write the benchmark tpr file */
1032 strncpy(fn_bench_tprs[j], fn_sim_tpr, strlen(fn_sim_tpr)-strlen(".tpr"));
1033 sprintf(buf, "_bench%.2d.tpr", j);
1034 strcat(fn_bench_tprs[j], buf);
1035 fprintf(stdout, "Writing benchmark tpr %s with nsteps=", fn_bench_tprs[j]);
1036 fprintf(stdout, "%"GMX_PRId64, ir->nsteps);
1039 fprintf(stdout, ", scaling factor %f\n", fac);
1043 fprintf(stdout, ", unmodified settings\n");
1046 write_tpx_state(fn_bench_tprs[j], ir, &state, &mtop);
1048 /* Write information about modified tpr settings to log file */
1049 fprintf(fp, "%4d%10f%10f", j, fac, ir->rcoulomb);
1050 fprintf(fp, "%5d%5d%5d", ir->nkx, ir->nky, ir->nkz);
1051 fprintf(fp, " %9f ", info->fsx[j]);
1052 if (evdwCUT == ir->vdwtype)
1054 fprintf(fp, "%10f", ir->rvdw);
1056 if (EPME_SWITCHED(ir->coulombtype))
1058 fprintf(fp, "%10f", ir->rlist);
1060 if (info->rlistlong[0] != max_cutoff(info->rlist[0], max_cutoff(info->rvdw[0], info->rcoulomb[0])) )
1062 fprintf(fp, "%10f", ir->rlistlong);
1064 fprintf(fp, " %-14s\n", fn_bench_tprs[j]);
1066 /* Make it clear to the user that some additional settings were modified */
1067 if (!is_equal(ir->rvdw, info->rvdw[0])
1068 || !is_equal(ir->rlistlong, info->rlistlong[0]) )
1075 fprintf(fp, "\nNote that in addition to the Coulomb radius and the Fourier grid\n"
1076 "other input settings were also changed (see table above).\n"
1077 "Please check if the modified settings are appropriate.\n");
1085 /* Rename the files we want to keep to some meaningful filename and
1086 * delete the rest */
1087 static void cleanup(const t_filenm *fnm, int nfile, int k, int nnodes,
1088 int nPMEnodes, int nr, gmx_bool bKeepStderr)
1090 char numstring[STRLEN];
1091 char newfilename[STRLEN];
1092 const char *fn = NULL;
1097 fprintf(stdout, "Cleaning up, deleting benchmark temp files ...\n");
1099 for (i = 0; i < nfile; i++)
1101 opt = (char *)fnm[i].opt;
1102 if (strcmp(opt, "-p") == 0)
1104 /* do nothing; keep this file */
1107 else if (strcmp(opt, "-bg") == 0)
1109 /* Give the log file a nice name so one can later see which parameters were used */
1110 numstring[0] = '\0';
1113 sprintf(numstring, "_%d", nr);
1115 sprintf(newfilename, "%s_no%d_np%d_npme%d%s", opt2fn("-bg", nfile, fnm), k, nnodes, nPMEnodes, numstring);
1116 if (gmx_fexist(opt2fn("-bg", nfile, fnm)))
1118 fprintf(stdout, "renaming log file to %s\n", newfilename);
1119 make_backup(newfilename);
1120 rename(opt2fn("-bg", nfile, fnm), newfilename);
1123 else if (strcmp(opt, "-err") == 0)
1125 /* This file contains the output of stderr. We want to keep it in
1126 * cases where there have been problems. */
1127 fn = opt2fn(opt, nfile, fnm);
1128 numstring[0] = '\0';
1131 sprintf(numstring, "_%d", nr);
1133 sprintf(newfilename, "%s_no%d_np%d_npme%d%s", fn, k, nnodes, nPMEnodes, numstring);
1138 fprintf(stdout, "Saving stderr output in %s\n", newfilename);
1139 make_backup(newfilename);
1140 rename(fn, newfilename);
1144 fprintf(stdout, "Deleting %s\n", fn);
1149 /* Delete the files which are created for each benchmark run: (options -b*) */
1150 else if ( (0 == strncmp(opt, "-b", 2)) && (opt2bSet(opt, nfile, fnm) || !is_optional(&fnm[i])) )
1152 fn = opt2fn(opt, nfile, fnm);
1155 fprintf(stdout, "Deleting %s\n", fn);
1163 /* Returns the largest common factor of n1 and n2 */
1164 static int largest_common_factor(int n1, int n2)
1169 for (factor = nmax; factor > 0; factor--)
1171 if (0 == (n1 % factor) && 0 == (n2 % factor) )
1176 return 0; /* one for the compiler */
1180 eNpmeAuto, eNpmeAll, eNpmeReduced, eNpmeSubset, eNpmeNr
1183 /* Create a list of numbers of PME nodes to test */
1184 static void make_npme_list(
1185 const char *npmevalues_opt, /* Make a complete list with all
1186 * possibilities or a short list that keeps only
1187 * reasonable numbers of PME nodes */
1188 int *nentries, /* Number of entries we put in the nPMEnodes list */
1189 int *nPMEnodes[], /* Each entry contains the value for -npme */
1190 int nnodes, /* Total number of nodes to do the tests on */
1191 int minPMEnodes, /* Minimum number of PME nodes */
1192 int maxPMEnodes) /* Maximum number of PME nodes */
1195 int min_factor = 1; /* We request that npp and npme have this minimal
1196 * largest common factor (depends on npp) */
1197 int nlistmax; /* Max. list size */
1198 int nlist; /* Actual number of entries in list */
1202 /* Do we need to check all possible values for -npme or is a reduced list enough? */
1203 if (0 == strcmp(npmevalues_opt, "all") )
1207 else if (0 == strcmp(npmevalues_opt, "subset") )
1209 eNPME = eNpmeSubset;
1211 else /* "auto" or "range" */
1217 else if (nnodes < 128)
1219 eNPME = eNpmeReduced;
1223 eNPME = eNpmeSubset;
1227 /* Calculate how many entries we could possibly have (in case of -npme all) */
1230 nlistmax = maxPMEnodes - minPMEnodes + 3;
1231 if (0 == minPMEnodes)
1241 /* Now make the actual list which is at most of size nlist */
1242 snew(*nPMEnodes, nlistmax);
1243 nlist = 0; /* start counting again, now the real entries in the list */
1244 for (i = 0; i < nlistmax - 2; i++)
1246 npme = maxPMEnodes - i;
1257 /* For 2d PME we want a common largest factor of at least the cube
1258 * root of the number of PP nodes */
1259 min_factor = (int) pow(npp, 1.0/3.0);
1262 gmx_fatal(FARGS, "Unknown option for eNPME in make_npme_list");
1265 if (largest_common_factor(npp, npme) >= min_factor)
1267 (*nPMEnodes)[nlist] = npme;
1271 /* We always test 0 PME nodes and the automatic number */
1272 *nentries = nlist + 2;
1273 (*nPMEnodes)[nlist ] = 0;
1274 (*nPMEnodes)[nlist+1] = -1;
1276 fprintf(stderr, "Will try the following %d different values for -npme:\n", *nentries);
1277 for (i = 0; i < *nentries-1; i++)
1279 fprintf(stderr, "%d, ", (*nPMEnodes)[i]);
1281 fprintf(stderr, "and %d (auto).\n", (*nPMEnodes)[*nentries-1]);
1285 /* Allocate memory to store the performance data */
1286 static void init_perfdata(t_perf *perfdata[], int ntprs, int datasets, int repeats)
1291 for (k = 0; k < ntprs; k++)
1293 snew(perfdata[k], datasets);
1294 for (i = 0; i < datasets; i++)
1296 for (j = 0; j < repeats; j++)
1298 snew(perfdata[k][i].Gcycles, repeats);
1299 snew(perfdata[k][i].ns_per_day, repeats);
1300 snew(perfdata[k][i].PME_f_load, repeats);
1307 /* Check for errors on mdrun -h */
1308 static void make_sure_it_runs(char *mdrun_cmd_line, int length, FILE *fp)
1310 char *command, *msg;
1314 snew(command, length + 15);
1315 snew(msg, length + 500);
1317 fprintf(stdout, "Making sure the benchmarks can be executed ...\n");
1318 /* FIXME: mdrun -h no longer actually does anything useful.
1319 * It unconditionally prints the help, ignoring all other options. */
1320 sprintf(command, "%s-h -quiet", mdrun_cmd_line);
1321 ret = gmx_system_call(command);
1325 /* To prevent confusion, do not again issue a gmx_fatal here since we already
1326 * get the error message from mdrun itself */
1327 sprintf(msg, "Cannot run the benchmark simulations! Please check the error message of\n"
1328 "mdrun for the source of the problem. Did you provide a command line\n"
1329 "argument that neither g_tune_pme nor mdrun understands? Offending command:\n"
1330 "\n%s\n\n", command);
1332 fprintf(stderr, "%s", msg);
1334 fprintf(fp, "%s", msg);
1344 static void do_the_tests(
1345 FILE *fp, /* General g_tune_pme output file */
1346 char **tpr_names, /* Filenames of the input files to test */
1347 int maxPMEnodes, /* Max fraction of nodes to use for PME */
1348 int minPMEnodes, /* Min fraction of nodes to use for PME */
1349 int npme_fixed, /* If >= -1, test fixed number of PME
1351 const char *npmevalues_opt, /* Which -npme values should be tested */
1352 t_perf **perfdata, /* Here the performace data is stored */
1353 int *pmeentries, /* Entries in the nPMEnodes list */
1354 int repeats, /* Repeat each test this often */
1355 int nnodes, /* Total number of nodes = nPP + nPME */
1356 int nr_tprs, /* Total number of tpr files to test */
1357 gmx_bool bThreads, /* Threads or MPI? */
1358 char *cmd_mpirun, /* mpirun command string */
1359 char *cmd_np, /* "-np", "-n", whatever mpirun needs */
1360 char *cmd_mdrun, /* mdrun command string */
1361 char *cmd_args_bench, /* arguments for mdrun in a string */
1362 const t_filenm *fnm, /* List of filenames from command line */
1363 int nfile, /* Number of files specified on the cmdl. */
1364 int presteps, /* DLB equilibration steps, is checked */
1365 gmx_int64_t cpt_steps) /* Time step counter in the checkpoint */
1367 int i, nr, k, ret, count = 0, totaltests;
1368 int *nPMEnodes = NULL;
1371 char *command, *cmd_stub;
1373 gmx_bool bResetProblem = FALSE;
1374 gmx_bool bFirst = TRUE;
1377 /* This string array corresponds to the eParselog enum type at the start
1379 const char* ParseLog[] = {
1381 "Logfile not found!",
1382 "No timings, logfile truncated?",
1383 "Run was terminated.",
1384 "Counters were not reset properly.",
1385 "No DD grid found for these settings.",
1386 "TPX version conflict!",
1387 "mdrun was not started in parallel!",
1388 "Number of PP nodes has a prime factor that is too large.",
1391 char str_PME_f_load[13];
1394 /* Allocate space for the mdrun command line. 100 extra characters should
1395 be more than enough for the -npme etcetera arguments */
1396 cmdline_length = strlen(cmd_mpirun)
1399 + strlen(cmd_args_bench)
1400 + strlen(tpr_names[0]) + 100;
1401 snew(command, cmdline_length);
1402 snew(cmd_stub, cmdline_length);
1404 /* Construct the part of the command line that stays the same for all tests: */
1407 sprintf(cmd_stub, "%s%s", cmd_mdrun, cmd_np);
1411 sprintf(cmd_stub, "%s%s%s ", cmd_mpirun, cmd_np, cmd_mdrun);
1414 /* Create a list of numbers of PME nodes to test */
1415 if (npme_fixed < -1)
1417 make_npme_list(npmevalues_opt, pmeentries, &nPMEnodes,
1418 nnodes, minPMEnodes, maxPMEnodes);
1424 nPMEnodes[0] = npme_fixed;
1425 fprintf(stderr, "Will use a fixed number of %d PME-only nodes.\n", nPMEnodes[0]);
1430 fprintf(fp, "\nNo benchmarks done since number of repeats (-r) is 0.\n");
1432 finalize(opt2fn("-p", nfile, fnm));
1436 /* Allocate one dataset for each tpr input file: */
1437 init_perfdata(perfdata, nr_tprs, *pmeentries, repeats);
1439 /*****************************************/
1440 /* Main loop over all tpr files to test: */
1441 /*****************************************/
1442 totaltests = nr_tprs*(*pmeentries)*repeats;
1443 for (k = 0; k < nr_tprs; k++)
1445 fprintf(fp, "\nIndividual timings for input file %d (%s):\n", k, tpr_names[k]);
1446 fprintf(fp, "PME nodes Gcycles ns/day PME/f Remark\n");
1447 /* Loop over various numbers of PME nodes: */
1448 for (i = 0; i < *pmeentries; i++)
1450 pd = &perfdata[k][i];
1452 /* Loop over the repeats for each scenario: */
1453 for (nr = 0; nr < repeats; nr++)
1455 pd->nPMEnodes = nPMEnodes[i];
1457 /* Add -npme and -s to the command line and save it. Note that
1458 * the -passall (if set) options requires cmd_args_bench to be
1459 * at the end of the command line string */
1460 snew(pd->mdrun_cmd_line, cmdline_length);
1461 sprintf(pd->mdrun_cmd_line, "%s-npme %d -s %s %s",
1462 cmd_stub, pd->nPMEnodes, tpr_names[k], cmd_args_bench);
1464 /* To prevent that all benchmarks fail due to a show-stopper argument
1465 * on the mdrun command line, we make a quick check with mdrun -h first */
1468 make_sure_it_runs(pd->mdrun_cmd_line, cmdline_length, fp);
1472 /* Do a benchmark simulation: */
1475 sprintf(buf, ", pass %d/%d", nr+1, repeats);
1481 fprintf(stdout, "\n=== Progress %2.0f%%, tpr %d/%d, run %d/%d%s:\n",
1482 (100.0*count)/totaltests,
1483 k+1, nr_tprs, i+1, *pmeentries, buf);
1484 make_backup(opt2fn("-err", nfile, fnm));
1485 sprintf(command, "%s 1> /dev/null 2>%s", pd->mdrun_cmd_line, opt2fn("-err", nfile, fnm));
1486 fprintf(stdout, "%s\n", pd->mdrun_cmd_line);
1487 gmx_system_call(command);
1489 /* Collect the performance data from the log file; also check stderr
1490 * for fatal errors */
1491 ret = parse_logfile(opt2fn("-bg", nfile, fnm), opt2fn("-err", nfile, fnm),
1492 pd, nr, presteps, cpt_steps, nnodes);
1493 if ((presteps > 0) && (ret == eParselogResetProblem))
1495 bResetProblem = TRUE;
1498 if (-1 == pd->nPMEnodes)
1500 sprintf(buf, "(%3d)", pd->guessPME);
1508 if (pd->PME_f_load[nr] > 0.0)
1510 sprintf(str_PME_f_load, "%12.3f", pd->PME_f_load[nr]);
1514 sprintf(str_PME_f_load, "%s", " - ");
1517 /* Write the data we got to disk */
1518 fprintf(fp, "%4d%s %12.3f %12.3f %s %s", pd->nPMEnodes,
1519 buf, pd->Gcycles[nr], pd->ns_per_day[nr], str_PME_f_load, ParseLog[ret]);
1520 if (!(ret == eParselogOK || ret == eParselogNoDDGrid || ret == eParselogNotFound) )
1522 fprintf(fp, " Check %s file for problems.", ret == eParselogFatal ? "err" : "log");
1528 /* Do some cleaning up and delete the files we do not need any more */
1529 cleanup(fnm, nfile, k, nnodes, pd->nPMEnodes, nr, ret == eParselogFatal);
1531 /* If the first run with this number of processors already failed, do not try again: */
1532 if (pd->Gcycles[0] <= 0.0 && repeats > 1)
1534 fprintf(stdout, "Skipping remaining passes of unsuccessful setting, see log file for details.\n");
1535 count += repeats-(nr+1);
1538 } /* end of repeats loop */
1539 } /* end of -npme loop */
1540 } /* end of tpr file loop */
1545 fprintf(fp, "WARNING: The cycle and time step counters could not be reset properly. ");
1553 static void check_input(
1560 real maxPMEfraction,
1561 real minPMEfraction,
1563 gmx_int64_t bench_nsteps,
1564 const t_filenm *fnm,
1574 /* Make sure the input file exists */
1575 if (!gmx_fexist(opt2fn("-s", nfile, fnm)))
1577 gmx_fatal(FARGS, "File %s not found.", opt2fn("-s", nfile, fnm));
1580 /* Make sure that the checkpoint file is not overwritten during benchmarking */
1581 if ( (0 == strcmp(opt2fn("-cpi", nfile, fnm), opt2fn("-bcpo", nfile, fnm)) ) && (sim_part > 1) )
1583 gmx_fatal(FARGS, "Checkpoint input (-cpi) and benchmark checkpoint output (-bcpo) files must not be identical.\n"
1584 "The checkpoint input file must not be overwritten during the benchmarks.\n");
1587 /* Make sure that repeats is >= 0 (if == 0, only write tpr files) */
1590 gmx_fatal(FARGS, "Number of repeats < 0!");
1593 /* Check number of nodes */
1596 gmx_fatal(FARGS, "Number of nodes/threads must be a positive integer.");
1599 /* Automatically choose -ntpr if not set */
1609 /* Set a reasonable scaling factor for rcoulomb */
1612 *rmax = rcoulomb * 1.2;
1615 fprintf(stderr, "Will test %d tpr file%s.\n", *ntprs, *ntprs == 1 ? "" : "s");
1621 fprintf(stderr, "Note: Choose ntpr>1 to shift PME load between real and reciprocal space.\n");
1625 /* Make shure that rmin <= rcoulomb <= rmax */
1634 if (!(*rmin <= *rmax) )
1636 gmx_fatal(FARGS, "Please choose the Coulomb radii such that rmin <= rmax.\n"
1637 "rmin = %g, rmax = %g, actual rcoul from .tpr file = %g\n", *rmin, *rmax, rcoulomb);
1639 /* Add test scenarios if rmin or rmax were set */
1642 if (!is_equal(*rmin, rcoulomb) && (*ntprs == 1) )
1645 fprintf(stderr, "NOTE: Setting -rmin to %g changed -ntpr to %d\n",
1648 if (!is_equal(*rmax, rcoulomb) && (*ntprs == 1) )
1651 fprintf(stderr, "NOTE: Setting -rmax to %g changed -ntpr to %d\n",
1656 /* If one of rmin, rmax is set, we need 2 tpr files at minimum */
1657 if (!is_equal(*rmax, rcoulomb) || !is_equal(*rmin, rcoulomb) )
1659 *ntprs = max(*ntprs, 2);
1662 /* If both rmin, rmax are set, we need 3 tpr files at minimum */
1663 if (!is_equal(*rmax, rcoulomb) && !is_equal(*rmin, rcoulomb) )
1665 *ntprs = max(*ntprs, 3);
1670 fprintf(stderr, "NOTE: Your rmin, rmax setting changed -ntpr to %d\n", *ntprs);
1675 if (is_equal(*rmin, rcoulomb) && is_equal(rcoulomb, *rmax)) /* We have just a single rc */
1677 fprintf(stderr, "WARNING: Resetting -ntpr to 1 since no Coulomb radius scaling is requested.\n"
1678 "Please set rmin < rmax to test Coulomb radii in the [rmin, rmax] interval\n"
1679 "with correspondingly adjusted PME grid settings\n");
1684 /* Check whether max and min fraction are within required values */
1685 if (maxPMEfraction > 0.5 || maxPMEfraction < 0)
1687 gmx_fatal(FARGS, "-max must be between 0 and 0.5");
1689 if (minPMEfraction > 0.5 || minPMEfraction < 0)
1691 gmx_fatal(FARGS, "-min must be between 0 and 0.5");
1693 if (maxPMEfraction < minPMEfraction)
1695 gmx_fatal(FARGS, "-max must be larger or equal to -min");
1698 /* Check whether the number of steps - if it was set - has a reasonable value */
1699 if (bench_nsteps < 0)
1701 gmx_fatal(FARGS, "Number of steps must be positive.");
1704 if (bench_nsteps > 10000 || bench_nsteps < 100)
1706 fprintf(stderr, "WARNING: steps=");
1707 fprintf(stderr, "%"GMX_PRId64, bench_nsteps);
1708 fprintf(stderr, ". Are you sure you want to perform so %s steps for each benchmark?\n", (bench_nsteps < 100) ? "few" : "many");
1713 gmx_fatal(FARGS, "Cannot have a negative number of presteps.\n");
1716 /* Check for rcoulomb scaling if more than one .tpr file is tested */
1719 if (*rmin/rcoulomb < 0.75 || *rmax/rcoulomb > 1.25)
1721 fprintf(stderr, "WARNING: Applying extreme scaling factor. I hope you know what you are doing.\n");
1725 /* If a fixed number of PME nodes is set we do rcoulomb and PME gird tuning
1726 * only. We need to check whether the requested number of PME-only nodes
1728 if (npme_fixed > -1)
1730 /* No more than 50% of all nodes can be assigned as PME-only nodes. */
1731 if (2*npme_fixed > nnodes)
1733 gmx_fatal(FARGS, "Cannot have more than %d PME-only nodes for a total of %d nodes (you chose %d).\n",
1734 nnodes/2, nnodes, npme_fixed);
1736 if ((npme_fixed > 0) && (5*npme_fixed < nnodes))
1738 fprintf(stderr, "WARNING: Only %g percent of the nodes are assigned as PME-only nodes.\n",
1739 100.0*((real)npme_fixed / (real)nnodes));
1741 if (opt2parg_bSet("-min", npargs, pa) || opt2parg_bSet("-max", npargs, pa))
1743 fprintf(stderr, "NOTE: The -min, -max, and -npme options have no effect when a\n"
1744 " fixed number of PME-only nodes is requested with -fix.\n");
1750 /* Returns TRUE when "opt" is needed at launch time */
1751 static gmx_bool is_launch_file(char *opt, gmx_bool bSet)
1753 if (0 == strncmp(opt, "-swap", 5))
1758 /* Apart from the input .tpr and the output log files we need all options that
1759 * were set on the command line and that do not start with -b */
1760 if (0 == strncmp(opt, "-b", 2) || 0 == strncmp(opt, "-s", 2)
1761 || 0 == strncmp(opt, "-err", 4) || 0 == strncmp(opt, "-p", 2) )
1770 /* Returns TRUE when "opt" defines a file which is needed for the benchmarks runs */
1771 static gmx_bool is_bench_file(char *opt, gmx_bool bSet, gmx_bool bOptional, gmx_bool bIsOutput)
1773 /* Apart from the input .tpr, all files starting with "-b" are for
1774 * _b_enchmark files exclusively */
1775 if (0 == strncmp(opt, "-s", 2))
1780 if (0 == strncmp(opt, "-b", 2) || 0 == strncmp(opt, "-s", 2))
1782 if (!bOptional || bSet)
1799 if (bSet) /* These are additional input files like -cpi -ei */
1812 /* Adds 'buf' to 'str' */
1813 static void add_to_string(char **str, char *buf)
1818 len = strlen(*str) + strlen(buf) + 1;
1824 /* Create the command line for the benchmark as well as for the real run */
1825 static void create_command_line_snippets(
1826 gmx_bool bAppendFiles,
1827 gmx_bool bKeepAndNumCPT,
1828 gmx_bool bResetHWay,
1832 char *cmd_args_bench[], /* command line arguments for benchmark runs */
1833 char *cmd_args_launch[], /* command line arguments for simulation run */
1834 char extra_args[]) /* Add this to the end of the command line */
1839 char strbuf[STRLEN];
1842 /* strlen needs at least '\0' as a string: */
1843 snew(*cmd_args_bench, 1);
1844 snew(*cmd_args_launch, 1);
1845 *cmd_args_launch[0] = '\0';
1846 *cmd_args_bench[0] = '\0';
1849 /*******************************************/
1850 /* 1. Process other command line arguments */
1851 /*******************************************/
1854 /* Add equilibration steps to benchmark options */
1855 sprintf(strbuf, "-resetstep %d ", presteps);
1856 add_to_string(cmd_args_bench, strbuf);
1858 /* These switches take effect only at launch time */
1859 if (FALSE == bAppendFiles)
1861 add_to_string(cmd_args_launch, "-noappend ");
1865 add_to_string(cmd_args_launch, "-cpnum ");
1869 add_to_string(cmd_args_launch, "-resethway ");
1872 /********************/
1873 /* 2. Process files */
1874 /********************/
1875 for (i = 0; i < nfile; i++)
1877 opt = (char *)fnm[i].opt;
1878 name = opt2fn(opt, nfile, fnm);
1880 /* Strbuf contains the options, now let's sort out where we need that */
1881 sprintf(strbuf, "%s %s ", opt, name);
1883 if (is_bench_file(opt, opt2bSet(opt, nfile, fnm), is_optional(&fnm[i]), is_output(&fnm[i])) )
1885 /* All options starting with -b* need the 'b' removed,
1886 * therefore overwrite strbuf */
1887 if (0 == strncmp(opt, "-b", 2))
1889 sprintf(strbuf, "-%s %s ", &opt[2], name);
1892 add_to_string(cmd_args_bench, strbuf);
1895 if (is_launch_file(opt, opt2bSet(opt, nfile, fnm)) )
1897 add_to_string(cmd_args_launch, strbuf);
1901 add_to_string(cmd_args_bench, extra_args);
1902 add_to_string(cmd_args_launch, extra_args);
1906 /* Set option opt */
1907 static void setopt(const char *opt, int nfile, t_filenm fnm[])
1911 for (i = 0; (i < nfile); i++)
1913 if (strcmp(opt, fnm[i].opt) == 0)
1915 fnm[i].flag |= ffSET;
1921 /* This routine inspects the tpr file and ...
1922 * 1. checks for output files that get triggered by a tpr option. These output
1923 * files are marked as 'set' to allow for a proper cleanup after each
1925 * 2. returns the PME:PP load ratio
1926 * 3. returns rcoulomb from the tpr */
1927 static float inspect_tpr(int nfile, t_filenm fnm[], real *rcoulomb)
1929 gmx_bool bPull; /* Is pulling requested in .tpr file? */
1930 gmx_bool bTpi; /* Is test particle insertion requested? */
1931 gmx_bool bFree; /* Is a free energy simulation requested? */
1932 gmx_bool bNM; /* Is a normal mode analysis requested? */
1933 gmx_bool bSwap; /* Is water/ion position swapping requested? */
1939 /* Check tpr file for options that trigger extra output files */
1940 read_tpx_state(opt2fn("-s", nfile, fnm), &ir, &state, NULL, &mtop);
1941 bPull = (epullNO != ir.ePull);
1942 bFree = (efepNO != ir.efep );
1943 bNM = (eiNM == ir.eI );
1944 bSwap = (eswapNO != ir.eSwapCoords);
1945 bTpi = EI_TPI(ir.eI);
1947 /* Set these output files on the tuning command-line */
1950 setopt("-pf", nfile, fnm);
1951 setopt("-px", nfile, fnm);
1955 setopt("-dhdl", nfile, fnm);
1959 setopt("-tpi", nfile, fnm);
1960 setopt("-tpid", nfile, fnm);
1964 setopt("-mtx", nfile, fnm);
1968 setopt("-swap", nfile, fnm);
1971 *rcoulomb = ir.rcoulomb;
1973 /* Return the estimate for the number of PME nodes */
1974 return pme_load_estimate(&mtop, &ir, state.box);
1978 static void couple_files_options(int nfile, t_filenm fnm[])
1981 gmx_bool bSet, bBench;
1986 for (i = 0; i < nfile; i++)
1988 opt = (char *)fnm[i].opt;
1989 bSet = ((fnm[i].flag & ffSET) != 0);
1990 bBench = (0 == strncmp(opt, "-b", 2));
1992 /* Check optional files */
1993 /* If e.g. -eo is set, then -beo also needs to be set */
1994 if (is_optional(&fnm[i]) && bSet && !bBench)
1996 sprintf(buf, "-b%s", &opt[1]);
1997 setopt(buf, nfile, fnm);
1999 /* If -beo is set, then -eo also needs to be! */
2000 if (is_optional(&fnm[i]) && bSet && bBench)
2002 sprintf(buf, "-%s", &opt[2]);
2003 setopt(buf, nfile, fnm);
2009 #define BENCHSTEPS (1000)
2011 int gmx_tune_pme(int argc, char *argv[])
2013 const char *desc[] = {
2014 "For a given number [TT]-np[tt] or [TT]-ntmpi[tt] of processors/threads, [THISMODULE] systematically",
2015 "times [gmx-mdrun] with various numbers of PME-only nodes and determines",
2016 "which setting is fastest. It will also test whether performance can",
2017 "be enhanced by shifting load from the reciprocal to the real space",
2018 "part of the Ewald sum. ",
2019 "Simply pass your [TT].tpr[tt] file to [THISMODULE] together with other options",
2020 "for [gmx-mdrun] as needed.[PAR]",
2021 "Which executables are used can be set in the environment variables",
2022 "MPIRUN and MDRUN. If these are not present, 'mpirun' and 'mdrun'",
2023 "will be used as defaults. Note that for certain MPI frameworks you",
2024 "need to provide a machine- or hostfile. This can also be passed",
2025 "via the MPIRUN variable, e.g.[PAR]",
2026 "[TT]export MPIRUN=\"/usr/local/mpirun -machinefile hosts\"[tt][PAR]",
2027 "Please call [THISMODULE] with the normal options you would pass to",
2028 "[gmx-mdrun] and add [TT]-np[tt] for the number of processors to perform the",
2029 "tests on, or [TT]-ntmpi[tt] for the number of threads. You can also add [TT]-r[tt]",
2030 "to repeat each test several times to get better statistics. [PAR]",
2031 "[THISMODULE] can test various real space / reciprocal space workloads",
2032 "for you. With [TT]-ntpr[tt] you control how many extra [TT].tpr[tt] files will be",
2033 "written with enlarged cutoffs and smaller Fourier grids respectively.",
2034 "Typically, the first test (number 0) will be with the settings from the input",
2035 "[TT].tpr[tt] file; the last test (number [TT]ntpr[tt]) will have the Coulomb cutoff",
2036 "specified by [TT]-rmax[tt] with a somwhat smaller PME grid at the same time. ",
2037 "In this last test, the Fourier spacing is multiplied with [TT]rmax[tt]/rcoulomb. ",
2038 "The remaining [TT].tpr[tt] files will have equally-spaced Coulomb radii (and Fourier "
2039 "spacings) between these extremes. [BB]Note[bb] that you can set [TT]-ntpr[tt] to 1",
2040 "if you just seek the optimal number of PME-only nodes; in that case",
2041 "your input [TT].tpr[tt] file will remain unchanged.[PAR]",
2042 "For the benchmark runs, the default of 1000 time steps should suffice for most",
2043 "MD systems. The dynamic load balancing needs about 100 time steps",
2044 "to adapt to local load imbalances, therefore the time step counters",
2045 "are by default reset after 100 steps. For large systems (>1M atoms), as well as ",
2046 "for a higher accuarcy of the measurements, you should set [TT]-resetstep[tt] to a higher value.",
2047 "From the 'DD' load imbalance entries in the md.log output file you",
2048 "can tell after how many steps the load is sufficiently balanced. Example call:[PAR]"
2049 "[TT]gmx tune_pme -np 64 -s protein.tpr -launch[tt][PAR]",
2050 "After calling [gmx-mdrun] several times, detailed performance information",
2051 "is available in the output file [TT]perf.out[tt].",
2052 "[BB]Note[bb] that during the benchmarks, a couple of temporary files are written",
2053 "(options [TT]-b[tt]*), these will be automatically deleted after each test.[PAR]",
2054 "If you want the simulation to be started automatically with the",
2055 "optimized parameters, use the command line option [TT]-launch[tt].[PAR]",
2060 int pmeentries = 0; /* How many values for -npme do we actually test for each tpr file */
2061 real maxPMEfraction = 0.50;
2062 real minPMEfraction = 0.25;
2063 int maxPMEnodes, minPMEnodes;
2064 float guessPMEratio; /* guessed PME:PP ratio based on the tpr file */
2065 float guessPMEnodes;
2066 int npme_fixed = -2; /* If >= -1, use only this number
2067 * of PME-only nodes */
2069 real rmin = 0.0, rmax = 0.0; /* min and max value for rcoulomb if scaling is requested */
2070 real rcoulomb = -1.0; /* Coulomb radius as set in .tpr file */
2071 gmx_bool bScaleRvdw = TRUE;
2072 gmx_int64_t bench_nsteps = BENCHSTEPS;
2073 gmx_int64_t new_sim_nsteps = -1; /* -1 indicates: not set by the user */
2074 gmx_int64_t cpt_steps = 0; /* Step counter in .cpt input file */
2075 int presteps = 100; /* Do a full cycle reset after presteps steps */
2076 gmx_bool bOverwrite = FALSE, bKeepTPR;
2077 gmx_bool bLaunch = FALSE;
2078 char *ExtraArgs = NULL;
2079 char **tpr_names = NULL;
2080 const char *simulation_tpr = NULL;
2081 int best_npme, best_tpr;
2082 int sim_part = 1; /* For benchmarks with checkpoint files */
2085 /* Default program names if nothing else is found */
2086 char *cmd_mpirun = NULL, *cmd_mdrun = NULL;
2087 char *cmd_args_bench, *cmd_args_launch;
2088 char *cmd_np = NULL;
2090 t_perf **perfdata = NULL;
2096 /* Print out how long the tuning took */
2099 static t_filenm fnm[] = {
2101 { efOUT, "-p", "perf", ffWRITE },
2102 { efLOG, "-err", "bencherr", ffWRITE },
2103 { efTPX, "-so", "tuned", ffWRITE },
2105 { efTPX, NULL, NULL, ffREAD },
2106 { efTRN, "-o", NULL, ffWRITE },
2107 { efCOMPRESSED, "-x", NULL, ffOPTWR },
2108 { efCPT, "-cpi", NULL, ffOPTRD },
2109 { efCPT, "-cpo", NULL, ffOPTWR },
2110 { efSTO, "-c", "confout", ffWRITE },
2111 { efEDR, "-e", "ener", ffWRITE },
2112 { efLOG, "-g", "md", ffWRITE },
2113 { efXVG, "-dhdl", "dhdl", ffOPTWR },
2114 { efXVG, "-field", "field", ffOPTWR },
2115 { efXVG, "-table", "table", ffOPTRD },
2116 { efXVG, "-tabletf", "tabletf", ffOPTRD },
2117 { efXVG, "-tablep", "tablep", ffOPTRD },
2118 { efXVG, "-tableb", "table", ffOPTRD },
2119 { efTRX, "-rerun", "rerun", ffOPTRD },
2120 { efXVG, "-tpi", "tpi", ffOPTWR },
2121 { efXVG, "-tpid", "tpidist", ffOPTWR },
2122 { efEDI, "-ei", "sam", ffOPTRD },
2123 { efXVG, "-eo", "edsam", ffOPTWR },
2124 { efXVG, "-devout", "deviatie", ffOPTWR },
2125 { efXVG, "-runav", "runaver", ffOPTWR },
2126 { efXVG, "-px", "pullx", ffOPTWR },
2127 { efXVG, "-pf", "pullf", ffOPTWR },
2128 { efXVG, "-ro", "rotation", ffOPTWR },
2129 { efLOG, "-ra", "rotangles", ffOPTWR },
2130 { efLOG, "-rs", "rotslabs", ffOPTWR },
2131 { efLOG, "-rt", "rottorque", ffOPTWR },
2132 { efMTX, "-mtx", "nm", ffOPTWR },
2133 { efNDX, "-dn", "dipole", ffOPTWR },
2134 { efXVG, "-swap", "swapions", ffOPTWR },
2135 /* Output files that are deleted after each benchmark run */
2136 { efTRN, "-bo", "bench", ffWRITE },
2137 { efXTC, "-bx", "bench", ffWRITE },
2138 { efCPT, "-bcpo", "bench", ffWRITE },
2139 { efSTO, "-bc", "bench", ffWRITE },
2140 { efEDR, "-be", "bench", ffWRITE },
2141 { efLOG, "-bg", "bench", ffWRITE },
2142 { efXVG, "-beo", "benchedo", ffOPTWR },
2143 { efXVG, "-bdhdl", "benchdhdl", ffOPTWR },
2144 { efXVG, "-bfield", "benchfld", ffOPTWR },
2145 { efXVG, "-btpi", "benchtpi", ffOPTWR },
2146 { efXVG, "-btpid", "benchtpid", ffOPTWR },
2147 { efXVG, "-bdevout", "benchdev", ffOPTWR },
2148 { efXVG, "-brunav", "benchrnav", ffOPTWR },
2149 { efXVG, "-bpx", "benchpx", ffOPTWR },
2150 { efXVG, "-bpf", "benchpf", ffOPTWR },
2151 { efXVG, "-bro", "benchrot", ffOPTWR },
2152 { efLOG, "-bra", "benchrota", ffOPTWR },
2153 { efLOG, "-brs", "benchrots", ffOPTWR },
2154 { efLOG, "-brt", "benchrott", ffOPTWR },
2155 { efMTX, "-bmtx", "benchn", ffOPTWR },
2156 { efNDX, "-bdn", "bench", ffOPTWR },
2157 { efXVG, "-bswap", "benchswp", ffOPTWR }
2160 gmx_bool bThreads = FALSE;
2164 const char *procstring[] =
2165 { NULL, "-np", "-n", "none", NULL };
2166 const char *npmevalues_opt[] =
2167 { NULL, "auto", "all", "subset", NULL };
2169 gmx_bool bAppendFiles = TRUE;
2170 gmx_bool bKeepAndNumCPT = FALSE;
2171 gmx_bool bResetCountersHalfWay = FALSE;
2172 gmx_bool bBenchmark = TRUE;
2174 output_env_t oenv = NULL;
2177 /***********************/
2178 /* g_tune_pme options: */
2179 /***********************/
2180 { "-np", FALSE, etINT, {&nnodes},
2181 "Number of nodes to run the tests on (must be > 2 for separate PME nodes)" },
2182 { "-npstring", FALSE, etENUM, {procstring},
2183 "Specify the number of processors to [TT]$MPIRUN[tt] using this string"},
2184 { "-ntmpi", FALSE, etINT, {&nthreads},
2185 "Number of MPI-threads to run the tests on (turns MPI & mpirun off)"},
2186 { "-r", FALSE, etINT, {&repeats},
2187 "Repeat each test this often" },
2188 { "-max", FALSE, etREAL, {&maxPMEfraction},
2189 "Max fraction of PME nodes to test with" },
2190 { "-min", FALSE, etREAL, {&minPMEfraction},
2191 "Min fraction of PME nodes to test with" },
2192 { "-npme", FALSE, etENUM, {npmevalues_opt},
2193 "Within -min and -max, benchmark all possible values for [TT]-npme[tt], or just a reasonable subset. "
2194 "Auto neglects -min and -max and chooses reasonable values around a guess for npme derived from the .tpr"},
2195 { "-fix", FALSE, etINT, {&npme_fixed},
2196 "If >= -1, do not vary the number of PME-only nodes, instead use this fixed value and only vary rcoulomb and the PME grid spacing."},
2197 { "-rmax", FALSE, etREAL, {&rmax},
2198 "If >0, maximal rcoulomb for -ntpr>1 (rcoulomb upscaling results in fourier grid downscaling)" },
2199 { "-rmin", FALSE, etREAL, {&rmin},
2200 "If >0, minimal rcoulomb for -ntpr>1" },
2201 { "-scalevdw", FALSE, etBOOL, {&bScaleRvdw},
2202 "Scale rvdw along with rcoulomb"},
2203 { "-ntpr", FALSE, etINT, {&ntprs},
2204 "Number of [TT].tpr[tt] files to benchmark. Create this many files with different rcoulomb scaling factors depending on -rmin and -rmax. "
2205 "If < 1, automatically choose the number of [TT].tpr[tt] files to test" },
2206 { "-steps", FALSE, etINT64, {&bench_nsteps},
2207 "Take timings for this many steps in the benchmark runs" },
2208 { "-resetstep", FALSE, etINT, {&presteps},
2209 "Let dlb equilibrate this many steps before timings are taken (reset cycle counters after this many steps)" },
2210 { "-simsteps", FALSE, etINT64, {&new_sim_nsteps},
2211 "If non-negative, perform this many steps in the real run (overwrites nsteps from [TT].tpr[tt], add [TT].cpt[tt] steps)" },
2212 { "-launch", FALSE, etBOOL, {&bLaunch},
2213 "Launch the real simulation after optimization" },
2214 { "-bench", FALSE, etBOOL, {&bBenchmark},
2215 "Run the benchmarks or just create the input [TT].tpr[tt] files?" },
2216 /******************/
2217 /* mdrun options: */
2218 /******************/
2219 /* We let g_tune_pme parse and understand these options, because we need to
2220 * prevent that they appear on the mdrun command line for the benchmarks */
2221 { "-append", FALSE, etBOOL, {&bAppendFiles},
2222 "Append to previous output files when continuing from checkpoint instead of adding the simulation part number to all file names (for launch only)" },
2223 { "-cpnum", FALSE, etBOOL, {&bKeepAndNumCPT},
2224 "Keep and number checkpoint files (launch only)" },
2225 { "-resethway", FALSE, etBOOL, {&bResetCountersHalfWay},
2226 "HIDDENReset the cycle counters after half the number of steps or halfway [TT]-maxh[tt] (launch only)" }
2229 #define NFILE asize(fnm)
2231 seconds = gmx_gettime();
2233 if (!parse_common_args(&argc, argv, PCA_NOEXIT_ON_ARGS,
2234 NFILE, fnm, asize(pa), pa, asize(desc), desc,
2240 /* Store the remaining unparsed command line entries in a string which
2241 * is then attached to the mdrun command line */
2243 ExtraArgs[0] = '\0';
2244 for (i = 1; i < argc; i++) /* argc will now be 1 if everything was understood */
2246 add_to_string(&ExtraArgs, argv[i]);
2247 add_to_string(&ExtraArgs, " ");
2250 if (opt2parg_bSet("-ntmpi", asize(pa), pa))
2253 if (opt2parg_bSet("-npstring", asize(pa), pa))
2255 fprintf(stderr, "WARNING: -npstring has no effect when using threads.\n");
2260 gmx_fatal(FARGS, "Can't run multi-threaded MPI simulation yet!");
2262 /* and now we just set this; a bit of an ugly hack*/
2265 /* Check for PME:PP ratio and whether tpr triggers additional output files */
2266 guessPMEratio = inspect_tpr(NFILE, fnm, &rcoulomb);
2268 /* Automatically set -beo options if -eo is set etc. */
2269 couple_files_options(NFILE, fnm);
2271 /* Construct the command line arguments for benchmark runs
2272 * as well as for the simulation run */
2275 sprintf(bbuf, " -ntmpi %d ", nthreads);
2279 /* This string will be used for MPI runs and will appear after the
2280 * mpirun command. */
2281 if (strcmp(procstring[0], "none") != 0)
2283 sprintf(bbuf, " %s %d ", procstring[0], nnodes);
2293 create_command_line_snippets(bAppendFiles, bKeepAndNumCPT, bResetCountersHalfWay, presteps,
2294 NFILE, fnm, &cmd_args_bench, &cmd_args_launch, ExtraArgs);
2296 /* Read in checkpoint file if requested */
2298 if (opt2bSet("-cpi", NFILE, fnm))
2301 cr->duty = DUTY_PP; /* makes the following routine happy */
2302 read_checkpoint_simulation_part(opt2fn("-cpi", NFILE, fnm),
2303 &sim_part, &cpt_steps, cr,
2304 FALSE, NFILE, fnm, NULL, NULL);
2307 /* sim_part will now be 1 if no checkpoint file was found */
2310 gmx_fatal(FARGS, "Checkpoint file %s not found!", opt2fn("-cpi", NFILE, fnm));
2314 /* Open performance output file and write header info */
2315 fp = ffopen(opt2fn("-p", NFILE, fnm), "w");
2317 /* Make a quick consistency check of command line parameters */
2318 check_input(nnodes, repeats, &ntprs, &rmin, rcoulomb, &rmax,
2319 maxPMEfraction, minPMEfraction, npme_fixed,
2320 bench_nsteps, fnm, NFILE, sim_part, presteps,
2323 /* Determine the maximum and minimum number of PME nodes to test,
2324 * the actual list of settings is build in do_the_tests(). */
2325 if ((nnodes > 2) && (npme_fixed < -1))
2327 if (0 == strcmp(npmevalues_opt[0], "auto"))
2329 /* Determine the npme range automatically based on the PME:PP load guess */
2330 if (guessPMEratio > 1.0)
2332 /* More PME than PP work, probably we do not need separate PME nodes at all! */
2333 maxPMEnodes = nnodes/2;
2334 minPMEnodes = nnodes/2;
2338 /* PME : PP load is in the range 0..1, let's test around the guess */
2339 guessPMEnodes = nnodes/(1.0 + 1.0/guessPMEratio);
2340 minPMEnodes = floor(0.7*guessPMEnodes);
2341 maxPMEnodes = ceil(1.6*guessPMEnodes);
2342 maxPMEnodes = min(maxPMEnodes, nnodes/2);
2347 /* Determine the npme range based on user input */
2348 maxPMEnodes = floor(maxPMEfraction*nnodes);
2349 minPMEnodes = max(floor(minPMEfraction*nnodes), 0);
2350 fprintf(stdout, "Will try runs with %d ", minPMEnodes);
2351 if (maxPMEnodes != minPMEnodes)
2353 fprintf(stdout, "- %d ", maxPMEnodes);
2355 fprintf(stdout, "PME-only nodes.\n Note that the automatic number of PME-only nodes and no separate PME nodes are always tested.\n");
2364 /* Get the commands we need to set up the runs from environment variables */
2365 get_program_paths(bThreads, &cmd_mpirun, &cmd_mdrun);
2366 if (bBenchmark && repeats > 0)
2368 check_mdrun_works(bThreads, cmd_mpirun, cmd_np, cmd_mdrun);
2371 /* Print some header info to file */
2373 fprintf(fp, "\n P E R F O R M A N C E R E S U L T S\n");
2375 fprintf(fp, "%s for Gromacs %s\n", ShortProgram(), GromacsVersion());
2378 fprintf(fp, "Number of nodes : %d\n", nnodes);
2379 fprintf(fp, "The mpirun command is : %s\n", cmd_mpirun);
2380 if (strcmp(procstring[0], "none") != 0)
2382 fprintf(fp, "Passing # of nodes via : %s\n", procstring[0]);
2386 fprintf(fp, "Not setting number of nodes in system call\n");
2391 fprintf(fp, "Number of threads : %d\n", nnodes);
2394 fprintf(fp, "The mdrun command is : %s\n", cmd_mdrun);
2395 fprintf(fp, "mdrun args benchmarks : %s\n", cmd_args_bench);
2396 fprintf(fp, "Benchmark steps : ");
2397 fprintf(fp, "%"GMX_PRId64, bench_nsteps);
2399 fprintf(fp, "dlb equilibration steps : %d\n", presteps);
2402 fprintf(fp, "Checkpoint time step : ");
2403 fprintf(fp, "%"GMX_PRId64, cpt_steps);
2406 fprintf(fp, "mdrun args at launchtime: %s\n", cmd_args_launch);
2408 if (new_sim_nsteps >= 0)
2411 fprintf(stderr, "Note: Simulation input file %s will have ", opt2fn("-so", NFILE, fnm));
2412 fprintf(stderr, "%"GMX_PRId64, new_sim_nsteps+cpt_steps);
2413 fprintf(stderr, " steps.\n");
2414 fprintf(fp, "Simulation steps : ");
2415 fprintf(fp, "%"GMX_PRId64, new_sim_nsteps);
2420 fprintf(fp, "Repeats for each test : %d\n", repeats);
2423 if (npme_fixed >= -1)
2425 fprintf(fp, "Fixing -npme at : %d\n", npme_fixed);
2428 fprintf(fp, "Input file : %s\n", opt2fn("-s", NFILE, fnm));
2429 fprintf(fp, " PME/PP load estimate : %g\n", guessPMEratio);
2431 /* Allocate memory for the inputinfo struct: */
2433 info->nr_inputfiles = ntprs;
2434 for (i = 0; i < ntprs; i++)
2436 snew(info->rcoulomb, ntprs);
2437 snew(info->rvdw, ntprs);
2438 snew(info->rlist, ntprs);
2439 snew(info->rlistlong, ntprs);
2440 snew(info->nkx, ntprs);
2441 snew(info->nky, ntprs);
2442 snew(info->nkz, ntprs);
2443 snew(info->fsx, ntprs);
2444 snew(info->fsy, ntprs);
2445 snew(info->fsz, ntprs);
2447 /* Make alternative tpr files to test: */
2448 snew(tpr_names, ntprs);
2449 for (i = 0; i < ntprs; i++)
2451 snew(tpr_names[i], STRLEN);
2454 /* It can be that ntprs is reduced by make_benchmark_tprs if not enough
2455 * different grids could be found. */
2456 make_benchmark_tprs(opt2fn("-s", NFILE, fnm), tpr_names, bench_nsteps+presteps,
2457 cpt_steps, rmin, rmax, bScaleRvdw, &ntprs, info, fp);
2459 /********************************************************************************/
2460 /* Main loop over all scenarios we need to test: tpr files, PME nodes, repeats */
2461 /********************************************************************************/
2462 snew(perfdata, ntprs);
2465 do_the_tests(fp, tpr_names, maxPMEnodes, minPMEnodes, npme_fixed, npmevalues_opt[0], perfdata, &pmeentries,
2466 repeats, nnodes, ntprs, bThreads, cmd_mpirun, cmd_np, cmd_mdrun,
2467 cmd_args_bench, fnm, NFILE, presteps, cpt_steps);
2469 fprintf(fp, "\nTuning took%8.1f minutes.\n", (gmx_gettime()-seconds)/60.0);
2471 /* Analyse the results and give a suggestion for optimal settings: */
2472 bKeepTPR = analyze_data(fp, opt2fn("-p", NFILE, fnm), perfdata, nnodes, ntprs, pmeentries,
2473 repeats, info, &best_tpr, &best_npme);
2475 /* Take the best-performing tpr file and enlarge nsteps to original value */
2476 if (bKeepTPR && !bOverwrite)
2478 simulation_tpr = opt2fn("-s", NFILE, fnm);
2482 simulation_tpr = opt2fn("-so", NFILE, fnm);
2483 modify_PMEsettings(bOverwrite ? (new_sim_nsteps+cpt_steps) : info->orig_sim_steps,
2484 info->orig_init_step, tpr_names[best_tpr], simulation_tpr);
2487 /* Let's get rid of the temporary benchmark input files */
2488 for (i = 0; i < ntprs; i++)
2490 fprintf(stdout, "Deleting temporary benchmark input file %s\n", tpr_names[i]);
2491 remove(tpr_names[i]);
2494 /* Now start the real simulation if the user requested it ... */
2495 launch_simulation(bLaunch, fp, bThreads, cmd_mpirun, cmd_np, cmd_mdrun,
2496 cmd_args_launch, simulation_tpr, best_npme);
2500 /* ... or simply print the performance results to screen: */
2503 finalize(opt2fn("-p", NFILE, fnm));