src/gromacs/gmxana/gmx_tune_pme.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2009,2010,2011,2012,2013,2014, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 #include "config.h"
  36
  37 #include <stdlib.h>
  38 #include <time.h>
  39 #ifdef HAVE_SYS_TIME_H
  40 #include <sys/time.h>
  41 #endif
  42
  43 #include "typedefs.h"
  44 #include "types/commrec.h"
  45 #include "gromacs/math/vec.h"
  46 #include "gromacs/fileio/tpxio.h"
  47 #include "gromacs/utility/cstringutil.h"
  48 #include "readinp.h"
  49 #include "calcgrid.h"
  50 #include "checkpoint.h"
  51 #include "macros.h"
  52 #include "gmx_ana.h"
  53 #include "names.h"
  54 #include "perf_est.h"
  55 #include "inputrec.h"
  56 #include "gromacs/timing/walltime_accounting.h"
  57 #include "gromacs/math/utilities.h"
  58
  59 #include "gromacs/commandline/pargs.h"
  60 #include "gromacs/utility/baseversion.h"
  61 #include "gromacs/utility/fatalerror.h"
  62 #include "gromacs/utility/smalloc.h"
  63
  64 /* Enum for situations that can occur during log file parsing, the
  65  * corresponding string entries can be found in do_the_tests() in
  66  * const char* ParseLog[] */
  67 enum {
  68     eParselogOK,
  69     eParselogNotFound,
  70     eParselogNoPerfData,
  71     eParselogTerm,
  72     eParselogResetProblem,
  73     eParselogNoDDGrid,
  74     eParselogTPXVersion,
  75     eParselogNotParallel,
  76     eParselogLargePrimeFactor,
  77     eParselogFatal,
  78     eParselogNr
  79 };
  80
  81
  82 typedef struct
  83 {
  84     int     nPMEnodes;    /* number of PME-only nodes used in this test */
  85     int     nx, ny, nz;   /* DD grid */
  86     int     guessPME;     /* if nPMEnodes == -1, this is the guessed number of PME nodes */
  87     double *Gcycles;      /* This can contain more than one value if doing multiple tests */
  88     double  Gcycles_Av;
  89     float  *ns_per_day;
  90     float   ns_per_day_Av;
  91     float  *PME_f_load;     /* PME mesh/force load average*/
  92     float   PME_f_load_Av;  /* Average average ;) ... */
  93     char   *mdrun_cmd_line; /* Mdrun command line used for this test */
  94 } t_perf;
  95
  96
  97 typedef struct
  98 {
  99     int             nr_inputfiles;  /* The number of tpr and mdp input files */
 100     gmx_int64_t     orig_sim_steps; /* Number of steps to be done in the real simulation */
 101     gmx_int64_t     orig_init_step; /* Init step for the real simulation */
 102     real           *rcoulomb;       /* The coulomb radii [0...nr_inputfiles] */
 103     real           *rvdw;           /* The vdW radii */
 104     real           *rlist;          /* Neighbourlist cutoff radius */
 105     real           *rlistlong;
 106     int            *nkx, *nky, *nkz;
 107     real           *fsx, *fsy, *fsz; /* Fourierspacing in x,y,z dimension */
 108 } t_inputinfo;
 109
 110
 111 static void sep_line(FILE *fp)
 112 {
 113     fprintf(fp, "\n------------------------------------------------------------\n");
 114 }
 115
 116
 117 /* Wrapper for system calls */
 118 static int gmx_system_call(char *command)
 119 {
 120 #ifdef GMX_NO_SYSTEM
 121     gmx_fatal(FARGS, "No calls to system(3) supported on this platform. Attempted to call:\n'%s'\n", command);
 122 #else
 123     return ( system(command) );
 124 #endif
 125 }
 126
 127
 128 /* Check if string starts with substring */
 129 static gmx_bool str_starts(const char *string, const char *substring)
 130 {
 131     return ( strncmp(string, substring, strlen(substring)) == 0);
 132 }
 133
 134
 135 static void cleandata(t_perf *perfdata, int test_nr)
 136 {
 137     perfdata->Gcycles[test_nr]    = 0.0;
 138     perfdata->ns_per_day[test_nr] = 0.0;
 139     perfdata->PME_f_load[test_nr] = 0.0;
 140
 141     return;
 142 }
 143
 144
 145 static void remove_if_exists(const char *fn)
 146 {
 147     if (gmx_fexist(fn))
 148     {
 149         fprintf(stdout, "Deleting %s\n", fn);
 150         remove(fn);
 151     }
 152 }
 153
 154
 155 static void finalize(const char *fn_out)
 156 {
 157     char  buf[STRLEN];
 158     FILE *fp;
 159
 160
 161     fp = fopen(fn_out, "r");
 162     fprintf(stdout, "\n\n");
 163
 164     while (fgets(buf, STRLEN-1, fp) != NULL)
 165     {
 166         fprintf(stdout, "%s", buf);
 167     }
 168     fclose(fp);
 169     fprintf(stdout, "\n\n");
 170 }
 171
 172
 173 enum {
 174     eFoundNothing, eFoundDDStr, eFoundAccountingStr, eFoundCycleStr
 175 };
 176
 177 static int parse_logfile(const char *logfile, const char *errfile,
 178                          t_perf *perfdata, int test_nr, int presteps, gmx_int64_t cpt_steps,
 179                          int nnodes)
 180 {
 181     FILE           *fp;
 182     char            line[STRLEN], dumstring[STRLEN], dumstring2[STRLEN];
 183     const char      matchstrdd[]  = "Domain decomposition grid";
 184     const char      matchstrcr[]  = "resetting all time and cycle counters";
 185     const char      matchstrbal[] = "Average PME mesh/force load:";
 186     const char      matchstring[] = "R E A L   C Y C L E   A N D   T I M E   A C C O U N T I N G";
 187     const char      errSIG[]      = "signal, stopping at the next";
 188     int             iFound;
 189     float           dum1, dum2, dum3, dum4;
 190     int             ndum;
 191     int             npme;
 192     gmx_int64_t     resetsteps     = -1;
 193     gmx_bool        bFoundResetStr = FALSE;
 194     gmx_bool        bResetChecked  = FALSE;
 195
 196
 197     if (!gmx_fexist(logfile))
 198     {
 199         fprintf(stderr, "WARNING: Could not find logfile %s.\n", logfile);
 200         cleandata(perfdata, test_nr);
 201         return eParselogNotFound;
 202     }
 203
 204     fp = fopen(logfile, "r");
 205     perfdata->PME_f_load[test_nr] = -1.0;
 206     perfdata->guessPME            = -1;
 207
 208     iFound = eFoundNothing;
 209     if (1 == nnodes)
 210     {
 211         iFound = eFoundDDStr; /* Skip some case statements */
 212     }
 213
 214     while (fgets(line, STRLEN, fp) != NULL)
 215     {
 216         /* Remove leading spaces */
 217         ltrim(line);
 218
 219         /* Check for TERM and INT signals from user: */
 220         if (strstr(line, errSIG) != NULL)
 221         {
 222             fclose(fp);
 223             cleandata(perfdata, test_nr);
 224             return eParselogTerm;
 225         }
 226
 227         /* Check whether cycle resetting  worked */
 228         if (presteps > 0 && !bFoundResetStr)
 229         {
 230             if (strstr(line, matchstrcr) != NULL)
 231             {
 232                 sprintf(dumstring, "step %s", "%"GMX_SCNd64);
 233                 sscanf(line, dumstring, &resetsteps);
 234                 bFoundResetStr = TRUE;
 235                 if (resetsteps == presteps+cpt_steps)
 236                 {
 237                     bResetChecked = TRUE;
 238                 }
 239                 else
 240                 {
 241                     sprintf(dumstring, "%"GMX_PRId64, resetsteps);
 242                     sprintf(dumstring2, "%"GMX_PRId64, presteps+cpt_steps);
 243                     fprintf(stderr, "WARNING: Time step counters were reset at step %s,\n"
 244                             "         though they were supposed to be reset at step %s!\n",
 245                             dumstring, dumstring2);
 246                 }
 247             }
 248         }
 249
 250         /* Look for strings that appear in a certain order in the log file: */
 251         switch (iFound)
 252         {
 253             case eFoundNothing:
 254                 /* Look for domain decomp grid and separate PME nodes: */
 255                 if (str_starts(line, matchstrdd))
 256                 {
 257                     sscanf(line, "Domain decomposition grid %d x %d x %d, separate PME ranks %d",
 258                            &(perfdata->nx), &(perfdata->ny), &(perfdata->nz), &npme);
 259                     if (perfdata->nPMEnodes == -1)
 260                     {
 261                         perfdata->guessPME = npme;
 262                     }
 263                     else if (perfdata->nPMEnodes != npme)
 264                     {
 265                         gmx_fatal(FARGS, "PME ranks from command line and output file are not identical");
 266                     }
 267                     iFound = eFoundDDStr;
 268                 }
 269                 /* Catch a few errors that might have occured: */
 270                 else if (str_starts(line, "There is no domain decomposition for"))
 271                 {
 272                     fclose(fp);
 273                     return eParselogNoDDGrid;
 274                 }
 275                 else if (str_starts(line, "The number of ranks you selected"))
 276                 {
 277                     fclose(fp);
 278                     return eParselogLargePrimeFactor;
 279                 }
 280                 else if (str_starts(line, "reading tpx file"))
 281                 {
 282                     fclose(fp);
 283                     return eParselogTPXVersion;
 284                 }
 285                 else if (str_starts(line, "The -dd or -npme option request a parallel simulation"))
 286                 {
 287                     fclose(fp);
 288                     return eParselogNotParallel;
 289                 }
 290                 break;
 291             case eFoundDDStr:
 292                 /* Look for PME mesh/force balance (not necessarily present, though) */
 293                 if (str_starts(line, matchstrbal))
 294                 {
 295                     sscanf(&line[strlen(matchstrbal)], "%f", &(perfdata->PME_f_load[test_nr]));
 296                 }
 297                 /* Look for matchstring */
 298                 if (str_starts(line, matchstring))
 299                 {
 300                     iFound = eFoundAccountingStr;
 301                 }
 302                 break;
 303             case eFoundAccountingStr:
 304                 /* Already found matchstring - look for cycle data */
 305                 if (str_starts(line, "Total  "))
 306                 {
 307                     sscanf(line, "Total %*f %lf", &(perfdata->Gcycles[test_nr]));
 308                     iFound = eFoundCycleStr;
 309                 }
 310                 break;
 311             case eFoundCycleStr:
 312                 /* Already found cycle data - look for remaining performance info and return */
 313                 if (str_starts(line, "Performance:"))
 314                 {
 315                     ndum = sscanf(line, "%s %f %f %f %f", dumstring, &dum1, &dum2, &dum3, &dum4);
 316                     /* (ns/day) is the second last entry, depending on whether GMX_DETAILED_PERF_STATS was set in print_perf(), nrnb.c */
 317                     perfdata->ns_per_day[test_nr] = (ndum == 5) ? dum3 : dum1;
 318                     fclose(fp);
 319                     if (bResetChecked || presteps == 0)
 320                     {
 321                         return eParselogOK;
 322                     }
 323                     else
 324                     {
 325                         return eParselogResetProblem;
 326                     }
 327                 }
 328                 break;
 329         }
 330     } /* while */
 331
 332     /* Close the log file */
 333     fclose(fp);
 334
 335     /* Check why there is no performance data in the log file.
 336      * Did a fatal errors occur? */
 337     if (gmx_fexist(errfile))
 338     {
 339         fp = fopen(errfile, "r");
 340         while (fgets(line, STRLEN, fp) != NULL)
 341         {
 342             if (str_starts(line, "Fatal error:") )
 343             {
 344                 if (fgets(line, STRLEN, fp) != NULL)
 345                 {
 346                     fprintf(stderr, "\nWARNING: An error occured during this benchmark:\n"
 347                             "%s\n", line);
 348                 }
 349                 fclose(fp);
 350                 cleandata(perfdata, test_nr);
 351                 return eParselogFatal;
 352             }
 353         }
 354         fclose(fp);
 355     }
 356     else
 357     {
 358         fprintf(stderr, "WARNING: Could not find stderr file %s.\n", errfile);
 359     }
 360
 361     /* Giving up ... we could not find out why there is no performance data in
 362      * the log file. */
 363     fprintf(stdout, "No performance data in log file.\n");
 364     cleandata(perfdata, test_nr);
 365
 366     return eParselogNoPerfData;
 367 }
 368
 369
 370 static gmx_bool analyze_data(
 371         FILE         *fp,
 372         const char   *fn,
 373         t_perf      **perfdata,
 374         int           nnodes,
 375         int           ntprs,
 376         int           ntests,
 377         int           nrepeats,
 378         t_inputinfo  *info,
 379         int          *index_tpr,    /* OUT: Nr of mdp file with best settings */
 380         int          *npme_optimal) /* OUT: Optimal number of PME nodes */
 381 {
 382     int      i, j, k;
 383     int      line  = 0, line_win = -1;
 384     int      k_win = -1, i_win = -1, winPME;
 385     double   s     = 0.0; /* standard deviation */
 386     t_perf  *pd;
 387     char     strbuf[STRLEN];
 388     char     str_PME_f_load[13];
 389     gmx_bool bCanUseOrigTPR;
 390     gmx_bool bRefinedCoul, bRefinedVdW, bRefinedGrid;
 391
 392
 393     if (nrepeats > 1)
 394     {
 395         sep_line(fp);
 396         fprintf(fp, "Summary of successful runs:\n");
 397         fprintf(fp, "Line tpr PME ranks  Gcycles Av.     Std.dev.       ns/day        PME/f");
 398         if (nnodes > 1)
 399         {
 400             fprintf(fp, "    DD grid");
 401         }
 402         fprintf(fp, "\n");
 403     }
 404
 405
 406     for (k = 0; k < ntprs; k++)
 407     {
 408         for (i = 0; i < ntests; i++)
 409         {
 410             /* Select the right dataset: */
 411             pd = &(perfdata[k][i]);
 412
 413             pd->Gcycles_Av    = 0.0;
 414             pd->PME_f_load_Av = 0.0;
 415             pd->ns_per_day_Av = 0.0;
 416
 417             if (pd->nPMEnodes == -1)
 418             {
 419                 sprintf(strbuf, "(%3d)", pd->guessPME);
 420             }
 421             else
 422             {
 423                 sprintf(strbuf, "     ");
 424             }
 425
 426             /* Get the average run time of a setting */
 427             for (j = 0; j < nrepeats; j++)
 428             {
 429                 pd->Gcycles_Av    += pd->Gcycles[j];
 430                 pd->PME_f_load_Av += pd->PME_f_load[j];
 431             }
 432             pd->Gcycles_Av    /= nrepeats;
 433             pd->PME_f_load_Av /= nrepeats;
 434
 435             for (j = 0; j < nrepeats; j++)
 436             {
 437                 if (pd->ns_per_day[j] > 0.0)
 438                 {
 439                     pd->ns_per_day_Av += pd->ns_per_day[j];
 440                 }
 441                 else
 442                 {
 443                     /* Somehow the performance number was not aquired for this run,
 444                      * therefor set the average to some negative value: */
 445                     pd->ns_per_day_Av = -1.0f*nrepeats;
 446                     break;
 447                 }
 448             }
 449             pd->ns_per_day_Av /= nrepeats;
 450
 451             /* Nicer output: */
 452             if (pd->PME_f_load_Av > 0.0)
 453             {
 454                 sprintf(str_PME_f_load, "%12.3f", pd->PME_f_load_Av);
 455             }
 456             else
 457             {
 458                 sprintf(str_PME_f_load, "%s", "         -  ");
 459             }
 460
 461
 462             /* We assume we had a successful run if both averages are positive */
 463             if (pd->Gcycles_Av > 0.0 && pd->ns_per_day_Av > 0.0)
 464             {
 465                 /* Output statistics if repeats were done */
 466                 if (nrepeats > 1)
 467                 {
 468                     /* Calculate the standard deviation */
 469                     s = 0.0;
 470                     for (j = 0; j < nrepeats; j++)
 471                     {
 472                         s += pow( pd->Gcycles[j] - pd->Gcycles_Av, 2 );
 473                     }
 474                     s /= (nrepeats - 1);
 475                     s  = sqrt(s);
 476
 477                     fprintf(fp, "%4d %3d %4d%s %12.3f %12.3f %12.3f %s",
 478                             line, k, pd->nPMEnodes, strbuf, pd->Gcycles_Av, s,
 479                             pd->ns_per_day_Av, str_PME_f_load);
 480                     if (nnodes > 1)
 481                     {
 482                         fprintf(fp, "  %3d %3d %3d", pd->nx, pd->ny, pd->nz);
 483                     }
 484                     fprintf(fp, "\n");
 485                 }
 486                 /* Store the index of the best run found so far in 'winner': */
 487                 if ( (k_win == -1) || (pd->Gcycles_Av < perfdata[k_win][i_win].Gcycles_Av) )
 488                 {
 489                     k_win    = k;
 490                     i_win    = i;
 491                     line_win = line;
 492                 }
 493                 line++;
 494             }
 495         }
 496     }
 497
 498     if (k_win == -1)
 499     {
 500         gmx_fatal(FARGS, "None of the runs was successful! Check %s for problems.", fn);
 501     }
 502
 503     sep_line(fp);
 504
 505     winPME = perfdata[k_win][i_win].nPMEnodes;
 506
 507     if (1 == ntests)
 508     {
 509         /* We stuck to a fixed number of PME-only nodes */
 510         sprintf(strbuf, "settings No. %d", k_win);
 511     }
 512     else
 513     {
 514         /* We have optimized the number of PME-only nodes */
 515         if (winPME == -1)
 516         {
 517             sprintf(strbuf, "%s", "the automatic number of PME ranks");
 518         }
 519         else
 520         {
 521             sprintf(strbuf, "%d PME ranks", winPME);
 522         }
 523     }
 524     fprintf(fp, "Best performance was achieved with %s", strbuf);
 525     if ((nrepeats > 1) && (ntests > 1))
 526     {
 527         fprintf(fp, " (see line %d)", line_win);
 528     }
 529     fprintf(fp, "\n");
 530
 531     /* Only mention settings if they were modified: */
 532     bRefinedCoul = !gmx_within_tol(info->rcoulomb[k_win], info->rcoulomb[0], GMX_REAL_EPS);
 533     bRefinedVdW  = !gmx_within_tol(info->rvdw[k_win], info->rvdw[0], GMX_REAL_EPS);
 534     bRefinedGrid = !(info->nkx[k_win] == info->nkx[0] &&
 535                      info->nky[k_win] == info->nky[0] &&
 536                      info->nkz[k_win] == info->nkz[0]);
 537
 538     if (bRefinedCoul || bRefinedVdW || bRefinedGrid)
 539     {
 540         fprintf(fp, "Optimized PME settings:\n");
 541         bCanUseOrigTPR = FALSE;
 542     }
 543     else
 544     {
 545         bCanUseOrigTPR = TRUE;
 546     }
 547
 548     if (bRefinedCoul)
 549     {
 550         fprintf(fp, "   New Coulomb radius: %f nm (was %f nm)\n", info->rcoulomb[k_win], info->rcoulomb[0]);
 551     }
 552
 553     if (bRefinedVdW)
 554     {
 555         fprintf(fp, "   New Van der Waals radius: %f nm (was %f nm)\n", info->rvdw[k_win], info->rvdw[0]);
 556     }
 557
 558     if (bRefinedGrid)
 559     {
 560         fprintf(fp, "   New Fourier grid xyz: %d %d %d (was %d %d %d)\n", info->nkx[k_win], info->nky[k_win], info->nkz[k_win],
 561                 info->nkx[0], info->nky[0], info->nkz[0]);
 562     }
 563
 564     if (bCanUseOrigTPR && ntprs > 1)
 565     {
 566         fprintf(fp, "and original PME settings.\n");
 567     }
 568
 569     fflush(fp);
 570
 571     /* Return the index of the mdp file that showed the highest performance
 572      * and the optimal number of PME nodes */
 573     *index_tpr    = k_win;
 574     *npme_optimal = winPME;
 575
 576     return bCanUseOrigTPR;
 577 }
 578
 579
 580 /* Get the commands we need to set up the runs from environment variables */
 581 static void get_program_paths(gmx_bool bThreads, char *cmd_mpirun[], char *cmd_mdrun[])
 582 {
 583     char      *cp;
 584     FILE      *fp;
 585     const char def_mpirun[]   = "mpirun";
 586     const char def_mdrun[]    = "mdrun";
 587
 588     const char empty_mpirun[] = "";
 589
 590     /* Get the commands we need to set up the runs from environment variables */
 591     if (!bThreads)
 592     {
 593         if ( (cp = getenv("MPIRUN")) != NULL)
 594         {
 595             *cmd_mpirun = gmx_strdup(cp);
 596         }
 597         else
 598         {
 599             *cmd_mpirun = gmx_strdup(def_mpirun);
 600         }
 601     }
 602     else
 603     {
 604         *cmd_mpirun = gmx_strdup(empty_mpirun);
 605     }
 606
 607     if ( (cp = getenv("MDRUN" )) != NULL)
 608     {
 609         *cmd_mdrun  = gmx_strdup(cp);
 610     }
 611     else
 612     {
 613         *cmd_mdrun  = gmx_strdup(def_mdrun);
 614     }
 615 }
 616
 617 /* Check that the commands will run mdrun (perhaps via mpirun) by
 618  * running a very quick test simulation. Requires MPI environment to
 619  * be available if applicable. */
 620 static void check_mdrun_works(gmx_bool    bThreads,
 621                               const char *cmd_mpirun,
 622                               const char *cmd_np,
 623                               const char *cmd_mdrun)
 624 {
 625     char      *command = NULL;
 626     char      *cp;
 627     char       line[STRLEN];
 628     FILE      *fp;
 629     const char filename[]     = "benchtest.log";
 630
 631     /* This string should always be identical to the one in copyrite.c,
 632      * gmx_print_version_info() in the defined(GMX_MPI) section */
 633     const char match_mpi[]    = "MPI library:        MPI";
 634     const char match_mdrun[]  = "Executable: ";
 635     gmx_bool   bMdrun         = FALSE;
 636     gmx_bool   bMPI           = FALSE;
 637
 638     /* Run a small test to see whether mpirun + mdrun work  */
 639     fprintf(stdout, "Making sure that mdrun can be executed. ");
 640     if (bThreads)
 641     {
 642         snew(command, strlen(cmd_mdrun) + strlen(cmd_np) + strlen(filename) + 50);
 643         sprintf(command, "%s%s-version -maxh 0.001 1> %s 2>&1", cmd_mdrun, cmd_np, filename);
 644     }
 645     else
 646     {
 647         snew(command, strlen(cmd_mpirun) + strlen(cmd_np) + strlen(cmd_mdrun) + strlen(filename) + 50);
 648         sprintf(command, "%s%s%s -version -maxh 0.001 1> %s 2>&1", cmd_mpirun, cmd_np, cmd_mdrun, filename);
 649     }
 650     fprintf(stdout, "Trying '%s' ... ", command);
 651     make_backup(filename);
 652     gmx_system_call(command);
 653
 654     /* Check if we find the characteristic string in the output: */
 655     if (!gmx_fexist(filename))
 656     {
 657         gmx_fatal(FARGS, "Output from test run could not be found.");
 658     }
 659
 660     fp = fopen(filename, "r");
 661     /* We need to scan the whole output file, since sometimes the queuing system
 662      * also writes stuff to stdout/err */
 663     while (!feof(fp) )
 664     {
 665         cp = fgets(line, STRLEN, fp);
 666         if (cp != NULL)
 667         {
 668             if (str_starts(line, match_mdrun) )
 669             {
 670                 bMdrun = TRUE;
 671             }
 672             if (str_starts(line, match_mpi) )
 673             {
 674                 bMPI = TRUE;
 675             }
 676         }
 677     }
 678     fclose(fp);
 679
 680     if (bThreads)
 681     {
 682         if (bMPI)
 683         {
 684             gmx_fatal(FARGS, "Need a threaded version of mdrun. This one\n"
 685                       "(%s)\n"
 686                       "seems to have been compiled with MPI instead.",
 687                       cmd_mdrun);
 688         }
 689     }
 690     else
 691     {
 692         if (bMdrun && !bMPI)
 693         {
 694             gmx_fatal(FARGS, "Need an MPI-enabled version of mdrun. This one\n"
 695                       "(%s)\n"
 696                       "seems to have been compiled without MPI support.",
 697                       cmd_mdrun);
 698         }
 699     }
 700
 701     if (!bMdrun)
 702     {
 703         gmx_fatal(FARGS, "Cannot execute mdrun. Please check %s for problems!",
 704                   filename);
 705     }
 706
 707     fprintf(stdout, "passed.\n");
 708
 709     /* Clean up ... */
 710     remove(filename);
 711     sfree(command);
 712 }
 713
 714
 715 static void launch_simulation(
 716         gmx_bool    bLaunch,        /* Should the simulation be launched? */
 717         FILE       *fp,             /* General log file */
 718         gmx_bool    bThreads,       /* whether to use threads */
 719         char       *cmd_mpirun,     /* Command for mpirun */
 720         char       *cmd_np,         /* Switch for -np or -ntmpi or empty */
 721         char       *cmd_mdrun,      /* Command for mdrun */
 722         char       *args_for_mdrun, /* Arguments for mdrun */
 723         const char *simulation_tpr, /* This tpr will be simulated */
 724         int         nPMEnodes)      /* Number of PME nodes to use */
 725 {
 726     char  *command;
 727
 728
 729     /* Make enough space for the system call command,
 730      * (100 extra chars for -npme ... etc. options should suffice): */
 731     snew(command, strlen(cmd_mpirun)+strlen(cmd_mdrun)+strlen(cmd_np)+strlen(args_for_mdrun)+strlen(simulation_tpr)+100);
 732
 733     /* Note that the -passall options requires args_for_mdrun to be at the end
 734      * of the command line string */
 735     if (bThreads)
 736     {
 737         sprintf(command, "%s%s-npme %d -s %s %s",
 738                 cmd_mdrun, cmd_np, nPMEnodes, simulation_tpr, args_for_mdrun);
 739     }
 740     else
 741     {
 742         sprintf(command, "%s%s%s -npme %d -s %s %s",
 743                 cmd_mpirun, cmd_np, cmd_mdrun, nPMEnodes, simulation_tpr, args_for_mdrun);
 744     }
 745
 746     fprintf(fp, "%s this command line to launch the simulation:\n\n%s", bLaunch ? "Using" : "Please use", command);
 747     sep_line(fp);
 748     fflush(fp);
 749
 750     /* Now the real thing! */
 751     if (bLaunch)
 752     {
 753         fprintf(stdout, "\nLaunching simulation with best parameters now.\nExecuting '%s'", command);
 754         sep_line(stdout);
 755         fflush(stdout);
 756         gmx_system_call(command);
 757     }
 758 }
 759
 760
 761 static void modify_PMEsettings(
 762         gmx_int64_t     simsteps,    /* Set this value as number of time steps */
 763         gmx_int64_t     init_step,   /* Set this value as init_step */
 764         const char     *fn_best_tpr, /* tpr file with the best performance */
 765         const char     *fn_sim_tpr)  /* name of tpr file to be launched */
 766 {
 767     t_inputrec   *ir;
 768     t_state       state;
 769     gmx_mtop_t    mtop;
 770     char          buf[200];
 771
 772     snew(ir, 1);
 773     read_tpx_state(fn_best_tpr, ir, &state, NULL, &mtop);
 774
 775     /* Reset nsteps and init_step to the value of the input .tpr file */
 776     ir->nsteps    = simsteps;
 777     ir->init_step = init_step;
 778
 779     /* Write the tpr file which will be launched */
 780     sprintf(buf, "Writing optimized simulation file %s with nsteps=%s.\n", fn_sim_tpr, "%"GMX_PRId64);
 781     fprintf(stdout, buf, ir->nsteps);
 782     fflush(stdout);
 783     write_tpx_state(fn_sim_tpr, ir, &state, &mtop);
 784
 785     sfree(ir);
 786 }
 787
 788
 789 #define EPME_SWITCHED(e) ((e) == eelPMESWITCH || (e) == eelPMEUSERSWITCH)
 790
 791 /* Make additional TPR files with more computational load for the
 792  * direct space processors: */
 793 static void make_benchmark_tprs(
 794         const char     *fn_sim_tpr,      /* READ : User-provided tpr file                 */
 795         char           *fn_bench_tprs[], /* WRITE: Names of benchmark tpr files           */
 796         gmx_int64_t     benchsteps,      /* Number of time steps for benchmark runs       */
 797         gmx_int64_t     statesteps,      /* Step counter in checkpoint file               */
 798         real            rmin,            /* Minimal Coulomb radius                        */
 799         real            rmax,            /* Maximal Coulomb radius                        */
 800         real            bScaleRvdw,      /* Scale rvdw along with rcoulomb                */
 801         int            *ntprs,           /* No. of TPRs to write, each with a different
 802                                             rcoulomb and fourierspacing                   */
 803         t_inputinfo    *info,            /* Contains information about mdp file options   */
 804         FILE           *fp)              /* Write the output here                         */
 805 {
 806     int           i, j, d;
 807     t_inputrec   *ir;
 808     t_state       state;
 809     gmx_mtop_t    mtop;
 810     real          nlist_buffer;     /* Thickness of the buffer regions for PME-switch potentials */
 811     char          buf[200];
 812     rvec          box_size;
 813     gmx_bool      bNote = FALSE;
 814     real          add;              /* Add this to rcoul for the next test    */
 815     real          fac = 1.0;        /* Scaling factor for Coulomb radius      */
 816     real          fourierspacing;   /* Basic fourierspacing from tpr          */
 817
 818
 819     sprintf(buf, "Making benchmark tpr file%s with %s time step%s",
 820             *ntprs > 1 ? "s" : "", "%"GMX_PRId64, benchsteps > 1 ? "s" : "");
 821     fprintf(stdout, buf, benchsteps);
 822     if (statesteps > 0)
 823     {
 824         sprintf(buf, " (adding %s steps from checkpoint file)", "%"GMX_PRId64);
 825         fprintf(stdout, buf, statesteps);
 826         benchsteps += statesteps;
 827     }
 828     fprintf(stdout, ".\n");
 829
 830
 831     snew(ir, 1);
 832     read_tpx_state(fn_sim_tpr, ir, &state, NULL, &mtop);
 833
 834     /* Check if some kind of PME was chosen */
 835     if (EEL_PME(ir->coulombtype) == FALSE)
 836     {
 837         gmx_fatal(FARGS, "Can only do optimizations for simulations with %s electrostatics.",
 838                   EELTYPE(eelPME));
 839     }
 840
 841     /* Check if rcoulomb == rlist, which is necessary for plain PME. */
 842     if (  (ir->cutoff_scheme != ecutsVERLET) &&
 843           (eelPME == ir->coulombtype) && !(ir->rcoulomb == ir->rlist))
 844     {
 845         gmx_fatal(FARGS, "%s requires rcoulomb (%f) to be equal to rlist (%f).",
 846                   EELTYPE(eelPME), ir->rcoulomb, ir->rlist);
 847     }
 848     /* For other PME types, rcoulomb is allowed to be smaller than rlist */
 849     else if (ir->rcoulomb > ir->rlist)
 850     {
 851         gmx_fatal(FARGS, "%s requires rcoulomb (%f) to be equal to or smaller than rlist (%f)",
 852                   EELTYPE(ir->coulombtype), ir->rcoulomb, ir->rlist);
 853     }
 854
 855     if (bScaleRvdw && ir->rvdw != ir->rcoulomb)
 856     {
 857         fprintf(stdout, "NOTE: input rvdw != rcoulomb, will not scale rvdw\n");
 858         bScaleRvdw = FALSE;
 859     }
 860
 861     /* Reduce the number of steps for the benchmarks */
 862     info->orig_sim_steps = ir->nsteps;
 863     ir->nsteps           = benchsteps;
 864     /* We must not use init_step from the input tpr file for the benchmarks */
 865     info->orig_init_step = ir->init_step;
 866     ir->init_step        = 0;
 867
 868     /* For PME-switch potentials, keep the radial distance of the buffer region */
 869     nlist_buffer   = ir->rlist - ir->rcoulomb;
 870
 871     /* Determine length of triclinic box vectors */
 872     for (d = 0; d < DIM; d++)
 873     {
 874         box_size[d] = 0;
 875         for (i = 0; i < DIM; i++)
 876         {
 877             box_size[d] += state.box[d][i]*state.box[d][i];
 878         }
 879         box_size[d] = sqrt(box_size[d]);
 880     }
 881
 882     if (ir->fourier_spacing > 0)
 883     {
 884         info->fsx[0] = ir->fourier_spacing;
 885         info->fsy[0] = ir->fourier_spacing;
 886         info->fsz[0] = ir->fourier_spacing;
 887     }
 888     else
 889     {
 890         /* Reconstruct fourierspacing per dimension from the number of grid points and box size */
 891         info->fsx[0] = box_size[XX]/ir->nkx;
 892         info->fsy[0] = box_size[YY]/ir->nky;
 893         info->fsz[0] = box_size[ZZ]/ir->nkz;
 894     }
 895
 896     /* If no value for the fourierspacing was provided on the command line, we
 897      * use the reconstruction from the tpr file */
 898     if (ir->fourier_spacing > 0)
 899     {
 900         /* Use the spacing from the tpr */
 901         fourierspacing = ir->fourier_spacing;
 902     }
 903     else
 904     {
 905         /* Use the maximum observed spacing */
 906         fourierspacing = max(max(info->fsx[0], info->fsy[0]), info->fsz[0]);
 907     }
 908
 909     fprintf(stdout, "Calculating PME grid points on the basis of a fourierspacing of %f nm\n", fourierspacing);
 910
 911     /* For performance comparisons the number of particles is useful to have */
 912     fprintf(fp, "   Number of particles  : %d\n", mtop.natoms);
 913
 914     /* Print information about settings of which some are potentially modified: */
 915     fprintf(fp, "   Coulomb type         : %s\n", EELTYPE(ir->coulombtype));
 916     fprintf(fp, "   Grid spacing x y z   : %f %f %f\n",
 917             box_size[XX]/ir->nkx, box_size[YY]/ir->nky, box_size[ZZ]/ir->nkz);
 918     fprintf(fp, "   Van der Waals type   : %s\n", EVDWTYPE(ir->vdwtype));
 919     if (ir_vdw_switched(ir))
 920     {
 921         fprintf(fp, "   rvdw_switch          : %f nm\n", ir->rvdw_switch);
 922     }
 923     if (EPME_SWITCHED(ir->coulombtype))
 924     {
 925         fprintf(fp, "   rlist                : %f nm\n", ir->rlist);
 926     }
 927     if (ir->rlistlong != max_cutoff(ir->rvdw, ir->rcoulomb))
 928     {
 929         fprintf(fp, "   rlistlong            : %f nm\n", ir->rlistlong);
 930     }
 931
 932     /* Print a descriptive line about the tpr settings tested */
 933     fprintf(fp, "\nWill try these real/reciprocal workload settings:\n");
 934     fprintf(fp, " No.   scaling  rcoulomb");
 935     fprintf(fp, "  nkx  nky  nkz");
 936     fprintf(fp, "   spacing");
 937     if (evdwCUT == ir->vdwtype)
 938     {
 939         fprintf(fp, "      rvdw");
 940     }
 941     if (EPME_SWITCHED(ir->coulombtype))
 942     {
 943         fprintf(fp, "     rlist");
 944     }
 945     if (ir->rlistlong != max_cutoff(ir->rlist, max_cutoff(ir->rvdw, ir->rcoulomb)) )
 946     {
 947         fprintf(fp, " rlistlong");
 948     }
 949     fprintf(fp, "  tpr file\n");
 950
 951     /* Loop to create the requested number of tpr input files */
 952     for (j = 0; j < *ntprs; j++)
 953     {
 954         /* The first .tpr is the provided one, just need to modify nsteps,
 955          * so skip the following block */
 956         if (j != 0)
 957         {
 958             /* Determine which Coulomb radii rc to use in the benchmarks */
 959             add = (rmax-rmin)/(*ntprs-1);
 960             if (gmx_within_tol(rmin, info->rcoulomb[0], GMX_REAL_EPS))
 961             {
 962                 ir->rcoulomb = rmin + j*add;
 963             }
 964             else if (gmx_within_tol(rmax, info->rcoulomb[0], GMX_REAL_EPS))
 965             {
 966                 ir->rcoulomb = rmin + (j-1)*add;
 967             }
 968             else
 969             {
 970                 /* rmin != rcoul != rmax, ergo test between rmin and rmax */
 971                 add          = (rmax-rmin)/(*ntprs-2);
 972                 ir->rcoulomb = rmin + (j-1)*add;
 973             }
 974
 975             /* Determine the scaling factor fac */
 976             fac = ir->rcoulomb/info->rcoulomb[0];
 977
 978             /* Scale the Fourier grid spacing */
 979             ir->nkx = ir->nky = ir->nkz = 0;
 980             calc_grid(NULL, state.box, fourierspacing*fac, &ir->nkx, &ir->nky, &ir->nkz);
 981
 982             /* Adjust other radii since various conditions need to be fulfilled */
 983             if (eelPME == ir->coulombtype)
 984             {
 985                 /* plain PME, rcoulomb must be equal to rlist */
 986                 ir->rlist = ir->rcoulomb;
 987             }
 988             else
 989             {
 990                 /* rlist must be >= rcoulomb, we keep the size of the buffer region */
 991                 ir->rlist = ir->rcoulomb + nlist_buffer;
 992             }
 993
 994             if (bScaleRvdw && evdwCUT == ir->vdwtype)
 995             {
 996                 if (ecutsVERLET == ir->cutoff_scheme)
 997                 {
 998                     /* With Verlet, the van der Waals radius must always equal the Coulomb radius */
 999                     ir->rvdw = ir->rcoulomb;
1000                 }
1001                 else
1002                 {
1003                     /* For vdw cutoff, rvdw >= rlist */
1004                     ir->rvdw = max(info->rvdw[0], ir->rlist);
1005                 }
1006             }
1007
1008             ir->rlistlong = max_cutoff(ir->rlist, max_cutoff(ir->rvdw, ir->rcoulomb));
1009
1010         } /* end of "if (j != 0)" */
1011
1012         /* for j==0: Save the original settings
1013          * for j >0: Save modified radii and Fourier grids */
1014         info->rcoulomb[j]  = ir->rcoulomb;
1015         info->rvdw[j]      = ir->rvdw;
1016         info->nkx[j]       = ir->nkx;
1017         info->nky[j]       = ir->nky;
1018         info->nkz[j]       = ir->nkz;
1019         info->rlist[j]     = ir->rlist;
1020         info->rlistlong[j] = ir->rlistlong;
1021         info->fsx[j]       = fac*fourierspacing;
1022         info->fsy[j]       = fac*fourierspacing;
1023         info->fsz[j]       = fac*fourierspacing;
1024
1025         /* Write the benchmark tpr file */
1026         strncpy(fn_bench_tprs[j], fn_sim_tpr, strlen(fn_sim_tpr)-strlen(".tpr"));
1027         sprintf(buf, "_bench%.2d.tpr", j);
1028         strcat(fn_bench_tprs[j], buf);
1029         fprintf(stdout, "Writing benchmark tpr %s with nsteps=", fn_bench_tprs[j]);
1030         fprintf(stdout, "%"GMX_PRId64, ir->nsteps);
1031         if (j > 0)
1032         {
1033             fprintf(stdout, ", scaling factor %f\n", fac);
1034         }
1035         else
1036         {
1037             fprintf(stdout, ", unmodified settings\n");
1038         }
1039
1040         write_tpx_state(fn_bench_tprs[j], ir, &state, &mtop);
1041
1042         /* Write information about modified tpr settings to log file */
1043         fprintf(fp, "%4d%10f%10f", j, fac, ir->rcoulomb);
1044         fprintf(fp, "%5d%5d%5d", ir->nkx, ir->nky, ir->nkz);
1045         fprintf(fp, " %9f ", info->fsx[j]);
1046         if (evdwCUT == ir->vdwtype)
1047         {
1048             fprintf(fp, "%10f", ir->rvdw);
1049         }
1050         if (EPME_SWITCHED(ir->coulombtype))
1051         {
1052             fprintf(fp, "%10f", ir->rlist);
1053         }
1054         if (info->rlistlong[0] != max_cutoff(info->rlist[0], max_cutoff(info->rvdw[0], info->rcoulomb[0])) )
1055         {
1056             fprintf(fp, "%10f", ir->rlistlong);
1057         }
1058         fprintf(fp, "  %-14s\n", fn_bench_tprs[j]);
1059
1060         /* Make it clear to the user that some additional settings were modified */
1061         if (!gmx_within_tol(ir->rvdw, info->rvdw[0], GMX_REAL_EPS)
1062             || !gmx_within_tol(ir->rlistlong, info->rlistlong[0], GMX_REAL_EPS) )
1063         {
1064             bNote = TRUE;
1065         }
1066     }
1067     if (bNote)
1068     {
1069         fprintf(fp, "\nNote that in addition to the Coulomb radius and the Fourier grid\n"
1070                 "other input settings were also changed (see table above).\n"
1071                 "Please check if the modified settings are appropriate.\n");
1072     }
1073     fflush(stdout);
1074     fflush(fp);
1075     sfree(ir);
1076 }
1077
1078
1079 /* Rename the files we want to keep to some meaningful filename and
1080  * delete the rest */
1081 static void cleanup(const t_filenm *fnm, int nfile, int k, int nnodes,
1082                     int nPMEnodes, int nr, gmx_bool bKeepStderr)
1083 {
1084     char        numstring[STRLEN];
1085     char        newfilename[STRLEN];
1086     const char *fn = NULL;
1087     int         i;
1088     const char *opt;
1089
1090
1091     fprintf(stdout, "Cleaning up, deleting benchmark temp files ...\n");
1092
1093     for (i = 0; i < nfile; i++)
1094     {
1095         opt = (char *)fnm[i].opt;
1096         if (strcmp(opt, "-p") == 0)
1097         {
1098             /* do nothing; keep this file */
1099             ;
1100         }
1101         else if (strcmp(opt, "-bg") == 0)
1102         {
1103             /* Give the log file a nice name so one can later see which parameters were used */
1104             numstring[0] = '\0';
1105             if (nr > 0)
1106             {
1107                 sprintf(numstring, "_%d", nr);
1108             }
1109             sprintf(newfilename, "%s_no%d_np%d_npme%d%s", opt2fn("-bg", nfile, fnm), k, nnodes, nPMEnodes, numstring);
1110             if (gmx_fexist(opt2fn("-bg", nfile, fnm)))
1111             {
1112                 fprintf(stdout, "renaming log file to %s\n", newfilename);
1113                 make_backup(newfilename);
1114                 rename(opt2fn("-bg", nfile, fnm), newfilename);
1115             }
1116         }
1117         else if (strcmp(opt, "-err") == 0)
1118         {
1119             /* This file contains the output of stderr. We want to keep it in
1120              * cases where there have been problems. */
1121             fn           = opt2fn(opt, nfile, fnm);
1122             numstring[0] = '\0';
1123             if (nr > 0)
1124             {
1125                 sprintf(numstring, "_%d", nr);
1126             }
1127             sprintf(newfilename, "%s_no%d_np%d_npme%d%s", fn, k, nnodes, nPMEnodes, numstring);
1128             if (gmx_fexist(fn))
1129             {
1130                 if (bKeepStderr)
1131                 {
1132                     fprintf(stdout, "Saving stderr output in %s\n", newfilename);
1133                     make_backup(newfilename);
1134                     rename(fn, newfilename);
1135                 }
1136                 else
1137                 {
1138                     fprintf(stdout, "Deleting %s\n", fn);
1139                     remove(fn);
1140                 }
1141             }
1142         }
1143         /* Delete the files which are created for each benchmark run: (options -b*) */
1144         else if ( (0 == strncmp(opt, "-b", 2)) && (opt2bSet(opt, nfile, fnm) || !is_optional(&fnm[i])) )
1145         {
1146             remove_if_exists(opt2fn(opt, nfile, fnm));
1147         }
1148     }
1149 }
1150
1151
1152 enum {
1153     eNpmeAuto, eNpmeAll, eNpmeReduced, eNpmeSubset, eNpmeNr
1154 };
1155
1156 /* Create a list of numbers of PME nodes to test */
1157 static void make_npme_list(
1158         const char *npmevalues_opt, /* Make a complete list with all
1159                                      * possibilities or a short list that keeps only
1160                                      * reasonable numbers of PME nodes                  */
1161         int        *nentries,       /* Number of entries we put in the nPMEnodes list   */
1162         int        *nPMEnodes[],    /* Each entry contains the value for -npme          */
1163         int         nnodes,         /* Total number of nodes to do the tests on         */
1164         int         minPMEnodes,    /* Minimum number of PME nodes                      */
1165         int         maxPMEnodes)    /* Maximum number of PME nodes                      */
1166 {
1167     int i, npme, npp;
1168     int min_factor = 1;   /* We request that npp and npme have this minimal
1169                            * largest common factor (depends on npp)           */
1170     int nlistmax;         /* Max. list size                                   */
1171     int nlist;            /* Actual number of entries in list                 */
1172     int eNPME = 0;
1173
1174
1175     /* Do we need to check all possible values for -npme or is a reduced list enough? */
1176     if (0 == strcmp(npmevalues_opt, "all") )
1177     {
1178         eNPME = eNpmeAll;
1179     }
1180     else if (0 == strcmp(npmevalues_opt, "subset") )
1181     {
1182         eNPME = eNpmeSubset;
1183     }
1184     else /* "auto" or "range" */
1185     {
1186         if (nnodes <= 64)
1187         {
1188             eNPME = eNpmeAll;
1189         }
1190         else if (nnodes < 128)
1191         {
1192             eNPME = eNpmeReduced;
1193         }
1194         else
1195         {
1196             eNPME = eNpmeSubset;
1197         }
1198     }
1199
1200     /* Calculate how many entries we could possibly have (in case of -npme all) */
1201     if (nnodes > 2)
1202     {
1203         nlistmax = maxPMEnodes - minPMEnodes + 3;
1204         if (0 == minPMEnodes)
1205         {
1206             nlistmax--;
1207         }
1208     }
1209     else
1210     {
1211         nlistmax = 1;
1212     }
1213
1214     /* Now make the actual list which is at most of size nlist */
1215     snew(*nPMEnodes, nlistmax);
1216     nlist = 0; /* start counting again, now the real entries in the list */
1217     for (i = 0; i < nlistmax - 2; i++)
1218     {
1219         npme = maxPMEnodes - i;
1220         npp  = nnodes-npme;
1221         switch (eNPME)
1222         {
1223             case eNpmeAll:
1224                 min_factor = 1;
1225                 break;
1226             case eNpmeReduced:
1227                 min_factor = 2;
1228                 break;
1229             case eNpmeSubset:
1230                 /* For 2d PME we want a common largest factor of at least the cube
1231                  * root of the number of PP nodes */
1232                 min_factor = (int) pow(npp, 1.0/3.0);
1233                 break;
1234             default:
1235                 gmx_fatal(FARGS, "Unknown option for eNPME in make_npme_list");
1236                 break;
1237         }
1238         if (gmx_greatest_common_divisor(npp, npme) >= min_factor)
1239         {
1240             (*nPMEnodes)[nlist] = npme;
1241             nlist++;
1242         }
1243     }
1244     /* We always test 0 PME nodes and the automatic number */
1245     *nentries             = nlist + 2;
1246     (*nPMEnodes)[nlist  ] =  0;
1247     (*nPMEnodes)[nlist+1] = -1;
1248
1249     fprintf(stderr, "Will try the following %d different values for -npme:\n", *nentries);
1250     for (i = 0; i < *nentries-1; i++)
1251     {
1252         fprintf(stderr, "%d, ", (*nPMEnodes)[i]);
1253     }
1254     fprintf(stderr, "and %d (auto).\n", (*nPMEnodes)[*nentries-1]);
1255 }
1256
1257
1258 /* Allocate memory to store the performance data */
1259 static void init_perfdata(t_perf *perfdata[], int ntprs, int datasets, int repeats)
1260 {
1261     int i, j, k;
1262
1263
1264     for (k = 0; k < ntprs; k++)
1265     {
1266         snew(perfdata[k], datasets);
1267         for (i = 0; i < datasets; i++)
1268         {
1269             for (j = 0; j < repeats; j++)
1270             {
1271                 snew(perfdata[k][i].Gcycles, repeats);
1272                 snew(perfdata[k][i].ns_per_day, repeats);
1273                 snew(perfdata[k][i].PME_f_load, repeats);
1274             }
1275         }
1276     }
1277 }
1278
1279
1280 /* Check for errors on mdrun -h */
1281 static void make_sure_it_runs(char *mdrun_cmd_line, int length, FILE *fp,
1282                               const t_filenm *fnm, int nfile)
1283 {
1284     const char *fn = NULL;
1285     char       *command, *msg;
1286     int         ret;
1287
1288
1289     snew(command, length +  15);
1290     snew(msg, length + 500);
1291
1292     fprintf(stdout, "Making sure the benchmarks can be executed by running just 1 step...\n");
1293     sprintf(command, "%s -nsteps 1 -quiet", mdrun_cmd_line);
1294     fprintf(stdout, "Executing '%s' ...\n", command);
1295     ret = gmx_system_call(command);
1296
1297     if (0 != ret)
1298     {
1299         /* To prevent confusion, do not again issue a gmx_fatal here since we already
1300          * get the error message from mdrun itself */
1301         sprintf(msg,    "Cannot run the benchmark simulations! Please check the error message of\n"
1302                 "mdrun for the source of the problem. Did you provide a command line\n"
1303                 "argument that neither g_tune_pme nor mdrun understands? Offending command:\n"
1304                 "\n%s\n\n", command);
1305
1306         fprintf(stderr, "%s", msg);
1307         sep_line(fp);
1308         fprintf(fp, "%s", msg);
1309
1310         exit(ret);
1311     }
1312     fprintf(stdout, "Benchmarks can be executed!\n");
1313
1314     /* Clean up the benchmark output files we just created */
1315     fprintf(stdout, "Cleaning up ...\n");
1316     remove_if_exists(opt2fn("-bc", nfile, fnm));
1317     remove_if_exists(opt2fn("-be", nfile, fnm));
1318     remove_if_exists(opt2fn("-bcpo", nfile, fnm));
1319     remove_if_exists(opt2fn("-bg", nfile, fnm));
1320
1321     sfree(command);
1322     sfree(msg    );
1323 }
1324
1325
1326 static void do_the_tests(
1327         FILE           *fp,             /* General g_tune_pme output file         */
1328         char          **tpr_names,      /* Filenames of the input files to test   */
1329         int             maxPMEnodes,    /* Max fraction of nodes to use for PME   */
1330         int             minPMEnodes,    /* Min fraction of nodes to use for PME   */
1331         int             npme_fixed,     /* If >= -1, test fixed number of PME
1332                                          * nodes only                             */
1333         const char     *npmevalues_opt, /* Which -npme values should be tested    */
1334         t_perf        **perfdata,       /* Here the performace data is stored     */
1335         int            *pmeentries,     /* Entries in the nPMEnodes list          */
1336         int             repeats,        /* Repeat each test this often            */
1337         int             nnodes,         /* Total number of nodes = nPP + nPME     */
1338         int             nr_tprs,        /* Total number of tpr files to test      */
1339         gmx_bool        bThreads,       /* Threads or MPI?                        */
1340         char           *cmd_mpirun,     /* mpirun command string                  */
1341         char           *cmd_np,         /* "-np", "-n", whatever mpirun needs     */
1342         char           *cmd_mdrun,      /* mdrun command string                   */
1343         char           *cmd_args_bench, /* arguments for mdrun in a string        */
1344         const t_filenm *fnm,            /* List of filenames from command line    */
1345         int             nfile,          /* Number of files specified on the cmdl. */
1346         int             presteps,       /* DLB equilibration steps, is checked    */
1347         gmx_int64_t     cpt_steps)      /* Time step counter in the checkpoint    */
1348 {
1349     int      i, nr, k, ret, count = 0, totaltests;
1350     int     *nPMEnodes = NULL;
1351     t_perf  *pd        = NULL;
1352     int      cmdline_length;
1353     char    *command, *cmd_stub;
1354     char     buf[STRLEN];
1355     gmx_bool bResetProblem = FALSE;
1356     gmx_bool bFirst        = TRUE;
1357
1358
1359     /* This string array corresponds to the eParselog enum type at the start
1360      * of this file */
1361     const char* ParseLog[] = {
1362         "OK.",
1363         "Logfile not found!",
1364         "No timings, logfile truncated?",
1365         "Run was terminated.",
1366         "Counters were not reset properly.",
1367         "No DD grid found for these settings.",
1368         "TPX version conflict!",
1369         "mdrun was not started in parallel!",
1370         "Number of PP ranks has a prime factor that is too large.",
1371         "An error occured."
1372     };
1373     char        str_PME_f_load[13];
1374
1375
1376     /* Allocate space for the mdrun command line. 100 extra characters should
1377        be more than enough for the -npme etcetera arguments */
1378     cmdline_length =  strlen(cmd_mpirun)
1379         + strlen(cmd_np)
1380         + strlen(cmd_mdrun)
1381         + strlen(cmd_args_bench)
1382         + strlen(tpr_names[0]) + 100;
1383     snew(command, cmdline_length);
1384     snew(cmd_stub, cmdline_length);
1385
1386     /* Construct the part of the command line that stays the same for all tests: */
1387     if (bThreads)
1388     {
1389         sprintf(cmd_stub, "%s%s", cmd_mdrun, cmd_np);
1390     }
1391     else
1392     {
1393         sprintf(cmd_stub, "%s%s%s ", cmd_mpirun, cmd_np, cmd_mdrun);
1394     }
1395
1396     /* Create a list of numbers of PME nodes to test */
1397     if (npme_fixed < -1)
1398     {
1399         make_npme_list(npmevalues_opt, pmeentries, &nPMEnodes,
1400                        nnodes, minPMEnodes, maxPMEnodes);
1401     }
1402     else
1403     {
1404         *pmeentries  = 1;
1405         snew(nPMEnodes, 1);
1406         nPMEnodes[0] = npme_fixed;
1407         fprintf(stderr, "Will use a fixed number of %d PME-only ranks.\n", nPMEnodes[0]);
1408     }
1409
1410     if (0 == repeats)
1411     {
1412         fprintf(fp, "\nNo benchmarks done since number of repeats (-r) is 0.\n");
1413         gmx_ffclose(fp);
1414         finalize(opt2fn("-p", nfile, fnm));
1415         exit(0);
1416     }
1417
1418     /* Allocate one dataset for each tpr input file: */
1419     init_perfdata(perfdata, nr_tprs, *pmeentries, repeats);
1420
1421     /*****************************************/
1422     /* Main loop over all tpr files to test: */
1423     /*****************************************/
1424     totaltests = nr_tprs*(*pmeentries)*repeats;
1425     for (k = 0; k < nr_tprs; k++)
1426     {
1427         fprintf(fp, "\nIndividual timings for input file %d (%s):\n", k, tpr_names[k]);
1428         fprintf(fp, "PME ranks      Gcycles       ns/day        PME/f    Remark\n");
1429         /* Loop over various numbers of PME nodes: */
1430         for (i = 0; i < *pmeentries; i++)
1431         {
1432             pd = &perfdata[k][i];
1433
1434             /* Loop over the repeats for each scenario: */
1435             for (nr = 0; nr < repeats; nr++)
1436             {
1437                 pd->nPMEnodes = nPMEnodes[i];
1438
1439                 /* Add -npme and -s to the command line and save it. Note that
1440                  * the -passall (if set) options requires cmd_args_bench to be
1441                  * at the end of the command line string */
1442                 snew(pd->mdrun_cmd_line, cmdline_length);
1443                 sprintf(pd->mdrun_cmd_line, "%s-npme %d -s %s %s",
1444                         cmd_stub, pd->nPMEnodes, tpr_names[k], cmd_args_bench);
1445
1446                 /* To prevent that all benchmarks fail due to a show-stopper argument
1447                  * on the mdrun command line, we make a quick check first */
1448                 if (bFirst)
1449                 {
1450                     make_sure_it_runs(pd->mdrun_cmd_line, cmdline_length, fp, fnm, nfile);
1451                 }
1452                 bFirst = FALSE;
1453
1454                 /* Do a benchmark simulation: */
1455                 if (repeats > 1)
1456                 {
1457                     sprintf(buf, ", pass %d/%d", nr+1, repeats);
1458                 }
1459                 else
1460                 {
1461                     buf[0] = '\0';
1462                 }
1463                 fprintf(stdout, "\n=== Progress %2.0f%%, tpr %d/%d, run %d/%d%s:\n",
1464                         (100.0*count)/totaltests,
1465                         k+1, nr_tprs, i+1, *pmeentries, buf);
1466                 make_backup(opt2fn("-err", nfile, fnm));
1467                 sprintf(command, "%s 1> /dev/null 2>%s", pd->mdrun_cmd_line, opt2fn("-err", nfile, fnm));
1468                 fprintf(stdout, "%s\n", pd->mdrun_cmd_line);
1469                 gmx_system_call(command);
1470
1471                 /* Collect the performance data from the log file; also check stderr
1472                  * for fatal errors */
1473                 ret = parse_logfile(opt2fn("-bg", nfile, fnm), opt2fn("-err", nfile, fnm),
1474                                     pd, nr, presteps, cpt_steps, nnodes);
1475                 if ((presteps > 0) && (ret == eParselogResetProblem))
1476                 {
1477                     bResetProblem = TRUE;
1478                 }
1479
1480                 if (-1 == pd->nPMEnodes)
1481                 {
1482                     sprintf(buf, "(%3d)", pd->guessPME);
1483                 }
1484                 else
1485                 {
1486                     sprintf(buf, "     ");
1487                 }
1488
1489                 /* Nicer output */
1490                 if (pd->PME_f_load[nr] > 0.0)
1491                 {
1492                     sprintf(str_PME_f_load, "%12.3f", pd->PME_f_load[nr]);
1493                 }
1494                 else
1495                 {
1496                     sprintf(str_PME_f_load, "%s", "         -  ");
1497                 }
1498
1499                 /* Write the data we got to disk */
1500                 fprintf(fp, "%4d%s %12.3f %12.3f %s    %s", pd->nPMEnodes,
1501                         buf, pd->Gcycles[nr], pd->ns_per_day[nr], str_PME_f_load, ParseLog[ret]);
1502                 if (!(ret == eParselogOK || ret == eParselogNoDDGrid || ret == eParselogNotFound) )
1503                 {
1504                     fprintf(fp, " Check %s file for problems.", ret == eParselogFatal ? "err" : "log");
1505                 }
1506                 fprintf(fp, "\n");
1507                 fflush(fp);
1508                 count++;
1509
1510                 /* Do some cleaning up and delete the files we do not need any more */
1511                 cleanup(fnm, nfile, k, nnodes, pd->nPMEnodes, nr, ret == eParselogFatal);
1512
1513                 /* If the first run with this number of processors already failed, do not try again: */
1514                 if (pd->Gcycles[0] <= 0.0 && repeats > 1)
1515                 {
1516                     fprintf(stdout, "Skipping remaining passes of unsuccessful setting, see log file for details.\n");
1517                     count += repeats-(nr+1);
1518                     break;
1519                 }
1520             } /* end of repeats loop */
1521         }     /* end of -npme loop */
1522     }         /* end of tpr file loop */
1523
1524     if (bResetProblem)
1525     {
1526         sep_line(fp);
1527         fprintf(fp, "WARNING: The cycle and time step counters could not be reset properly. ");
1528         sep_line(fp);
1529     }
1530     sfree(command);
1531     sfree(cmd_stub);
1532 }
1533
1534
1535 static void check_input(
1536         int             nnodes,
1537         int             repeats,
1538         int            *ntprs,
1539         real           *rmin,
1540         real            rcoulomb,
1541         real           *rmax,
1542         real            maxPMEfraction,
1543         real            minPMEfraction,
1544         int             npme_fixed,
1545         gmx_int64_t     bench_nsteps,
1546         const t_filenm *fnm,
1547         int             nfile,
1548         int             sim_part,
1549         int             presteps,
1550         int             npargs,
1551         t_pargs        *pa)
1552 {
1553     int old;
1554
1555
1556     /* Make sure the input file exists */
1557     if (!gmx_fexist(opt2fn("-s", nfile, fnm)))
1558     {
1559         gmx_fatal(FARGS, "File %s not found.", opt2fn("-s", nfile, fnm));
1560     }
1561
1562     /* Make sure that the checkpoint file is not overwritten during benchmarking */
1563     if ( (0 == strcmp(opt2fn("-cpi", nfile, fnm), opt2fn("-bcpo", nfile, fnm)) ) && (sim_part > 1) )
1564     {
1565         gmx_fatal(FARGS, "Checkpoint input (-cpi) and benchmark checkpoint output (-bcpo) files must not be identical.\n"
1566                   "The checkpoint input file must not be overwritten during the benchmarks.\n");
1567     }
1568
1569     /* Make sure that repeats is >= 0 (if == 0, only write tpr files) */
1570     if (repeats < 0)
1571     {
1572         gmx_fatal(FARGS, "Number of repeats < 0!");
1573     }
1574
1575     /* Check number of nodes */
1576     if (nnodes < 1)
1577     {
1578         gmx_fatal(FARGS, "Number of ranks/threads must be a positive integer.");
1579     }
1580
1581     /* Automatically choose -ntpr if not set */
1582     if (*ntprs < 1)
1583     {
1584         if (nnodes < 16)
1585         {
1586             *ntprs = 1;
1587         }
1588         else
1589         {
1590             *ntprs = 3;
1591             /* Set a reasonable scaling factor for rcoulomb */
1592             if (*rmax <= 0)
1593             {
1594                 *rmax = rcoulomb * 1.2;
1595             }
1596         }
1597         fprintf(stderr, "Will test %d tpr file%s.\n", *ntprs, *ntprs == 1 ? "" : "s");
1598     }
1599     else
1600     {
1601         if (1 == *ntprs)
1602         {
1603             fprintf(stderr, "Note: Choose ntpr>1 to shift PME load between real and reciprocal space.\n");
1604         }
1605     }
1606
1607     /* Make shure that rmin <= rcoulomb <= rmax */
1608     if (*rmin <= 0)
1609     {
1610         *rmin = rcoulomb;
1611     }
1612     if (*rmax <= 0)
1613     {
1614         *rmax = rcoulomb;
1615     }
1616     if (!(*rmin <= *rmax) )
1617     {
1618         gmx_fatal(FARGS, "Please choose the Coulomb radii such that rmin <= rmax.\n"
1619                   "rmin = %g, rmax = %g, actual rcoul from .tpr file = %g\n", *rmin, *rmax, rcoulomb);
1620     }
1621     /* Add test scenarios if rmin or rmax were set */
1622     if (*ntprs <= 2)
1623     {
1624         if (!gmx_within_tol(*rmin, rcoulomb, GMX_REAL_EPS) && (*ntprs == 1) )
1625         {
1626             (*ntprs)++;
1627             fprintf(stderr, "NOTE: Setting -rmin to %g changed -ntpr to %d\n",
1628                     *rmin, *ntprs);
1629         }
1630         if (!gmx_within_tol(*rmax, rcoulomb, GMX_REAL_EPS) && (*ntprs == 1) )
1631         {
1632             (*ntprs)++;
1633             fprintf(stderr, "NOTE: Setting -rmax to %g changed -ntpr to %d\n",
1634                     *rmax, *ntprs);
1635         }
1636     }
1637     old = *ntprs;
1638     /* If one of rmin, rmax is set, we need 2 tpr files at minimum */
1639     if (!gmx_within_tol(*rmax, rcoulomb, GMX_REAL_EPS) || !gmx_within_tol(*rmin, rcoulomb, GMX_REAL_EPS) )
1640     {
1641         *ntprs = max(*ntprs, 2);
1642     }
1643
1644     /* If both rmin, rmax are set, we need 3 tpr files at minimum */
1645     if (!gmx_within_tol(*rmax, rcoulomb, GMX_REAL_EPS) && !gmx_within_tol(*rmin, rcoulomb, GMX_REAL_EPS) )
1646     {
1647         *ntprs = max(*ntprs, 3);
1648     }
1649
1650     if (old != *ntprs)
1651     {
1652         fprintf(stderr, "NOTE: Your rmin, rmax setting changed -ntpr to %d\n", *ntprs);
1653     }
1654
1655     if (*ntprs > 1)
1656     {
1657         if (gmx_within_tol(*rmin, rcoulomb, GMX_REAL_EPS) && gmx_within_tol(rcoulomb, *rmax, GMX_REAL_EPS)) /* We have just a single rc */
1658         {
1659             fprintf(stderr, "WARNING: Resetting -ntpr to 1 since no Coulomb radius scaling is requested.\n"
1660                     "Please set rmin < rmax to test Coulomb radii in the [rmin, rmax] interval\n"
1661                     "with correspondingly adjusted PME grid settings\n");
1662             *ntprs = 1;
1663         }
1664     }
1665
1666     /* Check whether max and min fraction are within required values */
1667     if (maxPMEfraction > 0.5 || maxPMEfraction < 0)
1668     {
1669         gmx_fatal(FARGS, "-max must be between 0 and 0.5");
1670     }
1671     if (minPMEfraction > 0.5 || minPMEfraction < 0)
1672     {
1673         gmx_fatal(FARGS, "-min must be between 0 and 0.5");
1674     }
1675     if (maxPMEfraction < minPMEfraction)
1676     {
1677         gmx_fatal(FARGS, "-max must be larger or equal to -min");
1678     }
1679
1680     /* Check whether the number of steps - if it was set - has a reasonable value */
1681     if (bench_nsteps < 0)
1682     {
1683         gmx_fatal(FARGS, "Number of steps must be positive.");
1684     }
1685
1686     if (bench_nsteps > 10000 || bench_nsteps < 100)
1687     {
1688         fprintf(stderr, "WARNING: steps=");
1689         fprintf(stderr, "%"GMX_PRId64, bench_nsteps);
1690         fprintf(stderr, ". Are you sure you want to perform so %s steps for each benchmark?\n", (bench_nsteps < 100) ? "few" : "many");
1691     }
1692
1693     if (presteps < 0)
1694     {
1695         gmx_fatal(FARGS, "Cannot have a negative number of presteps.\n");
1696     }
1697
1698     /* Check for rcoulomb scaling if more than one .tpr file is tested */
1699     if (*ntprs > 1)
1700     {
1701         if (*rmin/rcoulomb < 0.75 || *rmax/rcoulomb > 1.25)
1702         {
1703             fprintf(stderr, "WARNING: Applying extreme scaling factor. I hope you know what you are doing.\n");
1704         }
1705     }
1706
1707     /* If a fixed number of PME nodes is set we do rcoulomb and PME gird tuning
1708      * only. We need to check whether the requested number of PME-only nodes
1709      * makes sense. */
1710     if (npme_fixed > -1)
1711     {
1712         /* No more than 50% of all nodes can be assigned as PME-only nodes. */
1713         if (2*npme_fixed > nnodes)
1714         {
1715             gmx_fatal(FARGS, "Cannot have more than %d PME-only ranks for a total of %d ranks (you chose %d).\n",
1716                       nnodes/2, nnodes, npme_fixed);
1717         }
1718         if ((npme_fixed > 0) && (5*npme_fixed < nnodes))
1719         {
1720             fprintf(stderr, "WARNING: Only %g percent of the ranks are assigned as PME-only ranks.\n",
1721                     100.0*((real)npme_fixed / (real)nnodes));
1722         }
1723         if (opt2parg_bSet("-min", npargs, pa) || opt2parg_bSet("-max", npargs, pa))
1724         {
1725             fprintf(stderr, "NOTE: The -min, -max, and -npme options have no effect when a\n"
1726                     "      fixed number of PME-only ranks is requested with -fix.\n");
1727         }
1728     }
1729 }
1730
1731
1732 /* Returns TRUE when "opt" is needed at launch time */
1733 static gmx_bool is_launch_file(char *opt, gmx_bool bSet)
1734 {
1735     if (0 == strncmp(opt, "-swap", 5))
1736     {
1737         return bSet;
1738     }
1739
1740     /* Apart from the input .tpr and the output log files we need all options that
1741      * were set on the command line and that do not start with -b */
1742     if    (0 == strncmp(opt, "-b", 2) || 0 == strncmp(opt, "-s", 2)
1743            || 0 == strncmp(opt, "-err", 4) || 0 == strncmp(opt, "-p", 2) )
1744     {
1745         return FALSE;
1746     }
1747
1748     return bSet;
1749 }
1750
1751
1752 /* Returns TRUE when "opt" defines a file which is needed for the benchmarks runs */
1753 static gmx_bool is_bench_file(char *opt, gmx_bool bSet, gmx_bool bOptional, gmx_bool bIsOutput)
1754 {
1755     /* Apart from the input .tpr, all files starting with "-b" are for
1756      * _b_enchmark files exclusively */
1757     if (0 == strncmp(opt, "-s", 2))
1758     {
1759         return FALSE;
1760     }
1761
1762     if (0 == strncmp(opt, "-b", 2) || 0 == strncmp(opt, "-s", 2))
1763     {
1764         if (!bOptional || bSet)
1765         {
1766             return TRUE;
1767         }
1768         else
1769         {
1770             return FALSE;
1771         }
1772     }
1773     else
1774     {
1775         if (bIsOutput)
1776         {
1777             return FALSE;
1778         }
1779         else
1780         {
1781             if (bSet) /* These are additional input files like -cpi -ei */
1782             {
1783                 return TRUE;
1784             }
1785             else
1786             {
1787                 return FALSE;
1788             }
1789         }
1790     }
1791 }
1792
1793
1794 /* Adds 'buf' to 'str' */
1795 static void add_to_string(char **str, char *buf)
1796 {
1797     int len;
1798
1799
1800     len = strlen(*str) + strlen(buf) + 1;
1801     srenew(*str, len);
1802     strcat(*str, buf);
1803 }
1804
1805
1806 /* Create the command line for the benchmark as well as for the real run */
1807 static void create_command_line_snippets(
1808         gmx_bool  bAppendFiles,
1809         gmx_bool  bKeepAndNumCPT,
1810         gmx_bool  bResetHWay,
1811         int       presteps,
1812         int       nfile,
1813         t_filenm  fnm[],
1814         char     *cmd_args_bench[],  /* command line arguments for benchmark runs */
1815         char     *cmd_args_launch[], /* command line arguments for simulation run */
1816         char      extra_args[])      /* Add this to the end of the command line */
1817 {
1818     int         i;
1819     char       *opt;
1820     const char *name;
1821     char        strbuf[STRLEN];
1822
1823
1824     /* strlen needs at least '\0' as a string: */
1825     snew(*cmd_args_bench, 1);
1826     snew(*cmd_args_launch, 1);
1827     *cmd_args_launch[0] = '\0';
1828     *cmd_args_bench[0]  = '\0';
1829
1830
1831     /*******************************************/
1832     /* 1. Process other command line arguments */
1833     /*******************************************/
1834     if (presteps > 0)
1835     {
1836         /* Add equilibration steps to benchmark options */
1837         sprintf(strbuf, "-resetstep %d ", presteps);
1838         add_to_string(cmd_args_bench, strbuf);
1839     }
1840     /* These switches take effect only at launch time */
1841     if (FALSE == bAppendFiles)
1842     {
1843         add_to_string(cmd_args_launch, "-noappend ");
1844     }
1845     if (bKeepAndNumCPT)
1846     {
1847         add_to_string(cmd_args_launch, "-cpnum ");
1848     }
1849     if (bResetHWay)
1850     {
1851         add_to_string(cmd_args_launch, "-resethway ");
1852     }
1853
1854     /********************/
1855     /* 2. Process files */
1856     /********************/
1857     for (i = 0; i < nfile; i++)
1858     {
1859         opt  = (char *)fnm[i].opt;
1860         name = opt2fn(opt, nfile, fnm);
1861
1862         /* Strbuf contains the options, now let's sort out where we need that */
1863         sprintf(strbuf, "%s %s ", opt, name);
1864
1865         if (is_bench_file(opt, opt2bSet(opt, nfile, fnm), is_optional(&fnm[i]), is_output(&fnm[i])) )
1866         {
1867             /* All options starting with -b* need the 'b' removed,
1868              * therefore overwrite strbuf */
1869             if (0 == strncmp(opt, "-b", 2))
1870             {
1871                 sprintf(strbuf, "-%s %s ", &opt[2], name);
1872             }
1873
1874             add_to_string(cmd_args_bench, strbuf);
1875         }
1876
1877         if (is_launch_file(opt, opt2bSet(opt, nfile, fnm)) )
1878         {
1879             add_to_string(cmd_args_launch, strbuf);
1880         }
1881     }
1882
1883     add_to_string(cmd_args_bench, extra_args);
1884     add_to_string(cmd_args_launch, extra_args);
1885 }
1886
1887
1888 /* Set option opt */
1889 static void setopt(const char *opt, int nfile, t_filenm fnm[])
1890 {
1891     int i;
1892
1893     for (i = 0; (i < nfile); i++)
1894     {
1895         if (strcmp(opt, fnm[i].opt) == 0)
1896         {
1897             fnm[i].flag |= ffSET;
1898         }
1899     }
1900 }
1901
1902
1903 /* This routine inspects the tpr file and ...
1904  * 1. checks for output files that get triggered by a tpr option. These output
1905  *    files are marked as 'set' to allow for a proper cleanup after each
1906  *    tuning run.
1907  * 2. returns the PME:PP load ratio
1908  * 3. returns rcoulomb from the tpr */
1909 static float inspect_tpr(int nfile, t_filenm fnm[], real *rcoulomb)
1910 {
1911     gmx_bool     bPull;     /* Is pulling requested in .tpr file?             */
1912     gmx_bool     bTpi;      /* Is test particle insertion requested?          */
1913     gmx_bool     bFree;     /* Is a free energy simulation requested?         */
1914     gmx_bool     bNM;       /* Is a normal mode analysis requested?           */
1915     gmx_bool     bSwap;     /* Is water/ion position swapping requested?      */
1916     t_inputrec   ir;
1917     t_state      state;
1918     gmx_mtop_t   mtop;
1919
1920
1921     /* Check tpr file for options that trigger extra output files */
1922     read_tpx_state(opt2fn("-s", nfile, fnm), &ir, &state, NULL, &mtop);
1923     bPull = (epullNO != ir.ePull);
1924     bFree = (efepNO  != ir.efep );
1925     bNM   = (eiNM    == ir.eI   );
1926     bSwap = (eswapNO != ir.eSwapCoords);
1927     bTpi  = EI_TPI(ir.eI);
1928
1929     /* Set these output files on the tuning command-line */
1930     if (bPull)
1931     {
1932         setopt("-pf", nfile, fnm);
1933         setopt("-px", nfile, fnm);
1934     }
1935     if (bFree)
1936     {
1937         setopt("-dhdl", nfile, fnm);
1938     }
1939     if (bTpi)
1940     {
1941         setopt("-tpi", nfile, fnm);
1942         setopt("-tpid", nfile, fnm);
1943     }
1944     if (bNM)
1945     {
1946         setopt("-mtx", nfile, fnm);
1947     }
1948     if (bSwap)
1949     {
1950         setopt("-swap", nfile, fnm);
1951     }
1952
1953     *rcoulomb = ir.rcoulomb;
1954
1955     /* Return the estimate for the number of PME nodes */
1956     return pme_load_estimate(&mtop, &ir, state.box);
1957 }
1958
1959
1960 static void couple_files_options(int nfile, t_filenm fnm[])
1961 {
1962     int      i;
1963     gmx_bool bSet, bBench;
1964     char    *opt;
1965     char     buf[20];
1966
1967
1968     for (i = 0; i < nfile; i++)
1969     {
1970         opt    = (char *)fnm[i].opt;
1971         bSet   = ((fnm[i].flag & ffSET) != 0);
1972         bBench = (0 == strncmp(opt, "-b", 2));
1973
1974         /* Check optional files */
1975         /* If e.g. -eo is set, then -beo also needs to be set */
1976         if (is_optional(&fnm[i]) && bSet && !bBench)
1977         {
1978             sprintf(buf, "-b%s", &opt[1]);
1979             setopt(buf, nfile, fnm);
1980         }
1981         /* If -beo is set, then -eo also needs to be! */
1982         if (is_optional(&fnm[i]) && bSet && bBench)
1983         {
1984             sprintf(buf, "-%s", &opt[2]);
1985             setopt(buf, nfile, fnm);
1986         }
1987     }
1988 }
1989
1990
1991 #define BENCHSTEPS (1000)
1992
1993 int gmx_tune_pme(int argc, char *argv[])
1994 {
1995     const char     *desc[] = {
1996         "For a given number [TT]-np[tt] or [TT]-ntmpi[tt] of ranks, [THISMODULE] systematically",
1997         "times [gmx-mdrun] with various numbers of PME-only ranks and determines",
1998         "which setting is fastest. It will also test whether performance can",
1999         "be enhanced by shifting load from the reciprocal to the real space",
2000         "part of the Ewald sum. ",
2001         "Simply pass your [TT].tpr[tt] file to [THISMODULE] together with other options",
2002         "for [gmx-mdrun] as needed.[PAR]",
2003         "Which executables are used can be set in the environment variables",
2004         "MPIRUN and MDRUN. If these are not present, 'mpirun' and 'mdrun'",
2005         "will be used as defaults. Note that for certain MPI frameworks you",
2006         "need to provide a machine- or hostfile. This can also be passed",
2007         "via the MPIRUN variable, e.g.[PAR]",
2008         "[TT]export MPIRUN=\"/usr/local/mpirun -machinefile hosts\"[tt][PAR]",
2009         "Please call [THISMODULE] with the normal options you would pass to",
2010         "[gmx-mdrun] and add [TT]-np[tt] for the number of ranks to perform the",
2011         "tests on, or [TT]-ntmpi[tt] for the number of threads. You can also add [TT]-r[tt]",
2012         "to repeat each test several times to get better statistics. [PAR]",
2013         "[THISMODULE] can test various real space / reciprocal space workloads",
2014         "for you. With [TT]-ntpr[tt] you control how many extra [TT].tpr[tt] files will be",
2015         "written with enlarged cutoffs and smaller Fourier grids respectively.",
2016         "Typically, the first test (number 0) will be with the settings from the input",
2017         "[TT].tpr[tt] file; the last test (number [TT]ntpr[tt]) will have the Coulomb cutoff",
2018         "specified by [TT]-rmax[tt] with a somwhat smaller PME grid at the same time. ",
2019         "In this last test, the Fourier spacing is multiplied with [TT]rmax[tt]/rcoulomb. ",
2020         "The remaining [TT].tpr[tt] files will have equally-spaced Coulomb radii (and Fourier "
2021         "spacings) between these extremes. [BB]Note[bb] that you can set [TT]-ntpr[tt] to 1",
2022         "if you just seek the optimal number of PME-only ranks; in that case",
2023         "your input [TT].tpr[tt] file will remain unchanged.[PAR]",
2024         "For the benchmark runs, the default of 1000 time steps should suffice for most",
2025         "MD systems. The dynamic load balancing needs about 100 time steps",
2026         "to adapt to local load imbalances, therefore the time step counters",
2027         "are by default reset after 100 steps. For large systems (>1M atoms), as well as ",
2028         "for a higher accuarcy of the measurements, you should set [TT]-resetstep[tt] to a higher value.",
2029         "From the 'DD' load imbalance entries in the md.log output file you",
2030         "can tell after how many steps the load is sufficiently balanced. Example call:[PAR]"
2031         "[TT]gmx tune_pme -np 64 -s protein.tpr -launch[tt][PAR]",
2032         "After calling [gmx-mdrun] several times, detailed performance information",
2033         "is available in the output file [TT]perf.out[tt].",
2034         "[BB]Note[bb] that during the benchmarks, a couple of temporary files are written",
2035         "(options [TT]-b[tt]*), these will be automatically deleted after each test.[PAR]",
2036         "If you want the simulation to be started automatically with the",
2037         "optimized parameters, use the command line option [TT]-launch[tt].[PAR]",
2038     };
2039
2040     int             nnodes         = 1;
2041     int             repeats        = 2;
2042     int             pmeentries     = 0; /* How many values for -npme do we actually test for each tpr file */
2043     real            maxPMEfraction = 0.50;
2044     real            minPMEfraction = 0.25;
2045     int             maxPMEnodes, minPMEnodes;
2046     float           guessPMEratio;                    /* guessed PME:PP ratio based on the tpr file */
2047     float           guessPMEnodes;
2048     int             npme_fixed     = -2;              /* If >= -1, use only this number
2049                                                        * of PME-only nodes                */
2050     int             ntprs          = 0;
2051     real            rmin           = 0.0, rmax = 0.0; /* min and max value for rcoulomb if scaling is requested */
2052     real            rcoulomb       = -1.0;            /* Coulomb radius as set in .tpr file */
2053     gmx_bool        bScaleRvdw     = TRUE;
2054     gmx_int64_t     bench_nsteps   = BENCHSTEPS;
2055     gmx_int64_t     new_sim_nsteps = -1;  /* -1 indicates: not set by the user */
2056     gmx_int64_t     cpt_steps      = 0;   /* Step counter in .cpt input file   */
2057     int             presteps       = 100; /* Do a full cycle reset after presteps steps */
2058     gmx_bool        bOverwrite     = FALSE, bKeepTPR;
2059     gmx_bool        bLaunch        = FALSE;
2060     char           *ExtraArgs      = NULL;
2061     char          **tpr_names      = NULL;
2062     const char     *simulation_tpr = NULL;
2063     int             best_npme, best_tpr;
2064     int             sim_part = 1; /* For benchmarks with checkpoint files */
2065     char            bbuf[STRLEN];
2066
2067     /* Default program names if nothing else is found */
2068     char         *cmd_mpirun = NULL, *cmd_mdrun = NULL;
2069     char         *cmd_args_bench, *cmd_args_launch;
2070     char         *cmd_np = NULL;
2071
2072     t_perf      **perfdata = NULL;
2073     t_inputinfo  *info;
2074     int           i;
2075     FILE         *fp;
2076     t_commrec    *cr;
2077
2078     /* Print out how long the tuning took */
2079     double          seconds;
2080
2081     static t_filenm fnm[] = {
2082         /* g_tune_pme */
2083         { efOUT, "-p",      "perf",     ffWRITE },
2084         { efLOG, "-err",    "bencherr", ffWRITE },
2085         { efTPX, "-so",     "tuned",    ffWRITE },
2086         /* mdrun: */
2087         { efTPX, NULL,      NULL,       ffREAD },
2088         { efTRN, "-o",      NULL,       ffWRITE },
2089         { efCOMPRESSED, "-x", NULL,     ffOPTWR },
2090         { efCPT, "-cpi",    NULL,       ffOPTRD },
2091         { efCPT, "-cpo",    NULL,       ffOPTWR },
2092         { efSTO, "-c",      "confout",  ffWRITE },
2093         { efEDR, "-e",      "ener",     ffWRITE },
2094         { efLOG, "-g",      "md",       ffWRITE },
2095         { efXVG, "-dhdl",   "dhdl",     ffOPTWR },
2096         { efXVG, "-field",  "field",    ffOPTWR },
2097         { efXVG, "-table",  "table",    ffOPTRD },
2098         { efXVG, "-tabletf", "tabletf",   ffOPTRD },
2099         { efXVG, "-tablep", "tablep",   ffOPTRD },
2100         { efXVG, "-tableb", "table",    ffOPTRD },
2101         { efTRX, "-rerun",  "rerun",    ffOPTRD },
2102         { efXVG, "-tpi",    "tpi",      ffOPTWR },
2103         { efXVG, "-tpid",   "tpidist",  ffOPTWR },
2104         { efEDI, "-ei",     "sam",      ffOPTRD },
2105         { efXVG, "-eo",     "edsam",    ffOPTWR },
2106         { efXVG, "-devout", "deviatie", ffOPTWR },
2107         { efXVG, "-runav",  "runaver",  ffOPTWR },
2108         { efXVG, "-px",     "pullx",    ffOPTWR },
2109         { efXVG, "-pf",     "pullf",    ffOPTWR },
2110         { efXVG, "-ro",     "rotation", ffOPTWR },
2111         { efLOG, "-ra",     "rotangles", ffOPTWR },
2112         { efLOG, "-rs",     "rotslabs", ffOPTWR },
2113         { efLOG, "-rt",     "rottorque", ffOPTWR },
2114         { efMTX, "-mtx",    "nm",       ffOPTWR },
2115         { efNDX, "-dn",     "dipole",   ffOPTWR },
2116         { efXVG, "-swap",   "swapions", ffOPTWR },
2117         /* Output files that are deleted after each benchmark run */
2118         { efTRN, "-bo",     "bench",    ffWRITE },
2119         { efXTC, "-bx",     "bench",    ffWRITE },
2120         { efCPT, "-bcpo",   "bench",    ffWRITE },
2121         { efSTO, "-bc",     "bench",    ffWRITE },
2122         { efEDR, "-be",     "bench",    ffWRITE },
2123         { efLOG, "-bg",     "bench",    ffWRITE },
2124         { efXVG, "-beo",    "benchedo", ffOPTWR },
2125         { efXVG, "-bdhdl",  "benchdhdl", ffOPTWR },
2126         { efXVG, "-bfield", "benchfld", ffOPTWR },
2127         { efXVG, "-btpi",   "benchtpi", ffOPTWR },
2128         { efXVG, "-btpid",  "benchtpid", ffOPTWR },
2129         { efXVG, "-bdevout", "benchdev", ffOPTWR },
2130         { efXVG, "-brunav", "benchrnav", ffOPTWR },
2131         { efXVG, "-bpx",    "benchpx",  ffOPTWR },
2132         { efXVG, "-bpf",    "benchpf",  ffOPTWR },
2133         { efXVG, "-bro",    "benchrot", ffOPTWR },
2134         { efLOG, "-bra",    "benchrota", ffOPTWR },
2135         { efLOG, "-brs",    "benchrots", ffOPTWR },
2136         { efLOG, "-brt",    "benchrott", ffOPTWR },
2137         { efMTX, "-bmtx",   "benchn",   ffOPTWR },
2138         { efNDX, "-bdn",    "bench",    ffOPTWR },
2139         { efXVG, "-bswap",  "benchswp", ffOPTWR }
2140     };
2141
2142     gmx_bool        bThreads     = FALSE;
2143
2144     int             nthreads = 1;
2145
2146     const char     *procstring[] =
2147     { NULL, "-np", "-n", "none", NULL };
2148     const char     *npmevalues_opt[] =
2149     { NULL, "auto", "all", "subset", NULL };
2150
2151     gmx_bool     bAppendFiles          = TRUE;
2152     gmx_bool     bKeepAndNumCPT        = FALSE;
2153     gmx_bool     bResetCountersHalfWay = FALSE;
2154     gmx_bool     bBenchmark            = TRUE;
2155
2156     output_env_t oenv = NULL;
2157
2158     t_pargs      pa[] = {
2159         /***********************/
2160         /* g_tune_pme options: */
2161         /***********************/
2162         { "-np",       FALSE, etINT,  {&nnodes},
2163           "Number of ranks to run the tests on (must be > 2 for separate PME ranks)" },
2164         { "-npstring", FALSE, etENUM, {procstring},
2165           "Specify the number of ranks to [TT]$MPIRUN[tt] using this string"},
2166         { "-ntmpi",    FALSE, etINT,  {&nthreads},
2167           "Number of MPI-threads to run the tests on (turns MPI & mpirun off)"},
2168         { "-r",        FALSE, etINT,  {&repeats},
2169           "Repeat each test this often" },
2170         { "-max",      FALSE, etREAL, {&maxPMEfraction},
2171           "Max fraction of PME ranks to test with" },
2172         { "-min",      FALSE, etREAL, {&minPMEfraction},
2173           "Min fraction of PME ranks to test with" },
2174         { "-npme",     FALSE, etENUM, {npmevalues_opt},
2175           "Within -min and -max, benchmark all possible values for [TT]-npme[tt], or just a reasonable subset. "
2176           "Auto neglects -min and -max and chooses reasonable values around a guess for npme derived from the .tpr"},
2177         { "-fix",      FALSE, etINT,  {&npme_fixed},
2178           "If >= -1, do not vary the number of PME-only ranks, instead use this fixed value and only vary rcoulomb and the PME grid spacing."},
2179         { "-rmax",     FALSE, etREAL, {&rmax},
2180           "If >0, maximal rcoulomb for -ntpr>1 (rcoulomb upscaling results in fourier grid downscaling)" },
2181         { "-rmin",     FALSE, etREAL, {&rmin},
2182           "If >0, minimal rcoulomb for -ntpr>1" },
2183         { "-scalevdw",  FALSE, etBOOL, {&bScaleRvdw},
2184           "Scale rvdw along with rcoulomb"},
2185         { "-ntpr",     FALSE, etINT,  {&ntprs},
2186           "Number of [TT].tpr[tt] files to benchmark. Create this many files with different rcoulomb scaling factors depending on -rmin and -rmax. "
2187           "If < 1, automatically choose the number of [TT].tpr[tt] files to test" },
2188         { "-steps",    FALSE, etINT64, {&bench_nsteps},
2189           "Take timings for this many steps in the benchmark runs" },
2190         { "-resetstep", FALSE, etINT,  {&presteps},
2191           "Let dlb equilibrate this many steps before timings are taken (reset cycle counters after this many steps)" },
2192         { "-simsteps", FALSE, etINT64, {&new_sim_nsteps},
2193           "If non-negative, perform this many steps in the real run (overwrites nsteps from [TT].tpr[tt], add [TT].cpt[tt] steps)" },
2194         { "-launch",   FALSE, etBOOL, {&bLaunch},
2195           "Launch the real simulation after optimization" },
2196         { "-bench",    FALSE, etBOOL, {&bBenchmark},
2197           "Run the benchmarks or just create the input [TT].tpr[tt] files?" },
2198         /******************/
2199         /* mdrun options: */
2200         /******************/
2201         /* We let g_tune_pme parse and understand these options, because we need to
2202          * prevent that they appear on the mdrun command line for the benchmarks */
2203         { "-append",   FALSE, etBOOL, {&bAppendFiles},
2204           "Append to previous output files when continuing from checkpoint instead of adding the simulation part number to all file names (for launch only)" },
2205         { "-cpnum",    FALSE, etBOOL, {&bKeepAndNumCPT},
2206           "Keep and number checkpoint files (launch only)" },
2207         { "-resethway", FALSE, etBOOL, {&bResetCountersHalfWay},
2208           "HIDDENReset the cycle counters after half the number of steps or halfway [TT]-maxh[tt] (launch only)" }
2209     };
2210
2211 #define NFILE asize(fnm)
2212
2213     seconds = gmx_gettime();
2214
2215     if (!parse_common_args(&argc, argv, PCA_NOEXIT_ON_ARGS,
2216                            NFILE, fnm, asize(pa), pa, asize(desc), desc,
2217                            0, NULL, &oenv))
2218     {
2219         return 0;
2220     }
2221
2222     /* Store the remaining unparsed command line entries in a string which
2223      * is then attached to the mdrun command line */
2224     snew(ExtraArgs, 1);
2225     ExtraArgs[0] = '\0';
2226     for (i = 1; i < argc; i++) /* argc will now be 1 if everything was understood */
2227     {
2228         add_to_string(&ExtraArgs, argv[i]);
2229         add_to_string(&ExtraArgs, " ");
2230     }
2231
2232     if (opt2parg_bSet("-ntmpi", asize(pa), pa))
2233     {
2234         bThreads = TRUE;
2235         if (opt2parg_bSet("-npstring", asize(pa), pa))
2236         {
2237             fprintf(stderr, "WARNING: -npstring has no effect when using threads.\n");
2238         }
2239
2240         if (nnodes > 1)
2241         {
2242             gmx_fatal(FARGS, "Can't run multi-threaded MPI simulation yet!");
2243         }
2244         /* and now we just set this; a bit of an ugly hack*/
2245         nnodes = nthreads;
2246     }
2247     /* Check for PME:PP ratio and whether tpr triggers additional output files */
2248     guessPMEratio = inspect_tpr(NFILE, fnm, &rcoulomb);
2249
2250     /* Automatically set -beo options if -eo is set etc. */
2251     couple_files_options(NFILE, fnm);
2252
2253     /* Construct the command line arguments for benchmark runs
2254      * as well as for the simulation run */
2255     if (bThreads)
2256     {
2257         sprintf(bbuf, " -ntmpi %d ", nthreads);
2258     }
2259     else
2260     {
2261         /* This string will be used for MPI runs and will appear after the
2262          * mpirun command. */
2263         if (strcmp(procstring[0], "none") != 0)
2264         {
2265             sprintf(bbuf, " %s %d ", procstring[0], nnodes);
2266         }
2267         else
2268         {
2269             sprintf(bbuf, " ");
2270         }
2271     }
2272
2273     cmd_np = bbuf;
2274
2275     create_command_line_snippets(bAppendFiles, bKeepAndNumCPT, bResetCountersHalfWay, presteps,
2276                                  NFILE, fnm, &cmd_args_bench, &cmd_args_launch, ExtraArgs);
2277
2278     /* Read in checkpoint file if requested */
2279     sim_part = 1;
2280     if (opt2bSet("-cpi", NFILE, fnm))
2281     {
2282         snew(cr, 1);
2283         cr->duty = DUTY_PP; /* makes the following routine happy */
2284         read_checkpoint_simulation_part(opt2fn("-cpi", NFILE, fnm),
2285                                         &sim_part, &cpt_steps, cr,
2286                                         FALSE, NFILE, fnm, NULL, NULL);
2287         sfree(cr);
2288         sim_part++;
2289         /* sim_part will now be 1 if no checkpoint file was found */
2290         if (sim_part <= 1)
2291         {
2292             gmx_fatal(FARGS, "Checkpoint file %s not found!", opt2fn("-cpi", NFILE, fnm));
2293         }
2294     }
2295
2296     /* Open performance output file and write header info */
2297     fp = gmx_ffopen(opt2fn("-p", NFILE, fnm), "w");
2298
2299     /* Make a quick consistency check of command line parameters */
2300     check_input(nnodes, repeats, &ntprs, &rmin, rcoulomb, &rmax,
2301                 maxPMEfraction, minPMEfraction, npme_fixed,
2302                 bench_nsteps, fnm, NFILE, sim_part, presteps,
2303                 asize(pa), pa);
2304
2305     /* Determine the maximum and minimum number of PME nodes to test,
2306      * the actual list of settings is build in do_the_tests(). */
2307     if ((nnodes > 2) && (npme_fixed < -1))
2308     {
2309         if (0 == strcmp(npmevalues_opt[0], "auto"))
2310         {
2311             /* Determine the npme range automatically based on the PME:PP load guess */
2312             if (guessPMEratio > 1.0)
2313             {
2314                 /* More PME than PP work, probably we do not need separate PME nodes at all! */
2315                 maxPMEnodes = nnodes/2;
2316                 minPMEnodes = nnodes/2;
2317             }
2318             else
2319             {
2320                 /* PME : PP load is in the range 0..1, let's test around the guess */
2321                 guessPMEnodes = nnodes/(1.0 + 1.0/guessPMEratio);
2322                 minPMEnodes   = floor(0.7*guessPMEnodes);
2323                 maxPMEnodes   =  ceil(1.6*guessPMEnodes);
2324                 maxPMEnodes   = min(maxPMEnodes, nnodes/2);
2325             }
2326         }
2327         else
2328         {
2329             /* Determine the npme range based on user input */
2330             maxPMEnodes = floor(maxPMEfraction*nnodes);
2331             minPMEnodes = max(floor(minPMEfraction*nnodes), 0);
2332             fprintf(stdout, "Will try runs with %d ", minPMEnodes);
2333             if (maxPMEnodes != minPMEnodes)
2334             {
2335                 fprintf(stdout, "- %d ", maxPMEnodes);
2336             }
2337             fprintf(stdout, "PME-only ranks.\n  Note that the automatic number of PME-only ranks and no separate PME ranks are always tested.\n");
2338         }
2339     }
2340     else
2341     {
2342         maxPMEnodes = 0;
2343         minPMEnodes = 0;
2344     }
2345
2346     /* Get the commands we need to set up the runs from environment variables */
2347     get_program_paths(bThreads, &cmd_mpirun, &cmd_mdrun);
2348     if (bBenchmark && repeats > 0)
2349     {
2350         check_mdrun_works(bThreads, cmd_mpirun, cmd_np, cmd_mdrun);
2351     }
2352
2353     /* Print some header info to file */
2354     sep_line(fp);
2355     fprintf(fp, "\n      P E R F O R M A N C E   R E S U L T S\n");
2356     sep_line(fp);
2357     fprintf(fp, "%s for Gromacs %s\n", output_env_get_program_display_name(oenv),
2358             gmx_version());
2359     if (!bThreads)
2360     {
2361         fprintf(fp, "Number of ranks         : %d\n", nnodes);
2362         fprintf(fp, "The mpirun command is   : %s\n", cmd_mpirun);
2363         if (strcmp(procstring[0], "none") != 0)
2364         {
2365             fprintf(fp, "Passing # of ranks via  : %s\n", procstring[0]);
2366         }
2367         else
2368         {
2369             fprintf(fp, "Not setting number of ranks in system call\n");
2370         }
2371     }
2372     else
2373     {
2374         fprintf(fp, "Number of threads       : %d\n", nnodes);
2375     }
2376
2377     fprintf(fp, "The mdrun  command is   : %s\n", cmd_mdrun);
2378     fprintf(fp, "mdrun args benchmarks   : %s\n", cmd_args_bench);
2379     fprintf(fp, "Benchmark steps         : ");
2380     fprintf(fp, "%"GMX_PRId64, bench_nsteps);
2381     fprintf(fp, "\n");
2382     fprintf(fp, "dlb equilibration steps : %d\n", presteps);
2383     if (sim_part > 1)
2384     {
2385         fprintf(fp, "Checkpoint time step    : ");
2386         fprintf(fp, "%"GMX_PRId64, cpt_steps);
2387         fprintf(fp, "\n");
2388     }
2389     fprintf(fp, "mdrun args at launchtime: %s\n", cmd_args_launch);
2390
2391     if (new_sim_nsteps >= 0)
2392     {
2393         bOverwrite = TRUE;
2394         fprintf(stderr, "Note: Simulation input file %s will have ", opt2fn("-so", NFILE, fnm));
2395         fprintf(stderr, "%"GMX_PRId64, new_sim_nsteps+cpt_steps);
2396         fprintf(stderr, " steps.\n");
2397         fprintf(fp, "Simulation steps        : ");
2398         fprintf(fp, "%"GMX_PRId64, new_sim_nsteps);
2399         fprintf(fp, "\n");
2400     }
2401     if (repeats > 1)
2402     {
2403         fprintf(fp, "Repeats for each test   : %d\n", repeats);
2404     }
2405
2406     if (npme_fixed >= -1)
2407     {
2408         fprintf(fp, "Fixing -npme at         : %d\n", npme_fixed);
2409     }
2410
2411     fprintf(fp, "Input file              : %s\n", opt2fn("-s", NFILE, fnm));
2412     fprintf(fp, "   PME/PP load estimate : %g\n", guessPMEratio);
2413
2414     /* Allocate memory for the inputinfo struct: */
2415     snew(info, 1);
2416     info->nr_inputfiles = ntprs;
2417     for (i = 0; i < ntprs; i++)
2418     {
2419         snew(info->rcoulomb, ntprs);
2420         snew(info->rvdw, ntprs);
2421         snew(info->rlist, ntprs);
2422         snew(info->rlistlong, ntprs);
2423         snew(info->nkx, ntprs);
2424         snew(info->nky, ntprs);
2425         snew(info->nkz, ntprs);
2426         snew(info->fsx, ntprs);
2427         snew(info->fsy, ntprs);
2428         snew(info->fsz, ntprs);
2429     }
2430     /* Make alternative tpr files to test: */
2431     snew(tpr_names, ntprs);
2432     for (i = 0; i < ntprs; i++)
2433     {
2434         snew(tpr_names[i], STRLEN);
2435     }
2436
2437     /* It can be that ntprs is reduced by make_benchmark_tprs if not enough
2438      * different grids could be found. */
2439     make_benchmark_tprs(opt2fn("-s", NFILE, fnm), tpr_names, bench_nsteps+presteps,
2440                         cpt_steps, rmin, rmax, bScaleRvdw, &ntprs, info, fp);
2441
2442     /********************************************************************************/
2443     /* Main loop over all scenarios we need to test: tpr files, PME nodes, repeats  */
2444     /********************************************************************************/
2445     snew(perfdata, ntprs);
2446     if (bBenchmark)
2447     {
2448         do_the_tests(fp, tpr_names, maxPMEnodes, minPMEnodes, npme_fixed, npmevalues_opt[0], perfdata, &pmeentries,
2449                      repeats, nnodes, ntprs, bThreads, cmd_mpirun, cmd_np, cmd_mdrun,
2450                      cmd_args_bench, fnm, NFILE, presteps, cpt_steps);
2451
2452         fprintf(fp, "\nTuning took%8.1f minutes.\n", (gmx_gettime()-seconds)/60.0);
2453
2454         /* Analyse the results and give a suggestion for optimal settings: */
2455         bKeepTPR = analyze_data(fp, opt2fn("-p", NFILE, fnm), perfdata, nnodes, ntprs, pmeentries,
2456                                 repeats, info, &best_tpr, &best_npme);
2457
2458         /* Take the best-performing tpr file and enlarge nsteps to original value */
2459         if (bKeepTPR && !bOverwrite)
2460         {
2461             simulation_tpr = opt2fn("-s", NFILE, fnm);
2462         }
2463         else
2464         {
2465             simulation_tpr = opt2fn("-so", NFILE, fnm);
2466             modify_PMEsettings(bOverwrite ? (new_sim_nsteps+cpt_steps) : info->orig_sim_steps,
2467                                info->orig_init_step, tpr_names[best_tpr], simulation_tpr);
2468         }
2469
2470         /* Let's get rid of the temporary benchmark input files */
2471         for (i = 0; i < ntprs; i++)
2472         {
2473             fprintf(stdout, "Deleting temporary benchmark input file %s\n", tpr_names[i]);
2474             remove(tpr_names[i]);
2475         }
2476
2477         /* Now start the real simulation if the user requested it ... */
2478         launch_simulation(bLaunch, fp, bThreads, cmd_mpirun, cmd_np, cmd_mdrun,
2479                           cmd_args_launch, simulation_tpr, best_npme);
2480     }
2481     gmx_ffclose(fp);
2482
2483     /* ... or simply print the performance results to screen: */
2484     if (!bLaunch)
2485     {
2486         finalize(opt2fn("-p", NFILE, fnm));
2487     }
2488
2489     return 0;
2490 }