src/mdlib/gmx_wallcycle.c

   1 /*  -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
   2  *
   3  *
   4  *                This source code is part of
   5  *
   6  *                 G   R   O   M   A   C   S
   7  *
   8  *          GROningen MAchine for Chemical Simulations
   9  *
  10  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  11  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  12  * Copyright (c) 2001-2008, The GROMACS development team,
  13  * check out http://www.gromacs.org for more information.
  14
  15  * This program is free software; you can redistribute it and/or
  16  * modify it under the terms of the GNU General Public License
  17  * as published by the Free Software Foundation; either version 2
  18  * of the License, or (at your option) any later version.
  19  *
  20  * If you want to redistribute modifications, please consider that
  21  * scientific software is very special. Version control is crucial -
  22  * bugs must be traceable. We will be happy to consider code for
  23  * inclusion in the official distribution, but derived work must not
  24  * be called official GROMACS. Details are found in the README & COPYING
  25  * files - if they are missing, get the official version at www.gromacs.org.
  26  *
  27  * To help us fund GROMACS development, we humbly ask that you cite
  28  * the papers on the package - you can find them in the top README file.
  29  *
  30  * For more info, check our website at http://www.gromacs.org
  31  *
  32  * And Hey:
  33  * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
  34  */
  35
  36
  37 #ifdef HAVE_CONFIG_H
  38 #include <config.h>
  39 #endif
  40
  41 #include <string.h>
  42 #include "gmx_wallcycle.h"
  43 #include "gmx_cyclecounter.h"
  44 #include "smalloc.h"
  45 #include "gmx_fatal.h"
  46
  47 #ifdef GMX_LIB_MPI
  48 #include <mpi.h>
  49 #endif
  50 #ifdef GMX_THREAD_MPI
  51 #include "tmpi.h"
  52 #endif
  53
  54 typedef struct
  55 {
  56     int          n;
  57     gmx_cycles_t c;
  58     gmx_cycles_t start;
  59     gmx_cycles_t last;
  60 } wallcc_t;
  61
  62 typedef struct gmx_wallcycle
  63 {
  64     wallcc_t     *wcc;
  65     /* variables for testing/debugging */
  66     gmx_bool         wc_barrier;
  67     wallcc_t     *wcc_all;
  68     int          wc_depth;
  69     int          ewc_prev;
  70     gmx_cycles_t cycle_prev;
  71     gmx_large_int_t   reset_counters;
  72 #ifdef GMX_MPI
  73     MPI_Comm     mpi_comm_mygroup;
  74 #endif
  75     int          omp_nthreads;
  76 } gmx_wallcycle_t_t;
  77
  78 /* Each name should not exceed 19 characters */
  79 static const char *wcn[ewcNR] =
  80 { "Run", "Step", "PP during PME", "Domain decomp.", "DD comm. load", "DD comm. bounds", "Vsite constr.", "Send X to PME", "Comm. coord.", "Neighbor search", "Born radii", "Force", "Wait + Comm. F", "PME mesh", "PME redist. X/F", "PME spread/gather", "PME 3D-FFT", "PME 3D-FFT Comm.", "PME solve", "Wait + Comm. X/F", "Wait + Recv. PME F", "Vsite spread", "Write traj.", "Update", "Constraints", "Comm. energies", "Enforced rotation", "Add rot. forces", "Test" };
  81
  82 gmx_bool wallcycle_have_counter(void)
  83 {
  84   return gmx_cycles_have_counter();
  85 }
  86
  87 gmx_wallcycle_t wallcycle_init(FILE *fplog,int resetstep,t_commrec *cr, int omp_nthreads)
  88 {
  89     gmx_wallcycle_t wc;
  90
  91
  92     if (!wallcycle_have_counter())
  93     {
  94         return NULL;
  95     }
  96
  97     snew(wc,1);
  98
  99     wc->wc_barrier = FALSE;
 100     wc->wcc_all    = NULL;
 101     wc->wc_depth   = 0;
 102     wc->ewc_prev   = -1;
 103     wc->reset_counters = resetstep;
 104     wc->omp_nthreads = omp_nthreads;
 105
 106 #ifdef GMX_MPI
 107     if (PAR(cr) && getenv("GMX_CYCLE_BARRIER") != NULL)
 108     {
 109         if (fplog)
 110         {
 111             fprintf(fplog,"\nWill call MPI_Barrier before each cycle start/stop call\n\n");
 112         }
 113         wc->wc_barrier = TRUE;
 114         wc->mpi_comm_mygroup = cr->mpi_comm_mygroup;
 115     }
 116 #endif
 117
 118     snew(wc->wcc,ewcNR);
 119     if (getenv("GMX_CYCLE_ALL") != NULL)
 120     {
 121 /*#ifndef GMX_THREAD_MPI*/
 122         if (fplog)
 123         {
 124             fprintf(fplog,"\nWill time all the code during the run\n\n");
 125         }
 126         snew(wc->wcc_all,ewcNR*ewcNR);
 127 /*#else*/
 128         gmx_fatal(FARGS, "GMX_CYCLE_ALL is incompatible with threaded code");
 129 /*#endif*/
 130     }
 131
 132     return wc;
 133 }
 134
 135 static void wallcycle_all_start(gmx_wallcycle_t wc,int ewc,gmx_cycles_t cycle)
 136 {
 137     wc->ewc_prev = ewc;
 138     wc->cycle_prev = cycle;
 139 }
 140
 141 static void wallcycle_all_stop(gmx_wallcycle_t wc,int ewc,gmx_cycles_t cycle)
 142 {
 143     wc->wcc_all[wc->ewc_prev*ewcNR+ewc].n += 1;
 144     wc->wcc_all[wc->ewc_prev*ewcNR+ewc].c += cycle - wc->cycle_prev;
 145 }
 146
 147 void wallcycle_start(gmx_wallcycle_t wc, int ewc)
 148 {
 149     gmx_cycles_t cycle;
 150
 151     if (wc == NULL)
 152     {
 153         return;
 154     }
 155
 156 #ifdef GMX_MPI
 157     if (wc->wc_barrier)
 158     {
 159         MPI_Barrier(wc->mpi_comm_mygroup);
 160     }
 161 #endif
 162
 163     cycle = gmx_cycles_read();
 164     wc->wcc[ewc].start = cycle;
 165     if (wc->wcc_all != NULL)
 166     {
 167         wc->wc_depth++;
 168         if (ewc == ewcRUN)
 169         {
 170             wallcycle_all_start(wc,ewc,cycle);
 171         }
 172         else if (wc->wc_depth == 3)
 173         {
 174             wallcycle_all_stop(wc,ewc,cycle);
 175         }
 176     }
 177 }
 178
 179 double wallcycle_stop(gmx_wallcycle_t wc, int ewc)
 180 {
 181     gmx_cycles_t cycle,last;
 182
 183     if (wc == NULL)
 184     {
 185         return 0;
 186     }
 187
 188 #ifdef GMX_MPI
 189     if (wc->wc_barrier)
 190     {
 191         MPI_Barrier(wc->mpi_comm_mygroup);
 192     }
 193 #endif
 194
 195     cycle = gmx_cycles_read();
 196     last = cycle - wc->wcc[ewc].start;
 197     wc->wcc[ewc].c += last;
 198     wc->wcc[ewc].n++;
 199     if (wc->wcc_all)
 200     {
 201         wc->wc_depth--;
 202         if (ewc == ewcRUN)
 203         {
 204             wallcycle_all_stop(wc,ewc,cycle);
 205         }
 206         else if (wc->wc_depth == 2)
 207         {
 208             wallcycle_all_start(wc,ewc,cycle);
 209         }
 210     }
 211
 212     return last;
 213 }
 214
 215 void wallcycle_reset_all(gmx_wallcycle_t wc)
 216 {
 217     int i;
 218
 219     if (wc == NULL)
 220     {
 221         return;
 222     }
 223
 224     for(i=0; i<ewcNR; i++)
 225     {
 226         wc->wcc[i].n = 0;
 227         wc->wcc[i].c = 0;
 228         wc->wcc[i].start = 0;
 229         wc->wcc[i].last = 0;
 230     }
 231 }
 232
 233 static gmx_bool pme_subdivision(int ewc)
 234 {
 235     return (ewc >= ewcPME_REDISTXF && ewc <= ewcPME_SOLVE);
 236 }
 237
 238 void wallcycle_sum(t_commrec *cr, gmx_wallcycle_t wc,double cycles[])
 239 {
 240     wallcc_t *wcc;
 241     double cycles_n[ewcNR],buf[ewcNR],*cyc_all,*buf_all;
 242     int    i;
 243
 244     if (wc == NULL)
 245     {
 246         return;
 247     }
 248
 249     wcc = wc->wcc;
 250
 251     if (wc->omp_nthreads>1)
 252     {
 253         for(i=0; i<ewcNR; i++)
 254         {
 255             if (pme_subdivision(i) || i==ewcPMEMESH || (i==ewcRUN && cr->duty == DUTY_PME))
 256             {
 257                 wcc[i].c *= wc->omp_nthreads;
 258             }
 259         }
 260     }
 261
 262     if (wcc[ewcDDCOMMLOAD].n > 0)
 263     {
 264         wcc[ewcDOMDEC].c -= wcc[ewcDDCOMMLOAD].c;
 265     }
 266     if (wcc[ewcDDCOMMBOUND].n > 0)
 267     {
 268         wcc[ewcDOMDEC].c -= wcc[ewcDDCOMMBOUND].c;
 269     }
 270     if (wcc[ewcPME_FFTCOMM].n > 0)
 271     {
 272         wcc[ewcPME_FFT].c -= wcc[ewcPME_FFTCOMM].c;
 273     }
 274
 275     if (cr->npmenodes == 0)
 276     {
 277         /* All nodes do PME (or no PME at all) */
 278         if (wcc[ewcPMEMESH].n > 0)
 279         {
 280             wcc[ewcFORCE].c -= wcc[ewcPMEMESH].c;
 281         }
 282     }
 283     else
 284     {
 285         /* The are PME-only nodes */
 286         if (wcc[ewcPMEMESH].n > 0)
 287         {
 288             /* This must be a PME only node, calculate the Wait + Comm. time */
 289             wcc[ewcPMEWAITCOMM].c = wcc[ewcRUN].c - wcc[ewcPMEMESH].c;
 290         }
 291     }
 292
 293     /* Store the cycles in a double buffer for summing */
 294     for(i=0; i<ewcNR; i++)
 295     {
 296         cycles_n[i] = (double)wcc[i].n;
 297         cycles[i]   = (double)wcc[i].c;
 298     }
 299
 300 #ifdef GMX_MPI
 301     if (cr->nnodes > 1)
 302     {
 303         MPI_Allreduce(cycles_n,buf,ewcNR,MPI_DOUBLE,MPI_MAX,
 304                       cr->mpi_comm_mysim);
 305         for(i=0; i<ewcNR; i++)
 306         {
 307             wcc[i].n = (int)(buf[i] + 0.5);
 308         }
 309         MPI_Allreduce(cycles,buf,ewcNR,MPI_DOUBLE,MPI_SUM,
 310                       cr->mpi_comm_mysim);
 311         for(i=0; i<ewcNR; i++)
 312         {
 313             cycles[i] = buf[i];
 314         }
 315
 316         if (wc->wcc_all != NULL)
 317         {
 318             snew(cyc_all,ewcNR*ewcNR);
 319             snew(buf_all,ewcNR*ewcNR);
 320             for(i=0; i<ewcNR*ewcNR; i++)
 321             {
 322                 cyc_all[i] = wc->wcc_all[i].c;
 323             }
 324             MPI_Allreduce(cyc_all,buf_all,ewcNR*ewcNR,MPI_DOUBLE,MPI_SUM,
 325                           cr->mpi_comm_mysim);
 326             for(i=0; i<ewcNR*ewcNR; i++)
 327             {
 328                 wc->wcc_all[i].c = buf_all[i];
 329             }
 330             sfree(buf_all);
 331             sfree(cyc_all);
 332         }
 333     }
 334 #endif
 335 }
 336
 337 static void print_cycles(FILE *fplog, double c2t, const char *name, int nnodes,
 338                          int n, double c, double tot)
 339 {
 340     char num[11];
 341
 342     if (c > 0)
 343     {
 344         if (n > 0)
 345         {
 346             sprintf(num,"%10d",n);
 347         }
 348         else
 349         {
 350             sprintf(num,"          ");
 351         }
 352         fprintf(fplog," %-19s %4d %10s %12.3f %10.1f   %5.1f\n",
 353                 name,nnodes,num,c*1e-9,c*c2t,100*c/tot);
 354     }
 355 }
 356
 357
 358 void wallcycle_print(FILE *fplog, int nnodes, int npme, double realtime,
 359                      gmx_wallcycle_t wc, double cycles[])
 360 {
 361     double c2t,tot,sum;
 362     int    i,j,npp;
 363     char   buf[STRLEN];
 364     const char *myline = "-----------------------------------------------------------------------";
 365
 366     if (wc == NULL)
 367     {
 368         return;
 369     }
 370
 371     if (npme > 0)
 372     {
 373         npp = nnodes - npme;
 374     }
 375     else
 376     {
 377         npp  = nnodes;
 378         npme = nnodes;
 379     }
 380     tot = cycles[ewcRUN];
 381     /* PME part has to be multiplied with number of threads */
 382     if (npme == 0)
 383     {
 384         tot += cycles[ewcPMEMESH]*(wc->omp_nthreads-1);
 385     }
 386     /* Conversion factor from cycles to seconds */
 387     if (tot > 0)
 388     {
 389       c2t = (npp+npme*wc->omp_nthreads)*realtime/tot;
 390     }
 391     else
 392     {
 393       c2t = 0;
 394     }
 395
 396     fprintf(fplog,"\n     R E A L   C Y C L E   A N D   T I M E   A C C O U N T I N G\n\n");
 397
 398     fprintf(fplog," Computing:         Nodes     Number     G-Cycles    Seconds     %c\n",'%');
 399     fprintf(fplog,"%s\n",myline);
 400     sum = 0;
 401     for(i=ewcPPDURINGPME+1; i<ewcNR; i++)
 402     {
 403         if (!pme_subdivision(i))
 404         {
 405             print_cycles(fplog,c2t,wcn[i],
 406                          (i==ewcPMEMESH || i==ewcPMEWAITCOMM) ? npme : npp,
 407                          wc->wcc[i].n,cycles[i],tot);
 408             sum += cycles[i];
 409         }
 410     }
 411     if (wc->wcc_all != NULL)
 412     {
 413         for(i=0; i<ewcNR; i++)
 414         {
 415             for(j=0; j<ewcNR; j++)
 416             {
 417                 sprintf(buf,"%-9s",wcn[i]);
 418                 buf[9] = ' ';
 419                 sprintf(buf+10,"%-9s",wcn[j]);
 420                 buf[19] = '\0';
 421                 print_cycles(fplog,c2t,buf,
 422                              (i==ewcPMEMESH || i==ewcPMEWAITCOMM) ? npme : npp,
 423                              wc->wcc_all[i*ewcNR+j].n,
 424                              wc->wcc_all[i*ewcNR+j].c,
 425                              tot);
 426             }
 427         }
 428     }
 429     print_cycles(fplog,c2t,"Rest",npp,0,tot-sum,tot);
 430     fprintf(fplog,"%s\n",myline);
 431     print_cycles(fplog,c2t,"Total",nnodes,0,tot,tot);
 432     fprintf(fplog,"%s\n",myline);
 433
 434     if (wc->wcc[ewcPMEMESH].n > 0)
 435     {
 436         fprintf(fplog,"%s\n",myline);
 437         for(i=ewcPPDURINGPME+1; i<ewcNR; i++)
 438         {
 439             if (pme_subdivision(i))
 440             {
 441                 print_cycles(fplog,c2t,wcn[i],
 442                              (i>=ewcPMEMESH && i<=ewcPME_SOLVE) ? npme : npp,
 443                              wc->wcc[i].n,cycles[i],tot);
 444             }
 445         }
 446         fprintf(fplog,"%s\n",myline);
 447     }
 448
 449     if (cycles[ewcMoveE] > tot*0.05)
 450     {
 451         sprintf(buf,
 452                 "NOTE: %d %% of the run time was spent communicating energies,\n"
 453                 "      you might want to use the -gcom option of mdrun\n",
 454                 (int)(100*cycles[ewcMoveE]/tot+0.5));
 455         if (fplog)
 456         {
 457             fprintf(fplog,"\n%s\n",buf);
 458         }
 459         /* Only the sim master calls this function, so always print to stderr */
 460         fprintf(stderr,"\n%s\n",buf);
 461     }
 462 }
 463
 464 extern gmx_large_int_t wcycle_get_reset_counters(gmx_wallcycle_t wc)
 465 {
 466     if (wc == NULL)
 467     {
 468         return -1;
 469     }
 470
 471     return wc->reset_counters;
 472 }
 473
 474 extern void wcycle_set_reset_counters(gmx_wallcycle_t wc, gmx_large_int_t reset_counters)
 475 {
 476     if (wc == NULL)
 477         return;
 478
 479     wc->reset_counters = reset_counters;
 480 }