/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
*
- *
+ *
* This source code is part of
- *
+ *
* G R O M A C S
- *
+ *
* GROningen MAchine for Chemical Simulations
- *
+ *
* Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2008, The GROMACS development team,
* check out http://www.gromacs.org for more information.
- *
+ *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
- *
+ *
* If you want to redistribute modifications, please consider that
* scientific software is very special. Version control is crucial -
* bugs must be traceable. We will be happy to consider code for
* inclusion in the official distribution, but derived work must not
* be called official GROMACS. Details are found in the README & COPYING
* files - if they are missing, get the official version at www.gromacs.org.
- *
+ *
* To help us fund GROMACS development, we humbly ask that you cite
* the papers on the package - you can find them in the top README file.
- *
+ *
* For more info, check our website at http://www.gromacs.org
- *
+ *
* And Hey:
* Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
*/
typedef struct gmx_wallcycle
{
- wallcc_t *wcc;
+ wallcc_t *wcc;
/* variables for testing/debugging */
gmx_bool wc_barrier;
- wallcc_t *wcc_all;
- int wc_depth;
+ wallcc_t *wcc_all;
+ int wc_depth;
#ifdef DEBUG_WCYCLE
#define DEPTH_MAX 6
- int counterlist[DEPTH_MAX];
- int count_depth;
+ int counterlist[DEPTH_MAX];
+ int count_depth;
#endif
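/* Counter index and cycle count of the most recently started interval,
 * used for the GMX_CYCLE_ALL counter-pair matrix (wcc_all) */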
- int ewc_prev;
- gmx_cycles_t cycle_prev;
+ int ewc_prev;
+ gmx_cycles_t cycle_prev;
gmx_large_int_t reset_counters;
#ifdef GMX_MPI
- MPI_Comm mpi_comm_mygroup;
+ MPI_Comm mpi_comm_mygroup;
#endif
- int nthreads_pp;
- int nthreads_pme;
+ int nthreads_pp;
+ int nthreads_pme;
#ifdef GMX_CYCLE_SUBCOUNTERS
- wallcc_t *wcsc;
+ wallcc_t *wcsc;
#endif
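/* Per-counter cycle sums over all ranks; allocated and filled in wallcycle_sum() */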
- double *cycles_sum;
+ double *cycles_sum;
} gmx_wallcycle_t_t;
/* Each name should not exceed 19 characters */
static const char *wcn[ewcNR] =
-{ "Run", "Step", "PP during PME", "Domain decomp.", "DD comm. load",
- "DD comm. bounds", "Vsite constr.", "Send X to PME", "Neighbor search", "Launch GPU ops.",
- "Comm. coord.", "Born radii", "Force", "Wait + Comm. F", "PME mesh",
- "PME redist. X/F", "PME spread/gather", "PME 3D-FFT", "PME 3D-FFT Comm.", "PME solve",
- "PME wait for PP", "Wait + Recv. PME F", "Wait GPU nonlocal", "Wait GPU local", "NB X/F buffer ops.",
- "Vsite spread", "Write traj.", "Update", "Constraints", "Comm. energies",
- "Enforced rotation", "Add rot. forces", "Test" };
+{
+ "Run", "Step", "PP during PME", "Domain decomp.", "DD comm. load",
+ "DD comm. bounds", "Vsite constr.", "Send X to PME", "Neighbor search", "Launch GPU ops.",
+ "Comm. coord.", "Born radii", "Force", "Wait + Comm. F", "PME mesh",
+ "PME redist. X/F", "PME spread/gather", "PME 3D-FFT", "PME 3D-FFT Comm.", "PME solve",
+ "PME wait for PP", "Wait + Recv. PME F", "Wait GPU nonlocal", "Wait GPU local", "NB X/F buffer ops.",
+ "Vsite spread", "Write traj.", "Update", "Constraints", "Comm. energies",
+ "Enforced rotation", "Add rot. forces", "Test"
+};
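/* Names for the sub-counters; only used when GMX_CYCLE_SUBCOUNTERS is defined */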
static const char *wcsn[ewcsNR] =
-{ "DD redist.", "DD NS grid + sort", "DD setup comm.",
- "DD make top.", "DD make constr.", "DD top. other",
- "NS grid local", "NS grid non-loc.", "NS search local", "NS search non-loc.",
- "Bonded F", "Nonbonded F", "Ewald F correction",
- "NB X buffer ops.", "NB F buffer ops."
+{
+ "DD redist.", "DD NS grid + sort", "DD setup comm.",
+ "DD make top.", "DD make constr.", "DD top. other",
+ "NS grid local", "NS grid non-loc.", "NS search local", "NS search non-loc.",
+ "Bonded F", "Nonbonded F", "Ewald F correction",
+ "NB X buffer ops.", "NB F buffer ops."
};
gmx_bool wallcycle_have_counter(void)
{
- return gmx_cycles_have_counter();
+ return gmx_cycles_have_counter();
}
-gmx_wallcycle_t wallcycle_init(FILE *fplog,int resetstep,t_commrec *cr,
+gmx_wallcycle_t wallcycle_init(FILE *fplog, int resetstep, t_commrec *cr,
int nthreads_pp, int nthreads_pme)
{
gmx_wallcycle_t wc;
-
-
+
+
if (!wallcycle_have_counter())
{
return NULL;
}
- snew(wc,1);
+ snew(wc, 1);
wc->wc_barrier = FALSE;
wc->wcc_all = NULL;
#ifdef GMX_MPI
if (PAR(cr) && getenv("GMX_CYCLE_BARRIER") != NULL)
{
- if (fplog)
+ if (fplog)
{
- fprintf(fplog,"\nWill call MPI_Barrier before each cycle start/stop call\n\n");
+ fprintf(fplog, "\nWill call MPI_Barrier before each cycle start/stop call\n\n");
}
- wc->wc_barrier = TRUE;
+ wc->wc_barrier = TRUE;
wc->mpi_comm_mygroup = cr->mpi_comm_mygroup;
}
#endif
- snew(wc->wcc,ewcNR);
+ snew(wc->wcc, ewcNR);
if (getenv("GMX_CYCLE_ALL") != NULL)
{
- if (fplog)
+ if (fplog)
{
- fprintf(fplog,"\nWill time all the code during the run\n\n");
+ fprintf(fplog, "\nWill time all the code during the run\n\n");
}
- snew(wc->wcc_all,ewcNR*ewcNR);
+ snew(wc->wcc_all, ewcNR*ewcNR);
}
#ifdef GMX_CYCLE_SUBCOUNTERS
- snew(wc->wcsc,ewcsNR);
+ snew(wc->wcsc, ewcsNR);
#endif
#ifdef DEBUG_WCYCLE
{
return;
}
-
+
if (wc->wcc != NULL)
{
sfree(wc->wcc);
sfree(wc);
}
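/* Start of a GMX_CYCLE_ALL counter-pair interval: remember which counter
 * was started and at which cycle count */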
-static void wallcycle_all_start(gmx_wallcycle_t wc,int ewc,gmx_cycles_t cycle)
+static void wallcycle_all_start(gmx_wallcycle_t wc, int ewc, gmx_cycles_t cycle)
{
- wc->ewc_prev = ewc;
+ wc->ewc_prev = ewc;
wc->cycle_prev = cycle;
}
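/* End of a counter-pair interval: add the elapsed cycles to the
 * (previous counter, current counter) entry of the ewcNR*ewcNR matrix */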
-static void wallcycle_all_stop(gmx_wallcycle_t wc,int ewc,gmx_cycles_t cycle)
+static void wallcycle_all_stop(gmx_wallcycle_t wc, int ewc, gmx_cycles_t cycle)
{
wc->wcc_all[wc->ewc_prev*ewcNR+ewc].n += 1;
wc->wcc_all[wc->ewc_prev*ewcNR+ewc].c += cycle - wc->cycle_prev;
if (wc->count_depth < 0 || wc->count_depth >= DEPTH_MAX)
{
- gmx_fatal(FARGS,"wallcycle counter depth out of range: %d",
+ gmx_fatal(FARGS, "wallcycle counter depth out of range: %d",
wc->count_depth);
}
wc->counterlist[wc->count_depth] = ewc;
if (wc->count_depth < 0)
{
- gmx_fatal(FARGS,"wallcycle counter depth out of range when stopping %s: %d",wcn[ewc],wc->count_depth);
+ gmx_fatal(FARGS, "wallcycle counter depth out of range when stopping %s: %d", wcn[ewc], wc->count_depth);
}
if (wc->counterlist[wc->count_depth] != ewc)
{
- gmx_fatal(FARGS,"wallcycle mismatch at stop, start %s, stop %s",
- wcn[wc->counterlist[wc->count_depth]],wcn[ewc]);
+ gmx_fatal(FARGS, "wallcycle mismatch at stop, start %s, stop %s",
+ wcn[wc->counterlist[wc->count_depth]], wcn[ewc]);
}
}
#endif
#endif
#ifdef DEBUG_WCYCLE
- debug_start_check(wc,ewc);
+ debug_start_check(wc, ewc);
#endif
- cycle = gmx_cycles_read();
+ cycle = gmx_cycles_read();
wc->wcc[ewc].start = cycle;
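/* With GMX_CYCLE_ALL timing active (wcc_all allocated), track the nesting
 * depth and record counter-to-counter intervals at the appropriate levels */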
if (wc->wcc_all != NULL)
{
wc->wc_depth++;
if (ewc == ewcRUN)
{
- wallcycle_all_start(wc,ewc,cycle);
+ wallcycle_all_start(wc, ewc, cycle);
}
else if (wc->wc_depth == 3)
{
- wallcycle_all_stop(wc,ewc,cycle);
+ wallcycle_all_stop(wc, ewc, cycle);
}
}
}
double wallcycle_stop(gmx_wallcycle_t wc, int ewc)
{
- gmx_cycles_t cycle,last;
-
+ gmx_cycles_t cycle, last;
+
if (wc == NULL)
{
return 0;
}
-
+
#ifdef GMX_MPI
if (wc->wc_barrier)
{
#endif
#ifdef DEBUG_WCYCLE
- debug_stop_check(wc,ewc);
+ debug_stop_check(wc, ewc);
#endif
-
- cycle = gmx_cycles_read();
- last = cycle - wc->wcc[ewc].start;
+
+ cycle = gmx_cycles_read();
+ last = cycle - wc->wcc[ewc].start;
wc->wcc[ewc].c += last;
wc->wcc[ewc].n++;
if (wc->wcc_all)
wc->wc_depth--;
if (ewc == ewcRUN)
{
- wallcycle_all_stop(wc,ewc,cycle);
+ wallcycle_all_stop(wc, ewc, cycle);
}
else if (wc->wc_depth == 2)
{
- wallcycle_all_start(wc,ewc,cycle);
+ wallcycle_all_start(wc, ewc, cycle);
}
}
return;
}
- for(i=0; i<ewcNR; i++)
+ for (i = 0; i < ewcNR; i++)
{
wc->wcc[i].n = 0;
wc->wcc[i].c = 0;
}
if (wc->wcc_all)
{
- for(i=0; i<ewcNR*ewcNR; i++)
+ for (i = 0; i < ewcNR*ewcNR; i++)
{
wc->wcc_all[i].n = 0;
wc->wcc_all[i].c = 0;
}
}
#ifdef GMX_CYCLE_SUBCOUNTERS
- for (i=0; i<ewcsNR; i++)
+ for (i = 0; i < ewcsNR; i++)
{
wc->wcsc[i].n = 0;
wc->wcsc[i].c = 0;
void wallcycle_sum(t_commrec *cr, gmx_wallcycle_t wc)
{
wallcc_t *wcc;
- double *cycles;
- double cycles_n[ewcNR+ewcsNR],buf[ewcNR+ewcsNR],*cyc_all,*buf_all;
- int i,j;
- int nsum;
+ double *cycles;
+ double cycles_n[ewcNR+ewcsNR], buf[ewcNR+ewcsNR], *cyc_all, *buf_all;
+ int i, j;
+ int nsum;
if (wc == NULL)
{
return;
}
- snew(wc->cycles_sum,ewcNR+ewcsNR);
+ snew(wc->cycles_sum, ewcNR+ewcsNR);
cycles = wc->cycles_sum;
wcc = wc->wcc;
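/* PME counters (and the Run counter on PME-only ranks) are scaled by the
 * PME thread count; PP counters are scaled by the PP thread count further down */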
- for(i=0; i<ewcNR; i++)
+ for (i = 0; i < ewcNR; i++)
{
- if (is_pme_counter(i) || (i==ewcRUN && cr->duty == DUTY_PME))
+ if (is_pme_counter(i) || (i == ewcRUN && cr->duty == DUTY_PME))
{
wcc[i].c *= wc->nthreads_pme;
if (wc->wcc_all)
{
- for(j=0; j<ewcNR; j++)
+ for (j = 0; j < ewcNR; j++)
{
wc->wcc_all[i*ewcNR+j].c *= wc->nthreads_pme;
}
if (wc->wcc_all)
{
- for(j=0; j<ewcNR; j++)
+ for (j = 0; j < ewcNR; j++)
{
wc->wcc_all[i*ewcNR+j].c *= wc->nthreads_pp;
}
wcc[ewcPMEWAITCOMM].c = wcc[ewcRUN].c - wcc[ewcPMEMESH].c;
}
}
-
+
/* Store the cycles in a double buffer for summing */
- for(i=0; i<ewcNR; i++)
+ for (i = 0; i < ewcNR; i++)
{
cycles_n[i] = (double)wcc[i].n;
cycles[i] = (double)wcc[i].c;
}
nsum = ewcNR;
#ifdef GMX_CYCLE_SUBCOUNTERS
- for(i=0; i<ewcsNR; i++)
+ for (i = 0; i < ewcsNR; i++)
{
- wc->wcsc[i].c *= wc->nthreads_pp;
+ wc->wcsc[i].c *= wc->nthreads_pp;
cycles_n[ewcNR+i] = (double)wc->wcsc[i].n;
cycles[ewcNR+i] = (double)wc->wcsc[i].c;
}
nsum += ewcsNR;
-#endif
-
+#endif
+
#ifdef GMX_MPI
if (cr->nnodes > 1)
{
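/* Call counts are combined with MPI_MAX and cycle counts with MPI_SUM
 * across all ranks of the simulation */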
- MPI_Allreduce(cycles_n,buf,nsum,MPI_DOUBLE,MPI_MAX,
+ MPI_Allreduce(cycles_n, buf, nsum, MPI_DOUBLE, MPI_MAX,
cr->mpi_comm_mysim);
- for(i=0; i<ewcNR; i++)
+ for (i = 0; i < ewcNR; i++)
{
wcc[i].n = (int)(buf[i] + 0.5);
}
#ifdef GMX_CYCLE_SUBCOUNTERS
- for(i=0; i<ewcsNR; i++)
+ for (i = 0; i < ewcsNR; i++)
{
wc->wcsc[i].n = (int)(buf[ewcNR+i] + 0.5);
}
-#endif
+#endif
- MPI_Allreduce(cycles,buf,nsum,MPI_DOUBLE,MPI_SUM,
+ MPI_Allreduce(cycles, buf, nsum, MPI_DOUBLE, MPI_SUM,
cr->mpi_comm_mysim);
- for(i=0; i<nsum; i++)
+ for (i = 0; i < nsum; i++)
{
cycles[i] = buf[i];
}
if (wc->wcc_all != NULL)
{
- snew(cyc_all,ewcNR*ewcNR);
- snew(buf_all,ewcNR*ewcNR);
- for(i=0; i<ewcNR*ewcNR; i++)
+ snew(cyc_all, ewcNR*ewcNR);
+ snew(buf_all, ewcNR*ewcNR);
+ for (i = 0; i < ewcNR*ewcNR; i++)
{
cyc_all[i] = wc->wcc_all[i].c;
}
- MPI_Allreduce(cyc_all,buf_all,ewcNR*ewcNR,MPI_DOUBLE,MPI_SUM,
+ MPI_Allreduce(cyc_all, buf_all, ewcNR*ewcNR, MPI_DOUBLE, MPI_SUM,
cr->mpi_comm_mysim);
- for(i=0; i<ewcNR*ewcNR; i++)
+ for (i = 0; i < ewcNR*ewcNR; i++)
{
wc->wcc_all[i].c = buf_all[i];
}
#endif
}
-static void print_cycles(FILE *fplog, double c2t, const char *name,
- int nnodes_tot,int nnodes, int nthreads,
+static void print_cycles(FILE *fplog, double c2t, const char *name,
+ int nnodes_tot, int nnodes, int nthreads,
int n, double c, double tot)
{
- char num[11];
- char thstr[6];
+ char num[11];
+ char thstr[6];
double wallt;
-
+
if (c > 0)
{
if (n > 0)
{
- snprintf(num,sizeof(num),"%10d",n);
+ snprintf(num, sizeof(num), "%10d", n);
if (nthreads < 0)
+ {
snprintf(thstr, sizeof(thstr), "N/A");
+ }
else
+ {
snprintf(thstr, sizeof(thstr), "%4d", nthreads);
+ }
}
else
{
- sprintf(num," ");
+ sprintf(num, " ");
sprintf(thstr, " ");
}
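/* Convert the summed cycle count to wall time, averaged over the
 * nnodes nodes that actually ran this counter */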
wallt = c*c2t*nnodes_tot/(double)nnodes;
- fprintf(fplog," %-19s %4d %4s %10s %10.3f %12.3f %5.1f\n",
- name,nnodes,thstr,num,wallt,c*1e-9,100*c/tot);
+ fprintf(fplog, " %-19s %4d %4s %10s %10.3f %12.3f %5.1f\n",
+ name, nnodes, thstr, num, wallt, c*1e-9, 100*c/tot);
}
}
-static void print_gputimes(FILE *fplog, const char *name,
+static void print_gputimes(FILE *fplog, const char *name,
int n, double t, double tot_t)
{
char num[11];
}
else
{
- sprintf(num," ");
- sprintf(avg_perf," ");
+ sprintf(num, " ");
+ sprintf(avg_perf, " ");
}
if (t != tot_t)
{
fprintf(fplog, " %-29s %10s%12.3f %s %5.1f\n",
- name, num, t/1000, avg_perf, 100 * t/tot_t);
+ name, num, t/1000, avg_perf, 100 * t/tot_t);
}
else
{
- fprintf(fplog, " %-29s %10s%12.3f %s %5.1f\n",
- name, "", t/1000, avg_perf, 100.0);
+ fprintf(fplog, " %-29s %10s%12.3f %s %5.1f\n",
+ name, "", t/1000, avg_perf, 100.0);
}
}
void wallcycle_print(FILE *fplog, int nnodes, int npme, double realtime,
gmx_wallcycle_t wc, wallclock_gpu_t *gpu_t)
{
- double *cycles;
- double c2t,tot,tot_gpu,tot_cpu_overlap,gpu_cpu_ratio,sum,tot_k;
- int i,j,npp,nth_pp,nth_pme;
- char buf[STRLEN];
+ double *cycles;
+ double c2t, tot, tot_gpu, tot_cpu_overlap, gpu_cpu_ratio, sum, tot_k;
+ int i, j, npp, nth_pp, nth_pme;
+ char buf[STRLEN];
const char *hline = "-----------------------------------------------------------------------------";
-
+
if (wc == NULL)
{
return;
c2t = 0;
}
- fprintf(fplog,"\n R E A L C Y C L E A N D T I M E A C C O U N T I N G\n\n");
+ fprintf(fplog, "\n R E A L C Y C L E A N D T I M E A C C O U N T I N G\n\n");
- fprintf(fplog," Computing: Nodes Th. Count Wall t (s) G-Cycles %c\n",'%');
- fprintf(fplog,"%s\n",hline);
+ fprintf(fplog, " Computing: Nodes Th. Count Wall t (s) G-Cycles %c\n", '%');
+ fprintf(fplog, "%s\n", hline);
sum = 0;
- for(i=ewcPPDURINGPME+1; i<ewcNR; i++)
+ for (i = ewcPPDURINGPME+1; i < ewcNR; i++)
{
if (!is_pme_subcounter(i))
{
- print_cycles(fplog,c2t,wcn[i],nnodes,
+ print_cycles(fplog, c2t, wcn[i], nnodes,
is_pme_counter(i) ? npme : npp,
- is_pme_counter(i) ? nth_pme : nth_pp,
- wc->wcc[i].n,cycles[i],tot);
+ is_pme_counter(i) ? nth_pme : nth_pp,
+ wc->wcc[i].n, cycles[i], tot);
sum += cycles[i];
}
}
if (wc->wcc_all != NULL)
{
- for(i=0; i<ewcNR; i++)
+ for (i = 0; i < ewcNR; i++)
{
- for(j=0; j<ewcNR; j++)
+ for (j = 0; j < ewcNR; j++)
{
- snprintf(buf,9,"%-9s",wcn[i]);
+ snprintf(buf, 9, "%-9s", wcn[i]);
buf[9] = ' ';
- snprintf(buf+10,9,"%-9s",wcn[j]);
+ snprintf(buf+10, 9, "%-9s", wcn[j]);
buf[19] = '\0';
- print_cycles(fplog,c2t,buf,nnodes,
+ print_cycles(fplog, c2t, buf, nnodes,
is_pme_counter(i) ? npme : npp,
is_pme_counter(i) ? nth_pme : nth_pp,
wc->wcc_all[i*ewcNR+j].n,
}
}
}
- print_cycles(fplog,c2t,"Rest",npp,npp,-1,0,tot-sum,tot);
- fprintf(fplog,"%s\n",hline);
- print_cycles(fplog,c2t,"Total",nnodes,nnodes,-1,0,tot,tot);
- fprintf(fplog,"%s\n",hline);
-
+ print_cycles(fplog, c2t, "Rest", npp, npp, -1, 0, tot-sum, tot);
+ fprintf(fplog, "%s\n", hline);
+ print_cycles(fplog, c2t, "Total", nnodes, nnodes, -1, 0, tot, tot);
+ fprintf(fplog, "%s\n", hline);
+
if (wc->wcc[ewcPMEMESH].n > 0)
{
- fprintf(fplog,"%s\n",hline);
- for(i=ewcPPDURINGPME+1; i<ewcNR; i++)
+ fprintf(fplog, "%s\n", hline);
+ for (i = ewcPPDURINGPME+1; i < ewcNR; i++)
{
if (is_pme_subcounter(i))
{
- print_cycles(fplog,c2t,wcn[i],nnodes,
+ print_cycles(fplog, c2t, wcn[i], nnodes,
is_pme_counter(i) ? npme : npp,
is_pme_counter(i) ? nth_pme : nth_pp,
- wc->wcc[i].n,cycles[i],tot);
+ wc->wcc[i].n, cycles[i], tot);
}
}
- fprintf(fplog,"%s\n",hline);
+ fprintf(fplog, "%s\n", hline);
}
#ifdef GMX_CYCLE_SUBCOUNTERS
- fprintf(fplog,"%s\n",hline);
- for(i=0; i<ewcsNR; i++)
+ fprintf(fplog, "%s\n", hline);
+ for (i = 0; i < ewcsNR; i++)
{
- print_cycles(fplog,c2t,wcsn[i],nnodes,npp,nth_pp,
- wc->wcsc[i].n,cycles[ewcNR+i],tot);
+ print_cycles(fplog, c2t, wcsn[i], nnodes, npp, nth_pp,
+ wc->wcsc[i].n, cycles[ewcNR+i], tot);
}
- fprintf(fplog,"%s\n",hline);
+ fprintf(fplog, "%s\n", hline);
#endif
/* print GPU timing summary */
if (gpu_t)
{
const char *k_log_str[2][2] = {
- {"Nonbonded F kernel", "Nonbonded F+ene k."},
- {"Nonbonded F+prune k.", "Nonbonded F+ene+prune k."}};
+ {"Nonbonded F kernel", "Nonbonded F+ene k."},
+ {"Nonbonded F+prune k.", "Nonbonded F+ene+prune k."}
+ };
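/* Total GPU time: pair-list and x/q host-to-device copies, the force
 * device-to-host copy, and all non-bonded kernel variants */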
tot_gpu = gpu_t->pl_h2d_t + gpu_t->nb_h2d_t + gpu_t->nb_d2h_t;
tot_k = 0.0;
for (i = 0; i < 2; i++)
{
- for(j = 0; j < 2; j++)
+ for (j = 0; j < 2; j++)
{
tot_k += gpu_t->ktime[i][j].t;
}
}
tot_gpu += tot_k;
-
+
tot_cpu_overlap = wc->wcc[ewcFORCE].c;
if (wc->wcc[ewcPMEMESH].n > 0)
{
tot_cpu_overlap *= c2t * 1000; /* convert cycles to ms */
fprintf(fplog, "\n GPU timings\n%s\n", hline);
- fprintf(fplog," Computing: Count Wall t (s) ms/step %c\n",'%');
+ fprintf(fplog, " Computing: Count Wall t (s) ms/step %c\n", '%');
fprintf(fplog, "%s\n", hline);
print_gputimes(fplog, "Pair list H2D",
- gpu_t->pl_h2d_c, gpu_t->pl_h2d_t, tot_gpu);
- print_gputimes(fplog, "X / q H2D",
- gpu_t->nb_c, gpu_t->nb_h2d_t, tot_gpu);
+ gpu_t->pl_h2d_c, gpu_t->pl_h2d_t, tot_gpu);
+ print_gputimes(fplog, "X / q H2D",
+ gpu_t->nb_c, gpu_t->nb_h2d_t, tot_gpu);
for (i = 0; i < 2; i++)
{
- for(j = 0; j < 2; j++)
+ for (j = 0; j < 2; j++)
{
if (gpu_t->ktime[i][j].c)
{
print_gputimes(fplog, k_log_str[i][j],
- gpu_t->ktime[i][j].c, gpu_t->ktime[i][j].t, tot_gpu);
+ gpu_t->ktime[i][j].c, gpu_t->ktime[i][j].t, tot_gpu);
}
}
- }
+ }
print_gputimes(fplog, "F D2H", gpu_t->nb_c, gpu_t->nb_d2h_t, tot_gpu);
fprintf(fplog, "%s\n", hline);
/* The user could have used -notunepme,
* but we currently can't check that here.
*/
- md_print_warn(NULL,fplog,
+ md_print_warn(NULL, fplog,
"\nNOTE: The GPU has >25%% less load than the CPU. This imbalance causes\n"
" performance loss. Maybe the domain decomposition limits the PME tuning.\n"
" In that case, try setting the DD grid manually (-dd) or lowering -dds.");
/* We should not end up here, unless the box is
* too small for increasing the cut-off for PME tuning.
*/
- md_print_warn(NULL,fplog,
+ md_print_warn(NULL, fplog,
"\nNOTE: The GPU has >25%% less load than the CPU. This imbalance causes\n"
" performance loss.");
}
}
if (gpu_cpu_ratio > 1.2)
{
- md_print_warn(NULL,fplog,
+ md_print_warn(NULL, fplog,
"\nNOTE: The GPU has >20%% more load than the CPU. This imbalance causes\n"
" performance loss, consider using a shorter cut-off and a finer PME grid.");
}
/* Only the sim master calls this function, so always print to stderr */
if (wc->wcc[ewcDOMDEC].n == 0)
{
- md_print_warn(NULL,fplog,
+ md_print_warn(NULL, fplog,
"NOTE: %d %% of the run time was spent in pair search,\n"
" you might want to increase nstlist (this has no effect on accuracy)\n",
(int)(100*cycles[ewcNS]/tot+0.5));
}
else
{
- md_print_warn(NULL,fplog,
+ md_print_warn(NULL, fplog,
"NOTE: %d %% of the run time was spent in domain decomposition,\n"
" %d %% of the run time was spent in pair search,\n"
" you might want to increase nstlist (this has no effect on accuracy)\n",
if (cycles[ewcMoveE] > tot*0.05)
{
/* Only the sim master calls this function, so always print to stderr */
- md_print_warn(NULL,fplog,
+ md_print_warn(NULL, fplog,
"NOTE: %d %% of the run time was spent communicating energies,\n"
" you might want to use the -gcom option of mdrun\n",
(int)(100*cycles[ewcMoveE]/tot+0.5));
{
return -1;
}
-
+
return wc->reset_counters;
}
extern void wcycle_set_reset_counters(gmx_wallcycle_t wc, gmx_large_int_t reset_counters)
{
if (wc == NULL)
+ {
return;
+ }
wc->reset_counters = reset_counters;
}