if (wc->count_depth < 0)
{
- gmx_fatal(FARGS, "wallcycle counter depth out of range when stopping %s: %d", wcn[ewc],
- wc->count_depth);
+ gmx_fatal(FARGS, "wallcycle counter depth out of range when stopping %s: %d", wcn[ewc], wc->count_depth);
}
if (wc->counterlist[wc->count_depth] != ewc)
{
- gmx_fatal(FARGS, "wallcycle mismatch at stop, start %s, stop %s",
- wcn[wc->counterlist[wc->count_depth]], wcn[ewc]);
+ gmx_fatal(FARGS,
+ "wallcycle mismatch at stop, start %s, stop %s",
+ wcn[wc->counterlist[wc->count_depth]],
+ wcn[ewc]);
}
}
#endif
/* Convert the cycle count to wallclock time for this task */
wallt = c_sum * c2t;
- fprintf(fplog, " %-19.19s %4s %4s %10s %10.3f %14.3f %5.1f\n", name, nnodes_str,
- nthreads_str, ncalls_str, wallt, c_sum * 1e-9, percentage);
+ fprintf(fplog,
+ " %-19.19s %4s %4s %10s %10.3f %14.3f %5.1f\n",
+ name,
+ nnodes_str,
+ nthreads_str,
+ ncalls_str,
+ wallt,
+ c_sum * 1e-9,
+ percentage);
}
}
for (j = 0; j < ewcNR; j++)
{
snprintf(buf, 20, "%-9.9s %-9.9s", wcn[i], wcn[j]);
- print_cycles(fplog, c2t_pp, buf, npp, nth_pp, wc->wcc_all[i * ewcNR + j].n,
- wc->wcc_all[i * ewcNR + j].c, tot);
+ print_cycles(fplog,
+ c2t_pp,
+ buf,
+ npp,
+ nth_pp,
+ wc->wcc_all[i * ewcNR + j].n,
+ wc->wcc_all[i * ewcNR + j].c,
+ tot);
}
}
}
fprintf(fplog, "%s\n", hline);
for (auto i : validPmeSubcounterIndices)
{
- print_cycles(fplog, npme > 0 ? c2t_pme : c2t_pp, wcn[i], npme > 0 ? npme : npp,
- nth_pme, wc->wcc[i].n, cyc_sum[i], tot);
+ print_cycles(fplog,
+ npme > 0 ? c2t_pme : c2t_pp,
+ wcn[i],
+ npme > 0 ? npme : npp,
+ nth_pme,
+ wc->wcc[i].n,
+ cyc_sum[i],
+ tot);
}
fprintf(fplog, "%s\n", hline);
}
fprintf(fplog, "\n GPU timings\n%s\n", hline);
fprintf(fplog,
- " Computing: Count Wall t (s) ms/step %c\n", '%');
+ " Computing: Count Wall t (s) ms/step %c\n",
+ '%');
fprintf(fplog, "%s\n", hline);
print_gputimes(fplog, "Pair list H2D", gpu_nbnxn_t->pl_h2d_c, gpu_nbnxn_t->pl_h2d_t, tot_gpu);
print_gputimes(fplog, "X / q H2D", gpu_nbnxn_t->nb_c, gpu_nbnxn_t->nb_h2d_t, tot_gpu);
{
if (gpu_nbnxn_t->ktime[i][j].c)
{
- print_gputimes(fplog, k_log_str[i][j], gpu_nbnxn_t->ktime[i][j].c,
- gpu_nbnxn_t->ktime[i][j].t, tot_gpu);
+ print_gputimes(fplog,
+ k_log_str[i][j],
+ gpu_nbnxn_t->ktime[i][j].c,
+ gpu_nbnxn_t->ktime[i][j].t,
+ tot_gpu);
}
}
}
{
if (gpu_pme_t->timing[k].c)
{
- print_gputimes(fplog, PMEStageNames[k], gpu_pme_t->timing[k].c,
- gpu_pme_t->timing[k].t, tot_gpu);
+ print_gputimes(
+ fplog, PMEStageNames[k], gpu_pme_t->timing[k].c, gpu_pme_t->timing[k].t, tot_gpu);
}
}
}
if (gpu_nbnxn_t->pruneTime.c)
{
- print_gputimes(fplog, "Pruning kernel", gpu_nbnxn_t->pruneTime.c,
- gpu_nbnxn_t->pruneTime.t, tot_gpu);
+ print_gputimes(fplog, "Pruning kernel", gpu_nbnxn_t->pruneTime.c, gpu_nbnxn_t->pruneTime.t, tot_gpu);
}
print_gputimes(fplog, "F D2H", gpu_nbnxn_t->nb_c, gpu_nbnxn_t->nb_d2h_t, tot_gpu);
fprintf(fplog, "%s\n", hline);
* and avoid adding it to tot_gpu as this is not in the force
* overlap. We print the fraction as relative to the rest.
*/
- print_gputimes(fplog, "*Dynamic pruning", gpu_nbnxn_t->dynamicPruneTime.c,
- gpu_nbnxn_t->dynamicPruneTime.t, tot_gpu);
+ print_gputimes(fplog,
+ "*Dynamic pruning",
+ gpu_nbnxn_t->dynamicPruneTime.c,
+ gpu_nbnxn_t->dynamicPruneTime.t,
+ tot_gpu);
fprintf(fplog, "%s\n", hline);
}
gpu_cpu_ratio = tot_gpu / tot_cpu_overlap;
fprintf(fplog,
"\nAverage per-step force GPU/CPU evaluation time ratio: %.3f ms/%.3f ms = "
"%.3f\n",
- tot_gpu / gpu_nbnxn_t->nb_c, tot_cpu_overlap / wc->wcc[ewcFORCE].n, gpu_cpu_ratio);
+ tot_gpu / gpu_nbnxn_t->nb_c,
+ tot_cpu_overlap / wc->wcc[ewcFORCE].n,
+ gpu_cpu_ratio);
}
/* only print notes related to CPU-GPU load balance with PME */