* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
-#include "gromacs/timing/wallcycle.h"
+#include "gmxpre.h"
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "wallcycle.h"
-#include <string.h>
+#include "config.h"
-#include "gromacs/utility/smalloc.h"
-#include "gmx_fatal.h"
-#include "md_logging.h"
-#include "gromacs/utility/cstringutil.h"
+#include <stdlib.h>
+#include <string.h>
+#include "gromacs/legacyheaders/md_logging.h"
+#include "gromacs/legacyheaders/types/commrec.h"
#include "gromacs/timing/cyclecounter.h"
+#include "gromacs/utility/cstringutil.h"
+#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxmpi.h"
+#include "gromacs/utility/smalloc.h"
/* DEBUG_WCYCLE adds consistency checking for the counters.
* It checks if you stop a counter different from the last
"DD comm. bounds", "Vsite constr.", "Send X to PME", "Neighbor search", "Launch GPU ops.",
"Comm. coord.", "Born radii", "Force", "Wait + Comm. F", "PME mesh",
"PME redist. X/F", "PME spread/gather", "PME 3D-FFT", "PME 3D-FFT Comm.", "PME solve LJ", "PME solve Elec",
- "PME wait for PP", "Wait + Recv. PME F", "Wait GPU nonlocal", "Wait GPU local", "NB X/F buffer ops.",
- "Vsite spread", "Write traj.", "Update", "Constraints", "Comm. energies",
+ "PME wait for PP", "Wait + Recv. PME F", "Wait GPU nonlocal", "Wait GPU local", "Wait GPU loc. est.", "NB X/F buffer ops.",
+ "Vsite spread", "COM pull force",
+ "Write traj.", "Update", "Constraints", "Comm. energies",
"Enforced rotation", "Add rot. forces", "Coordinate swapping", "IMD", "Test"
};
"DD redist.", "DD NS grid + sort", "DD setup comm.",
"DD make top.", "DD make constr.", "DD top. other",
"NS grid local", "NS grid non-loc.", "NS search local", "NS search non-loc.",
- "Bonded F", "Nonbonded F", "Ewald F correction",
+ "Listed F", "Nonbonded F", "Ewald F correction",
"NB X buffer ops.", "NB F buffer ops."
};
wcc = wc->wcc;
+ /* The GPU wait estimate counter is used for load balancing only;
+ * keeping it would double count its time in the total, so clear it.
+ */
+ wcc[ewcWAIT_GPU_NB_L_EST].n = 0;
+ wcc[ewcWAIT_GPU_NB_L_EST].c = 0;
+
for (i = 0; i < ewcNR; i++)
{
if (is_pme_counter(i) || (i == ewcRUN && cr->duty == DUTY_PME))
fprintf(fplog, "\n\n");
fprintf(fplog, " Computing: Num Num Call Wall time Giga-Cycles\n");
- fprintf(fplog, " Nodes Threads Count (s) total sum %%\n");
+ fprintf(fplog, " Ranks Threads Count (s) total sum %%\n");
}
void wallcycle_print(FILE *fplog, int nnodes, int npme, double realtime,
if (npme > 0)
{
fprintf(fplog,
- "(*) Note that with separate PME nodes, the walltime column actually sums to\n"
+ "(*) Note that with separate PME ranks, the walltime column actually sums to\n"
" twice the total reported, but the cycle count total and %% are correct.\n"
"%s\n", hline);
}