* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
-#include "gromacs/timing/wallcycle.h"
+#include "gmxpre.h"
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "wallcycle.h"
-#include <string.h>
+#include "config.h"
-#include "gromacs/utility/smalloc.h"
-#include "gmx_fatal.h"
-#include "md_logging.h"
-#include "gromacs/utility/cstringutil.h"
+#include <stdlib.h>
+#include <string.h>
+#include "gromacs/legacyheaders/md_logging.h"
+#include "gromacs/legacyheaders/types/commrec.h"
#include "gromacs/timing/cyclecounter.h"
+#include "gromacs/utility/cstringutil.h"
+#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxmpi.h"
+#include "gromacs/utility/smalloc.h"
/* DEBUG_WCYCLE adds consistency checking for the counters.
* It checks if you stop a counter different from the last
"DD comm. bounds", "Vsite constr.", "Send X to PME", "Neighbor search", "Launch GPU ops.",
"Comm. coord.", "Born radii", "Force", "Wait + Comm. F", "PME mesh",
"PME redist. X/F", "PME spread/gather", "PME 3D-FFT", "PME 3D-FFT Comm.", "PME solve LJ", "PME solve Elec",
- "PME wait for PP", "Wait + Recv. PME F", "Wait GPU nonlocal", "Wait GPU local", "NB X/F buffer ops.",
+ "PME wait for PP", "Wait + Recv. PME F", "Wait GPU nonlocal", "Wait GPU local", "Wait GPU loc. est.", "NB X/F buffer ops.",
"Vsite spread", "COM pull force",
"Write traj.", "Update", "Constraints", "Comm. energies",
"Enforced rotation", "Add rot. forces", "Coordinate swapping", "IMD", "Test"
"DD redist.", "DD NS grid + sort", "DD setup comm.",
"DD make top.", "DD make constr.", "DD top. other",
"NS grid local", "NS grid non-loc.", "NS search local", "NS search non-loc.",
- "Bonded F", "Nonbonded F", "Ewald F correction",
+ "Listed F", "Nonbonded F", "Ewald F correction",
"NB X buffer ops.", "NB F buffer ops."
};
wcc = wc->wcc;
+ /* The GPU wait estimate counter is used for load balancing only.
+ * Including it in the total would cause double counting, so clear it.
+ */
+ wcc[ewcWAIT_GPU_NB_L_EST].n = 0;
+ wcc[ewcWAIT_GPU_NB_L_EST].c = 0;
+
for (i = 0; i < ewcNR; i++)
{
if (is_pme_counter(i) || (i == ewcRUN && cr->duty == DUTY_PME))