"DD comm. bounds", "Vsite constr.", "Send X to PME", "Neighbor search", "Launch GPU ops.",
"Comm. coord.", "Born radii", "Force", "Wait + Comm. F", "PME mesh",
"PME redist. X/F", "PME spread/gather", "PME 3D-FFT", "PME 3D-FFT Comm.", "PME solve LJ", "PME solve Elec",
- "PME wait for PP", "Wait + Recv. PME F", "Wait GPU nonlocal", "Wait GPU local", "NB X/F buffer ops.",
- "Vsite spread", "Write traj.", "Update", "Constraints", "Comm. energies",
+ "PME wait for PP", "Wait + Recv. PME F", "Wait GPU nonlocal", "Wait GPU local", "Wait GPU loc. est.", "NB X/F buffer ops.",
+ "Vsite spread", "COM pull force",
+ "Write traj.", "Update", "Constraints", "Comm. energies",
"Enforced rotation", "Add rot. forces", "Coordinate swapping", "IMD", "Test"
};
wcc = wc->wcc;
+ /* The GPU wait estimate counter is used for load balancing only;
+ * including it would double count time in the total, so clear it.
+ */
+ wcc[ewcWAIT_GPU_NB_L_EST].n = 0;
+ wcc[ewcWAIT_GPU_NB_L_EST].c = 0;
+
for (i = 0; i < ewcNR; i++)
{
if (is_pme_counter(i) || (i == ewcRUN && cr->duty == DUTY_PME))