{
if (ftype == F_CMAP)
{
+            /* TODO It might be nice to account the execution time
+               for CMAP dihedrals to its own subtimer, but first
+               wallcycle needs to be extended to support calling
+               from multiple threads. */
            v = cmap_dihs(nbn, iatoms+nb0,
                          idef->iparams, &idef->cmap_grid,
                          x, f, fshift,
                          pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]),
                          md, fcd, global_atom_index);
        }
else
{
+        /* TODO It might be nice to account the execution time for
+           pairs to its own subtimer, but first wallcycle needs to
+           be extended to support calling from multiple threads. */
v = do_pairs(ftype, nbn, iatoms+nb0, idef->iparams, x, f, fshift,
pbc, g, lambda, dvdl, md, fr, grpp, global_atom_index);
}
}
void calc_listed(const gmx_multisim_t *ms,
+ gmx_wallcycle *wcycle,
const t_idef *idef,
const rvec x[], history_t *hist,
rvec f[], t_forcerec *fr,
}
#endif
- if (idef->il[F_POSRES].nr > 0)
- {
- posres_wrapper(nrnb, idef, pbc_full, x, enerd, lambda, fr);
- }
+ if ((idef->il[F_POSRES].nr > 0) ||
+ (idef->il[F_FBPOSRES].nr > 0) ||
+ (idef->il[F_ORIRES].nr > 0) ||
+ (idef->il[F_DISRES].nr > 0))
+ {
+        /* TODO Use of restraints triggers further function calls
+           inside the loop over calc_one_bond(), which are too
+           awkward to account to this subtimer properly in the
+           present code. We neither test nor care much about
+           performance with restraints, anyway. */
+ wallcycle_sub_start(wcycle, ewcsRESTRAINTS);
+
+ if (idef->il[F_POSRES].nr > 0)
+ {
+ posres_wrapper(nrnb, idef, pbc_full, x, enerd, lambda, fr);
+ }
- if (idef->il[F_FBPOSRES].nr > 0)
- {
- fbposres_wrapper(nrnb, idef, pbc_full, x, enerd, fr);
- }
+ if (idef->il[F_FBPOSRES].nr > 0)
+ {
+ fbposres_wrapper(nrnb, idef, pbc_full, x, enerd, fr);
+ }
- /* Do pre force calculation stuff which might require communication */
- if (idef->il[F_ORIRES].nr)
- {
- enerd->term[F_ORIRESDEV] =
- calc_orires_dev(ms, idef->il[F_ORIRES].nr,
- idef->il[F_ORIRES].iatoms,
- idef->iparams, md, x,
- pbc_null, fcd, hist);
- }
- if (idef->il[F_DISRES].nr)
- {
- calc_disres_R_6(idef->il[F_DISRES].nr,
- idef->il[F_DISRES].iatoms,
- idef->iparams, x, pbc_null,
- fcd, hist);
-#ifdef GMX_MPI
- if (fcd->disres.nsystems > 1)
+        /* Do pre-force-calculation work that might require communication */
+ if (idef->il[F_ORIRES].nr > 0)
{
- gmx_sum_sim(2*fcd->disres.nres, fcd->disres.Rt_6, ms);
+ enerd->term[F_ORIRESDEV] =
+ calc_orires_dev(ms, idef->il[F_ORIRES].nr,
+ idef->il[F_ORIRES].iatoms,
+ idef->iparams, md, x,
+ pbc_null, fcd, hist);
}
+        if (idef->il[F_DISRES].nr > 0)
+ {
+ calc_disres_R_6(idef->il[F_DISRES].nr,
+ idef->il[F_DISRES].iatoms,
+ idef->iparams, x, pbc_null,
+ fcd, hist);
+#ifdef GMX_MPI
+ if (fcd->disres.nsystems > 1)
+ {
+ gmx_sum_sim(2*fcd->disres.nres, fcd->disres.Rt_6, ms);
+ }
#endif
+ }
+
+ wallcycle_sub_stop(wcycle, ewcsRESTRAINTS);
}
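+    /* The multithreaded loop over listed interactions below is
+     * accounted to the ewcsLISTED subtimer */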
+ wallcycle_sub_start(wcycle, ewcsLISTED);
#pragma omp parallel for num_threads(fr->nthreads) schedule(static)
for (thread = 0; thread < fr->nthreads; thread++)
{
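        /* ... each thread loops over the listed interaction types,
           calling calc_one_bond() for its share of the work ... */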
}
}
}
+ wallcycle_sub_stop(wcycle, ewcsLISTED);
+
if (fr->nthreads > 1)
{
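+        /* Reducing the per-thread force and energy buffers into the
+         * main arrays is accounted to its own subtimer, so it does
+         * not inflate the listed-forces time */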
+ wallcycle_sub_start(wcycle, ewcsLISTED_BUF_OPS);
reduce_thread_forces(fr->natoms_force, f, fr->fshift,
enerd->term, &enerd->grpp, dvdl,
fr->nthreads, fr->f_t,
fr->red_nblock, 1<<fr->red_ashift,
bCalcEnerVir,
force_flags & GMX_FORCE_DHDL);
+ wallcycle_sub_stop(wcycle, ewcsLISTED_BUF_OPS);
}
+
+ /* Remaining code does not have enough flops to bother counting */
if (force_flags & GMX_FORCE_DHDL)
{
        for (i = 0; i < efptNR; i++)
        {
            enerd->dvdl_nonlin[i] += dvdl[i];
        }
    }
- wallcycle_sub_start(wcycle, ewcsLISTED);
-
if ((idef->il[F_POSRES].nr > 0) ||
(idef->il[F_FBPOSRES].nr > 0))
{
+ /* Not enough flops to bother counting */
set_pbc(&pbc_full, fr->ePBC, box);
}
- calc_listed(ms, idef, x, hist, f, fr, pbc, &pbc_full,
+ calc_listed(ms, wcycle, idef, x, hist, f, fr, pbc, &pbc_full,
graph, enerd, nrnb, lambda, md, fcd,
global_atom_index, flags);
    /* Check if we have to determine energy differences
     * at foreign lambdas */
if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL))
{
- posres_wrapper_lambda(fepvals, idef, &pbc_full, x, enerd, lambda, fr);
+ posres_wrapper_lambda(wcycle, fepvals, idef, &pbc_full, x, enerd, lambda, fr);
if (idef->ilsort != ilsortNO_FE)
{
+ wallcycle_sub_start(wcycle, ewcsLISTED_FEP);
        if (idef->ilsort != ilsortFE_SORTED)
        {
            gmx_incons("The bonded interactions are not sorted for free energy");
        }
        for (i = 0; i < enerd->n_lambda; i++)
        {
            /* ... recompute the listed energies at this foreign lambda ... */
            sum_epot(&(enerd->foreign_grpp), enerd->foreign_term);
            enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
        }
+ wallcycle_sub_stop(wcycle, ewcsLISTED_FEP);
}
}
debug_gmx();
-
- wallcycle_sub_stop(wcycle, ewcsLISTED);
}
* Note that pbc_full is used only for position restraints, and is
* not initialized if there are none. */
void calc_listed(const gmx_multisim_t *ms,
+ struct gmx_wallcycle *wcycle,
const t_idef *idef,
const rvec x[], history_t *hist,
rvec f[], t_forcerec *fr,
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2014, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "gromacs/legacyheaders/nrnb.h"
#include "gromacs/math/vec.h"
#include "gromacs/pbcutil/pbc.h"
+#include "gromacs/timing/wallcycle.h"
#include "gromacs/topology/idef.h"
#include "gromacs/utility/basedefinitions.h"
+struct gmx_wallcycle;
+
namespace
{
}
void
-posres_wrapper_lambda(const t_lambda *fepvals,
- const t_idef *idef,
- const struct t_pbc *pbc,
- const rvec x[],
- gmx_enerdata_t *enerd,
- real *lambda,
- t_forcerec *fr)
+posres_wrapper_lambda(struct gmx_wallcycle *wcycle,
+ const t_lambda *fepvals,
+ const t_idef *idef,
+ const struct t_pbc *pbc,
+ const rvec x[],
+ gmx_enerdata_t *enerd,
+ real *lambda,
+ t_forcerec *fr)
{
real v;
int i;
return;
}
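+    /* The nocount variant is used so that restarting the RESTRAINTS
+     * subtimer here does not inflate its call count */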
+ wallcycle_sub_start_nocount(wcycle, ewcsRESTRAINTS);
for (i = 0; i < enerd->n_lambda; i++)
{
        real dvdl_dum = 0, lambda_dum;

        lambda_dum = (i == 0 ? lambda[efptRESTRAINT] : fepvals->all_lambda[efptRESTRAINT][i-1]);
        v          = posres(idef->il[F_POSRES].nr, idef->il[F_POSRES].iatoms,
                            idef->iparams_posres, x, NULL, NULL, pbc,
                            lambda_dum, &dvdl_dum,
                            fr->rc_scaling, fr->ePBC, fr->posres_com, fr->posres_comB);
enerd->enerpart_lambda[i] += v;
}
+ wallcycle_sub_stop(wcycle, ewcsRESTRAINTS);
}
/*! \brief Helper function that wraps calls to fbposres for
#endif
struct t_pbc;
+struct gmx_wallcycle;
/*! \brief Helper function that wraps calls to posres */
void
/*! \brief Helper function that wraps calls to posres for free-energy
    perturbation */
void
-posres_wrapper_lambda(const t_lambda *fepvals,
- const t_idef *idef,
- const struct t_pbc *pbc,
- const rvec x[],
- gmx_enerdata_t *enerd,
- real *lambda,
- t_forcerec *fr);
+posres_wrapper_lambda(struct gmx_wallcycle *wcycle,
+ const t_lambda *fepvals,
+ const t_idef *idef,
+ const struct t_pbc *pbc,
+ const rvec x[],
+ gmx_enerdata_t *enerd,
+ real *lambda,
+ t_forcerec *fr);
/*! \brief Helper function that wraps calls to fbposres for
free-energy perturbation */
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
/* Shift the coordinates. Must be done before listed forces and PPPM,
* but is also necessary for SHAKE and update, therefore it can NOT
* go when no listed forces have to be evaluated.
+ *
+ * The shifting and PBC code is deliberately not timed, since with
+ * the Verlet scheme it only takes non-zero time with triclinic
+ * boxes, and even then the time is around a factor of 100 less
+ * than the next smallest counter.
*/
+
/* Here sometimes we would not need to shift with NBFonly,
* but we do so anyhow for consistency of the returned coordinates.
*/
((flags & GMX_FORCE_LISTED)
|| EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype)))
{
+        /* TODO There are no electrostatics methods that require
+           this transformation when using the Verlet scheme, so
+           update the above conditional. */
/* Since all atoms are in the rectangular or triclinic unit-cell,
* only single box vector shifts (2 in x) are required.
*/
if (EEL_PME_EWALD(fr->eeltype) && fr->n_tpi == 0)
{
+ /* This is not in a subcounter because it takes a
+ negligible and constant-sized amount of time */
Vcorr_q += ewald_charge_correction(cr, fr, lambda[efptCOUL], box,
&dvdl_long_range_correction_q,
fr->vir_el_recip);
}
/* Start the force cycle counter.
- * This counter is stopped in do_forcelow_level.
+ * This counter is stopped after do_force_lowlevel.
* No parallel communication should occur while this counter is running,
* since that will interfere with the dynamic load balancing.
*/
}
/* Add all the non-bonded force to the normal force array.
- * This can be split into a local a non-local part when overlapping
+ * This can be split into a local and a non-local part when overlapping
* communication with calculation with domain decomposition.
*/
cycles_force += wallcycle_stop(wcycle, ewcFORCE);
if ((flags & GMX_FORCE_VIRIAL) &&
nbv->grp[aloc].nbl_lists.nnbl > 1)
{
+ /* This is not in a subcounter because it takes a
+ negligible and constant-sized amount of time */
nbnxn_atomdata_add_nbat_fshift_to_fshift(nbv->grp[aloc].nbat,
fr->fshift);
}
}
/* Start the force cycle counter.
- * This counter is stopped in do_forcelow_level.
+ * This counter is stopped after do_force_lowlevel.
* No parallel communication should occur while this counter is running,
* since that will interfere with the dynamic load balancing.
*/
"DD redist.", "DD NS grid + sort", "DD setup comm.",
"DD make top.", "DD make constr.", "DD top. other",
"NS grid local", "NS grid non-loc.", "NS search local", "NS search non-loc.",
- "Bonded F", "Nonbonded F", "Ewald F correction",
- "NB X buffer ops.", "NB F buffer ops."
+ "Bonded F",
+ "Bonded-FEP F",
+ "Restraints F",
+ "Listed buffer ops.",
+ "Nonbonded F",
+ "Ewald F correction",
+ "NB X buffer ops.",
+ "NB F buffer ops.",
};
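+/* The subcounter names above must be kept in sync with the ewcs
+ * enum in wallcycle.h */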
gmx_bool wallcycle_have_counter(void)
}
}
+ if (wc->wc_barrier)
+ {
+ md_print_warn(NULL, fplog,
+ "MPI_Barrier was called before each cycle start/stop\n"
+ "call, so timings are not those of real runs.\n");
+ }
+
if (wc->wcc[ewcNB_XF_BUF_OPS].n > 0 &&
(cyc_sum[ewcDOMDEC] > tot*0.1 ||
cyc_sum[ewcNS] > tot*0.1))
}
}
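+/* As wallcycle_sub_start(), but undoes the increment of the call
+ * count, so that a subtimer can be restarted within a step without
+ * inflating its call statistics */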
+void wallcycle_sub_start_nocount(gmx_wallcycle_t wc, int ewcs)
+{
+ if (wc == NULL)
+ {
+ return;
+ }
+
+ wallcycle_sub_start(wc, ewcs);
+ wc->wcsc[ewcs].n--;
+}
+
void wallcycle_sub_stop(gmx_wallcycle_t wc, int ewcs)
{
if (wc != NULL)
void wallcycle_sub_start(gmx_wallcycle_t gmx_unused wc, int gmx_unused ewcs)
{
}
+void wallcycle_sub_start_nocount(gmx_wallcycle_t gmx_unused wc, int gmx_unused ewcs)
+{
+}
void wallcycle_sub_stop(gmx_wallcycle_t gmx_unused wc, int gmx_unused ewcs)
{
}
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2008, The GROMACS development team.
- * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#ifndef GMX_TIMING_WALLCYCLE_H
#define GMX_TIMING_WALLCYCLE_H
+/* NOTE: None of the routines here are safe to call within an OpenMP
+ * region */
+
#include <stdio.h>
#include "gromacs/legacyheaders/types/commrec_fwd.h"
ewcsDD_MAKETOP, ewcsDD_MAKECONSTR, ewcsDD_TOPOTHER,
ewcsNBS_GRID_LOCAL, ewcsNBS_GRID_NONLOCAL,
ewcsNBS_SEARCH_LOCAL, ewcsNBS_SEARCH_NONLOCAL,
- ewcsLISTED, ewcsNONBONDED, ewcsEWALD_CORRECTION,
- ewcsNB_X_BUF_OPS, ewcsNB_F_BUF_OPS,
+ ewcsLISTED,
+ ewcsLISTED_FEP,
+ ewcsRESTRAINTS,
+ ewcsLISTED_BUF_OPS,
+ ewcsNONBONDED,
+ ewcsEWALD_CORRECTION,
+ ewcsNB_X_BUF_OPS,
+ ewcsNB_F_BUF_OPS,
ewcsNR
};
void wallcycle_sub_start(gmx_wallcycle_t wc, int ewcs);
/* Set the start sub cycle count for ewcs */
+void wallcycle_sub_start_nocount(gmx_wallcycle_t wc, int ewcs);
+/* Set the start sub cycle count for ewcs without increasing the call count */
+
void wallcycle_sub_stop(gmx_wallcycle_t wc, int ewcs);
/* Stop the sub cycle count for ewcs */