This change is intended to allow the performance of the pair-interaction
kernels to be compared separately from the force buffer clearing.
Change-Id: Ifb2b4b3e5a43ac2ee547da651f9432a22fe58421
wallcycle_sub_start(wcycle, ewcsNONBONDED);
}
wallcycle_sub_start(wcycle, ewcsNONBONDED);
}
- nbv->dispatchNonbondedKernel(ilocality, *ic, flags, clearF, *fr, enerd, nrnb);
+ nbv->dispatchNonbondedKernel(ilocality, *ic, flags, clearF, *fr, enerd, nrnb, wcycle);
#include "gromacs/nbnxm/nbnxm_simd.h"
#include "gromacs/nbnxm/kernels_reference/kernel_gpu_ref.h"
#include "gromacs/simd/simd.h"
#include "gromacs/nbnxm/nbnxm_simd.h"
#include "gromacs/nbnxm/kernels_reference/kernel_gpu_ref.h"
#include "gromacs/simd/simd.h"
+#include "gromacs/timing/wallcycle.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/real.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/real.h"
* \param[in] clearF Enum that tells if to clear the force output buffer
* \param[out] vCoulomb Output buffer for Coulomb energies
* \param[out] vVdw Output buffer for Van der Waals energies
* \param[in] clearF Enum that tells if to clear the force output buffer
* \param[out] vCoulomb Output buffer for Coulomb energies
* \param[out] vVdw Output buffer for Van der Waals energies
+ * \param[in] wcycle Pointer to cycle counting data structure.
*/
static void
nbnxn_kernel_cpu(const PairlistSet &pairlistSet,
*/
static void
nbnxn_kernel_cpu(const PairlistSet &pairlistSet,
int forceFlags,
int clearF,
real *vCoulomb,
int forceFlags,
int clearF,
real *vCoulomb,
+ real *vVdw,
+ gmx_wallcycle *wcycle)
gmx::ArrayRef<const NbnxnPairlistCpu> pairlists = pairlistSet.cpuLists();
int gmx_unused nthreads = gmx_omp_nthreads_get(emntNonbonded);
gmx::ArrayRef<const NbnxnPairlistCpu> pairlists = pairlistSet.cpuLists();
int gmx_unused nthreads = gmx_omp_nthreads_get(emntNonbonded);
+ wallcycle_sub_start(wcycle, ewcsNBFCLEARBUF);
#pragma omp parallel for schedule(static) num_threads(nthreads)
for (int nb = 0; nb < pairlists.ssize(); nb++)
{
#pragma omp parallel for schedule(static) num_threads(nthreads)
for (int nb = 0; nb < pairlists.ssize(); nb++)
{
clear_fshift(out->fshift.data());
}
clear_fshift(out->fshift.data());
}
+ if (nb == 0)
+ {
+ wallcycle_sub_stop(wcycle, ewcsNBFCLEARBUF);
+ wallcycle_sub_start(wcycle, ewcsNBFKERNEL);
+ }
+
// TODO: Change to reference
const NbnxnPairlistCpu *pairlist = &pairlists[nb];
// TODO: Change to reference
const NbnxnPairlistCpu *pairlist = &pairlists[nb];
+ wallcycle_sub_stop(wcycle, ewcsNBFKERNEL);
if (forceFlags & GMX_FORCE_ENERGY)
{
if (forceFlags & GMX_FORCE_ENERGY)
{
int clearF,
const t_forcerec &fr,
gmx_enerdata_t *enerd,
int clearF,
const t_forcerec &fr,
gmx_enerdata_t *enerd,
+ t_nrnb *nrnb,
+ gmx_wallcycle *wcycle)
{
const PairlistSet &pairlistSet = pairlistSets().pairlistSet(iLocality);
{
const PairlistSet &pairlistSet = pairlistSets().pairlistSet(iLocality);
enerd->grpp.ener[egCOULSR].data(),
fr.bBHAM ?
enerd->grpp.ener[egBHAMSR].data() :
enerd->grpp.ener[egCOULSR].data(),
fr.bBHAM ?
enerd->grpp.ener[egBHAMSR].data() :
- enerd->grpp.ener[egLJSR].data());
+ enerd->grpp.ener[egLJSR].data(),
+ wcycle);
break;
case Nbnxm::KernelType::Gpu8x8x8:
break;
case Nbnxm::KernelType::Gpu8x8x8:
int clearF,
const t_forcerec &fr,
gmx_enerdata_t *enerd,
int clearF,
const t_forcerec &fr,
gmx_enerdata_t *enerd,
+ t_nrnb *nrnb,
+ gmx_wallcycle *wcycle);
//! Executes the non-bonded free-energy kernel, always runs on the CPU
void dispatchFreeEnergyKernel(Nbnxm::InteractionLocality iLocality,
//! Executes the non-bonded free-energy kernel, always runs on the CPU
void dispatchFreeEnergyKernel(Nbnxm::InteractionLocality iLocality,
"Listed buffer ops.",
"Nonbonded pruning",
"Nonbonded F",
"Listed buffer ops.",
"Nonbonded pruning",
"Nonbonded F",
+ "NB F kernel", "NB F clear buf",
"Launch NB GPU tasks",
"Launch Bonded GPU tasks",
"Launch PME GPU tasks",
"Launch NB GPU tasks",
"Launch Bonded GPU tasks",
"Launch PME GPU tasks",
ewcsLISTED_BUF_OPS,
ewcsNONBONDED_PRUNING,
ewcsNONBONDED,
ewcsLISTED_BUF_OPS,
ewcsNONBONDED_PRUNING,
ewcsNONBONDED,
+ ewcsNBFKERNEL, ewcsNBFCLEARBUF,
ewcsLAUNCH_GPU_NONBONDED,
ewcsLAUNCH_GPU_BONDED,
ewcsLAUNCH_GPU_PME,
ewcsLAUNCH_GPU_NONBONDED,
ewcsLAUNCH_GPU_BONDED,
ewcsLAUNCH_GPU_PME,