struct t_forcerec;
struct t_idef;
struct t_inputrec;
+struct gmx_wallcycle;
/*! \brief The number on bonded function types supported on GPUs */
static constexpr int numFTypesOnGpu = 8;
public:
//! Construct the manager with constant data and the stream to use.
GpuBonded(const gmx_ffparams_t &ffparams,
- void *streamPtr);
+ void *streamPtr,
+ gmx_wallcycle *wcycle);
//! Destructor
~GpuBonded();
/*! \brief Launches the transfer of computed bonded energies. */
void launchEnergyTransfer();
/*! \brief Waits on the energy transfer, and accumulates bonded energies to \c enerd. */
- void accumulateEnergyTerms(gmx_enerdata_t *enerd);
+ void waitAccumulateEnergyTerms(gmx_enerdata_t *enerd);
/*! \brief Clears the device side energy buffer */
void clearEnergies();
};
GpuBonded::GpuBonded(const gmx_ffparams_t & /* ffparams */,
- void * /*streamPtr */)
+ void * /*streamPtr */,
+ gmx_wallcycle * /* wcycle */)
: impl_(nullptr)
{
}
}
void
-GpuBonded::accumulateEnergyTerms(gmx_enerdata_t * /* enerd */)
+GpuBonded::waitAccumulateEnergyTerms(gmx_enerdata_t * /* enerd */)
{
}
#include "gromacs/gpu_utils/cudautils.cuh"
#include "gromacs/gpu_utils/devicebuffer.h"
#include "gromacs/mdtypes/enerdata.h"
+#include "gromacs/timing/wallcycle.h"
#include "gromacs/topology/forcefieldparameters.h"
struct t_forcerec;
// ---- GpuBonded::Impl
GpuBonded::Impl::Impl(const gmx_ffparams_t &ffparams,
- void *streamPtr)
+ void *streamPtr,
+ gmx_wallcycle *wcycle)
{
stream_ = *static_cast<CommandStream*>(streamPtr);
+ wcycle_ = wcycle;
allocateDeviceBuffer(&d_forceParams_, ffparams.numTypes(), nullptr);
// This could be an async transfer (if the source is pinned), so
void
GpuBonded::Impl::launchEnergyTransfer()
{
- // TODO should wrap with ewcLAUNCH_GPU
GMX_ASSERT(haveInteractions_, "No GPU bonded interactions, so no energies will be computed, so transfer should not be called");
+ wallcycle_sub_start_nocount(wcycle_, ewcsLAUNCH_GPU_BONDED);
// TODO add conditional on whether there has been any compute (and make sure host buffer doesn't contain garbage)
float *h_vTot = vTot_.data();
copyFromDeviceBuffer(h_vTot, &d_vTot_,
0, F_NRE,
stream_, GpuApiCallBehavior::Async, nullptr);
+ wallcycle_sub_stop(wcycle_, ewcsLAUNCH_GPU_BONDED);
}
void
-GpuBonded::Impl::accumulateEnergyTerms(gmx_enerdata_t *enerd)
+GpuBonded::Impl::waitAccumulateEnergyTerms(gmx_enerdata_t *enerd)
{
- // TODO should wrap with some kind of wait counter, so not all
- // wait goes in to the "Rest" counter
GMX_ASSERT(haveInteractions_, "No GPU bonded interactions, so no energies will be computed or transferred, so accumulation should not occur");
+ wallcycle_start(wcycle_, ewcWAIT_GPU_BONDED);
cudaError_t stat = cudaStreamSynchronize(stream_);
CU_RET_ERR(stat, "D2H transfer of bonded energies failed");
+ wallcycle_stop(wcycle_, ewcWAIT_GPU_BONDED);
for (int fType : fTypesOnGpu)
{
void
GpuBonded::Impl::clearEnergies()
{
- // TODO should wrap with ewcLAUNCH_GPU
+ wallcycle_start_nocount(wcycle_, ewcLAUNCH_GPU);
+ wallcycle_sub_start_nocount(wcycle_, ewcsLAUNCH_GPU_BONDED);
clearDeviceBufferAsync(&d_vTot_, 0, F_NRE, stream_);
+ wallcycle_sub_stop(wcycle_, ewcsLAUNCH_GPU_BONDED);
+ wallcycle_stop(wcycle_, ewcLAUNCH_GPU);
}
// ---- GpuBonded
GpuBonded::GpuBonded(const gmx_ffparams_t &ffparams,
- void *streamPtr)
- : impl_(new Impl(ffparams, streamPtr))
+ void *streamPtr,
+ gmx_wallcycle *wcycle)
+ : impl_(new Impl(ffparams, streamPtr, wcycle))
{
}
}
void
-GpuBonded::accumulateEnergyTerms(gmx_enerdata_t *enerd)
+GpuBonded::waitAccumulateEnergyTerms(gmx_enerdata_t *enerd)
{
- impl_->accumulateEnergyTerms(enerd);
+ impl_->waitAccumulateEnergyTerms(enerd);
}
void
public:
//! Constructor
Impl(const gmx_ffparams_t &ffparams,
- void *streamPtr);
+ void *streamPtr,
+ gmx_wallcycle *wcycle);
/*! \brief Destructor, non-default needed for freeing
* device-side buffers */
~Impl();
/*! \brief Launches the transfer of computed bonded energies. */
void launchEnergyTransfer();
/*! \brief Waits on the energy transfer, and accumulates bonded energies to \c enerd. */
- void accumulateEnergyTerms(gmx_enerdata_t *enerd);
+ void waitAccumulateEnergyTerms(gmx_enerdata_t *enerd);
/*! \brief Clears the device side energy buffer */
void clearEnergies();
private:
//! Parameters and pointers, passed to the CUDA kernel
BondedCudaKernelParameters kernelParams_;
+
+ //! \brief Pointer to wallcycle structure.
+ gmx_wallcycle *wcycle_;
};
} // namespace gmx
const gmx_device_info_t *deviceInfo,
const bool useGpuForBonded,
gmx_bool bNoSolvOpt,
- real print_force)
+ real print_force,
+ gmx_wallcycle *wcycle)
{
real rtab;
char *env;
// TODO the heap allocation is only needed while
// t_forcerec lacks a constructor.
fr->gpuBonded = new gmx::GpuBonded(mtop->ffparams,
- stream);
+ stream,
+ wcycle);
}
}
struct t_filenm;
struct t_inputrec;
struct gmx_gpu_info_t;
+struct gmx_wallcycle;
namespace gmx
{
* \param[in] useGpuForBonded Whether bonded interactions will run on a GPU
* \param[in] bNoSolvOpt Do not use solvent optimization
* \param[in] print_force Print forces for atoms with force >= print_force
+ * \param[out] wcycle Pointer to cycle counter object
*/
void init_forcerec(FILE *fplog,
const gmx::MDLogger &mdlog,
const gmx_device_info_t *deviceInfo,
bool useGpuForBonded,
gmx_bool bNoSolvOpt,
- real print_force);
+ real print_force,
+ gmx_wallcycle *wcycle);
/*! \brief Divide exclusions over threads
*
if (forceWorkload.haveGpuBondedWork && (flags & GMX_FORCE_ENERGY))
{
- wallcycle_start(wcycle, ewcWAIT_GPU_BONDED);
// in principle this should be included in the DD balancing region,
// but generally it is infrequent so we'll omit it for the sake of
// simpler code
- gpuBonded->accumulateEnergyTerms(enerd);
- wallcycle_stop(wcycle, ewcWAIT_GPU_BONDED);
+ gpuBonded->waitAccumulateEnergyTerms(enerd);
- wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
- wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_BONDED);
gpuBonded->clearEnergies();
- wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_BONDED);
- wallcycle_stop(wcycle, ewcLAUNCH_GPU);
}
}
if (ppForceWorkload->haveGpuBondedWork && (flags & GMX_FORCE_ENERGY))
{
- wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_BONDED);
fr->gpuBonded->launchEnergyTransfer();
- wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_BONDED);
}
wallcycle_stop(wcycle, ewcLAUNCH_GPU);
}
*hwinfo, nonbondedDeviceInfo,
useGpuForBonded,
FALSE,
- pforce);
+ pforce,
+ wcycle);
/* Initialize the mdAtoms structure.
* mdAtoms is not filled with atom data,