#include <stdio.h>
#include <stdlib.h>
+#include <cuda_profiler_api.h>
+
#include "gromacs/gpu_utils/cudautils.cuh"
#include "gromacs/gpu_utils/pmalloc_cuda.h"
#include "gromacs/hardware/gpu_hw_info.h"
*
* In reality it is 16 with CUDA <=v5.0, but let's stay on the safe side.
*/
-static int cuda_max_device_count = 32;
+static int cuda_max_device_count = 32;
+
+/* True when mdrun is being run under nvprof: the NVPROF_ID environment
+   variable is set by nvprof itself, so its presence indicates a profiler run.
+   Evaluated once at static-initialization time. */
+static bool cudaProfilerRun = ((getenv("NVPROF_ID") != NULL));
/** Dummy kernel used for sanity checking. */
__global__ void k_dummy_test()
*nb_free = NULL;
}
}
+
+void startGpuProfiler(void)
+{
+ /* The NVPROF_ID environment variable is set by nvprof and indicates that
+ mdrun is executed in the CUDA profiler.
+ If nvprof was run with "--profile-from-start off", the profiler will
+ be started here. This way we can avoid tracing the CUDA events from the
+ first part of the run. Starting the profiler again does nothing.
+ */
+ if (cudaProfilerRun)
+ {
+ cudaError_t stat;
+ stat = cudaProfilerStart();
+ CU_RET_ERR(stat, "cudaProfilerStart failed");
+ }
+}
+
+void stopGpuProfiler(void)
+{
+ /* Stopping the nvidia profiler here allows us to eliminate the subsequent
+ API calls from the trace, e.g. uninitialization and cleanup.
+ Only acts when mdrun is running under nvprof (see cudaProfilerRun). */
+ if (cudaProfilerRun)
+ {
+ cudaError_t stat;
+ stat = cudaProfilerStop();
+ CU_RET_ERR(stat, "cudaProfilerStop failed");
+ }
+}
+
+void resetGpuProfiler(void)
+{
+ /* With CUDA <=7.5 the profiler can't be properly reset; we can only start
+ * the profiling here (can't stop it) which will achieve the desired effect if
+ * the run was started with the profiling disabled.
+ *
+ * TODO: add a stop (or replace it with reset) when this will work correctly in CUDA.
+ * stopGpuProfiler();
+ *
+ * NOTE(review): the guard below is redundant — startGpuProfiler() already
+ * checks cudaProfilerRun — but harmless.
+ */
+ if (cudaProfilerRun)
+ {
+ startGpuProfiler();
+ }
+}
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2010, The GROMACS development team.
- * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
gmx_host_alloc_t **nb_alloc,
gmx_host_free_t **nb_free);
+
+
+/*! \brief Starts the GPU profiler if mdrun is being profiled.
+ *
+ * When a profiler run is in progress (based on the presence of the NVPROF_ID
+ * env. var.), the profiler is started to begin collecting data during the
+ * rest of the run (or until stopGpuProfiler is called).
+ *
+ * Note that this is implemented only for the CUDA API.
+ */
+CUDA_FUNC_QUALIFIER
+void startGpuProfiler(void) GPU_FUNC_TERM
+
+
+/*! \brief Resets the GPU profiler if mdrun is being profiled.
+ *
+ * When a profiler run is in progress (based on the presence of the NVPROF_ID
+ * env. var.), the profiler data is reset in order to eliminate the data collected
+ * from the preceding part of the run.
+ *
+ * This function should typically be called at the mdrun counter reset time.
+ *
+ * Note that this is implemented only for the CUDA API.
+ */
+CUDA_FUNC_QUALIFIER
+void resetGpuProfiler(void) GPU_FUNC_TERM
+
+
+/*! \brief Stops the CUDA profiler if mdrun is being profiled.
+ *
+ * This function can be called at cleanup when it is desired to prevent
+ * subsequent API calls from being traced/profiled,
+ * e.g. before uninitialization.
+ *
+ * Note that this is implemented only for the CUDA API.
+ */
+CUDA_FUNC_QUALIFIER
+void stopGpuProfiler(void) GPU_FUNC_TERM
+
+
#endif
{
/* free nbnxn data in GPU memory */
nbnxn_gpu_free(fr->nbv->gpu_nbv);
+ /* stop the GPU profiler (only CUDA) */
+ stopGpuProfiler();
/* With tMPI we need to wait for all ranks to finish deallocation before
* destroying the CUDA context in free_gpu() as some tMPI ranks may be sharing
#include <stdio.h>
#include <stdlib.h>
-#include <cuda_profiler_api.h>
-
#include "gromacs/gpu_utils/cudautils.cuh"
#include "gromacs/gpu_utils/gpu_utils.h"
#include "gromacs/gpu_utils/pmalloc_cuda.h"
cu_plist_t *plist, *plist_nl;
cu_timers_t *timers;
- /* Stopping the nvidia profiler here allows us to eliminate the subsequent
- uninitialization API calls from the trace. */
- if (getenv("NVPROF_ID") != NULL)
- {
- stat = cudaProfilerStop();
- CU_RET_ERR(stat, "cudaProfilerStop failed");
- }
-
if (nb == NULL)
{
return;
void nbnxn_gpu_reset_timings(nonbonded_verlet_t* nbv)
{
- /* The NVPROF_ID environment variable is set by nvprof and indicates that
- mdrun is executed in the CUDA profiler.
- If nvprof was run is with "--profile-from-start off", the profiler will
- be started here. This way we can avoid tracing the CUDA events from the
- first part of the run. Starting the profiler again does nothing.
- */
- if (getenv("NVPROF_ID") != NULL)
- {
- cudaError_t stat;
- stat = cudaProfilerStart();
- CU_RET_ERR(stat, "cudaProfilerStart failed");
- }
-
if (nbv->gpu_nbv && nbv->gpu_nbv->bDoTime)
{
init_timings(nbv->gpu_nbv->timings);
#include "gromacs/gmxlib/md_logging.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/gmxlib/nrnb.h"
+#include "gromacs/gpu_utils/gpu_utils.h"
#include "gromacs/imd/imd.h"
#include "gromacs/listed-forces/manage-threading.h"
#include "gromacs/math/functions.h"
if (use_GPU(nbv))
{
nbnxn_gpu_reset_timings(nbv);
+ resetGpuProfiler();
}
wallcycle_stop(wcycle, ewcRUN);