}
return cuda_dev->nvml_initialized;
}
+
+/*! \brief Reads the current application clocks of a device through NVML.
+ *
+ * The SM clock is queried first. If that query reports
+ * NVML_ERROR_NOT_SUPPORTED, the function returns false right away and the
+ * memory clock is not queried, so \p app_mem_clock is not written by this
+ * function in that case. Any other status is handed to HANDLE_NVML_RET_ERR,
+ * which is defined outside this view (NOTE(review): confirm whether it only
+ * reports the error or also changes control flow). The memory clock query
+ * has no separate "not supported" check.
+ *
+ * \param[in]  cuda_dev       The GPU device; its nvml_device_id is used for the queries
+ * \param[out] app_sm_clock   The current application SM clock
+ * \param[out] app_mem_clock  The current application memory clock
+ * \returns false if the SM clock query reports that application clocks are
+ *          not supported, true otherwise
+ */
+static bool getApplicationClocks(const gmx_device_info_t *cuda_dev,
+ unsigned int *app_sm_clock,
+ unsigned int *app_mem_clock)
+{
+ nvmlReturn_t nvml_stat;
+
+ nvml_stat = nvmlDeviceGetApplicationsClock(cuda_dev->nvml_device_id, NVML_CLOCK_SM, app_sm_clock);
+ if (NVML_ERROR_NOT_SUPPORTED == nvml_stat)
+ {
+ return false;
+ }
+ HANDLE_NVML_RET_ERR(nvml_stat, "nvmlDeviceGetApplicationsClock failed");
+ nvml_stat = nvmlDeviceGetApplicationsClock(cuda_dev->nvml_device_id, NVML_CLOCK_MEM, app_mem_clock);
+ HANDLE_NVML_RET_ERR(nvml_stat, "nvmlDeviceGetApplicationsClock failed");
+
+ return true;
+}
#endif /* HAVE_NVML_APPLICATION_CLOCKS */
/*! \brief Tries to set application clocks for the GPU with the given index.
{
return false;
}
- if (!addNVMLDeviceId( &(gpu_info->gpu_dev[gpuid])))
+
+ gmx_device_info_t *cuda_dev = &(gpu_info->gpu_dev[gpuid]);
+
+ if (!addNVMLDeviceId(cuda_dev))
{
return false;
}
//get current application clocks setting
- unsigned int app_sm_clock = 0;
- unsigned int app_mem_clock = 0;
- nvml_stat = nvmlDeviceGetApplicationsClock ( gpu_info->gpu_dev[gpuid].nvml_device_id, NVML_CLOCK_SM, &app_sm_clock );
- if (NVML_ERROR_NOT_SUPPORTED == nvml_stat)
+ if (!getApplicationClocks(cuda_dev,
+ &cuda_dev->nvml_orig_app_sm_clock,
+ &cuda_dev->nvml_orig_app_mem_clock))
{
return false;
}
- HANDLE_NVML_RET_ERR( nvml_stat, "nvmlDeviceGetApplicationsClock failed" );
- nvml_stat = nvmlDeviceGetApplicationsClock ( gpu_info->gpu_dev[gpuid].nvml_device_id, NVML_CLOCK_MEM, &app_mem_clock );
- HANDLE_NVML_RET_ERR( nvml_stat, "nvmlDeviceGetApplicationsClock failed" );
//get max application clocks
unsigned int max_sm_clock = 0;
unsigned int max_mem_clock = 0;
- nvml_stat = nvmlDeviceGetMaxClockInfo ( gpu_info->gpu_dev[gpuid].nvml_device_id, NVML_CLOCK_SM, &max_sm_clock );
+ nvml_stat = nvmlDeviceGetMaxClockInfo(cuda_dev->nvml_device_id, NVML_CLOCK_SM, &max_sm_clock);
HANDLE_NVML_RET_ERR( nvml_stat, "nvmlDeviceGetMaxClockInfo failed" );
- nvml_stat = nvmlDeviceGetMaxClockInfo ( gpu_info->gpu_dev[gpuid].nvml_device_id, NVML_CLOCK_MEM, &max_mem_clock );
+ nvml_stat = nvmlDeviceGetMaxClockInfo(cuda_dev->nvml_device_id, NVML_CLOCK_MEM, &max_mem_clock);
HANDLE_NVML_RET_ERR( nvml_stat, "nvmlDeviceGetMaxClockInfo failed" );
- gpu_info->gpu_dev[gpuid].nvml_is_restricted = NVML_FEATURE_ENABLED;
- gpu_info->gpu_dev[gpuid].nvml_ap_clocks_changed = false;
+ cuda_dev->nvml_is_restricted = NVML_FEATURE_ENABLED;
+ cuda_dev->nvml_app_clocks_changed = false;
- nvml_stat = nvmlDeviceGetAPIRestriction ( gpu_info->gpu_dev[gpuid].nvml_device_id, NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS, &(gpu_info->gpu_dev[gpuid].nvml_is_restricted) );
+ nvml_stat = nvmlDeviceGetAPIRestriction(cuda_dev->nvml_device_id, NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS, &(cuda_dev->nvml_is_restricted));
HANDLE_NVML_RET_ERR( nvml_stat, "nvmlDeviceGetAPIRestriction failed" );
/* Note: Distinguishing between different types of GPUs here might be necessary in the future,
e.g. if max application clocks should not be used for certain GPUs. */
- if (nvml_stat == NVML_SUCCESS && app_sm_clock < max_sm_clock && gpu_info->gpu_dev[gpuid].nvml_is_restricted == NVML_FEATURE_DISABLED)
+ if (nvml_stat == NVML_SUCCESS && cuda_dev->nvml_orig_app_sm_clock < max_sm_clock && cuda_dev->nvml_is_restricted == NVML_FEATURE_DISABLED)
{
- md_print_info( fplog, "Changing GPU application clocks for %s to (%d,%d)\n", gpu_info->gpu_dev[gpuid].prop.name, max_mem_clock, max_sm_clock);
- nvml_stat = nvmlDeviceSetApplicationsClocks ( gpu_info->gpu_dev[gpuid].nvml_device_id, max_mem_clock, max_sm_clock );
+ md_print_info(fplog, "Changing GPU application clocks for %s to (%d,%d)\n", cuda_dev->prop.name, max_mem_clock, max_sm_clock);
+ nvml_stat = nvmlDeviceSetApplicationsClocks(cuda_dev->nvml_device_id, max_mem_clock, max_sm_clock);
HANDLE_NVML_RET_ERR( nvml_stat, "nvmlDeviceGetApplicationsClock failed" );
- gpu_info->gpu_dev[gpuid].nvml_ap_clocks_changed = true;
+ cuda_dev->nvml_app_clocks_changed = true;
+ cuda_dev->nvml_set_app_sm_clock = max_sm_clock;
+ cuda_dev->nvml_set_app_mem_clock = max_mem_clock;
}
- else if (nvml_stat == NVML_SUCCESS && app_sm_clock < max_sm_clock)
+ else if (nvml_stat == NVML_SUCCESS && cuda_dev->nvml_orig_app_sm_clock < max_sm_clock)
{
- md_print_warn( fplog, "Can not change application clocks for %s to optimal values due to insufficient permissions. Current values are (%d,%d), max values are (%d,%d).\nUse sudo nvidia-smi -acp UNRESTRICTED or contact your admin to change application clocks.\n", gpu_info->gpu_dev[gpuid].prop.name, app_mem_clock, app_sm_clock, max_mem_clock, max_sm_clock);
+ md_print_warn(fplog, "Can not change application clocks for %s to optimal values due to insufficient permissions. Current values are (%d,%d), max values are (%d,%d).\nUse sudo nvidia-smi -acp UNRESTRICTED or contact your admin to change application clocks.\n", cuda_dev->prop.name, cuda_dev->nvml_orig_app_mem_clock, cuda_dev->nvml_orig_app_sm_clock, max_mem_clock, max_sm_clock);
}
- else if (nvml_stat == NVML_SUCCESS && app_sm_clock == max_sm_clock)
+ else if (nvml_stat == NVML_SUCCESS && cuda_dev->nvml_orig_app_sm_clock == max_sm_clock)
{
- //TODO: This should probably be integrated into the GPU Properties table.
- md_print_info( fplog, "Application clocks (GPU clocks) for %s are (%d,%d)\n", gpu_info->gpu_dev[gpuid].prop.name, app_mem_clock, app_sm_clock);
+ md_print_info(fplog, "Application clocks (GPU clocks) for %s are (%d,%d)\n", cuda_dev->prop.name, cuda_dev->nvml_orig_app_mem_clock, cuda_dev->nvml_orig_app_sm_clock);
}
else
{
nvmlReturn_t nvml_stat = NVML_SUCCESS;
if (cuda_dev &&
cuda_dev->nvml_is_restricted == NVML_FEATURE_DISABLED &&
- cuda_dev->nvml_ap_clocks_changed)
+ cuda_dev->nvml_app_clocks_changed)
{
- nvml_stat = nvmlDeviceResetApplicationsClocks( cuda_dev->nvml_device_id );
- HANDLE_NVML_RET_ERR( nvml_stat, "nvmlDeviceResetApplicationsClocks failed" );
+ /* Check if the clocks are still what we set them to.
+ * If so, set them back to the state we originally found them in.
+ * If not, don't touch them, because something else set them later.
+ */
+ unsigned int app_sm_clock, app_mem_clock;
+ getApplicationClocks(cuda_dev, &app_sm_clock, &app_mem_clock);
+ if (app_sm_clock == cuda_dev->nvml_set_app_sm_clock &&
+ app_mem_clock == cuda_dev->nvml_set_app_mem_clock)
+ {
+ nvml_stat = nvmlDeviceSetApplicationsClocks(cuda_dev->nvml_device_id, cuda_dev->nvml_orig_app_mem_clock, cuda_dev->nvml_orig_app_sm_clock);
+ HANDLE_NVML_RET_ERR( nvml_stat, "nvmlDeviceGetApplicationsClock failed" );
+ }
}
nvml_stat = nvmlShutdown();
HANDLE_NVML_RET_ERR( nvml_stat, "nvmlShutdown failed" );