Merge branch release-5-1 into release-2016
[alexxy/gromacs.git] / src / gromacs / gpu_utils / gpu_utils.cu
index f08d55443e6d234175d1a822db25ebd03ea39a93..e8b78249e909a45dcf6142ab1b3b83de3d1ff2c7 100644 (file)
@@ -302,6 +302,31 @@ static bool addNVMLDeviceId(gmx_device_info_t* cuda_dev)
     }
     return cuda_dev->nvml_initialized;
 }
+
+/*! \brief Queries NVML for the current application clocks of a device.
+ *
+ * The SM clock is queried first. If NVML answers that query with
+ * NVML_ERROR_NOT_SUPPORTED, the function returns false immediately and the
+ * memory clock is not queried. Any other status is handed to
+ * HANDLE_NVML_RET_ERR together with a failure message, after which the
+ * memory clock is queried and its status is handled the same way.
+ *
+ * \param[in]  cuda_dev      The GPU device; its nvml_device_id is used for both queries
+ * \param[out] app_sm_clock  Receives the current application SM clock
+ * \param[out] app_mem_clock Receives the current application memory clock
+ * \returns false if the SM clock query reports that application clocks are
+ *          not supported, true otherwise
+ *
+ * \note NOTE(review): NVML_ERROR_NOT_SUPPORTED is only checked for the SM
+ *       clock query, and true is returned after any other error status
+ *       unless HANDLE_NVML_RET_ERR leaves the function (its definition is
+ *       not visible here - confirm). If it does not, the output values may
+ *       be left unwritten even though true is returned.
+ */
+static bool getApplicationClocks(const gmx_device_info_t *cuda_dev,
+                                 unsigned int            *app_sm_clock,
+                                 unsigned int            *app_mem_clock)
+{
+    nvmlReturn_t nvml_stat;
+
+    nvml_stat = nvmlDeviceGetApplicationsClock(cuda_dev->nvml_device_id, NVML_CLOCK_SM, app_sm_clock);
+    if (NVML_ERROR_NOT_SUPPORTED == nvml_stat)
+    {
+        return false;
+    }
+    HANDLE_NVML_RET_ERR(nvml_stat, "nvmlDeviceGetApplicationsClock failed");
+    nvml_stat = nvmlDeviceGetApplicationsClock(cuda_dev->nvml_device_id, NVML_CLOCK_MEM, app_mem_clock);
+    HANDLE_NVML_RET_ERR(nvml_stat, "nvmlDeviceGetApplicationsClock failed");
+
+    return true;
+}
 #endif /* HAVE_NVML_APPLICATION_CLOCKS */
 
 /*! \brief Tries to set application clocks for the GPU with the given index.
@@ -370,52 +395,52 @@ static gmx_bool init_gpu_application_clocks(FILE gmx_unused *fplog, int gmx_unus
     {
         return false;
     }
-    if (!addNVMLDeviceId( &(gpu_info->gpu_dev[gpuid])))
+
+    gmx_device_info_t *cuda_dev = &(gpu_info->gpu_dev[gpuid]);
+
+    if (!addNVMLDeviceId(cuda_dev))
     {
         return false;
     }
     //get current application clocks setting
-    unsigned int app_sm_clock  = 0;
-    unsigned int app_mem_clock = 0;
-    nvml_stat = nvmlDeviceGetApplicationsClock ( gpu_info->gpu_dev[gpuid].nvml_device_id, NVML_CLOCK_SM, &app_sm_clock );
-    if (NVML_ERROR_NOT_SUPPORTED == nvml_stat)
+    if (!getApplicationClocks(cuda_dev,
+                              &cuda_dev->nvml_orig_app_sm_clock,
+                              &cuda_dev->nvml_orig_app_mem_clock))
     {
         return false;
     }
-    HANDLE_NVML_RET_ERR( nvml_stat, "nvmlDeviceGetApplicationsClock failed" );
-    nvml_stat = nvmlDeviceGetApplicationsClock ( gpu_info->gpu_dev[gpuid].nvml_device_id, NVML_CLOCK_MEM, &app_mem_clock );
-    HANDLE_NVML_RET_ERR( nvml_stat, "nvmlDeviceGetApplicationsClock failed" );
     //get max application clocks
     unsigned int max_sm_clock  = 0;
     unsigned int max_mem_clock = 0;
-    nvml_stat = nvmlDeviceGetMaxClockInfo ( gpu_info->gpu_dev[gpuid].nvml_device_id, NVML_CLOCK_SM, &max_sm_clock );
+    nvml_stat = nvmlDeviceGetMaxClockInfo(cuda_dev->nvml_device_id, NVML_CLOCK_SM, &max_sm_clock);
     HANDLE_NVML_RET_ERR( nvml_stat, "nvmlDeviceGetMaxClockInfo failed" );
-    nvml_stat = nvmlDeviceGetMaxClockInfo ( gpu_info->gpu_dev[gpuid].nvml_device_id, NVML_CLOCK_MEM, &max_mem_clock );
+    nvml_stat = nvmlDeviceGetMaxClockInfo(cuda_dev->nvml_device_id, NVML_CLOCK_MEM, &max_mem_clock);
     HANDLE_NVML_RET_ERR( nvml_stat, "nvmlDeviceGetMaxClockInfo failed" );
 
-    gpu_info->gpu_dev[gpuid].nvml_is_restricted     = NVML_FEATURE_ENABLED;
-    gpu_info->gpu_dev[gpuid].nvml_ap_clocks_changed = false;
+    cuda_dev->nvml_is_restricted      = NVML_FEATURE_ENABLED;
+    cuda_dev->nvml_app_clocks_changed = false;
 
-    nvml_stat = nvmlDeviceGetAPIRestriction ( gpu_info->gpu_dev[gpuid].nvml_device_id, NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS, &(gpu_info->gpu_dev[gpuid].nvml_is_restricted) );
+    nvml_stat = nvmlDeviceGetAPIRestriction(cuda_dev->nvml_device_id, NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS, &(cuda_dev->nvml_is_restricted));
     HANDLE_NVML_RET_ERR( nvml_stat, "nvmlDeviceGetAPIRestriction failed" );
 
     /* Note: Distinguishing between different types of GPUs here might be necessary in the future,
        e.g. if max application clocks should not be used for certain GPUs. */
-    if (nvml_stat == NVML_SUCCESS && app_sm_clock < max_sm_clock && gpu_info->gpu_dev[gpuid].nvml_is_restricted == NVML_FEATURE_DISABLED)
+    if (nvml_stat == NVML_SUCCESS && cuda_dev->nvml_orig_app_sm_clock < max_sm_clock && cuda_dev->nvml_is_restricted == NVML_FEATURE_DISABLED)
     {
-        md_print_info( fplog, "Changing GPU application clocks for %s to (%d,%d)\n", gpu_info->gpu_dev[gpuid].prop.name, max_mem_clock, max_sm_clock);
-        nvml_stat = nvmlDeviceSetApplicationsClocks ( gpu_info->gpu_dev[gpuid].nvml_device_id, max_mem_clock, max_sm_clock );
+        md_print_info(fplog, "Changing GPU application clocks for %s to (%d,%d)\n", cuda_dev->prop.name, max_mem_clock, max_sm_clock);
+        nvml_stat = nvmlDeviceSetApplicationsClocks(cuda_dev->nvml_device_id, max_mem_clock, max_sm_clock);
         HANDLE_NVML_RET_ERR( nvml_stat, "nvmlDeviceGetApplicationsClock failed" );
-        gpu_info->gpu_dev[gpuid].nvml_ap_clocks_changed = true;
+        cuda_dev->nvml_app_clocks_changed = true;
+        cuda_dev->nvml_set_app_sm_clock   = max_sm_clock;
+        cuda_dev->nvml_set_app_mem_clock  = max_mem_clock;
     }
-    else if (nvml_stat == NVML_SUCCESS && app_sm_clock < max_sm_clock)
+    else if (nvml_stat == NVML_SUCCESS && cuda_dev->nvml_orig_app_sm_clock < max_sm_clock)
     {
-        md_print_warn( fplog, "Can not change application clocks for %s to optimal values due to insufficient permissions. Current values are (%d,%d), max values are (%d,%d).\nUse sudo nvidia-smi -acp UNRESTRICTED or contact your admin to change application clocks.\n", gpu_info->gpu_dev[gpuid].prop.name, app_mem_clock, app_sm_clock, max_mem_clock, max_sm_clock);
+        md_print_warn(fplog, "Can not change application clocks for %s to optimal values due to insufficient permissions. Current values are (%d,%d), max values are (%d,%d).\nUse sudo nvidia-smi -acp UNRESTRICTED or contact your admin to change application clocks.\n", cuda_dev->prop.name, cuda_dev->nvml_orig_app_mem_clock, cuda_dev->nvml_orig_app_sm_clock, max_mem_clock, max_sm_clock);
     }
-    else if (nvml_stat == NVML_SUCCESS && app_sm_clock == max_sm_clock)
+    else if (nvml_stat == NVML_SUCCESS && cuda_dev->nvml_orig_app_sm_clock == max_sm_clock)
     {
-        //TODO: This should probably be integrated into the GPU Properties table.
-        md_print_info( fplog, "Application clocks (GPU clocks) for %s are (%d,%d)\n", gpu_info->gpu_dev[gpuid].prop.name, app_mem_clock, app_sm_clock);
+        md_print_info(fplog, "Application clocks (GPU clocks) for %s are (%d,%d)\n", cuda_dev->prop.name, cuda_dev->nvml_orig_app_mem_clock, cuda_dev->nvml_orig_app_sm_clock);
     }
     else
     {
@@ -438,10 +463,20 @@ static gmx_bool reset_gpu_application_clocks(const gmx_device_info_t gmx_unused
     nvmlReturn_t nvml_stat = NVML_SUCCESS;
     if (cuda_dev &&
         cuda_dev->nvml_is_restricted == NVML_FEATURE_DISABLED &&
-        cuda_dev->nvml_ap_clocks_changed)
+        cuda_dev->nvml_app_clocks_changed)
     {
-        nvml_stat = nvmlDeviceResetApplicationsClocks( cuda_dev->nvml_device_id );
-        HANDLE_NVML_RET_ERR( nvml_stat, "nvmlDeviceResetApplicationsClocks failed" );
+        /* Check if the clocks are still what we set them to.
+         * If so, set them back to the state we originally found them in.
+         * If not, don't touch them, because something else set them later.
+         */
+        unsigned int app_sm_clock, app_mem_clock;
+        getApplicationClocks(cuda_dev, &app_sm_clock, &app_mem_clock);
+        if (app_sm_clock  == cuda_dev->nvml_set_app_sm_clock &&
+            app_mem_clock == cuda_dev->nvml_set_app_mem_clock)
+        {
+            nvml_stat = nvmlDeviceSetApplicationsClocks(cuda_dev->nvml_device_id, cuda_dev->nvml_orig_app_mem_clock, cuda_dev->nvml_orig_app_sm_clock);
+            HANDLE_NVML_RET_ERR( nvml_stat, "nvmlDeviceGetApplicationsClock failed" );
+        }
     }
     nvml_stat = nvmlShutdown();
     HANDLE_NVML_RET_ERR( nvml_stat, "nvmlShutdown failed" );