File: | gromacs/gmxlib/gmx_detect_hardware.c |
Location: | line 252, column 5 |
Description: | Value stored to 'btMPI' is never read |
1 | /* |
2 | * This file is part of the GROMACS molecular simulation package. |
3 | * |
4 | * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by |
5 | * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, |
6 | * and including many others, as listed in the AUTHORS file in the |
7 | * top-level source directory and at http://www.gromacs.org. |
8 | * |
9 | * GROMACS is free software; you can redistribute it and/or |
10 | * modify it under the terms of the GNU Lesser General Public License |
11 | * as published by the Free Software Foundation; either version 2.1 |
12 | * of the License, or (at your option) any later version. |
13 | * |
14 | * GROMACS is distributed in the hope that it will be useful, |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
17 | * Lesser General Public License for more details. |
18 | * |
19 | * You should have received a copy of the GNU Lesser General Public |
20 | * License along with GROMACS; if not, see |
21 | * http://www.gnu.org/licenses, or write to the Free Software Foundation, |
22 | * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
23 | * |
24 | * If you want to redistribute modifications to GROMACS, please |
25 | * consider that scientific software is very special. Version |
26 | * control is crucial - bugs must be traceable. We will be happy to |
27 | * consider code for inclusion in the official distribution, but |
28 | * derived work must not be called official GROMACS. Details are found |
29 | * in the README & COPYING files - if they are missing, get the |
30 | * official version at http://www.gromacs.org. |
31 | * |
32 | * To help us fund GROMACS development, we humbly ask that you cite |
33 | * the research papers on the package. Check out http://www.gromacs.org. |
34 | */ |
35 | #ifdef HAVE_CONFIG_H1 |
36 | #include <config.h> |
37 | #endif |
38 | |
39 | #include <assert.h> |
40 | #include <errno(*__errno_location ()).h> |
41 | #include <stdlib.h> |
42 | #include <string.h> |
43 | |
44 | #ifdef HAVE_UNISTD_H |
45 | /* For sysconf */ |
46 | #include <unistd.h> |
47 | #endif |
48 | |
49 | #include "types/enums.h" |
50 | #include "types/hw_info.h" |
51 | #include "types/commrec.h" |
52 | #include "network.h" |
53 | #include "md_logging.h" |
54 | #include "gmx_cpuid.h" |
55 | #include "gpu_utils.h" |
56 | #include "copyrite.h" |
57 | #include "gmx_detect_hardware.h" |
58 | #include "md_logging.h" |
59 | |
60 | #include "gromacs/utility/basenetwork.h" |
61 | #include "gromacs/utility/cstringutil.h" |
62 | #include "gromacs/utility/fatalerror.h" |
63 | #include "gromacs/utility/gmxomp.h" |
64 | #include "gromacs/utility/smalloc.h" |
65 | |
66 | #include "thread_mpi/threads.h" |
67 | |
68 | #ifdef GMX_NATIVE_WINDOWS |
69 | #include <windows.h> |
70 | #endif |
71 | |
72 | #ifdef GMX_GPU |
73 | const gmx_bool bGPUBinary = TRUE1; |
74 | #else |
75 | const gmx_bool bGPUBinary = FALSE0; |
76 | #endif |
77 | |
78 | static const char * invalid_gpuid_hint = |
79 | "A delimiter-free sequence of valid numeric IDs of available GPUs is expected."; |
80 | |
81 | /* The globally shared hwinfo structure. */ |
82 | static gmx_hw_info_t *hwinfo_g; |
83 | /* A reference counter for the hwinfo structure */ |
84 | static int n_hwinfo = 0; |
85 | /* A lock to protect the hwinfo structure */ |
86 | static tMPI_Thread_mutex_t hw_info_lock = TMPI_THREAD_MUTEX_INITIALIZER{ {0}, ((void*)0) }; |
87 | |
88 | |
89 | /* FW decl. */ |
90 | static void limit_num_gpus_used(gmx_gpu_opt_t *gpu_opt, int count); |
91 | static int gmx_count_gpu_dev_unique(const gmx_gpu_info_t *gpu_info, |
92 | const gmx_gpu_opt_t *gpu_opt); |
93 | |
94 | static void sprint_gpus(char *sbuf, const gmx_gpu_info_t *gpu_info) |
95 | { |
96 | int i, ndev; |
97 | char stmp[STRLEN4096]; |
98 | |
99 | ndev = gpu_info->ncuda_dev; |
100 | |
101 | sbuf[0] = '\0'; |
102 | for (i = 0; i < ndev; i++) |
103 | { |
104 | get_gpu_device_info_string(stmp, gpu_info, i); |
105 | strcat(sbuf, " "); |
106 | strcat(sbuf, stmp); |
107 | if (i < ndev - 1) |
108 | { |
109 | strcat(sbuf, "\n"); |
110 | } |
111 | } |
112 | } |
113 | |
/* Report, via md_print_warn, how many GPUs were detected on this
 * physical node (with the hostname when using real MPI). Does nothing
 * when GPU detection was skipped (gpu_info->bDetectGPUs is false). */
static void print_gpu_detection_stats(FILE *fplog,
                                      const gmx_gpu_info_t *gpu_info,
                                      const t_commrec *cr)
{
    /* onhost: " on host <name>" suffix or empty; stmp: per-GPU details */
    char onhost[266], stmp[STRLEN4096];
    int ngpu;

    if (!gpu_info->bDetectGPUs)
    {
        /* We skipped the detection, so don't print detection stats */
        return;
    }

    ngpu = gpu_info->ncuda_dev;

#if defined GMX_MPI && !defined GMX_THREAD_MPI
    /* We only print the detection on one, of possibly multiple, nodes */
    strncpy(onhost, " on host ", 10)__builtin_strncpy (onhost, " on host ", 10);
    gmx_gethostname(onhost+9, 256);
#else
    /* We detect all relevant GPUs */
    strncpy(onhost, "", 1)__builtin_strncpy (onhost, "", 1);
#endif

    if (ngpu > 0)
    {
        sprint_gpus(stmp, gpu_info);
        md_print_warn(cr, fplog, "%d GPU%s detected%s:\n%s\n",
                      ngpu, (ngpu > 1) ? "s" : "", onhost, stmp);
    }
    else
    {
        md_print_warn(cr, fplog, "No GPUs detected%s\n", onhost);
    }
}
149 | |
/* Report which GPUs will actually be used for this run: either a note
 * that compatible GPUs exist but none are used, or the mapping of the
 * selected GPU IDs to the PP ranks on this node. */
static void print_gpu_use_stats(FILE *fplog,
                                const gmx_gpu_info_t *gpu_info,
                                const gmx_gpu_opt_t *gpu_opt,
                                const t_commrec *cr)
{
    char sbuf[STRLEN4096], stmp[STRLEN4096];
    int i, ngpu_comp, ngpu_use;

    ngpu_comp = gpu_info->ncuda_dev_compatible;
    ngpu_use = gpu_opt->ncuda_dev_use;

    /* Issue a note if GPUs are available but not used */
    if (ngpu_comp > 0 && ngpu_use < 1)
    {
        sprintf(sbuf,
                "%d compatible GPU%s detected in the system, but none will be used.\n"
                "Consider trying GPU acceleration with the Verlet scheme!",
                ngpu_comp, (ngpu_comp > 1) ? "s" : "");
    }
    else
    {
        int ngpu_use_uniq;

        /* With user-set IDs a GPU may be shared by several ranks, so the
         * count of unique devices can be lower than ncuda_dev_use. */
        ngpu_use_uniq = gmx_count_gpu_dev_unique(gpu_info, gpu_opt);

        sprintf(sbuf, "%d GPU%s %sselected for this run.\n"
                "Mapping of GPU%s to the %d PP rank%s in this node: ",
                ngpu_use_uniq, (ngpu_use_uniq > 1) ? "s" : "",
                gpu_opt->bUserSet ? "user-" : "auto-",
                (ngpu_use > 1) ? "s" : "",
                cr->nrank_pp_intranode,
                (cr->nrank_pp_intranode > 1) ? "s" : "");

        /* Append the comma-separated list of assigned device IDs. */
        for (i = 0; i < ngpu_use; i++)
        {
            sprintf(stmp, "#%d", get_gpu_device_id(gpu_info, gpu_opt, i));
            if (i < ngpu_use - 1)
            {
                strcat(stmp, ", ");
            }
            strcat(sbuf, stmp);
        }
    }
    md_print_info(cr, fplog, "%s\n\n", sbuf);
}
195 | |
196 | /* Give a suitable fatal error or warning if the build configuration |
197 | and runtime CPU do not match. */ |
static void
check_use_of_rdtscp_on_this_cpu(FILE                *fplog,
                                const t_commrec     *cr,
                                const gmx_hw_info_t *hwinfo)
{
    gmx_bool bCpuHasRdtscp, bBinaryUsesRdtscp;
    /* Whether this binary was built to use the rdtscp instruction is a
     * compile-time property (HAVE_RDTSCP). */
#ifdef HAVE_RDTSCP
    bBinaryUsesRdtscp = TRUE1;
#else
    bBinaryUsesRdtscp = FALSE0;
#endif

    bCpuHasRdtscp = gmx_cpuid_feature(hwinfo->cpuid_info, GMX_CPUID_FEATURE_X86_RDTSCP);

    /* Binary uses rdtscp but the CPU lacks it: executing it would crash,
     * so this is a fatal error. */
    if (!bCpuHasRdtscp && bBinaryUsesRdtscp)
    {
        gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/gmxlib/gmx_detect_hardware.c" , 214, "The %s executable was compiled to use the rdtscp CPU instruction. "
                  "However, this is not supported by the current hardware and continuing would lead to a crash. "
                  "Please rebuild GROMACS with the GMX_USE_RDTSCP=OFF CMake option.",
                  ShortProgram());
    }

    /* CPU supports rdtscp but the binary does not use it: only a
     * performance (timing accuracy) concern, so just warn. */
    if (bCpuHasRdtscp && !bBinaryUsesRdtscp)
    {
        md_print_warn(cr, fplog, "The current CPU can measure timings more accurately than the code in\n"
                      "%s was configured to use. This might affect your simulation\n"
                      "speed as accurate timings are needed for load-balancing.\n"
                      "Please consider rebuilding %s with the GMX_USE_RDTSCP=OFF CMake option.\n",
                      ShortProgram(), ShortProgram());
    }
}
229 | |
230 | void gmx_check_hw_runconf_consistency(FILE *fplog, |
231 | const gmx_hw_info_t *hwinfo, |
232 | const t_commrec *cr, |
233 | const gmx_hw_opt_t *hw_opt, |
234 | gmx_bool bUseGPU) |
235 | { |
236 | int npppn, ntmpi_pp; |
237 | char sbuf[STRLEN4096], th_or_proc[STRLEN4096], th_or_proc_plural[STRLEN4096], pernode[STRLEN4096]; |
238 | gmx_bool btMPI, bMPI, bMaxMpiThreadsSet, bNthreadsAuto, bEmulateGPU; |
239 | |
240 | assert(hwinfo)((void) (0)); |
241 | assert(cr)((void) (0)); |
242 | |
243 | /* Below we only do consistency checks for PP and GPUs, |
244 | * this is irrelevant for PME only nodes, so in that case we return |
245 | * here. |
246 | */ |
247 | if (!(cr->duty & DUTY_PP(1<<0))) |
248 | { |
249 | return; |
250 | } |
251 | |
252 | btMPI = bMPI = FALSE0; |
Value stored to 'btMPI' is never read | |
253 | bNthreadsAuto = FALSE0; |
254 | #if defined(GMX_THREAD_MPI) |
255 | btMPI = TRUE1; |
256 | bNthreadsAuto = (hw_opt->nthreads_tmpi < 1); |
257 | #elif defined(GMX_LIB_MPI) |
258 | bMPI = TRUE1; |
259 | #endif |
260 | |
261 | /* GPU emulation detection is done later, but we need here as well |
262 | * -- uncool, but there's no elegant workaround */ |
263 | bEmulateGPU = (getenv("GMX_EMULATE_GPU") != NULL((void*)0)); |
264 | bMaxMpiThreadsSet = (getenv("GMX_MAX_MPI_THREADS") != NULL((void*)0)); |
265 | |
266 | /* check the SIMD level mdrun is compiled with against hardware |
267 | capabilities */ |
268 | /* TODO: Here we assume homogeneous hardware which is not necessarily |
269 | the case! Might not hurt to add an extra check over MPI. */ |
270 | gmx_cpuid_simd_check(hwinfo->cpuid_info, fplog, SIMMASTER(cr)(((((cr)->nodeid == 0) || !((cr)->nnodes > 1)) && ((cr)->duty & (1<<0))) || !((cr)->nnodes > 1))); |
271 | |
272 | check_use_of_rdtscp_on_this_cpu(fplog, cr, hwinfo); |
273 | |
274 | /* NOTE: this print is only for and on one physical node */ |
275 | print_gpu_detection_stats(fplog, &hwinfo->gpu_info, cr); |
276 | |
277 | if (hwinfo->gpu_info.ncuda_dev_compatible > 0) |
278 | { |
279 | /* NOTE: this print is only for and on one physical node */ |
280 | print_gpu_use_stats(fplog, &hwinfo->gpu_info, &hw_opt->gpu_opt, cr); |
281 | } |
282 | |
283 | /* Need to ensure that we have enough GPUs: |
284 | * - need one GPU per PP node |
285 | * - no GPU oversubscription with tMPI |
286 | * */ |
287 | /* number of PP processes per node */ |
288 | npppn = cr->nrank_pp_intranode; |
289 | |
290 | pernode[0] = '\0'; |
291 | th_or_proc_plural[0] = '\0'; |
292 | if (btMPI) |
293 | { |
294 | sprintf(th_or_proc, "thread-MPI thread"); |
295 | if (npppn > 1) |
296 | { |
297 | sprintf(th_or_proc_plural, "s"); |
298 | } |
299 | } |
300 | else if (bMPI) |
301 | { |
302 | sprintf(th_or_proc, "MPI process"); |
303 | if (npppn > 1) |
304 | { |
305 | sprintf(th_or_proc_plural, "es"); |
306 | } |
307 | sprintf(pernode, " per node"); |
308 | } |
309 | else |
310 | { |
311 | /* neither MPI nor tMPI */ |
312 | sprintf(th_or_proc, "process"); |
313 | } |
314 | |
315 | if (bUseGPU && hwinfo->gpu_info.ncuda_dev_compatible > 0 && |
316 | !bEmulateGPU) |
317 | { |
318 | int ngpu_comp, ngpu_use; |
319 | char gpu_comp_plural[2], gpu_use_plural[2]; |
320 | |
321 | ngpu_comp = hwinfo->gpu_info.ncuda_dev_compatible; |
322 | ngpu_use = hw_opt->gpu_opt.ncuda_dev_use; |
323 | |
324 | sprintf(gpu_comp_plural, "%s", (ngpu_comp > 1) ? "s" : ""); |
325 | sprintf(gpu_use_plural, "%s", (ngpu_use > 1) ? "s" : ""); |
326 | |
327 | /* number of tMPI threads auto-adjusted */ |
328 | if (btMPI && bNthreadsAuto) |
329 | { |
330 | if (hw_opt->gpu_opt.bUserSet && npppn < ngpu_use) |
331 | { |
332 | /* The user manually provided more GPUs than threads we |
333 | could automatically start. */ |
334 | gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/gmxlib/gmx_detect_hardware.c" , 334, |
335 | "%d GPU%s provided, but only %d PP thread-MPI thread%s coud be started.\n" |
336 | "%s requires one PP tread-MPI thread per GPU; use fewer GPUs%s.", |
337 | ngpu_use, gpu_use_plural, |
338 | npppn, th_or_proc_plural, |
339 | ShortProgram(), bMaxMpiThreadsSet ? "\nor allow more threads to be used" : ""); |
340 | } |
341 | |
342 | if (!hw_opt->gpu_opt.bUserSet && npppn < ngpu_comp) |
343 | { |
344 | /* There are more GPUs than tMPI threads; we have |
345 | limited the number GPUs used. */ |
346 | md_print_warn(cr, fplog, |
347 | "NOTE: %d GPU%s were detected, but only %d PP thread-MPI thread%s can be started.\n" |
348 | " %s can use one GPU per PP tread-MPI thread, so only %d GPU%s will be used.%s\n", |
349 | ngpu_comp, gpu_comp_plural, |
350 | npppn, th_or_proc_plural, |
351 | ShortProgram(), npppn, |
352 | npppn > 1 ? "s" : "", |
353 | bMaxMpiThreadsSet ? "\n Also, you can allow more threads to be used by increasing GMX_MAX_MPI_THREADS" : ""); |
354 | } |
355 | } |
356 | |
357 | if (hw_opt->gpu_opt.bUserSet) |
358 | { |
359 | if (ngpu_use != npppn) |
360 | { |
361 | gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/gmxlib/gmx_detect_hardware.c" , 361, |
362 | "Incorrect launch configuration: mismatching number of PP %s%s and GPUs%s.\n" |
363 | "%s was started with %d PP %s%s%s, but you provided %d GPU%s.", |
364 | th_or_proc, btMPI ? "s" : "es", pernode, |
365 | ShortProgram(), npppn, th_or_proc, |
366 | th_or_proc_plural, pernode, |
367 | ngpu_use, gpu_use_plural); |
368 | } |
369 | } |
370 | else |
371 | { |
372 | if (ngpu_comp > npppn) |
373 | { |
374 | md_print_warn(cr, fplog, |
375 | "NOTE: potentially sub-optimal launch configuration, %s started with less\n" |
376 | " PP %s%s%s than GPU%s available.\n" |
377 | " Each PP %s can use only one GPU, %d GPU%s%s will be used.\n", |
378 | ShortProgram(), th_or_proc, |
379 | th_or_proc_plural, pernode, gpu_comp_plural, |
380 | th_or_proc, npppn, gpu_use_plural, pernode); |
381 | } |
382 | |
383 | if (ngpu_use != npppn) |
384 | { |
385 | /* Avoid duplicate error messages. |
386 | * Unfortunately we can only do this at the physical node |
387 | * level, since the hardware setup and MPI process count |
388 | * might differ between physical nodes. |
389 | */ |
390 | if (cr->rank_pp_intranode == 0) |
391 | { |
392 | gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/gmxlib/gmx_detect_hardware.c" , 392, |
393 | "Incorrect launch configuration: mismatching number of PP %s%s and GPUs%s.\n" |
394 | "%s was started with %d PP %s%s%s, but only %d GPU%s were detected.", |
395 | th_or_proc, btMPI ? "s" : "es", pernode, |
396 | ShortProgram(), npppn, th_or_proc, |
397 | th_or_proc_plural, pernode, |
398 | ngpu_use, gpu_use_plural); |
399 | } |
400 | } |
401 | } |
402 | |
403 | { |
404 | int same_count; |
405 | |
406 | same_count = gmx_count_gpu_dev_shared(&hw_opt->gpu_opt); |
407 | |
408 | if (same_count > 0) |
409 | { |
410 | md_print_info(cr, fplog, |
411 | "NOTE: You assigned %s to multiple %s%s.\n", |
412 | same_count > 1 ? "GPUs" : "a GPU", th_or_proc, btMPI ? "s" : "es"); |
413 | } |
414 | } |
415 | } |
416 | |
417 | #ifdef GMX_MPI |
418 | if (PAR(cr)((cr)->nnodes > 1)) |
419 | { |
420 | /* Avoid other ranks to continue after |
421 | inconsistency */ |
422 | MPI_BarriertMPI_Barrier(cr->mpi_comm_mygroup); |
423 | } |
424 | #endif |
425 | |
426 | } |
427 | |
428 | /* Return 0 if none of the GPU (per node) are shared among PP ranks. |
429 | * |
430 | * Sharing GPUs among multiple PP ranks is possible when the user passes |
431 | * GPU IDs. Here we check for sharing and return a non-zero value when |
432 | * this is detected. Note that the return value represents the number of |
433 | * PP rank pairs that share a device. |
434 | */ |
435 | int gmx_count_gpu_dev_shared(const gmx_gpu_opt_t *gpu_opt) |
436 | { |
437 | int same_count = 0; |
438 | int ngpu = gpu_opt->ncuda_dev_use; |
439 | |
440 | if (gpu_opt->bUserSet) |
441 | { |
442 | int i, j; |
443 | |
444 | for (i = 0; i < ngpu - 1; i++) |
445 | { |
446 | for (j = i + 1; j < ngpu; j++) |
447 | { |
448 | same_count += (gpu_opt->cuda_dev_use[i] == |
449 | gpu_opt->cuda_dev_use[j]); |
450 | } |
451 | } |
452 | } |
453 | |
454 | return same_count; |
455 | } |
456 | |
457 | /* Count and return the number of unique GPUs (per node) selected. |
458 | * |
459 | * As sharing GPUs among multiple PP ranks is possible when the user passes |
460 | * GPU IDs, the number of GPUs user (per node) can be different from the |
461 | * number of GPU IDs selected. |
462 | */ |
static int gmx_count_gpu_dev_unique(const gmx_gpu_info_t *gpu_info,
                                    const gmx_gpu_opt_t  *gpu_opt)
{
    int  i, uniq_count, ngpu;
    int *uniq_ids;

    assert(gpu_info)((void) (0));
    assert(gpu_opt)((void) (0));

    ngpu = gpu_info->ncuda_dev;
    uniq_count = 0;

    /* snew zero-initializes, so all devices start as "not selected". */
    snew(uniq_ids, ngpu)(uniq_ids) = save_calloc("uniq_ids", "/home/alexxy/Develop/gromacs/src/gromacs/gmxlib/gmx_detect_hardware.c" , 475, (ngpu), sizeof(*(uniq_ids)));

    /* Each element in uniq_ids will be set to 0 or 1. The n-th element set
     * to 1 indicates that the respective GPU was selected to be used. */
    for (i = 0; i < gpu_opt->ncuda_dev_use; i++)
    {
        /* Duplicate selections map to the same slot, so each device is
         * counted at most once. */
        uniq_ids[get_gpu_device_id(gpu_info, gpu_opt, i)] = 1;
    }
    /* Count the devices used. */
    for (i = 0; i < ngpu; i++)
    {
        uniq_count += uniq_ids[i];
    }

    sfree(uniq_ids)save_free("uniq_ids", "/home/alexxy/Develop/gromacs/src/gromacs/gmxlib/gmx_detect_hardware.c" , 489, (uniq_ids));

    return uniq_count;
}
493 | |
494 | |
495 | /* Return the number of hardware threads supported by the current CPU. |
496 | * We assume that this is equal with the number of CPUs reported to be |
497 | * online by the OS at the time of the call. |
498 | */ |
/* Returns the number of online hardware threads as reported by the OS
 * (Windows: GetSystemInfo; Unix: sysconf), or -1 when no detection
 * method is available on this platform. */
static int get_nthreads_hw_avail(FILE gmx_unused__attribute__ ((unused)) *fplog, const t_commrec gmx_unused__attribute__ ((unused)) *cr)
{
    int ret = 0;

#if ((defined(WIN32) || defined( _WIN32 ) || defined(WIN64) || defined( _WIN64 )) && !(defined (__CYGWIN__) || defined (__CYGWIN32__)))
    /* Windows */
    SYSTEM_INFO sysinfo;
    GetSystemInfo( &sysinfo );
    ret = sysinfo.dwNumberOfProcessors;
#elif defined HAVE_SYSCONF
    /* We are probably on Unix.
     * Now check if we have the argument to use before executing the call
     */
#if defined(_SC_NPROCESSORS_ONLN_SC_NPROCESSORS_ONLN)
    ret = sysconf(_SC_NPROCESSORS_ONLN_SC_NPROCESSORS_ONLN);
#elif defined(_SC_NPROC_ONLN)
    ret = sysconf(_SC_NPROC_ONLN);
#elif defined(_SC_NPROCESSORS_CONF_SC_NPROCESSORS_CONF)
    ret = sysconf(_SC_NPROCESSORS_CONF_SC_NPROCESSORS_CONF);
#elif defined(_SC_NPROC_CONF)
    ret = sysconf(_SC_NPROC_CONF);
#else
#warning "No valid sysconf argument value found. Executables will not be able to determine the number of hardware threads: mdrun will use 1 thread by default!"
#endif /* End of check for sysconf argument values */

#else
    /* Neither windows nor Unix. No fscking idea how many CPUs we have! */
    ret = -1;
#endif

    if (debug)
    {
        fprintf(debug, "Detected %d processors, will use this as the number "
                "of supported hardware threads.\n", ret);
    }

#ifdef GMX_OPENMP
    /* A mismatch with OpenMP's count usually means affinity masks or
     * OMP_NUM_THREADS restrict the run; warn so the user can adjust. */
    if (ret != gmx_omp_get_num_procs())
    {
        md_print_warn(cr, fplog,
                      "Number of CPUs detected (%d) does not match the number reported by OpenMP (%d).\n"
                      "Consider setting the launch configuration manually!",
                      ret, gmx_omp_get_num_procs());
    }
#endif

    return ret;
}
547 | |
/* Detect CUDA GPUs on this physical node and store the result in the
 * global hwinfo_g. With library MPI only the first rank on each node
 * runs the detection and broadcasts the result to the node's other
 * ranks; detection failure is reported as a warning, not an error. */
static void gmx_detect_gpus(FILE *fplog, const t_commrec *cr)
{
#ifdef GMX_LIB_MPI
    int rank_world;
    MPI_Comm physicalnode_comm;
#endif
    int rank_local;

    /* Under certain circumstances MPI ranks on the same physical node
     * can not simultaneously access the same GPU(s). Therefore we run
     * the detection only on one MPI rank per node and broadcast the info.
     * Note that with thread-MPI only a single thread runs this code.
     *
     * TODO: We should also do CPU hardware detection only once on each
     * physical node and broadcast it, instead of do it on every MPI rank.
     */
#ifdef GMX_LIB_MPI
    /* A split of MPI_COMM_WORLD over physical nodes is only required here,
     * so we create and destroy it locally.
     */
    MPI_Comm_ranktMPI_Comm_rank(MPI_COMM_WORLDTMPI_COMM_WORLD, &rank_world);
    MPI_Comm_splittMPI_Comm_split(MPI_COMM_WORLDTMPI_COMM_WORLD, gmx_physicalnode_id_hash(),
                   rank_world, &physicalnode_comm);
    MPI_Comm_ranktMPI_Comm_rank(physicalnode_comm, &rank_local);
#else
    /* Here there should be only one process, check this */
    assert(cr->nnodes == 1 && cr->sim_nodeid == 0)((void) (0));

    rank_local = 0;
#endif

    if (rank_local == 0)
    {
        char detection_error[STRLEN4096] = "", sbuf[STRLEN4096];

        if (detect_cuda_gpus(&hwinfo_g->gpu_info, detection_error) != 0)
        {
            /* Include the detection error text when one was produced. */
            if (detection_error != NULL((void*)0) && detection_error[0] != '\0')
            {
                sprintf(sbuf, ":\n  %s\n", detection_error);
            }
            else
            {
                sprintf(sbuf, ".");
            }
            md_print_warn(cr, fplog,
                          "NOTE: Error occurred during GPU detection%s"
                          "      Can not use GPU acceleration, will fall back to CPU kernels.\n",
                          sbuf);
        }
    }

#ifdef GMX_LIB_MPI
    /* Broadcast the GPU info to the other ranks within this node */
    MPI_BcasttMPI_Bcast(&hwinfo_g->gpu_info.ncuda_dev, 1, MPI_INTTMPI_INT, 0, physicalnode_comm);

    if (hwinfo_g->gpu_info.ncuda_dev > 0)
    {
        int cuda_dev_size;

        cuda_dev_size = hwinfo_g->gpu_info.ncuda_dev*sizeof_cuda_dev_info();

        /* Non-detecting ranks must allocate space to receive the data. */
        if (rank_local > 0)
        {
            hwinfo_g->gpu_info.cuda_dev =
                (cuda_dev_info_ptr_t)malloc(cuda_dev_size);
        }
        MPI_BcasttMPI_Bcast(hwinfo_g->gpu_info.cuda_dev, cuda_dev_size, MPI_BYTETMPI_BYTE,
                  0, physicalnode_comm);
        MPI_BcasttMPI_Bcast(&hwinfo_g->gpu_info.ncuda_dev_compatible, 1, MPI_INTTMPI_INT,
                  0, physicalnode_comm);
    }

    MPI_Comm_freetMPI_Comm_free(&physicalnode_comm);
#endif
}
624 | |
/* Detect the hardware (CPUID, hardware thread count, optionally GPUs)
 * and return a pointer to the globally shared, reference-counted
 * hwinfo structure. Thread-safe via hw_info_lock; detection runs only
 * on the first call, later calls just bump the reference counter.
 * Callers release with gmx_hardware_info_free(). */
gmx_hw_info_t *gmx_detect_hardware(FILE *fplog, const t_commrec *cr,
                                   gmx_bool bDetectGPUs)
{
    gmx_hw_info_t *hw;
    int ret;

    /* make sure no one else is doing the same thing */
    ret = tMPI_Thread_mutex_lock(&hw_info_lock);
    if (ret != 0)
    {
        gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/gmxlib/gmx_detect_hardware.c" , 635, "Error locking hwinfo mutex: %s", strerror(errno(*__errno_location ())));
    }

    /* only initialize the hwinfo structure if it is not already initalized */
    if (n_hwinfo == 0)
    {
        snew(hwinfo_g, 1)(hwinfo_g) = save_calloc("hwinfo_g", "/home/alexxy/Develop/gromacs/src/gromacs/gmxlib/gmx_detect_hardware.c" , 641, (1), sizeof(*(hwinfo_g)));

        /* detect CPUID info; no fuss, we don't detect system-wide
         * -- sloppy, but that's it for now */
        if (gmx_cpuid_init(&hwinfo_g->cpuid_info) != 0)
        {
            gmx_fatal_collective(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/gmxlib/gmx_detect_hardware.c" , 647, cr, NULL((void*)0), "CPUID detection failed!");
        }

        /* detect number of hardware threads */
        hwinfo_g->nthreads_hw_avail = get_nthreads_hw_avail(fplog, cr);

        /* detect GPUs */
        hwinfo_g->gpu_info.ncuda_dev = 0;
        hwinfo_g->gpu_info.cuda_dev = NULL((void*)0);
        hwinfo_g->gpu_info.ncuda_dev_compatible = 0;

        /* Run the detection if the binary was compiled with GPU support
         * and we requested detection.
         */
        hwinfo_g->gpu_info.bDetectGPUs =
            (bGPUBinary && bDetectGPUs &&
             getenv("GMX_DISABLE_GPU_DETECTION") == NULL((void*)0));
        if (hwinfo_g->gpu_info.bDetectGPUs)
        {
            gmx_detect_gpus(fplog, cr);
        }
    }
    /* increase the reference counter */
    n_hwinfo++;

    ret = tMPI_Thread_mutex_unlock(&hw_info_lock);
    if (ret != 0)
    {
        gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/gmxlib/gmx_detect_hardware.c" , 675, "Error unlocking hwinfo mutex: %s", strerror(errno(*__errno_location ())));
    }

    return hwinfo_g;
}
680 | |
/* Parse user-supplied GPU IDs (from the -gpu_id option or the
 * GMX_GPU_ID environment variable; at most one of the two may be set)
 * into gpu_opt->cuda_dev_use, setting bUserSet when IDs were given.
 * Fatal error when IDs are supplied to a non-GPU build, when both
 * sources are set, or when the ID string is empty. */
void gmx_parse_gpu_ids(gmx_gpu_opt_t *gpu_opt)
{
    char *env;

    if (gpu_opt->gpu_id != NULL((void*)0) && !bGPUBinary)
    {
        gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/gmxlib/gmx_detect_hardware.c" , 687, "GPU ID string set, but %s was compiled without GPU support!", ShortProgram());
    }

    env = getenv("GMX_GPU_ID");
    if (env != NULL((void*)0) && gpu_opt->gpu_id != NULL((void*)0))
    {
        gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/gmxlib/gmx_detect_hardware.c" , 693, "GMX_GPU_ID and -gpu_id can not be used at the same time");
    }
    /* Fall back to the command-line string when the env var is unset. */
    if (env == NULL((void*)0))
    {
        env = gpu_opt->gpu_id;
    }

    /* parse GPU IDs if the user passed any */
    if (env != NULL((void*)0))
    {
        /* Parse a "plain" GPU ID string which contains a sequence of
         * digits corresponding to GPU IDs; the order will indicate
         * the process/tMPI thread - GPU assignment. */
        parse_digits_from_plain_string(env,
                                       &gpu_opt->ncuda_dev_use,
                                       &gpu_opt->cuda_dev_use);

        if (gpu_opt->ncuda_dev_use == 0)
        {
            gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/gmxlib/gmx_detect_hardware.c" , 712, "Empty GPU ID string encountered.\n%s\n",
                      invalid_gpuid_hint);
        }

        gpu_opt->bUserSet = TRUE1;
    }
}
719 | |
/* Select the GPUs to use for this run: validate user-supplied IDs
 * (fatal error listing each bad device on failure), or auto-pick
 * compatible GPUs capped at the number of PP ranks on this node.
 * Fatal error when GPU use was forced but no usable GPU exists. */
void gmx_select_gpu_ids(FILE *fplog, const t_commrec *cr,
                        const gmx_gpu_info_t *gpu_info,
                        gmx_bool bForceUseGPU,
                        gmx_gpu_opt_t *gpu_opt)
{
    int i;
    const char *env;
    char sbuf[STRLEN4096], stmp[STRLEN4096];

    /* Bail if binary is not compiled with GPU acceleration, but this is either
     * explicitly (-nb gpu) or implicitly (gpu ID passed) requested. */
    if (bForceUseGPU && !bGPUBinary)
    {
        gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/gmxlib/gmx_detect_hardware.c" , 733, "GPU acceleration requested, but %s was compiled without GPU support!", ShortProgram());
    }

    if (gpu_opt->bUserSet)
    {
        /* Check the GPU IDs passed by the user.
         * (GPU IDs have been parsed by gmx_parse_gpu_ids before)
         */
        int *checkres;
        int res;

        snew(checkres, gpu_opt->ncuda_dev_use)(checkres) = save_calloc("checkres", "/home/alexxy/Develop/gromacs/src/gromacs/gmxlib/gmx_detect_hardware.c" , 744, (gpu_opt->ncuda_dev_use), sizeof(*(checkres)));

        res = check_selected_cuda_gpus(checkres, gpu_info, gpu_opt);

        if (!res)
        {
            print_gpu_detection_stats(fplog, gpu_info, cr);

            /* Build one fatal message listing every problematic device. */
            sprintf(sbuf, "Some of the requested GPUs do not exist, behave strangely, or are not compatible:\n");
            for (i = 0; i < gpu_opt->ncuda_dev_use; i++)
            {
                if (checkres[i] != egpuCompatible)
                {
                    sprintf(stmp, "    GPU #%d: %s\n",
                            gpu_opt->cuda_dev_use[i],
                            gpu_detect_res_str[checkres[i]]);
                    strcat(sbuf, stmp);
                }
            }
            gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/gmxlib/gmx_detect_hardware.c" , 763, "%s", sbuf);
        }

        sfree(checkres)save_free("checkres", "/home/alexxy/Develop/gromacs/src/gromacs/gmxlib/gmx_detect_hardware.c" , 766, (checkres));
    }
    else
    {
        /* NOTE(review): this uses the global hwinfo_g->gpu_info rather
         * than the gpu_info parameter; presumably the caller always
         * passes &hwinfo_g->gpu_info so they are the same object --
         * TODO confirm against callers. */
        pick_compatible_gpus(&hwinfo_g->gpu_info, gpu_opt);

        if (gpu_opt->ncuda_dev_use > cr->nrank_pp_intranode)
        {
            /* We picked more GPUs than we can use: limit the number.
             * We print detailed messages about this later in
             * gmx_check_hw_runconf_consistency.
             */
            limit_num_gpus_used(gpu_opt, cr->nrank_pp_intranode);
        }

        gpu_opt->bUserSet = FALSE0;
    }

    /* If the user asked for a GPU, check whether we have a GPU */
    if (bForceUseGPU && gpu_info->ncuda_dev_compatible == 0)
    {
        gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/gmxlib/gmx_detect_hardware.c" , 787, "GPU acceleration requested, but no compatible GPUs were detected.");
    }
}
790 | |
/* Cap the number of GPUs used (ncuda_dev_use) at 'count'. A no-op when
 * count would increase the number; internal error when count < 1. */
static void limit_num_gpus_used(gmx_gpu_opt_t *gpu_opt, int count)
{
    int ndev_use;

    assert(gpu_opt)((void) (0));

    ndev_use = gpu_opt->ncuda_dev_use;

    if (count > ndev_use)
    {
        /* won't increase the # of GPUs */
        return;
    }

    if (count < 1)
    {
        char sbuf[STRLEN4096];
        sprintf(sbuf, "Limiting the number of GPUs to <1 doesn't make sense (detected %d, %d requested)!",
                ndev_use, count);
        gmx_incons(sbuf)_gmx_error("incons", sbuf, "/home/alexxy/Develop/gromacs/src/gromacs/gmxlib/gmx_detect_hardware.c" , 810);
    }

    /* TODO: improve this implementation: either sort GPUs or remove the weakest here */
    gpu_opt->ncuda_dev_use = count;
}
816 | |
/* Release one reference to the shared hwinfo structure obtained from
 * gmx_detect_hardware(); frees all detection data when the last
 * reference is dropped. Thread-safe via hw_info_lock. */
void gmx_hardware_info_free(gmx_hw_info_t *hwinfo)
{
    int ret;

    ret = tMPI_Thread_mutex_lock(&hw_info_lock);
    if (ret != 0)
    {
        gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/gmxlib/gmx_detect_hardware.c" , 824, "Error locking hwinfo mutex: %s", strerror(errno(*__errno_location ())));
    }

    /* decrease the reference counter */
    n_hwinfo--;


    /* Callers must pass back the same shared pointer they were given.
     * NOTE(review): the message text says "<" but the check is "!=" --
     * presumably the message is just stale; verify before relying on it. */
    if (hwinfo != hwinfo_g)
    {
        gmx_incons("hwinfo < hwinfo_g")_gmx_error("incons", "hwinfo < hwinfo_g", "/home/alexxy/Develop/gromacs/src/gromacs/gmxlib/gmx_detect_hardware.c" , 833);
    }

    if (n_hwinfo < 0)
    {
        gmx_incons("n_hwinfo < 0")_gmx_error("incons", "n_hwinfo < 0", "/home/alexxy/Develop/gromacs/src/gromacs/gmxlib/gmx_detect_hardware.c" , 838);
    }

    /* Last reference gone: release CPUID data, GPU info and the struct. */
    if (n_hwinfo == 0)
    {
        gmx_cpuid_done(hwinfo_g->cpuid_info);
        free_gpu_info(&hwinfo_g->gpu_info);
        sfree(hwinfo_g)save_free("hwinfo_g", "/home/alexxy/Develop/gromacs/src/gromacs/gmxlib/gmx_detect_hardware.c" , 845, (hwinfo_g));
    }

    ret = tMPI_Thread_mutex_unlock(&hw_info_lock);
    if (ret != 0)
    {
        gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/gmxlib/gmx_detect_hardware.c" , 851, "Error unlocking hwinfo mutex: %s", strerror(errno(*__errno_location ())));
    }
}