File: | gromacs/gmxlib/gmx_thread_affinity.c |
Location: | line 292, column 5 |
Description: | Value stored to 'nth_affinity_set' is never read |
1 | /* |
2 | * This file is part of the GROMACS molecular simulation package. |
3 | * |
4 | * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by |
5 | * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, |
6 | * and including many others, as listed in the AUTHORS file in the |
7 | * top-level source directory and at http://www.gromacs.org. |
8 | * |
9 | * GROMACS is free software; you can redistribute it and/or |
10 | * modify it under the terms of the GNU Lesser General Public License |
11 | * as published by the Free Software Foundation; either version 2.1 |
12 | * of the License, or (at your option) any later version. |
13 | * |
14 | * GROMACS is distributed in the hope that it will be useful, |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
17 | * Lesser General Public License for more details. |
18 | * |
19 | * You should have received a copy of the GNU Lesser General Public |
20 | * License along with GROMACS; if not, see |
21 | * http://www.gnu.org/licenses, or write to the Free Software Foundation, |
22 | * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
23 | * |
24 | * If you want to redistribute modifications to GROMACS, please |
25 | * consider that scientific software is very special. Version |
26 | * control is crucial - bugs must be traceable. We will be happy to |
27 | * consider code for inclusion in the official distribution, but |
28 | * derived work must not be called official GROMACS. Details are found |
29 | * in the README & COPYING files - if they are missing, get the |
30 | * official version at http://www.gromacs.org. |
31 | * |
32 | * To help us fund GROMACS development, we humbly ask that you cite |
33 | * the research papers on the package. Check out http://www.gromacs.org. |
34 | */ |
35 | #ifdef HAVE_CONFIG_H1 |
36 | #include <config.h> |
37 | #endif |
38 | #if defined(HAVE_SCHED_H) && defined(HAVE_SCHED_GETAFFINITY) |
39 | #define _GNU_SOURCE |
40 | #include <sched.h> |
41 | #include <sys/syscall.h> |
42 | #endif |
43 | #include <string.h> |
44 | #include <errno(*__errno_location ()).h> |
45 | #include <assert.h> |
46 | #include <stdio.h> |
47 | |
48 | #include "thread_mpi/threads.h" |
49 | |
50 | #include "typedefs.h" |
51 | #include "types/commrec.h" |
52 | #include "types/hw_info.h" |
53 | #include "copyrite.h" |
54 | #include "gmx_cpuid.h" |
55 | #include "gmx_omp_nthreads.h" |
56 | #include "md_logging.h" |
57 | #include "gmx_thread_affinity.h" |
58 | |
59 | #include "gromacs/utility/basenetwork.h" |
60 | #include "gromacs/utility/cstringutil.h" |
61 | #include "gromacs/utility/fatalerror.h" |
62 | #include "gromacs/utility/gmxomp.h" |
63 | #include "gromacs/utility/smalloc.h" |
64 | |
65 | static int |
66 | get_thread_affinity_layout(FILE *fplog, |
67 | const t_commrec *cr, |
68 | const gmx_hw_info_t * hwinfo, |
69 | int nthreads, |
70 | int pin_offset, int * pin_stride, |
71 | const int **locality_order) |
72 | { |
73 | int nhwthreads, npkg, ncores, nhwthreads_per_core, rc; |
74 | const int * pkg_id; |
75 | const int * core_id; |
76 | const int * hwthread_id; |
77 | gmx_bool bPickPinStride; |
78 | |
79 | if (pin_offset < 0) |
80 | { |
81 | gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/gmxlib/gmx_thread_affinity.c" , 81, "Negative thread pinning offset requested"); |
82 | } |
83 | if (*pin_stride < 0) |
84 | { |
85 | gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/gmxlib/gmx_thread_affinity.c" , 85, "Negative thread pinning stride requested"); |
86 | } |
87 | |
88 | rc = gmx_cpuid_topology(hwinfo->cpuid_info, &nhwthreads, &npkg, &ncores, |
89 | &nhwthreads_per_core, |
90 | &pkg_id, &core_id, &hwthread_id, locality_order); |
91 | |
92 | if (rc != 0) |
93 | { |
94 | /* topology information not available or invalid, ignore it */ |
95 | nhwthreads = hwinfo->nthreads_hw_avail; |
96 | *locality_order = NULL((void*)0); |
97 | |
98 | if (nhwthreads <= 0) |
99 | { |
100 | /* We don't know anything about the hardware, don't pin */ |
101 | md_print_warn(cr, fplog, |
102 | "NOTE: We don't know how many logical cores we have, will not pin threads"); |
103 | |
104 | return -1; |
105 | } |
106 | } |
107 | |
108 | if (nthreads > nhwthreads) |
109 | { |
110 | /* We are oversubscribing, don't pin */ |
111 | md_print_warn(NULL((void*)0), fplog, |
112 | "WARNING: Oversubscribing the CPU, will not pin threads"); |
113 | |
114 | return -1; |
115 | } |
116 | |
117 | if (pin_offset + nthreads > nhwthreads) |
118 | { |
119 | /* We are oversubscribing, don't pin */ |
120 | md_print_warn(NULL((void*)0), fplog, |
121 | "WARNING: The requested pin offset is too large for the available logical cores,\n" |
122 | " will not pin threads"); |
123 | |
124 | return -1; |
125 | } |
126 | |
127 | |
128 | /* do we need to choose the pinning stride? */ |
129 | bPickPinStride = (*pin_stride == 0); |
130 | |
131 | if (bPickPinStride) |
132 | { |
133 | if (rc == 0 && pin_offset + nthreads*nhwthreads_per_core <= nhwthreads) |
134 | { |
135 | /* Put one thread on each physical core */ |
136 | *pin_stride = nhwthreads_per_core; |
137 | } |
138 | else |
139 | { |
140 | /* We don't know if we have SMT, and if we do, we don't know |
141 | * if hw threads in the same physical core are consecutive. |
142 | * Without SMT the pinning layout should not matter too much. |
143 | * so we assume a consecutive layout and maximally spread out" |
144 | * the threads at equal threads per core. |
145 | * Note that IBM is the major non-x86 case with cpuid support |
146 | * and probably threads are already pinned by the queuing system, |
147 | * so we wouldn't end up here in the first place. |
148 | */ |
149 | *pin_stride = (nhwthreads - pin_offset)/nthreads; |
150 | } |
151 | } |
152 | else |
153 | { |
154 | /* Check the placement of the thread with the largest index to make sure |
155 | * that the offset & stride doesn't cause pinning beyond the last hardware thread. */ |
156 | if (pin_offset + (nthreads-1)*(*pin_stride) >= nhwthreads) |
157 | { |
158 | /* We are oversubscribing, don't pin */ |
159 | md_print_warn(NULL((void*)0), fplog, |
160 | "WARNING: The requested pinning stride is too large for the available logical cores,\n" |
161 | " will not pin threads"); |
162 | |
163 | return -1; |
164 | } |
165 | } |
166 | |
167 | if (fplog != NULL((void*)0)) |
168 | { |
169 | fprintf(fplog, "Pinning threads with a%s logical core stride of %d\n", |
170 | bPickPinStride ? "n auto-selected" : " user-specified", |
171 | *pin_stride); |
172 | } |
173 | |
174 | return 0; |
175 | } |
176 | |
177 | /* Set CPU affinity. Can be important for performance. |
178 | On some systems (e.g. Cray) CPU Affinity is set by default. |
179 | But default assigning doesn't work (well) with only some ranks |
180 | having threads. This causes very low performance. |
181 | External tools have cumbersome syntax for setting affinity |
182 | in the case that only some ranks have threads. |
183 | Thus it is important that GROMACS sets the affinity internally |
184 | if only PME is using threads. |
185 | */ |
186 | void |
187 | gmx_set_thread_affinity(FILE *fplog, |
188 | const t_commrec *cr, |
189 | gmx_hw_opt_t *hw_opt, |
190 | const gmx_hw_info_t *hwinfo) |
191 | { |
192 | int nth_affinity_set, thread0_id_node, |
193 | nthread_local, nthread_node, nthread_hw_max, nphyscore; |
194 | int offset; |
195 | const int *locality_order; |
196 | int rc; |
197 | |
198 | if (hw_opt->thread_affinity == threadaffOFF) |
199 | { |
200 | /* Nothing to do */ |
201 | return; |
202 | } |
203 | |
204 | /* If the tMPI thread affinity setting is not supported encourage the user |
205 | * to report it as it's either a bug or an exotic platform which we might |
206 | * want to support. */ |
207 | if (tMPI_Thread_setaffinity_support() != TMPI_SETAFFINITY_SUPPORT_YES) |
208 | { |
209 | /* we know Mac OS doesn't support setting thread affinity, so there's |
210 | no point in warning the user in that case. In any other case |
211 | the user might be able to do something about it. */ |
212 | #ifndef __APPLE__ |
213 | md_print_warn(NULL((void*)0), fplog, |
214 | "Can not set thread affinities on the current platform. On NUMA systems this\n" |
215 | "can cause performance degradation. If you think your platform should support\n" |
216 | "setting affinities, contact the GROMACS developers."); |
217 | #endif /* __APPLE__ */ |
218 | return; |
219 | } |
220 | |
221 | /* threads on this MPI process or TMPI thread */ |
222 | if (cr->duty & DUTY_PP(1<<0)) |
223 | { |
224 | nthread_local = gmx_omp_nthreads_get(emntNonbonded); |
225 | } |
226 | else |
227 | { |
228 | nthread_local = gmx_omp_nthreads_get(emntPME); |
229 | } |
230 | |
231 | /* map the current process to cores */ |
232 | thread0_id_node = 0; |
233 | nthread_node = nthread_local; |
234 | #ifdef GMX_MPI |
235 | if (PAR(cr)((cr)->nnodes > 1) || MULTISIM(cr)((cr)->ms)) |
236 | { |
237 | /* We need to determine a scan of the thread counts in this |
238 | * compute node. |
239 | */ |
240 | MPI_Comm comm_intra; |
241 | |
242 | MPI_Comm_splittMPI_Comm_split(MPI_COMM_WORLDTMPI_COMM_WORLD, gmx_hostname_num(), cr->rank_intranode, |
243 | &comm_intra); |
244 | MPI_ScantMPI_Scan(&nthread_local, &thread0_id_node, 1, MPI_INTTMPI_INT, MPI_SUMTMPI_SUM, comm_intra); |
245 | /* MPI_Scan is inclusive, but here we need exclusive */ |
246 | thread0_id_node -= nthread_local; |
247 | /* Get the total number of threads on this physical node */ |
248 | MPI_AllreducetMPI_Allreduce(&nthread_local, &nthread_node, 1, MPI_INTTMPI_INT, MPI_SUMTMPI_SUM, comm_intra); |
249 | MPI_Comm_freetMPI_Comm_free(&comm_intra); |
250 | } |
251 | #endif |
252 | |
253 | if (hw_opt->thread_affinity == threadaffAUTO && |
254 | nthread_node != hwinfo->nthreads_hw_avail) |
255 | { |
256 | if (nthread_node > 1 && nthread_node < hwinfo->nthreads_hw_avail) |
257 | { |
258 | md_print_warn(cr, fplog, |
259 | "NOTE: The number of threads is not equal to the number of (logical) cores\n" |
260 | " and the -pin option is set to auto: will not pin thread to cores.\n" |
261 | " This can lead to significant performance degradation.\n" |
262 | " Consider using -pin on (and -pinoffset in case you run multiple jobs).\n"); |
263 | } |
264 | |
265 | return; |
266 | } |
267 | |
268 | offset = 0; |
269 | if (hw_opt->core_pinning_offset != 0) |
270 | { |
271 | offset = hw_opt->core_pinning_offset; |
272 | md_print_info(cr, fplog, "Applying core pinning offset %d\n", offset); |
273 | } |
274 | |
275 | rc = get_thread_affinity_layout(fplog, cr, hwinfo, |
276 | nthread_node, |
277 | offset, &hw_opt->core_pinning_stride, |
278 | &locality_order); |
279 | |
280 | if (rc != 0) |
281 | { |
282 | /* Incompatible layout, don't pin, warning was already issued */ |
283 | return; |
284 | } |
285 | |
286 | /* Set the per-thread affinity. In order to be able to check the success |
287 | * of affinity settings, we will set nth_affinity_set to 1 on threads |
288 | * where the affinity setting succeded and to 0 where it failed. |
289 | * Reducing these 0/1 values over the threads will give the total number |
290 | * of threads on which we succeeded. |
291 | */ |
292 | nth_affinity_set = 0; |
Value stored to 'nth_affinity_set' is never read | |
293 | #pragma omp parallel num_threads(nthread_local) reduction(+:nth_affinity_set) |
294 | { |
295 | int thread_id, thread_id_node; |
296 | int index, core; |
297 | gmx_bool setaffinity_ret; |
298 | |
299 | thread_id = gmx_omp_get_thread_num(); |
300 | thread_id_node = thread0_id_node + thread_id; |
301 | index = offset + thread_id_node*hw_opt->core_pinning_stride; |
302 | if (locality_order != NULL((void*)0)) |
303 | { |
304 | core = locality_order[index]; |
305 | } |
306 | else |
307 | { |
308 | core = index; |
309 | } |
310 | |
311 | setaffinity_ret = tMPI_Thread_setaffinity_single(tMPI_Thread_self(), core); |
312 | |
313 | /* store the per-thread success-values of the setaffinity */ |
314 | nth_affinity_set = (setaffinity_ret == 0); |
315 | |
316 | if (debug) |
317 | { |
318 | fprintf(debug, "On rank %2d, thread %2d, index %2d, core %2d the affinity setting returned %d\n", |
319 | cr->nodeid, gmx_omp_get_thread_num(), index, core, setaffinity_ret); |
320 | } |
321 | } |
322 | |
323 | if (nth_affinity_set > nthread_local) |
324 | { |
325 | char msg[STRLEN4096]; |
326 | |
327 | sprintf(msg, "Looks like we have set affinity for more threads than " |
328 | "we have (%d > %d)!\n", nth_affinity_set, nthread_local); |
329 | gmx_incons(msg)_gmx_error("incons", msg, "/home/alexxy/Develop/gromacs/src/gromacs/gmxlib/gmx_thread_affinity.c" , 329); |
330 | } |
331 | else |
332 | { |
333 | /* check & warn if some threads failed to set their affinities */ |
334 | if (nth_affinity_set != nthread_local) |
335 | { |
336 | char sbuf1[STRLEN4096], sbuf2[STRLEN4096]; |
337 | |
338 | /* sbuf1 contains rank info, while sbuf2 OpenMP thread info */ |
339 | sbuf1[0] = sbuf2[0] = '\0'; |
340 | /* Only add rank info if we have more than one rank. */ |
341 | if (cr->nnodes > 1) |
342 | { |
343 | #ifdef GMX_MPI |
344 | #ifdef GMX_THREAD_MPI |
345 | sprintf(sbuf1, "In tMPI thread #%d: ", cr->nodeid); |
346 | #else /* GMX_LIB_MPI */ |
347 | sprintf(sbuf1, "In MPI process #%d: ", cr->nodeid); |
348 | #endif |
349 | #endif /* GMX_MPI */ |
350 | } |
351 | |
352 | if (nthread_local > 1) |
353 | { |
354 | sprintf(sbuf2, "for %d/%d thread%s ", |
355 | nthread_local - nth_affinity_set, nthread_local, |
356 | nthread_local > 1 ? "s" : ""); |
357 | } |
358 | |
359 | md_print_warn(NULL((void*)0), fplog, |
360 | "WARNING: %sAffinity setting %sfailed.\n" |
361 | " This can cause performance degradation! If you think your setting are\n" |
362 | " correct, contact the GROMACS developers.", |
363 | sbuf1, sbuf2); |
364 | } |
365 | } |
366 | return; |
367 | } |
368 | |
369 | /* Check the process affinity mask and if it is found to be non-zero, |
370 | * will honor it and disable mdrun internal affinity setting. |
371 | * Note that this will only work on Linux as we use a GNU feature. |
372 | */ |
373 | void |
374 | gmx_check_thread_affinity_set(FILE *fplog, |
375 | const t_commrec *cr, |
376 | gmx_hw_opt_t *hw_opt, |
377 | int gmx_unused__attribute__ ((unused)) ncpus, |
378 | gmx_bool bAfterOpenmpInit) |
379 | { |
380 | #ifdef HAVE_SCHED_GETAFFINITY |
381 | cpu_set_t mask_current; |
382 | int i, ret, cpu_count, cpu_set; |
383 | gmx_bool bAllSet; |
384 | #endif |
385 | |
386 | assert(hw_opt)((void) (0)); |
387 | if (!bAfterOpenmpInit) |
388 | { |
389 | /* Check for externally set OpenMP affinity and turn off internal |
390 | * pinning if any is found. We need to do this check early to tell |
391 | * thread-MPI whether it should do pinning when spawning threads. |
392 | * TODO: the above no longer holds, we should move these checks later |
393 | */ |
394 | if (hw_opt->thread_affinity != threadaffOFF) |
395 | { |
396 | char *message; |
397 | if (!gmx_omp_check_thread_affinity(&message)) |
398 | { |
399 | /* TODO: with -pin auto we should only warn when using all cores */ |
400 | md_print_warn(cr, fplog, "%s", message); |
401 | sfree(message)save_free("message", "/home/alexxy/Develop/gromacs/src/gromacs/gmxlib/gmx_thread_affinity.c" , 401, (message)); |
402 | hw_opt->thread_affinity = threadaffOFF; |
403 | } |
404 | } |
405 | |
406 | /* With thread-MPI this is needed as pinning might get turned off, |
407 | * which needs to be known before starting thread-MPI. |
408 | * With thread-MPI hw_opt is processed here on the master rank |
409 | * and passed to the other ranks later, so we only do this on master. |
410 | */ |
411 | if (!SIMMASTER(cr)(((((cr)->nodeid == 0) || !((cr)->nnodes > 1)) && ((cr)->duty & (1<<0))) || !((cr)->nnodes > 1))) |
412 | { |
413 | return; |
414 | } |
415 | #ifndef GMX_THREAD_MPI |
416 | return; |
417 | #endif |
418 | } |
419 | |
420 | #ifdef HAVE_SCHED_GETAFFINITY |
421 | if (hw_opt->thread_affinity == threadaffOFF) |
422 | { |
423 | /* internal affinity setting is off, don't bother checking process affinity */ |
424 | return; |
425 | } |
426 | |
427 | CPU_ZERO(&mask_current)do __builtin_memset (&mask_current, '\0', sizeof (cpu_set_t )); while (0); |
428 | if ((ret = sched_getaffinity(0, sizeof(cpu_set_t), &mask_current)) != 0) |
429 | { |
430 | /* failed to query affinity mask, will just return */ |
431 | if (debug) |
432 | { |
433 | fprintf(debug, "Failed to query affinity mask (error %d)", ret); |
434 | } |
435 | return; |
436 | } |
437 | |
438 | /* Before proceeding with the actual check, make sure that the number of |
439 | * detected CPUs is >= the CPUs in the current set. |
440 | * We need to check for CPU_COUNT as it was added only in glibc 2.6. */ |
441 | #ifdef CPU_COUNT |
442 | if (ncpus < CPU_COUNT(&mask_current)__sched_cpucount (sizeof (cpu_set_t), &mask_current)) |
443 | { |
444 | if (debug) |
445 | { |
446 | fprintf(debug, "%d CPUs detected, but %d was returned by CPU_COUNT", |
447 | ncpus, CPU_COUNT(&mask_current)__sched_cpucount (sizeof (cpu_set_t), &mask_current)); |
448 | } |
449 | return; |
450 | } |
451 | #endif /* CPU_COUNT */ |
452 | |
453 | bAllSet = TRUE1; |
454 | for (i = 0; (i < ncpus && i < CPU_SETSIZE1024); i++) |
455 | { |
456 | bAllSet = bAllSet && (CPU_ISSET(i, &mask_current)(__extension__ ({ size_t __cpu = (i); __cpu / 8 < (sizeof ( cpu_set_t)) ? ((((const __cpu_mask *) ((&mask_current)-> __bits))[((__cpu) / (8 * sizeof (__cpu_mask)))] & ((__cpu_mask ) 1 << ((__cpu) % (8 * sizeof (__cpu_mask)))))) != 0 : 0 ; })) != 0); |
457 | } |
458 | |
459 | if (!bAllSet) |
460 | { |
461 | if (hw_opt->thread_affinity == threadaffAUTO) |
462 | { |
463 | if (!bAfterOpenmpInit) |
464 | { |
465 | md_print_warn(cr, fplog, |
466 | "Non-default thread affinity set, disabling internal thread affinity"); |
467 | } |
468 | else |
469 | { |
470 | md_print_warn(cr, fplog, |
471 | "Non-default thread affinity set probably by the OpenMP library,\n" |
472 | "disabling internal thread affinity"); |
473 | } |
474 | hw_opt->thread_affinity = threadaffOFF; |
475 | } |
476 | else |
477 | { |
478 | /* Only warn once, at the last check (bAfterOpenmpInit==TRUE) */ |
479 | if (bAfterOpenmpInit) |
480 | { |
481 | md_print_warn(cr, fplog, |
482 | "Overriding thread affinity set outside %s\n", |
483 | ShortProgram()); |
484 | } |
485 | } |
486 | |
487 | if (debug) |
488 | { |
489 | fprintf(debug, "Non-default affinity mask found\n"); |
490 | } |
491 | } |
492 | else |
493 | { |
494 | if (debug) |
495 | { |
496 | fprintf(debug, "Default affinity mask found\n"); |
497 | } |
498 | } |
499 | #endif /* HAVE_SCHED_GETAFFINITY */ |
500 | } |