src/gromacs/mdrunutility/threadaffinity.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
   5  * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
   6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   7  * and including many others, as listed in the AUTHORS file in the
   8  * top-level source directory and at http://www.gromacs.org.
   9  *
  10  * GROMACS is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public License
  12  * as published by the Free Software Foundation; either version 2.1
  13  * of the License, or (at your option) any later version.
  14  *
  15  * GROMACS is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with GROMACS; if not, see
  22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  24  *
  25  * If you want to redistribute modifications to GROMACS, please
  26  * consider that scientific software is very special. Version
  27  * control is crucial - bugs must be traceable. We will be happy to
  28  * consider code for inclusion in the official distribution, but
  29  * derived work must not be called official GROMACS. Details are found
  30  * in the README & COPYING files - if they are missing, get the
  31  * official version at http://www.gromacs.org.
  32  *
  33  * To help us fund GROMACS development, we humbly ask that you cite
  34  * the research papers on the package. Check out http://www.gromacs.org.
  35  */
  36 #include "gmxpre.h"
  37
  38 #include "threadaffinity.h"
  39
  40 #include "config.h"
  41
  42 #include <cerrno>
  43 #include <cstdio>
  44 #include <cstring>
  45
  46 #if HAVE_SCHED_AFFINITY
  47 #    include <sched.h>
  48 #endif
  49
  50 #include "thread_mpi/threads.h"
  51
  52 #include "gromacs/hardware/hardwaretopology.h"
  53 #include "gromacs/hardware/hw_info.h"
  54 #include "gromacs/mdtypes/commrec.h"
  55 #include "gromacs/utility/basenetwork.h"
  56 #include "gromacs/utility/cstringutil.h"
  57 #include "gromacs/utility/exceptions.h"
  58 #include "gromacs/utility/fatalerror.h"
  59 #include "gromacs/utility/gmxassert.h"
  60 #include "gromacs/utility/gmxomp.h"
  61 #include "gromacs/utility/logger.h"
  62 #include "gromacs/utility/physicalnodecommunicator.h"
  63 #include "gromacs/utility/programcontext.h"
  64 #include "gromacs/utility/smalloc.h"
  65 #include "gromacs/utility/unique_cptr.h"
  66
  67 namespace
  68 {
  69
  70 class DefaultThreadAffinityAccess : public gmx::IThreadAffinityAccess
  71 {
  72 public:
  73     bool isThreadAffinitySupported() const override
  74     {
  75         return tMPI_Thread_setaffinity_support() == TMPI_SETAFFINITY_SUPPORT_YES;
  76     }
  77     bool setCurrentThreadAffinityToCore(int core) override
  78     {
  79         const int ret = tMPI_Thread_setaffinity_single(tMPI_Thread_self(), core);
  80         return ret == 0;
  81     }
  82 };
  83
  84 //! Global instance of DefaultThreadAffinityAccess
  85 DefaultThreadAffinityAccess g_defaultAffinityAccess;
  86
  87 } // namespace
  88
  89 gmx::IThreadAffinityAccess::~IThreadAffinityAccess() {}
  90
  91 static bool invalidWithinSimulation(const t_commrec* cr, bool invalidLocally)
  92 {
  93 #if GMX_MPI
  94     if (cr->nnodes > 1)
  95     {
  96         int value = invalidLocally ? 1 : 0;
  97         int globalValue;
  98         MPI_Reduce(&value, &globalValue, 1, MPI_INT, MPI_LOR, MASTERRANK(cr), cr->mpi_comm_mysim);
  99         return SIMMASTER(cr) ? (globalValue != 0) : invalidLocally;
 100     }
 101 #else
 102     GMX_UNUSED_VALUE(cr);
 103 #endif
 104     return invalidLocally;
 105 }
 106
 107 static bool get_thread_affinity_layout(const gmx::MDLogger&         mdlog,
 108                                        const t_commrec*             cr,
 109                                        const gmx::HardwareTopology& hwTop,
 110                                        int                          threads,
 111                                        bool  affinityIsAutoAndNumThreadsIsNotAuto,
 112                                        int   pin_offset,
 113                                        int*  pin_stride,
 114                                        int** localityOrder,
 115                                        bool* issuedWarning)
 116 {
 117     int  hwThreads;
 118     int  hwThreadsPerCore = 0;
 119     bool bPickPinStride;
 120     bool haveTopology;
 121     bool invalidValue;
 122
 123     haveTopology = (hwTop.supportLevel() >= gmx::HardwareTopology::SupportLevel::Basic);
 124
 125     if (pin_offset < 0)
 126     {
 127         gmx_fatal(FARGS, "Negative thread pinning offset requested");
 128     }
 129     if (*pin_stride < 0)
 130     {
 131         gmx_fatal(FARGS, "Negative thread pinning stride requested");
 132     }
 133
 134     if (haveTopology)
 135     {
 136         hwThreads = hwTop.machine().logicalProcessorCount;
 137         // Just use the value for the first core
 138         hwThreadsPerCore = hwTop.machine().sockets[0].cores[0].hwThreads.size();
 139         snew(*localityOrder, hwThreads);
 140         int i = 0;
 141         for (auto& s : hwTop.machine().sockets)
 142         {
 143             for (auto& c : s.cores)
 144             {
 145                 for (auto& t : c.hwThreads)
 146                 {
 147                     (*localityOrder)[i++] = t.logicalProcessorId;
 148                 }
 149             }
 150         }
 151     }
 152     else
 153     {
 154         /* topology information not available or invalid, ignore it */
 155         hwThreads      = hwTop.machine().logicalProcessorCount;
 156         *localityOrder = nullptr;
 157     }
 158     // Only warn about the first problem per node.  Otherwise, the first test
 159     // failing would essentially always cause also the other problems get
 160     // reported, leading to bogus warnings.  The order in the conditionals
 161     // with this variable is important, since the MPI_Reduce() in
 162     // invalidWithinSimulation() needs to always happen.
 163     bool alreadyWarned = false;
 164     invalidValue       = (hwThreads <= 0);
 165     if (invalidWithinSimulation(cr, invalidValue))
 166     {
 167         /* We don't know anything about the hardware, don't pin */
 168         GMX_LOG(mdlog.warning)
 169                 .asParagraph()
 170                 .appendText("NOTE: No information on available cores, thread pinning disabled.");
 171         alreadyWarned = true;
 172     }
 173     bool validLayout = !invalidValue;
 174
 175     if (affinityIsAutoAndNumThreadsIsNotAuto)
 176     {
 177         invalidValue = (threads != hwThreads);
 178         bool warn    = (invalidValue && threads > 1 && threads < hwThreads);
 179         if (invalidWithinSimulation(cr, warn) && !alreadyWarned)
 180         {
 181             GMX_LOG(mdlog.warning)
 182                     .asParagraph()
 183                     .appendText(
 184                             "NOTE: The number of threads is not equal to the number of (logical) "
 185                             "cores\n"
 186                             "      and the -pin option is set to auto: will not pin threads to "
 187                             "cores.\n"
 188                             "      This can lead to significant performance degradation.\n"
 189                             "      Consider using -pin on (and -pinoffset in case you run multiple "
 190                             "jobs).");
 191             alreadyWarned = true;
 192         }
 193         validLayout = validLayout && !invalidValue;
 194     }
 195
 196     invalidValue = (threads > hwThreads);
 197     if (invalidWithinSimulation(cr, invalidValue) && !alreadyWarned)
 198     {
 199         GMX_LOG(mdlog.warning)
 200                 .asParagraph()
 201                 .appendText("NOTE: Oversubscribing the CPU, will not pin threads");
 202         alreadyWarned = true;
 203     }
 204     validLayout = validLayout && !invalidValue;
 205
 206     invalidValue = (pin_offset + threads > hwThreads);
 207     if (invalidWithinSimulation(cr, invalidValue) && !alreadyWarned)
 208     {
 209         GMX_LOG(mdlog.warning)
 210                 .asParagraph()
 211                 .appendText(
 212                         "WARNING: Requested offset too large for available cores, thread pinning "
 213                         "disabled.");
 214         alreadyWarned = true;
 215     }
 216     validLayout = validLayout && !invalidValue;
 217
 218     invalidValue = false;
 219     /* do we need to choose the pinning stride? */
 220     bPickPinStride = (*pin_stride == 0);
 221
 222     if (bPickPinStride)
 223     {
 224         if (haveTopology && pin_offset + threads * hwThreadsPerCore <= hwThreads)
 225         {
 226             /* Put one thread on each physical core */
 227             *pin_stride = hwThreadsPerCore;
 228         }
 229         else
 230         {
 231             /* We don't know if we have SMT, and if we do, we don't know
 232              * if hw threads in the same physical core are consecutive.
 233              * Without SMT the pinning layout should not matter too much.
 234              * so we assume a consecutive layout and maximally spread out"
 235              * the threads at equal threads per core.
 236              * Note that IBM is the major non-x86 case with cpuid support
 237              * and probably threads are already pinned by the queuing system,
 238              * so we wouldn't end up here in the first place.
 239              */
 240             *pin_stride = (hwThreads - pin_offset) / threads;
 241         }
 242     }
 243     else
 244     {
 245         /* Check the placement of the thread with the largest index to make sure
 246          * that the offset & stride doesn't cause pinning beyond the last hardware thread. */
 247         invalidValue = (pin_offset + (threads - 1) * (*pin_stride) >= hwThreads);
 248     }
 249     if (invalidWithinSimulation(cr, invalidValue) && !alreadyWarned)
 250     {
 251         /* We are oversubscribing, don't pin */
 252         GMX_LOG(mdlog.warning)
 253                 .asParagraph()
 254                 .appendText(
 255                         "WARNING: Requested stride too large for available cores, thread pinning "
 256                         "disabled.");
 257         alreadyWarned = true;
 258     }
 259     validLayout = validLayout && !invalidValue;
 260
 261     if (validLayout)
 262     {
 263         GMX_LOG(mdlog.info)
 264                 .appendTextFormatted("Pinning threads with a%s logical core stride of %d",
 265                                      bPickPinStride ? "n auto-selected" : " user-specified",
 266                                      *pin_stride);
 267     }
 268
 269     *issuedWarning = alreadyWarned;
 270
 271     return validLayout;
 272 }
 273
 274 static bool set_affinity(const t_commrec*            cr,
 275                          int                         nthread_local,
 276                          int                         intraNodeThreadOffset,
 277                          int                         offset,
 278                          int                         core_pinning_stride,
 279                          const int*                  localityOrder,
 280                          gmx::IThreadAffinityAccess* affinityAccess)
 281 {
 282     // Set the per-thread affinity. In order to be able to check the success
 283     // of affinity settings, we will set nth_affinity_set to 1 on threads
 284     // where the affinity setting succeded and to 0 where it failed.
 285     // Reducing these 0/1 values over the threads will give the total number
 286     // of threads on which we succeeded.
 287
 288     // To avoid warnings from the static analyzer we initialize nth_affinity_set
 289     // to zero outside the OpenMP block, and then add to it inside the block.
 290     // The value will still always be 0 or 1 from each thread.
 291     int nth_affinity_set = 0;
 292 #pragma omp parallel num_threads(nthread_local) reduction(+ : nth_affinity_set)
 293     {
 294         try
 295         {
 296             int thread_id, thread_id_node;
 297             int index, core;
 298
 299             thread_id      = gmx_omp_get_thread_num();
 300             thread_id_node = intraNodeThreadOffset + thread_id;
 301             index          = offset + thread_id_node * core_pinning_stride;
 302             if (localityOrder != nullptr)
 303             {
 304                 core = localityOrder[index];
 305             }
 306             else
 307             {
 308                 core = index;
 309             }
 310
 311             const bool ret = affinityAccess->setCurrentThreadAffinityToCore(core);
 312
 313             /* store the per-thread success-values of the setaffinity */
 314             nth_affinity_set += (ret ? 1 : 0);
 315
 316             if (debug)
 317             {
 318                 fprintf(debug,
 319                         "On rank %2d, thread %2d, index %2d, core %2d the affinity setting "
 320                         "returned %d\n",
 321                         cr->nodeid,
 322                         gmx_omp_get_thread_num(),
 323                         index,
 324                         core,
 325                         ret ? 1 : 0);
 326             }
 327         }
 328         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
 329     }
 330
 331     if (nth_affinity_set > nthread_local)
 332     {
 333         char msg[STRLEN];
 334
 335         sprintf(msg,
 336                 "Looks like we have set affinity for more threads than "
 337                 "we have (%d > %d)!\n",
 338                 nth_affinity_set,
 339                 nthread_local);
 340         gmx_incons(msg);
 341     }
 342
 343     /* check & warn if some threads failed to set their affinities */
 344     const bool allAffinitiesSet = (nth_affinity_set == nthread_local);
 345     if (!allAffinitiesSet)
 346     {
 347         char sbuf1[STRLEN], sbuf2[STRLEN];
 348
 349         /* sbuf1 contains rank info, while sbuf2 OpenMP thread info */
 350         sbuf1[0] = sbuf2[0] = '\0';
 351         /* Only add rank info if we have more than one rank. */
 352         if (cr->nnodes > 1)
 353         {
 354 #if GMX_MPI
 355 #    if GMX_THREAD_MPI
 356             sprintf(sbuf1, "In tMPI thread #%d: ", cr->nodeid);
 357 #    else /* GMX_LIB_MPI */
 358             sprintf(sbuf1, "In MPI process #%d: ", cr->nodeid);
 359 #    endif
 360 #endif /* GMX_MPI */
 361         }
 362
 363         if (nthread_local > 1)
 364         {
 365             sprintf(sbuf2,
 366                     "for %d/%d thread%s ",
 367                     nthread_local - nth_affinity_set,
 368                     nthread_local,
 369                     nthread_local > 1 ? "s" : "");
 370         }
 371
 372         // TODO: This output should also go through mdlog.
 373         fprintf(stderr, "NOTE: %sAffinity setting %sfailed.\n", sbuf1, sbuf2);
 374     }
 375     return allAffinitiesSet;
 376 }
 377
 378 void analyzeThreadsOnThisNode(const gmx::PhysicalNodeCommunicator& physicalNodeComm,
 379                               int                                  numThreadsOnThisRank,
 380                               int*                                 numThreadsOnThisNode,
 381                               int*                                 intraNodeThreadOffset)
 382 {
 383     *intraNodeThreadOffset = 0;
 384     *numThreadsOnThisNode  = numThreadsOnThisRank;
 385 #if GMX_MPI
 386     if (physicalNodeComm.size_ > 1)
 387     {
 388         /* We need to determine a scan of the thread counts in this
 389          * compute node. */
 390         MPI_Scan(&numThreadsOnThisRank, intraNodeThreadOffset, 1, MPI_INT, MPI_SUM, physicalNodeComm.comm_);
 391         /* MPI_Scan is inclusive, but here we need exclusive */
 392         *intraNodeThreadOffset -= numThreadsOnThisRank;
 393         /* Get the total number of threads on this physical node */
 394         MPI_Allreduce(
 395                 &numThreadsOnThisRank, numThreadsOnThisNode, 1, MPI_INT, MPI_SUM, physicalNodeComm.comm_);
 396     }
 397 #else
 398     GMX_UNUSED_VALUE(physicalNodeComm);
 399 #endif
 400 }
 401
 402 /* Set CPU affinity. Can be important for performance.
 403    On some systems (e.g. Cray) CPU Affinity is set by default.
 404    But default assigning doesn't work (well) with only some ranks
 405    having threads. This causes very low performance.
 406    External tools have cumbersome syntax for setting affinity
 407    in the case that only some ranks have threads.
 408    Thus it is important that GROMACS sets the affinity internally
 409    if only PME is using threads.
 410  */
 411 void gmx_set_thread_affinity(const gmx::MDLogger&         mdlog,
 412                              const t_commrec*             cr,
 413                              const gmx_hw_opt_t*          hw_opt,
 414                              const gmx::HardwareTopology& hwTop,
 415                              int                          numThreadsOnThisRank,
 416                              int                          numThreadsOnThisNode,
 417                              int                          intraNodeThreadOffset,
 418                              gmx::IThreadAffinityAccess*  affinityAccess)
 419 {
 420     int* localityOrder = nullptr;
 421
 422     if (hw_opt->threadAffinity == ThreadAffinity::Off)
 423     {
 424         /* Nothing to do */
 425         return;
 426     }
 427
 428     if (affinityAccess == nullptr)
 429     {
 430         affinityAccess = &g_defaultAffinityAccess;
 431     }
 432
 433     /* If the tMPI thread affinity setting is not supported encourage the user
 434      * to report it as it's either a bug or an exotic platform which we might
 435      * want to support. */
 436     if (!affinityAccess->isThreadAffinitySupported())
 437     {
 438         /* we know Mac OS does not support setting thread affinity, so there's
 439            no point in warning the user in that case. In any other case
 440            the user might be able to do something about it. */
 441 #if !defined(__APPLE__)
 442         GMX_LOG(mdlog.warning)
 443                 .asParagraph()
 444                 .appendText("NOTE: Cannot set thread affinities on the current platform.");
 445 #endif /* __APPLE__ */
 446         return;
 447     }
 448
 449     int offset              = hw_opt->core_pinning_offset;
 450     int core_pinning_stride = hw_opt->core_pinning_stride;
 451     if (offset != 0)
 452     {
 453         GMX_LOG(mdlog.warning).appendTextFormatted("Applying core pinning offset %d", offset);
 454     }
 455
 456     bool affinityIsAutoAndNumThreadsIsNotAuto =
 457             (hw_opt->threadAffinity == ThreadAffinity::Auto && !hw_opt->totNumThreadsIsAuto);
 458     bool                   issuedWarning;
 459     bool                   validLayout = get_thread_affinity_layout(mdlog,
 460                                                   cr,
 461                                                   hwTop,
 462                                                   numThreadsOnThisNode,
 463                                                   affinityIsAutoAndNumThreadsIsNotAuto,
 464                                                   offset,
 465                                                   &core_pinning_stride,
 466                                                   &localityOrder,
 467                                                   &issuedWarning);
 468     const gmx::sfree_guard localityOrderGuard(localityOrder);
 469
 470     bool allAffinitiesSet;
 471     if (validLayout)
 472     {
 473         allAffinitiesSet = set_affinity(
 474                 cr, numThreadsOnThisRank, intraNodeThreadOffset, offset, core_pinning_stride, localityOrder, affinityAccess);
 475     }
 476     else
 477     {
 478         // Produce the warning if any rank fails.
 479         allAffinitiesSet = false;
 480     }
 481     if (invalidWithinSimulation(cr, !allAffinitiesSet) && !issuedWarning)
 482     {
 483         GMX_LOG(mdlog.warning).asParagraph().appendText("NOTE: Thread affinity was not set.");
 484     }
 485 }
 486
 487 /* Detects and returns whether we have the default affinity mask
 488  *
 489  * Returns true when we can query thread affinities and CPU count is
 490  * consistent and we have default affinity mask on all ranks.
 491  *
 492  * Should be called simultaneously by all MPI ranks.
 493  */
 494 static bool detectDefaultAffinityMask(const int nthreads_hw_avail)
 495 {
 496     bool detectedDefaultAffinityMask = true;
 497
 498 #if HAVE_SCHED_AFFINITY
 499     cpu_set_t mask_current;
 500     CPU_ZERO(&mask_current);
 501     int ret;
 502     if ((ret = sched_getaffinity(0, sizeof(cpu_set_t), &mask_current)) != 0)
 503     {
 504         /* failed to query affinity mask, will just return */
 505         if (debug)
 506         {
 507             fprintf(debug, "Failed to query affinity mask (error %d)", ret);
 508         }
 509         detectedDefaultAffinityMask = false;
 510     }
 511
 512     /* Before proceeding with the actual check, make sure that the number of
 513      * detected CPUs is >= the CPUs in the current set.
 514      * We need to check for CPU_COUNT as it was added only in glibc 2.6. */
 515 #    ifdef CPU_COUNT
 516     if (detectedDefaultAffinityMask && nthreads_hw_avail < CPU_COUNT(&mask_current))
 517     {
 518         if (debug)
 519         {
 520             fprintf(debug,
 521                     "%d hardware threads detected, but %d was returned by CPU_COUNT",
 522                     nthreads_hw_avail,
 523                     CPU_COUNT(&mask_current));
 524         }
 525         detectedDefaultAffinityMask = false;
 526     }
 527 #    endif /* CPU_COUNT */
 528
 529     if (detectedDefaultAffinityMask)
 530     {
 531         /* Here we check whether all bits of the affinity mask are set.
 532          * Note that this mask can change while the program is executing,
 533          * e.g. by the system dynamically enabling or disabling cores or
 534          * by some scheduler changing the affinities. Thus we can not assume
 535          * that the result is the same on all ranks within a node
 536          * when running this code at different times.
 537          */
 538         bool allBitsAreSet = true;
 539         for (int i = 0; (i < nthreads_hw_avail && i < CPU_SETSIZE); i++)
 540         {
 541             allBitsAreSet = allBitsAreSet && (CPU_ISSET(i, &mask_current) != 0);
 542         }
 543         if (debug)
 544         {
 545             fprintf(debug, "%s affinity mask found\n", allBitsAreSet ? "Default" : "Non-default");
 546         }
 547         if (!allBitsAreSet)
 548         {
 549             detectedDefaultAffinityMask = false;
 550         }
 551     }
 552 #else
 553     GMX_UNUSED_VALUE(nthreads_hw_avail);
 554 #endif /* HAVE_SCHED_AFFINITY */
 555
 556 #if GMX_MPI
 557     int mpiIsInitialized;
 558     MPI_Initialized(&mpiIsInitialized);
 559     if (mpiIsInitialized)
 560     {
 561         bool maskToReduce = detectedDefaultAffinityMask;
 562         MPI_Allreduce(&maskToReduce, &detectedDefaultAffinityMask, 1, MPI_C_BOOL, MPI_LAND, MPI_COMM_WORLD);
 563     }
 564 #endif
 565
 566     return detectedDefaultAffinityMask;
 567 }
 568
 569 /* Check the process affinity mask and if it is found to be non-zero,
 570  * will honor it and disable mdrun internal affinity setting.
 571  * Note that this will only work on Linux as we use a GNU feature.
 572  */
 573 void gmx_check_thread_affinity_set(const gmx::MDLogger& mdlog,
 574                                    gmx_hw_opt_t*        hw_opt,
 575                                    int gmx_unused nthreads_hw_avail,
 576                                    gmx_bool       bAfterOpenmpInit)
 577 {
 578     GMX_RELEASE_ASSERT(hw_opt, "hw_opt must be a non-NULL pointer");
 579
 580     if (!bAfterOpenmpInit)
 581     {
 582         /* Check for externally set OpenMP affinity and turn off internal
 583          * pinning if any is found. We need to do this check early to tell
 584          * thread-MPI whether it should do pinning when spawning threads.
 585          * TODO: the above no longer holds, we should move these checks later
 586          */
 587         if (hw_opt->threadAffinity != ThreadAffinity::Off)
 588         {
 589             char* message;
 590             if (!gmx_omp_check_thread_affinity(&message))
 591             {
 592                 /* We only pin automatically with totNumThreadsIsAuto=true */
 593                 if (hw_opt->threadAffinity == ThreadAffinity::On || hw_opt->totNumThreadsIsAuto)
 594                 {
 595                     GMX_LOG(mdlog.warning).asParagraph().appendText(message);
 596                 }
 597                 sfree(message);
 598                 hw_opt->threadAffinity = ThreadAffinity::Off;
 599             }
 600         }
 601     }
 602
 603     if (!detectDefaultAffinityMask(nthreads_hw_avail))
 604     {
 605         if (hw_opt->threadAffinity == ThreadAffinity::Auto)
 606         {
 607             if (!bAfterOpenmpInit)
 608             {
 609                 GMX_LOG(mdlog.warning)
 610                         .asParagraph()
 611                         .appendText(
 612                                 "Non-default thread affinity set, disabling internal thread "
 613                                 "affinity");
 614             }
 615             else
 616             {
 617                 GMX_LOG(mdlog.warning)
 618                         .asParagraph()
 619                         .appendText(
 620                                 "Non-default thread affinity set probably by the OpenMP library,\n"
 621                                 "disabling internal thread affinity");
 622             }
 623             hw_opt->threadAffinity = ThreadAffinity::Off;
 624         }
 625         else
 626         {
 627             /* Only warn once, at the last check (bAfterOpenmpInit==TRUE) */
 628             if (bAfterOpenmpInit)
 629             {
 630                 GMX_LOG(mdlog.warning)
 631                         .asParagraph()
 632                         .appendTextFormatted("Overriding thread affinity set outside %s",
 633                                              gmx::getProgramContext().displayName());
 634             }
 635         }
 636     }
 637 }