src/gmxlib/gmx_detect_hardware.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2012,2013, by the GROMACS development team, led by
   5  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
   6  * others, as listed in the AUTHORS file in the top-level source
   7  * directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 #ifdef HAVE_CONFIG_H
  36 #include <config.h>
  37 #endif
  38
  39 #include <stdlib.h>
  40 #include <assert.h>
  41 #include <string.h>
  42
  43 #include "types/enums.h"
  44 #include "types/hw_info.h"
  45 #include "types/commrec.h"
  46 #include "gmx_fatal.h"
  47 #include "gmx_fatal_collective.h"
  48 #include "smalloc.h"
  49 #include "gpu_utils.h"
  50 #include "statutil.h"
  51 #include "gmx_detect_hardware.h"
  52 #include "main.h"
  53 #include "md_logging.h"
  54
  55 #include "thread_mpi/threads.h"
  56
  57 #if ((defined(WIN32) || defined( _WIN32 ) || defined(WIN64) || defined( _WIN64 )) && !(defined (__CYGWIN__) || defined (__CYGWIN32__)))
  58 #include "windows.h"
  59 #endif
  60
  61 /* Although we can't have more than 10 GPU different ID-s passed by the user as
  62  * the id-s are assumed to be represented by single digits, as multiple
  63  * processes can share a GPU, we can end up with more than 10 IDs.
  64  * To account for potential extreme cases we'll set the limit to a pretty
  65  * ridiculous number. */
  66 static unsigned int max_gpu_ids_user = 64;
  67
  68 static const char * invalid_gpuid_hint =
  69     "A delimiter-free sequence of valid numeric IDs of available GPUs is expected.";
  70
  71 /* The globally shared hwinfo structure. */
  72 static gmx_hw_info_t      *hwinfo_g;
  73 /* A reference counter for the hwinfo structure */
  74 static int                 n_hwinfo = 0;
  75 /* A lock to protect the hwinfo structure */
  76 static tMPI_Thread_mutex_t hw_info_lock = TMPI_THREAD_MUTEX_INITIALIZER;
  77
  78
  79 /* FW decl. */
  80 static void limit_num_gpus_used(gmx_hw_info_t *hwinfo, int count);
  81
  82 static void sprint_gpus(char *sbuf, const gmx_gpu_info_t *gpu_info, gmx_bool bPrintAll)
  83 {
  84     int      i, ndev;
  85     char     stmp[STRLEN];
  86
  87     ndev = gpu_info->ncuda_dev;
  88
  89     sbuf[0] = '\0';
  90     for (i = 0; i < ndev; i++)
  91     {
  92         get_gpu_device_info_string(stmp, gpu_info, i);
  93         strcat(sbuf, "  ");
  94         strcat(sbuf, stmp);
  95         if (i < ndev - 1)
  96         {
  97             strcat(sbuf, "\n");
  98         }
  99     }
 100 }
 101
 102 static void print_gpu_detection_stats(FILE                 *fplog,
 103                                       const gmx_gpu_info_t *gpu_info,
 104                                       const t_commrec      *cr)
 105 {
 106     char onhost[266], stmp[STRLEN];
 107     int  ngpu;
 108
 109     ngpu = gpu_info->ncuda_dev;
 110
 111 #if defined GMX_MPI && !defined GMX_THREAD_MPI
 112     /* We only print the detection on one, of possibly multiple, nodes */
 113     strncpy(onhost, " on host ", 10);
 114     gmx_gethostname(onhost+9, 256);
 115 #else
 116     /* We detect all relevant GPUs */
 117     strncpy(onhost, "", 1);
 118 #endif
 119
 120     if (ngpu > 0)
 121     {
 122         sprint_gpus(stmp, gpu_info, TRUE);
 123         md_print_warn(cr, fplog, "%d GPU%s detected%s:\n%s\n",
 124                       ngpu, (ngpu > 1) ? "s" : "", onhost, stmp);
 125     }
 126     else
 127     {
 128         md_print_warn(cr, fplog, "No GPUs detected%s\n", onhost);
 129     }
 130 }
 131
 132 static void print_gpu_use_stats(FILE                 *fplog,
 133                                 const gmx_gpu_info_t *gpu_info,
 134                                 const t_commrec      *cr)
 135 {
 136     char sbuf[STRLEN], stmp[STRLEN];
 137     int  i, ngpu, ngpu_all;
 138
 139     ngpu     = gpu_info->ncuda_dev_use;
 140     ngpu_all = gpu_info->ncuda_dev;
 141
 142     /* Issue note if GPUs are available but not used */
 143     if (ngpu_all > 0 && ngpu < 1)
 144     {
 145         sprintf(sbuf,
 146                 "%d compatible GPU%s detected in the system, but none will be used.\n"
 147                 "Consider trying GPU acceleration with the Verlet scheme!",
 148                 ngpu_all, (ngpu_all > 1) ? "s" : "");
 149     }
 150     else
 151     {
 152         sprintf(sbuf, "%d GPU%s %sselected for this run: ",
 153                 ngpu, (ngpu > 1) ? "s" : "",
 154                 gpu_info->bUserSet ? "user-" : "auto-");
 155         for (i = 0; i < ngpu; i++)
 156         {
 157             sprintf(stmp, "#%d", get_gpu_device_id(gpu_info, i));
 158             if (i < ngpu - 1)
 159             {
 160                 strcat(stmp, ", ");
 161             }
 162             strcat(sbuf, stmp);
 163         }
 164     }
 165     md_print_info(cr, fplog, "%s\n\n", sbuf);
 166 }
 167
 168 /* Parse a "plain" GPU ID string which contains a sequence of digits corresponding
 169  * to GPU IDs; the order will indicate the process/tMPI thread - GPU assignment. */
 170 static void parse_gpu_id_plain_string(const char *idstr, int *nid, int *idlist)
 171 {
 172     int    i;
 173     size_t len_idstr;
 174
 175     len_idstr = strlen(idstr);
 176
 177     if (len_idstr > max_gpu_ids_user)
 178     {
 179         gmx_fatal(FARGS, "%d GPU IDs provided, but only at most %d are supported",
 180                   len_idstr, max_gpu_ids_user);
 181     }
 182
 183     *nid = len_idstr;
 184
 185     for (i = 0; i < *nid; i++)
 186     {
 187         if (idstr[i] < '0' || idstr[i] > '9')
 188         {
 189             gmx_fatal(FARGS, "Invalid character in GPU ID string: '%c'\n%s\n",
 190                       idstr[i], invalid_gpuid_hint);
 191         }
 192         idlist[i] = idstr[i] - '0';
 193     }
 194 }
 195
 196 static void parse_gpu_id_csv_string(const char *idstr, int *nid, int *idlist)
 197 {
 198     /* XXX implement cvs format to support more than 10 different GPUs in a box. */
 199     gmx_incons("Not implemented yet");
 200 }
 201
 202 void gmx_check_hw_runconf_consistency(FILE *fplog, gmx_hw_info_t *hwinfo,
 203                                       const t_commrec *cr, int ntmpi_requested,
 204                                       gmx_bool bUseGPU)
 205 {
 206     int                        npppn, ntmpi_pp, ngpu;
 207     char                       sbuf[STRLEN], th_or_proc[STRLEN], th_or_proc_plural[STRLEN], pernode[STRLEN];
 208     char                       gpu_plural[2];
 209     gmx_bool                   bGPUBin, btMPI, bMPI, bMaxMpiThreadsSet, bNthreadsAuto, bEmulateGPU;
 210     int                        ret;
 211     static tMPI_Thread_mutex_t cons_lock = TMPI_THREAD_MUTEX_INITIALIZER;
 212
 213
 214     assert(hwinfo);
 215     assert(cr);
 216
 217     /* Below we only do consistency checks for PP and GPUs,
 218      * this is irrelevant for PME only nodes, so in that case we return
 219      * here.
 220      */
 221     if (!(cr->duty & DUTY_PP))
 222     {
 223         return;
 224     }
 225
 226     /* We run this function only once, but must make sure that all threads
 227        that are alive run this function, so they get consistent data. We
 228        achieve this by mutual exclusion and returning if the structure is
 229        already properly checked & set */
 230     ret = tMPI_Thread_mutex_lock(&cons_lock);
 231     if (ret != 0)
 232     {
 233         gmx_fatal(FARGS, "Error locking cons mutex: %s", strerror(errno));
 234     }
 235
 236     if (!hwinfo->bConsistencyChecked)
 237     {
 238         btMPI         = bMPI = FALSE;
 239         bNthreadsAuto = FALSE;
 240 #if defined(GMX_THREAD_MPI)
 241         btMPI         = TRUE;
 242         bNthreadsAuto = (ntmpi_requested < 1);
 243 #elif defined(GMX_LIB_MPI)
 244         bMPI  = TRUE;
 245 #endif
 246
 247 #ifdef GMX_GPU
 248         bGPUBin      = TRUE;
 249 #else
 250         bGPUBin      = FALSE;
 251 #endif
 252
 253         /* GPU emulation detection is done later, but we need here as well
 254          * -- uncool, but there's no elegant workaround */
 255         bEmulateGPU       = (getenv("GMX_EMULATE_GPU") != NULL);
 256         bMaxMpiThreadsSet = (getenv("GMX_MAX_MPI_THREADS") != NULL);
 257
 258         /* check the acceleration mdrun is compiled with against hardware
 259            capabilities */
 260         /* TODO: Here we assume homogeneous hardware which is not necessarily
 261                  the case! Might not hurt to add an extra check over MPI. */
 262         gmx_cpuid_acceleration_check(hwinfo->cpuid_info, fplog);
 263
 264         /* Need to ensure that we have enough GPUs:
 265          * - need one GPU per PP node
 266          * - no GPU oversubscription with tMPI
 267          * => keep on the GPU support, otherwise turn off (or bail if forced)
 268          * */
 269         /* number of PP processes per node */
 270         npppn = cr->nrank_pp_intranode;
 271
 272         pernode[0]           = '\0';
 273         th_or_proc_plural[0] = '\0';
 274         if (btMPI)
 275         {
 276             sprintf(th_or_proc, "thread-MPI thread");
 277             if (npppn > 1)
 278             {
 279                 sprintf(th_or_proc_plural, "s");
 280             }
 281         }
 282         else if (bMPI)
 283         {
 284             sprintf(th_or_proc, "MPI process");
 285             if (npppn > 1)
 286             {
 287                 sprintf(th_or_proc_plural, "es");
 288             }
 289             sprintf(pernode, " per node");
 290         }
 291         else
 292         {
 293             /* neither MPI nor tMPI */
 294             sprintf(th_or_proc, "process");
 295         }
 296
 297         if (bGPUBin)
 298         {
 299             print_gpu_detection_stats(fplog, &hwinfo->gpu_info, cr);
 300         }
 301
 302         if (bUseGPU && hwinfo->bCanUseGPU && !bEmulateGPU)
 303         {
 304             ngpu = hwinfo->gpu_info.ncuda_dev_use;
 305             sprintf(gpu_plural, "%s", (ngpu > 1) ? "s" : "");
 306
 307             /* number of tMPI threads atuo-adjusted */
 308             if (btMPI && bNthreadsAuto)
 309             {
 310                 if (npppn < ngpu)
 311                 {
 312                     if (hwinfo->gpu_info.bUserSet)
 313                     {
 314                         /* The user manually provided more GPUs than threads we
 315                            could automatically start. */
 316                         gmx_fatal(FARGS,
 317                                   "%d GPU%s provided, but only %d PP thread-MPI thread%s coud be started.\n"
 318                                   "%s requires one PP tread-MPI thread per GPU; use fewer GPUs%s.",
 319                                   ngpu, gpu_plural, npppn, th_or_proc_plural,
 320                                   ShortProgram(), bMaxMpiThreadsSet ? "\nor allow more threads to be used" : "");
 321                     }
 322                     else
 323                     {
 324                         /* There are more GPUs than tMPI threads; we have to
 325                            limit the number GPUs used. */
 326                         md_print_warn(cr, fplog,
 327                                       "NOTE: %d GPU%s were detected, but only %d PP thread-MPI thread%s can be started.\n"
 328                                       "      %s can use one GPU per PP tread-MPI thread, so only %d GPU%s will be used.%s\n",
 329                                       ngpu, gpu_plural, npppn,
 330                                       th_or_proc_plural,
 331                                       ShortProgram(), npppn,
 332                                       npppn > 1 ? "s" : "",
 333                                       bMaxMpiThreadsSet ? "\n      Also, you can allow more threads to be used by increasing GMX_MAX_MPI_THREADS" : "");
 334
 335                         if (cr->rank_pp_intranode == 0)
 336                         {
 337                             limit_num_gpus_used(hwinfo, npppn);
 338                             ngpu = hwinfo->gpu_info.ncuda_dev_use;
 339                             sprintf(gpu_plural, "%s", (ngpu > 1) ? "s" : "");
 340                         }
 341                     }
 342                 }
 343             }
 344
 345             if (ngpu != npppn)
 346             {
 347                 if (hwinfo->gpu_info.bUserSet)
 348                 {
 349                     gmx_fatal(FARGS,
 350                               "Incorrect launch configuration: mismatching number of PP %s%s and GPUs%s.\n"
 351                               "%s was started with %d PP %s%s%s, but you provided %d GPU%s.",
 352                               th_or_proc, btMPI ? "s" : "es", pernode,
 353                               ShortProgram(), npppn, th_or_proc,
 354                               th_or_proc_plural, pernode, ngpu, gpu_plural);
 355                 }
 356                 else
 357                 {
 358                     if (ngpu > npppn)
 359                     {
 360                         md_print_warn(cr, fplog,
 361                                       "NOTE: potentially sub-optimal launch configuration, %s started with less\n"
 362                                       "      PP %s%s%s than GPU%s available.\n"
 363                                       "      Each PP %s can use only one GPU, %d GPU%s%s will be used.\n",
 364                                       ShortProgram(), th_or_proc,
 365                                       th_or_proc_plural, pernode, gpu_plural,
 366                                       th_or_proc, npppn, gpu_plural, pernode);
 367
 368                         if (bMPI || (btMPI && cr->rank_pp_intranode == 0))
 369                         {
 370                             limit_num_gpus_used(hwinfo, npppn);
 371                             ngpu = hwinfo->gpu_info.ncuda_dev_use;
 372                             sprintf(gpu_plural, "%s", (ngpu > 1) ? "s" : "");
 373                         }
 374                     }
 375                     else
 376                     {
 377                         /* Avoid duplicate error messages.
 378                          * Unfortunately we can only do this at the physical node
 379                          * level, since the hardware setup and MPI process count
 380                          * might be differ over physical nodes.
 381                          */
 382                         if (cr->rank_pp_intranode == 0)
 383                         {
 384                             gmx_fatal(FARGS,
 385                                       "Incorrect launch configuration: mismatching number of PP %s%s and GPUs%s.\n"
 386                                       "%s was started with %d PP %s%s%s, but only %d GPU%s were detected.",
 387                                       th_or_proc, btMPI ? "s" : "es", pernode,
 388                                       ShortProgram(), npppn, th_or_proc,
 389                                       th_or_proc_plural, pernode, ngpu,
 390                                       gpu_plural);
 391                         }
 392                     }
 393                 }
 394             }
 395
 396             {
 397                 int      same_count;
 398
 399                 same_count = gmx_count_gpu_dev_shared(&(hwinfo->gpu_info));
 400
 401                 if (btMPI && same_count > 0)
 402                 {
 403                     gmx_fatal(FARGS,
 404                               "Invalid GPU assignment: can't share a GPU among multiple thread-MPI threads.\n"
 405                               "Use MPI if you are sure that you want to assign GPU to multiple threads.");
 406                 }
 407
 408                 if (same_count > 0)
 409                 {
 410                     md_print_warn(cr, fplog,
 411                                   "NOTE: Potentially sub-optimal launch configuration: you assigned %s to\n"
 412                                   "      multiple %s%s; this should be avoided as it can cause\n"
 413                                   "      performance loss.\n",
 414                                   same_count > 1 ? "GPUs" : "a GPU", th_or_proc, btMPI ? "s" : "es");
 415                 }
 416             }
 417             print_gpu_use_stats(fplog, &hwinfo->gpu_info, cr);
 418         }
 419         hwinfo->bConsistencyChecked = TRUE;
 420     }
 421
 422     ret = tMPI_Thread_mutex_unlock(&cons_lock);
 423     if (ret != 0)
 424     {
 425         gmx_fatal(FARGS, "Error unlocking cons mutex: %s", strerror(errno));
 426     }
 427
 428 #ifdef GMX_MPI
 429     if (PAR(cr))
 430     {
 431         /* Avoid other ranks to continue after
 432            inconsistency */
 433         MPI_Barrier(cr->mpi_comm_mygroup);
 434     }
 435 #endif
 436
 437 }
 438
 439 int gmx_count_gpu_dev_shared(const gmx_gpu_info_t *gpu_info)
 440 {
 441     int      same_count    = 0;
 442     int      ngpu          = gpu_info->ncuda_dev_use;
 443
 444     if (gpu_info->bUserSet)
 445     {
 446         int      i, j;
 447
 448         for (i = 0; i < ngpu - 1; i++)
 449         {
 450             for (j = i + 1; j < ngpu; j++)
 451             {
 452                 same_count      += (gpu_info->cuda_dev_use[i] ==
 453                                     gpu_info->cuda_dev_use[j]);
 454             }
 455         }
 456     }
 457
 458     return same_count;
 459 }
 460
 461
 462 /* Return the number of hardware threads supported by the current CPU.
 463  * We assume that this is equal with the number of CPUs reported to be
 464  * online by the OS at the time of the call.
 465  */
 466 static int get_nthreads_hw_avail(FILE *fplog, const t_commrec *cr)
 467 {
 468     int ret = 0;
 469
 470 #if ((defined(WIN32) || defined( _WIN32 ) || defined(WIN64) || defined( _WIN64 )) && !(defined (__CYGWIN__) || defined (__CYGWIN32__)))
 471     /* Windows */
 472     SYSTEM_INFO sysinfo;
 473     GetSystemInfo( &sysinfo );
 474     ret = sysinfo.dwNumberOfProcessors;
 475 #elif defined HAVE_SYSCONF
 476     /* We are probably on Unix.
 477      * Now check if we have the argument to use before executing the call
 478      */
 479 #if defined(_SC_NPROCESSORS_ONLN)
 480     ret = sysconf(_SC_NPROCESSORS_ONLN);
 481 #elif defined(_SC_NPROC_ONLN)
 482     ret = sysconf(_SC_NPROC_ONLN);
 483 #elif defined(_SC_NPROCESSORS_CONF)
 484     ret = sysconf(_SC_NPROCESSORS_CONF);
 485 #elif defined(_SC_NPROC_CONF)
 486     ret = sysconf(_SC_NPROC_CONF);
 487 #endif /* End of check for sysconf argument values */
 488
 489 #else
 490     /* Neither windows nor Unix. No fscking idea how many CPUs we have! */
 491     ret = -1;
 492 #endif
 493
 494     if (debug)
 495     {
 496         fprintf(debug, "Detected %d processors, will use this as the number "
 497                 "of supported hardware threads.\n", ret);
 498     }
 499
 500 #ifdef GMX_OMPENMP
 501     if (ret != gmx_omp_get_num_procs())
 502     {
 503         md_print_warn(cr, fplog,
 504                       "Number of CPUs detected (%d) does not match the number reported by OpenMP (%d).\n"
 505                       "Consider setting the launch configuration manually!",
 506                       ret, gmx_omp_get_num_procs());
 507     }
 508 #endif
 509
 510     return ret;
 511 }
 512
 513 gmx_hw_info_t *gmx_detect_hardware(FILE *fplog, const t_commrec *cr,
 514                                    gmx_bool bForceUseGPU, gmx_bool bTryUseGPU,
 515                                    const char *gpu_id)
 516 {
 517     int              i;
 518     const char      *env;
 519     char             sbuf[STRLEN], stmp[STRLEN];
 520     gmx_hw_info_t   *hw;
 521     gmx_gpu_info_t   gpuinfo_auto, gpuinfo_user;
 522     gmx_bool         bGPUBin;
 523     int              ret;
 524
 525     /* make sure no one else is doing the same thing */
 526     ret = tMPI_Thread_mutex_lock(&hw_info_lock);
 527     if (ret != 0)
 528     {
 529         gmx_fatal(FARGS, "Error locking hwinfo mutex: %s", strerror(errno));
 530     }
 531
 532     /* only initialize the hwinfo structure if it is not already initalized */
 533     if (n_hwinfo == 0)
 534     {
 535         snew(hwinfo_g, 1);
 536         hwinfo_g->bConsistencyChecked = FALSE;
 537
 538         /* detect CPUID info; no fuss, we don't detect system-wide
 539          * -- sloppy, but that's it for now */
 540         if (gmx_cpuid_init(&hwinfo_g->cpuid_info) != 0)
 541         {
 542             gmx_fatal_collective(FARGS, cr, NULL, "CPUID detection failed!");
 543         }
 544
 545         /* detect number of hardware threads */
 546         hwinfo_g->nthreads_hw_avail = get_nthreads_hw_avail(fplog, cr);
 547
 548         /* detect GPUs */
 549         hwinfo_g->gpu_info.ncuda_dev_use  = 0;
 550         hwinfo_g->gpu_info.cuda_dev_use   = NULL;
 551         hwinfo_g->gpu_info.ncuda_dev      = 0;
 552         hwinfo_g->gpu_info.cuda_dev       = NULL;
 553
 554 #ifdef GMX_GPU
 555         bGPUBin      = TRUE;
 556 #else
 557         bGPUBin      = FALSE;
 558 #endif
 559
 560         /* Bail if binary is not compiled with GPU acceleration, but this is either
 561          * explicitly (-nb gpu) or implicitly (gpu ID passed) requested. */
 562         if (bForceUseGPU && !bGPUBin)
 563         {
 564             gmx_fatal(FARGS, "GPU acceleration requested, but %s was compiled without GPU support!", ShortProgram());
 565         }
 566         if (gpu_id != NULL && !bGPUBin)
 567         {
 568             gmx_fatal(FARGS, "GPU ID string set, but %s was compiled without GPU support!", ShortProgram());
 569         }
 570
 571         /* run the detection if the binary was compiled with GPU support */
 572         if (bGPUBin && getenv("GMX_DISABLE_GPU_DETECTION") == NULL)
 573         {
 574             char detection_error[STRLEN];
 575
 576             if (detect_cuda_gpus(&hwinfo_g->gpu_info, detection_error) != 0)
 577             {
 578                 if (detection_error != NULL && detection_error[0] != '\0')
 579                 {
 580                     sprintf(sbuf, ":\n      %s\n", detection_error);
 581                 }
 582                 else
 583                 {
 584                     sprintf(sbuf, ".");
 585                 }
 586                 md_print_warn(cr, fplog,
 587                               "NOTE: Error occurred during GPU detection%s"
 588                               "      Can not use GPU acceleration, will fall back to CPU kernels.\n",
 589                               sbuf);
 590             }
 591         }
 592
 593         if (bForceUseGPU || bTryUseGPU)
 594         {
 595             env = getenv("GMX_GPU_ID");
 596             if (env != NULL && gpu_id != NULL)
 597             {
 598                 gmx_fatal(FARGS, "GMX_GPU_ID and -gpu_id can not be used at the same time");
 599             }
 600             if (env == NULL)
 601             {
 602                 env = gpu_id;
 603             }
 604
 605             /* parse GPU IDs if the user passed any */
 606             if (env != NULL)
 607             {
 608                 int *gpuid, *checkres;
 609                 int  nid, res;
 610
 611                 snew(gpuid, max_gpu_ids_user);
 612                 snew(checkres, max_gpu_ids_user);
 613
 614                 parse_gpu_id_plain_string(env, &nid, gpuid);
 615
 616                 if (nid == 0)
 617                 {
 618                     gmx_fatal(FARGS, "Empty GPU ID string encountered.\n%s\n",
 619                               invalid_gpuid_hint);
 620                 }
 621
 622                 res = check_select_cuda_gpus(checkres, &hwinfo_g->gpu_info,
 623                                              gpuid, nid);
 624
 625                 if (!res)
 626                 {
 627                     print_gpu_detection_stats(fplog, &hwinfo_g->gpu_info, cr);
 628
 629                     sprintf(sbuf, "Some of the requested GPUs do not exist, behave strangely, or are not compatible:\n");
 630                     for (i = 0; i < nid; i++)
 631                     {
 632                         if (checkres[i] != egpuCompatible)
 633                         {
 634                             sprintf(stmp, "    GPU #%d: %s\n",
 635                                     gpuid[i], gpu_detect_res_str[checkres[i]]);
 636                             strcat(sbuf, stmp);
 637                         }
 638                     }
 639                     gmx_fatal(FARGS, "%s", sbuf);
 640                 }
 641
 642                 hwinfo_g->gpu_info.bUserSet = TRUE;
 643
 644                 sfree(gpuid);
 645                 sfree(checkres);
 646             }
 647             else
 648             {
 649                 pick_compatible_gpus(&hwinfo_g->gpu_info);
 650                 hwinfo_g->gpu_info.bUserSet = FALSE;
 651             }
 652
 653             /* decide whether we can use GPU */
 654             hwinfo_g->bCanUseGPU = (hwinfo_g->gpu_info.ncuda_dev_use > 0);
 655             if (!hwinfo_g->bCanUseGPU && bForceUseGPU)
 656             {
 657                 gmx_fatal(FARGS, "GPU acceleration requested, but no compatible GPUs were detected.");
 658             }
 659         }
 660     }
 661     /* increase the reference counter */
 662     n_hwinfo++;
 663
 664     ret = tMPI_Thread_mutex_unlock(&hw_info_lock);
 665     if (ret != 0)
 666     {
 667         gmx_fatal(FARGS, "Error unlocking hwinfo mutex: %s", strerror(errno));
 668     }
 669
 670     return hwinfo_g;
 671 }
 672
 673 static void limit_num_gpus_used(gmx_hw_info_t *hwinfo, int count)
 674 {
 675     int ndev_use;
 676
 677     assert(hwinfo);
 678
 679     ndev_use = hwinfo->gpu_info.ncuda_dev_use;
 680
 681     if (count > ndev_use)
 682     {
 683         /* won't increase the # of GPUs */
 684         return;
 685     }
 686
 687     if (count < 1)
 688     {
 689         char sbuf[STRLEN];
 690         sprintf(sbuf, "Limiting the number of GPUs to <1 doesn't make sense (detected %d, %d requested)!",
 691                 ndev_use, count);
 692         gmx_incons(sbuf);
 693     }
 694
 695     /* TODO: improve this implementation: either sort GPUs or remove the weakest here */
 696     hwinfo->gpu_info.ncuda_dev_use = count;
 697 }
 698
 699 void gmx_hardware_info_free(gmx_hw_info_t *hwinfo)
 700 {
 701     int ret;
 702
 703     ret = tMPI_Thread_mutex_lock(&hw_info_lock);
 704     if (ret != 0)
 705     {
 706         gmx_fatal(FARGS, "Error locking hwinfo mutex: %s", strerror(errno));
 707     }
 708
 709     /* decrease the reference counter */
 710     n_hwinfo--;
 711
 712
 713     if (hwinfo != hwinfo_g)
 714     {
 715         gmx_incons("hwinfo < hwinfo_g");
 716     }
 717
 718     if (n_hwinfo < 0)
 719     {
 720         gmx_incons("n_hwinfo < 0");
 721     }
 722
 723     if (n_hwinfo == 0)
 724     {
 725         gmx_cpuid_done(hwinfo_g->cpuid_info);
 726         free_gpu_info(&hwinfo_g->gpu_info);
 727         sfree(hwinfo_g);
 728     }
 729
 730     ret = tMPI_Thread_mutex_unlock(&hw_info_lock);
 731     if (ret != 0)
 732     {
 733         gmx_fatal(FARGS, "Error unlocking hwinfo mutex: %s", strerror(errno));
 734     }
 735 }