2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012, by the GROMACS development team, led by
5 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
6 * others, as listed in the AUTHORS file in the top-level source
7 * directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
43 #include "types/enums.h"
44 #include "types/hw_info.h"
45 #include "types/commrec.h"
46 #include "gmx_fatal.h"
47 #include "gmx_fatal_collective.h"
49 #include "gpu_utils.h"
51 #include "gmx_detect_hardware.h"
53 #include "md_logging.h"
/* Native Windows build (MSVC/MinGW, not Cygwin): selects the Win32 API path
 * used for hardware-thread detection further below. */
#if ((defined(WIN32) || defined( _WIN32 ) || defined(WIN64) || defined( _WIN64 )) && !(defined (__CYGWIN__) || defined (__CYGWIN32__)))
/* Upper limit on the number of GPU IDs the user can pass (via -gpu_id or the
 * GMX_GPU_ID environment variable). Although we can't have more than 10
 * *different* GPU IDs passed by the user -- the IDs are assumed to be
 * represented by single digits -- multiple processes can share a GPU, so we
 * can end up with more than 10 IDs. To account for potential extreme cases
 * we'll set the limit to a pretty ridiculous number.
 * NOTE(review): this is compared against a size_t strlen() result and
 * printed with %d in parse_gpu_id_plain_string() -- the type/format
 * combination should be reviewed. */
static unsigned int max_gpu_ids_user = 64;
/* Hint appended to every error message about a malformed plain GPU ID string. */
static const char* invalid_gpuid_hint =
"A delimiter-free sequence of valid numeric IDs of available GPUs is expected.";
/* Forward declaration; defined near the end of this file. */
void limit_num_gpus_used(gmx_hw_info_t *hwinfo, int count);
/* Format a per-device summary of the detected CUDA GPUs into sbuf, one line
 * per device produced by get_gpu_device_info_string(). bPrintAll presumably
 * selects whether non-used devices are also listed -- TODO confirm (part of
 * the body is not visible in this view). */
static void sprint_gpus(char *sbuf, const gmx_gpu_info_t *gpu_info, gmx_bool bPrintAll)
ndev = gpu_info->ncuda_dev;
for (i = 0; i < ndev; i++)
get_gpu_device_info_string(stmp, gpu_info, i);
/* Report the outcome of GPU detection to the log and to stderr via
 * md_print_warn(). With real (library) MPI, different nodes can have
 * different hardware, so the hostname is appended to make per-node output
 * distinguishable; with thread-MPI or no MPI the hostname is omitted. */
static void print_gpu_detection_stats(FILE *fplog,
const gmx_gpu_info_t *gpu_info,
/* onhost must hold " on host " (9 chars) + hostname (<= 256) + '\0' = 266 */
char onhost[266],stmp[STRLEN];
ngpu = gpu_info->ncuda_dev;
#if defined GMX_MPI && !defined GMX_THREAD_MPI
/* We only print the detection on one, of possibly multiple, nodes */
strncpy(onhost," on host ",10);  /* 10 bytes: the literal plus its '\0' */
gmx_gethostname(onhost+9,256);
/* We detect all relevant GPUs */
strncpy(onhost,"",1);  /* 1 byte: just the terminating '\0' */
sprint_gpus(stmp, gpu_info, TRUE);
md_print_warn(cr, fplog, "%d GPU%s detected%s:\n%s\n",
ngpu, (ngpu > 1) ? "s" : "", onhost, stmp);
md_print_warn(cr, fplog, "No GPUs detected%s\n", onhost);
/* Report which GPUs will be used for this run. If compatible GPUs were
 * detected but none selected, a note suggesting the Verlet scheme is issued;
 * otherwise the list of user-/auto-selected device IDs is printed. */
static void print_gpu_use_stats(FILE *fplog,
const gmx_gpu_info_t *gpu_info,
char sbuf[STRLEN], stmp[STRLEN];
int i, ngpu, ngpu_all;
ngpu = gpu_info->ncuda_dev_use;  /* devices selected for use */
ngpu_all = gpu_info->ncuda_dev;  /* devices detected */
/* Issue note if GPUs are available but not used */
if (ngpu_all > 0 && ngpu < 1)
"%d compatible GPU%s detected in the system, but none will be used.\n"
"Consider trying GPU acceleration with the Verlet scheme!",
ngpu_all, (ngpu_all > 1) ? "s" : "");
sprintf(sbuf, "%d GPU%s %sselected for this run: ",
ngpu, (ngpu > 1) ? "s" : "",
gpu_info->bUserSet ? "user-" : "auto-");
/* Append each selected device ID to the summary line. */
for (i = 0; i < ngpu; i++)
sprintf(stmp, "#%d", get_gpu_device_id(gpu_info, i));
md_print_info(cr, fplog, "%s\n\n", sbuf);
/* Parse a "plain" GPU ID string which contains a sequence of digits corresponding
 * to GPU IDs; the order will indicate the process/tMPI thread - GPU assignment.
 * On return *nid holds the number of IDs and idlist[0..*nid-1] the parsed
 * values; a non-digit character or an over-long string is a fatal error.
 * Because each ID is a single digit, only devices 0-9 can be addressed. */
static void parse_gpu_id_plain_string(const char *idstr, int *nid, int *idlist)
len_idstr = strlen(idstr);
if (len_idstr > max_gpu_ids_user)
/* NOTE(review): len_idstr is (presumably) a size_t and max_gpu_ids_user an
 * unsigned int, but both are printed with %d -- should be %zu / %u. */
gmx_fatal(FARGS,"%d GPU IDs provided, but only at most %d are supported",
len_idstr, max_gpu_ids_user);
for (i = 0; i < *nid; i++)
if (idstr[i] < '0' || idstr[i] > '9')
gmx_fatal(FARGS, "Invalid character in GPU ID string: '%c'\n%s\n",
idstr[i], invalid_gpuid_hint);
idlist[i] = idstr[i] - '0';  /* single digit -> numeric device ID */
/* Parse a comma-separated GPU ID string. Intended to lift the single-digit
 * limitation of the plain format (device IDs >= 10); currently unimplemented
 * and always aborts via gmx_incons(). */
static void parse_gpu_id_csv_string(const char *idstr, int *nid, int *idlist)
/* XXX implement csv format to support more than 10 different GPUs in a box. */
gmx_incons("Not implemented yet");
/* Check the consistency of the launch configuration (MPI / thread-MPI rank
 * counts, GPU assignment) against the detected hardware. Sub-optimal setups
 * produce notes/warnings; invalid ones (e.g. a user-specified GPU count that
 * does not match the PP rank count, or GPU sharing under thread-MPI) are
 * fatal. PME-only ranks are not checked and return early. */
void gmx_check_hw_runconf_consistency(FILE *fplog, gmx_hw_info_t *hwinfo,
const t_commrec *cr, int ntmpi_requested,
int npppn, ntmpi_pp, ngpu;
char sbuf[STRLEN], th_or_proc[STRLEN], th_or_proc_plural[STRLEN], pernode[STRLEN];
gmx_bool bGPUBin, btMPI, bMPI, bMaxMpiThreadsSet, bNthreadsAuto, bEmulateGPU;
btMPI = bMPI = FALSE;
bNthreadsAuto = FALSE;
#if defined(GMX_THREAD_MPI)
/* With thread-MPI a requested count < 1 means mdrun auto-picks the count. */
bNthreadsAuto = (ntmpi_requested < 1);
#elif defined(GMX_LIB_MPI)
/* GPU emulation detection is done later, but we need it here as well
 * -- uncool, but there's no elegant workaround */
bEmulateGPU = (getenv("GMX_EMULATE_GPU") != NULL);
bMaxMpiThreadsSet = (getenv("GMX_MAX_MPI_THREADS") != NULL);
/* check the acceleration mdrun is compiled with against hardware capabilities */
/* TODO: Here we assume homogeneous hardware which is not necessarily the case!
 * Might not hurt to add an extra check over MPI. */
gmx_cpuid_acceleration_check(hwinfo->cpuid_info, fplog);
/* Below we only do consistency checks for PP and GPUs;
 * this is irrelevant for PME-only nodes, so in that case we return here. */
if (!(cr->duty & DUTY_PP))
/* Need to ensure that we have enough GPUs:
 * - need one GPU per PP node
 * - no GPU oversubscription with tMPI
 * => keep on the GPU support, otherwise turn off (or bail if forced) */
/* number of PP processes per node */
npppn = cr->nrank_pp_intranode;
th_or_proc_plural[0] = '\0';
/* Pick grammatically correct wording for the parallelization mode in use. */
sprintf(th_or_proc, "thread-MPI thread");
sprintf(th_or_proc_plural, "s");
sprintf(th_or_proc, "MPI process");
sprintf(th_or_proc_plural, "es");
sprintf(pernode, " per node");
/* neither MPI nor tMPI */
sprintf(th_or_proc, "process");
print_gpu_detection_stats(fplog, &hwinfo->gpu_info, cr);
if (bUseGPU && hwinfo->bCanUseGPU && !bEmulateGPU)
ngpu = hwinfo->gpu_info.ncuda_dev_use;
sprintf(gpu_plural, "%s", (ngpu > 1) ? "s" : "");
/* number of tMPI threads auto-adjusted */
if (btMPI && bNthreadsAuto && SIMMASTER(cr))
if (hwinfo->gpu_info.bUserSet)
/* The user manually provided more GPUs than threads we could
 * automatically start. */
/* NOTE(review): user-visible typos in the messages below: "coud" should be
 * "could" and "tread-MPI" should be "thread-MPI" (also further down). */
"%d GPU%s provided, but only %d PP thread-MPI thread%s coud be started.\n"
"%s requires one PP tread-MPI thread per GPU; use fewer GPUs%s.",
ngpu, gpu_plural, npppn, th_or_proc_plural,
ShortProgram(), bMaxMpiThreadsSet ? "\nor allow more threads to be used" : "");
/* There are more GPUs than tMPI threads; we have to limit the number GPUs used. */
md_print_warn(cr,fplog,
"NOTE: %d GPU%s were detected, but only %d PP thread-MPI thread%s can be started.\n"
" %s can use one GPU per PP tread-MPI thread, so only %d GPU%s will be used.%s\n",
ngpu, gpu_plural, npppn, th_or_proc_plural,
ShortProgram(), npppn, npppn > 1 ? "s" : "",
bMaxMpiThreadsSet ? "\n Also, you can allow more threads to be used by increasing GMX_MAX_MPI_THREADS" : "");
/* Only the first rank on each node trims the shared GPU list. */
if (cr->rank_pp_intranode == 0)
limit_num_gpus_used(hwinfo, npppn);
ngpu = hwinfo->gpu_info.ncuda_dev_use;
sprintf(gpu_plural, "%s", (ngpu > 1) ? "s" : "");
if (hwinfo->gpu_info.bUserSet)
/* A user-specified GPU list whose length mismatches the PP count is fatal. */
"Incorrect launch configuration: mismatching number of PP %s%s and GPUs%s.\n"
"%s was started with %d PP %s%s%s, but you provided %d GPU%s.",
th_or_proc, btMPI ? "s" : "es" , pernode,
ShortProgram(), npppn, th_or_proc, th_or_proc_plural, pernode, ngpu, gpu_plural);
md_print_warn(cr,fplog,
"NOTE: potentially sub-optimal launch configuration, %s started with less\n"
" PP %s%s%s than GPU%s available.\n"
" Each PP %s can use only one GPU, %d GPU%s%s will be used.\n",
th_or_proc, th_or_proc_plural, pernode, gpu_plural,
th_or_proc, npppn, gpu_plural, pernode);
if (bMPI || (btMPI && cr->rank_pp_intranode == 0))
limit_num_gpus_used(hwinfo, npppn);
ngpu = hwinfo->gpu_info.ncuda_dev_use;
sprintf(gpu_plural, "%s", (ngpu > 1) ? "s" : "");
/* Avoid duplicate error messages.
 * Unfortunately we can only do this at the physical node
 * level, since the hardware setup and MPI process count
 * might differ over physical nodes. */
if (cr->rank_pp_intranode == 0)
"Incorrect launch configuration: mismatching number of PP %s%s and GPUs%s.\n"
"%s was started with %d PP %s%s%s, but only %d GPU%s were detected.",
th_or_proc, btMPI ? "s" : "es" , pernode,
ShortProgram(), npppn, th_or_proc, th_or_proc_plural, pernode, ngpu, gpu_plural);
/* Prevent other ranks from continuing after an inconsistency was found. */
MPI_Barrier(cr->mpi_comm_mygroup);
/* Determine whether any GPU is assigned to more than one rank on this node. */
hwinfo->gpu_info.bDevShare = FALSE;
if (hwinfo->gpu_info.bUserSet && (cr->rank_pp_intranode == 0))
int i, j, same_count;
gmx_bool bSomeSame, bAllDifferent;
same_count = 0; /* number of GPUs shared among ranks */
/* NOTE(review): bSomeSame is accumulated with |= below; its initialization
 * to FALSE is not visible here -- confirm it happens before the loop. */
bAllDifferent = TRUE;
/* All-pairs comparison of the assigned device IDs. */
for (i = 0; i < ngpu - 1; i++)
for (j = i + 1; j < ngpu; j++)
bSomeSame |= hwinfo->gpu_info.cuda_dev_use[i] == hwinfo->gpu_info.cuda_dev_use[j];
bAllDifferent &= hwinfo->gpu_info.cuda_dev_use[i] != hwinfo->gpu_info.cuda_dev_use[j];
same_count += hwinfo->gpu_info.cuda_dev_use[i] == hwinfo->gpu_info.cuda_dev_use[j];
/* store the number of shared/oversubscribed GPUs */
hwinfo->gpu_info.bDevShare = bSomeSame;
/* Sharing a GPU among several thread-MPI threads is not supported. */
if (btMPI && !bAllDifferent)
"Invalid GPU assignment: can't share a GPU among multiple thread-MPI threads.\n"
"Use MPI if you are sure that you want to assign GPU to multiple threads.");
md_print_warn(cr,fplog,
"NOTE: Potentially sub-optimal launch configuration: you assigned %s to\n"
" multiple %s%s; this should be avoided as it can cause\n"
" performance loss.\n",
same_count > 1 ? "GPUs" : "a GPU", th_or_proc, btMPI ? "s" : "es");
print_gpu_use_stats(fplog, &hwinfo->gpu_info, cr);
/* Return the number of hardware threads supported by the current CPU.
 * We assume that this is equal to the number of CPUs reported to be
 * online by the OS at the time of the call. */
static int get_nthreads_hw_avail(FILE *fplog, const t_commrec *cr)
#if ((defined(WIN32) || defined( _WIN32 ) || defined(WIN64) || defined( _WIN64 )) && !(defined (__CYGWIN__) || defined (__CYGWIN32__)))
/* Native Windows: ask the Win32 API for the processor count. */
GetSystemInfo( &sysinfo );
ret = sysinfo.dwNumberOfProcessors;
#elif defined HAVE_SYSCONF
/* We are probably on Unix.
 * Now check which sysconf argument is available before making the call. */
#if defined(_SC_NPROCESSORS_ONLN)
ret = sysconf(_SC_NPROCESSORS_ONLN);
#elif defined(_SC_NPROC_ONLN)
ret = sysconf(_SC_NPROC_ONLN);
#elif defined(_SC_NPROCESSORS_CONF)
ret = sysconf(_SC_NPROCESSORS_CONF);
#elif defined(_SC_NPROC_CONF)
ret = sysconf(_SC_NPROC_CONF);
#endif /* End of check for sysconf argument values */
/* Neither windows nor Unix. No fscking idea how many CPUs we have! */
fprintf(debug, "Detected %d processors, will use this as the number "
"of supported hardware threads.\n", ret);
/* Cross-check against OpenMP; a mismatch usually means affinity masks or
 * similar restrictions limit this process. */
if (ret != gmx_omp_get_num_procs())
md_print_warn(cr, fplog,
"Number of CPUs detected (%d) does not match the number reported by OpenMP (%d).\n"
"Consider setting the launch configuration manually!",
ret, gmx_omp_get_num_procs());
/* Detect the available hardware -- CPUID information, hardware thread count
 * and, when compiled with GPU support, CUDA devices -- and fill hwinfo.
 * GPU selection honors an explicit ID string (-gpu_id or GMX_GPU_ID) or
 * auto-picks compatible devices. With bForceUseGPU, missing GPU support or
 * missing compatible devices is fatal; with bTryUseGPU it is tolerated. */
void gmx_detect_hardware(FILE *fplog, gmx_hw_info_t *hwinfo,
gmx_bool bForceUseGPU, gmx_bool bTryUseGPU,
char sbuf[STRLEN], stmp[STRLEN];
gmx_gpu_info_t gpuinfo_auto, gpuinfo_user;
/* detect CPUID info; no fuss, we don't detect system-wide
 * -- sloppy, but that's it for now */
if (gmx_cpuid_init(&hwinfo->cpuid_info) != 0)
gmx_fatal_collective(FARGS, cr, NULL, "CPUID detection failed!");
/* detect number of hardware threads */
hwinfo->nthreads_hw_avail = get_nthreads_hw_avail(fplog, cr);
/* Start from a clean slate: no GPUs detected or selected yet. */
hwinfo->gpu_info.ncuda_dev_use = 0;
hwinfo->gpu_info.cuda_dev_use = NULL;
hwinfo->gpu_info.ncuda_dev = 0;
hwinfo->gpu_info.cuda_dev = NULL;
/* Bail if binary is not compiled with GPU acceleration, but this is either
 * explicitly (-nb gpu) or implicitly (gpu ID passed) requested. */
if (bForceUseGPU && !bGPUBin)
gmx_fatal(FARGS, "GPU acceleration requested, but %s was compiled without GPU support!", ShortProgram());
if (gpu_id != NULL && !bGPUBin)
gmx_fatal(FARGS, "GPU ID string set, but %s was compiled without GPU support!", ShortProgram());
/* run the detection if the binary was compiled with GPU support */
if (bGPUBin && getenv("GMX_DISABLE_GPU_DETECTION")==NULL)
char detection_error[STRLEN];
if (detect_cuda_gpus(&hwinfo->gpu_info, detection_error) != 0)
/* NOTE(review): detection_error is an array, so the != NULL test below is
 * always true; only the empty-string check is meaningful. */
if (detection_error != NULL && detection_error[0] != '\0')
sprintf(sbuf, ":\n %s\n", detection_error);
/* Detection failure is non-fatal: warn and fall back to CPU kernels. */
md_print_warn(cr, fplog,
"NOTE: Error occurred during GPU detection%s"
" Can not use GPU acceleration, will fall back to CPU kernels.\n",
if (bForceUseGPU || bTryUseGPU)
env = getenv("GMX_GPU_ID");
/* The environment variable and the command-line option are exclusive. */
if (env != NULL && gpu_id != NULL)
gmx_fatal(FARGS,"GMX_GPU_ID and -gpu_id can not be used at the same time");
/* parse GPU IDs if the user passed any */
int *gpuid, *checkres;
snew(gpuid, max_gpu_ids_user);
snew(checkres, max_gpu_ids_user);
parse_gpu_id_plain_string(env, &nid, gpuid);
gmx_fatal(FARGS, "Empty GPU ID string encountered.\n%s\n", invalid_gpuid_hint);
/* Validate the requested IDs against the detected devices. */
res = check_select_cuda_gpus(checkres, &hwinfo->gpu_info, gpuid, nid);
print_gpu_detection_stats(fplog, &hwinfo->gpu_info, cr);
sprintf(sbuf, "Some of the requested GPUs do not exist, behave strangely, or are not compatible:\n");
for (i = 0; i < nid; i++)
if (checkres[i] != egpuCompatible)
sprintf(stmp, " GPU #%d: %s\n",
gpuid[i], gpu_detect_res_str[checkres[i]]);
gmx_fatal(FARGS, "%s", sbuf);
hwinfo->gpu_info.bUserSet = TRUE;
/* No explicit IDs: auto-select all compatible GPUs. */
pick_compatible_gpus(&hwinfo->gpu_info);
hwinfo->gpu_info.bUserSet = FALSE;
/* decide whether we can use GPU */
hwinfo->bCanUseGPU = (hwinfo->gpu_info.ncuda_dev_use > 0);
if (!hwinfo->bCanUseGPU && bForceUseGPU)
gmx_fatal(FARGS, "GPU acceleration requested, but no compatible GPUs were detected.");
/* Reduce the number of GPUs in use (gpu_info.ncuda_dev_use) to count.
 * Called when there are fewer PP ranks/threads than selected GPUs. The
 * count is never increased, and requesting fewer than one GPU is an error. */
void limit_num_gpus_used(gmx_hw_info_t *hwinfo, int count)
ndev_use = hwinfo->gpu_info.ncuda_dev_use;
/* won't increase the # of GPUs */
if (count > ndev_use)
sprintf(sbuf, "Limiting the number of GPUs to <1 doesn't make sense (detected %d, %d requested)!",
/* TODO: improve this implementation: either sort GPUs or remove the weakest here */
hwinfo->gpu_info.ncuda_dev_use = count;
/* Release the resources held by hwinfo: the CPUID data and the GPU info
 * lists. The hwinfo struct itself is owned by the caller. */
void gmx_hardware_info_free(gmx_hw_info_t *hwinfo)
gmx_cpuid_done(hwinfo->cpuid_info);
free_gpu_info(&hwinfo->gpu_info);