The recent DD load balancing fix, which solved the issue of an incorrect
imbalance measure with GPU sharing (ba8232e9), addressed GPUs with
incorrect indexing. This caused out-of-bounds indexing in the GPU ID
query function. The query function also had a bug in its error checking,
which allowed the incorrect indexing to go undetected.
In addition, mdrun -nb cpu -gpu_id ... is now allowed; previously it
gave a fatal error.
This commit addresses both issues; fixes #1385
Change-Id: I2800f610b873da92afe78bbfd869258f378ba2d7
{
assert(gpu_info);
assert(gpu_opt);
- if (idx < 0 && idx >= gpu_opt->ncuda_dev_use)
- {
- return -1;
- }
+ assert(idx >= 0 && idx < gpu_opt->ncuda_dev_use);
return gpu_info->cuda_dev[gpu_opt->cuda_dev_use[idx]].id;
}
gmx_select_gpu_ids(fplog, cr, &hwinfo->gpu_info, bForceUseGPU,
&hw_opt->gpu_opt);
}
+ else
+ {
+ /* Ignore (potentially) manually selected GPUs */
+ hw_opt->gpu_opt.ncuda_dev_use = 0;
+ }
/* check consistency of CPU acceleration and number of GPUs selected */
gmx_check_hw_runconf_consistency(fplog, hwinfo, cr, hw_opt, bUseGPU);
physicalnode_id_hash = gmx_physicalnode_id_hash();
- gpu_id = get_gpu_device_id(&hwinfo->gpu_info, &hw_opt->gpu_opt, cr->nodeid);
+ gpu_id = get_gpu_device_id(&hwinfo->gpu_info, &hw_opt->gpu_opt, cr->rank_pp_intranode);
dd = cr->dd;