}
#endif
-void dd_setup_dlb_resource_sharing(t_commrec gmx_unused *cr,
- const gmx_hw_info_t gmx_unused *hwinfo,
- const gmx_hw_opt_t gmx_unused &hw_opt)
+void dd_setup_dlb_resource_sharing(t_commrec *cr,
+ int gpu_id)
{
#if GMX_MPI
int physicalnode_id_hash;
- int gpu_id;
gmx_domdec_t *dd;
MPI_Comm mpi_comm_pp_physicalnode;
- if (!(cr->duty & DUTY_PP) || hw_opt.gpu_opt.n_dev_use == 0)
+ if (!(cr->duty & DUTY_PP) || gpu_id < 0)
{
- /* Only PP nodes (currently) use GPUs.
- * If we don't have GPUs, there are no resources to share.
+ /* Only ranks with short-ranged tasks (currently) use GPUs.
+ * If we don't have GPUs assigned, there are no resources to share.
*/
return;
}
physicalnode_id_hash = gmx_physicalnode_id_hash();
- gpu_id = get_gpu_device_id(hwinfo->gpu_info, &hw_opt.gpu_opt, cr->rank_pp_intranode);
-
dd = cr->dd;
if (debug)
{
MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
}
+#else
+ GMX_UNUSED_VALUE(cr);
+ GMX_UNUSED_VALUE(gpu_id);
#endif
}
#include <stdio.h>
#include "gromacs/gmxlib/nrnb.h"
-#include "gromacs/hardware/hw_info.h"
#include "gromacs/math/vectypes.h"
#include "gromacs/mdlib/vsite.h"
#include "gromacs/mdtypes/forcerec.h"
/*! \brief Clear a lock such that with DLB=auto DLB may get turned on later */
void dd_dlb_unlock(struct gmx_domdec_t *dd);
-/*! \brief Set up communication for averaging GPU wait times over ranks
+/*! \brief Set up communication for averaging GPU wait times over domains
*
* When domains (PP MPI ranks) share a GPU, the individual GPU wait times
* are meaningless, as it depends on the order in which tasks on the same
* GPU finish. Therefore there wait times need to be averaged over the ranks
* sharing the same GPU. This function sets up the communication for that.
*/
-void dd_setup_dlb_resource_sharing(struct t_commrec *cr,
- const gmx_hw_info_t *hwinfo,
- const gmx_hw_opt_t &hw_opt);
+void dd_setup_dlb_resource_sharing(t_commrec *cr,
+ int gpu_id);
/*! \brief Collects local rvec arrays \p lv to \p v on the master rank */
void dd_collect_vec(struct gmx_domdec_t *dd,
/*! \brief Tries to set application clocks for the GPU with the given index.
*
- * The variable \gpuid is the index of the GPU in the gpu_info.cuda_dev array
- * to handle the application clocks for. Application clocks are set to the
- * max supported value to increase performance if application clock permissions
- * allow this. For future GPU architectures a more sophisticated scheme might be
- * required.
+ * Application clocks are set to the max supported value to increase
+ * performance if application clock permissions allow this. For future
+ * GPU architectures a more sophisticated scheme might be required.
*
* \todo Refactor this into a detection phase and a work phase. Also
* refactor to remove compile-time dependence on logging header.
*
* \param mdlog log file to write to
- * \param[in] gpuid index of the GPU to set application clocks for
- * \param[in] gpu_info GPU info of all detected devices in the system.
+ * \param[in] cuda_dev GPU device info for the GPU in use
* \returns true if no error occurs during application clocks handling.
*/
static gmx_bool init_gpu_application_clocks(
- const gmx::MDLogger &mdlog, int gmx_unused gpuid,
- const gmx_gpu_info_t gmx_unused *gpu_info)
+ const gmx::MDLogger &mdlog,
+ gmx_device_info_t *cuda_dev)
{
- const cudaDeviceProp *prop = &gpu_info->gpu_dev[gpuid].prop;
+ const cudaDeviceProp *prop = &cuda_dev->prop;
int cuda_compute_capability = prop->major * 10 + prop->minor;
gmx_bool bGpuCanUseApplicationClocks =
((0 == gmx_wcmatch("*Tesla*", prop->name) && cuda_compute_capability >= 35 ) ||
return false;
}
- gmx_device_info_t *cuda_dev = &(gpu_info->gpu_dev[gpuid]);
-
if (!addNVMLDeviceId(cuda_dev))
{
return false;
#endif /* HAVE_NVML_APPLICATION_CLOCKS */
}
-void init_gpu(const gmx::MDLogger &mdlog, int rank, int mygpu,
- const struct gmx_gpu_info_t *gpu_info,
- const struct gmx_gpu_opt_t *gpu_opt)
+void init_gpu(const gmx::MDLogger &mdlog, int rank,
+ gmx_device_info_t *deviceInfo)
{
cudaError_t stat;
char sbuf[STRLEN];
- int gpuid;
-
- assert(gpu_info);
- assert(gpu_opt);
-
- if (mygpu < 0 || mygpu >= gpu_opt->n_dev_use)
- {
- snprintf(sbuf, STRLEN, "On rank %d trying to initialize an non-existent GPU: "
- "there are %d selected GPU(s), but #%d was requested.",
- rank, gpu_opt->n_dev_use, mygpu);
- gmx_incons(sbuf);
- }
- gpuid = gpu_info->gpu_dev[gpu_opt->dev_use[mygpu]].id;
+ assert(deviceInfo);
- stat = cudaSetDevice(gpuid);
+ stat = cudaSetDevice(deviceInfo->id);
if (stat != cudaSuccess)
{
snprintf(sbuf, STRLEN, "On rank %d failed to initialize GPU #%d",
- rank, mygpu);
+ rank, deviceInfo->id);
CU_RET_ERR(stat, sbuf);
}
if (debug)
{
- fprintf(stderr, "Initialized GPU ID #%d: %s\n", gpuid, gpu_info->gpu_dev[gpuid].prop.name);
+ fprintf(stderr, "Initialized GPU ID #%d: %s\n", deviceInfo->id, deviceInfo->prop.name);
}
//Ignoring return value as NVML errors should be treated not critical.
- init_gpu_application_clocks(mdlog, gpuid, gpu_info);
+ init_gpu_application_clocks(mdlog, deviceInfo);
}
-gmx_bool free_cuda_gpu(
- int gmx_unused mygpu, char *result_str,
- const gmx_gpu_info_t gmx_unused *gpu_info,
- const gmx_gpu_opt_t gmx_unused *gpu_opt
- )
+gmx_bool free_cuda_gpu(const gmx_device_info_t *deviceInfo,
+ char *result_str)
{
cudaError_t stat;
gmx_bool reset_gpu_application_clocks_status = true;
- int gpuid;
assert(result_str);
fprintf(stderr, "Cleaning up context on GPU ID #%d\n", gpuid);
}
- gpuid = gpu_opt ? gpu_opt->dev_use[mygpu] : -1;
- if (gpuid != -1)
+ if (deviceInfo != nullptr)
{
- reset_gpu_application_clocks_status = reset_gpu_application_clocks( &(gpu_info->gpu_dev[gpuid]) );
+ reset_gpu_application_clocks_status = reset_gpu_application_clocks(deviceInfo);
}
stat = cudaDeviceReset();
return (stat == cudaSuccess) && reset_gpu_application_clocks_status;
}
+gmx_device_info_t *getDeviceInfo(const gmx_gpu_info_t &gpu_info,
+ int deviceId)
+{
+ if (deviceId < 0 || deviceId >= gpu_info.n_dev)
+ {
+ gmx_incons("Invalid GPU deviceId requested");
+ }
+ return &gpu_info.gpu_dev[deviceId];
+}
+
/*! \brief Returns true if the gpu characterized by the device properties is
* supported by the native gpu acceleration.
*
#include "gromacs/gpu_utils/gpu_macros.h"
#include "gromacs/utility/basedefinitions.h"
+struct gmx_device_info_t;
struct gmx_gpu_info_t;
struct gmx_gpu_opt_t;
GPU_FUNC_QUALIFIER
void free_gpu_info(const struct gmx_gpu_info_t *GPU_FUNC_ARGUMENT(gpu_info)) GPU_FUNC_TERM
-/*! \brief Initializes the GPU with the given index.
+/*! \brief Initializes the GPU described by \c deviceInfo.
*
- * The varible \p mygpu is the index of the GPU to initialize in the
- * gpu_info.gpu_dev array.
+ * TODO Doxygen complains about these - probably a Doxygen bug, since
+ * the patterns here are the same as elsewhere in this header.
*
- * \param mdlog log file to write to
- * \param[in] rank MPI rank of this process (for error output)
- * \param[in] mygpu index of the GPU to initialize
- * \param[in] gpu_info GPU info of all detected devices in the system.
- * \param[in] gpu_opt options for using the GPUs in gpu_info
+ * param[in] mdlog log file to write to
+ * param[in] rank MPI rank of this process (for error output)
+ * \param[inout] deviceInfo device info of the GPU to initialize
*
* Issues a fatal error for any critical errors that occur during
* initialization.
GPU_FUNC_QUALIFIER
void init_gpu(const gmx::MDLogger &GPU_FUNC_ARGUMENT(mdlog),
int GPU_FUNC_ARGUMENT(rank),
- int GPU_FUNC_ARGUMENT(mygpu),
- const struct gmx_gpu_info_t *GPU_FUNC_ARGUMENT(gpu_info),
- const gmx_gpu_opt_t *GPU_FUNC_ARGUMENT(gpu_opt)) GPU_FUNC_TERM
+ gmx_device_info_t *GPU_FUNC_ARGUMENT(deviceInfo)) GPU_FUNC_TERM
/*! \brief Frees up the CUDA GPU used by the active context at the time of calling.
*
* The context is explicitly destroyed and therefore all data uploaded to the GPU
* is lost. This should only be called when none of this data is required anymore.
*
- * \param[in] mygpu index of the GPU clean up for
+ * \param[in] deviceInfo device info of the GPU to clean up for
* \param[out] result_str the message related to the error that occurred
* during the initialization (if there was any).
- * \param[in] gpu_info GPU info of all detected devices in the system.
- * \param[in] gpu_opt options for using the GPUs in gpu_info
+ *
* \returns true if no error occurs during the freeing.
*/
CUDA_FUNC_QUALIFIER
-gmx_bool free_cuda_gpu(int CUDA_FUNC_ARGUMENT(mygpu),
- char *CUDA_FUNC_ARGUMENT(result_str),
- const gmx_gpu_info_t *CUDA_FUNC_ARGUMENT(gpu_info),
- const gmx_gpu_opt_t *CUDA_FUNC_ARGUMENT(gpu_opt)) CUDA_FUNC_TERM_WITH_RETURN(TRUE)
+gmx_bool free_cuda_gpu(const gmx_device_info_t *CUDA_FUNC_ARGUMENT(deviceInfo),
+ char *CUDA_FUNC_ARGUMENT(result_str)) CUDA_FUNC_TERM_WITH_RETURN(TRUE)
+
+/*! \brief Return a pointer to the device info for \c deviceId
+ *
+ * \param[in] gpu_info GPU info of all detected devices in the system.
+ * \param[in] deviceId ID for the GPU device requested.
+ *
+ * \returns Pointer to the device info for \c deviceId.
+ */
+GPU_FUNC_QUALIFIER
+gmx_device_info_t *getDeviceInfo(const gmx_gpu_info_t &GPU_FUNC_ARGUMENT(gpu_info),
+ int GPU_FUNC_ARGUMENT(deviceId)) GPU_FUNC_TERM_WITH_RETURN(NULL)
/*! \brief Returns the device ID of the CUDA GPU currently in use.
*
//! This function is documented in the header file
void init_gpu(const gmx::MDLogger & /*mdlog*/,
- int rank,
- int mygpu,
- const gmx_gpu_info_t *gpu_info,
- const gmx_gpu_opt_t *gpu_opt
- )
+ int /* rank */,
+ gmx_device_info_t *deviceInfo)
{
- assert(gpu_opt);
-
- if (mygpu < 0 || mygpu >= gpu_opt->n_dev_use)
- {
- char sbuf[STRLEN];
- sprintf(sbuf, "On rank %d trying to initialize an non-existent GPU: "
- "there are %d selected GPU(s), but #%d was requested.",
- rank, gpu_opt->n_dev_use, mygpu);
- gmx_incons(sbuf);
- }
+ assert(deviceInfo);
// If the device is NVIDIA, for safety reasons we disable the JIT
// caching as this is known to be broken at least until driver 364.19;
// the cache does not always get regenerated when the source code changes,
// e.g. if the path to the kernel sources remains the same
- if (gpu_info->gpu_dev[mygpu].vendor_e == OCL_VENDOR_NVIDIA)
+ if (deviceInfo->vendor_e == OCL_VENDOR_NVIDIA)
{
// Ignore return values, failing to set the variable does not mean
// that something will go wrong later.
return gpu_opt->dev_use[idx];
}
+//! This function is documented in the header file
+gmx_device_info_t *getDeviceInfo(const gmx_gpu_info_t &gpu_info,
+ int deviceId)
+{
+ if (deviceId < 0 || deviceId >= gpu_info.n_dev)
+ {
+ gmx_incons("Invalid GPU deviceId requested");
+ }
+ return &gpu_info.gpu_dev[deviceId];
+}
+
//! This function is documented in the header file
char* get_ocl_gpu_device_name(const gmx_gpu_info_t *gpu_info,
const gmx_gpu_opt_t *gpu_opt,
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include <string>
#include "gromacs/gpu_utils/oclutils.h"
-#include "gromacs/hardware/gpu_hw_info.h"
namespace gmx
{
#include "gromacs/timing/wallcycle.h"
#include "gromacs/utility/arrayref.h"
+struct gmx_device_info_t;
struct gmx_edsam;
struct gmx_gpu_info_t;
struct gmx_groups_t;
void free_gpu_resources(const t_forcerec *fr,
const t_commrec *cr,
- const gmx_gpu_info_t *gpu_info,
- const gmx_gpu_opt_t *gpu_opt);
+ const gmx_device_info_t *deviceInfo);
#endif
*interaction_const = ic;
}
+/* TODO deviceInfo should be logically const, but currently
+ * init_gpu modifies it to set up NVML support. This could
+ * happen during the detection phase, and deviceInfo could
+ * the become const. */
static void init_nb_verlet(FILE *fp,
const gmx::MDLogger &mdlog,
nonbonded_verlet_t **nb_verlet,
const t_inputrec *ir,
const t_forcerec *fr,
const t_commrec *cr,
- const char *nbpu_opt)
+ const char *nbpu_opt,
+ gmx_device_info_t *deviceInfo)
{
nonbonded_verlet_t *nbv;
int i;
snew(nbv, 1);
nbv->emulateGpu = (getenv("GMX_EMULATE_GPU") != nullptr);
- nbv->bUseGPU = (fr->gpu_opt->n_dev_use > 0);
+ nbv->bUseGPU = deviceInfo != nullptr;
+
GMX_RELEASE_ASSERT(!(nbv->emulateGpu && nbv->bUseGPU), "When GPU emulation is active, there cannot be a GPU assignment");
if (nbv->bUseGPU)
{
- /* This PP MPI rank uses the GPU that the GPU assignment
- * prepared for it, which is the entry in gpu_opt->dev_use
- * corresponding to the index of this PP MPI rank within the
- * set of such ranks on this node. */
- init_gpu(mdlog, cr->nodeid, cr->rank_pp_intranode,
- &fr->hwinfo->gpu_info, fr->gpu_opt);
+ /* Use the assigned GPU. */
+ init_gpu(mdlog, cr->nodeid, deviceInfo);
}
nbv->nbs = nullptr;
/* init the NxN GPU data; the last argument tells whether we'll have
* both local and non-local NB calculation on GPU */
nbnxn_gpu_init(&nbv->gpu_nbv,
- &fr->hwinfo->gpu_info,
- fr->gpu_opt,
+ deviceInfo,
fr->ic,
nbv->grp,
- cr->rank_pp_intranode,
cr->nodeid,
(nbv->ngrp > 1) && !bHybridGPURun);
const char *tabpfn,
const t_filenm *tabbfnm,
const char *nbpu_opt,
+ gmx_device_info_t *deviceInfo,
gmx_bool bNoSolvOpt,
real print_force)
{
gmx_bool bFEP_NonBonded;
int *nm_ind, egp_flags;
- if (fr->hwinfo == nullptr)
- {
- /* Detect hardware, gather information.
- * In mdrun, hwinfo has already been set before calling init_forcerec.
- * Here we ignore GPUs, as tools will not use them anyhow.
- */
- fr->hwinfo = gmx_detect_hardware(mdlog, cr, FALSE);
- }
-
/* By default we turn SIMD kernels on, but it might be turned off further down... */
fr->use_simd_kernels = TRUE;
GMX_RELEASE_ASSERT(ir->rcoulomb == ir->rvdw, "With Verlet lists and no PME rcoulomb and rvdw should be identical");
}
- init_nb_verlet(fp, mdlog, &fr->nbv, bFEP_NonBonded, ir, fr, cr, nbpu_opt);
+ init_nb_verlet(fp, mdlog, &fr->nbv, bFEP_NonBonded, ir, fr, cr, nbpu_opt, deviceInfo);
}
if (ir->eDispCorr != edispcNO)
* in this run because the PME ranks have no knowledge of whether GPUs
* are used or not, but all ranks need to enter the barrier below.
*/
-void free_gpu_resources(const t_forcerec *fr,
- const t_commrec *cr,
- const gmx_gpu_info_t *gpu_info,
- const gmx_gpu_opt_t *gpu_opt)
+void free_gpu_resources(const t_forcerec *fr,
+ const t_commrec *cr,
+ const gmx_device_info_t *deviceInfo)
{
gmx_bool bIsPPrankUsingGPU;
char gpu_err_str[STRLEN];
if (bIsPPrankUsingGPU)
{
/* uninitialize GPU (by destroying the context) */
- if (!free_cuda_gpu(cr->rank_pp_intranode, gpu_err_str, gpu_info, gpu_opt))
+ if (!free_cuda_gpu(deviceInfo, gpu_err_str))
{
gmx_warning("On rank %d failed to free GPU #%d: %s",
cr->nodeid, get_current_cuda_gpu_device_id(), gpu_err_str);
#include "gromacs/mdtypes/forcerec.h"
#include "gromacs/timing/wallcycle.h"
+struct gmx_device_info_t;
struct t_commrec;
struct t_fcdata;
struct t_filenm;
* \param[in] tabpfn Table potential file for pair interactions
* \param[in] tabbfnm Table potential files for bonded interactions
* \param[in] nbpu_opt Nonbonded Processing Unit (GPU/CPU etc.)
+ * \param[in] deviceInfo Info about GPU device to use for short-ranged work
* \param[in] bNoSolvOpt Do not use solvent optimization
* \param[in] print_force Print forces for atoms with force >= print_force
*/
const char *tabpfn,
const t_filenm *tabbfnm,
const char *nbpu_opt,
+ gmx_device_info_t *deviceInfo,
gmx_bool bNoSolvOpt,
real print_force);
/*! Returns the number of blocks to be used for the nonbonded GPU kernel. */
-static inline int calc_nb_kernel_nblock(int nwork_units, gmx_device_info_t *dinfo)
+static inline int calc_nb_kernel_nblock(int nwork_units, const gmx_device_info_t *dinfo)
{
int max_grid_x_size;
int evdwtype,
bool bDoEne,
bool bDoPrune,
- struct gmx_device_info_t gmx_unused *devInfo)
+ const gmx_device_info_t gmx_unused *devInfo)
{
nbnxn_cu_kfunc_ptr_t res;
}
/*! Calculates the amount of shared memory required by the CUDA kernel in use. */
-static inline int calc_shmem_required(const int num_threads_z, gmx_device_info_t gmx_unused *dinfo, const cu_nbparam_t *nbp)
+static inline int calc_shmem_required(const int num_threads_z, const gmx_device_info_t gmx_unused *dinfo, const cu_nbparam_t *nbp)
{
int shmem;
/*! Set up the cache configuration for the non-bonded kernels,
*/
-void nbnxn_cuda_set_cacheconfig(gmx_device_info_t *devinfo)
+void nbnxn_cuda_set_cacheconfig(const gmx_device_info_t *devinfo)
{
cudaError_t stat;
static unsigned int gpu_min_ci_balanced_factor = 44;
/* Functions from nbnxn_cuda.cu */
-extern void nbnxn_cuda_set_cacheconfig(gmx_device_info_t *devinfo);
+extern void nbnxn_cuda_set_cacheconfig(const gmx_device_info_t *devinfo);
extern const struct texture<float, 1, cudaReadModeElementType> &nbnxn_cuda_get_nbfp_texref();
extern const struct texture<float, 1, cudaReadModeElementType> &nbnxn_cuda_get_nbfp_comb_texref();
extern const struct texture<float, 1, cudaReadModeElementType> &nbnxn_cuda_get_coulomb_tab_texref();
}
void nbnxn_gpu_init(gmx_nbnxn_cuda_t **p_nb,
- const gmx_gpu_info_t *gpu_info,
- const gmx_gpu_opt_t *gpu_opt,
+ const gmx_device_info_t *deviceInfo,
const interaction_const_t *ic,
nonbonded_verlet_group_t *nbv_grp,
- int my_gpu_index,
int /*rank*/,
gmx_bool bLocalAndNonlocal)
{
cudaError_t stat;
gmx_nbnxn_cuda_t *nb;
- assert(gpu_info);
-
if (p_nb == NULL)
{
return;
init_plist(nb->plist[eintLocal]);
/* set device info, just point it to the right GPU among the detected ones */
- nb->dev_info = &gpu_info->gpu_dev[get_gpu_device_id(*gpu_info, gpu_opt, my_gpu_index)];
+ nb->dev_info = deviceInfo;
/* local/non-local GPU streams */
stat = cudaStreamCreate(&nb->stream[eintLocal]);
*/
struct gmx_nbnxn_cuda_t
{
- struct gmx_device_info_t *dev_info; /**< CUDA device information */
+ const gmx_device_info_t *dev_info; /**< CUDA device information */
bool bUseTwoStreams; /**< true if doing both local/non-local NB work on GPU */
cu_atomdata_t *atdat; /**< atom data */
cu_nbparam_t *nbparam; /**< parameters required for the non-bonded calc. */
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2014,2015, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015,2017, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
struct nbnxn_atomdata_t;
struct gmx_wallclock_gpu_t;
struct gmx_gpu_info_t;
-struct gmx_gpu_opt_t;
/** Initializes the data structures related to GPU nonbonded calculations. */
GPU_FUNC_QUALIFIER
void nbnxn_gpu_init(gmx_nbnxn_gpu_t gmx_unused **p_nb,
- const struct gmx_gpu_info_t gmx_unused *gpu_info,
- const struct gmx_gpu_opt_t gmx_unused *gpu_opt,
+ const gmx_device_info_t gmx_unused *deviceInfo,
const interaction_const_t gmx_unused *ic,
nonbonded_verlet_group_t gmx_unused *nbv_grp,
- int gmx_unused my_gpu_index,
int gmx_unused rank,
/* true if both local and non-local are done on GPU */
gmx_bool gmx_unused bLocalAndNonlocal) GPU_FUNC_TERM
/*! \brief Validates the input global work size parameter.
*/
-static inline void validate_global_work_size(size_t *global_work_size, int work_dim, gmx_device_info_t *dinfo)
+static inline void validate_global_work_size(size_t *global_work_size, int work_dim, const gmx_device_info_t *dinfo)
{
cl_uint device_size_t_size_bits;
cl_uint host_size_t_size_bits;
//! This function is documented in the header file
void nbnxn_gpu_init(gmx_nbnxn_ocl_t **p_nb,
- const gmx_gpu_info_t *gpu_info,
- const gmx_gpu_opt_t *gpu_opt,
+ const gmx_device_info_t *deviceInfo,
const interaction_const_t *ic,
nonbonded_verlet_group_t *nbv_grp,
- int my_gpu_index,
int rank,
gmx_bool bLocalAndNonlocal)
{
cl_int cl_error;
cl_command_queue_properties queue_properties;
- assert(gpu_info);
- assert(gpu_opt);
assert(ic);
if (p_nb == NULL)
snew(nb->timings, 1);
/* set device info, just point it to the right GPU among the detected ones */
- nb->dev_info = gpu_info->gpu_dev + gpu_opt->dev_use[my_gpu_index];
+ nb->dev_info = deviceInfo;
snew(nb->dev_rundata, 1);
/* init to NULL the debug buffer */
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
*/
struct gmx_nbnxn_ocl_t
{
- struct gmx_device_info_t *dev_info; /**< OpenCL device information */
+ const gmx_device_info_t *dev_info; /**< OpenCL device information */
struct gmx_device_runtime_data_t *dev_rundata; /**< OpenCL runtime data (context, kernels) */
/**< Pointers to non-bonded kernel functions
struct t_nblist;
struct t_nblists;
struct t_QMMMrec;
-struct gmx_hw_info_t;
-struct gmx_gpu_opt_t;
#ifdef __cplusplus
extern "C" {
rvec posres_com;
rvec posres_comB;
- const struct gmx_hw_info_t *hwinfo;
- const struct gmx_gpu_opt_t *gpu_opt;
gmx_bool use_simd_kernels;
/* Interaction for calculated in kernels. In many cases this is similar to
check_resource_division_efficiency(hwinfo, hw_opt, hw_opt.gpu_opt.n_dev_use, Flags & MD_NTOMPSET,
cr, mdlog);
+ gmx_device_info_t *shortRangedDeviceInfo = nullptr;
+ int shortRangedDeviceId = -1;
+ if (cr->duty & DUTY_PP)
+ {
+ if (willUsePhysicalGpu)
+ {
+ shortRangedDeviceId = hw_opt.gpu_opt.dev_use[cr->nrank_pp_intranode];
+ shortRangedDeviceInfo = getDeviceInfo(hwinfo->gpu_info, shortRangedDeviceId);
+ }
+ }
+
if (DOMAINDECOMP(cr))
{
/* When we share GPUs over ranks, we need to know this for the DLB */
- dd_setup_dlb_resource_sharing(cr, hwinfo, hw_opt);
+ dd_setup_dlb_resource_sharing(cr, shortRangedDeviceId);
}
/* getting number of PP/PME threads
/* Initiate forcerecord */
fr = mk_forcerec();
- fr->hwinfo = hwinfo;
- fr->gpu_opt = &hw_opt.gpu_opt;
fr->forceProviders = mdModules.initForceProviders();
init_forcerec(fplog, mdlog, fr, fcd,
inputrec, mtop, cr, box,
opt2fn("-tablep", nfile, fnm),
getFilenm("-tableb", nfile, fnm),
nbpu_opt,
+ shortRangedDeviceInfo,
FALSE,
pforce);
}
/* Free GPU memory and context */
- free_gpu_resources(fr, cr, &hwinfo->gpu_info, fr ? fr->gpu_opt : nullptr);
+ free_gpu_resources(fr, cr, shortRangedDeviceInfo);
if (doMembed)
{