#endif
/** Types of electrostatics available in the CUDA nonbonded force kernels. */
-enum { eelCuEWALD, eelCuEWALD_TWIN, eelCuRF, eelCuCUT, eelCuNR };
+enum {
+ eelCuEWALD, eelCuEWALD_TWIN, eelCuRF, eelCuCUT, eelCuNR
+};
-enum { eNbnxnCuKDefault, eNbnxnCuKLegacy, eNbnxnCuKOld, eNbnxnCuKNR };
+enum {
+ eNbnxnCuKDefault, eNbnxnCuKLegacy, eNbnxnCuKOld, eNbnxnCuKNR
+};
/** Evaluates to true when the kernel version \p k equals eNbnxnCuKOld.
 *  The argument is parenthesized so that an expression argument
 *  (e.g. `a & b` or `c ? x : y`) is compared as a whole. */
#define NBNXN_KVER_OLD(k) ((k) == eNbnxnCuKOld)
/** Evaluates to true when the kernel version \p k equals eNbnxnCuKLegacy.
 *  The argument is parenthesized for the same reason as in NBNXN_KVER_OLD. */
#define NBNXN_KVER_LEGACY(k) ((k) == eNbnxnCuKLegacy)
/** Nonbonded atom data -- both inputs and outputs. */
struct cu_atomdata
{
- int natoms; /**< number of atoms */
- int natoms_local; /**< number of local atoms */
- int nalloc; /**< allocation size for the atom data (xq, f) */
+ int natoms; /**< number of atoms */
+ int natoms_local; /**< number of local atoms */
+ int nalloc; /**< allocation size for the atom data (xq, f) */
float4 *xq; /**< atom coordinates + charges, size natoms */
float3 *f; /**< force output array, size natoms */
float3 *fshift; /**< shift forces */
- int ntypes; /**< number of atom types */
+ int ntypes; /**< number of atom types */
int *atom_types; /**< atom type indices, size natoms */
float3 *shift_vec; /**< shifts */
- bool bShiftVecUploaded; /**< true if the shift vector has been uploaded */
+ bool bShiftVecUploaded; /**< true if the shift vector has been uploaded */
};
/** Parameters required for the CUDA nonbonded calculations. */
struct cu_nbparam
{
- int eeltype; /**< type of electrostatics */
-
- float epsfac; /**< charge multiplication factor */
- float c_rf, /**< Reaction-field/plain cutoff electrostatics const. */
- two_k_rf; /**< Reaction-field electrostatics constant */
- float ewald_beta; /**< Ewald/PME parameter */
- float sh_ewald; /**< Ewald/PME correction term */
- float rvdw_sq; /**< VdW cut-off */
- float rcoulomb_sq; /**< Coulomb cut-off */
- float rlist_sq; /**< pair-list cut-off */
- float sh_invrc6; /**< LJ potential correction term */
+ int eeltype; /**< type of electrostatics */
+
+ float epsfac; /**< charge multiplication factor */
+ float c_rf, /**< Reaction-field/plain cutoff electrostatics const. */
+ two_k_rf; /**< Reaction-field electrostatics constant */
+ float ewald_beta; /**< Ewald/PME parameter */
+ float sh_ewald; /**< Ewald/PME correction term */
+ float rvdw_sq; /**< VdW cut-off */
+ float rcoulomb_sq; /**< Coulomb cut-off */
+ float rlist_sq; /**< pair-list cut-off */
+ float sh_invrc6; /**< LJ potential correction term */
float *nbfp; /**< nonbonded parameter table with C6/C12 pairs */
/* Ewald Coulomb force table data */
- int coulomb_tab_size; /**< table size (s.t. it fits in texture cache) */
- float coulomb_tab_scale; /**< table scale/spacing */
+ int coulomb_tab_size; /**< table size (s.t. it fits in texture cache) */
+ float coulomb_tab_scale; /**< table scale/spacing */
float *coulomb_tab; /**< pointer to the table in the device memory */
};
/** Pair list data */
struct cu_plist
{
- int na_c; /**< number of atoms per cluster */
-
- int nsci; /**< size of sci, # of i clusters in the list */
- int sci_nalloc; /**< allocation size of sci */
- nbnxn_sci_t *sci; /**< list of i-cluster ("super-clusters") */
-
- int ncj4; /**< total # of 4*j clusters */
- int cj4_nalloc; /**< allocation size of cj4 */
- nbnxn_cj4_t *cj4; /**< 4*j cluster list, contains j cluster number
- and index into the i cluster list */
- nbnxn_excl_t *excl; /**< atom interaction bits */
- int nexcl; /**< count for excl */
- int excl_nalloc;/**< allocation size of excl */
-
- bool bDoPrune; /**< true if pair-list pruning needs to be
- done during the current step */
+ int na_c; /**< number of atoms per cluster */
+
+ int nsci; /**< size of sci, # of i clusters in the list */
+ int sci_nalloc; /**< allocation size of sci */
+ nbnxn_sci_t *sci; /**< list of i-cluster ("super-clusters") */
+
+ int ncj4; /**< total # of 4*j clusters */
+ int cj4_nalloc; /**< allocation size of cj4 */
+ nbnxn_cj4_t *cj4; /**< 4*j cluster list, contains j cluster number
+ and index into the i cluster list */
+ nbnxn_excl_t *excl; /**< atom interaction bits */
+ int nexcl; /**< count for excl */
+ int excl_nalloc; /**< allocation size of excl */
+
+ bool bDoPrune; /**< true if pair-list pruning needs to be
+ done during the current step */
};
/** CUDA events used for timing GPU kernels and H2D/D2H transfers.
/** Main data structure for CUDA nonbonded force calculations. */
struct nbnxn_cuda
{
- cuda_dev_info_t *dev_info; /**< CUDA device information */
- int kernel_ver; /**< The version of the kernel to be executed on the
- device in use, possible values: eNbnxnCuK* */
- bool bUseTwoStreams; /**< true if doing both local/non-local NB work on GPU */
- bool bUseStreamSync; /**< true if the standard cudaStreamSynchronize is used
- and not memory polling-based waiting */
- cu_atomdata_t *atdat; /**< atom data */
- cu_nbparam_t *nbparam; /**< parameters required for the non-bonded calc. */
- cu_plist_t *plist[2]; /**< pair-list data structures (local and non-local) */
- nb_staging_t nbst; /**< staging area where fshift/energies get downloaded */
-
- cudaStream_t stream[2]; /**< local and non-local GPU streams */
+ cuda_dev_info_t *dev_info; /**< CUDA device information */
+ int kernel_ver; /**< The version of the kernel to be executed on the
+ device in use, possible values: eNbnxnCuK* */
+ bool bUseTwoStreams; /**< true if doing both local/non-local NB work on GPU */
+ bool bUseStreamSync; /**< true if the standard cudaStreamSynchronize is used
+ and not memory polling-based waiting */
+ cu_atomdata_t *atdat; /**< atom data */
+ cu_nbparam_t *nbparam; /**< parameters required for the non-bonded calc. */
+ cu_plist_t *plist[2]; /**< pair-list data structures (local and non-local) */
+ nb_staging_t nbst; /**< staging area where fshift/energies get downloaded */
+
+ cudaStream_t stream[2]; /**< local and non-local GPU streams */
/** events used for synchronization */
cudaEvent_t nonlocal_done; /**< event triggered when the non-local non-bonded kernel
- is done (and the local transfer can proceed) */
+ is done (and the local transfer can proceed) */
cudaEvent_t misc_ops_done; /**< event triggered when the operations that precede the
main force calculations are done (e.g. buffer 0-ing) */
* concurrent streams, so we won't time if both l/nl work is done on GPUs.
* Timer init/uninit is still done even with timing off so only the condition
* setting bDoTime needs to be changed if this CUDA "feature" gets fixed. */
- bool bDoTime; /**< True if event-based timing is enabled. */
+ bool bDoTime; /**< True if event-based timing is enabled. */
cu_timers_t *timers; /**< CUDA event-based timers. */
wallclock_gpu_t *timings; /**< Timing data. */
};
}
#endif
-#endif /* NBNXN_CUDA_TYPES_H */
+#endif /* NBNXN_CUDA_TYPES_H */