This leads to less confusion in formatting tools such as clang-format.
Fixes #3053.
Change-Id: I0367b2f9ff1a8f8b4369505a41557c3feb8c9c71
*
* \param[in] pme The PME structure.
*/
-GPU_FUNC_QUALIFIER void pme_gpu_reset_timings(const gmx_pme_t *GPU_FUNC_ARGUMENT(pme)) GPU_FUNC_TERM
+GPU_FUNC_QUALIFIER void pme_gpu_reset_timings(const gmx_pme_t *GPU_FUNC_ARGUMENT(pme)) GPU_FUNC_TERM;
/*! \brief
* Copies the PME GPU timings to the gmx_wallclock_gpu_pme_t structure (for log output). To be called at the run end.
* \param[in] timings The gmx_wallclock_gpu_pme_t structure.
*/
GPU_FUNC_QUALIFIER void pme_gpu_get_timings(const gmx_pme_t *GPU_FUNC_ARGUMENT(pme),
- gmx_wallclock_gpu_pme_t *GPU_FUNC_ARGUMENT(timings)) GPU_FUNC_TERM
+ gmx_wallclock_gpu_pme_t *GPU_FUNC_ARGUMENT(timings)) GPU_FUNC_TERM;
/* The main PME GPU functions */
bool GPU_FUNC_ARGUMENT(needToUpdateBox),
const matrix GPU_FUNC_ARGUMENT(box),
gmx_wallcycle *GPU_FUNC_ARGUMENT(wcycle),
- int GPU_FUNC_ARGUMENT(flags)) GPU_FUNC_TERM
+ int GPU_FUNC_ARGUMENT(flags)) GPU_FUNC_TERM;
/*! \brief
* Launches first stage of PME on GPU - H2D input transfers, spreading kernel, and D2H grid transfer if needed.
*/
GPU_FUNC_QUALIFIER void pme_gpu_launch_spread(gmx_pme_t *GPU_FUNC_ARGUMENT(pme),
const rvec *GPU_FUNC_ARGUMENT(x),
- gmx_wallcycle *GPU_FUNC_ARGUMENT(wcycle)) GPU_FUNC_TERM
+ gmx_wallcycle *GPU_FUNC_ARGUMENT(wcycle)) GPU_FUNC_TERM;
/*! \brief
* Launches middle stages of PME (FFT R2C, solving, FFT C2R) either on GPU or on CPU, depending on the run mode.
* \param[in] wcycle The wallclock counter.
*/
GPU_FUNC_QUALIFIER void pme_gpu_launch_complex_transforms(gmx_pme_t *GPU_FUNC_ARGUMENT(pme),
- gmx_wallcycle *GPU_FUNC_ARGUMENT(wcycle)) GPU_FUNC_TERM
+ gmx_wallcycle *GPU_FUNC_ARGUMENT(wcycle)) GPU_FUNC_TERM;
/*! \brief
* Launches last stage of PME on GPU - force gathering and D2H force transfer.
*/
GPU_FUNC_QUALIFIER void pme_gpu_launch_gather(const gmx_pme_t *GPU_FUNC_ARGUMENT(pme),
gmx_wallcycle *GPU_FUNC_ARGUMENT(wcycle),
- PmeForceOutputHandling GPU_FUNC_ARGUMENT(forceTreatment)) GPU_FUNC_TERM
+ PmeForceOutputHandling GPU_FUNC_ARGUMENT(forceTreatment)) GPU_FUNC_TERM;
/*! \brief
* Attempts to complete PME GPU tasks.
* \returns True if the PME GPU tasks have completed
*/
GPU_FUNC_QUALIFIER bool
-pme_gpu_try_finish_task(gmx_pme_t *GPU_FUNC_ARGUMENT(pme),
- int GPU_FUNC_ARGUMENT(flags),
- gmx_wallcycle *GPU_FUNC_ARGUMENT(wcycle),
- gmx::ForceWithVirial *GPU_FUNC_ARGUMENT(forceWithVirial),
- gmx_enerdata_t *GPU_FUNC_ARGUMENT(enerd),
- GpuTaskCompletion GPU_FUNC_ARGUMENT(completionKind)) GPU_FUNC_TERM_WITH_RETURN(false)
+ pme_gpu_try_finish_task(gmx_pme_t *GPU_FUNC_ARGUMENT(pme),
+ int GPU_FUNC_ARGUMENT(flags),
+ gmx_wallcycle *GPU_FUNC_ARGUMENT(wcycle),
+ gmx::ForceWithVirial *GPU_FUNC_ARGUMENT(forceWithVirial),
+ gmx_enerdata_t *GPU_FUNC_ARGUMENT(enerd),
+ GpuTaskCompletion GPU_FUNC_ARGUMENT(completionKind)) GPU_FUNC_TERM_WITH_RETURN(false);
/*! \brief
* Blocks until PME GPU tasks are completed, and gets the output forces and virial/energy
* \param[out] enerd The output energies
*/
GPU_FUNC_QUALIFIER void
-pme_gpu_wait_and_reduce(gmx_pme_t *GPU_FUNC_ARGUMENT(pme),
- int GPU_FUNC_ARGUMENT(flags),
- gmx_wallcycle *GPU_FUNC_ARGUMENT(wcycle),
- gmx::ForceWithVirial *GPU_FUNC_ARGUMENT(forceWithVirial),
- gmx_enerdata_t *GPU_FUNC_ARGUMENT(enerd)) GPU_FUNC_TERM
+ pme_gpu_wait_and_reduce(gmx_pme_t *GPU_FUNC_ARGUMENT(pme),
+ int GPU_FUNC_ARGUMENT(flags),
+ gmx_wallcycle *GPU_FUNC_ARGUMENT(wcycle),
+ gmx::ForceWithVirial *GPU_FUNC_ARGUMENT(forceWithVirial),
+ gmx_enerdata_t *GPU_FUNC_ARGUMENT(enerd)) GPU_FUNC_TERM;
/*! \brief
* The PME GPU reinitialization function that is called both at the end of any PME computation and on any load balancing.
* \param[in] wcycle The wallclock counter.
*/
GPU_FUNC_QUALIFIER void pme_gpu_reinit_computation(const gmx_pme_t *GPU_FUNC_ARGUMENT(pme),
- gmx_wallcycle *GPU_FUNC_ARGUMENT(wcycle)) GPU_FUNC_TERM
+ gmx_wallcycle *GPU_FUNC_ARGUMENT(wcycle)) GPU_FUNC_TERM;
/*! \brief Get pointer to device copy of coordinate data. */
-GPU_FUNC_QUALIFIER void *pme_gpu_get_device_x(const gmx_pme_t *GPU_FUNC_ARGUMENT(pme)) GPU_FUNC_TERM_WITH_RETURN(nullptr)
+GPU_FUNC_QUALIFIER void *pme_gpu_get_device_x(const gmx_pme_t *GPU_FUNC_ARGUMENT(pme)) GPU_FUNC_TERM_WITH_RETURN(nullptr);
#endif
*
* \param[in] pmeGpu The PME GPU structure.
*/
-GPU_FUNC_QUALIFIER void pme_gpu_synchronize(const PmeGpu *GPU_FUNC_ARGUMENT(pmeGpu)) GPU_FUNC_TERM
+GPU_FUNC_QUALIFIER void pme_gpu_synchronize(const PmeGpu *GPU_FUNC_ARGUMENT(pmeGpu)) GPU_FUNC_TERM;
/*! \libinternal \brief
* Allocates the fixed size energy and virial buffer both on GPU and CPU.
* Needs to be called for every PME computation. The coordinates are then used in the spline calculation.
*/
GPU_FUNC_QUALIFIER void pme_gpu_copy_input_coordinates(const PmeGpu *GPU_FUNC_ARGUMENT(pmeGpu),
- const rvec *GPU_FUNC_ARGUMENT(h_coordinates)) GPU_FUNC_TERM
+ const rvec *GPU_FUNC_ARGUMENT(h_coordinates)) GPU_FUNC_TERM;
/*! \libinternal \brief
* Frees the coordinates on the GPU.
int GPU_FUNC_ARGUMENT(gridIndex),
real *GPU_FUNC_ARGUMENT(h_grid),
bool GPU_FUNC_ARGUMENT(computeSplines),
- bool GPU_FUNC_ARGUMENT(spreadCharges)) GPU_FUNC_TERM
+ bool GPU_FUNC_ARGUMENT(spreadCharges)) GPU_FUNC_TERM;
/*! \libinternal \brief
* 3D FFT R2C/C2R routine.
GPU_FUNC_QUALIFIER void pme_gpu_solve(const PmeGpu *GPU_FUNC_ARGUMENT(pmeGpu),
t_complex *GPU_FUNC_ARGUMENT(h_grid),
GridOrdering GPU_FUNC_ARGUMENT(gridOrdering),
- bool GPU_FUNC_ARGUMENT(computeEnergyAndVirial)) GPU_FUNC_TERM
+ bool GPU_FUNC_ARGUMENT(computeEnergyAndVirial)) GPU_FUNC_TERM;
/*! \libinternal \brief
* A GPU force gathering function.
*/
GPU_FUNC_QUALIFIER void pme_gpu_gather(PmeGpu *GPU_FUNC_ARGUMENT(pmeGpu),
PmeForceOutputHandling GPU_FUNC_ARGUMENT(forceTreatment),
- const float *GPU_FUNC_ARGUMENT(h_grid)
- ) GPU_FUNC_TERM
+ const float *GPU_FUNC_ARGUMENT(h_grid)) GPU_FUNC_TERM;
/*! \brief Return pointer to device copy of coordinate data. */
-GPU_FUNC_QUALIFIER void * pme_gpu_get_kernelparam_coordinates(const PmeGpu *GPU_FUNC_ARGUMENT(pmeGpu)) GPU_FUNC_TERM_WITH_RETURN(nullptr)
+GPU_FUNC_QUALIFIER void * pme_gpu_get_kernelparam_coordinates(const PmeGpu *GPU_FUNC_ARGUMENT(pmeGpu)) GPU_FUNC_TERM_WITH_RETURN(nullptr);
/* The inlined convenience PME GPU status getters */
* \returns The output object.
*/
GPU_FUNC_QUALIFIER PmeOutput
-pme_gpu_getEnergyAndVirial(const gmx_pme_t &GPU_FUNC_ARGUMENT(pme)) GPU_FUNC_TERM_WITH_RETURN(PmeOutput {})
+pme_gpu_getEnergyAndVirial(const gmx_pme_t &GPU_FUNC_ARGUMENT(pme)) GPU_FUNC_TERM_WITH_RETURN(PmeOutput {});
/*! \libinternal \brief
* Returns the GPU outputs (forces, energy and virial)
*/
GPU_FUNC_QUALIFIER PmeOutput
pme_gpu_getOutput(const gmx_pme_t &GPU_FUNC_ARGUMENT(pme),
- int GPU_FUNC_ARGUMENT(flags)) GPU_FUNC_TERM_WITH_RETURN(PmeOutput {})
+ int GPU_FUNC_ARGUMENT(flags)) GPU_FUNC_TERM_WITH_RETURN(PmeOutput {});
/*! \libinternal \brief
* Updates the unit cell parameters. Does not check if update is necessary - that is done in pme_gpu_prepare_computation().
* \param[in] box The unit cell box.
*/
GPU_FUNC_QUALIFIER void pme_gpu_update_input_box(PmeGpu *GPU_FUNC_ARGUMENT(pmeGpu),
- const matrix GPU_FUNC_ARGUMENT(box)) GPU_FUNC_TERM
+ const matrix GPU_FUNC_ARGUMENT(box)) GPU_FUNC_TERM;
/*! \libinternal \brief
* Finishes the PME GPU computation, waiting for the output forces and/or energy/virial to be copied to the host.
const PmeAtomComm *GPU_FUNC_ARGUMENT(atc),
PmeSplineDataType GPU_FUNC_ARGUMENT(type),
int GPU_FUNC_ARGUMENT(dimIndex),
- PmeLayoutTransform GPU_FUNC_ARGUMENT(transform)) GPU_FUNC_TERM
+ PmeLayoutTransform GPU_FUNC_ARGUMENT(transform)) GPU_FUNC_TERM;
/*! \libinternal \brief
* Gets a unique index to an element in a spline parameter buffer (theta/dtheta),
*/
GPU_FUNC_QUALIFIER void pme_gpu_get_real_grid_sizes(const PmeGpu *GPU_FUNC_ARGUMENT(pmeGpu),
gmx::IVec *GPU_FUNC_ARGUMENT(gridSize),
- gmx::IVec *GPU_FUNC_ARGUMENT(paddedGridSize)) GPU_FUNC_TERM
+ gmx::IVec *GPU_FUNC_ARGUMENT(paddedGridSize)) GPU_FUNC_TERM;
/*! \libinternal \brief
* (Re-)initializes the PME GPU data at the beginning of the run or on DLB.
*/
GPU_FUNC_QUALIFIER void pme_gpu_reinit(gmx_pme_t *GPU_FUNC_ARGUMENT(pme),
const gmx_device_info_t *GPU_FUNC_ARGUMENT(gpuInfo),
- PmeGpuProgramHandle GPU_FUNC_ARGUMENT(pmeGpuProgram)) GPU_FUNC_TERM
+ PmeGpuProgramHandle GPU_FUNC_ARGUMENT(pmeGpuProgram)) GPU_FUNC_TERM;
/*! \libinternal \brief
* Destroys the PME GPU data at the end of the run.
*
* \param[in] pmeGpu The PME GPU structure.
*/
-GPU_FUNC_QUALIFIER void pme_gpu_destroy(PmeGpu *GPU_FUNC_ARGUMENT(pmeGpu)) GPU_FUNC_TERM
+GPU_FUNC_QUALIFIER void pme_gpu_destroy(PmeGpu *GPU_FUNC_ARGUMENT(pmeGpu)) GPU_FUNC_TERM;
/*! \libinternal \brief
* Reallocates the local atoms data (charges, coordinates, etc.). Copies the charges to the GPU.
*/
GPU_FUNC_QUALIFIER void pme_gpu_reinit_atoms(PmeGpu *GPU_FUNC_ARGUMENT(pmeGpu),
int GPU_FUNC_ARGUMENT(nAtoms),
- const real *GPU_FUNC_ARGUMENT(charges)) GPU_FUNC_TERM
+ const real *GPU_FUNC_ARGUMENT(charges)) GPU_FUNC_TERM;
/*! \brief \libinternal
* The PME GPU reinitialization function that is called both at the end of any PME computation and on any load balancing.
* \return The output forces, energy and virial
*/
GPU_FUNC_QUALIFIER PmeOutput
-pme_gpu_wait_finish_task(gmx_pme_t *GPU_FUNC_ARGUMENT(pme),
- int GPU_FUNC_ARGUMENT(flags),
- gmx_wallcycle *GPU_FUNC_ARGUMENT(wcycle)) GPU_FUNC_TERM_WITH_RETURN(PmeOutput {}
- )
+ pme_gpu_wait_finish_task(gmx_pme_t *GPU_FUNC_ARGUMENT(pme),
+ int GPU_FUNC_ARGUMENT(flags),
+ gmx_wallcycle *GPU_FUNC_ARGUMENT(wcycle)) GPU_FUNC_TERM_WITH_RETURN(PmeOutput {}
+ );
#endif
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2014,2015,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015,2017,2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
everywhere a GPU function is called. */
#define REAL_FUNC_QUALIFIER
#define REAL_FUNC_ARGUMENT(arg) arg
-#define REAL_FUNC_TERM ;
-#define REAL_FUNC_TERM_WITH_RETURN(arg) ;
+#define REAL_FUNC_TERM
+#define REAL_FUNC_TERM_WITH_RETURN(arg)
#define NULL_FUNC_QUALIFIER gmx_unused static
#define NULL_FUNC_ARGUMENT(arg) arg gmx_unused
* the call to canDetectGpus() should always prevent this occurring)
*/
GPU_FUNC_QUALIFIER
-void findGpus(gmx_gpu_info_t *GPU_FUNC_ARGUMENT(gpu_info)) GPU_FUNC_TERM
+void findGpus(gmx_gpu_info_t *GPU_FUNC_ARGUMENT(gpu_info)) GPU_FUNC_TERM;
/*! \brief Return a container of the detected GPUs that are compatible.
*
* initialization.
*/
GPU_FUNC_QUALIFIER
-void init_gpu(const gmx_device_info_t *GPU_FUNC_ARGUMENT(deviceInfo)) GPU_FUNC_TERM
+void init_gpu(const gmx_device_info_t *GPU_FUNC_ARGUMENT(deviceInfo)) GPU_FUNC_TERM;
/*! \brief Frees up the CUDA GPU used by the active context at the time of calling.
*
* \returns true if no error occurs during the freeing.
*/
CUDA_FUNC_QUALIFIER
-void free_gpu(const gmx_device_info_t *CUDA_FUNC_ARGUMENT(deviceInfo)) CUDA_FUNC_TERM
+void free_gpu(const gmx_device_info_t *CUDA_FUNC_ARGUMENT(deviceInfo)) CUDA_FUNC_TERM;
/*! \brief Return a pointer to the device info for \c deviceId
*
*/
GPU_FUNC_QUALIFIER
gmx_device_info_t *getDeviceInfo(const gmx_gpu_info_t &GPU_FUNC_ARGUMENT(gpu_info),
- int GPU_FUNC_ARGUMENT(deviceId)) GPU_FUNC_TERM_WITH_RETURN(nullptr)
+ int GPU_FUNC_ARGUMENT(deviceId)) GPU_FUNC_TERM_WITH_RETURN(nullptr);
/*! \brief Returns the device ID of the CUDA GPU currently in use.
*
* \returns device ID of the GPU in use at the time of the call
*/
CUDA_FUNC_QUALIFIER
-int get_current_cuda_gpu_device_id(void) CUDA_FUNC_TERM_WITH_RETURN(-1)
+int get_current_cuda_gpu_device_id(void) CUDA_FUNC_TERM_WITH_RETURN(-1);
/*! \brief Formats and returns a device information string for a given GPU.
*
GPU_FUNC_QUALIFIER
void get_gpu_device_info_string(char *GPU_FUNC_ARGUMENT(s),
const gmx_gpu_info_t &GPU_FUNC_ARGUMENT(gpu_info),
- int GPU_FUNC_ARGUMENT(index)) GPU_FUNC_TERM
+ int GPU_FUNC_ARGUMENT(index)) GPU_FUNC_TERM;
/*! \brief Returns whether all compatible OpenCL devices are from AMD.
*
*/
OPENCL_FUNC_QUALIFIER
bool areAllGpuDevicesFromAmd(const gmx_gpu_info_t &OPENCL_FUNC_ARGUMENT(gpuInfo))
-OPENCL_FUNC_TERM_WITH_RETURN(false)
+OPENCL_FUNC_TERM_WITH_RETURN(false);
/*! \brief Returns the size of the gpu_dev_info struct.
*
* \returns size in bytes of gpu_dev_info
*/
GPU_FUNC_QUALIFIER
-size_t sizeof_gpu_dev_info() GPU_FUNC_TERM_WITH_RETURN(0)
+size_t sizeof_gpu_dev_info() GPU_FUNC_TERM_WITH_RETURN(0);
//! Get status of device with specified index
int gpu_info_get_stat(const gmx_gpu_info_t &info, int index);
* Note that this is implemented only for the CUDA API.
*/
CUDA_FUNC_QUALIFIER
-void startGpuProfiler(void) CUDA_FUNC_TERM
+void startGpuProfiler(void) CUDA_FUNC_TERM;
/*! \brief Resets the GPU profiler if mdrun is being profiled.
* Note that this is implemented only for the CUDA API.
*/
CUDA_FUNC_QUALIFIER
-void resetGpuProfiler(void) CUDA_FUNC_TERM
+void resetGpuProfiler(void) CUDA_FUNC_TERM;
/*! \brief Stops the CUDA profiler if mdrun is being profiled.
* Note that this is implemented only for the CUDA API.
*/
CUDA_FUNC_QUALIFIER
-void stopGpuProfiler(void) CUDA_FUNC_TERM
+void stopGpuProfiler(void) CUDA_FUNC_TERM;
//! Tells whether the host buffer was pinned for non-blocking transfers. Only implemented for CUDA.
CUDA_FUNC_QUALIFIER
-bool isHostMemoryPinned(const void *CUDA_FUNC_ARGUMENT(h_ptr)) CUDA_FUNC_TERM_WITH_RETURN(false)
+bool isHostMemoryPinned(const void *CUDA_FUNC_ARGUMENT(h_ptr)) CUDA_FUNC_TERM_WITH_RETURN(false);
#endif
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2017, by the GROMACS development team, led by
+ * Copyright (c) 2017,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
* Does not throw.
*/
CUDA_FUNC_QUALIFIER void pinBuffer(void *CUDA_FUNC_ARGUMENT(pointer),
- std::size_t CUDA_FUNC_ARGUMENT(numBytes)) noexcept CUDA_FUNC_TERM
+ std::size_t CUDA_FUNC_ARGUMENT(numBytes)) noexcept CUDA_FUNC_TERM;
/*! \brief Unpin the allocation.
*
*
* Does not throw.
*/
-CUDA_FUNC_QUALIFIER void unpinBuffer(void *CUDA_FUNC_ARGUMENT(pointer)) noexcept CUDA_FUNC_TERM
+CUDA_FUNC_QUALIFIER void unpinBuffer(void *CUDA_FUNC_ARGUMENT(pointer)) noexcept CUDA_FUNC_TERM;
} // namespace gmx
const nbnxn_atomdata_t gmx_unused *nbat,
int gmx_unused rank,
/* true if both local and non-local are done on GPU */
- gmx_bool gmx_unused bLocalAndNonlocal) GPU_FUNC_TERM_WITH_RETURN(nullptr)
+ gmx_bool gmx_unused bLocalAndNonlocal) GPU_FUNC_TERM_WITH_RETURN(nullptr);
/** Initializes pair-list data for GPU, called at every pair search step. */
GPU_FUNC_QUALIFIER
void gpu_init_pairlist(gmx_nbnxn_gpu_t gmx_unused *nb,
const struct NbnxnPairlistGpu gmx_unused *h_nblist,
- InteractionLocality gmx_unused iloc) GPU_FUNC_TERM
+ InteractionLocality gmx_unused iloc) GPU_FUNC_TERM;
/** Initializes atom-data on the GPU, called at every pair search step. */
GPU_FUNC_QUALIFIER
void gpu_init_atomdata(gmx_nbnxn_gpu_t gmx_unused *nb,
- const nbnxn_atomdata_t gmx_unused *nbat) GPU_FUNC_TERM
+ const nbnxn_atomdata_t gmx_unused *nbat) GPU_FUNC_TERM;
/*! \brief Re-generate the GPU Ewald force table, resets rlist, and update the
* electrostatic type switching to twin cut-off (or back) if needed.
*/
GPU_FUNC_QUALIFIER
void gpu_pme_loadbal_update_param(const struct nonbonded_verlet_t gmx_unused *nbv,
- const interaction_const_t gmx_unused *ic) GPU_FUNC_TERM
+ const interaction_const_t gmx_unused *ic) GPU_FUNC_TERM;
/** Uploads shift vector to the GPU if the box is dynamic (otherwise just returns). */
GPU_FUNC_QUALIFIER
void gpu_upload_shiftvec(gmx_nbnxn_gpu_t gmx_unused *nb,
- const nbnxn_atomdata_t gmx_unused *nbatom) GPU_FUNC_TERM
+ const nbnxn_atomdata_t gmx_unused *nbatom) GPU_FUNC_TERM;
/** Clears GPU outputs: nonbonded force, shift force and energy. */
GPU_FUNC_QUALIFIER
void gpu_clear_outputs(gmx_nbnxn_gpu_t gmx_unused *nb,
- int gmx_unused flags) GPU_FUNC_TERM
+ int gmx_unused flags) GPU_FUNC_TERM;
/** Frees all GPU resources used for the nonbonded calculations. */
GPU_FUNC_QUALIFIER
-void gpu_free(gmx_nbnxn_gpu_t gmx_unused *nb) GPU_FUNC_TERM
+void gpu_free(gmx_nbnxn_gpu_t gmx_unused *nb) GPU_FUNC_TERM;
/** Returns the GPU timings structure or NULL if GPU is not used or timing is off. */
GPU_FUNC_QUALIFIER
-struct gmx_wallclock_gpu_nbnxn_t *gpu_get_timings(gmx_nbnxn_gpu_t gmx_unused *nb) GPU_FUNC_TERM_WITH_RETURN(nullptr)
+struct gmx_wallclock_gpu_nbnxn_t *gpu_get_timings(gmx_nbnxn_gpu_t gmx_unused *nb) GPU_FUNC_TERM_WITH_RETURN(nullptr);
/** Resets nonbonded GPU timings. */
GPU_FUNC_QUALIFIER
-void gpu_reset_timings(struct nonbonded_verlet_t gmx_unused *nbv) GPU_FUNC_TERM
+void gpu_reset_timings(struct nonbonded_verlet_t gmx_unused *nbv) GPU_FUNC_TERM;
/** Calculates the minimum size of proximity lists to improve SM load balance
* with GPU non-bonded kernels. */
- GPU_FUNC_QUALIFIER
-int gpu_min_ci_balanced(gmx_nbnxn_gpu_t gmx_unused *nb) GPU_FUNC_TERM_WITH_RETURN(-1)
+GPU_FUNC_QUALIFIER
+int gpu_min_ci_balanced(gmx_nbnxn_gpu_t gmx_unused *nb) GPU_FUNC_TERM_WITH_RETURN(-1);
/** Returns if analytical Ewald GPU kernels are used. */
GPU_FUNC_QUALIFIER
-gmx_bool gpu_is_kernel_ewald_analytical(const gmx_nbnxn_gpu_t gmx_unused *nb) GPU_FUNC_TERM_WITH_RETURN(FALSE)
+gmx_bool gpu_is_kernel_ewald_analytical(const gmx_nbnxn_gpu_t gmx_unused *nb) GPU_FUNC_TERM_WITH_RETURN(FALSE);
/** Returns an opaque pointer to the GPU command stream
* Note: CUDA only.
*/
CUDA_FUNC_QUALIFIER
void *gpu_get_command_stream(gmx_nbnxn_gpu_t gmx_unused *nb,
- InteractionLocality gmx_unused iloc) CUDA_FUNC_TERM_WITH_RETURN(nullptr)
+ InteractionLocality gmx_unused iloc) CUDA_FUNC_TERM_WITH_RETURN(nullptr);
/** Returns an opaque pointer to the GPU coordinate+charge array
* Note: CUDA only.
*/
CUDA_FUNC_QUALIFIER
-void *gpu_get_xq(gmx_nbnxn_gpu_t gmx_unused *nb) CUDA_FUNC_TERM_WITH_RETURN(nullptr)
+void *gpu_get_xq(gmx_nbnxn_gpu_t gmx_unused *nb) CUDA_FUNC_TERM_WITH_RETURN(nullptr);
/** Returns an opaque pointer to the GPU force array
* Note: CUDA only.
*/
CUDA_FUNC_QUALIFIER
-void *gpu_get_f(gmx_nbnxn_gpu_t gmx_unused *nb) CUDA_FUNC_TERM_WITH_RETURN(nullptr)
+void *gpu_get_f(gmx_nbnxn_gpu_t gmx_unused *nb) CUDA_FUNC_TERM_WITH_RETURN(nullptr);
/** Returns an opaque pointer to the GPU shift force array
* Note: CUDA only.
*/
CUDA_FUNC_QUALIFIER
- rvec *gpu_get_fshift(gmx_nbnxn_gpu_t gmx_unused *nb) CUDA_FUNC_TERM_WITH_RETURN(nullptr)
+rvec *gpu_get_fshift(gmx_nbnxn_gpu_t gmx_unused *nb) CUDA_FUNC_TERM_WITH_RETURN(nullptr);
} // namespace Nbnxm
/*! \brief Handles any JIT compilation of nbnxn kernels for the selected device */
OPENCL_FUNC_QUALIFIER void
-nbnxn_gpu_compile_kernels(gmx_nbnxn_gpu_t gmx_unused *nb) OPENCL_FUNC_TERM
+ nbnxn_gpu_compile_kernels(gmx_nbnxn_gpu_t gmx_unused *nb) OPENCL_FUNC_TERM;
#endif
GPU_FUNC_QUALIFIER
void gpu_copy_xq_to_gpu(gmx_nbnxn_gpu_t gmx_unused *nb,
const struct nbnxn_atomdata_t gmx_unused *nbdata,
- AtomLocality gmx_unused aloc) GPU_FUNC_TERM
+ AtomLocality gmx_unused aloc) GPU_FUNC_TERM;
/*! \brief
* Launch asynchronously the nonbonded force calculations.
GPU_FUNC_QUALIFIER
void gpu_launch_kernel(gmx_nbnxn_gpu_t gmx_unused *nb,
int gmx_unused flags,
- InteractionLocality gmx_unused iloc) GPU_FUNC_TERM
+ InteractionLocality gmx_unused iloc) GPU_FUNC_TERM;
/*! \brief
* Launch asynchronously the nonbonded prune-only kernel.
GPU_FUNC_QUALIFIER
void gpu_launch_kernel_pruneonly(gmx_nbnxn_gpu_t gmx_unused *nb,
InteractionLocality gmx_unused iloc,
- int gmx_unused numParts) GPU_FUNC_TERM
+ int gmx_unused numParts) GPU_FUNC_TERM;
/*! \brief
* Launch asynchronously the download of short-range forces from the GPU
nbnxn_atomdata_t gmx_unused *nbatom,
int gmx_unused flags,
AtomLocality gmx_unused aloc,
- const bool gmx_unused copyBackNbForce) GPU_FUNC_TERM
+ const bool gmx_unused copyBackNbForce) GPU_FUNC_TERM;
/*! \brief Attempts to complete nonbonded GPU task.
*
real gmx_unused *e_lj,
real gmx_unused *e_el,
rvec gmx_unused *fshift,
- GpuTaskCompletion gmx_unused completionKind) GPU_FUNC_TERM_WITH_RETURN(false)
+ GpuTaskCompletion gmx_unused completionKind) GPU_FUNC_TERM_WITH_RETURN(false);
/*! \brief Completes the nonbonded GPU task, blocking until GPU tasks and data
* transfers finish.
AtomLocality gmx_unused aloc,
real gmx_unused *e_lj,
real gmx_unused *e_el,
- rvec gmx_unused *fshift) GPU_FUNC_TERM
+ rvec gmx_unused *fshift) GPU_FUNC_TERM;
/*! \brief Selects the Ewald kernel type, analytical or tabulated, single or twin cut-off. */
GPU_FUNC_QUALIFIER
-int gpu_pick_ewald_kernel_type(bool gmx_unused bTwinCut) GPU_FUNC_TERM_WITH_RETURN(-1)
+int gpu_pick_ewald_kernel_type(bool gmx_unused bTwinCut) GPU_FUNC_TERM_WITH_RETURN(-1);
/*! \brief Initialization for X buffer operations on GPU.
* Called on the NS step and performs (re-)allocations and memory copies. */
CUDA_FUNC_QUALIFIER
void nbnxn_gpu_init_x_to_nbat_x(const Nbnxm::GridSet gmx_unused &gridSet,
- gmx_nbnxn_gpu_t gmx_unused *gpu_nbv) CUDA_FUNC_TERM
+ gmx_nbnxn_gpu_t gmx_unused *gpu_nbv) CUDA_FUNC_TERM;
/*! \brief X buffer operations on GPU: performs conversion from rvec to nb format.
*/
Nbnxm::AtomLocality gmx_unused locality,
const rvec gmx_unused *x,
int gmx_unused gridId,
- int gmx_unused numColumnsMax) CUDA_FUNC_TERM
+ int gmx_unused numColumnsMax) CUDA_FUNC_TERM;
/*! \brief Sync the nonlocal stream with dependent tasks in the local queue.
* \param[in] nb The nonbonded data GPU structure
*/
CUDA_FUNC_QUALIFIER
void nbnxnInsertNonlocalGpuDependency(const gmx_nbnxn_gpu_t gmx_unused *nb,
- const InteractionLocality gmx_unused interactionLocality) CUDA_FUNC_TERM
+ const InteractionLocality gmx_unused interactionLocality) CUDA_FUNC_TERM;
/*! \brief Set up internal flags that indicate what type of short-range work there is.
*
GPU_FUNC_QUALIFIER
void setupGpuShortRangeWork(gmx_nbnxn_gpu_t gmx_unused *nb,
const gmx::GpuBonded gmx_unused *gpuBonded,
- const Nbnxm::InteractionLocality gmx_unused iLocality) GPU_FUNC_TERM
+ const Nbnxm::InteractionLocality gmx_unused iLocality) GPU_FUNC_TERM;
/*! \brief Returns true if there is GPU short-range work for the given atom locality.
*
*/
GPU_FUNC_QUALIFIER
bool haveGpuShortRangeWork(const gmx_nbnxn_gpu_t gmx_unused *nb,
- const Nbnxm::AtomLocality gmx_unused aLocality) GPU_FUNC_TERM_WITH_RETURN(false)
+ const Nbnxm::AtomLocality gmx_unused aLocality) GPU_FUNC_TERM_WITH_RETURN(false);
/*! \brief Initialization for F buffer operations on GPU */
CUDA_FUNC_QUALIFIER
void nbnxn_gpu_init_add_nbat_f_to_f(const int gmx_unused *cell,
gmx_nbnxn_gpu_t gmx_unused *gpu_nbv,
- int gmx_unused natoms_total) CUDA_FUNC_TERM
+ int gmx_unused natoms_total) CUDA_FUNC_TERM;
/*! \brief F buffer operations on GPU: adds nb format force to rvec format. */
CUDA_FUNC_QUALIFIER
gmx_nbnxn_gpu_t gmx_unused *gpu_nbv,
int gmx_unused atomStart,
int gmx_unused nAtoms,
- GpuBufferOpsAccumulateForce gmx_unused accumulateForce) CUDA_FUNC_TERM
+ GpuBufferOpsAccumulateForce gmx_unused accumulateForce) CUDA_FUNC_TERM;
/*! \brief Copy force buffer from CPU to GPU */
CUDA_FUNC_QUALIFIER
void nbnxn_launch_copy_f_to_gpu(const AtomLocality gmx_unused atomLocality,
const Nbnxm::GridSet gmx_unused &gridSet,
gmx_nbnxn_gpu_t gmx_unused *nb,
- rvec gmx_unused *f) CUDA_FUNC_TERM
+ rvec gmx_unused *f) CUDA_FUNC_TERM;
/*! \brief Copy force buffer from GPU to CPU */
CUDA_FUNC_QUALIFIER
void nbnxn_launch_copy_f_from_gpu(const AtomLocality gmx_unused atomLocality,
const Nbnxm::GridSet gmx_unused &gridSet,
gmx_nbnxn_gpu_t gmx_unused *nb,
- rvec gmx_unused *f) CUDA_FUNC_TERM
+ rvec gmx_unused *f) CUDA_FUNC_TERM;
/*! \brief Wait for GPU stream to complete */
CUDA_FUNC_QUALIFIER
void nbnxn_wait_for_gpu_force_reduction(const AtomLocality gmx_unused atomLocality,
- gmx_nbnxn_gpu_t gmx_unused *nb) CUDA_FUNC_TERM
+ gmx_nbnxn_gpu_t gmx_unused *nb) CUDA_FUNC_TERM;
} // namespace Nbnxm