src/gromacs/nbnxm/kernels_simd_2xmm/kernel_common.h: warning: should include "nbnxm_simd.h"
src/gromacs/nbnxm/kernels_simd_4xm/kernel_common.h: warning: should include "nbnxm_simd.h"
+# This seems to be a false positive
+src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h: error: gmx_nbnxm_gpu_t: is in internal file(s), but appears in public documentation
+
# Temporary while we change the SIMD implementation
src/gromacs/simd/impl_sparc64_hpc_ace/impl_sparc64_hpc_ace_common.h: warning: should include "simd.h"
#include "gromacs/mdtypes/state_propagator_data_gpu.h"
#include "gromacs/nbnxm/gpu_data_mgmt.h"
#include "gromacs/nbnxm/nbnxm.h"
+#include "gromacs/nbnxm/nbnxm_gpu.h"
#include "gromacs/pbcutil/ishift.h"
#include "gromacs/pbcutil/mshift.h"
#include "gromacs/pbcutil/pbc.h"
* This file is part of the GROMACS molecular simulation package.
*
* Copyright (c) 2012-2018, The GROMACS development team.
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "grid.h"
#include "gridset.h"
#include "nbnxm_geometry.h"
+#include "nbnxm_gpu.h"
#include "pairlist.h"
using namespace gmx; // TODO: Remove when this file is moved into gmx namespace
void nbnxn_atomdata_x_to_nbat_x_gpu(const Nbnxm::GridSet& gridSet,
const gmx::AtomLocality locality,
bool fillLocal,
- gmx_nbnxn_gpu_t* gpu_nbv,
+ gmx_nbnxm_gpu_t* gpu_nbv,
DeviceBuffer<float> d_x,
GpuEventSynchronizer* xReadyOnDevice)
{
const Nbnxm::GridSet& gridSet,
void* pmeForcesDevice,
gmx::ArrayRef<GpuEventSynchronizer* const> dependencyList,
- gmx_nbnxn_gpu_t* gpu_nbv,
+ gmx_nbnxm_gpu_t* gpu_nbv,
bool useGpuFPmeReduction,
bool accumulateForce)
{
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
+/*! \libinternal \file
+ * \brief
+ * Functionality for per-atom data in the nbnxm module
+ *
+ * \author Berk Hess <hess@kth.se>
+ * \ingroup module_nbnxm
+ * \inlibraryapi
+ */
+
#ifndef GMX_NBNXN_ATOMDATA_H
#define GMX_NBNXN_ATOMDATA_H
#include "gromacs/utility/bitmask.h"
#include "gromacs/utility/real.h"
-#include "gpu_types.h"
-
namespace gmx
{
class MDLogger;
}
+struct gmx_nbnxm_gpu_t;
struct nbnxn_atomdata_t;
struct nonbonded_verlet_t;
struct t_mdatoms;
enum class KernelType;
} // namespace Nbnxm
-/* Convenience type for vector with aligned memory */
+//! Convenience type for vector with aligned memory
template<typename T>
using AlignedVector = std::vector<T, gmx::AlignedAllocator<T>>;
AlignedVector<real> VSc; // Temporary SIMD Coulomb group energy storage
};
-/* Block size in atoms for the non-bonded thread force-buffer reduction,
- * should be a multiple of all cell and x86 SIMD sizes (i.e. 2, 4 and 8).
+/*! \brief Block size in atoms for the non-bonded thread force-buffer reduction.
+ *
+ * Should be a multiple of all cell and x86 SIMD sizes (i.e. 2, 4 and 8).
* Should be small to reduce the reduction and zeroing cost,
* but too small will result in overhead.
* Currently the block size is NBNXN_BUFFERFLAG_SIZE*3*sizeof(real)=192 bytes.
# define NBNXN_BUFFERFLAG_SIZE 16
#endif
-/* We store the reduction flags as gmx_bitmask_t.
+/*! \brief We store the reduction flags as gmx_bitmask_t.
* This limits the number of flags to BITMASK_SIZE.
*/
#define NBNXN_BUFFERFLAG_MAX_THREADS (BITMASK_SIZE)
-/* Flags for telling if threads write to force output buffers */
+/*! \internal
+ * \brief Flags for telling if threads write to force output buffers */
typedef struct
{
- int nflag; /* The number of flag blocks */
- gmx_bitmask_t* flag; /* Bit i is set when thread i writes to a cell-block */
- int flag_nalloc; /* Allocation size of cxy_flag */
+ //! The number of flag blocks
+ int nflag;
+ //! Bit i is set when thread i writes to a cell-block
+ gmx_bitmask_t* flag;
+ //! Allocation size of flag
+ int flag_nalloc;
} nbnxn_buffer_flags_t;
-/* LJ combination rules: geometric, Lorentz-Berthelot, none */
+/*! \brief LJ combination rules: geometric, Lorentz-Berthelot, none */
enum
{
ljcrGEOM,
ljcrNR
};
-/* Struct that stores atom related data for the nbnxn module
+/*! \internal
+ * \brief Struct that stores atom related data for the nbnxn module
*
* Note: performance would improve slightly when all std::vector containers
* in this struct would not initialize during resize().
gmx::HostVector<int> energrp;
};
- // Diagonal and topology exclusion helper data for all SIMD kernels
+ /*! \internal
+ * \brief Diagonal and topology exclusion helper data for all SIMD kernels. */
struct SimdMasks
{
SimdMasks();
- // Helper data for setting up diagonal exclusion masks in the SIMD 4xN kernels
+ //! Helper data for setting up diagonal exclusion masks in the SIMD 4xN kernels
AlignedVector<real> diagonal_4xn_j_minus_i;
- // Helper data for setting up diaginal exclusion masks in the SIMD 2xNN kernels
+ //! Helper data for setting up diagonal exclusion masks in the SIMD 2xNN kernels
AlignedVector<real> diagonal_2xnn_j_minus_i;
- // Filters for topology exclusion masks for the SIMD kernels
+ //! Filters for topology exclusion masks for the SIMD kernels
AlignedVector<uint32_t> exclusion_filter;
- // Filters for topology exclusion masks for double SIMD kernels without SIMD int32 logical support
+ //! Filters for topology exclusion masks for double SIMD kernels without SIMD int32 logical support
AlignedVector<uint64_t> exclusion_filter64;
- // Array of masks needed for exclusions
+ //! Array of masks needed for exclusions
AlignedVector<real> interaction_array;
};
- /* Constructor
+ /*! \brief Constructor
*
* \param[in] pinningPolicy Sets the pinning policy for all data that might be transfered to a GPU
*/
nbnxn_atomdata_t(gmx::PinningPolicy pinningPolicy);
- /* Returns a const reference to the parameters */
+ //! Returns a const reference to the parameters
const Params& params() const { return params_; }
- /* Returns a non-const reference to the parameters */
+ //! Returns a non-const reference to the parameters
Params& paramsDeprecated() { return params_; }
- /* Returns the current total number of atoms stored */
+ //! Returns the current total number of atoms stored
int numAtoms() const { return numAtoms_; }
- /* Return the coordinate buffer, and q with xFormat==nbatXYZQ */
+ //! Return the coordinate buffer, and q with xFormat==nbatXYZQ
gmx::ArrayRef<const real> x() const { return x_; }
- /* Return the coordinate buffer, and q with xFormat==nbatXYZQ */
+ //! Return the coordinate buffer, and q with xFormat==nbatXYZQ
gmx::ArrayRef<real> x() { return x_; }
- /* Resizes the coordinate buffer and sets the number of atoms */
+ //! Resizes the coordinate buffer and sets the number of atoms
void resizeCoordinateBuffer(int numAtoms);
- /* Resizes the force buffers for the current number of atoms */
+ //! Resizes the force buffers for the current number of atoms
void resizeForceBuffers();
private:
- // The LJ and charge parameters
+ //! The LJ and charge parameters
Params params_;
- // The total number of atoms currently stored
+ //! The total number of atoms currently stored
int numAtoms_;
public:
- int natoms_local; /* Number of local atoms */
- int XFormat; /* The format of x (and q), enum */
- int FFormat; /* The format of f, enum */
- gmx_bool bDynamicBox; /* Do we need to update shift_vec every step? */
- gmx::HostVector<gmx::RVec> shift_vec; /* Shift vectors, copied from t_forcerec */
- int xstride; /* stride for a coordinate in x (usually 3 or 4) */
- int fstride; /* stride for a coordinate in f (usually 3 or 4) */
+ //! Number of local atoms
+ int natoms_local;
+ //! The format of x (and q), enum
+ int XFormat;
+ //! The format of f, enum
+ int FFormat;
+ //! Do we need to update shift_vec every step?
+ gmx_bool bDynamicBox;
+ //! Shift vectors, copied from t_forcerec
+ gmx::HostVector<gmx::RVec> shift_vec;
+ //! stride for a coordinate in x (usually 3 or 4)
+ int xstride;
+ //! stride for a coordinate in f (usually 3 or 4)
+ int fstride;
+
private:
- gmx::HostVector<real> x_; /* x and possibly q, size natoms*xstride */
+ //! x and possibly q, size natoms*xstride
+ gmx::HostVector<real> x_;
public:
- // Masks for handling exclusions in the SIMD kernels
+ //! Masks for handling exclusions in the SIMD kernels
const SimdMasks simdMasks;
- /* Output data */
- std::vector<nbnxn_atomdata_output_t> out; /* Output data structures, 1 per thread */
-
- /* Reduction related data */
- gmx_bool bUseBufferFlags; /* Use the flags or operate on all atoms */
- nbnxn_buffer_flags_t buffer_flags; /* Flags for buffer zeroing+reduc. */
- gmx_bool bUseTreeReduce; /* Use tree for force reduction */
- tMPI_Atomic* syncStep; /* Synchronization step for tree reduce */
+ //! Output data structures, 1 per thread
+ std::vector<nbnxn_atomdata_output_t> out;
+
+ //! Reduction related data
+ //! \{
+ //! Use the flags or operate on all atoms
+ gmx_bool bUseBufferFlags;
+ //! Flags for buffer zeroing+reduc.
+ nbnxn_buffer_flags_t buffer_flags;
+ //! Use tree for force reduction
+ gmx_bool bUseTreeReduce;
+ //! Synchronization step for tree reduce
+ tMPI_Atomic* syncStep;
+ //! \}
};
-/* Copy na rvec elements from x to xnb using nbatFormat, start dest a0,
+/*! \brief Copy na rvec elements from x to xnb using nbatFormat, start dest a0,
* and fills up to na_round with coordinates that are far away.
*/
void copy_rvec_to_nbat_real(const int* a, int na, int na_round, const rvec* x, int nbatFormat, real* xnb, int a0);
+//! Describes the combination rule in use by this force field
enum
{
enbnxninitcombruleDETECT,
enbnxninitcombruleNONE
};
-/* Initialize the non-bonded atom data structure.
+/*! \brief Initialize the non-bonded atom data structure.
+ *
* The enum for nbatXFormat is in the file defining nbnxn_atomdata_t.
* Copy the ntypes*ntypes*2 sized nbfp non-bonded parameter list
* to the atom data structure.
int n_energygroups,
int nout);
+//! Sets the atomdata after pair search
void nbnxn_atomdata_set(nbnxn_atomdata_t* nbat,
const Nbnxm::GridSet& gridSet,
const t_mdatoms* mdatoms,
const int* atinfo);
-/* Copy the shift vectors to nbat */
+//! Copy the shift vectors to nbat
void nbnxn_atomdata_copy_shiftvec(gmx_bool dynamic_box, rvec* shift_vec, nbnxn_atomdata_t* nbat);
/*! \brief Transform coordinates to xbat layout
void nbnxn_atomdata_x_to_nbat_x_gpu(const Nbnxm::GridSet& gridSet,
gmx::AtomLocality locality,
bool fillLocal,
- gmx_nbnxn_gpu_t* gpu_nbv,
+ gmx_nbnxm_gpu_t* gpu_nbv,
DeviceBuffer<float> d_x,
GpuEventSynchronizer* xReadyOnDevice);
const Nbnxm::GridSet& gridSet,
void* pmeForcesDevice,
gmx::ArrayRef<GpuEventSynchronizer* const> dependencyList,
- gmx_nbnxn_gpu_t* gpu_nbv,
+ gmx_nbnxm_gpu_t* gpu_nbv,
bool useGpuFPmeReduction,
bool accumulateForce);
-/* Add the fshift force stored in nbat to fshift */
+//! Add the fshift force stored in nbat to fshift
void nbnxn_atomdata_add_nbat_fshift_to_fshift(const nbnxn_atomdata_t& nbat, gmx::ArrayRef<gmx::RVec> fshift);
-/* Get the atom start index and number of atoms for a given locality */
+//! Get the atom start index and number of atoms for a given locality
void nbnxn_get_atom_range(gmx::AtomLocality atomLocality,
const Nbnxm::GridSet& gridSet,
int* atomStart,
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#ifndef GMX_NBNXM_CLUSTERDISTANCEKERNELTYPE_H
#define GMX_NBNXM_CLUSTERDISTANCEKERNELTYPE_H
+#include "gromacs/nbnxm/atomdata.h"
#include "gromacs/simd/simd.h"
#include "gromacs/utility/gmxassert.h"
-#include "atomdata.h"
#include "pairlistparams.h"
//! The types of kernel for calculating the distance between pairs of atom clusters
* the local, this function records the event if called with the local stream as
* argument and inserts in the GPU stream a wait on the event on the nonlocal.
*/
-void nbnxnInsertNonlocalGpuDependency(const gmx_nbnxn_cuda_t* nb, const InteractionLocality interactionLocality)
+void nbnxnInsertNonlocalGpuDependency(const gmx_nbnxm_gpu_t* nb, const InteractionLocality interactionLocality)
{
cudaStream_t stream = nb->stream[interactionLocality];
}
/*! \brief Launch asynchronously the xq buffer host to device copy. */
-void gpu_copy_xq_to_gpu(gmx_nbnxn_cuda_t* nb, const nbnxn_atomdata_t* nbatom, const AtomLocality atomLocality)
+void gpu_copy_xq_to_gpu(gmx_nbnxm_gpu_t* nb, const nbnxn_atomdata_t* nbatom, const AtomLocality atomLocality)
{
GMX_ASSERT(nb, "Need a valid nbnxn_gpu object");
the local x+q H2D (and all preceding) tasks are complete and synchronize
with this event in the non-local stream before launching the non-bonded kernel.
*/
-void gpu_launch_kernel(gmx_nbnxn_cuda_t* nb, const gmx::StepWorkload& stepWork, const InteractionLocality iloc)
+void gpu_launch_kernel(gmx_nbnxm_gpu_t* nb, const gmx::StepWorkload& stepWork, const InteractionLocality iloc)
{
cu_atomdata_t* adat = nb->atdat;
cu_nbparam_t* nbp = nb->nbparam;
return shmem;
}
-void gpu_launch_kernel_pruneonly(gmx_nbnxn_cuda_t* nb, const InteractionLocality iloc, const int numParts)
+void gpu_launch_kernel_pruneonly(gmx_nbnxm_gpu_t* nb, const InteractionLocality iloc, const int numParts)
{
cu_atomdata_t* adat = nb->atdat;
cu_nbparam_t* nbp = nb->nbparam;
}
}
-void gpu_launch_cpyback(gmx_nbnxn_cuda_t* nb,
+void gpu_launch_cpyback(gmx_nbnxm_gpu_t* nb,
nbnxn_atomdata_t* nbatom,
const gmx::StepWorkload& stepWork,
const AtomLocality atomLocality)
/* X buffer operations on GPU: performs conversion from rvec to nb format. */
void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid& grid,
bool setFillerCoords,
- gmx_nbnxn_gpu_t* nb,
+ gmx_nbnxm_gpu_t* nb,
DeviceBuffer<float> d_x,
GpuEventSynchronizer* xReadyOnDevice,
const Nbnxm::AtomLocality locality,
*/
void nbnxn_gpu_add_nbat_f_to_f(const AtomLocality atomLocality,
DeviceBuffer<float> totalForcesDevice,
- gmx_nbnxn_gpu_t* nb,
+ gmx_nbnxm_gpu_t* nb,
void* pmeForcesDevice,
gmx::ArrayRef<GpuEventSynchronizer* const> dependencyList,
int atomStart,
#include <stdio.h>
#include <stdlib.h>
-// TODO We would like to move this down, but the way gmx_nbnxn_gpu_t
+// TODO We would like to move this down, but the way gmx_nbnxm_gpu_t
// is currently declared means this has to be before gpu_types.h
#include "nbnxm_cuda_types.h"
static unsigned int gpu_min_ci_balanced_factor = 44;
/* Fw. decl. */
-static void nbnxn_cuda_clear_e_fshift(gmx_nbnxn_cuda_t* nb);
+static void nbnxn_cuda_clear_e_fshift(gmx_nbnxm_gpu_t* nb);
/* Fw. decl, */
static void nbnxn_cuda_free_nbparam_table(cu_nbparam_t* nbparam);
}
/*! Initializes simulation constant data. */
-static void cuda_init_const(gmx_nbnxn_cuda_t* nb,
+static void cuda_init_const(gmx_nbnxm_gpu_t* nb,
const interaction_const_t* ic,
const PairlistParams& listParams,
const nbnxn_atomdata_t::Params& nbatParams)
nbnxn_cuda_clear_e_fshift(nb);
}
-gmx_nbnxn_cuda_t* gpu_init(const gmx_device_info_t* deviceInfo,
- const interaction_const_t* ic,
- const PairlistParams& listParams,
- const nbnxn_atomdata_t* nbat,
- int /*rank*/,
- gmx_bool bLocalAndNonlocal)
+gmx_nbnxm_gpu_t* gpu_init(const gmx_device_info_t* deviceInfo,
+ const interaction_const_t* ic,
+ const PairlistParams& listParams,
+ const nbnxn_atomdata_t* nbat,
+ int /*rank*/,
+ gmx_bool bLocalAndNonlocal)
{
cudaError_t stat;
- gmx_nbnxn_cuda_t* nb;
+ gmx_nbnxm_gpu_t* nb;
snew(nb, 1);
snew(nb->atdat, 1);
snew(nb->nbparam, 1);
return nb;
}
-void gpu_init_pairlist(gmx_nbnxn_cuda_t* nb, const NbnxnPairlistGpu* h_plist, const InteractionLocality iloc)
+void gpu_init_pairlist(gmx_nbnxm_gpu_t* nb, const NbnxnPairlistGpu* h_plist, const InteractionLocality iloc)
{
char sbuf[STRLEN];
bool bDoTime = (nb->bDoTime && !h_plist->sci.empty());
d_plist->haveFreshList = true;
}
-void gpu_upload_shiftvec(gmx_nbnxn_cuda_t* nb, const nbnxn_atomdata_t* nbatom)
+void gpu_upload_shiftvec(gmx_nbnxm_gpu_t* nb, const nbnxn_atomdata_t* nbatom)
{
cu_atomdata_t* adat = nb->atdat;
cudaStream_t ls = nb->stream[InteractionLocality::Local];
}
/*! Clears the first natoms_clear elements of the GPU nonbonded force output array. */
-static void nbnxn_cuda_clear_f(gmx_nbnxn_cuda_t* nb, int natoms_clear)
+static void nbnxn_cuda_clear_f(gmx_nbnxm_gpu_t* nb, int natoms_clear)
{
cudaError_t stat;
cu_atomdata_t* adat = nb->atdat;
}
/*! Clears nonbonded shift force output array and energy outputs on the GPU. */
-static void nbnxn_cuda_clear_e_fshift(gmx_nbnxn_cuda_t* nb)
+static void nbnxn_cuda_clear_e_fshift(gmx_nbnxm_gpu_t* nb)
{
cudaError_t stat;
cu_atomdata_t* adat = nb->atdat;
CU_RET_ERR(stat, "cudaMemsetAsync on e_el falied");
}
-void gpu_clear_outputs(gmx_nbnxn_cuda_t* nb, bool computeVirial)
+void gpu_clear_outputs(gmx_nbnxm_gpu_t* nb, bool computeVirial)
{
nbnxn_cuda_clear_f(nb, nb->atdat->natoms);
/* clear shift force array and energies if the outputs were
}
}
-void gpu_init_atomdata(gmx_nbnxn_cuda_t* nb, const nbnxn_atomdata_t* nbat)
+void gpu_init_atomdata(gmx_nbnxm_gpu_t* nb, const nbnxn_atomdata_t* nbat)
{
cudaError_t stat;
int nalloc, natoms;
}
}
-void gpu_free(gmx_nbnxn_cuda_t* nb)
+void gpu_free(gmx_nbnxm_gpu_t* nb)
{
cudaError_t stat;
cu_atomdata_t* atdat;
}
//! This function is documented in the header file
-gmx_wallclock_gpu_nbnxn_t* gpu_get_timings(gmx_nbnxn_cuda_t* nb)
+gmx_wallclock_gpu_nbnxn_t* gpu_get_timings(gmx_nbnxm_gpu_t* nb)
{
return (nb != nullptr && nb->bDoTime) ? nb->timings : nullptr;
}
}
}
-int gpu_min_ci_balanced(gmx_nbnxn_cuda_t* nb)
+int gpu_min_ci_balanced(gmx_nbnxm_gpu_t* nb)
{
return nb != nullptr ? gpu_min_ci_balanced_factor * nb->dev_info->prop.multiProcessorCount : 0;
}
-gmx_bool gpu_is_kernel_ewald_analytical(const gmx_nbnxn_cuda_t* nb)
+gmx_bool gpu_is_kernel_ewald_analytical(const gmx_nbnxm_gpu_t* nb)
{
return ((nb->nbparam->eeltype == eelCuEWALD_ANA) || (nb->nbparam->eeltype == eelCuEWALD_ANA_TWIN));
}
-void* gpu_get_command_stream(gmx_nbnxn_gpu_t* nb, const InteractionLocality iloc)
+void* gpu_get_command_stream(gmx_nbnxm_gpu_t* nb, const InteractionLocality iloc)
{
assert(nb);
return static_cast<void*>(&nb->stream[iloc]);
}
-void* gpu_get_xq(gmx_nbnxn_gpu_t* nb)
+void* gpu_get_xq(gmx_nbnxm_gpu_t* nb)
{
assert(nb);
return static_cast<void*>(nb->atdat->xq);
}
-void* gpu_get_f(gmx_nbnxn_gpu_t* nb)
+void* gpu_get_f(gmx_nbnxm_gpu_t* nb)
{
assert(nb);
return static_cast<void*>(nb->atdat->f);
}
-rvec* gpu_get_fshift(gmx_nbnxn_gpu_t* nb)
+rvec* gpu_get_fshift(gmx_nbnxm_gpu_t* nb)
{
assert(nb);
/* Initialization for X buffer operations on GPU. */
/* TODO Remove explicit pinning from host arrays from here and manage in a more natural way*/
-void nbnxn_gpu_init_x_to_nbat_x(const Nbnxm::GridSet& gridSet, gmx_nbnxn_gpu_t* gpu_nbv)
+void nbnxn_gpu_init_x_to_nbat_x(const Nbnxm::GridSet& gridSet, gmx_nbnxm_gpu_t* gpu_nbv)
{
cudaStream_t stream = gpu_nbv->stream[InteractionLocality::Local];
bool bDoTime = gpu_nbv->bDoTime;
/* Initialization for F buffer operations on GPU. */
void nbnxn_gpu_init_add_nbat_f_to_f(const int* cell,
- gmx_nbnxn_gpu_t* gpu_nbv,
+ gmx_nbnxm_gpu_t* gpu_nbv,
int natoms_total,
GpuEventSynchronizer* const localReductionDone)
{
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2012, The GROMACS development team.
- * Copyright (c) 2013-2019, by the GROMACS development team, led by
+ * Copyright (c) 2013-2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
class GpuEventSynchronizer;
-/** \internal
+/*! \internal
* \brief Main data structure for CUDA nonbonded force calculations.
*/
-struct gmx_nbnxn_cuda_t
+struct gmx_nbnxm_gpu_t
{
- //! CUDA device information
+ /*! \brief CUDA device information */
const gmx_device_info_t* dev_info;
- //! true if doing both local/non-local NB work on GPU
+ /*! \brief true if doing both local/non-local NB work on GPU */
bool bUseTwoStreams;
- //! atom data
+ /*! \brief atom data */
cu_atomdata_t* atdat;
- //! f buf ops cell index mapping
+ /*! \brief f buf ops cell index mapping */
int* cell;
- //! number of indices in cell buffer
+ /*! \brief number of indices in cell buffer */
int ncell;
- //! number of indices allocated in cell buffer
+ /*! \brief number of indices allocated in cell buffer */
int ncell_alloc;
- //! array of atom indices
+ /*! \brief array of atom indices */
int* atomIndices;
- //! size of atom indices
+ /*! \brief size of atom indices */
int atomIndicesSize;
- //! size of atom indices allocated in device buffer
+ /*! \brief size of atom indices allocated in device buffer */
int atomIndicesSize_alloc;
- //! x buf ops num of atoms
+ /*! \brief x buf ops num of atoms */
int* cxy_na;
- //! number of elements in cxy_na
+ /*! \brief number of elements in cxy_na */
int ncxy_na;
- //! number of elements allocated allocated in device buffer
+ /*! \brief number of elements allocated in device buffer */
int ncxy_na_alloc;
- //! x buf ops cell index mapping
+ /*! \brief x buf ops cell index mapping */
int* cxy_ind;
- //! number of elements in cxy_ind
+ /*! \brief number of elements in cxy_ind */
int ncxy_ind;
- //! number of elements allocated allocated in device buffer
+ /*! \brief number of elements allocated in device buffer */
int ncxy_ind_alloc;
- //! parameters required for the non-bonded calc.
+ /*! \brief parameters required for the non-bonded calc. */
cu_nbparam_t* nbparam;
- //! pair-list data structures (local and non-local)
+ /*! \brief pair-list data structures (local and non-local) */
gmx::EnumerationArray<Nbnxm::InteractionLocality, cu_plist_t*> plist;
- //! staging area where fshift/energies get downloaded
+ /*! \brief staging area where fshift/energies get downloaded */
nb_staging_t nbst;
- //! local and non-local GPU streams
+ /*! \brief local and non-local GPU streams */
gmx::EnumerationArray<Nbnxm::InteractionLocality, cudaStream_t> stream;
- /** events used for synchronization */
- cudaEvent_t nonlocal_done; /**< event triggered when the non-local non-bonded kernel
- is done (and the local transfer can proceed) */
- cudaEvent_t misc_ops_and_local_H2D_done; /**< event triggered when the tasks issued in
- the local stream that need to precede the
- non-local force or buffer operation calculations are
- done (e.g. f buffer 0-ing, local x/q H2D, buffer op
- initialization in local stream that is required also
- by nonlocal stream ) */
-
- //! True if there has been local/nonlocal GPU work, either bonded or nonbonded, scheduled
- // to be executed in the current domain. As long as bonded work is not split up into
- // local/nonlocal, if there is bonded GPU work, both flags will be true.
+ /*! \brief Events used for synchronization */
+ /*! \{ */
+ /*! \brief Event triggered when the non-local non-bonded
+ * kernel is done (and the local transfer can proceed) */
+ cudaEvent_t nonlocal_done;
+ /*! \brief Event triggered when the tasks issued in the local
+ * stream that need to precede the non-local force or buffer
+ * operation calculations are done (e.g. f buffer 0-ing, local
+ * x/q H2D, buffer op initialization in local stream that is
+ * required also by nonlocal stream) */
+ cudaEvent_t misc_ops_and_local_H2D_done;
+ /*! \} */
+
+ /*! \brief True if there is work for the current domain in the
+ * respective locality.
+ *
+ * This includes local/nonlocal GPU work, either bonded or
+ * nonbonded, scheduled to be executed in the current
+ * domain. As long as bonded work is not split up into
+ * local/nonlocal, if there is bonded GPU work, both flags
+ * will be true. */
gmx::EnumerationArray<Nbnxm::InteractionLocality, bool> haveWork;
- /*! \brief Pointer to event synchronizer triggered when the local GPU buffer ops / reduction is complete
+ /*! \brief Pointer to event synchronizer triggered when the local
+ * GPU buffer ops / reduction is complete
*
- * \note That the synchronizer is managed outside of this module in StatePropagatorDataGpu.
+ * \note The synchronizer is managed outside of this module
+ * in StatePropagatorDataGpu.
*/
GpuEventSynchronizer* localFReductionDone;
- GpuEventSynchronizer* xNonLocalCopyD2HDone; /**< event triggered when
- non-local coordinate buffer has been
- copied from device to host*/
+ /*! \brief Event triggered when non-local coordinate buffer
+ * has been copied from device to host. */
+ GpuEventSynchronizer* xNonLocalCopyD2HDone;
/* NOTE: With current CUDA versions (<=5.0) timing doesn't work with multiple
* concurrent streams, so we won't time if both l/nl work is done on GPUs.
* Timer init/uninit is still done even with timing off so only the condition
* setting bDoTime needs to be change if this CUDA "feature" gets fixed. */
- //! True if event-based timing is enabled.
+ /*! \brief True if event-based timing is enabled. */
bool bDoTime;
- //! CUDA event-based timers.
+ /*! \brief CUDA event-based timers. */
cu_timers_t* timers;
- //! Timing data. TODO: deprecate this and query timers for accumulated data instead
+ /*! \brief Timing data. TODO: deprecate this and query timers for accumulated data instead */
gmx_wallclock_gpu_nbnxn_t* timings;
};
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
//NOLINTNEXTLINE(misc-definitions-in-headers)
-void setupGpuShortRangeWork(gmx_nbnxn_gpu_t* nb,
+void setupGpuShortRangeWork(gmx_nbnxm_gpu_t* nb,
const gmx::GpuBonded* gpuBonded,
const gmx::InteractionLocality iLocality)
{
* \param[inout] nb Pointer to the nonbonded GPU data structure
* \param[in] iLocality Interaction locality identifier
*/
-static bool haveGpuShortRangeWork(const gmx_nbnxn_gpu_t& nb, const gmx::InteractionLocality iLocality)
+static bool haveGpuShortRangeWork(const gmx_nbnxm_gpu_t& nb, const gmx::InteractionLocality iLocality)
{
return nb.haveWork[iLocality];
}
//NOLINTNEXTLINE(misc-definitions-in-headers)
-bool haveGpuShortRangeWork(const gmx_nbnxn_gpu_t* nb, const gmx::AtomLocality aLocality)
+bool haveGpuShortRangeWork(const gmx_nbnxm_gpu_t* nb, const gmx::AtomLocality aLocality)
{
GMX_ASSERT(nb, "Need a valid nbnxn_gpu object");
* \todo Move into shared source file with gmx_compile_cpp_as_cuda
*/
//NOLINTNEXTLINE(misc-definitions-in-headers)
-bool gpu_try_finish_task(gmx_nbnxn_gpu_t* nb,
+bool gpu_try_finish_task(gmx_nbnxm_gpu_t* nb,
const gmx::StepWorkload& stepWork,
const AtomLocality aloc,
real* e_lj,
* \return The number of cycles the gpu wait took
*/
//NOLINTNEXTLINE(misc-definitions-in-headers) TODO: move into source file
-float gpu_wait_finish_task(gmx_nbnxn_gpu_t* nb,
+float gpu_wait_finish_task(gmx_nbnxm_gpu_t* nb,
const gmx::StepWorkload& stepWork,
AtomLocality aloc,
real* e_lj,
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2017,2019, by the GROMACS development team, led by
+ * Copyright (c) 2017,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
* local part of the force array also depends on the non-local kernel.
* The skip of the local kernel is taken care of separately.
*/
-static inline bool canSkipNonbondedWork(const gmx_nbnxn_gpu_t& nb, InteractionLocality iloc)
+static inline bool canSkipNonbondedWork(const gmx_nbnxm_gpu_t& nb, InteractionLocality iloc)
{
assert(nb.plist[iloc]);
return (iloc == InteractionLocality::NonLocal && nb.plist[iloc]->nsci == 0);
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2014,2015,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015,2017,2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "gromacs/mdtypes/interaction_const.h"
#include "gromacs/mdtypes/locality.h"
-#include "gpu_types.h"
-
-struct NbnxnPairlistGpu;
-struct nbnxn_atomdata_t;
-struct PairlistParams;
-struct gmx_wallclock_gpu_nbnxn_t;
+struct gmx_nbnxm_gpu_t;
struct gmx_gpu_info_t;
struct gmx_device_info_t;
+struct gmx_wallclock_gpu_nbnxn_t;
+struct nbnxn_atomdata_t;
+struct NbnxnPairlistGpu;
+struct PairlistParams;
namespace Nbnxm
{
/** Initializes the data structures related to GPU nonbonded calculations. */
GPU_FUNC_QUALIFIER
-gmx_nbnxn_gpu_t* gpu_init(const gmx_device_info_t gmx_unused* deviceInfo,
+gmx_nbnxm_gpu_t* gpu_init(const gmx_device_info_t gmx_unused* deviceInfo,
const interaction_const_t gmx_unused* ic,
const PairlistParams gmx_unused& listParams,
const nbnxn_atomdata_t gmx_unused* nbat,
/** Initializes pair-list data for GPU, called at every pair search step. */
GPU_FUNC_QUALIFIER
-void gpu_init_pairlist(gmx_nbnxn_gpu_t gmx_unused* nb,
+void gpu_init_pairlist(gmx_nbnxm_gpu_t gmx_unused* nb,
const struct NbnxnPairlistGpu gmx_unused* h_nblist,
gmx::InteractionLocality gmx_unused iloc) GPU_FUNC_TERM;
/** Initializes atom-data on the GPU, called at every pair search step. */
GPU_FUNC_QUALIFIER
-void gpu_init_atomdata(gmx_nbnxn_gpu_t gmx_unused* nb, const nbnxn_atomdata_t gmx_unused* nbat) GPU_FUNC_TERM;
+void gpu_init_atomdata(gmx_nbnxm_gpu_t gmx_unused* nb, const nbnxn_atomdata_t gmx_unused* nbat) GPU_FUNC_TERM;
/*! \brief Re-generate the GPU Ewald force table, resets rlist, and update the
* electrostatic type switching to twin cut-off (or back) if needed.
/** Uploads shift vector to the GPU if the box is dynamic (otherwise just returns). */
GPU_FUNC_QUALIFIER
-void gpu_upload_shiftvec(gmx_nbnxn_gpu_t gmx_unused* nb, const nbnxn_atomdata_t gmx_unused* nbatom) GPU_FUNC_TERM;
+void gpu_upload_shiftvec(gmx_nbnxm_gpu_t gmx_unused* nb, const nbnxn_atomdata_t gmx_unused* nbatom) GPU_FUNC_TERM;
/** Clears GPU outputs: nonbonded force, shift force and energy. */
GPU_FUNC_QUALIFIER
-void gpu_clear_outputs(gmx_nbnxn_gpu_t gmx_unused* nb, bool gmx_unused computeVirial) GPU_FUNC_TERM;
+void gpu_clear_outputs(gmx_nbnxm_gpu_t gmx_unused* nb, bool gmx_unused computeVirial) GPU_FUNC_TERM;
/** Frees all GPU resources used for the nonbonded calculations. */
GPU_FUNC_QUALIFIER
-void gpu_free(gmx_nbnxn_gpu_t gmx_unused* nb) GPU_FUNC_TERM;
+void gpu_free(gmx_nbnxm_gpu_t gmx_unused* nb) GPU_FUNC_TERM;
/** Returns the GPU timings structure or NULL if GPU is not used or timing is off. */
GPU_FUNC_QUALIFIER
-struct gmx_wallclock_gpu_nbnxn_t* gpu_get_timings(gmx_nbnxn_gpu_t gmx_unused* nb)
+struct gmx_wallclock_gpu_nbnxn_t* gpu_get_timings(gmx_nbnxm_gpu_t gmx_unused* nb)
GPU_FUNC_TERM_WITH_RETURN(nullptr);
/** Resets nonbonded GPU timings. */
/** Calculates the minimum size of proximity lists to improve SM load balance
* with GPU non-bonded kernels. */
GPU_FUNC_QUALIFIER
-int gpu_min_ci_balanced(gmx_nbnxn_gpu_t gmx_unused* nb) GPU_FUNC_TERM_WITH_RETURN(-1);
+int gpu_min_ci_balanced(gmx_nbnxm_gpu_t gmx_unused* nb) GPU_FUNC_TERM_WITH_RETURN(-1);
/** Returns if analytical Ewald GPU kernels are used. */
GPU_FUNC_QUALIFIER
-gmx_bool gpu_is_kernel_ewald_analytical(const gmx_nbnxn_gpu_t gmx_unused* nb)
+gmx_bool gpu_is_kernel_ewald_analytical(const gmx_nbnxm_gpu_t gmx_unused* nb)
GPU_FUNC_TERM_WITH_RETURN(FALSE);
/** Returns an opaque pointer to the GPU command stream
* Note: CUDA only.
*/
CUDA_FUNC_QUALIFIER
-void* gpu_get_command_stream(gmx_nbnxn_gpu_t gmx_unused* nb, gmx::InteractionLocality gmx_unused iloc)
+void* gpu_get_command_stream(gmx_nbnxm_gpu_t gmx_unused* nb, gmx::InteractionLocality gmx_unused iloc)
CUDA_FUNC_TERM_WITH_RETURN(nullptr);
/** Returns an opaque pointer to the GPU coordinate+charge array
* Note: CUDA only.
*/
CUDA_FUNC_QUALIFIER
-void* gpu_get_xq(gmx_nbnxn_gpu_t gmx_unused* nb) CUDA_FUNC_TERM_WITH_RETURN(nullptr);
+void* gpu_get_xq(gmx_nbnxm_gpu_t gmx_unused* nb) CUDA_FUNC_TERM_WITH_RETURN(nullptr);
/** Returns an opaque pointer to the GPU force array
* Note: CUDA only.
*/
CUDA_FUNC_QUALIFIER
-void* gpu_get_f(gmx_nbnxn_gpu_t gmx_unused* nb) CUDA_FUNC_TERM_WITH_RETURN(nullptr);
+void* gpu_get_f(gmx_nbnxm_gpu_t gmx_unused* nb) CUDA_FUNC_TERM_WITH_RETURN(nullptr);
/** Returns an opaque pointer to the GPU shift force array
* Note: CUDA only.
*/
CUDA_FUNC_QUALIFIER
-rvec* gpu_get_fshift(gmx_nbnxn_gpu_t gmx_unused* nb) CUDA_FUNC_TERM_WITH_RETURN(nullptr);
+rvec* gpu_get_fshift(gmx_nbnxm_gpu_t gmx_unused* nb) CUDA_FUNC_TERM_WITH_RETURN(nullptr);
} // namespace Nbnxm
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2014,2015,2019, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "gromacs/utility/basedefinitions.h"
-#include "gpu_types.h"
+struct gmx_nbnxm_gpu_t;
/*! \brief Handles any JIT compilation of nbnxn kernels for the selected device */
-OPENCL_FUNC_QUALIFIER void nbnxn_gpu_compile_kernels(gmx_nbnxn_gpu_t gmx_unused* nb) OPENCL_FUNC_TERM;
+OPENCL_FUNC_QUALIFIER void nbnxn_gpu_compile_kernels(gmx_nbnxm_gpu_t gmx_unused* nb) OPENCL_FUNC_TERM;
#endif
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2018 by the GROMACS development team.
- * Copyright (c) 2019,2020, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*! \libinternal \file
- * \brief Sets gmx_nbnxn_gpu_t to the correct type depending on the build
- *
- * \ingroup module_nbnxm
- */
-
-#ifndef GMX_NBNXN_GPU_TYPES_H
-#define GMX_NBNXN_GPU_TYPES_H
-
-#include "config.h"
-
-#ifndef DOXYGEN
-
-# if GMX_GPU == GMX_GPU_OPENCL
-struct gmx_nbnxn_ocl_t;
-using gmx_nbnxn_gpu_t = gmx_nbnxn_ocl_t;
-# endif
-
-# if GMX_GPU == GMX_GPU_CUDA
-struct gmx_nbnxn_cuda_t;
-using gmx_nbnxn_gpu_t = gmx_nbnxn_cuda_t;
-# endif
-
-# if GMX_GPU == GMX_GPU_NONE
-using gmx_nbnxn_gpu_t = int;
-# endif
-
-#endif // !DOXYGEN
-
-#endif
#include "gromacs/mdlib/gmx_omp_nthreads.h"
#include "gromacs/mdlib/updategroupscog.h"
#include "gromacs/mdtypes/forcerec.h" // only for GET_CGINFO_*
+#include "gromacs/nbnxm/atomdata.h"
#include "gromacs/simd/simd.h"
#include "gromacs/simd/vector_operations.h"
-#include "atomdata.h"
#include "boundingboxes.h"
#include "gridsetdata.h"
#include "nbnxm_geometry.h"
#include "gromacs/mdlib/gmx_omp_nthreads.h"
#include "gromacs/mdlib/updategroupscog.h"
+#include "gromacs/nbnxm/atomdata.h"
#include "gromacs/utility/fatalerror.h"
-#include "atomdata.h"
-
namespace Nbnxm
{
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include <vector>
+#include "gromacs/gpu_utils/hostallocator.h"
namespace Nbnxm
{
#include "gromacs/math/vectypes.h"
/* nbnxn_atomdata_t and nbnxn_pairlist_t could be forward declared, but that requires modifications in all SIMD kernel files */
+#include "gromacs/nbnxm/atomdata.h"
#include "gromacs/utility/real.h"
-#include "atomdata.h"
#include "pairlist.h"
struct interaction_const_t;
#include "gromacs/utility/real.h"
#include "kernel_common.h"
+#include "nbnxm_gpu.h"
#include "nbnxm_simd.h"
#include "pairlistset.h"
#include "pairlistsets.h"
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "nbnxm.h"
#include "gromacs/domdec/domdec_struct.h"
+#include "gromacs/nbnxm/atomdata.h"
#include "gromacs/timing/wallcycle.h"
-#include "atomdata.h"
+#include "nbnxm_gpu.h"
#include "pairlistsets.h"
#include "pairsearch.h"
/* Skip the reduction if there was no short-range GPU work to do
* (either NB or both NB and bonded work). */
- if (!pairlistIsSimple() && !haveGpuShortRangeWork(locality))
+ if (!pairlistIsSimple() && !Nbnxm::haveGpuShortRangeWork(gpu_nbv, locality))
{
return;
}
/* Skip the reduction if there was no short-range GPU work to do
* (either NB or both NB and bonded work). */
- if (!pairlistIsSimple() && !haveGpuShortRangeWork(locality))
+ if (!pairlistIsSimple() && !Nbnxm::haveGpuShortRangeWork(gpu_nbv, locality))
{
return;
}
pairlistSets_->changePairlistRadii(rlistOuter, rlistInner);
}
+void nonbonded_verlet_t::setupGpuShortRangeWork(const gmx::GpuBonded* gpuBonded,
+ const gmx::InteractionLocality iLocality)
+{
+ if (useGpu() && !emulateGpu())
+ {
+ Nbnxm::setupGpuShortRangeWork(gpu_nbv, gpuBonded, iLocality);
+ }
+}
+
void nonbonded_verlet_t::atomdata_init_copy_x_to_nbat_x_gpu()
{
Nbnxm::nbnxn_gpu_init_x_to_nbat_x(pairSearch_->gridSet(), gpu_nbv);
#include "gromacs/mdtypes/locality.h"
#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/enumerationhelpers.h"
-#include "gromacs/utility/range.h"
#include "gromacs/utility/real.h"
-// TODO: Remove this include
-#include "nbnxm_gpu.h"
-
struct gmx_device_info_t;
struct gmx_domdec_zones_t;
struct gmx_enerdata_t;
struct gmx_hw_info_t;
struct gmx_mtop_t;
+struct gmx_nbnxm_gpu_t;
struct gmx_wallcycle;
struct interaction_const_t;
+struct nbnxn_atomdata_t;
struct nonbonded_verlet_t;
class PairSearch;
class PairlistSets;
namespace gmx
{
class ForceWithShiftForces;
+class GpuBonded;
template<typename>
class ListOfLists;
class MDLogger;
+template<typename>
+class Range;
+class StepWorkload;
class UpdateGroupsCog;
} // namespace gmx
std::unique_ptr<PairSearch> pairSearch,
std::unique_ptr<nbnxn_atomdata_t> nbat,
const Nbnxm::KernelSetup& kernelSetup,
- gmx_nbnxn_gpu_t* gpu_nbv,
+ gmx_nbnxm_gpu_t* gpu_nbv,
gmx_wallcycle* wcycle);
~nonbonded_verlet_t();
void changePairlistRadii(real rlistOuter, real rlistInner);
//! Set up internal flags that indicate what type of short-range work there is.
- void setupGpuShortRangeWork(const gmx::GpuBonded* gpuBonded, const gmx::InteractionLocality iLocality)
- {
- if (useGpu() && !emulateGpu())
- {
- Nbnxm::setupGpuShortRangeWork(gpu_nbv, gpuBonded, iLocality);
- }
- }
-
- //! Returns true if there is GPU short-range work for the given atom locality.
- bool haveGpuShortRangeWork(const gmx::AtomLocality aLocality)
- {
- return ((useGpu() && !emulateGpu()) && Nbnxm::haveGpuShortRangeWork(gpu_nbv, aLocality));
- }
+ void setupGpuShortRangeWork(const gmx::GpuBonded* gpuBonded, gmx::InteractionLocality iLocality);
// TODO: Make all data members private
public:
public:
//! GPU Nbnxm data, only used with a physical GPU (TODO: use unique_ptr)
- gmx_nbnxn_gpu_t* gpu_nbv;
+ gmx_nbnxm_gpu_t* gpu_nbv;
};
namespace Nbnxm
#include "gromacs/gpu_utils/gpu_macros.h"
#include "gromacs/math/vectypes.h"
#include "gromacs/mdtypes/locality.h"
+#include "gromacs/nbnxm/atomdata.h"
#include "gromacs/utility/basedefinitions.h"
#include "gromacs/utility/real.h"
-#include "atomdata.h"
-#include "gpu_types.h"
-
struct interaction_const_t;
struct nbnxn_atomdata_t;
struct gmx_wallcycle;
* \param [in] aloc Atom locality flag.
*/
GPU_FUNC_QUALIFIER
-void gpu_copy_xq_to_gpu(gmx_nbnxn_gpu_t gmx_unused* nb,
+void gpu_copy_xq_to_gpu(gmx_nbnxm_gpu_t gmx_unused* nb,
const struct nbnxn_atomdata_t gmx_unused* nbdata,
gmx::AtomLocality gmx_unused aloc) GPU_FUNC_TERM;
*
*/
GPU_FUNC_QUALIFIER
-void gpu_launch_kernel(gmx_nbnxn_gpu_t gmx_unused* nb,
+void gpu_launch_kernel(gmx_nbnxm_gpu_t gmx_unused* nb,
const gmx::StepWorkload gmx_unused& stepWork,
gmx::InteractionLocality gmx_unused iloc) GPU_FUNC_TERM;
* \param [in] numParts Number of parts the pair list is split into in the rolling kernel.
*/
GPU_FUNC_QUALIFIER
-void gpu_launch_kernel_pruneonly(gmx_nbnxn_gpu_t gmx_unused* nb,
+void gpu_launch_kernel_pruneonly(gmx_nbnxm_gpu_t gmx_unused* nb,
gmx::InteractionLocality gmx_unused iloc,
int gmx_unused numParts) GPU_FUNC_TERM;
* (and energies/shift forces if required).
*/
GPU_FUNC_QUALIFIER
-void gpu_launch_cpyback(gmx_nbnxn_gpu_t gmx_unused* nb,
+void gpu_launch_cpyback(gmx_nbnxm_gpu_t gmx_unused* nb,
nbnxn_atomdata_t gmx_unused* nbatom,
const gmx::StepWorkload gmx_unused& stepWork,
gmx::AtomLocality gmx_unused aloc) GPU_FUNC_TERM;
* \returns True if the nonbonded tasks associated with \p aloc locality have completed
*/
GPU_FUNC_QUALIFIER
-bool gpu_try_finish_task(gmx_nbnxn_gpu_t gmx_unused* nb,
+bool gpu_try_finish_task(gmx_nbnxm_gpu_t gmx_unused* nb,
const gmx::StepWorkload gmx_unused& stepWork,
gmx::AtomLocality gmx_unused aloc,
real gmx_unused* e_lj,
* \param[out] shiftForces Shift forces buffer to accumulate into
* \param[out] wcycle Pointer to wallcycle data structure */
GPU_FUNC_QUALIFIER
-float gpu_wait_finish_task(gmx_nbnxn_gpu_t gmx_unused* nb,
+float gpu_wait_finish_task(gmx_nbnxm_gpu_t gmx_unused* nb,
const gmx::StepWorkload gmx_unused& stepWork,
gmx::AtomLocality gmx_unused aloc,
real gmx_unused* e_lj,
* Called on the NS step and performs (re-)allocations and memory copies. */
CUDA_FUNC_QUALIFIER
void nbnxn_gpu_init_x_to_nbat_x(const Nbnxm::GridSet gmx_unused& gridSet,
- gmx_nbnxn_gpu_t gmx_unused* gpu_nbv) CUDA_FUNC_TERM;
+ gmx_nbnxm_gpu_t gmx_unused* gpu_nbv) CUDA_FUNC_TERM;
/*! \brief X buffer operations on GPU: performs conversion from rvec to nb format.
*
CUDA_FUNC_QUALIFIER
void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid gmx_unused& grid,
bool gmx_unused setFillerCoords,
- gmx_nbnxn_gpu_t gmx_unused* gpu_nbv,
+ gmx_nbnxm_gpu_t gmx_unused* gpu_nbv,
DeviceBuffer<float> gmx_unused d_x,
GpuEventSynchronizer gmx_unused* xReadyOnDevice,
gmx::AtomLocality gmx_unused locality,
* \param[in] interactionLocality Local or NonLocal sync point
*/
CUDA_FUNC_QUALIFIER
-void nbnxnInsertNonlocalGpuDependency(const gmx_nbnxn_gpu_t gmx_unused* nb,
+void nbnxnInsertNonlocalGpuDependency(const gmx_nbnxm_gpu_t gmx_unused* nb,
gmx::InteractionLocality gmx_unused interactionLocality) CUDA_FUNC_TERM;
/*! \brief Set up internal flags that indicate what type of short-range work there is.
* \param[in] iLocality Interaction locality identifier
*/
GPU_FUNC_QUALIFIER
-void setupGpuShortRangeWork(gmx_nbnxn_gpu_t gmx_unused* nb,
+void setupGpuShortRangeWork(gmx_nbnxm_gpu_t gmx_unused* nb,
const gmx::GpuBonded gmx_unused* gpuBonded,
gmx::InteractionLocality gmx_unused iLocality) GPU_FUNC_TERM;
* \param[in] aLocality Atom locality identifier
*/
GPU_FUNC_QUALIFIER
-bool haveGpuShortRangeWork(const gmx_nbnxn_gpu_t gmx_unused* nb, gmx::AtomLocality gmx_unused aLocality)
+bool haveGpuShortRangeWork(const gmx_nbnxm_gpu_t gmx_unused* nb, gmx::AtomLocality gmx_unused aLocality)
GPU_FUNC_TERM_WITH_RETURN(false);
/*! \brief Initialization for F buffer operations on GPU */
CUDA_FUNC_QUALIFIER
void nbnxn_gpu_init_add_nbat_f_to_f(const int gmx_unused* cell,
- gmx_nbnxn_gpu_t gmx_unused* gpu_nbv,
+ gmx_nbnxm_gpu_t gmx_unused* gpu_nbv,
int gmx_unused natoms_total,
GpuEventSynchronizer gmx_unused* localReductionDone) CUDA_FUNC_TERM;
CUDA_FUNC_QUALIFIER
void nbnxn_gpu_add_nbat_f_to_f(gmx::AtomLocality gmx_unused atomLocality,
DeviceBuffer<float> gmx_unused totalForcesDevice,
- gmx_nbnxn_gpu_t gmx_unused* gpu_nbv,
+ gmx_nbnxm_gpu_t gmx_unused* gpu_nbv,
void gmx_unused* pmeForcesDevice,
gmx::ArrayRef<GpuEventSynchronizer* const> gmx_unused dependencyList,
int gmx_unused atomStart,
* \param[in] nb The nonbonded data GPU structure
*/
CUDA_FUNC_QUALIFIER
-void nbnxn_wait_x_on_device(gmx_nbnxn_gpu_t gmx_unused* nb) CUDA_FUNC_TERM;
+void nbnxn_wait_x_on_device(gmx_nbnxm_gpu_t gmx_unused* nb) CUDA_FUNC_TERM;
} // namespace Nbnxm
#endif
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/mdtypes/forcerec.h"
#include "gromacs/mdtypes/inputrec.h"
+#include "gromacs/nbnxm/atomdata.h"
#include "gromacs/nbnxm/gpu_data_mgmt.h"
#include "gromacs/nbnxm/nbnxm.h"
#include "gromacs/nbnxm/pairlist_tuning.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/logger.h"
-#include "atomdata.h"
-#include "gpu_types.h"
#include "grid.h"
#include "nbnxm_geometry.h"
#include "nbnxm_simd.h"
{
/*! \brief Gets and returns the minimum i-list count for balancing based on the GPU used or env.var. when set */
-static int getMinimumIlistCountForGpuBalancing(gmx_nbnxn_gpu_t* nbnxmGpu)
+static int getMinimumIlistCountForGpuBalancing(gmx_nbnxm_gpu_t* nbnxmGpu)
{
int minimumIlistCount;
fr->nbfp, mimimumNumEnergyGroupNonbonded,
(useGpu || emulateGpu) ? 1 : gmx_omp_nthreads_get(emntNonbonded));
- gmx_nbnxn_gpu_t* gpu_nbv = nullptr;
+ gmx_nbnxm_gpu_t* gpu_nbv = nullptr;
int minimumIlistCountForGpuBalancing = 0;
if (useGpu)
{
std::unique_ptr<PairSearch> pairSearch,
std::unique_ptr<nbnxn_atomdata_t> nbat_in,
const Nbnxm::KernelSetup& kernelSetup,
- gmx_nbnxn_gpu_t* gpu_nbv_ptr,
+ gmx_nbnxm_gpu_t* gpu_nbv_ptr,
gmx_wallcycle* wcycle) :
pairlistSets_(std::move(pairlistSets)),
pairSearch_(std::move(pairSearch)),
* OpenCL kernel objects are cached in nb. If the requested kernel is not
* found in the cache, it will be created and the cache will be updated.
*/
-static inline cl_kernel select_nbnxn_kernel(gmx_nbnxn_ocl_t* nb, int eeltype, int evdwtype, bool bDoEne, bool bDoPrune)
+static inline cl_kernel select_nbnxn_kernel(gmx_nbnxm_gpu_t* nb, int eeltype, int evdwtype, bool bDoEne, bool bDoPrune)
{
const char* kernel_name_to_run;
cl_kernel* kernel_ptr;
}
/*! \brief Launch asynchronously the xq buffer host to device copy. */
-void gpu_copy_xq_to_gpu(gmx_nbnxn_ocl_t* nb, const nbnxn_atomdata_t* nbatom, const AtomLocality atomLocality)
+void gpu_copy_xq_to_gpu(gmx_nbnxm_gpu_t* nb, const nbnxn_atomdata_t* nbatom, const AtomLocality atomLocality)
{
GMX_ASSERT(nb, "Need a valid nbnxn_gpu object");
misc_ops_done event to record the point in time when the above operations
are finished and synchronize with this event in the non-local stream.
*/
-void gpu_launch_kernel(gmx_nbnxn_ocl_t* nb, const gmx::StepWorkload& stepWork, const Nbnxm::InteractionLocality iloc)
+void gpu_launch_kernel(gmx_nbnxm_gpu_t* nb, const gmx::StepWorkload& stepWork, const Nbnxm::InteractionLocality iloc)
{
cl_atomdata_t* adat = nb->atdat;
cl_nbparam_t* nbp = nb->nbparam;
* Launch the pairlist prune only kernel for the given locality.
* \p numParts tells in how many parts, i.e. calls the list will be pruned.
*/
-void gpu_launch_kernel_pruneonly(gmx_nbnxn_gpu_t* nb, const InteractionLocality iloc, const int numParts)
+void gpu_launch_kernel_pruneonly(gmx_nbnxm_gpu_t* nb, const InteractionLocality iloc, const int numParts)
{
cl_atomdata_t* adat = nb->atdat;
cl_nbparam_t* nbp = nb->nbparam;
* Launch asynchronously the download of nonbonded forces from the GPU
* (and energies/shift forces if required).
*/
-void gpu_launch_cpyback(gmx_nbnxn_ocl_t* nb,
+void gpu_launch_cpyback(gmx_nbnxm_gpu_t* nb,
struct nbnxn_atomdata_t* nbatom,
const gmx::StepWorkload& stepWork,
const AtomLocality aloc)
#include <cmath>
-// TODO We would like to move this down, but the way gmx_nbnxn_gpu_t
-// is currently declared means this has to be before gpu_types.h
-#include "nbnxm_ocl_types.h"
-
-// TODO Remove this comment when the above order issue is resolved
#include "gromacs/gpu_utils/gpu_utils.h"
#include "gromacs/gpu_utils/oclutils.h"
#include "gromacs/hardware/gpu_hw_info.h"
#include "gromacs/utility/smalloc.h"
#include "nbnxm_ocl_internal.h"
+#include "nbnxm_ocl_types.h"
namespace Nbnxm
{
{
return;
}
- gmx_nbnxn_ocl_t* nb = nbv->gpu_nbv;
+ gmx_nbnxm_gpu_t* nb = nbv->gpu_nbv;
cl_nbparam_t* nbp = nb->nbparam;
set_cutoff_parameters(nbp, ic, nbv->pairlistSets().params());
}
/*! \brief Initializes the OpenCL kernel pointers of the nbnxn_ocl_ptr_t input data structure. */
-static cl_kernel nbnxn_gpu_create_kernel(gmx_nbnxn_ocl_t* nb, const char* kernel_name)
+static cl_kernel nbnxn_gpu_create_kernel(gmx_nbnxm_gpu_t* nb, const char* kernel_name)
{
cl_kernel kernel;
cl_int cl_error;
/*! \brief Clears nonbonded shift force output array and energy outputs on the GPU.
*/
-static void nbnxn_ocl_clear_e_fshift(gmx_nbnxn_ocl_t* nb)
+static void nbnxn_ocl_clear_e_fshift(gmx_nbnxm_gpu_t* nb)
{
cl_int cl_error;
}
/*! \brief Initializes the OpenCL kernel pointers of the nbnxn_ocl_ptr_t input data structure. */
-static void nbnxn_gpu_init_kernels(gmx_nbnxn_ocl_t* nb)
+static void nbnxn_gpu_init_kernels(gmx_nbnxm_gpu_t* nb)
{
/* Init to 0 main kernel arrays */
/* They will be later on initialized in select_nbnxn_kernel */
* Initializes members of the atomdata and nbparam structs and
* clears e/fshift output buffers.
*/
-static void nbnxn_ocl_init_const(gmx_nbnxn_ocl_t* nb,
+static void nbnxn_ocl_init_const(gmx_nbnxm_gpu_t* nb,
const interaction_const_t* ic,
const PairlistParams& listParams,
const nbnxn_atomdata_t::Params& nbatParams)
//! This function is documented in the header file
-gmx_nbnxn_ocl_t* gpu_init(const gmx_device_info_t* deviceInfo,
+gmx_nbnxm_gpu_t* gpu_init(const gmx_device_info_t* deviceInfo,
const interaction_const_t* ic,
const PairlistParams& listParams,
const nbnxn_atomdata_t* nbat,
const int rank,
const gmx_bool bLocalAndNonlocal)
{
- gmx_nbnxn_ocl_t* nb;
+ gmx_nbnxm_gpu_t* nb;
cl_int cl_error;
cl_command_queue_properties queue_properties;
/*! \brief Clears the first natoms_clear elements of the GPU nonbonded force output array.
*/
-static void nbnxn_ocl_clear_f(gmx_nbnxn_ocl_t* nb, int natoms_clear)
+static void nbnxn_ocl_clear_f(gmx_nbnxm_gpu_t* nb, int natoms_clear)
{
if (natoms_clear == 0)
{
}
//! This function is documented in the header file
-void gpu_clear_outputs(gmx_nbnxn_ocl_t* nb, bool computeVirial)
+void gpu_clear_outputs(gmx_nbnxm_gpu_t* nb, bool computeVirial)
{
nbnxn_ocl_clear_f(nb, nb->atdat->natoms);
/* clear shift force array and energies if the outputs were
}
//! This function is documented in the header file
-void gpu_init_pairlist(gmx_nbnxn_ocl_t* nb, const NbnxnPairlistGpu* h_plist, const InteractionLocality iloc)
+void gpu_init_pairlist(gmx_nbnxm_gpu_t* nb, const NbnxnPairlistGpu* h_plist, const InteractionLocality iloc)
{
char sbuf[STRLEN];
// Timing accumulation should happen only if there was work to do
}
//! This function is documented in the header file
-void gpu_upload_shiftvec(gmx_nbnxn_ocl_t* nb, const nbnxn_atomdata_t* nbatom)
+void gpu_upload_shiftvec(gmx_nbnxm_gpu_t* nb, const nbnxn_atomdata_t* nbatom)
{
cl_atomdata_t* adat = nb->atdat;
cl_command_queue ls = nb->stream[InteractionLocality::Local];
}
//! This function is documented in the header file
-void gpu_init_atomdata(gmx_nbnxn_ocl_t* nb, const nbnxn_atomdata_t* nbat)
+void gpu_init_atomdata(gmx_nbnxm_gpu_t* nb, const nbnxn_atomdata_t* nbat)
{
cl_int cl_error;
int nalloc, natoms;
}
//! This function is documented in the header file
-void gpu_free(gmx_nbnxn_ocl_t* nb)
+void gpu_free(gmx_nbnxm_gpu_t* nb)
{
if (nb == nullptr)
{
}
//! This function is documented in the header file
-gmx_wallclock_gpu_nbnxn_t* gpu_get_timings(gmx_nbnxn_ocl_t* nb)
+gmx_wallclock_gpu_nbnxn_t* gpu_get_timings(gmx_nbnxm_gpu_t* nb)
{
// The timings pointer is handed out only for a non-null nb that has bDoTime set;
// a null nb or bDoTime being false both yield nullptr.
return (nb != nullptr && nb->bDoTime) ? nb->timings : nullptr;
}
}
//! This function is documented in the header file
-int gpu_min_ci_balanced(gmx_nbnxn_ocl_t* nb)
+int gpu_min_ci_balanced(gmx_nbnxm_gpu_t* nb)
{
// A null nb yields 0; otherwise the device's compute_units count is scaled by
// gpu_min_ci_balanced_factor (defined outside this excerpt).
return nb != nullptr ? gpu_min_ci_balanced_factor * nb->dev_info->compute_units : 0;
}
//! This function is documented in the header file
-gmx_bool gpu_is_kernel_ewald_analytical(const gmx_nbnxn_ocl_t* nb)
+gmx_bool gpu_is_kernel_ewald_analytical(const gmx_nbnxm_gpu_t* nb)
{
// True when the configured eeltype is eelOclEWALD_ANA or eelOclEWALD_ANA_TWIN.
// Note: nb and nb->nbparam are dereferenced without a null check here, unlike the
// neighbouring getters, so the caller must pass a valid, initialized object.
return ((nb->nbparam->eeltype == eelOclEWALD_ANA) || (nb->nbparam->eeltype == eelOclEWALD_ANA_TWIN));
}
*
* Does not throw
*/
-void nbnxn_gpu_compile_kernels(gmx_nbnxn_ocl_t* nb)
+void nbnxn_gpu_compile_kernels(gmx_nbnxm_gpu_t* nb)
{
gmx_bool bFastGen = TRUE;
cl_program program = nullptr;
/*! \internal
* \brief Main data structure for OpenCL nonbonded force calculations.
*/
-struct gmx_nbnxn_ocl_t
+struct gmx_nbnxm_gpu_t
{
const gmx_device_info_t* dev_info; /**< OpenCL device information */
struct gmx_device_runtime_data_t* dev_rundata; /**< OpenCL runtime data (context, kernels) */
#include "gromacs/mdlib/gmx_omp_nthreads.h"
#include "gromacs/mdtypes/group.h"
#include "gromacs/mdtypes/md_enums.h"
+#include "gromacs/nbnxm/atomdata.h"
#include "gromacs/nbnxm/gpu_data_mgmt.h"
#include "gromacs/pbcutil/ishift.h"
#include "gromacs/pbcutil/pbc.h"
#include "gromacs/utility/listoflists.h"
#include "gromacs/utility/smalloc.h"
-#include "atomdata.h"
#include "boundingboxes.h"
#include "clusterdistancekerneltype.h"
#include "gridset.h"
#include "gromacs/domdec/domdec.h"
#include "gromacs/math/vectypes.h"
+#include "gromacs/nbnxm/atomdata.h"
#include "gromacs/timing/cyclecounter.h"
#include "gromacs/utility/alignedallocator.h"
#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/real.h"
-#include "atomdata.h"
#include "gridset.h"
#include "pairlist.h"
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "gromacs/utility/gmxassert.h"
#include "clusterdistancekerneltype.h"
+#include "nbnxm_gpu.h"
#include "pairlistset.h"
#include "pairlistsets.h"
#include "kernels_reference/kernel_ref_prune.h"