Fix random typos
[alexxy/gromacs.git] / src / gromacs / nbnxm / nbnxm.h
index b799738f75d86881a7ff228db0daef9290d05fb9..51b7a4a853043828eff01431fdbf5373dbd7c14f 100644 (file)
@@ -1,7 +1,8 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017 by the GROMACS development team.
+ * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
 
 #include "gromacs/gpu_utils/devicebuffer_datatype.h"
 #include "gromacs/math/vectypes.h"
+#include "gromacs/mdtypes/locality.h"
 #include "gromacs/utility/arrayref.h"
 #include "gromacs/utility/enumerationhelpers.h"
-#include "gromacs/utility/range.h"
 #include "gromacs/utility/real.h"
 
-#include "locality.h"
-
-// TODO: Remove this include and the two nbnxm includes above
-#include "nbnxm_gpu.h"
-
-struct gmx_device_info_t;
+struct DeviceInformation;
+class FreeEnergyDispatch;
 struct gmx_domdec_zones_t;
 struct gmx_enerdata_t;
 struct gmx_hw_info_t;
 struct gmx_mtop_t;
+struct NbnxmGpu;
 struct gmx_wallcycle;
 struct interaction_const_t;
+enum class LJCombinationRule;
+struct nbnxn_atomdata_t;
 struct nonbonded_verlet_t;
 class PairSearch;
 class PairlistSets;
-struct t_blocka;
 struct t_commrec;
 struct t_lambda;
-struct t_mdatoms;
 struct t_nrnb;
 struct t_forcerec;
 struct t_inputrec;
-
-/*! \brief Switch for whether to use GPU for buffer ops*/
-enum class BufferOpsUseGpu
-{
-    True,
-    False
-};
+struct gmx_grppairener_t;
 
 class GpuEventSynchronizer;
 
 namespace gmx
 {
+template<typename>
+class ArrayRefWithPadding;
+class DeviceStreamManager;
 class ForceWithShiftForces;
+class ListedForcesGpu;
+template<typename>
+class ListOfLists;
 class MDLogger;
+template<typename>
+class Range;
+class StepWorkload;
 class UpdateGroupsCog;
-}
+} // namespace gmx
 
+//! Namespace for non-bonded kernels
 namespace Nbnxm
 {
 enum class KernelType;
-}
 
-namespace Nbnxm
+/*! \brief Nbnxm electrostatic GPU kernel flavors.
+ *
+ *  Types of electrostatics implementations available in the GPU non-bonded
+ *  force kernels. These represent both the electrostatics types implemented
+ *  by the kernels (cut-off, RF, and Ewald - a subset of what's defined in
+ *  enums.h) as well as encode implementation details analytical/tabulated
+ *  and single or twin cut-off (for Ewald kernels).
+ *  Note that the cut-off and RF kernels have only analytical flavor and unlike
+ *  in the CPU kernels, the tabulated kernels are ATM Ewald-only.
+ *
+ *  The row-order of pointers to different electrostatic kernels defined in
+ *  nbnxn_cuda.cu by the nb_*_kfunc_ptr function pointer table
+ *  should match the order of enumerated types below.
+ */
+enum class ElecType : int
+{
+    Cut,          //!< Plain cut-off
+    RF,           //!< Reaction field
+    EwaldTab,     //!< Tabulated Ewald with single cut-off
+    EwaldTabTwin, //!< Tabulated Ewald with twin cut-off
+    EwaldAna,     //!< Analytical Ewald with single cut-off
+    EwaldAnaTwin, //!< Analytical Ewald with twin cut-off
+    Count         //!< Number of valid values
+};
+
+//! Number of possible \ref ElecType values.
+constexpr int c_numElecTypes = static_cast<int>(ElecType::Count);
+
+/*! \brief Nbnxm VdW GPU kernel flavors.
+ *
+ * The enumerates values correspond to the LJ implementations in the GPU non-bonded
+ * kernels.
+ *
+ * The column-order of pointers to different electrostatic kernels defined in
+ * nbnxn_cuda_ocl.cpp/.cu by the nb_*_kfunc_ptr function pointer table
+ * should match the order of enumerated types below.
+ */
+enum class VdwType : int
 {
+    Cut,         //!< Plain cut-off
+    CutCombGeom, //!< Cut-off with geometric combination rules
+    CutCombLB,   //!< Cut-off with Lorentz-Berthelot combination rules
+    FSwitch,     //!< Smooth force switch
+    PSwitch,     //!< Smooth potential switch
+    EwaldGeom,   //!< Ewald with geometric combination rules
+    EwaldLB,     //!< Ewald with Lorentz-Berthelot combination rules
+    Count        //!< Number of valid values
+};
+
+//! Number of possible \ref VdwType values.
+constexpr int c_numVdwTypes = static_cast<int>(VdwType::Count);
 
 /*! \brief Nonbonded NxN kernel types: plain C, CPU SIMD, GPU, GPU emulation */
 enum class KernelType : int
@@ -191,7 +241,7 @@ enum class EwaldExclusionType : int
 struct KernelSetup
 {
     //! The non-bonded type, also affects the pairlist construction kernel
-    KernelType         kernelType = KernelType::NotSet;
+    KernelType kernelType = KernelType::NotSet;
     //! Ewald exclusion computation handling type, currently only used for CPU
     EwaldExclusionType ewaldExclusionType = EwaldExclusionType::NotSet;
 };
@@ -201,249 +251,215 @@ struct KernelSetup
  * \param [in] kernelType   nonbonded kernel type, takes values from the nbnxn_kernel_type enum
  * \returns                 a string identifying the kernel corresponding to the type passed as argument
  */
-const char *lookup_kernel_name(Nbnxm::KernelType kernelType);
+const char* lookup_kernel_name(Nbnxm::KernelType kernelType);
 
 } // namespace Nbnxm
 
 /*! \brief Flag to tell the nonbonded kernels whether to clear the force output buffers */
-enum {
-    enbvClearFNo, enbvClearFYes
+enum
+{
+    enbvClearFNo,
+    enbvClearFYes
 };
 
 /*! \libinternal
  *  \brief Top-level non-bonded data structure for the Verlet-type cut-off scheme. */
 struct nonbonded_verlet_t
 {
-    public:
-        //! Constructs an object from its components
-        nonbonded_verlet_t(std::unique_ptr<PairlistSets>      pairlistSets,
-                           std::unique_ptr<PairSearch>        pairSearch,
-                           std::unique_ptr<nbnxn_atomdata_t>  nbat,
-                           const Nbnxm::KernelSetup          &kernelSetup,
-                           gmx_nbnxn_gpu_t                   *gpu_nbv,
-                           gmx_wallcycle                     *wcycle);
-
-        ~nonbonded_verlet_t();
-
-        //! Returns whether a GPU is use for the non-bonded calculations
-        bool useGpu() const
-        {
-            return kernelSetup_.kernelType == Nbnxm::KernelType::Gpu8x8x8;
-        }
-
-        //! Returns whether a GPU is emulated for the non-bonded calculations
-        bool emulateGpu() const
-        {
-            return kernelSetup_.kernelType == Nbnxm::KernelType::Cpu8x8x8_PlainC;
-        }
-
-        //! Return whether the pairlist is of simple, CPU type
-        bool pairlistIsSimple() const
-        {
-            return !useGpu() && !emulateGpu();
-        }
-
-        //! Initialize the pair list sets, TODO this should be private
-        void initPairlistSets(bool haveMultipleDomains);
-
-        //! Returns the order of the local atoms on the grid
-        gmx::ArrayRef<const int> getLocalAtomOrder() const;
-
-        //! Sets the order of the local atoms to the order grid atom ordering
-        void setLocalAtomOrder();
-
-        //! Returns the index position of the atoms on the search grid
-        gmx::ArrayRef<const int> getGridIndices() const;
-
-        //! Constructs the pairlist for the given locality
-        void constructPairlist(Nbnxm::InteractionLocality  iLocality,
-                               const t_blocka             *excl,
-                               int64_t                     step,
-                               t_nrnb                     *nrnb);
-
-        //! Updates all the atom properties in Nbnxm
-        void setAtomProperties(const t_mdatoms          &mdatoms,
-                               gmx::ArrayRef<const int>  atomInfo);
-
-        /*!\brief Convert the coordinates to NBNXM format for the given locality.
-         *
-         * The API function for the transformation of the coordinates from one layout to another.
-         *
-         * \param[in] locality     Whether coordinates for local or non-local atoms should be transformed.
-         * \param[in] fillLocal    If the coordinates for filler particles should be zeroed.
-         * \param[in] coordinates  Coordinates in plain rvec format to be transformed.
-         */
-        void convertCoordinates(Nbnxm::AtomLocality             locality,
-                                bool                            fillLocal,
-                                gmx::ArrayRef<const gmx::RVec>  coordinates);
-
-        /*!\brief Convert the coordinates to NBNXM format on the GPU for the given locality
-         *
-         * The API function for the transformation of the coordinates from one layout to another in the GPU memory.
-         *
-         * \param[in] locality        Whether coordinates for local or non-local atoms should be transformed.
-         * \param[in] fillLocal       If the coordinates for filler particles should be zeroed.
-         * \param[in] d_x             GPU coordinates buffer in plain rvec format to be transformed.
-         * \param[in] xReadyOnDevice  Event synchronizer indicating that the coordinates are ready in the device memory.
-         */
-        void convertCoordinatesGpu(Nbnxm::AtomLocality              locality,
-                                   bool                             fillLocal,
-                                   DeviceBuffer<float>              d_x,
-                                   GpuEventSynchronizer            *xReadyOnDevice);
-
-        //! Init for GPU version of setup coordinates in Nbnxm
-        void atomdata_init_copy_x_to_nbat_x_gpu();
-
-        //! Sync the nonlocal GPU stream with dependent tasks in the local queue.
-        void insertNonlocalGpuDependency(Nbnxm::InteractionLocality interactionLocality);
-
-        //! Returns a reference to the pairlist sets
-        const PairlistSets &pairlistSets() const
-        {
-            return *pairlistSets_;
-        }
-
-        //! Returns whether step is a dynamic list pruning step, for CPU lists
-        bool isDynamicPruningStepCpu(int64_t step) const;
-
-        //! Returns whether step is a dynamic list pruning step, for GPU lists
-        bool isDynamicPruningStepGpu(int64_t step) const;
-
-        //! Dispatches the dynamic pruning kernel for the given locality, for CPU lists
-        void dispatchPruneKernelCpu(Nbnxm::InteractionLocality  iLocality,
-                                    const rvec                 *shift_vec);
-
-        //! Dispatches the dynamic pruning kernel for GPU lists
-        void dispatchPruneKernelGpu(int64_t step);
-
-        //! \brief Executes the non-bonded kernel of the GPU or launches it on the GPU
-        void dispatchNonbondedKernel(Nbnxm::InteractionLocality  iLocality,
-                                     const interaction_const_t  &ic,
-                                     const gmx::StepWorkload    &stepWork,
-                                     int                         clearF,
-                                     const t_forcerec           &fr,
-                                     gmx_enerdata_t             *enerd,
-                                     t_nrnb                     *nrnb);
-
-        //! Executes the non-bonded free-energy kernel, always runs on the CPU
-        void dispatchFreeEnergyKernel(Nbnxm::InteractionLocality  iLocality,
-                                      const t_forcerec           *fr,
-                                      rvec                        x[],
-                                      gmx::ForceWithShiftForces  *forceWithShiftForces,
-                                      const t_mdatoms            &mdatoms,
-                                      t_lambda                   *fepvals,
-                                      real                       *lambda,
-                                      gmx_enerdata_t             *enerd,
-                                      const gmx::StepWorkload    &stepWork,
-                                      t_nrnb                     *nrnb);
-
-        /*! \brief Add the forces stored in nbat to f, zeros the forces in nbat
-         * \param [in] locality         Local or non-local
-         * \param [inout] force         Force to be added to
-         */
-        void atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality                 locality,
-                                      gmx::ArrayRef<gmx::RVec>            force);
-
-        /*! \brief Add the forces stored in nbat to total force using GPU buffer opse
-         *
-         * \param [in]     locality             Local or non-local
-         * \param [in,out] totalForcesDevice    Force to be added to
-         * \param [in]     forcesPmeDevice      Device buffer with PME forces
-         * \param[in]      dependencyList       List of synchronizers that represent the dependencies the reduction task needs to sync on.
-         * \param [in]     useGpuFPmeReduction  Whether PME forces should be added
-         * \param [in]     accumulateForce      If the total force buffer already contains data
-         */
-        void atomdata_add_nbat_f_to_f_gpu(Nbnxm::AtomLocality                         locality,
-                                          DeviceBuffer<float>                         totalForcesDevice,
-                                          void                                       *forcesPmeDevice,
-                                          gmx::ArrayRef<GpuEventSynchronizer* const>  dependencyList,
-                                          bool                                        useGpuFPmeReduction,
-                                          bool                                        accumulateForce);
-
-        /*! \brief Outer body of function to perform initialization for F buffer operations on GPU.
-         *
-         * \param localReductionDone     Pointer to an event synchronizer that marks the completion of the local f buffer ops kernel.
-         */
-        void atomdata_init_add_nbat_f_to_f_gpu(GpuEventSynchronizer* localReductionDone);
-
-        /*! \brief return pointer to GPU event recorded when coordinates have been copied to device */
-        void* get_x_on_device_event();
-
-        /*! \brief Wait for non-local copy of coordinate buffer from device to host */
-        void wait_nonlocal_x_copy_D2H_done();
-
-        /*! \brief return GPU pointer to f in rvec format */
-        void* get_gpu_frvec();
-
-        /*! \brief Ensure local stream waits for non-local stream */
-        void stream_local_wait_for_nonlocal();
-
-        //! Return the kernel setup
-        const Nbnxm::KernelSetup &kernelSetup() const
-        {
-            return kernelSetup_;
-        }
-
-        //! Returns the outer radius for the pair list
-        real pairlistInnerRadius() const;
-
-        //! Returns the outer radius for the pair list
-        real pairlistOuterRadius() const;
-
-        //! Changes the pair-list outer and inner radius
-        void changePairlistRadii(real rlistOuter,
-                                 real rlistInner);
-
-        //! Set up internal flags that indicate what type of short-range work there is.
-        void setupGpuShortRangeWork(const gmx::GpuBonded             *gpuBonded,
-                                    const Nbnxm::InteractionLocality  iLocality)
-        {
-            if (useGpu() && !emulateGpu())
-            {
-                Nbnxm::setupGpuShortRangeWork(gpu_nbv, gpuBonded, iLocality);
-            }
-        }
-
-        //! Returns true if there is GPU short-range work for the given atom locality.
-        bool haveGpuShortRangeWork(const Nbnxm::AtomLocality aLocality)
-        {
-            return ((useGpu() && !emulateGpu()) &&
-                    Nbnxm::haveGpuShortRangeWork(gpu_nbv, aLocality));
-        }
-
-        // TODO: Make all data members private
-    public:
-        //! All data related to the pair lists
-        std::unique_ptr<PairlistSets>     pairlistSets_;
-        //! Working data for constructing the pairlists
-        std::unique_ptr<PairSearch>       pairSearch_;
-        //! Atom data
-        std::unique_ptr<nbnxn_atomdata_t> nbat;
-    private:
-        //! The non-bonded setup, also affects the pairlist construction kernel
-        Nbnxm::KernelSetup                kernelSetup_;
-        //! \brief Pointer to wallcycle structure.
-        gmx_wallcycle                    *wcycle_;
-    public:
-        //! GPU Nbnxm data, only used with a physical GPU (TODO: use unique_ptr)
-        gmx_nbnxn_gpu_t                  *gpu_nbv;
+public:
+    //! Constructs an object from its components
+    nonbonded_verlet_t(std::unique_ptr<PairlistSets>     pairlistSets,
+                       std::unique_ptr<PairSearch>       pairSearch,
+                       std::unique_ptr<nbnxn_atomdata_t> nbat,
+                       const Nbnxm::KernelSetup&         kernelSetup,
+                       NbnxmGpu*                         gpu_nbv,
+                       gmx_wallcycle*                    wcycle);
+
+    ~nonbonded_verlet_t();
+
+    //! Returns whether a GPU is use for the non-bonded calculations
+    bool useGpu() const { return kernelSetup_.kernelType == Nbnxm::KernelType::Gpu8x8x8; }
+
+    //! Returns whether a GPU is emulated for the non-bonded calculations
+    bool emulateGpu() const
+    {
+        return kernelSetup_.kernelType == Nbnxm::KernelType::Cpu8x8x8_PlainC;
+    }
+
+    //! Return whether the pairlist is of simple, CPU type
+    bool pairlistIsSimple() const { return !useGpu() && !emulateGpu(); }
+
+
+    //! Returns the order of the local atoms on the grid
+    gmx::ArrayRef<const int> getLocalAtomOrder() const;
+
+    //! Sets the order of the local atoms to the order grid atom ordering
+    void setLocalAtomOrder() const;
+
+    //! Returns the index position of the atoms on the search grid
+    gmx::ArrayRef<const int> getGridIndices() const;
+
+    /*! \brief Constructs the pairlist for the given locality
+     *
+     * When there are no non-self exclusions, \p exclusions can be empty.
+     * Otherwise the number of lists in \p exclusions should match the number
+     * of atoms when not using DD, or the total number of atoms in the i-zones
+     * when using DD.
+     *
+     * \param[in] iLocality   The interaction locality: local or non-local
+     * \param[in] exclusions  Lists of exclusions for every atom.
+     * \param[in] step        Used to set the list creation step
+     * \param[in,out] nrnb    Flop accounting struct, can be nullptr
+     */
+    void constructPairlist(gmx::InteractionLocality     iLocality,
+                           const gmx::ListOfLists<int>& exclusions,
+                           int64_t                      step,
+                           t_nrnb*                      nrnb) const;
+
+    //! Updates all the atom properties in Nbnxm
+    void setAtomProperties(gmx::ArrayRef<const int>     atomTypes,
+                           gmx::ArrayRef<const real>    atomCharges,
+                           gmx::ArrayRef<const int64_t> atomInfo) const;
+
+    /*!\brief Convert the coordinates to NBNXM format for the given locality.
+     *
+     * The API function for the transformation of the coordinates from one layout to another.
+     *
+     * \param[in] locality     Whether coordinates for local or non-local atoms should be
+     *                         transformed.
+     * \param[in] coordinates  Coordinates in plain rvec format to be transformed.
+     */
+    void convertCoordinates(gmx::AtomLocality locality, gmx::ArrayRef<const gmx::RVec> coordinates);
+
+    /*!\brief Convert the coordinates to NBNXM format on the GPU for the given locality
+     *
+     * The API function for the transformation of the coordinates from one layout to another in the GPU memory.
+     *
+     * \param[in] locality        Whether coordinates for local or non-local atoms should be transformed.
+     * \param[in] d_x             GPU coordinates buffer in plain rvec format to be transformed.
+     * \param[in] xReadyOnDevice  Event synchronizer indicating that the coordinates are ready in the device memory.
+     */
+    void convertCoordinatesGpu(gmx::AtomLocality       locality,
+                               DeviceBuffer<gmx::RVec> d_x,
+                               GpuEventSynchronizer*   xReadyOnDevice);
+
+    //! Init for GPU version of setup coordinates in Nbnxm
+    void atomdata_init_copy_x_to_nbat_x_gpu() const;
+
+    //! Returns a reference to the pairlist sets
+    const PairlistSets& pairlistSets() const { return *pairlistSets_; }
+
+    //! Returns whether step is a dynamic list pruning step, for CPU lists
+    bool isDynamicPruningStepCpu(int64_t step) const;
+
+    //! Returns whether step is a dynamic list pruning step, for GPU lists
+    bool isDynamicPruningStepGpu(int64_t step) const;
+
+    //! Dispatches the dynamic pruning kernel for the given locality, for CPU lists
+    void dispatchPruneKernelCpu(gmx::InteractionLocality       iLocality,
+                                gmx::ArrayRef<const gmx::RVec> shift_vec) const;
+
+    //! Dispatches the dynamic pruning kernel for GPU lists
+    void dispatchPruneKernelGpu(int64_t step);
+
+    //! \brief Executes the non-bonded kernel of the GPU or launches it on the GPU
+    void dispatchNonbondedKernel(gmx::InteractionLocality       iLocality,
+                                 const interaction_const_t&     ic,
+                                 const gmx::StepWorkload&       stepWork,
+                                 int                            clearF,
+                                 gmx::ArrayRef<const gmx::RVec> shiftvec,
+                                 gmx::ArrayRef<real>            repulsionDispersionSR,
+                                 gmx::ArrayRef<real>            CoulombSR,
+                                 t_nrnb*                        nrnb) const;
+
+    //! Executes the non-bonded free-energy kernels, local + non-local, always runs on the CPU
+    void dispatchFreeEnergyKernels(const gmx::ArrayRefWithPadding<const gmx::RVec>& coords,
+                                   gmx::ForceWithShiftForces*     forceWithShiftForces,
+                                   bool                           useSimd,
+                                   int                            ntype,
+                                   real                           rlist,
+                                   const interaction_const_t&     ic,
+                                   gmx::ArrayRef<const gmx::RVec> shiftvec,
+                                   gmx::ArrayRef<const real>      nbfp,
+                                   gmx::ArrayRef<const real>      nbfp_grid,
+                                   gmx::ArrayRef<const real>      chargeA,
+                                   gmx::ArrayRef<const real>      chargeB,
+                                   gmx::ArrayRef<const int>       typeA,
+                                   gmx::ArrayRef<const int>       typeB,
+                                   t_lambda*                      fepvals,
+                                   gmx::ArrayRef<const real>      lambda,
+                                   gmx_enerdata_t*                enerd,
+                                   const gmx::StepWorkload&       stepWork,
+                                   t_nrnb*                        nrnb);
+
+    /*! \brief Add the forces stored in nbat to f, zeros the forces in nbat
+     * \param [in] locality         Local or non-local
+     * \param [inout] force         Force to be added to
+     */
+    void atomdata_add_nbat_f_to_f(gmx::AtomLocality locality, gmx::ArrayRef<gmx::RVec> force);
+
+    /*! \brief Get the number of atoms for a given locality
+     *
+     * \param [in] locality   Local or non-local
+     * \returns               The number of atoms for given locality
+     */
+    int getNumAtoms(gmx::AtomLocality locality) const;
+
+    //! Return the kernel setup
+    const Nbnxm::KernelSetup& kernelSetup() const { return kernelSetup_; }
+
+    //! Returns the outer radius for the pair list
+    real pairlistInnerRadius() const;
+
+    //! Returns the outer radius for the pair list
+    real pairlistOuterRadius() const;
+
+    //! Changes the pair-list outer and inner radius
+    void changePairlistRadii(real rlistOuter, real rlistInner) const;
+
+    //! Set up internal flags that indicate what type of short-range work there is.
+    void setupGpuShortRangeWork(const gmx::ListedForcesGpu* listedForcesGpu,
+                                gmx::InteractionLocality    iLocality) const;
+
+    void setupFepThreadedForceBuffer(int numAtomsForce);
+
+    // TODO: Make all data members private
+    //! All data related to the pair lists
+    std::unique_ptr<PairlistSets> pairlistSets_;
+    //! Working data for constructing the pairlists
+    std::unique_ptr<PairSearch> pairSearch_;
+    //! Atom data
+    std::unique_ptr<nbnxn_atomdata_t> nbat;
+
+private:
+    //! The non-bonded setup, also affects the pairlist construction kernel
+    Nbnxm::KernelSetup kernelSetup_;
+
+    //! \brief Pointer to wallcycle structure.
+    gmx_wallcycle* wcycle_;
+
+    //! \brief The non-bonded free-energy kernel dispatcher
+    std::unique_ptr<FreeEnergyDispatch> freeEnergyDispatch_;
+
+public:
+    //! GPU Nbnxm data, only used with a physical GPU (TODO: use unique_ptr)
+    NbnxmGpu* gpu_nbv;
 };
 
 namespace Nbnxm
 {
 
 /*! \brief Creates an Nbnxm object */
-std::unique_ptr<nonbonded_verlet_t>
-init_nb_verlet(const gmx::MDLogger     &mdlog,
-               gmx_bool                 bFEP_NonBonded,
-               const t_inputrec        *ir,
-               const t_forcerec        *fr,
-               const t_commrec         *cr,
-               const gmx_hw_info_t     &hardwareInfo,
-               const gmx_device_info_t *deviceInfo,
-               const gmx_mtop_t        *mtop,
-               matrix                   box,
-               gmx_wallcycle           *wcycle);
+std::unique_ptr<nonbonded_verlet_t> init_nb_verlet(const gmx::MDLogger& mdlog,
+                                                   const t_inputrec&    inputrec,
+                                                   const t_forcerec&    forcerec,
+                                                   const t_commrec*     commrec,
+                                                   const gmx_hw_info_t& hardwareInfo,
+                                                   bool                 useGpuForNonbonded,
+                                                   const gmx::DeviceStreamManager* deviceStreamManager,
+                                                   const gmx_mtop_t&               mtop,
+                                                   matrix                          box,
+                                                   gmx_wallcycle*                  wcycle);
 
 } // namespace Nbnxm
 
@@ -474,27 +490,34 @@ init_nb_verlet(const gmx::MDLogger     &mdlog,
  * \param[in]     numAtomsMoved  The number of atoms that will move to another domain, pass 0 without DD
  * \param[in]     move         Move flags for atoms, pass nullptr without DD
  */
-void nbnxn_put_on_grid(nonbonded_verlet_t             *nb_verlet,
-                       const matrix                    box,
-                       int                             gridIndex,
-                       const rvec                      lowerCorner,
-                       const rvec                      upperCorner,
-                       const gmx::UpdateGroupsCog     *updateGroupsCog,
-                       gmx::Range<int>                 atomRange,
-                       real                            atomDensity,
-                       gmx::ArrayRef<const int>        atomInfo,
-                       gmx::ArrayRef<const gmx::RVec>  x,
-                       int                             numAtomsMoved,
-                       const int                      *move);
+void nbnxn_put_on_grid(nonbonded_verlet_t*            nb_verlet,
+                       const matrix                   box,
+                       int                            gridIndex,
+                       const rvec                     lowerCorner,
+                       const rvec                     upperCorner,
+                       const gmx::UpdateGroupsCog*    updateGroupsCog,
+                       gmx::Range<int>                atomRange,
+                       real                           atomDensity,
+                       gmx::ArrayRef<const int64_t>   atomInfo,
+                       gmx::ArrayRef<const gmx::RVec> x,
+                       int                            numAtomsMoved,
+                       const int*                     move);
 
 /*! \brief As nbnxn_put_on_grid, but for the non-local atoms
  *
  * with domain decomposition. Should be called after calling
  * nbnxn_search_put_on_grid for the local atoms / home zone.
  */
-void nbnxn_put_on_grid_nonlocal(nonbonded_verlet_t              *nb_verlet,
-                                const struct gmx_domdec_zones_t *zones,
-                                gmx::ArrayRef<const int>         atomInfo,
+void nbnxn_put_on_grid_nonlocal(nonbonded_verlet_t*              nb_verlet,
+                                const struct gmx_domdec_zones_t* zones,
+                                gmx::ArrayRef<const int64_t>     atomInfo,
                                 gmx::ArrayRef<const gmx::RVec>   x);
 
+/*! \brief Check if GROMACS has been built with GPU support.
+ *
+ * \param[in] error Pointer to error string or nullptr.
+ * \todo Move this to NB module once it exists.
+ */
+bool buildSupportsNonbondedOnGpu(std::string* error);
+
 #endif // GMX_NBNXN_NBNXM_H