Unify gpu_init_atomdata(...) function
[alexxy/gromacs.git] / src / gromacs / nbnxm / sycl / nbnxm_sycl_data_mgmt.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2020,2021, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35
36 /*! \internal \file
37  *  \brief
38  *  Data management and initialization functions for the nbnxm SYCL implementation.
39  *
40  *  \ingroup module_nbnxm
41  */
42 #include "gmxpre.h"
43
44 #include "gromacs/gpu_utils/device_stream_manager.h"
45 #include "gromacs/gpu_utils/pmalloc.h"
46 #include "gromacs/hardware/device_information.h"
47 #include "gromacs/mdtypes/interaction_const.h"
48 #include "gromacs/nbnxm/atomdata.h"
49 #include "gromacs/nbnxm/gpu_data_mgmt.h"
50 #include "gromacs/nbnxm/nbnxm_gpu.h"
51 #include "gromacs/nbnxm/nbnxm_gpu_data_mgmt.h"
52 #include "gromacs/pbcutil/ishift.h"
53 #include "gromacs/utility/exceptions.h"
54
55 #include "nbnxm_sycl_types.h"
56
57 namespace Nbnxm
58 {
59
60 /*! \brief Initialize \p atomdata first time; it only gets filled at pair-search. */
61 static void initAtomdataFirst(NBAtomData*          atomdata,
62                               int                  numTypes,
63                               const DeviceContext& deviceContext,
64                               const DeviceStream&  localStream)
65 {
66     atomdata->numTypes = numTypes;
67     allocateDeviceBuffer(&atomdata->shiftVec, SHIFTS, deviceContext);
68     atomdata->shiftVecUploaded = false;
69
70     allocateDeviceBuffer(&atomdata->fShift, SHIFTS, deviceContext);
71     allocateDeviceBuffer(&atomdata->eLJ, 1, deviceContext);
72     allocateDeviceBuffer(&atomdata->eElec, 1, deviceContext);
73
74     clearDeviceBufferAsync(&atomdata->fShift, 0, SHIFTS, localStream);
75     clearDeviceBufferAsync(&atomdata->eElec, 0, 1, localStream);
76     clearDeviceBufferAsync(&atomdata->eLJ, 0, 1, localStream);
77
78     /* initialize to nullptr pointers to data that is not allocated here and will
79        need reallocation in later */
80     atomdata->xq = nullptr;
81     atomdata->f  = nullptr;
82
83     /* size -1 indicates that the respective array hasn't been initialized yet */
84     atomdata->numAtoms      = -1;
85     atomdata->numAtomsAlloc = -1;
86 }
87
88 /*! \brief Initialize the nonbonded parameter data structure. */
89 static void initNbparam(NBParamGpu*                     nbp,
90                         const interaction_const_t&      ic,
91                         const PairlistParams&           listParams,
92                         const nbnxn_atomdata_t::Params& nbatParams,
93                         const DeviceContext&            deviceContext)
94 {
95     const int numTypes = nbatParams.numTypes;
96
97     set_cutoff_parameters(nbp, &ic, listParams);
98
99     nbp->vdwType  = nbnxmGpuPickVdwKernelType(&ic, nbatParams.ljCombinationRule);
100     nbp->elecType = nbnxmGpuPickElectrostaticsKernelType(&ic, deviceContext.deviceInfo());
101
102     /* generate table for PME */
103     nbp->coulomb_tab = nullptr;
104     if (nbp->elecType == ElecType::EwaldTab || nbp->elecType == ElecType::EwaldTabTwin)
105     {
106         GMX_RELEASE_ASSERT(ic.coulombEwaldTables, "Need valid Coulomb Ewald correction tables");
107         init_ewald_coulomb_force_table(*ic.coulombEwaldTables, nbp, deviceContext);
108     }
109
110     /* set up LJ parameter lookup table */
111     if (!useLjCombRule(nbp->vdwType))
112     {
113         static_assert(sizeof(decltype(nbp->nbfp)) == 2 * sizeof(decltype(*nbatParams.nbfp.data())),
114                       "Mismatch in the size of host / device data types");
115         initParamLookupTable(&nbp->nbfp,
116                              &nbp->nbfp_texobj,
117                              reinterpret_cast<const Float2*>(nbatParams.nbfp.data()),
118                              numTypes * numTypes,
119                              deviceContext);
120     }
121
122     /* set up LJ-PME parameter lookup table */
123     if (ic.vdwtype == VanDerWaalsType::Pme)
124     {
125         static_assert(sizeof(decltype(nbp->nbfp_comb))
126                               == 2 * sizeof(decltype(*nbatParams.nbfp_comb.data())),
127                       "Mismatch in the size of host / device data types");
128         initParamLookupTable(&nbp->nbfp_comb,
129                              &nbp->nbfp_comb_texobj,
130                              reinterpret_cast<const Float2*>(nbatParams.nbfp_comb.data()),
131                              numTypes,
132                              deviceContext);
133     }
134 }
135
136 NbnxmGpu* gpu_init(const gmx::DeviceStreamManager& deviceStreamManager,
137                    const interaction_const_t*      ic,
138                    const PairlistParams&           listParams,
139                    const nbnxn_atomdata_t*         nbat,
140                    const bool                      bLocalAndNonlocal)
141 {
142     auto* nb                              = new NbnxmGpu();
143     nb->deviceContext_                    = &deviceStreamManager.context();
144     nb->atdat                             = new NBAtomData;
145     nb->nbparam                           = new NBParamGpu;
146     nb->plist[InteractionLocality::Local] = new Nbnxm::gpu_plist;
147     if (bLocalAndNonlocal)
148     {
149         nb->plist[InteractionLocality::NonLocal] = new Nbnxm::gpu_plist;
150     }
151
152     nb->bUseTwoStreams = bLocalAndNonlocal;
153
154     nb->timers  = nullptr;
155     nb->timings = nullptr;
156
157     /* init nbst */
158     pmalloc(reinterpret_cast<void**>(&nb->nbst.eLJ), sizeof(*nb->nbst.eLJ));
159     pmalloc(reinterpret_cast<void**>(&nb->nbst.eElec), sizeof(*nb->nbst.eElec));
160     pmalloc(reinterpret_cast<void**>(&nb->nbst.fShift), SHIFTS * sizeof(*nb->nbst.fShift));
161
162     init_plist(nb->plist[InteractionLocality::Local]);
163
164     /* local/non-local GPU streams */
165     GMX_RELEASE_ASSERT(deviceStreamManager.streamIsValid(gmx::DeviceStreamType::NonBondedLocal),
166                        "Local non-bonded stream should be initialized to use GPU for non-bonded.");
167     const DeviceStream& localStream = deviceStreamManager.stream(gmx::DeviceStreamType::NonBondedLocal);
168     nb->deviceStreams[InteractionLocality::Local] = &localStream;
169     // In general, it's not strictly necessary to use 2 streams for SYCL, since they are
170     // out-of-order. But for the time being, it will be less disruptive to keep them.
171     if (nb->bUseTwoStreams)
172     {
173         init_plist(nb->plist[InteractionLocality::NonLocal]);
174
175         GMX_RELEASE_ASSERT(deviceStreamManager.streamIsValid(gmx::DeviceStreamType::NonBondedNonLocal),
176                            "Non-local non-bonded stream should be initialized to use GPU for "
177                            "non-bonded with domain decomposition.");
178         nb->deviceStreams[InteractionLocality::NonLocal] =
179                 &deviceStreamManager.stream(gmx::DeviceStreamType::NonBondedNonLocal);
180     }
181
182     nb->bDoTime = false;
183
184     const nbnxn_atomdata_t::Params& nbatParams    = nbat->params();
185     const DeviceContext&            deviceContext = *nb->deviceContext_;
186
187     initNbparam(nb->nbparam, *ic, listParams, nbatParams, deviceContext);
188     initAtomdataFirst(nb->atdat, nbatParams.numTypes, deviceContext, localStream);
189
190     return nb;
191 }
192
193 void gpu_upload_shiftvec(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom)
194 {
195     NBAtomData*         adat        = nb->atdat;
196     const DeviceStream& localStream = *nb->deviceStreams[InteractionLocality::Local];
197
198     /* only if we have a dynamic box */
199     if (nbatom->bDynamicBox || !adat->shiftVecUploaded)
200     {
201         GMX_ASSERT(adat->shiftVec.elementSize() == sizeof(nbatom->shift_vec[0]),
202                    "Sizes of host- and device-side shift vectors should be the same.");
203         copyToDeviceBuffer(&adat->shiftVec,
204                            reinterpret_cast<const Float3*>(nbatom->shift_vec.data()),
205                            0,
206                            SHIFTS,
207                            localStream,
208                            GpuApiCallBehavior::Async,
209                            nullptr);
210         adat->shiftVecUploaded = true;
211     }
212 }
213
214 void gpu_free(NbnxmGpu* nb)
215 {
216     if (nb == nullptr)
217     {
218         return;
219     }
220
221     NBAtomData* atdat   = nb->atdat;
222     NBParamGpu* nbparam = nb->nbparam;
223
224     if ((!nbparam->coulomb_tab)
225         && (nbparam->elecType == ElecType::EwaldTab || nbparam->elecType == ElecType::EwaldTabTwin))
226     {
227         destroyParamLookupTable(&nbparam->coulomb_tab, nbparam->coulomb_tab_texobj);
228     }
229
230     if (!useLjCombRule(nb->nbparam->vdwType))
231     {
232         destroyParamLookupTable(&nbparam->nbfp, nbparam->nbfp_texobj);
233     }
234
235     if (nbparam->vdwType == VdwType::EwaldGeom || nbparam->vdwType == VdwType::EwaldLB)
236     {
237         destroyParamLookupTable(&nbparam->nbfp_comb, nbparam->nbfp_comb_texobj);
238     }
239
240     /* Free plist */
241     auto* plist = nb->plist[InteractionLocality::Local];
242     delete plist;
243     if (nb->bUseTwoStreams)
244     {
245         auto* plist_nl = nb->plist[InteractionLocality::NonLocal];
246         delete plist_nl;
247     }
248
249     /* Free nbst */
250     pfree(nb->nbst.eLJ);
251     nb->nbst.eLJ = nullptr;
252
253     pfree(nb->nbst.eElec);
254     nb->nbst.eElec = nullptr;
255
256     pfree(nb->nbst.fShift);
257     nb->nbst.fShift = nullptr;
258
259     delete atdat;
260     delete nbparam;
261     delete nb;
262 }
263
264 int gpu_min_ci_balanced(NbnxmGpu* nb)
265 {
266     // SYCL-TODO: Logic and magic values taken from OpenCL
267     static constexpr unsigned int balancedFactor = 50;
268     if (nb == nullptr)
269     {
270         return 0;
271     }
272     const cl::sycl::device device = nb->deviceContext_->deviceInfo().syclDevice;
273     const int numComputeUnits     = device.get_info<cl::sycl::info::device::max_compute_units>();
274     return balancedFactor * numComputeUnits;
275 }
276
277 } // namespace Nbnxm