Apply clang-tidy-11 fixes to CUDA files
[alexxy/gromacs.git] / src / gromacs / mdlib / gpuforcereduction_impl.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2020,2021, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*! \internal \file
36  *
37  * \brief Implements backend-agnostic GPU Force Reduction functions
38  *
39  * \author Alan Gray <alang@nvidia.com>
40  *
41  * \ingroup module_mdlib
42  */
43
44 #include "gmxpre.h"
45
46 #include "gpuforcereduction_impl.h"
47
48 #include "gromacs/gpu_utils/device_stream.h"
49 #include "gromacs/gpu_utils/devicebuffer.h"
50 #if GMX_GPU_CUDA
51 #    include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
52 #elif GMX_GPU_SYCL
53 #    include "gromacs/gpu_utils/gpueventsynchronizer_sycl.h"
54 #endif
55 #include "gromacs/mdlib/gpuforcereduction_impl_internal.h"
56 #include "gromacs/utility/gmxassert.h"
57
58 namespace gmx
59 {
60
61 GpuForceReduction::Impl::Impl(const DeviceContext& deviceContext,
62                               const DeviceStream&  deviceStream,
63                               gmx_wallcycle*       wcycle) :
64     baseForce_(),
65     deviceContext_(deviceContext),
66     deviceStream_(deviceStream),
67     nbnxmForceToAdd_(),
68     rvecForceToAdd_(),
69     wcycle_(wcycle)
70 {
71 }
72
73 void GpuForceReduction::Impl::reinit(DeviceBuffer<Float3>  baseForcePtr,
74                                      const int             numAtoms,
75                                      ArrayRef<const int>   cell,
76                                      const int             atomStart,
77                                      const bool            accumulate,
78                                      GpuEventSynchronizer* completionMarker)
79 {
80     GMX_ASSERT(baseForcePtr, "Input base force for reduction has no data");
81     baseForce_        = baseForcePtr;
82     numAtoms_         = numAtoms;
83     atomStart_        = atomStart;
84     accumulate_       = accumulate;
85     completionMarker_ = completionMarker;
86     cellInfo_.cell    = cell.data();
87
88     wallcycle_start_nocount(wcycle_, WallCycleCounter::LaunchGpu);
89     reallocateDeviceBuffer(
90             &cellInfo_.d_cell, numAtoms_, &cellInfo_.cellSize, &cellInfo_.cellSizeAlloc, deviceContext_);
91     copyToDeviceBuffer(&cellInfo_.d_cell,
92                        &(cellInfo_.cell[atomStart]),
93                        0,
94                        numAtoms_,
95                        deviceStream_,
96                        GpuApiCallBehavior::Async,
97                        nullptr);
98     wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
99
100     dependencyList_.clear();
101 };
102
103 void GpuForceReduction::Impl::registerNbnxmForce(DeviceBuffer<RVec> forcePtr)
104 {
105     GMX_ASSERT(forcePtr, "Input force for reduction has no data");
106     nbnxmForceToAdd_ = forcePtr;
107 };
108
109 void GpuForceReduction::Impl::registerRvecForce(DeviceBuffer<RVec> forcePtr)
110 {
111     GMX_ASSERT(forcePtr, "Input force for reduction has no data");
112     rvecForceToAdd_ = forcePtr;
113 };
114
115 void GpuForceReduction::Impl::addDependency(GpuEventSynchronizer* dependency)
116 {
117     dependencyList_.push_back(dependency);
118 }
119
120 void GpuForceReduction::Impl::execute()
121 {
122     wallcycle_start_nocount(wcycle_, WallCycleCounter::LaunchGpu);
123     wallcycle_sub_start(wcycle_, WallCycleSubCounter::LaunchGpuNBFBufOps);
124
125     if (numAtoms_ == 0)
126     {
127         return;
128     }
129
130     GMX_ASSERT(nbnxmForceToAdd_, "Nbnxm force for reduction has no data");
131
132     // Enqueue wait on all dependencies passed
133     for (auto* synchronizer : dependencyList_)
134     {
135         synchronizer->enqueueWaitEvent(deviceStream_);
136     }
137
138     const bool addRvecForce = static_cast<bool>(rvecForceToAdd_); // True iff initialized
139
140     launchForceReductionKernel(numAtoms_,
141                                atomStart_,
142                                addRvecForce,
143                                accumulate_,
144                                nbnxmForceToAdd_,
145                                rvecForceToAdd_,
146                                baseForce_,
147                                cellInfo_.d_cell,
148                                deviceStream_);
149
150     // Mark that kernel has been launched
151     if (completionMarker_ != nullptr)
152     {
153         completionMarker_->markEvent(deviceStream_);
154     }
155
156     wallcycle_sub_stop(wcycle_, WallCycleSubCounter::LaunchGpuNBFBufOps);
157     wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
158 }
159
160 GpuForceReduction::GpuForceReduction(const DeviceContext& deviceContext,
161                                      const DeviceStream&  deviceStream,
162                                      gmx_wallcycle*       wcycle) :
163     impl_(new Impl(deviceContext, deviceStream, wcycle))
164 {
165 }
166
167 void GpuForceReduction::registerNbnxmForce(DeviceBuffer<RVec> forcePtr)
168 {
169     impl_->registerNbnxmForce(forcePtr);
170 }
171
172 void GpuForceReduction::registerRvecForce(DeviceBuffer<RVec> forcePtr)
173 {
174     impl_->registerRvecForce(forcePtr);
175 }
176
177 void GpuForceReduction::addDependency(GpuEventSynchronizer* dependency)
178 {
179     impl_->addDependency(dependency);
180 }
181
182 void GpuForceReduction::reinit(DeviceBuffer<RVec>    baseForcePtr,
183                                const int             numAtoms,
184                                ArrayRef<const int>   cell,
185                                const int             atomStart,
186                                const bool            accumulate,
187                                GpuEventSynchronizer* completionMarker)
188 {
189     impl_->reinit(baseForcePtr, numAtoms, cell, atomStart, accumulate, completionMarker);
190 }
191 void GpuForceReduction::execute()
192 {
193     impl_->execute();
194 }
195
196 GpuForceReduction::~GpuForceReduction() = default;
197
198 } // namespace gmx