SYCL: Use acc.bind(cgh) instead of cgh.require(acc)
[alexxy/gromacs.git] / src / gromacs / nbnxm / pairlist_simd_4xm.h
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
5  * Copyright (c) 2017,2018,2019,2020,2021, by the GROMACS development team, led by
6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7  * and including many others, as listed in the AUTHORS file in the
8  * top-level source directory and at http://www.gromacs.org.
9  *
10  * GROMACS is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public License
12  * as published by the Free Software Foundation; either version 2.1
13  * of the License, or (at your option) any later version.
14  *
15  * GROMACS is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with GROMACS; if not, see
22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
24  *
25  * If you want to redistribute modifications to GROMACS, please
26  * consider that scientific software is very special. Version
27  * control is crucial - bugs must be traceable. We will be happy to
28  * consider code for inclusion in the official distribution, but
29  * derived work must not be called official GROMACS. Details are found
30  * in the README & COPYING files - if they are missing, get the
31  * official version at http://www.gromacs.org.
32  *
33  * To help us fund GROMACS development, we humbly ask that you cite
34  * the research papers on the package. Check out http://www.gromacs.org.
35  */
36
37 /*! \internal \file
38  *
39  * \brief
40  * Declares inline-friendly code for making 4xN pairlists
41  *
42  * \author Berk Hess <hess@kth.se>
43  * \ingroup module_nbnxm
44  */
45
46 //! Stride of the packed x coordinate array
47 static constexpr int c_xStride4xN =
48         (GMX_SIMD_REAL_WIDTH > c_nbnxnCpuIClusterSize ? GMX_SIMD_REAL_WIDTH : c_nbnxnCpuIClusterSize);
49
50 //! Copies PBC shifted i-cell packed atom coordinates to working array
51 static inline void icell_set_x_simd_4xn(int                   ci,
52                                         real                  shx,
53                                         real                  shy,
54                                         real                  shz,
55                                         int gmx_unused        stride,
56                                         const real*           x,
57                                         NbnxnPairlistCpuWork* work)
58 {
59     real* x_ci_simd = work->iClusterData.xSimd.data();
60
61     const int ia = xIndexFromCi<NbnxnLayout::Simd4xN>(ci);
62
63     store(x_ci_simd + 0 * GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 0 * c_xStride4xN] + shx));
64     store(x_ci_simd + 1 * GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 1 * c_xStride4xN] + shy));
65     store(x_ci_simd + 2 * GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 2 * c_xStride4xN] + shz));
66     store(x_ci_simd + 3 * GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 0 * c_xStride4xN + 1] + shx));
67     store(x_ci_simd + 4 * GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 1 * c_xStride4xN + 1] + shy));
68     store(x_ci_simd + 5 * GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 2 * c_xStride4xN + 1] + shz));
69     store(x_ci_simd + 6 * GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 0 * c_xStride4xN + 2] + shx));
70     store(x_ci_simd + 7 * GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 1 * c_xStride4xN + 2] + shy));
71     store(x_ci_simd + 8 * GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 2 * c_xStride4xN + 2] + shz));
72     store(x_ci_simd + 9 * GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 0 * c_xStride4xN + 3] + shx));
73     store(x_ci_simd + 10 * GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 1 * c_xStride4xN + 3] + shy));
74     store(x_ci_simd + 11 * GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 2 * c_xStride4xN + 3] + shz));
75 }
76
77 /*! \brief SIMD code for checking and adding cluster-pairs to the list using coordinates in packed format.
78  *
79  * Checks bounding box distances and possibly atom pair distances.
80  * This is an accelerated version of make_cluster_list_simple.
81  *
82  * \param[in]     jGrid               The j-grid
83  * \param[in,out] nbl                 The pair-list to store the cluster pairs in
84  * \param[in]     icluster            The index of the i-cluster
85  * \param[in]     firstCell           The first cluster in the j-range, using i-cluster size indexing
86  * \param[in]     lastCell            The last cluster in the j-range, using i-cluster size indexing
87  * \param[in]     excludeSubDiagonal  Exclude atom pairs with i-index > j-index
88  * \param[in]     x_j                 Coordinates for the j-atom, in SIMD packed format
89  * \param[in]     rlist2              The squared list cut-off
90  * \param[in]     rbb2                The squared cut-off for putting cluster-pairs in the list based on bounding box distance only
91  * \param[in,out] numDistanceChecks   The number of distance checks performed
92  */
93 static inline void makeClusterListSimd4xn(const Grid&              jGrid,
94                                           NbnxnPairlistCpu*        nbl,
95                                           int                      icluster,
96                                           int                      firstCell,
97                                           int                      lastCell,
98                                           bool                     excludeSubDiagonal,
99                                           const real* gmx_restrict x_j,
100                                           real                     rlist2,
101                                           float                    rbb2,
102                                           int* gmx_restrict        numDistanceChecks)
103 {
104     using namespace gmx;
105     const real* gmx_restrict        x_ci_simd = nbl->work->iClusterData.xSimd.data();
106     const BoundingBox* gmx_restrict bb_ci     = nbl->work->iClusterData.bb.data();
107
108     SimdReal jx_S, jy_S, jz_S;
109
110     SimdReal dx_S0, dy_S0, dz_S0;
111     SimdReal dx_S1, dy_S1, dz_S1;
112     SimdReal dx_S2, dy_S2, dz_S2;
113     SimdReal dx_S3, dy_S3, dz_S3;
114
115     SimdReal rsq_S0;
116     SimdReal rsq_S1;
117     SimdReal rsq_S2;
118     SimdReal rsq_S3;
119
120     SimdBool wco_S0;
121     SimdBool wco_S1;
122     SimdBool wco_S2;
123     SimdBool wco_S3;
124     SimdBool wco_any_S01, wco_any_S23, wco_any_S;
125
126     SimdReal rc2_S;
127
128     /* Convert the j-range from i-cluster size indexing to j-cluster indexing */
129     int jclusterFirst = cjFromCi<NbnxnLayout::Simd4xN, 0>(firstCell);
130     int jclusterLast  = cjFromCi<NbnxnLayout::Simd4xN, 1>(lastCell);
131     GMX_ASSERT(jclusterLast >= jclusterFirst,
132                "We should have a non-empty j-cluster range, since the calling code should have "
133                "ensured a non-empty cell range");
134
135     rc2_S = SimdReal(rlist2);
136
137     bool InRange = false;
138     while (!InRange && jclusterFirst <= jclusterLast)
139     {
140         const float d2 = clusterBoundingBoxDistance2(bb_ci[0], jGrid.jBoundingBoxes()[jclusterFirst]);
141         *numDistanceChecks += 2;
142
143         /* Check if the distance is within the distance where
144          * we use only the bounding box distance rbb,
145          * or within the cut-off and there is at least one atom pair
146          * within the cut-off.
147          */
148         if (d2 < rbb2)
149         {
150             InRange = true;
151         }
152         else if (d2 < rlist2)
153         {
154             const int xind_f = xIndexFromCj<NbnxnLayout::Simd4xN>(
155                     cjFromCi<NbnxnLayout::Simd4xN, 0>(jGrid.cellOffset()) + jclusterFirst);
156
157             jx_S = load<SimdReal>(x_j + xind_f + 0 * c_xStride4xN);
158             jy_S = load<SimdReal>(x_j + xind_f + 1 * c_xStride4xN);
159             jz_S = load<SimdReal>(x_j + xind_f + 2 * c_xStride4xN);
160
161
162             /* Calculate distance */
163             dx_S0 = load<SimdReal>(x_ci_simd + 0 * GMX_SIMD_REAL_WIDTH) - jx_S;
164             dy_S0 = load<SimdReal>(x_ci_simd + 1 * GMX_SIMD_REAL_WIDTH) - jy_S;
165             dz_S0 = load<SimdReal>(x_ci_simd + 2 * GMX_SIMD_REAL_WIDTH) - jz_S;
166             dx_S1 = load<SimdReal>(x_ci_simd + 3 * GMX_SIMD_REAL_WIDTH) - jx_S;
167             dy_S1 = load<SimdReal>(x_ci_simd + 4 * GMX_SIMD_REAL_WIDTH) - jy_S;
168             dz_S1 = load<SimdReal>(x_ci_simd + 5 * GMX_SIMD_REAL_WIDTH) - jz_S;
169             dx_S2 = load<SimdReal>(x_ci_simd + 6 * GMX_SIMD_REAL_WIDTH) - jx_S;
170             dy_S2 = load<SimdReal>(x_ci_simd + 7 * GMX_SIMD_REAL_WIDTH) - jy_S;
171             dz_S2 = load<SimdReal>(x_ci_simd + 8 * GMX_SIMD_REAL_WIDTH) - jz_S;
172             dx_S3 = load<SimdReal>(x_ci_simd + 9 * GMX_SIMD_REAL_WIDTH) - jx_S;
173             dy_S3 = load<SimdReal>(x_ci_simd + 10 * GMX_SIMD_REAL_WIDTH) - jy_S;
174             dz_S3 = load<SimdReal>(x_ci_simd + 11 * GMX_SIMD_REAL_WIDTH) - jz_S;
175
176             /* rsq = dx*dx+dy*dy+dz*dz */
177             rsq_S0 = norm2(dx_S0, dy_S0, dz_S0);
178             rsq_S1 = norm2(dx_S1, dy_S1, dz_S1);
179             rsq_S2 = norm2(dx_S2, dy_S2, dz_S2);
180             rsq_S3 = norm2(dx_S3, dy_S3, dz_S3);
181
182             wco_S0 = (rsq_S0 < rc2_S);
183             wco_S1 = (rsq_S1 < rc2_S);
184             wco_S2 = (rsq_S2 < rc2_S);
185             wco_S3 = (rsq_S3 < rc2_S);
186
187             wco_any_S01 = wco_S0 || wco_S1;
188             wco_any_S23 = wco_S2 || wco_S3;
189             wco_any_S   = wco_any_S01 || wco_any_S23;
190
191             InRange = anyTrue(wco_any_S);
192
193             *numDistanceChecks += 4 * GMX_SIMD_REAL_WIDTH;
194         }
195         if (!InRange)
196         {
197             jclusterFirst++;
198         }
199     }
200     if (!InRange)
201     {
202         return;
203     }
204
205     InRange = false;
206     while (!InRange && jclusterLast > jclusterFirst)
207     {
208         const float d2 = clusterBoundingBoxDistance2(bb_ci[0], jGrid.jBoundingBoxes()[jclusterLast]);
209         *numDistanceChecks += 2;
210
211         /* Check if the distance is within the distance where
212          * we use only the bounding box distance rbb,
213          * or within the cut-off and there is at least one atom pair
214          * within the cut-off.
215          */
216         if (d2 < rbb2)
217         {
218             InRange = true;
219         }
220         else if (d2 < rlist2)
221         {
222             const int xind_l = xIndexFromCj<NbnxnLayout::Simd4xN>(
223                     cjFromCi<NbnxnLayout::Simd4xN, 0>(jGrid.cellOffset()) + jclusterLast);
224
225             jx_S = load<SimdReal>(x_j + xind_l + 0 * c_xStride4xN);
226             jy_S = load<SimdReal>(x_j + xind_l + 1 * c_xStride4xN);
227             jz_S = load<SimdReal>(x_j + xind_l + 2 * c_xStride4xN);
228
229             /* Calculate distance */
230             dx_S0 = load<SimdReal>(x_ci_simd + 0 * GMX_SIMD_REAL_WIDTH) - jx_S;
231             dy_S0 = load<SimdReal>(x_ci_simd + 1 * GMX_SIMD_REAL_WIDTH) - jy_S;
232             dz_S0 = load<SimdReal>(x_ci_simd + 2 * GMX_SIMD_REAL_WIDTH) - jz_S;
233             dx_S1 = load<SimdReal>(x_ci_simd + 3 * GMX_SIMD_REAL_WIDTH) - jx_S;
234             dy_S1 = load<SimdReal>(x_ci_simd + 4 * GMX_SIMD_REAL_WIDTH) - jy_S;
235             dz_S1 = load<SimdReal>(x_ci_simd + 5 * GMX_SIMD_REAL_WIDTH) - jz_S;
236             dx_S2 = load<SimdReal>(x_ci_simd + 6 * GMX_SIMD_REAL_WIDTH) - jx_S;
237             dy_S2 = load<SimdReal>(x_ci_simd + 7 * GMX_SIMD_REAL_WIDTH) - jy_S;
238             dz_S2 = load<SimdReal>(x_ci_simd + 8 * GMX_SIMD_REAL_WIDTH) - jz_S;
239             dx_S3 = load<SimdReal>(x_ci_simd + 9 * GMX_SIMD_REAL_WIDTH) - jx_S;
240             dy_S3 = load<SimdReal>(x_ci_simd + 10 * GMX_SIMD_REAL_WIDTH) - jy_S;
241             dz_S3 = load<SimdReal>(x_ci_simd + 11 * GMX_SIMD_REAL_WIDTH) - jz_S;
242
243             /* rsq = dx*dx+dy*dy+dz*dz */
244             rsq_S0 = norm2(dx_S0, dy_S0, dz_S0);
245             rsq_S1 = norm2(dx_S1, dy_S1, dz_S1);
246             rsq_S2 = norm2(dx_S2, dy_S2, dz_S2);
247             rsq_S3 = norm2(dx_S3, dy_S3, dz_S3);
248
249             wco_S0 = (rsq_S0 < rc2_S);
250             wco_S1 = (rsq_S1 < rc2_S);
251             wco_S2 = (rsq_S2 < rc2_S);
252             wco_S3 = (rsq_S3 < rc2_S);
253
254             wco_any_S01 = wco_S0 || wco_S1;
255             wco_any_S23 = wco_S2 || wco_S3;
256             wco_any_S   = wco_any_S01 || wco_any_S23;
257
258             InRange = anyTrue(wco_any_S);
259
260             *numDistanceChecks += 4 * GMX_SIMD_REAL_WIDTH;
261         }
262         if (!InRange)
263         {
264             jclusterLast--;
265         }
266     }
267
268     if (jclusterFirst <= jclusterLast)
269     {
270         for (int jcluster = jclusterFirst; jcluster <= jclusterLast; jcluster++)
271         {
272             /* Store cj and the interaction mask */
273             nbnxn_cj_t cjEntry;
274             cjEntry.cj   = cjFromCi<NbnxnLayout::Simd4xN, 0>(jGrid.cellOffset()) + jcluster;
275             cjEntry.excl = get_imask_simd_4xn(excludeSubDiagonal, icluster, jcluster);
276             nbl->cj.push_back(cjEntry);
277         }
278         /* Increase the closing index in the i list */
279         nbl->ci.back().cj_ind_end = nbl->cj.size();
280     }
281 }