1cadfff9327d498f722e74ff7a31cd4b6de815d7
[alexxy/gromacs.git] / src / gromacs / nbnxm / pairlist_simd_4xm.h
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35
36 /* Stride of the packed x coordinate array */
37 static constexpr int c_xStride4xN =
38         (GMX_SIMD_REAL_WIDTH > c_nbnxnCpuIClusterSize ? GMX_SIMD_REAL_WIDTH : c_nbnxnCpuIClusterSize);
39
40 /* Copies PBC shifted i-cell packed atom coordinates to working array */
41 static inline void icell_set_x_simd_4xn(int  ci,
42                                         real shx,
43                                         real shy,
44                                         real shz,
45                                         int gmx_unused        stride,
46                                         const real*           x,
47                                         NbnxnPairlistCpuWork* work)
48 {
49     int   ia;
50     real* x_ci_simd = work->iClusterData.xSimd.data();
51
52     ia = xIndexFromCi<NbnxnLayout::Simd4xN>(ci);
53
54     store(x_ci_simd + 0 * GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 0 * c_xStride4xN] + shx));
55     store(x_ci_simd + 1 * GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 1 * c_xStride4xN] + shy));
56     store(x_ci_simd + 2 * GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 2 * c_xStride4xN] + shz));
57     store(x_ci_simd + 3 * GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 0 * c_xStride4xN + 1] + shx));
58     store(x_ci_simd + 4 * GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 1 * c_xStride4xN + 1] + shy));
59     store(x_ci_simd + 5 * GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 2 * c_xStride4xN + 1] + shz));
60     store(x_ci_simd + 6 * GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 0 * c_xStride4xN + 2] + shx));
61     store(x_ci_simd + 7 * GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 1 * c_xStride4xN + 2] + shy));
62     store(x_ci_simd + 8 * GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 2 * c_xStride4xN + 2] + shz));
63     store(x_ci_simd + 9 * GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 0 * c_xStride4xN + 3] + shx));
64     store(x_ci_simd + 10 * GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 1 * c_xStride4xN + 3] + shy));
65     store(x_ci_simd + 11 * GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 2 * c_xStride4xN + 3] + shz));
66 }
67
68 /* SIMD code for checking and adding cluster-pairs to the list using coordinates in packed format.
69  *
70  * Checks bouding box distances and possibly atom pair distances.
71  * This is an accelerated version of make_cluster_list_simple.
72  *
73  * \param[in]     jGrid               The j-grid
74  * \param[in,out] nbl                 The pair-list to store the cluster pairs in
75  * \param[in]     icluster            The index of the i-cluster
76  * \param[in]     firstCell           The first cluster in the j-range, using i-cluster size indexing
77  * \param[in]     lastCell            The last cluster in the j-range, using i-cluster size indexing
78  * \param[in]     excludeSubDiagonal  Exclude atom pairs with i-index > j-index
79  * \param[in]     x_j                 Coordinates for the j-atom, in SIMD packed format
80  * \param[in]     rlist2              The squared list cut-off
81  * \param[in]     rbb2                The squared cut-off for putting cluster-pairs in the list based on bounding box distance only
82  * \param[in,out] numDistanceChecks   The number of distance checks performed
83  */
84 static inline void makeClusterListSimd4xn(const Grid&       jGrid,
85                                           NbnxnPairlistCpu* nbl,
86                                           int               icluster,
87                                           int               firstCell,
88                                           int               lastCell,
89                                           bool              excludeSubDiagonal,
90                                           const real* gmx_restrict x_j,
91                                           real                     rlist2,
92                                           float                    rbb2,
93                                           int* gmx_restrict numDistanceChecks)
94 {
95     using namespace gmx;
96     const real* gmx_restrict x_ci_simd    = nbl->work->iClusterData.xSimd.data();
97     const BoundingBox* gmx_restrict bb_ci = nbl->work->iClusterData.bb.data();
98
99     SimdReal jx_S, jy_S, jz_S;
100
101     SimdReal dx_S0, dy_S0, dz_S0;
102     SimdReal dx_S1, dy_S1, dz_S1;
103     SimdReal dx_S2, dy_S2, dz_S2;
104     SimdReal dx_S3, dy_S3, dz_S3;
105
106     SimdReal rsq_S0;
107     SimdReal rsq_S1;
108     SimdReal rsq_S2;
109     SimdReal rsq_S3;
110
111     SimdBool wco_S0;
112     SimdBool wco_S1;
113     SimdBool wco_S2;
114     SimdBool wco_S3;
115     SimdBool wco_any_S01, wco_any_S23, wco_any_S;
116
117     SimdReal rc2_S;
118
119     gmx_bool InRange;
120     float    d2;
121     int      xind_f, xind_l;
122
123     /* Convert the j-range from i-cluster size indexing to j-cluster indexing */
124     int jclusterFirst = cjFromCi<NbnxnLayout::Simd4xN, 0>(firstCell);
125     int jclusterLast  = cjFromCi<NbnxnLayout::Simd4xN, 1>(lastCell);
126     GMX_ASSERT(jclusterLast >= jclusterFirst,
127                "We should have a non-empty j-cluster range, since the calling code should have "
128                "ensured a non-empty cell range");
129
130     rc2_S = SimdReal(rlist2);
131
132     InRange = FALSE;
133     while (!InRange && jclusterFirst <= jclusterLast)
134     {
135         d2 = clusterBoundingBoxDistance2(bb_ci[0], jGrid.jBoundingBoxes()[jclusterFirst]);
136         *numDistanceChecks += 2;
137
138         /* Check if the distance is within the distance where
139          * we use only the bounding box distance rbb,
140          * or within the cut-off and there is at least one atom pair
141          * within the cut-off.
142          */
143         if (d2 < rbb2)
144         {
145             InRange = TRUE;
146         }
147         else if (d2 < rlist2)
148         {
149             xind_f = xIndexFromCj<NbnxnLayout::Simd4xN>(
150                     cjFromCi<NbnxnLayout::Simd4xN, 0>(jGrid.cellOffset()) + jclusterFirst);
151
152             jx_S = load<SimdReal>(x_j + xind_f + 0 * c_xStride4xN);
153             jy_S = load<SimdReal>(x_j + xind_f + 1 * c_xStride4xN);
154             jz_S = load<SimdReal>(x_j + xind_f + 2 * c_xStride4xN);
155
156
157             /* Calculate distance */
158             dx_S0 = load<SimdReal>(x_ci_simd + 0 * GMX_SIMD_REAL_WIDTH) - jx_S;
159             dy_S0 = load<SimdReal>(x_ci_simd + 1 * GMX_SIMD_REAL_WIDTH) - jy_S;
160             dz_S0 = load<SimdReal>(x_ci_simd + 2 * GMX_SIMD_REAL_WIDTH) - jz_S;
161             dx_S1 = load<SimdReal>(x_ci_simd + 3 * GMX_SIMD_REAL_WIDTH) - jx_S;
162             dy_S1 = load<SimdReal>(x_ci_simd + 4 * GMX_SIMD_REAL_WIDTH) - jy_S;
163             dz_S1 = load<SimdReal>(x_ci_simd + 5 * GMX_SIMD_REAL_WIDTH) - jz_S;
164             dx_S2 = load<SimdReal>(x_ci_simd + 6 * GMX_SIMD_REAL_WIDTH) - jx_S;
165             dy_S2 = load<SimdReal>(x_ci_simd + 7 * GMX_SIMD_REAL_WIDTH) - jy_S;
166             dz_S2 = load<SimdReal>(x_ci_simd + 8 * GMX_SIMD_REAL_WIDTH) - jz_S;
167             dx_S3 = load<SimdReal>(x_ci_simd + 9 * GMX_SIMD_REAL_WIDTH) - jx_S;
168             dy_S3 = load<SimdReal>(x_ci_simd + 10 * GMX_SIMD_REAL_WIDTH) - jy_S;
169             dz_S3 = load<SimdReal>(x_ci_simd + 11 * GMX_SIMD_REAL_WIDTH) - jz_S;
170
171             /* rsq = dx*dx+dy*dy+dz*dz */
172             rsq_S0 = norm2(dx_S0, dy_S0, dz_S0);
173             rsq_S1 = norm2(dx_S1, dy_S1, dz_S1);
174             rsq_S2 = norm2(dx_S2, dy_S2, dz_S2);
175             rsq_S3 = norm2(dx_S3, dy_S3, dz_S3);
176
177             wco_S0 = (rsq_S0 < rc2_S);
178             wco_S1 = (rsq_S1 < rc2_S);
179             wco_S2 = (rsq_S2 < rc2_S);
180             wco_S3 = (rsq_S3 < rc2_S);
181
182             wco_any_S01 = wco_S0 || wco_S1;
183             wco_any_S23 = wco_S2 || wco_S3;
184             wco_any_S   = wco_any_S01 || wco_any_S23;
185
186             InRange = anyTrue(wco_any_S);
187
188             *numDistanceChecks += 4 * GMX_SIMD_REAL_WIDTH;
189         }
190         if (!InRange)
191         {
192             jclusterFirst++;
193         }
194     }
195     if (!InRange)
196     {
197         return;
198     }
199
200     InRange = FALSE;
201     while (!InRange && jclusterLast > jclusterFirst)
202     {
203         d2 = clusterBoundingBoxDistance2(bb_ci[0], jGrid.jBoundingBoxes()[jclusterLast]);
204         *numDistanceChecks += 2;
205
206         /* Check if the distance is within the distance where
207          * we use only the bounding box distance rbb,
208          * or within the cut-off and there is at least one atom pair
209          * within the cut-off.
210          */
211         if (d2 < rbb2)
212         {
213             InRange = TRUE;
214         }
215         else if (d2 < rlist2)
216         {
217             xind_l = xIndexFromCj<NbnxnLayout::Simd4xN>(
218                     cjFromCi<NbnxnLayout::Simd4xN, 0>(jGrid.cellOffset()) + jclusterLast);
219
220             jx_S = load<SimdReal>(x_j + xind_l + 0 * c_xStride4xN);
221             jy_S = load<SimdReal>(x_j + xind_l + 1 * c_xStride4xN);
222             jz_S = load<SimdReal>(x_j + xind_l + 2 * c_xStride4xN);
223
224             /* Calculate distance */
225             dx_S0 = load<SimdReal>(x_ci_simd + 0 * GMX_SIMD_REAL_WIDTH) - jx_S;
226             dy_S0 = load<SimdReal>(x_ci_simd + 1 * GMX_SIMD_REAL_WIDTH) - jy_S;
227             dz_S0 = load<SimdReal>(x_ci_simd + 2 * GMX_SIMD_REAL_WIDTH) - jz_S;
228             dx_S1 = load<SimdReal>(x_ci_simd + 3 * GMX_SIMD_REAL_WIDTH) - jx_S;
229             dy_S1 = load<SimdReal>(x_ci_simd + 4 * GMX_SIMD_REAL_WIDTH) - jy_S;
230             dz_S1 = load<SimdReal>(x_ci_simd + 5 * GMX_SIMD_REAL_WIDTH) - jz_S;
231             dx_S2 = load<SimdReal>(x_ci_simd + 6 * GMX_SIMD_REAL_WIDTH) - jx_S;
232             dy_S2 = load<SimdReal>(x_ci_simd + 7 * GMX_SIMD_REAL_WIDTH) - jy_S;
233             dz_S2 = load<SimdReal>(x_ci_simd + 8 * GMX_SIMD_REAL_WIDTH) - jz_S;
234             dx_S3 = load<SimdReal>(x_ci_simd + 9 * GMX_SIMD_REAL_WIDTH) - jx_S;
235             dy_S3 = load<SimdReal>(x_ci_simd + 10 * GMX_SIMD_REAL_WIDTH) - jy_S;
236             dz_S3 = load<SimdReal>(x_ci_simd + 11 * GMX_SIMD_REAL_WIDTH) - jz_S;
237
238             /* rsq = dx*dx+dy*dy+dz*dz */
239             rsq_S0 = norm2(dx_S0, dy_S0, dz_S0);
240             rsq_S1 = norm2(dx_S1, dy_S1, dz_S1);
241             rsq_S2 = norm2(dx_S2, dy_S2, dz_S2);
242             rsq_S3 = norm2(dx_S3, dy_S3, dz_S3);
243
244             wco_S0 = (rsq_S0 < rc2_S);
245             wco_S1 = (rsq_S1 < rc2_S);
246             wco_S2 = (rsq_S2 < rc2_S);
247             wco_S3 = (rsq_S3 < rc2_S);
248
249             wco_any_S01 = wco_S0 || wco_S1;
250             wco_any_S23 = wco_S2 || wco_S3;
251             wco_any_S   = wco_any_S01 || wco_any_S23;
252
253             InRange = anyTrue(wco_any_S);
254
255             *numDistanceChecks += 4 * GMX_SIMD_REAL_WIDTH;
256         }
257         if (!InRange)
258         {
259             jclusterLast--;
260         }
261     }
262
263     if (jclusterFirst <= jclusterLast)
264     {
265         for (int jcluster = jclusterFirst; jcluster <= jclusterLast; jcluster++)
266         {
267             /* Store cj and the interaction mask */
268             nbnxn_cj_t cjEntry;
269             cjEntry.cj   = cjFromCi<NbnxnLayout::Simd4xN, 0>(jGrid.cellOffset()) + jcluster;
270             cjEntry.excl = get_imask_simd_4xn(excludeSubDiagonal, icluster, jcluster);
271             nbl->cj.push_back(cjEntry);
272         }
273         /* Increase the closing index in the i list */
274         nbl->ci.back().cj_ind_end = nbl->cj.size();
275     }
276 }