11714a42391757b483fba3dc744e0a8c3cff1df9
[alexxy/gromacs.git] / src / gromacs / nbnxm / pairlist_simd_4xm.h
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35
36 /* Stride of the packed x coordinate array */
37 static constexpr int c_xStride4xN = (GMX_SIMD_REAL_WIDTH > c_nbnxnCpuIClusterSize ? GMX_SIMD_REAL_WIDTH : c_nbnxnCpuIClusterSize);
38
39 /* Copies PBC shifted i-cell packed atom coordinates to working array */
40 static inline void
41 icell_set_x_simd_4xn(int ci,
42                      real shx, real shy, real shz,
43                      int gmx_unused stride, const real *x,
44                      NbnxnPairlistCpuWork *work)
45 {
46     int    ia;
47     real  *x_ci_simd = work->iClusterData.xSimd.data();
48
49     ia = xIndexFromCi<NbnxnLayout::Simd4xN>(ci);
50
51     store(x_ci_simd +  0*GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 0*c_xStride4xN    ] + shx) );
52     store(x_ci_simd +  1*GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 1*c_xStride4xN    ] + shy) );
53     store(x_ci_simd +  2*GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 2*c_xStride4xN    ] + shz) );
54     store(x_ci_simd +  3*GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 0*c_xStride4xN + 1] + shx) );
55     store(x_ci_simd +  4*GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 1*c_xStride4xN + 1] + shy) );
56     store(x_ci_simd +  5*GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 2*c_xStride4xN + 1] + shz) );
57     store(x_ci_simd +  6*GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 0*c_xStride4xN + 2] + shx) );
58     store(x_ci_simd +  7*GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 1*c_xStride4xN + 2] + shy) );
59     store(x_ci_simd +  8*GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 2*c_xStride4xN + 2] + shz) );
60     store(x_ci_simd +  9*GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 0*c_xStride4xN + 3] + shx) );
61     store(x_ci_simd + 10*GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 1*c_xStride4xN + 3] + shy) );
62     store(x_ci_simd + 11*GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 2*c_xStride4xN + 3] + shz) );
63 }
64
65 /* SIMD code for checking and adding cluster-pairs to the list using coordinates in packed format.
66  *
67  * Checks bouding box distances and possibly atom pair distances.
68  * This is an accelerated version of make_cluster_list_simple.
69  *
70  * \param[in]     jGrid               The j-grid
71  * \param[in,out] nbl                 The pair-list to store the cluster pairs in
72  * \param[in]     icluster            The index of the i-cluster
73  * \param[in]     firstCell           The first cluster in the j-range, using i-cluster size indexing
74  * \param[in]     lastCell            The last cluster in the j-range, using i-cluster size indexing
75  * \param[in]     excludeSubDiagonal  Exclude atom pairs with i-index > j-index
76  * \param[in]     x_j                 Coordinates for the j-atom, in SIMD packed format
77  * \param[in]     rlist2              The squared list cut-off
78  * \param[in]     rbb2                The squared cut-off for putting cluster-pairs in the list based on bounding box distance only
79  * \param[in,out] numDistanceChecks   The number of distance checks performed
80  */
81 static inline void
82 makeClusterListSimd4xn(const Grid               &jGrid,
83                        NbnxnPairlistCpu *        nbl,
84                        int                       icluster,
85                        int                       firstCell,
86                        int                       lastCell,
87                        bool                      excludeSubDiagonal,
88                        const real * gmx_restrict x_j,
89                        real                      rlist2,
90                        float                     rbb2,
91                        int * gmx_restrict        numDistanceChecks)
92 {
93     using namespace gmx;
94     const real * gmx_restrict          x_ci_simd = nbl->work->iClusterData.xSimd.data();
95     const nbnxn_bb_t * gmx_restrict    bb_ci     = nbl->work->iClusterData.bb.data();
96
97     SimdReal                           jx_S, jy_S, jz_S;
98
99     SimdReal                           dx_S0, dy_S0, dz_S0;
100     SimdReal                           dx_S1, dy_S1, dz_S1;
101     SimdReal                           dx_S2, dy_S2, dz_S2;
102     SimdReal                           dx_S3, dy_S3, dz_S3;
103
104     SimdReal                           rsq_S0;
105     SimdReal                           rsq_S1;
106     SimdReal                           rsq_S2;
107     SimdReal                           rsq_S3;
108
109     SimdBool                           wco_S0;
110     SimdBool                           wco_S1;
111     SimdBool                           wco_S2;
112     SimdBool                           wco_S3;
113     SimdBool                           wco_any_S01, wco_any_S23, wco_any_S;
114
115     SimdReal                           rc2_S;
116
117     gmx_bool                           InRange;
118     float                              d2;
119     int                                xind_f, xind_l;
120
121     /* Convert the j-range from i-cluster size indexing to j-cluster indexing */
122     int jclusterFirst = cjFromCi<NbnxnLayout::Simd4xN, 0>(firstCell);
123     int jclusterLast  = cjFromCi<NbnxnLayout::Simd4xN, 1>(lastCell);
124     GMX_ASSERT(jclusterLast >= jclusterFirst, "We should have a non-empty j-cluster range, since the calling code should have ensured a non-empty cell range");
125
126     rc2_S   = SimdReal(rlist2);
127
128     InRange = FALSE;
129     while (!InRange && jclusterFirst <= jclusterLast)
130     {
131 #if NBNXN_SEARCH_BB_SIMD4
132         d2 = subc_bb_dist2_simd4(0, bb_ci, jclusterFirst, jGrid.jBoundingBoxes());
133 #else
134         d2 = subc_bb_dist2(0, bb_ci, jclusterFirst, jGrid.jBoundingBoxes());
135 #endif
136         *numDistanceChecks += 2;
137
138         /* Check if the distance is within the distance where
139          * we use only the bounding box distance rbb,
140          * or within the cut-off and there is at least one atom pair
141          * within the cut-off.
142          */
143         if (d2 < rbb2)
144         {
145             InRange = TRUE;
146         }
147         else if (d2 < rlist2)
148         {
149             xind_f  = xIndexFromCj<NbnxnLayout::Simd4xN>(cjFromCi<NbnxnLayout::Simd4xN, 0>(jGrid.cellOffset()) + jclusterFirst);
150
151             jx_S  = load<SimdReal>(x_j + xind_f + 0*c_xStride4xN);
152             jy_S  = load<SimdReal>(x_j + xind_f + 1*c_xStride4xN);
153             jz_S  = load<SimdReal>(x_j + xind_f + 2*c_xStride4xN);
154
155
156             /* Calculate distance */
157             dx_S0            = load<SimdReal>(x_ci_simd +  0*GMX_SIMD_REAL_WIDTH) - jx_S;
158             dy_S0            = load<SimdReal>(x_ci_simd +  1*GMX_SIMD_REAL_WIDTH) - jy_S;
159             dz_S0            = load<SimdReal>(x_ci_simd +  2*GMX_SIMD_REAL_WIDTH) - jz_S;
160             dx_S1            = load<SimdReal>(x_ci_simd +  3*GMX_SIMD_REAL_WIDTH) - jx_S;
161             dy_S1            = load<SimdReal>(x_ci_simd +  4*GMX_SIMD_REAL_WIDTH) - jy_S;
162             dz_S1            = load<SimdReal>(x_ci_simd +  5*GMX_SIMD_REAL_WIDTH) - jz_S;
163             dx_S2            = load<SimdReal>(x_ci_simd +  6*GMX_SIMD_REAL_WIDTH) - jx_S;
164             dy_S2            = load<SimdReal>(x_ci_simd +  7*GMX_SIMD_REAL_WIDTH) - jy_S;
165             dz_S2            = load<SimdReal>(x_ci_simd +  8*GMX_SIMD_REAL_WIDTH) - jz_S;
166             dx_S3            = load<SimdReal>(x_ci_simd +  9*GMX_SIMD_REAL_WIDTH) - jx_S;
167             dy_S3            = load<SimdReal>(x_ci_simd + 10*GMX_SIMD_REAL_WIDTH) - jy_S;
168             dz_S3            = load<SimdReal>(x_ci_simd + 11*GMX_SIMD_REAL_WIDTH) - jz_S;
169
170             /* rsq = dx*dx+dy*dy+dz*dz */
171             rsq_S0           = norm2(dx_S0, dy_S0, dz_S0);
172             rsq_S1           = norm2(dx_S1, dy_S1, dz_S1);
173             rsq_S2           = norm2(dx_S2, dy_S2, dz_S2);
174             rsq_S3           = norm2(dx_S3, dy_S3, dz_S3);
175
176             wco_S0           = (rsq_S0 < rc2_S);
177             wco_S1           = (rsq_S1 < rc2_S);
178             wco_S2           = (rsq_S2 < rc2_S);
179             wco_S3           = (rsq_S3 < rc2_S);
180
181             wco_any_S01      = wco_S0 || wco_S1;
182             wco_any_S23      = wco_S2 || wco_S3;
183             wco_any_S        = wco_any_S01 || wco_any_S23;
184
185             InRange          = anyTrue(wco_any_S);
186
187             *numDistanceChecks += 4*GMX_SIMD_REAL_WIDTH;
188         }
189         if (!InRange)
190         {
191             jclusterFirst++;
192         }
193     }
194     if (!InRange)
195     {
196         return;
197     }
198
199     InRange = FALSE;
200     while (!InRange && jclusterLast > jclusterFirst)
201     {
202 #if NBNXN_SEARCH_BB_SIMD4
203         d2 = subc_bb_dist2_simd4(0, bb_ci, jclusterLast, jGrid.jBoundingBoxes());
204 #else
205         d2 = subc_bb_dist2(0, bb_ci, jclusterLast, jGrid.jBoundingBoxes());
206 #endif
207         *numDistanceChecks += 2;
208
209         /* Check if the distance is within the distance where
210          * we use only the bounding box distance rbb,
211          * or within the cut-off and there is at least one atom pair
212          * within the cut-off.
213          */
214         if (d2 < rbb2)
215         {
216             InRange = TRUE;
217         }
218         else if (d2 < rlist2)
219         {
220             xind_l  = xIndexFromCj<NbnxnLayout::Simd4xN>(cjFromCi<NbnxnLayout::Simd4xN, 0>(jGrid.cellOffset()) + jclusterLast);
221
222             jx_S  = load<SimdReal>(x_j +xind_l + 0*c_xStride4xN);
223             jy_S  = load<SimdReal>(x_j +xind_l + 1*c_xStride4xN);
224             jz_S  = load<SimdReal>(x_j +xind_l + 2*c_xStride4xN);
225
226             /* Calculate distance */
227             dx_S0            = load<SimdReal>(x_ci_simd +  0*GMX_SIMD_REAL_WIDTH) - jx_S;
228             dy_S0            = load<SimdReal>(x_ci_simd +  1*GMX_SIMD_REAL_WIDTH) - jy_S;
229             dz_S0            = load<SimdReal>(x_ci_simd +  2*GMX_SIMD_REAL_WIDTH) - jz_S;
230             dx_S1            = load<SimdReal>(x_ci_simd +  3*GMX_SIMD_REAL_WIDTH) - jx_S;
231             dy_S1            = load<SimdReal>(x_ci_simd +  4*GMX_SIMD_REAL_WIDTH) - jy_S;
232             dz_S1            = load<SimdReal>(x_ci_simd +  5*GMX_SIMD_REAL_WIDTH) - jz_S;
233             dx_S2            = load<SimdReal>(x_ci_simd +  6*GMX_SIMD_REAL_WIDTH) - jx_S;
234             dy_S2            = load<SimdReal>(x_ci_simd +  7*GMX_SIMD_REAL_WIDTH) - jy_S;
235             dz_S2            = load<SimdReal>(x_ci_simd +  8*GMX_SIMD_REAL_WIDTH) - jz_S;
236             dx_S3            = load<SimdReal>(x_ci_simd +  9*GMX_SIMD_REAL_WIDTH) - jx_S;
237             dy_S3            = load<SimdReal>(x_ci_simd + 10*GMX_SIMD_REAL_WIDTH) - jy_S;
238             dz_S3            = load<SimdReal>(x_ci_simd + 11*GMX_SIMD_REAL_WIDTH) - jz_S;
239
240             /* rsq = dx*dx+dy*dy+dz*dz */
241             rsq_S0           = norm2(dx_S0, dy_S0, dz_S0);
242             rsq_S1           = norm2(dx_S1, dy_S1, dz_S1);
243             rsq_S2           = norm2(dx_S2, dy_S2, dz_S2);
244             rsq_S3           = norm2(dx_S3, dy_S3, dz_S3);
245
246             wco_S0           = (rsq_S0 < rc2_S);
247             wco_S1           = (rsq_S1 < rc2_S);
248             wco_S2           = (rsq_S2 < rc2_S);
249             wco_S3           = (rsq_S3 < rc2_S);
250
251             wco_any_S01      = wco_S0 || wco_S1;
252             wco_any_S23      = wco_S2 || wco_S3;
253             wco_any_S        = wco_any_S01 || wco_any_S23;
254
255             InRange          = anyTrue(wco_any_S);
256
257             *numDistanceChecks += 4*GMX_SIMD_REAL_WIDTH;
258         }
259         if (!InRange)
260         {
261             jclusterLast--;
262         }
263     }
264
265     if (jclusterFirst <= jclusterLast)
266     {
267         for (int jcluster = jclusterFirst; jcluster <= jclusterLast; jcluster++)
268         {
269             /* Store cj and the interaction mask */
270             nbnxn_cj_t cjEntry;
271             cjEntry.cj   = cjFromCi<NbnxnLayout::Simd4xN, 0>(jGrid.cellOffset()) + jcluster;
272             cjEntry.excl = get_imask_simd_4xn(excludeSubDiagonal, icluster, jcluster);
273             nbl->cj.push_back(cjEntry);
274         }
275         /* Increase the closing index in the i list */
276         nbl->ci.back().cj_ind_end = nbl->cj.size();
277     }
278 }