Apply clang-format to source tree
[alexxy/gromacs.git] / src / gromacs / nbnxm / pairlist_simd_2xmm.h
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35
36 /* Stride of the packed x coordinate array */
37 static constexpr int c_xStride2xNN = c_nbnxnCpuIClusterSize;
38
39 /* Copies PBC shifted i-cell packed atom coordinates to working array */
40 static inline void icell_set_x_simd_2xnn(int  ci,
41                                          real shx,
42                                          real shy,
43                                          real shz,
44                                          int gmx_unused        stride,
45                                          const real*           x,
46                                          NbnxnPairlistCpuWork* work)
47 {
48     int   ia;
49     real* x_ci_simd = work->iClusterData.xSimd.data();
50
51     ia = xIndexFromCi<NbnxnLayout::Simd2xNN>(ci);
52
53     store(x_ci_simd + 0 * GMX_SIMD_REAL_WIDTH,
54           loadU1DualHsimd(x + ia + 0 * c_xStride2xNN + 0) + SimdReal(shx));
55     store(x_ci_simd + 1 * GMX_SIMD_REAL_WIDTH,
56           loadU1DualHsimd(x + ia + 1 * c_xStride2xNN + 0) + SimdReal(shy));
57     store(x_ci_simd + 2 * GMX_SIMD_REAL_WIDTH,
58           loadU1DualHsimd(x + ia + 2 * c_xStride2xNN + 0) + SimdReal(shz));
59     store(x_ci_simd + 3 * GMX_SIMD_REAL_WIDTH,
60           loadU1DualHsimd(x + ia + 0 * c_xStride2xNN + 2) + SimdReal(shx));
61     store(x_ci_simd + 4 * GMX_SIMD_REAL_WIDTH,
62           loadU1DualHsimd(x + ia + 1 * c_xStride2xNN + 2) + SimdReal(shy));
63     store(x_ci_simd + 5 * GMX_SIMD_REAL_WIDTH,
64           loadU1DualHsimd(x + ia + 2 * c_xStride2xNN + 2) + SimdReal(shz));
65 }
66
67 /* SIMD code for checking and adding cluster-pairs to the list using coordinates in packed format.
68  *
69  * Checks bouding box distances and possibly atom pair distances.
70  * This is an accelerated version of make_cluster_list_simple.
71  *
72  * \param[in]     jGrid               The j-grid
73  * \param[in,out] nbl                 The pair-list to store the cluster pairs in
74  * \param[in]     icluster            The index of the i-cluster
75  * \param[in]     firstCell           The first cluster in the j-range, using i-cluster size indexing
76  * \param[in]     lastCell            The last cluster in the j-range, using i-cluster size indexing
77  * \param[in]     excludeSubDiagonal  Exclude atom pairs with i-index > j-index
78  * \param[in]     x_j                 Coordinates for the j-atom, in SIMD packed format
79  * \param[in]     rlist2              The squared list cut-off
80  * \param[in]     rbb2                The squared cut-off for putting cluster-pairs in the list based on bounding box distance only
81  * \param[in,out] numDistanceChecks   The number of distance checks performed
82  */
83 static inline void makeClusterListSimd2xnn(const Grid&       jGrid,
84                                            NbnxnPairlistCpu* nbl,
85                                            int               icluster,
86                                            int               firstCell,
87                                            int               lastCell,
88                                            bool              excludeSubDiagonal,
89                                            const real* gmx_restrict x_j,
90                                            real                     rlist2,
91                                            float                    rbb2,
92                                            int* gmx_restrict numDistanceChecks)
93 {
94     using namespace gmx;
95     const real* gmx_restrict x_ci_simd    = nbl->work->iClusterData.xSimd.data();
96     const BoundingBox* gmx_restrict bb_ci = nbl->work->iClusterData.bb.data();
97
98     SimdReal jx_S, jy_S, jz_S;
99
100     SimdReal dx_S0, dy_S0, dz_S0;
101     SimdReal dx_S2, dy_S2, dz_S2;
102
103     SimdReal rsq_S0;
104     SimdReal rsq_S2;
105
106     SimdBool wco_S0;
107     SimdBool wco_S2;
108     SimdBool wco_any_S;
109
110     SimdReal rc2_S;
111
112     gmx_bool InRange;
113     float    d2;
114     int      xind_f, xind_l;
115
116     int jclusterFirst = cjFromCi<NbnxnLayout::Simd2xNN, 0>(firstCell);
117     int jclusterLast  = cjFromCi<NbnxnLayout::Simd2xNN, 1>(lastCell);
118     GMX_ASSERT(jclusterLast >= jclusterFirst,
119                "We should have a non-empty j-cluster range, since the calling code should have "
120                "ensured a non-empty cell range");
121
122     rc2_S = SimdReal(rlist2);
123
124     InRange = FALSE;
125     while (!InRange && jclusterFirst <= jclusterLast)
126     {
127         d2 = clusterBoundingBoxDistance2(bb_ci[0], jGrid.jBoundingBoxes()[jclusterFirst]);
128         *numDistanceChecks += 2;
129
130         /* Check if the distance is within the distance where
131          * we use only the bounding box distance rbb,
132          * or within the cut-off and there is at least one atom pair
133          * within the cut-off.
134          */
135         if (d2 < rbb2)
136         {
137             InRange = TRUE;
138         }
139         else if (d2 < rlist2)
140         {
141             xind_f = xIndexFromCj<NbnxnLayout::Simd2xNN>(
142                     cjFromCi<NbnxnLayout::Simd2xNN, 0>(jGrid.cellOffset()) + jclusterFirst);
143
144             jx_S = loadDuplicateHsimd(x_j + xind_f + 0 * c_xStride2xNN);
145             jy_S = loadDuplicateHsimd(x_j + xind_f + 1 * c_xStride2xNN);
146             jz_S = loadDuplicateHsimd(x_j + xind_f + 2 * c_xStride2xNN);
147
148             /* Calculate distance */
149             dx_S0 = load<SimdReal>(x_ci_simd + 0 * GMX_SIMD_REAL_WIDTH) - jx_S;
150             dy_S0 = load<SimdReal>(x_ci_simd + 1 * GMX_SIMD_REAL_WIDTH) - jy_S;
151             dz_S0 = load<SimdReal>(x_ci_simd + 2 * GMX_SIMD_REAL_WIDTH) - jz_S;
152             dx_S2 = load<SimdReal>(x_ci_simd + 3 * GMX_SIMD_REAL_WIDTH) - jx_S;
153             dy_S2 = load<SimdReal>(x_ci_simd + 4 * GMX_SIMD_REAL_WIDTH) - jy_S;
154             dz_S2 = load<SimdReal>(x_ci_simd + 5 * GMX_SIMD_REAL_WIDTH) - jz_S;
155
156             /* rsq = dx*dx+dy*dy+dz*dz */
157             rsq_S0 = norm2(dx_S0, dy_S0, dz_S0);
158             rsq_S2 = norm2(dx_S2, dy_S2, dz_S2);
159
160             wco_S0 = (rsq_S0 < rc2_S);
161             wco_S2 = (rsq_S2 < rc2_S);
162
163             wco_any_S = wco_S0 || wco_S2;
164
165             InRange = anyTrue(wco_any_S);
166
167             *numDistanceChecks += 2 * GMX_SIMD_REAL_WIDTH;
168         }
169         if (!InRange)
170         {
171             jclusterFirst++;
172         }
173     }
174     if (!InRange)
175     {
176         return;
177     }
178
179     InRange = FALSE;
180     while (!InRange && jclusterLast > jclusterFirst)
181     {
182         d2 = clusterBoundingBoxDistance2(bb_ci[0], jGrid.jBoundingBoxes()[jclusterLast]);
183         *numDistanceChecks += 2;
184
185         /* Check if the distance is within the distance where
186          * we use only the bounding box distance rbb,
187          * or within the cut-off and there is at least one atom pair
188          * within the cut-off.
189          */
190         if (d2 < rbb2)
191         {
192             InRange = TRUE;
193         }
194         else if (d2 < rlist2)
195         {
196             xind_l = xIndexFromCj<NbnxnLayout::Simd2xNN>(
197                     cjFromCi<NbnxnLayout::Simd2xNN, 0>(jGrid.cellOffset()) + jclusterLast);
198
199             jx_S = loadDuplicateHsimd(x_j + xind_l + 0 * c_xStride2xNN);
200             jy_S = loadDuplicateHsimd(x_j + xind_l + 1 * c_xStride2xNN);
201             jz_S = loadDuplicateHsimd(x_j + xind_l + 2 * c_xStride2xNN);
202
203             /* Calculate distance */
204             dx_S0 = load<SimdReal>(x_ci_simd + 0 * GMX_SIMD_REAL_WIDTH) - jx_S;
205             dy_S0 = load<SimdReal>(x_ci_simd + 1 * GMX_SIMD_REAL_WIDTH) - jy_S;
206             dz_S0 = load<SimdReal>(x_ci_simd + 2 * GMX_SIMD_REAL_WIDTH) - jz_S;
207             dx_S2 = load<SimdReal>(x_ci_simd + 3 * GMX_SIMD_REAL_WIDTH) - jx_S;
208             dy_S2 = load<SimdReal>(x_ci_simd + 4 * GMX_SIMD_REAL_WIDTH) - jy_S;
209             dz_S2 = load<SimdReal>(x_ci_simd + 5 * GMX_SIMD_REAL_WIDTH) - jz_S;
210
211             /* rsq = dx*dx+dy*dy+dz*dz */
212             rsq_S0 = norm2(dx_S0, dy_S0, dz_S0);
213             rsq_S2 = norm2(dx_S2, dy_S2, dz_S2);
214
215             wco_S0 = (rsq_S0 < rc2_S);
216             wco_S2 = (rsq_S2 < rc2_S);
217
218             wco_any_S = wco_S0 || wco_S2;
219
220             InRange = anyTrue(wco_any_S);
221
222             *numDistanceChecks += 2 * GMX_SIMD_REAL_WIDTH;
223         }
224         if (!InRange)
225         {
226             jclusterLast--;
227         }
228     }
229
230     if (jclusterFirst <= jclusterLast)
231     {
232         for (int jcluster = jclusterFirst; jcluster <= jclusterLast; jcluster++)
233         {
234             /* Store cj and the interaction mask */
235             nbnxn_cj_t cjEntry;
236             cjEntry.cj   = cjFromCi<NbnxnLayout::Simd2xNN, 0>(jGrid.cellOffset()) + jcluster;
237             cjEntry.excl = get_imask_simd_2xnn(excludeSubDiagonal, icluster, jcluster);
238             nbl->cj.push_back(cjEntry);
239         }
240         /* Increase the closing index in the i list */
241         nbl->ci.back().cj_ind_end = nbl->cj.size();
242     }
243 }