c226d86021ec42a182a25162b5e09d13a67d9568
[alexxy/gromacs.git] / src / gromacs / mdlib / nbnxn_search_simd_4xn.h
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35
36 using namespace gmx; // TODO: Remove when this file is moved into gmx namespace
37
38 #if GMX_SIMD_REAL_WIDTH >= NBNXN_CPU_CLUSTER_I_SIZE
39 #define STRIDE_S  (GMX_SIMD_REAL_WIDTH)
40 #else
41 #define STRIDE_S  NBNXN_CPU_CLUSTER_I_SIZE
42 #endif
43
44 /* Copies PBC shifted i-cell packed atom coordinates to working array */
45 static inline void
46 icell_set_x_simd_4xn(int ci,
47                      real shx, real shy, real shz,
48                      int gmx_unused stride, const real *x,
49                      nbnxn_list_work_t *work)
50 {
51     int    ia;
52     real  *x_ci_simd = work->x_ci_simd;
53
54     ia = xIndexFromCi<NbnxnLayout::Simd4xN>(ci);
55
56     store(x_ci_simd +  0*GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 0*STRIDE_S    ] + shx) );
57     store(x_ci_simd +  1*GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 1*STRIDE_S    ] + shy) );
58     store(x_ci_simd +  2*GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 2*STRIDE_S    ] + shz) );
59     store(x_ci_simd +  3*GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 0*STRIDE_S + 1] + shx) );
60     store(x_ci_simd +  4*GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 1*STRIDE_S + 1] + shy) );
61     store(x_ci_simd +  5*GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 2*STRIDE_S + 1] + shz) );
62     store(x_ci_simd +  6*GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 0*STRIDE_S + 2] + shx) );
63     store(x_ci_simd +  7*GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 1*STRIDE_S + 2] + shy) );
64     store(x_ci_simd +  8*GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 2*STRIDE_S + 2] + shz) );
65     store(x_ci_simd +  9*GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 0*STRIDE_S + 3] + shx) );
66     store(x_ci_simd + 10*GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 1*STRIDE_S + 3] + shy) );
67     store(x_ci_simd + 11*GMX_SIMD_REAL_WIDTH, SimdReal(x[ia + 2*STRIDE_S + 3] + shz) );
68 }
69
70 /* SIMD code for checking and adding cluster-pairs to the list using coordinates in packed format.
71  *
72  * Checks bouding box distances and possibly atom pair distances.
73  * This is an accelerated version of make_cluster_list_simple.
74  *
75  * \param[in]     gridj               The j-grid
76  * \param[in,out] nbl                 The pair-list to store the cluster pairs in
77  * \param[in]     icluster            The index of the i-cluster
78  * \param[in]     firstCell           The first cluster in the j-range, using i-cluster size indexing
79  * \param[in]     lastCell            The last cluster in the j-range, using i-cluster size indexing
80  * \param[in]     excludeSubDiagonal  Exclude atom pairs with i-index > j-index
81  * \param[in]     x_j                 Coordinates for the j-atom, in SIMD packed format
82  * \param[in]     rlist2              The squared list cut-off
83  * \param[in]     rbb2                The squared cut-off for putting cluster-pairs in the list based on bounding box distance only
84  * \param[in,out] numDistanceChecks   The number of distance checks performed
85  */
86 static inline void
87 makeClusterListSimd4xn(const nbnxn_grid_t *      gridj,
88                        nbnxn_pairlist_t *        nbl,
89                        int                       icluster,
90                        int                       firstCell,
91                        int                       lastCell,
92                        bool                      excludeSubDiagonal,
93                        const real * gmx_restrict x_j,
94                        real                      rlist2,
95                        float                     rbb2,
96                        int * gmx_restrict        numDistanceChecks)
97 {
98     const real * gmx_restrict          x_ci_simd = nbl->work->x_ci_simd;
99     const nbnxn_bb_t * gmx_restrict    bb_ci     = nbl->work->bb_ci;
100
101     SimdReal                           jx_S, jy_S, jz_S;
102
103     SimdReal                           dx_S0, dy_S0, dz_S0;
104     SimdReal                           dx_S1, dy_S1, dz_S1;
105     SimdReal                           dx_S2, dy_S2, dz_S2;
106     SimdReal                           dx_S3, dy_S3, dz_S3;
107
108     SimdReal                           rsq_S0;
109     SimdReal                           rsq_S1;
110     SimdReal                           rsq_S2;
111     SimdReal                           rsq_S3;
112
113     SimdBool                           wco_S0;
114     SimdBool                           wco_S1;
115     SimdBool                           wco_S2;
116     SimdBool                           wco_S3;
117     SimdBool                           wco_any_S01, wco_any_S23, wco_any_S;
118
119     SimdReal                           rc2_S;
120
121     gmx_bool                           InRange;
122     float                              d2;
123     int                                xind_f, xind_l;
124
125     /* Convert the j-range from i-cluster size indexing to j-cluster indexing */
126     /* cppcheck-suppress selfAssignment . selfAssignment for width 4.*/
127     int jclusterFirst = cjFromCi<NbnxnLayout::Simd4xN>(firstCell);
128 #if GMX_SIMD_REAL_WIDTH >= NBNXN_CPU_CLUSTER_I_SIZE
129     int jclusterLast  = cjFromCi<NbnxnLayout::Simd4xN>(lastCell);
130 #else
131     /* Set the correct last j-cluster with a j-cluster size of 2 */
132     int jclusterLast  = cjFromCi<NbnxnLayout::Simd4xN>(lastCell + 1) - 1;
133 #endif
134     GMX_ASSERT(jclusterLast >= jclusterFirst, "We should have a non-empty j-cluster range, since the calling code should have ensured a non-empty cell range");
135
136     rc2_S   = SimdReal(rlist2);
137
138     InRange = FALSE;
139     while (!InRange && jclusterFirst <= jclusterLast)
140     {
141 #if NBNXN_SEARCH_BB_SIMD4
142         d2 = subc_bb_dist2_simd4(0, bb_ci, jclusterFirst, gridj->bbj);
143 #else
144         d2 = subc_bb_dist2(0, bb_ci, jclusterFirst, gridj->bbj);
145 #endif
146         *numDistanceChecks += 2;
147
148         /* Check if the distance is within the distance where
149          * we use only the bounding box distance rbb,
150          * or within the cut-off and there is at least one atom pair
151          * within the cut-off.
152          */
153         if (d2 < rbb2)
154         {
155             InRange = TRUE;
156         }
157         else if (d2 < rlist2)
158         {
159             xind_f  = xIndexFromCj<NbnxnLayout::Simd4xN>(cjFromCi<NbnxnLayout::Simd4xN>(gridj->cell0) + jclusterFirst);
160
161             jx_S  = load<SimdReal>(x_j + xind_f + 0*STRIDE_S);
162             jy_S  = load<SimdReal>(x_j + xind_f + 1*STRIDE_S);
163             jz_S  = load<SimdReal>(x_j + xind_f + 2*STRIDE_S);
164
165
166             /* Calculate distance */
167             dx_S0            = load<SimdReal>(x_ci_simd +  0*GMX_SIMD_REAL_WIDTH) - jx_S;
168             dy_S0            = load<SimdReal>(x_ci_simd +  1*GMX_SIMD_REAL_WIDTH) - jy_S;
169             dz_S0            = load<SimdReal>(x_ci_simd +  2*GMX_SIMD_REAL_WIDTH) - jz_S;
170             dx_S1            = load<SimdReal>(x_ci_simd +  3*GMX_SIMD_REAL_WIDTH) - jx_S;
171             dy_S1            = load<SimdReal>(x_ci_simd +  4*GMX_SIMD_REAL_WIDTH) - jy_S;
172             dz_S1            = load<SimdReal>(x_ci_simd +  5*GMX_SIMD_REAL_WIDTH) - jz_S;
173             dx_S2            = load<SimdReal>(x_ci_simd +  6*GMX_SIMD_REAL_WIDTH) - jx_S;
174             dy_S2            = load<SimdReal>(x_ci_simd +  7*GMX_SIMD_REAL_WIDTH) - jy_S;
175             dz_S2            = load<SimdReal>(x_ci_simd +  8*GMX_SIMD_REAL_WIDTH) - jz_S;
176             dx_S3            = load<SimdReal>(x_ci_simd +  9*GMX_SIMD_REAL_WIDTH) - jx_S;
177             dy_S3            = load<SimdReal>(x_ci_simd + 10*GMX_SIMD_REAL_WIDTH) - jy_S;
178             dz_S3            = load<SimdReal>(x_ci_simd + 11*GMX_SIMD_REAL_WIDTH) - jz_S;
179
180             /* rsq = dx*dx+dy*dy+dz*dz */
181             rsq_S0           = norm2(dx_S0, dy_S0, dz_S0);
182             rsq_S1           = norm2(dx_S1, dy_S1, dz_S1);
183             rsq_S2           = norm2(dx_S2, dy_S2, dz_S2);
184             rsq_S3           = norm2(dx_S3, dy_S3, dz_S3);
185
186             wco_S0           = (rsq_S0 < rc2_S);
187             wco_S1           = (rsq_S1 < rc2_S);
188             wco_S2           = (rsq_S2 < rc2_S);
189             wco_S3           = (rsq_S3 < rc2_S);
190
191             wco_any_S01      = wco_S0 || wco_S1;
192             wco_any_S23      = wco_S2 || wco_S3;
193             wco_any_S        = wco_any_S01 || wco_any_S23;
194
195             InRange          = anyTrue(wco_any_S);
196
197             *numDistanceChecks += 4*GMX_SIMD_REAL_WIDTH;
198         }
199         if (!InRange)
200         {
201             jclusterFirst++;
202         }
203     }
204     if (!InRange)
205     {
206         return;
207     }
208
209     InRange = FALSE;
210     while (!InRange && jclusterLast > jclusterFirst)
211     {
212 #if NBNXN_SEARCH_BB_SIMD4
213         d2 = subc_bb_dist2_simd4(0, bb_ci, jclusterLast, gridj->bbj);
214 #else
215         d2 = subc_bb_dist2(0, bb_ci, jclusterLast, gridj->bbj);
216 #endif
217         *numDistanceChecks += 2;
218
219         /* Check if the distance is within the distance where
220          * we use only the bounding box distance rbb,
221          * or within the cut-off and there is at least one atom pair
222          * within the cut-off.
223          */
224         if (d2 < rbb2)
225         {
226             InRange = TRUE;
227         }
228         else if (d2 < rlist2)
229         {
230             xind_l  = xIndexFromCj<NbnxnLayout::Simd4xN>(cjFromCi<NbnxnLayout::Simd4xN>(gridj->cell0) + jclusterLast);
231
232             jx_S  = load<SimdReal>(x_j +xind_l + 0*STRIDE_S);
233             jy_S  = load<SimdReal>(x_j +xind_l + 1*STRIDE_S);
234             jz_S  = load<SimdReal>(x_j +xind_l + 2*STRIDE_S);
235
236             /* Calculate distance */
237             dx_S0            = load<SimdReal>(x_ci_simd +  0*GMX_SIMD_REAL_WIDTH) - jx_S;
238             dy_S0            = load<SimdReal>(x_ci_simd +  1*GMX_SIMD_REAL_WIDTH) - jy_S;
239             dz_S0            = load<SimdReal>(x_ci_simd +  2*GMX_SIMD_REAL_WIDTH) - jz_S;
240             dx_S1            = load<SimdReal>(x_ci_simd +  3*GMX_SIMD_REAL_WIDTH) - jx_S;
241             dy_S1            = load<SimdReal>(x_ci_simd +  4*GMX_SIMD_REAL_WIDTH) - jy_S;
242             dz_S1            = load<SimdReal>(x_ci_simd +  5*GMX_SIMD_REAL_WIDTH) - jz_S;
243             dx_S2            = load<SimdReal>(x_ci_simd +  6*GMX_SIMD_REAL_WIDTH) - jx_S;
244             dy_S2            = load<SimdReal>(x_ci_simd +  7*GMX_SIMD_REAL_WIDTH) - jy_S;
245             dz_S2            = load<SimdReal>(x_ci_simd +  8*GMX_SIMD_REAL_WIDTH) - jz_S;
246             dx_S3            = load<SimdReal>(x_ci_simd +  9*GMX_SIMD_REAL_WIDTH) - jx_S;
247             dy_S3            = load<SimdReal>(x_ci_simd + 10*GMX_SIMD_REAL_WIDTH) - jy_S;
248             dz_S3            = load<SimdReal>(x_ci_simd + 11*GMX_SIMD_REAL_WIDTH) - jz_S;
249
250             /* rsq = dx*dx+dy*dy+dz*dz */
251             rsq_S0           = norm2(dx_S0, dy_S0, dz_S0);
252             rsq_S1           = norm2(dx_S1, dy_S1, dz_S1);
253             rsq_S2           = norm2(dx_S2, dy_S2, dz_S2);
254             rsq_S3           = norm2(dx_S3, dy_S3, dz_S3);
255
256             wco_S0           = (rsq_S0 < rc2_S);
257             wco_S1           = (rsq_S1 < rc2_S);
258             wco_S2           = (rsq_S2 < rc2_S);
259             wco_S3           = (rsq_S3 < rc2_S);
260
261             wco_any_S01      = wco_S0 || wco_S1;
262             wco_any_S23      = wco_S2 || wco_S3;
263             wco_any_S        = wco_any_S01 || wco_any_S23;
264
265             InRange          = anyTrue(wco_any_S);
266
267             *numDistanceChecks += 4*GMX_SIMD_REAL_WIDTH;
268         }
269         if (!InRange)
270         {
271             jclusterLast--;
272         }
273     }
274
275     if (jclusterFirst <= jclusterLast)
276     {
277         for (int jcluster = jclusterFirst; jcluster <= jclusterLast; jcluster++)
278         {
279             /* Store cj and the interaction mask */
280             nbl->cj[nbl->ncj].cj   = cjFromCi<NbnxnLayout::Simd4xN>(gridj->cell0) + jcluster;
281             nbl->cj[nbl->ncj].excl = get_imask_simd_4xn(excludeSubDiagonal, icluster, jcluster);
282             nbl->ncj++;
283         }
284         /* Increase the closing index in i super-cell list */
285         nbl->ci[nbl->nci].cj_ind_end = nbl->ncj;
286     }
287 }
288
289 #undef STRIDE_S