Turn nbnxn_grid_t into a class
[alexxy/gromacs.git] / src / gromacs / nbnxm / pairlist_simd_2xmm.h
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35
36 /* Stride of the packed x coordinate array */
37 static constexpr int c_xStride2xNN = c_nbnxnCpuIClusterSize;
38
39 /* Copies PBC shifted i-cell packed atom coordinates to working array */
40 static inline void
41 icell_set_x_simd_2xnn(int ci,
42                       real shx, real shy, real shz,
43                       int gmx_unused stride, const real *x,
44                       NbnxnPairlistCpuWork *work)
45 {
46     int   ia;
47     real *x_ci_simd = work->iClusterData.xSimd.data();
48
49     ia = xIndexFromCi<NbnxnLayout::Simd2xNN>(ci);
50
51     store(x_ci_simd + 0*GMX_SIMD_REAL_WIDTH, loadU1DualHsimd(x + ia + 0*c_xStride2xNN + 0) + SimdReal(shx) );
52     store(x_ci_simd + 1*GMX_SIMD_REAL_WIDTH, loadU1DualHsimd(x + ia + 1*c_xStride2xNN + 0) + SimdReal(shy) );
53     store(x_ci_simd + 2*GMX_SIMD_REAL_WIDTH, loadU1DualHsimd(x + ia + 2*c_xStride2xNN + 0) + SimdReal(shz) );
54     store(x_ci_simd + 3*GMX_SIMD_REAL_WIDTH, loadU1DualHsimd(x + ia + 0*c_xStride2xNN + 2) + SimdReal(shx) );
55     store(x_ci_simd + 4*GMX_SIMD_REAL_WIDTH, loadU1DualHsimd(x + ia + 1*c_xStride2xNN + 2) + SimdReal(shy) );
56     store(x_ci_simd + 5*GMX_SIMD_REAL_WIDTH, loadU1DualHsimd(x + ia + 2*c_xStride2xNN + 2) + SimdReal(shz) );
57 }
58
59 /* SIMD code for checking and adding cluster-pairs to the list using coordinates in packed format.
60  *
61  * Checks bouding box distances and possibly atom pair distances.
62  * This is an accelerated version of make_cluster_list_simple.
63  *
64  * \param[in]     jGrid               The j-grid
65  * \param[in,out] nbl                 The pair-list to store the cluster pairs in
66  * \param[in]     icluster            The index of the i-cluster
67  * \param[in]     firstCell           The first cluster in the j-range, using i-cluster size indexing
68  * \param[in]     lastCell            The last cluster in the j-range, using i-cluster size indexing
69  * \param[in]     excludeSubDiagonal  Exclude atom pairs with i-index > j-index
70  * \param[in]     x_j                 Coordinates for the j-atom, in SIMD packed format
71  * \param[in]     rlist2              The squared list cut-off
72  * \param[in]     rbb2                The squared cut-off for putting cluster-pairs in the list based on bounding box distance only
73  * \param[in,out] numDistanceChecks   The number of distance checks performed
74  */
75 static inline void
76 makeClusterListSimd2xnn(const Grid               &jGrid,
77                         NbnxnPairlistCpu *        nbl,
78                         int                       icluster,
79                         int                       firstCell,
80                         int                       lastCell,
81                         bool                      excludeSubDiagonal,
82                         const real * gmx_restrict x_j,
83                         real                      rlist2,
84                         float                     rbb2,
85                         int * gmx_restrict        numDistanceChecks)
86 {
87     using namespace gmx;
88     const real * gmx_restrict           x_ci_simd = nbl->work->iClusterData.xSimd.data();
89     const nbnxn_bb_t * gmx_restrict     bb_ci     = nbl->work->iClusterData.bb.data();
90
91     SimdReal                            jx_S, jy_S, jz_S;
92
93     SimdReal                            dx_S0, dy_S0, dz_S0;
94     SimdReal                            dx_S2, dy_S2, dz_S2;
95
96     SimdReal                            rsq_S0;
97     SimdReal                            rsq_S2;
98
99     SimdBool                            wco_S0;
100     SimdBool                            wco_S2;
101     SimdBool                            wco_any_S;
102
103     SimdReal                            rc2_S;
104
105     gmx_bool                            InRange;
106     float                               d2;
107     int                                 xind_f, xind_l;
108
109     int jclusterFirst = cjFromCi<NbnxnLayout::Simd2xNN, 0>(firstCell);
110     int jclusterLast  = cjFromCi<NbnxnLayout::Simd2xNN, 1>(lastCell);
111     GMX_ASSERT(jclusterLast >= jclusterFirst, "We should have a non-empty j-cluster range, since the calling code should have ensured a non-empty cell range");
112
113     rc2_S   = SimdReal(rlist2);
114
115     InRange = FALSE;
116     while (!InRange && jclusterFirst <= jclusterLast)
117     {
118 #if NBNXN_SEARCH_BB_SIMD4
119         d2 = subc_bb_dist2_simd4(0, bb_ci, jclusterFirst, jGrid.jBoundingBoxes());
120 #else
121         d2 = subc_bb_dist2(0, bb_ci, jclusterFirst, jGrid.jBoundingBoxes());
122 #endif
123         *numDistanceChecks += 2;
124
125         /* Check if the distance is within the distance where
126          * we use only the bounding box distance rbb,
127          * or within the cut-off and there is at least one atom pair
128          * within the cut-off.
129          */
130         if (d2 < rbb2)
131         {
132             InRange = TRUE;
133         }
134         else if (d2 < rlist2)
135         {
136             xind_f  = xIndexFromCj<NbnxnLayout::Simd2xNN>(cjFromCi<NbnxnLayout::Simd2xNN, 0>(jGrid.cellOffset()) + jclusterFirst);
137
138             jx_S  = loadDuplicateHsimd(x_j + xind_f + 0*c_xStride2xNN);
139             jy_S  = loadDuplicateHsimd(x_j + xind_f + 1*c_xStride2xNN);
140             jz_S  = loadDuplicateHsimd(x_j + xind_f + 2*c_xStride2xNN);
141
142             /* Calculate distance */
143             dx_S0            = load<SimdReal>(x_ci_simd + 0*GMX_SIMD_REAL_WIDTH) - jx_S;
144             dy_S0            = load<SimdReal>(x_ci_simd + 1*GMX_SIMD_REAL_WIDTH) - jy_S;
145             dz_S0            = load<SimdReal>(x_ci_simd + 2*GMX_SIMD_REAL_WIDTH) - jz_S;
146             dx_S2            = load<SimdReal>(x_ci_simd + 3*GMX_SIMD_REAL_WIDTH) - jx_S;
147             dy_S2            = load<SimdReal>(x_ci_simd + 4*GMX_SIMD_REAL_WIDTH) - jy_S;
148             dz_S2            = load<SimdReal>(x_ci_simd + 5*GMX_SIMD_REAL_WIDTH) - jz_S;
149
150             /* rsq = dx*dx+dy*dy+dz*dz */
151             rsq_S0           = norm2(dx_S0, dy_S0, dz_S0);
152             rsq_S2           = norm2(dx_S2, dy_S2, dz_S2);
153
154             wco_S0           = (rsq_S0 < rc2_S);
155             wco_S2           = (rsq_S2 < rc2_S);
156
157             wco_any_S        = wco_S0 || wco_S2;
158
159             InRange          = anyTrue(wco_any_S);
160
161             *numDistanceChecks += 2*GMX_SIMD_REAL_WIDTH;
162         }
163         if (!InRange)
164         {
165             jclusterFirst++;
166         }
167     }
168     if (!InRange)
169     {
170         return;
171     }
172
173     InRange = FALSE;
174     while (!InRange && jclusterLast > jclusterFirst)
175     {
176 #if NBNXN_SEARCH_BB_SIMD4
177         d2 = subc_bb_dist2_simd4(0, bb_ci, jclusterLast, jGrid.jBoundingBoxes());
178 #else
179         d2 = subc_bb_dist2(0, bb_ci, jclusterLast, jGrid.jBoundingBoxes());
180 #endif
181         *numDistanceChecks += 2;
182
183         /* Check if the distance is within the distance where
184          * we use only the bounding box distance rbb,
185          * or within the cut-off and there is at least one atom pair
186          * within the cut-off.
187          */
188         if (d2 < rbb2)
189         {
190             InRange = TRUE;
191         }
192         else if (d2 < rlist2)
193         {
194             xind_l  = xIndexFromCj<NbnxnLayout::Simd2xNN>(cjFromCi<NbnxnLayout::Simd2xNN, 0>(jGrid.cellOffset()) + jclusterLast);
195
196             jx_S  = loadDuplicateHsimd(x_j + xind_l + 0*c_xStride2xNN);
197             jy_S  = loadDuplicateHsimd(x_j + xind_l + 1*c_xStride2xNN);
198             jz_S  = loadDuplicateHsimd(x_j + xind_l + 2*c_xStride2xNN);
199
200             /* Calculate distance */
201             dx_S0            = load<SimdReal>(x_ci_simd + 0*GMX_SIMD_REAL_WIDTH) - jx_S;
202             dy_S0            = load<SimdReal>(x_ci_simd + 1*GMX_SIMD_REAL_WIDTH) - jy_S;
203             dz_S0            = load<SimdReal>(x_ci_simd + 2*GMX_SIMD_REAL_WIDTH) - jz_S;
204             dx_S2            = load<SimdReal>(x_ci_simd + 3*GMX_SIMD_REAL_WIDTH) - jx_S;
205             dy_S2            = load<SimdReal>(x_ci_simd + 4*GMX_SIMD_REAL_WIDTH) - jy_S;
206             dz_S2            = load<SimdReal>(x_ci_simd + 5*GMX_SIMD_REAL_WIDTH) - jz_S;
207
208             /* rsq = dx*dx+dy*dy+dz*dz */
209             rsq_S0           = norm2(dx_S0, dy_S0, dz_S0);
210             rsq_S2           = norm2(dx_S2, dy_S2, dz_S2);
211
212             wco_S0           = (rsq_S0 < rc2_S);
213             wco_S2           = (rsq_S2 < rc2_S);
214
215             wco_any_S        = wco_S0 || wco_S2;
216
217             InRange          = anyTrue(wco_any_S);
218
219             *numDistanceChecks += 2*GMX_SIMD_REAL_WIDTH;
220         }
221         if (!InRange)
222         {
223             jclusterLast--;
224         }
225     }
226
227     if (jclusterFirst <= jclusterLast)
228     {
229         for (int jcluster = jclusterFirst; jcluster <= jclusterLast; jcluster++)
230         {
231             /* Store cj and the interaction mask */
232             nbnxn_cj_t cjEntry;
233             cjEntry.cj   = cjFromCi<NbnxnLayout::Simd2xNN, 0>(jGrid.cellOffset()) + jcluster;
234             cjEntry.excl = get_imask_simd_2xnn(excludeSubDiagonal, icluster, jcluster);
235             nbl->cj.push_back(cjEntry);
236         }
237         /* Increase the closing index in the i list */
238         nbl->ci.back().cj_ind_end = nbl->cj.size();
239     }
240 }