6c8bc8f46de2110504854536bebbe7b3c1b86e24
[alexxy/gromacs.git] / src / gromacs / nbnxm / pairlist_simd_2xmm.h
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
5  * Copyright (c) 2017,2018,2019,2020,2021, by the GROMACS development team, led by
6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7  * and including many others, as listed in the AUTHORS file in the
8  * top-level source directory and at http://www.gromacs.org.
9  *
10  * GROMACS is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public License
12  * as published by the Free Software Foundation; either version 2.1
13  * of the License, or (at your option) any later version.
14  *
15  * GROMACS is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with GROMACS; if not, see
22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
24  *
25  * If you want to redistribute modifications to GROMACS, please
26  * consider that scientific software is very special. Version
27  * control is crucial - bugs must be traceable. We will be happy to
28  * consider code for inclusion in the official distribution, but
29  * derived work must not be called official GROMACS. Details are found
30  * in the README & COPYING files - if they are missing, get the
31  * official version at http://www.gromacs.org.
32  *
33  * To help us fund GROMACS development, we humbly ask that you cite
34  * the research papers on the package. Check out http://www.gromacs.org.
35  */
36 /*! \internal \file
37  *
38  * \brief
39  * Declares inline-friendly code for making 2xNN pairlists
40  *
41  * \author Berk Hess <hess@kth.se>
42  * \ingroup module_nbnxm
43  */
44
45
46 //! Stride of the packed x coordinate array
47 static constexpr int c_xStride2xNN = (GMX_SIMD_REAL_WIDTH >= 2 * c_nbnxnCpuIClusterSize)
48                                              ? GMX_SIMD_REAL_WIDTH / 2
49                                              : c_nbnxnCpuIClusterSize;
50
51 //! Copies PBC shifted i-cell packed atom coordinates to working array
52 static inline void icell_set_x_simd_2xnn(int  ci,
53                                          real shx,
54                                          real shy,
55                                          real shz,
56                                          int gmx_unused        stride,
57                                          const real*           x,
58                                          NbnxnPairlistCpuWork* work)
59 {
60     real* x_ci_simd = work->iClusterData.xSimd.data();
61
62     const int ia = xIndexFromCi<NbnxnLayout::Simd2xNN>(ci);
63
64     store(x_ci_simd + 0 * GMX_SIMD_REAL_WIDTH,
65           loadU1DualHsimd(x + ia + 0 * c_xStride2xNN + 0) + SimdReal(shx));
66     store(x_ci_simd + 1 * GMX_SIMD_REAL_WIDTH,
67           loadU1DualHsimd(x + ia + 1 * c_xStride2xNN + 0) + SimdReal(shy));
68     store(x_ci_simd + 2 * GMX_SIMD_REAL_WIDTH,
69           loadU1DualHsimd(x + ia + 2 * c_xStride2xNN + 0) + SimdReal(shz));
70     store(x_ci_simd + 3 * GMX_SIMD_REAL_WIDTH,
71           loadU1DualHsimd(x + ia + 0 * c_xStride2xNN + 2) + SimdReal(shx));
72     store(x_ci_simd + 4 * GMX_SIMD_REAL_WIDTH,
73           loadU1DualHsimd(x + ia + 1 * c_xStride2xNN + 2) + SimdReal(shy));
74     store(x_ci_simd + 5 * GMX_SIMD_REAL_WIDTH,
75           loadU1DualHsimd(x + ia + 2 * c_xStride2xNN + 2) + SimdReal(shz));
76 }
77
78 /*! \brief SIMD code for checking and adding cluster-pairs to the list using coordinates in packed format.
79  *
80  * Checks bounding box distances and possibly atom pair distances.
81  * This is an accelerated version of make_cluster_list_simple.
82  *
83  * \param[in]     jGrid               The j-grid
84  * \param[in,out] nbl                 The pair-list to store the cluster pairs in
85  * \param[in]     icluster            The index of the i-cluster
86  * \param[in]     firstCell           The first cluster in the j-range, using i-cluster size indexing
87  * \param[in]     lastCell            The last cluster in the j-range, using i-cluster size indexing
88  * \param[in]     excludeSubDiagonal  Exclude atom pairs with i-index > j-index
89  * \param[in]     x_j                 Coordinates for the j-atom, in SIMD packed format
90  * \param[in]     rlist2              The squared list cut-off
91  * \param[in]     rbb2                The squared cut-off for putting cluster-pairs in the list based on bounding box distance only
92  * \param[in,out] numDistanceChecks   The number of distance checks performed
93  */
94 static inline void makeClusterListSimd2xnn(const Grid&       jGrid,
95                                            NbnxnPairlistCpu* nbl,
96                                            int               icluster,
97                                            int               firstCell,
98                                            int               lastCell,
99                                            bool              excludeSubDiagonal,
100                                            const real* gmx_restrict x_j,
101                                            real                     rlist2,
102                                            float                    rbb2,
103                                            int* gmx_restrict numDistanceChecks)
104 {
105     using namespace gmx;
106     const real* gmx_restrict x_ci_simd    = nbl->work->iClusterData.xSimd.data();
107     const BoundingBox* gmx_restrict bb_ci = nbl->work->iClusterData.bb.data();
108
109     SimdReal jx_S, jy_S, jz_S;
110
111     SimdReal dx_S0, dy_S0, dz_S0;
112     SimdReal dx_S2, dy_S2, dz_S2;
113
114     SimdReal rsq_S0;
115     SimdReal rsq_S2;
116
117     SimdBool wco_S0;
118     SimdBool wco_S2;
119     SimdBool wco_any_S;
120
121     SimdReal rc2_S;
122
123     int jclusterFirst = cjFromCi<NbnxnLayout::Simd2xNN, 0>(firstCell);
124     int jclusterLast  = cjFromCi<NbnxnLayout::Simd2xNN, 1>(lastCell);
125     GMX_ASSERT(jclusterLast >= jclusterFirst,
126                "We should have a non-empty j-cluster range, since the calling code should have "
127                "ensured a non-empty cell range");
128
129     rc2_S = SimdReal(rlist2);
130
131     bool InRange = false;
132     while (!InRange && jclusterFirst <= jclusterLast)
133     {
134         const float d2 = clusterBoundingBoxDistance2(bb_ci[0], jGrid.jBoundingBoxes()[jclusterFirst]);
135         *numDistanceChecks += 2;
136
137         /* Check if the distance is within the distance where
138          * we use only the bounding box distance rbb,
139          * or within the cut-off and there is at least one atom pair
140          * within the cut-off.
141          */
142         if (d2 < rbb2)
143         {
144             InRange = true;
145         }
146         else if (d2 < rlist2)
147         {
148             const int xind_f = xIndexFromCj<NbnxnLayout::Simd2xNN>(
149                     cjFromCi<NbnxnLayout::Simd2xNN, 0>(jGrid.cellOffset()) + jclusterFirst);
150
151             jx_S = loadDuplicateHsimd(x_j + xind_f + 0 * c_xStride2xNN);
152             jy_S = loadDuplicateHsimd(x_j + xind_f + 1 * c_xStride2xNN);
153             jz_S = loadDuplicateHsimd(x_j + xind_f + 2 * c_xStride2xNN);
154
155             /* Calculate distance */
156             dx_S0 = load<SimdReal>(x_ci_simd + 0 * GMX_SIMD_REAL_WIDTH) - jx_S;
157             dy_S0 = load<SimdReal>(x_ci_simd + 1 * GMX_SIMD_REAL_WIDTH) - jy_S;
158             dz_S0 = load<SimdReal>(x_ci_simd + 2 * GMX_SIMD_REAL_WIDTH) - jz_S;
159             dx_S2 = load<SimdReal>(x_ci_simd + 3 * GMX_SIMD_REAL_WIDTH) - jx_S;
160             dy_S2 = load<SimdReal>(x_ci_simd + 4 * GMX_SIMD_REAL_WIDTH) - jy_S;
161             dz_S2 = load<SimdReal>(x_ci_simd + 5 * GMX_SIMD_REAL_WIDTH) - jz_S;
162
163             /* rsq = dx*dx+dy*dy+dz*dz */
164             rsq_S0 = norm2(dx_S0, dy_S0, dz_S0);
165             rsq_S2 = norm2(dx_S2, dy_S2, dz_S2);
166
167             wco_S0 = (rsq_S0 < rc2_S);
168             wco_S2 = (rsq_S2 < rc2_S);
169
170             wco_any_S = wco_S0 || wco_S2;
171
172             InRange = anyTrue(wco_any_S);
173
174             *numDistanceChecks += 2 * GMX_SIMD_REAL_WIDTH;
175         }
176         if (!InRange)
177         {
178             jclusterFirst++;
179         }
180     }
181     if (!InRange)
182     {
183         return;
184     }
185
186     InRange = false;
187     while (!InRange && jclusterLast > jclusterFirst)
188     {
189         const float d2 = clusterBoundingBoxDistance2(bb_ci[0], jGrid.jBoundingBoxes()[jclusterLast]);
190         *numDistanceChecks += 2;
191
192         /* Check if the distance is within the distance where
193          * we use only the bounding box distance rbb,
194          * or within the cut-off and there is at least one atom pair
195          * within the cut-off.
196          */
197         if (d2 < rbb2)
198         {
199             InRange = true;
200         }
201         else if (d2 < rlist2)
202         {
203             const int xind_l = xIndexFromCj<NbnxnLayout::Simd2xNN>(
204                     cjFromCi<NbnxnLayout::Simd2xNN, 0>(jGrid.cellOffset()) + jclusterLast);
205
206             jx_S = loadDuplicateHsimd(x_j + xind_l + 0 * c_xStride2xNN);
207             jy_S = loadDuplicateHsimd(x_j + xind_l + 1 * c_xStride2xNN);
208             jz_S = loadDuplicateHsimd(x_j + xind_l + 2 * c_xStride2xNN);
209
210             /* Calculate distance */
211             dx_S0 = load<SimdReal>(x_ci_simd + 0 * GMX_SIMD_REAL_WIDTH) - jx_S;
212             dy_S0 = load<SimdReal>(x_ci_simd + 1 * GMX_SIMD_REAL_WIDTH) - jy_S;
213             dz_S0 = load<SimdReal>(x_ci_simd + 2 * GMX_SIMD_REAL_WIDTH) - jz_S;
214             dx_S2 = load<SimdReal>(x_ci_simd + 3 * GMX_SIMD_REAL_WIDTH) - jx_S;
215             dy_S2 = load<SimdReal>(x_ci_simd + 4 * GMX_SIMD_REAL_WIDTH) - jy_S;
216             dz_S2 = load<SimdReal>(x_ci_simd + 5 * GMX_SIMD_REAL_WIDTH) - jz_S;
217
218             /* rsq = dx*dx+dy*dy+dz*dz */
219             rsq_S0 = norm2(dx_S0, dy_S0, dz_S0);
220             rsq_S2 = norm2(dx_S2, dy_S2, dz_S2);
221
222             wco_S0 = (rsq_S0 < rc2_S);
223             wco_S2 = (rsq_S2 < rc2_S);
224
225             wco_any_S = wco_S0 || wco_S2;
226
227             InRange = anyTrue(wco_any_S);
228
229             *numDistanceChecks += 2 * GMX_SIMD_REAL_WIDTH;
230         }
231         if (!InRange)
232         {
233             jclusterLast--;
234         }
235     }
236
237     if (jclusterFirst <= jclusterLast)
238     {
239         for (int jcluster = jclusterFirst; jcluster <= jclusterLast; jcluster++)
240         {
241             /* Store cj and the interaction mask */
242             nbnxn_cj_t cjEntry;
243             cjEntry.cj   = cjFromCi<NbnxnLayout::Simd2xNN, 0>(jGrid.cellOffset()) + jcluster;
244             cjEntry.excl = get_imask_simd_2xnn(excludeSubDiagonal, icluster, jcluster);
245             nbl->cj.push_back(cjEntry);
246         }
247         /* Increase the closing index in the i list */
248         nbl->ci.back().cj_ind_end = nbl->cj.size();
249     }
250 }