2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
/* STRIDE_S is the distance, counted in reals, between the x, y and z
 * sub-blocks of one packed cluster as indexed by the code in this file
 * (x[ia + d*STRIDE_S + k] with d = 0, 1, 2).
 * It equals the SIMD width when that width is at least the i-cluster size.
 * NOTE(review): the second definition is presumably the #else branch (use the
 * i-cluster size when the SIMD width is smaller); the #else/#endif lines are
 * not visible in this listing - confirm.
 */
38 #if GMX_SIMD_REAL_WIDTH >= NBNXN_CPU_CLUSTER_I_SIZE
39 #define STRIDE_S (GMX_SIMD_REAL_WIDTH)
41 #define STRIDE_S NBNXN_CPU_CLUSTER_I_SIZE
44 /* Copies PBC shifted i-cell packed atom coordinates to working array */
/* For i-cluster ci this reads the coordinates of four atoms from x and stores
 * each shifted component in work->x_ci_simd_4xn through gmx_simd_set1_r:
 * the fields ix_Sk / iy_Sk / iz_Sk (k = 0..3) receive
 * x[ia + d*STRIDE_S + k] plus shx / shy / shz for d = 0 / 1 / 2,
 * where ia = X_IND_CI_SIMD_4XN(ci).
 *
 * ci             i-cluster index, converted to an offset into x by the macro
 * shx, shy, shz  shift added to the x, y and z components respectively
 * stride         marked gmx_unused; the compile-time STRIDE_S is used instead
 * x              packed coordinate array; only read here (const)
 * work           work data whose x_ci_simd_4xn member is overwritten
 *
 * NOTE(review): gmx_simd_set1_r presumably replicates one scalar over all
 * SIMD lanes, so that each field holds a single atom component in every
 * lane - confirm against the SIMD module documentation.
 * NOTE(review): the declaration of ia and at least one parameter line are not
 * visible in this listing.
 */
45 static gmx_inline void
46 icell_set_x_simd_4xn(int ci,
47 real shx, real shy, real shz,
49 int gmx_unused stride, const real *x,
50 nbnxn_list_work_t *work)
53 nbnxn_x_ci_simd_4xn_t *x_ci;
55 x_ci = work->x_ci_simd_4xn;
/* Offset of i-cluster ci inside the packed array x */
57 ia = X_IND_CI_SIMD_4XN(ci);
/* One group of three stores per i-atom: offsets +0, +1, +2, +3 select the
 * atom, multiples of STRIDE_S select the x, y or z sub-block.
 */
59 x_ci->ix_S0 = gmx_simd_set1_r(x[ia + 0*STRIDE_S ] + shx);
60 x_ci->iy_S0 = gmx_simd_set1_r(x[ia + 1*STRIDE_S ] + shy);
61 x_ci->iz_S0 = gmx_simd_set1_r(x[ia + 2*STRIDE_S ] + shz);
62 x_ci->ix_S1 = gmx_simd_set1_r(x[ia + 0*STRIDE_S + 1] + shx);
63 x_ci->iy_S1 = gmx_simd_set1_r(x[ia + 1*STRIDE_S + 1] + shy);
64 x_ci->iz_S1 = gmx_simd_set1_r(x[ia + 2*STRIDE_S + 1] + shz);
65 x_ci->ix_S2 = gmx_simd_set1_r(x[ia + 0*STRIDE_S + 2] + shx);
66 x_ci->iy_S2 = gmx_simd_set1_r(x[ia + 1*STRIDE_S + 2] + shy);
67 x_ci->iz_S2 = gmx_simd_set1_r(x[ia + 2*STRIDE_S + 2] + shz);
68 x_ci->ix_S3 = gmx_simd_set1_r(x[ia + 0*STRIDE_S + 3] + shx);
69 x_ci->iy_S3 = gmx_simd_set1_r(x[ia + 1*STRIDE_S + 3] + shy);
70 x_ci->iz_S3 = gmx_simd_set1_r(x[ia + 2*STRIDE_S + 3] + shz);
73 /* SIMD code for making a pair list of cell ci vs cell cjf-cjl
74 * for coordinates in packed format.
75 * Checks bounding box distances and possibly atom pair distances.
76 * This is an accelerated version of make_cluster_list_simple.
/* Visible structure of this function:
 *  1. cjf and cjl are converted with CI_TO_CJ_SIMD_4XN.
 *  2. A first loop tests j-cluster cjf while nothing is in range and
 *     cjf <= cjl.
 *  3. A second loop applies the same test to j-cluster cjl while cjl > cjf.
 *  4. A final loop over cj = cjf..cjl fills entries of nbl->cj and the
 *     closing index nbl->ci[nbl->nci].cj_ind_end is set to nbl->ncj.
 *
 * gridj            supplies the j bounding boxes (bbj) and the cell offset
 *                  (cell0)
 * nbl              pair list; its work data is read, its cj and ci entries
 *                  are written
 * ci               i-cluster index, passed on to get_imask_simd_4xn
 * cjf, cjl         first and last cell of the range to examine
 * remove_sub_diag  passed on to get_imask_simd_4xn
 *
 * NOTE(review): this listing does not show the remaining parameters (x_j,
 * rl2 and ndistc are used below), the declarations of InRange and d2, the
 * statements that turn d2 into InRange, the updates of cjf / cjl inside the
 * two loops, or the increment of nbl->ncj. Reading the two loops as trimming
 * the range from the front and from the back is therefore an assumption to
 * confirm against the full file.
 */
78 static gmx_inline void
79 make_cluster_list_simd_4xn(const nbnxn_grid_t *gridj,
80 nbnxn_pairlist_t *nbl,
81 int ci, int cjf, int cjl,
82 gmx_bool remove_sub_diag,
87 const nbnxn_x_ci_simd_4xn_t *work;
88 const nbnxn_bb_t *bb_ci;
90 gmx_simd_real_t jx_S, jy_S, jz_S;
92 gmx_simd_real_t dx_S0, dy_S0, dz_S0;
93 gmx_simd_real_t dx_S1, dy_S1, dz_S1;
94 gmx_simd_real_t dx_S2, dy_S2, dz_S2;
95 gmx_simd_real_t dx_S3, dy_S3, dz_S3;
97 gmx_simd_real_t rsq_S0;
98 gmx_simd_real_t rsq_S1;
99 gmx_simd_real_t rsq_S2;
100 gmx_simd_real_t rsq_S3;
102 gmx_simd_bool_t wco_S0;
103 gmx_simd_bool_t wco_S1;
104 gmx_simd_bool_t wco_S2;
105 gmx_simd_bool_t wco_S3;
106 gmx_simd_bool_t wco_any_S01, wco_any_S23, wco_any_S;
108 gmx_simd_real_t rc2_S;
112 int xind_f, xind_l, cj;
114 /* cppcheck-suppress selfAssignment . selfAssignment for width 4.*/
115 cjf = CI_TO_CJ_SIMD_4XN(cjf);
116 cjl = CI_TO_CJ_SIMD_4XN(cjl+1) - 1;
/* From here on cjf and cjl are j-cluster indices. The upper bound is obtained
 * by converting cjl+1 and subtracting one, and the loops below treat it as an
 * inclusive bound.
 */
/* The i-cluster coordinates are the ones icell_set_x_simd_4xn stored in
 * work->x_ci_simd_4xn; bb_ci is read from the same work data.
 */
118 work = nbl->work->x_ci_simd_4xn;
120 bb_ci = nbl->work->bb_ci;
/* rl2 as a SIMD value; the squared distances below are compared against it */
122 rc2_S = gmx_simd_set1_r(rl2);
/* First loop: examine j-cluster cjf */
125 while (!InRange && cjf <= cjl)
/* d2 is computed from bb_ci and the bounding box data of j-cluster cjf;
 * NBNXN_SEARCH_BB_SIMD4 selects the SIMD4 variant of the routine.
 */
127 #ifdef NBNXN_SEARCH_BB_SIMD4
128 d2 = subc_bb_dist2_simd4(0, bb_ci, cjf, gridj->bbj);
130 d2 = subc_bb_dist2(0, bb_ci, cjf, gridj->bbj);
134 /* Check if the distance is within the distance where
135 * we use only the bounding box distance rbb,
136 * or within the cut-off and there is at least one atom pair
137 * within the cut-off.
145 xind_f = X_IND_CJ_SIMD_4XN(CI_TO_CJ_SIMD_4XN(gridj->cell0) + cjf);
147 jx_S = gmx_simd_load_r(x_j+xind_f+0*STRIDE_S);
148 jy_S = gmx_simd_load_r(x_j+xind_f+1*STRIDE_S);
149 jz_S = gmx_simd_load_r(x_j+xind_f+2*STRIDE_S);
152 /* Calculate distance */
/* Component differences between each of the four i-atoms (S0..S3, taken from
 * the work data) and the j-coordinates in jx_S / jy_S / jz_S.
 */
153 dx_S0 = gmx_simd_sub_r(work->ix_S0, jx_S);
154 dy_S0 = gmx_simd_sub_r(work->iy_S0, jy_S);
155 dz_S0 = gmx_simd_sub_r(work->iz_S0, jz_S);
156 dx_S1 = gmx_simd_sub_r(work->ix_S1, jx_S);
157 dy_S1 = gmx_simd_sub_r(work->iy_S1, jy_S);
158 dz_S1 = gmx_simd_sub_r(work->iz_S1, jz_S);
159 dx_S2 = gmx_simd_sub_r(work->ix_S2, jx_S);
160 dy_S2 = gmx_simd_sub_r(work->iy_S2, jy_S);
161 dz_S2 = gmx_simd_sub_r(work->iz_S2, jz_S);
162 dx_S3 = gmx_simd_sub_r(work->ix_S3, jx_S);
163 dy_S3 = gmx_simd_sub_r(work->iy_S3, jy_S);
164 dz_S3 = gmx_simd_sub_r(work->iz_S3, jz_S);
166 /* rsq = dx*dx+dy*dy+dz*dz */
167 rsq_S0 = gmx_simd_calc_rsq_r(dx_S0, dy_S0, dz_S0);
168 rsq_S1 = gmx_simd_calc_rsq_r(dx_S1, dy_S1, dz_S1);
169 rsq_S2 = gmx_simd_calc_rsq_r(dx_S2, dy_S2, dz_S2);
170 rsq_S3 = gmx_simd_calc_rsq_r(dx_S3, dy_S3, dz_S3);
/* Masks for rsq < rc2, one per i-atom */
172 wco_S0 = gmx_simd_cmplt_r(rsq_S0, rc2_S);
173 wco_S1 = gmx_simd_cmplt_r(rsq_S1, rc2_S);
174 wco_S2 = gmx_simd_cmplt_r(rsq_S2, rc2_S);
175 wco_S3 = gmx_simd_cmplt_r(rsq_S3, rc2_S);
/* OR the four masks together; InRange takes the result of
 * gmx_simd_anytrue_b on the combined mask (presumably "any lane set" -
 * confirm against the SIMD module).
 */
177 wco_any_S01 = gmx_simd_or_b(wco_S0, wco_S1);
178 wco_any_S23 = gmx_simd_or_b(wco_S2, wco_S3);
179 wco_any_S = gmx_simd_or_b(wco_any_S01, wco_any_S23);
181 InRange = gmx_simd_anytrue_b(wco_any_S);
/* Adds four times the SIMD width to *ndistc; presumably a count of
 * atom-pair distance evaluations - confirm.
 */
183 *ndistc += 4*GMX_SIMD_REAL_WIDTH;
/* Second loop: the same test applied to j-cluster cjl, only while cjl > cjf */
196 while (!InRange && cjl > cjf)
198 #ifdef NBNXN_SEARCH_BB_SIMD4
199 d2 = subc_bb_dist2_simd4(0, bb_ci, cjl, gridj->bbj);
201 d2 = subc_bb_dist2(0, bb_ci, cjl, gridj->bbj);
205 /* Check if the distance is within the distance where
206 * we use only the bounding box distance rbb,
207 * or within the cut-off and there is at least one atom pair
208 * within the cut-off.
216 xind_l = X_IND_CJ_SIMD_4XN(CI_TO_CJ_SIMD_4XN(gridj->cell0) + cjl);
218 jx_S = gmx_simd_load_r(x_j+xind_l+0*STRIDE_S);
219 jy_S = gmx_simd_load_r(x_j+xind_l+1*STRIDE_S);
220 jz_S = gmx_simd_load_r(x_j+xind_l+2*STRIDE_S);
222 /* Calculate distance */
223 dx_S0 = gmx_simd_sub_r(work->ix_S0, jx_S);
224 dy_S0 = gmx_simd_sub_r(work->iy_S0, jy_S);
225 dz_S0 = gmx_simd_sub_r(work->iz_S0, jz_S);
226 dx_S1 = gmx_simd_sub_r(work->ix_S1, jx_S);
227 dy_S1 = gmx_simd_sub_r(work->iy_S1, jy_S);
228 dz_S1 = gmx_simd_sub_r(work->iz_S1, jz_S);
229 dx_S2 = gmx_simd_sub_r(work->ix_S2, jx_S);
230 dy_S2 = gmx_simd_sub_r(work->iy_S2, jy_S);
231 dz_S2 = gmx_simd_sub_r(work->iz_S2, jz_S);
232 dx_S3 = gmx_simd_sub_r(work->ix_S3, jx_S);
233 dy_S3 = gmx_simd_sub_r(work->iy_S3, jy_S);
234 dz_S3 = gmx_simd_sub_r(work->iz_S3, jz_S);
236 /* rsq = dx*dx+dy*dy+dz*dz */
237 rsq_S0 = gmx_simd_calc_rsq_r(dx_S0, dy_S0, dz_S0);
238 rsq_S1 = gmx_simd_calc_rsq_r(dx_S1, dy_S1, dz_S1);
239 rsq_S2 = gmx_simd_calc_rsq_r(dx_S2, dy_S2, dz_S2);
240 rsq_S3 = gmx_simd_calc_rsq_r(dx_S3, dy_S3, dz_S3);
/* Same mask computation and reduction as in the first loop */
242 wco_S0 = gmx_simd_cmplt_r(rsq_S0, rc2_S);
243 wco_S1 = gmx_simd_cmplt_r(rsq_S1, rc2_S);
244 wco_S2 = gmx_simd_cmplt_r(rsq_S2, rc2_S);
245 wco_S3 = gmx_simd_cmplt_r(rsq_S3, rc2_S);
247 wco_any_S01 = gmx_simd_or_b(wco_S0, wco_S1);
248 wco_any_S23 = gmx_simd_or_b(wco_S2, wco_S3);
249 wco_any_S = gmx_simd_or_b(wco_any_S01, wco_any_S23);
251 InRange = gmx_simd_anytrue_b(wco_any_S);
253 *ndistc += 4*GMX_SIMD_REAL_WIDTH;
/* Fill the nbl->cj entry at index nbl->ncj for every cj in [cjf, cjl] */
263 for (cj = cjf; cj <= cjl; cj++)
265 /* Store cj and the interaction mask */
266 nbl->cj[nbl->ncj].cj = CI_TO_CJ_SIMD_4XN(gridj->cell0) + cj;
267 nbl->cj[nbl->ncj].excl = get_imask_simd_4xn(remove_sub_diag, ci, cj);
/* QPX builds only: split the low 16 bits of excl into four 4-bit values,
 * lowest nibble first.
 */
268 #ifdef GMX_SIMD_IBM_QPX
269 nbl->cj[nbl->ncj].interaction_mask_indices[0] = (nbl->cj[nbl->ncj].excl & 0x000F) >> (0 * 4);
270 nbl->cj[nbl->ncj].interaction_mask_indices[1] = (nbl->cj[nbl->ncj].excl & 0x00F0) >> (1 * 4);
271 nbl->cj[nbl->ncj].interaction_mask_indices[2] = (nbl->cj[nbl->ncj].excl & 0x0F00) >> (2 * 4);
272 nbl->cj[nbl->ncj].interaction_mask_indices[3] = (nbl->cj[nbl->ncj].excl & 0xF000) >> (3 * 4);
276 /* Increase the closing index in i super-cell list */
277 nbl->ci[nbl->nci].cj_ind_end = nbl->ncj;