2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2012, The GROMACS development team.
6 * Copyright (c) 2012, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
38 /* Get the half-width SIMD stuff from the kernel utils files */
39 #include "nbnxn_kernels/nbnxn_kernel_simd_utils.h"
42 #if GMX_SIMD_WIDTH_HERE >= 2*NBNXN_CPU_CLUSTER_I_SIZE
43 #define STRIDE_S (GMX_SIMD_WIDTH_HERE/2)
45 #define STRIDE_S NBNXN_CPU_CLUSTER_I_SIZE
48 static gmx_inline gmx_mm_pr gmx_load_hpr_hilo_pr(const real *a)
53 gmx_load_hpr(&a_S, a);
55 gmx_2hpr_to_pr(a_S, a_S, &a_a_S);
60 static gmx_inline gmx_mm_pr gmx_set_2real_shift_pr(const real *a, real shift)
62 gmx_mm_hpr a0_S, a1_S;
65 gmx_set1_hpr(&a0_S, a[0] + shift);
66 gmx_set1_hpr(&a1_S, a[1] + shift);
68 gmx_2hpr_to_pr(a0_S, a1_S, &a0_a1_S);
73 /* Copies PBC shifted i-cell packed atom coordinates to working array */
74 static gmx_inline void
75 icell_set_x_simd_2xnn(int ci,
76 real shx, real shy, real shz,
78 int gmx_unused stride, const real *x,
79 nbnxn_list_work_t *work)
82 nbnxn_x_ci_simd_2xnn_t *x_ci;
84 x_ci = work->x_ci_simd_2xnn;
86 ia = X_IND_CI_SIMD_2XNN(ci);
88 x_ci->ix_S0 = gmx_set_2real_shift_pr(x + ia + 0*STRIDE_S + 0, shx);
89 x_ci->iy_S0 = gmx_set_2real_shift_pr(x + ia + 1*STRIDE_S + 0, shy);
90 x_ci->iz_S0 = gmx_set_2real_shift_pr(x + ia + 2*STRIDE_S + 0, shz);
91 x_ci->ix_S2 = gmx_set_2real_shift_pr(x + ia + 0*STRIDE_S + 2, shx);
92 x_ci->iy_S2 = gmx_set_2real_shift_pr(x + ia + 1*STRIDE_S + 2, shy);
93 x_ci->iz_S2 = gmx_set_2real_shift_pr(x + ia + 2*STRIDE_S + 2, shz);
96 /* SIMD code for making a pair list of cell ci vs cell cjf-cjl
97 * for coordinates in packed format.
98 * Checks bouding box distances and possibly atom pair distances.
99 * This is an accelerated version of make_cluster_list_simple.
101 static gmx_inline void
102 make_cluster_list_simd_2xnn(const nbnxn_grid_t *gridj,
103 nbnxn_pairlist_t *nbl,
104 int ci, int cjf, int cjl,
105 gmx_bool remove_sub_diag,
107 real rl2, float rbb2,
110 const nbnxn_x_ci_simd_2xnn_t *work;
111 const nbnxn_bb_t *bb_ci;
113 gmx_mm_pr jx_S, jy_S, jz_S;
115 gmx_mm_pr dx_S0, dy_S0, dz_S0;
116 gmx_mm_pr dx_S2, dy_S2, dz_S2;
129 int xind_f, xind_l, cj;
131 cjf = CI_TO_CJ_SIMD_2XNN(cjf);
132 cjl = CI_TO_CJ_SIMD_2XNN(cjl+1) - 1;
134 work = nbl->work->x_ci_simd_2xnn;
136 bb_ci = nbl->work->bb_ci;
138 rc2_S = gmx_set1_pr(rl2);
141 while (!InRange && cjf <= cjl)
143 #ifdef NBNXN_SEARCH_BB_SIMD4
144 d2 = subc_bb_dist2_simd4(0, bb_ci, cjf, gridj->bbj);
146 d2 = subc_bb_dist2(0, bb_ci, cjf, gridj->bbj);
150 /* Check if the distance is within the distance where
151 * we use only the bounding box distance rbb,
152 * or within the cut-off and there is at least one atom pair
153 * within the cut-off.
161 xind_f = X_IND_CJ_SIMD_2XNN(CI_TO_CJ_SIMD_2XNN(gridj->cell0) + cjf);
163 jx_S = gmx_load_hpr_hilo_pr(x_j+xind_f+0*STRIDE_S);
164 jy_S = gmx_load_hpr_hilo_pr(x_j+xind_f+1*STRIDE_S);
165 jz_S = gmx_load_hpr_hilo_pr(x_j+xind_f+2*STRIDE_S);
167 /* Calculate distance */
168 dx_S0 = gmx_sub_pr(work->ix_S0, jx_S);
169 dy_S0 = gmx_sub_pr(work->iy_S0, jy_S);
170 dz_S0 = gmx_sub_pr(work->iz_S0, jz_S);
171 dx_S2 = gmx_sub_pr(work->ix_S2, jx_S);
172 dy_S2 = gmx_sub_pr(work->iy_S2, jy_S);
173 dz_S2 = gmx_sub_pr(work->iz_S2, jz_S);
175 /* rsq = dx*dx+dy*dy+dz*dz */
176 rsq_S0 = gmx_calc_rsq_pr(dx_S0, dy_S0, dz_S0);
177 rsq_S2 = gmx_calc_rsq_pr(dx_S2, dy_S2, dz_S2);
179 wco_S0 = gmx_cmplt_pr(rsq_S0, rc2_S);
180 wco_S2 = gmx_cmplt_pr(rsq_S2, rc2_S);
182 wco_any_S = gmx_or_pb(wco_S0, wco_S2);
184 InRange = gmx_anytrue_pb(wco_any_S);
186 *ndistc += 2*GMX_SIMD_WIDTH_HERE;
199 while (!InRange && cjl > cjf)
201 #ifdef NBNXN_SEARCH_BB_SIMD4
202 d2 = subc_bb_dist2_simd4(0, bb_ci, cjl, gridj->bbj);
204 d2 = subc_bb_dist2(0, bb_ci, cjl, gridj->bbj);
208 /* Check if the distance is within the distance where
209 * we use only the bounding box distance rbb,
210 * or within the cut-off and there is at least one atom pair
211 * within the cut-off.
219 xind_l = X_IND_CJ_SIMD_2XNN(CI_TO_CJ_SIMD_2XNN(gridj->cell0) + cjl);
221 jx_S = gmx_load_hpr_hilo_pr(x_j+xind_l+0*STRIDE_S);
222 jy_S = gmx_load_hpr_hilo_pr(x_j+xind_l+1*STRIDE_S);
223 jz_S = gmx_load_hpr_hilo_pr(x_j+xind_l+2*STRIDE_S);
225 /* Calculate distance */
226 dx_S0 = gmx_sub_pr(work->ix_S0, jx_S);
227 dy_S0 = gmx_sub_pr(work->iy_S0, jy_S);
228 dz_S0 = gmx_sub_pr(work->iz_S0, jz_S);
229 dx_S2 = gmx_sub_pr(work->ix_S2, jx_S);
230 dy_S2 = gmx_sub_pr(work->iy_S2, jy_S);
231 dz_S2 = gmx_sub_pr(work->iz_S2, jz_S);
233 /* rsq = dx*dx+dy*dy+dz*dz */
234 rsq_S0 = gmx_calc_rsq_pr(dx_S0, dy_S0, dz_S0);
235 rsq_S2 = gmx_calc_rsq_pr(dx_S2, dy_S2, dz_S2);
237 wco_S0 = gmx_cmplt_pr(rsq_S0, rc2_S);
238 wco_S2 = gmx_cmplt_pr(rsq_S2, rc2_S);
240 wco_any_S = gmx_or_pb(wco_S0, wco_S2);
242 InRange = gmx_anytrue_pb(wco_any_S);
244 *ndistc += 2*GMX_SIMD_WIDTH_HERE;
254 for (cj = cjf; cj <= cjl; cj++)
256 /* Store cj and the interaction mask */
257 nbl->cj[nbl->ncj].cj = CI_TO_CJ_SIMD_2XNN(gridj->cell0) + cj;
258 nbl->cj[nbl->ncj].excl = get_imask_simd_2xnn(remove_sub_diag, ci, cj);
261 /* Increase the closing index in i super-cell list */
262 nbl->ci[nbl->nci].cj_ind_end = nbl->ncj;