2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
44 #include "../nb_kernel.h"
45 #include "types/simple.h"
46 #include "gromacs/legacyheaders/vec.h"
49 #include "kernelutil_sparc64_hpc_ace_double.h"
52 * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double
53 * Electrostatics interaction: Ewald
54 * VdW interaction: LennardJones
55 * Geometry: Water3-Water3
56 * Calculate force/pot: PotentialAndForce
59 nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double
60 (t_nblist * gmx_restrict nlist,
61 rvec * gmx_restrict xx,
62 rvec * gmx_restrict ff,
63 t_forcerec * gmx_restrict fr,
64 t_mdatoms * gmx_restrict mdatoms,
65 nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
66 t_nrnb * gmx_restrict nrnb)
68 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
69 * just 0 for non-waters.
70 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
71 * jnr indices corresponding to data put in the two positions in the SIMD register.
73 int i_shift_offset,i_coord_offset,outeriter,inneriter;
74 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
76 int j_coord_offsetA,j_coord_offsetB;
77 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
79 real *shiftvec,*fshift,*x,*f;
80 _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
82 _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
84 _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
86 _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
87 int vdwjidx0A,vdwjidx0B;
88 _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
89 int vdwjidx1A,vdwjidx1B;
90 _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
91 int vdwjidx2A,vdwjidx2B;
92 _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
93 _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
94 _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
95 _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
96 _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
97 _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
98 _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
99 _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
100 _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
101 _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
102 _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
105 _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
108 _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
109 _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
110 _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
113 _fjsp_v2r8 dummy_mask,cutoff_mask;
114 _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
115 _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
116 union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
123 jindex = nlist->jindex;
125 shiftidx = nlist->shift;
127 shiftvec = fr->shift_vec[0];
128 fshift = fr->fshift[0];
129 facel = gmx_fjsp_set1_v2r8(fr->epsfac);
130 charge = mdatoms->chargeA;
131 nvdwtype = fr->ntype;
133 vdwtype = mdatoms->typeA;
135 sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
136 ewtab = fr->ic->tabq_coul_FDV0;
137 ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
138 ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
140 /* Setup water-specific parameters */
141 inr = nlist->iinr[0];
142 iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
143 iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
144 iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
145 vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
147 jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
148 jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
149 jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
150 vdwjidx0A = 2*vdwtype[inr+0];
151 qq00 = _fjsp_mul_v2r8(iq0,jq0);
152 c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
153 c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
154 qq01 = _fjsp_mul_v2r8(iq0,jq1);
155 qq02 = _fjsp_mul_v2r8(iq0,jq2);
156 qq10 = _fjsp_mul_v2r8(iq1,jq0);
157 qq11 = _fjsp_mul_v2r8(iq1,jq1);
158 qq12 = _fjsp_mul_v2r8(iq1,jq2);
159 qq20 = _fjsp_mul_v2r8(iq2,jq0);
160 qq21 = _fjsp_mul_v2r8(iq2,jq1);
161 qq22 = _fjsp_mul_v2r8(iq2,jq2);
163 /* Avoid stupid compiler warnings */
171 /* Start outer loop over neighborlists */
172 for(iidx=0; iidx<nri; iidx++)
174 /* Load shift vector for this list */
175 i_shift_offset = DIM*shiftidx[iidx];
177 /* Load limits for loop over neighbors */
178 j_index_start = jindex[iidx];
179 j_index_end = jindex[iidx+1];
181 /* Get outer coordinate index */
183 i_coord_offset = DIM*inr;
185 /* Load i particle coords and add shift vector */
186 gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
187 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
189 fix0 = _fjsp_setzero_v2r8();
190 fiy0 = _fjsp_setzero_v2r8();
191 fiz0 = _fjsp_setzero_v2r8();
192 fix1 = _fjsp_setzero_v2r8();
193 fiy1 = _fjsp_setzero_v2r8();
194 fiz1 = _fjsp_setzero_v2r8();
195 fix2 = _fjsp_setzero_v2r8();
196 fiy2 = _fjsp_setzero_v2r8();
197 fiz2 = _fjsp_setzero_v2r8();
199 /* Reset potential sums */
200 velecsum = _fjsp_setzero_v2r8();
201 vvdwsum = _fjsp_setzero_v2r8();
203 /* Start inner kernel loop */
204 for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
207 /* Get j neighbor index, and coordinate index */
210 j_coord_offsetA = DIM*jnrA;
211 j_coord_offsetB = DIM*jnrB;
213 /* load j atom coordinates */
214 gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
215 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
217 /* Calculate displacement vector */
218 dx00 = _fjsp_sub_v2r8(ix0,jx0);
219 dy00 = _fjsp_sub_v2r8(iy0,jy0);
220 dz00 = _fjsp_sub_v2r8(iz0,jz0);
221 dx01 = _fjsp_sub_v2r8(ix0,jx1);
222 dy01 = _fjsp_sub_v2r8(iy0,jy1);
223 dz01 = _fjsp_sub_v2r8(iz0,jz1);
224 dx02 = _fjsp_sub_v2r8(ix0,jx2);
225 dy02 = _fjsp_sub_v2r8(iy0,jy2);
226 dz02 = _fjsp_sub_v2r8(iz0,jz2);
227 dx10 = _fjsp_sub_v2r8(ix1,jx0);
228 dy10 = _fjsp_sub_v2r8(iy1,jy0);
229 dz10 = _fjsp_sub_v2r8(iz1,jz0);
230 dx11 = _fjsp_sub_v2r8(ix1,jx1);
231 dy11 = _fjsp_sub_v2r8(iy1,jy1);
232 dz11 = _fjsp_sub_v2r8(iz1,jz1);
233 dx12 = _fjsp_sub_v2r8(ix1,jx2);
234 dy12 = _fjsp_sub_v2r8(iy1,jy2);
235 dz12 = _fjsp_sub_v2r8(iz1,jz2);
236 dx20 = _fjsp_sub_v2r8(ix2,jx0);
237 dy20 = _fjsp_sub_v2r8(iy2,jy0);
238 dz20 = _fjsp_sub_v2r8(iz2,jz0);
239 dx21 = _fjsp_sub_v2r8(ix2,jx1);
240 dy21 = _fjsp_sub_v2r8(iy2,jy1);
241 dz21 = _fjsp_sub_v2r8(iz2,jz1);
242 dx22 = _fjsp_sub_v2r8(ix2,jx2);
243 dy22 = _fjsp_sub_v2r8(iy2,jy2);
244 dz22 = _fjsp_sub_v2r8(iz2,jz2);
246 /* Calculate squared distance and things based on it */
247 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
248 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
249 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
250 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
251 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
252 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
253 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
254 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
255 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
257 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
258 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
259 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
260 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
261 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
262 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
263 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
264 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
265 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
267 rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
268 rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
269 rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
270 rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
271 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
272 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
273 rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
274 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
275 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
277 fjx0 = _fjsp_setzero_v2r8();
278 fjy0 = _fjsp_setzero_v2r8();
279 fjz0 = _fjsp_setzero_v2r8();
280 fjx1 = _fjsp_setzero_v2r8();
281 fjy1 = _fjsp_setzero_v2r8();
282 fjz1 = _fjsp_setzero_v2r8();
283 fjx2 = _fjsp_setzero_v2r8();
284 fjy2 = _fjsp_setzero_v2r8();
285 fjz2 = _fjsp_setzero_v2r8();
287 /**************************
288 * CALCULATE INTERACTIONS *
289 **************************/
291 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
293 /* EWALD ELECTROSTATICS */
295 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
296 ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
297 itab_tmp = _fjsp_dtox_v2r8(ewrt);
298 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
299 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
301 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
302 ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
303 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
304 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
305 ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
306 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
307 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
308 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
309 velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
310 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
312 /* LENNARD-JONES DISPERSION/REPULSION */
314 rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
315 vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
316 vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
317 vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
318 fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
320 /* Update potential sum for this i atom from the interaction with this j atom. */
321 velecsum = _fjsp_add_v2r8(velecsum,velec);
322 vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
324 fscal = _fjsp_add_v2r8(felec,fvdw);
326 /* Update vectorial force */
327 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
328 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
329 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
331 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
332 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
333 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
335 /**************************
336 * CALCULATE INTERACTIONS *
337 **************************/
339 r01 = _fjsp_mul_v2r8(rsq01,rinv01);
341 /* EWALD ELECTROSTATICS */
343 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
344 ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
345 itab_tmp = _fjsp_dtox_v2r8(ewrt);
346 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
347 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
349 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
350 ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
351 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
352 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
353 ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
354 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
355 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
356 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
357 velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
358 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
360 /* Update potential sum for this i atom from the interaction with this j atom. */
361 velecsum = _fjsp_add_v2r8(velecsum,velec);
365 /* Update vectorial force */
366 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
367 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
368 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
370 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
371 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
372 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
374 /**************************
375 * CALCULATE INTERACTIONS *
376 **************************/
378 r02 = _fjsp_mul_v2r8(rsq02,rinv02);
380 /* EWALD ELECTROSTATICS */
382 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
383 ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
384 itab_tmp = _fjsp_dtox_v2r8(ewrt);
385 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
386 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
388 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
389 ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
390 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
391 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
392 ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
393 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
394 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
395 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
396 velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
397 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
399 /* Update potential sum for this i atom from the interaction with this j atom. */
400 velecsum = _fjsp_add_v2r8(velecsum,velec);
404 /* Update vectorial force */
405 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
406 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
407 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
409 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
410 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
411 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
413 /**************************
414 * CALCULATE INTERACTIONS *
415 **************************/
417 r10 = _fjsp_mul_v2r8(rsq10,rinv10);
419 /* EWALD ELECTROSTATICS */
421 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
422 ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
423 itab_tmp = _fjsp_dtox_v2r8(ewrt);
424 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
425 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
427 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
428 ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
429 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
430 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
431 ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
432 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
433 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
434 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
435 velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
436 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
438 /* Update potential sum for this i atom from the interaction with this j atom. */
439 velecsum = _fjsp_add_v2r8(velecsum,velec);
443 /* Update vectorial force */
444 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
445 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
446 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
448 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
449 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
450 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
452 /**************************
453 * CALCULATE INTERACTIONS *
454 **************************/
456 r11 = _fjsp_mul_v2r8(rsq11,rinv11);
458 /* EWALD ELECTROSTATICS */
460 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
461 ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
462 itab_tmp = _fjsp_dtox_v2r8(ewrt);
463 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
464 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
466 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
467 ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
468 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
469 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
470 ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
471 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
472 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
473 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
474 velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
475 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
477 /* Update potential sum for this i atom from the interaction with this j atom. */
478 velecsum = _fjsp_add_v2r8(velecsum,velec);
482 /* Update vectorial force */
483 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
484 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
485 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
487 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
488 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
489 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
491 /**************************
492 * CALCULATE INTERACTIONS *
493 **************************/
495 r12 = _fjsp_mul_v2r8(rsq12,rinv12);
497 /* EWALD ELECTROSTATICS */
499 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
500 ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
501 itab_tmp = _fjsp_dtox_v2r8(ewrt);
502 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
503 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
505 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
506 ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
507 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
508 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
509 ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
510 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
511 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
512 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
513 velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
514 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
516 /* Update potential sum for this i atom from the interaction with this j atom. */
517 velecsum = _fjsp_add_v2r8(velecsum,velec);
521 /* Update vectorial force */
522 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
523 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
524 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
526 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
527 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
528 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
530 /**************************
531 * CALCULATE INTERACTIONS *
532 **************************/
534 r20 = _fjsp_mul_v2r8(rsq20,rinv20);
536 /* EWALD ELECTROSTATICS */
538 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
539 ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
540 itab_tmp = _fjsp_dtox_v2r8(ewrt);
541 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
542 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
544 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
545 ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
546 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
547 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
548 ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
549 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
550 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
551 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
552 velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
553 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
555 /* Update potential sum for this i atom from the interaction with this j atom. */
556 velecsum = _fjsp_add_v2r8(velecsum,velec);
560 /* Update vectorial force */
561 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
562 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
563 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
565 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
566 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
567 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
569 /**************************
570 * CALCULATE INTERACTIONS *
571 **************************/
573 r21 = _fjsp_mul_v2r8(rsq21,rinv21);
575 /* EWALD ELECTROSTATICS */
577 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
578 ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
579 itab_tmp = _fjsp_dtox_v2r8(ewrt);
580 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
581 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
583 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
584 ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
585 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
586 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
587 ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
588 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
589 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
590 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
591 velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
592 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
594 /* Update potential sum for this i atom from the interaction with this j atom. */
595 velecsum = _fjsp_add_v2r8(velecsum,velec);
599 /* Update vectorial force */
600 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
601 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
602 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
604 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
605 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
606 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
608 /**************************
609 * CALCULATE INTERACTIONS *
610 **************************/
612 r22 = _fjsp_mul_v2r8(rsq22,rinv22);
614 /* EWALD ELECTROSTATICS */
616 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
617 ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
618 itab_tmp = _fjsp_dtox_v2r8(ewrt);
619 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
620 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
622 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
623 ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
624 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
625 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
626 ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
627 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
628 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
629 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
630 velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
631 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
633 /* Update potential sum for this i atom from the interaction with this j atom. */
634 velecsum = _fjsp_add_v2r8(velecsum,velec);
638 /* Update vectorial force */
639 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
640 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
641 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
643 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
644 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
645 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
647 gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
649 /* Inner loop uses 408 flops */
656 j_coord_offsetA = DIM*jnrA;
658 /* load j atom coordinates */
659 gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
660 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
662 /* Calculate displacement vector */
663 dx00 = _fjsp_sub_v2r8(ix0,jx0);
664 dy00 = _fjsp_sub_v2r8(iy0,jy0);
665 dz00 = _fjsp_sub_v2r8(iz0,jz0);
666 dx01 = _fjsp_sub_v2r8(ix0,jx1);
667 dy01 = _fjsp_sub_v2r8(iy0,jy1);
668 dz01 = _fjsp_sub_v2r8(iz0,jz1);
669 dx02 = _fjsp_sub_v2r8(ix0,jx2);
670 dy02 = _fjsp_sub_v2r8(iy0,jy2);
671 dz02 = _fjsp_sub_v2r8(iz0,jz2);
672 dx10 = _fjsp_sub_v2r8(ix1,jx0);
673 dy10 = _fjsp_sub_v2r8(iy1,jy0);
674 dz10 = _fjsp_sub_v2r8(iz1,jz0);
675 dx11 = _fjsp_sub_v2r8(ix1,jx1);
676 dy11 = _fjsp_sub_v2r8(iy1,jy1);
677 dz11 = _fjsp_sub_v2r8(iz1,jz1);
678 dx12 = _fjsp_sub_v2r8(ix1,jx2);
679 dy12 = _fjsp_sub_v2r8(iy1,jy2);
680 dz12 = _fjsp_sub_v2r8(iz1,jz2);
681 dx20 = _fjsp_sub_v2r8(ix2,jx0);
682 dy20 = _fjsp_sub_v2r8(iy2,jy0);
683 dz20 = _fjsp_sub_v2r8(iz2,jz0);
684 dx21 = _fjsp_sub_v2r8(ix2,jx1);
685 dy21 = _fjsp_sub_v2r8(iy2,jy1);
686 dz21 = _fjsp_sub_v2r8(iz2,jz1);
687 dx22 = _fjsp_sub_v2r8(ix2,jx2);
688 dy22 = _fjsp_sub_v2r8(iy2,jy2);
689 dz22 = _fjsp_sub_v2r8(iz2,jz2);
691 /* Calculate squared distance and things based on it */
692 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
693 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
694 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
695 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
696 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
697 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
698 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
699 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
700 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
702 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
703 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
704 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
705 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
706 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
707 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
708 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
709 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
710 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
712 rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
713 rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
714 rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
715 rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
716 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
717 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
718 rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
719 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
720 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
722 fjx0 = _fjsp_setzero_v2r8();
723 fjy0 = _fjsp_setzero_v2r8();
724 fjz0 = _fjsp_setzero_v2r8();
725 fjx1 = _fjsp_setzero_v2r8();
726 fjy1 = _fjsp_setzero_v2r8();
727 fjz1 = _fjsp_setzero_v2r8();
728 fjx2 = _fjsp_setzero_v2r8();
729 fjy2 = _fjsp_setzero_v2r8();
730 fjz2 = _fjsp_setzero_v2r8();
732 /**************************
733 * CALCULATE INTERACTIONS *
734 **************************/
736 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
738 /* EWALD ELECTROSTATICS */
740 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
741 ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
742 itab_tmp = _fjsp_dtox_v2r8(ewrt);
743 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
744 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
746 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
747 ewtabD = _fjsp_setzero_v2r8();
748 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
749 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
750 ewtabFn = _fjsp_setzero_v2r8();
751 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
752 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
753 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
754 velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
755 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
757 /* LENNARD-JONES DISPERSION/REPULSION */
759 rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
760 vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
761 vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
762 vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
763 fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
765 /* Update potential sum for this i atom from the interaction with this j atom. */
766 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
767 velecsum = _fjsp_add_v2r8(velecsum,velec);
768 vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
769 vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
771 fscal = _fjsp_add_v2r8(felec,fvdw);
773 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
775 /* Update vectorial force */
776 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
777 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
778 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
780 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
781 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
782 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
784 /**************************
785 * CALCULATE INTERACTIONS *
786 **************************/
788 r01 = _fjsp_mul_v2r8(rsq01,rinv01);
790 /* EWALD ELECTROSTATICS */
792 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
793 ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
794 itab_tmp = _fjsp_dtox_v2r8(ewrt);
795 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
796 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
798 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
799 ewtabD = _fjsp_setzero_v2r8();
800 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
801 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
802 ewtabFn = _fjsp_setzero_v2r8();
803 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
804 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
805 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
806 velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
807 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
809 /* Update potential sum for this i atom from the interaction with this j atom. */
810 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
811 velecsum = _fjsp_add_v2r8(velecsum,velec);
815 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
817 /* Update vectorial force */
818 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
819 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
820 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
822 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
823 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
824 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
826 /**************************
827 * CALCULATE INTERACTIONS *
828 **************************/
830 r02 = _fjsp_mul_v2r8(rsq02,rinv02);
832 /* EWALD ELECTROSTATICS */
834 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
835 ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
836 itab_tmp = _fjsp_dtox_v2r8(ewrt);
837 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
838 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
840 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
841 ewtabD = _fjsp_setzero_v2r8();
842 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
843 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
844 ewtabFn = _fjsp_setzero_v2r8();
845 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
846 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
847 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
848 velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
849 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
851 /* Update potential sum for this i atom from the interaction with this j atom. */
852 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
853 velecsum = _fjsp_add_v2r8(velecsum,velec);
857 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
859 /* Update vectorial force */
860 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
861 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
862 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
864 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
865 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
866 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
868 /**************************
869 * CALCULATE INTERACTIONS *
870 **************************/
872 r10 = _fjsp_mul_v2r8(rsq10,rinv10);
874 /* EWALD ELECTROSTATICS */
876 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
877 ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
878 itab_tmp = _fjsp_dtox_v2r8(ewrt);
879 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
880 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
882 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
883 ewtabD = _fjsp_setzero_v2r8();
884 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
885 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
886 ewtabFn = _fjsp_setzero_v2r8();
887 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
888 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
889 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
890 velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
891 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
893 /* Update potential sum for this i atom from the interaction with this j atom. */
894 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
895 velecsum = _fjsp_add_v2r8(velecsum,velec);
899 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
901 /* Update vectorial force */
902 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
903 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
904 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
906 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
907 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
908 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
910 /**************************
911 * CALCULATE INTERACTIONS *
912 **************************/
914 r11 = _fjsp_mul_v2r8(rsq11,rinv11);
916 /* EWALD ELECTROSTATICS */
918 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
919 ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
920 itab_tmp = _fjsp_dtox_v2r8(ewrt);
921 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
922 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
924 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
925 ewtabD = _fjsp_setzero_v2r8();
926 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
927 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
928 ewtabFn = _fjsp_setzero_v2r8();
929 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
930 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
931 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
932 velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
933 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
935 /* Update potential sum for this i atom from the interaction with this j atom. */
936 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
937 velecsum = _fjsp_add_v2r8(velecsum,velec);
941 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
943 /* Update vectorial force */
944 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
945 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
946 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
948 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
949 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
950 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
952 /**************************
953 * CALCULATE INTERACTIONS *
954 **************************/
956 r12 = _fjsp_mul_v2r8(rsq12,rinv12);
958 /* EWALD ELECTROSTATICS */
960 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
961 ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
962 itab_tmp = _fjsp_dtox_v2r8(ewrt);
963 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
964 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
966 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
967 ewtabD = _fjsp_setzero_v2r8();
968 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
969 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
970 ewtabFn = _fjsp_setzero_v2r8();
971 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
972 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
973 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
974 velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
975 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
977 /* Update potential sum for this i atom from the interaction with this j atom. */
978 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
979 velecsum = _fjsp_add_v2r8(velecsum,velec);
983 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
985 /* Update vectorial force */
986 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
987 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
988 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
990 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
991 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
992 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
994 /**************************
995 * CALCULATE INTERACTIONS *
996 **************************/
998 r20 = _fjsp_mul_v2r8(rsq20,rinv20);
1000 /* EWALD ELECTROSTATICS */
1002 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1003 ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
1004 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1005 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1006 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1008 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1009 ewtabD = _fjsp_setzero_v2r8();
1010 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1011 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1012 ewtabFn = _fjsp_setzero_v2r8();
1013 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1014 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1015 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1016 velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
1017 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
1019 /* Update potential sum for this i atom from the interaction with this j atom. */
1020 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1021 velecsum = _fjsp_add_v2r8(velecsum,velec);
1025 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1027 /* Update vectorial force */
1028 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
1029 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1030 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1032 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1033 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1034 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1036 /**************************
1037 * CALCULATE INTERACTIONS *
1038 **************************/
1040 r21 = _fjsp_mul_v2r8(rsq21,rinv21);
1042 /* EWALD ELECTROSTATICS */
1044 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1045 ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
1046 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1047 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1048 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1050 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1051 ewtabD = _fjsp_setzero_v2r8();
1052 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1053 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1054 ewtabFn = _fjsp_setzero_v2r8();
1055 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1056 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1057 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1058 velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
1059 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
1061 /* Update potential sum for this i atom from the interaction with this j atom. */
1062 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1063 velecsum = _fjsp_add_v2r8(velecsum,velec);
1067 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1069 /* Update vectorial force */
1070 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
1071 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1072 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1074 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1075 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1076 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1078 /**************************
1079 * CALCULATE INTERACTIONS *
1080 **************************/
1082 r22 = _fjsp_mul_v2r8(rsq22,rinv22);
1084 /* EWALD ELECTROSTATICS */
1086 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1087 ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
1088 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1089 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1090 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1092 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1093 ewtabD = _fjsp_setzero_v2r8();
1094 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1095 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1096 ewtabFn = _fjsp_setzero_v2r8();
1097 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1098 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1099 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1100 velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
1101 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
1103 /* Update potential sum for this i atom from the interaction with this j atom. */
1104 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1105 velecsum = _fjsp_add_v2r8(velecsum,velec);
1109 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1111 /* Update vectorial force */
1112 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
1113 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1114 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1116 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1117 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1118 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1120 gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1122 /* Inner loop uses 408 flops */
1125 /* End of innermost loop */
1127 gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
1128 f+i_coord_offset,fshift+i_shift_offset);
1131 /* Update potential energies */
1132 gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
1133 gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
1135 /* Increment number of inner iterations */
1136 inneriter += j_index_end - j_index_start;
1138 /* Outer loop uses 20 flops */
1141 /* Increment number of outer iterations */
1144 /* Update outer/inner flops */
1146 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*408);
1149 * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double
1150 * Electrostatics interaction: Ewald
1151 * VdW interaction: LennardJones
1152 * Geometry: Water3-Water3
1153 * Calculate force/pot: Force
1156 nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double
1157 (t_nblist * gmx_restrict nlist,
1158 rvec * gmx_restrict xx,
1159 rvec * gmx_restrict ff,
1160 t_forcerec * gmx_restrict fr,
1161 t_mdatoms * gmx_restrict mdatoms,
1162 nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
1163 t_nrnb * gmx_restrict nrnb)
1165 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1166 * just 0 for non-waters.
1167 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, i.e. the two different
1168 * jnr indices whose data is put in the two positions of the SIMD register.
1170 int i_shift_offset,i_coord_offset,outeriter,inneriter;
1171 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
1173 int j_coord_offsetA,j_coord_offsetB;
1174 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
1175 real rcutoff_scalar;
1176 real *shiftvec,*fshift,*x,*f;
1177 _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1179 _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
1181 _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1183 _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1184 int vdwjidx0A,vdwjidx0B;
1185 _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
1186 int vdwjidx1A,vdwjidx1B;
1187 _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1188 int vdwjidx2A,vdwjidx2B;
1189 _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1190 _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
1191 _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
1192 _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
1193 _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
1194 _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1195 _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1196 _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
1197 _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1198 _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1199 _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
1202 _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
1205 _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
1206 _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
1207 _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
1209 _fjsp_v2r8 itab_tmp;
1210 _fjsp_v2r8 dummy_mask,cutoff_mask;
1211 _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
1212 _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
1213 union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
1220 jindex = nlist->jindex;
1222 shiftidx = nlist->shift;
1224 shiftvec = fr->shift_vec[0];
1225 fshift = fr->fshift[0];
1226 facel = gmx_fjsp_set1_v2r8(fr->epsfac);
1227 charge = mdatoms->chargeA;
1228 nvdwtype = fr->ntype;
1229 vdwparam = fr->nbfp;
1230 vdwtype = mdatoms->typeA;
1232 sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
1233 ewtab = fr->ic->tabq_coul_F;
1234 ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
1235 ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
1237 /* Setup water-specific parameters */
1238 inr = nlist->iinr[0];
1239 iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
1240 iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
1241 iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
1242 vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
1244 jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
1245 jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
1246 jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
1247 vdwjidx0A = 2*vdwtype[inr+0];
1248 qq00 = _fjsp_mul_v2r8(iq0,jq0);
1249 c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
1250 c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
1251 qq01 = _fjsp_mul_v2r8(iq0,jq1);
1252 qq02 = _fjsp_mul_v2r8(iq0,jq2);
1253 qq10 = _fjsp_mul_v2r8(iq1,jq0);
1254 qq11 = _fjsp_mul_v2r8(iq1,jq1);
1255 qq12 = _fjsp_mul_v2r8(iq1,jq2);
1256 qq20 = _fjsp_mul_v2r8(iq2,jq0);
1257 qq21 = _fjsp_mul_v2r8(iq2,jq1);
1258 qq22 = _fjsp_mul_v2r8(iq2,jq2);
1260 /* Avoid stupid compiler warnings */
1262 j_coord_offsetA = 0;
1263 j_coord_offsetB = 0;
1268 /* Start outer loop over neighborlists */
1269 for(iidx=0; iidx<nri; iidx++)
1271 /* Load shift vector for this list */
1272 i_shift_offset = DIM*shiftidx[iidx];
1274 /* Load limits for loop over neighbors */
1275 j_index_start = jindex[iidx];
1276 j_index_end = jindex[iidx+1];
1278 /* Get outer coordinate index */
1280 i_coord_offset = DIM*inr;
1282 /* Load i particle coords and add shift vector */
1283 gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
1284 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
1286 fix0 = _fjsp_setzero_v2r8();
1287 fiy0 = _fjsp_setzero_v2r8();
1288 fiz0 = _fjsp_setzero_v2r8();
1289 fix1 = _fjsp_setzero_v2r8();
1290 fiy1 = _fjsp_setzero_v2r8();
1291 fiz1 = _fjsp_setzero_v2r8();
1292 fix2 = _fjsp_setzero_v2r8();
1293 fiy2 = _fjsp_setzero_v2r8();
1294 fiz2 = _fjsp_setzero_v2r8();
1296 /* Start inner kernel loop */
1297 for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1300 /* Get j neighbor index, and coordinate index */
1302 jnrB = jjnr[jidx+1];
1303 j_coord_offsetA = DIM*jnrA;
1304 j_coord_offsetB = DIM*jnrB;
1306 /* load j atom coordinates */
1307 gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
1308 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1310 /* Calculate displacement vector */
1311 dx00 = _fjsp_sub_v2r8(ix0,jx0);
1312 dy00 = _fjsp_sub_v2r8(iy0,jy0);
1313 dz00 = _fjsp_sub_v2r8(iz0,jz0);
1314 dx01 = _fjsp_sub_v2r8(ix0,jx1);
1315 dy01 = _fjsp_sub_v2r8(iy0,jy1);
1316 dz01 = _fjsp_sub_v2r8(iz0,jz1);
1317 dx02 = _fjsp_sub_v2r8(ix0,jx2);
1318 dy02 = _fjsp_sub_v2r8(iy0,jy2);
1319 dz02 = _fjsp_sub_v2r8(iz0,jz2);
1320 dx10 = _fjsp_sub_v2r8(ix1,jx0);
1321 dy10 = _fjsp_sub_v2r8(iy1,jy0);
1322 dz10 = _fjsp_sub_v2r8(iz1,jz0);
1323 dx11 = _fjsp_sub_v2r8(ix1,jx1);
1324 dy11 = _fjsp_sub_v2r8(iy1,jy1);
1325 dz11 = _fjsp_sub_v2r8(iz1,jz1);
1326 dx12 = _fjsp_sub_v2r8(ix1,jx2);
1327 dy12 = _fjsp_sub_v2r8(iy1,jy2);
1328 dz12 = _fjsp_sub_v2r8(iz1,jz2);
1329 dx20 = _fjsp_sub_v2r8(ix2,jx0);
1330 dy20 = _fjsp_sub_v2r8(iy2,jy0);
1331 dz20 = _fjsp_sub_v2r8(iz2,jz0);
1332 dx21 = _fjsp_sub_v2r8(ix2,jx1);
1333 dy21 = _fjsp_sub_v2r8(iy2,jy1);
1334 dz21 = _fjsp_sub_v2r8(iz2,jz1);
1335 dx22 = _fjsp_sub_v2r8(ix2,jx2);
1336 dy22 = _fjsp_sub_v2r8(iy2,jy2);
1337 dz22 = _fjsp_sub_v2r8(iz2,jz2);
1339 /* Calculate squared distance and things based on it */
1340 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1341 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1342 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1343 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1344 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1345 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1346 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1347 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1348 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1350 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
1351 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
1352 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
1353 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
1354 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
1355 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
1356 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
1357 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
1358 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
1360 rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
1361 rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
1362 rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
1363 rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
1364 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
1365 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
1366 rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
1367 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
1368 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
1370 fjx0 = _fjsp_setzero_v2r8();
1371 fjy0 = _fjsp_setzero_v2r8();
1372 fjz0 = _fjsp_setzero_v2r8();
1373 fjx1 = _fjsp_setzero_v2r8();
1374 fjy1 = _fjsp_setzero_v2r8();
1375 fjz1 = _fjsp_setzero_v2r8();
1376 fjx2 = _fjsp_setzero_v2r8();
1377 fjy2 = _fjsp_setzero_v2r8();
1378 fjz2 = _fjsp_setzero_v2r8();
1380 /**************************
1381 * CALCULATE INTERACTIONS *
1382 **************************/
1384 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
1386 /* EWALD ELECTROSTATICS */
1388 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1389 ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
1390 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1391 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1392 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1394 gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1396 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1397 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
1399 /* LENNARD-JONES DISPERSION/REPULSION */
1401 rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
1402 fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
1404 fscal = _fjsp_add_v2r8(felec,fvdw);
1406 /* Update vectorial force */
1407 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
1408 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1409 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1411 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1412 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1413 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1415 /**************************
1416 * CALCULATE INTERACTIONS *
1417 **************************/
1419 r01 = _fjsp_mul_v2r8(rsq01,rinv01);
1421 /* EWALD ELECTROSTATICS */
1423 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1424 ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
1425 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1426 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1427 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1429 gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1431 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1432 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
1436 /* Update vectorial force */
1437 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
1438 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1439 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1441 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1442 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1443 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1445 /**************************
1446 * CALCULATE INTERACTIONS *
1447 **************************/
1449 r02 = _fjsp_mul_v2r8(rsq02,rinv02);
1451 /* EWALD ELECTROSTATICS */
1453 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1454 ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
1455 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1456 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1457 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1459 gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1461 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1462 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
1466 /* Update vectorial force */
1467 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
1468 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1469 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1471 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1472 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1473 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1475 /**************************
1476 * CALCULATE INTERACTIONS *
1477 **************************/
1479 r10 = _fjsp_mul_v2r8(rsq10,rinv10);
1481 /* EWALD ELECTROSTATICS */
1483 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1484 ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
1485 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1486 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1487 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1489 gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1491 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1492 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
1496 /* Update vectorial force */
1497 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
1498 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1499 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1501 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1502 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1503 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1505 /**************************
1506 * CALCULATE INTERACTIONS *
1507 **************************/
1509 r11 = _fjsp_mul_v2r8(rsq11,rinv11);
1511 /* EWALD ELECTROSTATICS */
1513 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1514 ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
1515 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1516 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1517 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1519 gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1521 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1522 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
1526 /* Update vectorial force */
1527 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
1528 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1529 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1531 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1532 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1533 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1535 /**************************
1536 * CALCULATE INTERACTIONS *
1537 **************************/
1539 r12 = _fjsp_mul_v2r8(rsq12,rinv12);
1541 /* EWALD ELECTROSTATICS */
1543 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1544 ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
1545 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1546 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1547 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1549 gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1551 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1552 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
1556 /* Update vectorial force */
1557 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
1558 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1559 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1561 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1562 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1563 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1565 /**************************
1566 * CALCULATE INTERACTIONS *
1567 **************************/
1569 r20 = _fjsp_mul_v2r8(rsq20,rinv20);
1571 /* EWALD ELECTROSTATICS */
1573 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1574 ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
1575 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1576 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1577 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1579 gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1581 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1582 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
1586 /* Update vectorial force */
1587 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
1588 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1589 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1591 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1592 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1593 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1595 /**************************
1596 * CALCULATE INTERACTIONS *
1597 **************************/
1599 r21 = _fjsp_mul_v2r8(rsq21,rinv21);
1601 /* EWALD ELECTROSTATICS */
1603 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1604 ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
1605 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1606 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1607 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1609 gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1611 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1612 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
1616 /* Update vectorial force */
1617 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
1618 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1619 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1621 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1622 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1623 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1625 /**************************
1626 * CALCULATE INTERACTIONS *
1627 **************************/
1629 r22 = _fjsp_mul_v2r8(rsq22,rinv22);
1631 /* EWALD ELECTROSTATICS */
1633 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1634 ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
1635 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1636 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1637 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1639 gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1641 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1642 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
1646 /* Update vectorial force */
1647 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
1648 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1649 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1651 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1652 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1653 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1655 gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1657 /* Inner loop uses 358 flops */
1660 if(jidx<j_index_end)
1664 j_coord_offsetA = DIM*jnrA;
1666 /* load j atom coordinates */
1667 gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
1668 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1670 /* Calculate displacement vector */
1671 dx00 = _fjsp_sub_v2r8(ix0,jx0);
1672 dy00 = _fjsp_sub_v2r8(iy0,jy0);
1673 dz00 = _fjsp_sub_v2r8(iz0,jz0);
1674 dx01 = _fjsp_sub_v2r8(ix0,jx1);
1675 dy01 = _fjsp_sub_v2r8(iy0,jy1);
1676 dz01 = _fjsp_sub_v2r8(iz0,jz1);
1677 dx02 = _fjsp_sub_v2r8(ix0,jx2);
1678 dy02 = _fjsp_sub_v2r8(iy0,jy2);
1679 dz02 = _fjsp_sub_v2r8(iz0,jz2);
1680 dx10 = _fjsp_sub_v2r8(ix1,jx0);
1681 dy10 = _fjsp_sub_v2r8(iy1,jy0);
1682 dz10 = _fjsp_sub_v2r8(iz1,jz0);
1683 dx11 = _fjsp_sub_v2r8(ix1,jx1);
1684 dy11 = _fjsp_sub_v2r8(iy1,jy1);
1685 dz11 = _fjsp_sub_v2r8(iz1,jz1);
1686 dx12 = _fjsp_sub_v2r8(ix1,jx2);
1687 dy12 = _fjsp_sub_v2r8(iy1,jy2);
1688 dz12 = _fjsp_sub_v2r8(iz1,jz2);
1689 dx20 = _fjsp_sub_v2r8(ix2,jx0);
1690 dy20 = _fjsp_sub_v2r8(iy2,jy0);
1691 dz20 = _fjsp_sub_v2r8(iz2,jz0);
1692 dx21 = _fjsp_sub_v2r8(ix2,jx1);
1693 dy21 = _fjsp_sub_v2r8(iy2,jy1);
1694 dz21 = _fjsp_sub_v2r8(iz2,jz1);
1695 dx22 = _fjsp_sub_v2r8(ix2,jx2);
1696 dy22 = _fjsp_sub_v2r8(iy2,jy2);
1697 dz22 = _fjsp_sub_v2r8(iz2,jz2);
1699 /* Calculate squared distance and things based on it */
1700 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1701 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1702 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1703 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1704 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1705 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1706 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1707 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1708 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1710 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
1711 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
1712 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
1713 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
1714 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
1715 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
1716 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
1717 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
1718 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
1720 rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
1721 rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
1722 rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
1723 rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
1724 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
1725 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
1726 rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
1727 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
1728 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
1730 fjx0 = _fjsp_setzero_v2r8();
1731 fjy0 = _fjsp_setzero_v2r8();
1732 fjz0 = _fjsp_setzero_v2r8();
1733 fjx1 = _fjsp_setzero_v2r8();
1734 fjy1 = _fjsp_setzero_v2r8();
1735 fjz1 = _fjsp_setzero_v2r8();
1736 fjx2 = _fjsp_setzero_v2r8();
1737 fjy2 = _fjsp_setzero_v2r8();
1738 fjz2 = _fjsp_setzero_v2r8();
1740 /**************************
1741 * CALCULATE INTERACTIONS *
1742 **************************/
1744 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
1746 /* EWALD ELECTROSTATICS */
1748 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1749 ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
1750 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1751 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1752 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1754 gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1755 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1756 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
1758 /* LENNARD-JONES DISPERSION/REPULSION */
1760 rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
1761 fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
1763 fscal = _fjsp_add_v2r8(felec,fvdw);
1765 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1767 /* Update vectorial force */
1768 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
1769 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1770 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1772 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1773 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1774 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1776 /**************************
1777 * CALCULATE INTERACTIONS *
1778 **************************/
1780 r01 = _fjsp_mul_v2r8(rsq01,rinv01);
1782 /* EWALD ELECTROSTATICS */
1784 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1785 ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
1786 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1787 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1788 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1790 gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1791 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1792 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
1796 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1798 /* Update vectorial force */
1799 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
1800 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1801 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1803 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1804 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1805 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1807 /**************************
1808 * CALCULATE INTERACTIONS *
1809 **************************/
1811 r02 = _fjsp_mul_v2r8(rsq02,rinv02);
1813 /* EWALD ELECTROSTATICS */
1815 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1816 ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
1817 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1818 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1819 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1821 gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1822 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1823 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
1827 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1829 /* Update vectorial force */
1830 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
1831 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1832 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1834 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1835 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1836 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1838 /**************************
1839 * CALCULATE INTERACTIONS *
1840 **************************/
1842 r10 = _fjsp_mul_v2r8(rsq10,rinv10);
1844 /* EWALD ELECTROSTATICS */
1846 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1847 ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
1848 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1849 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1850 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1852 gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1853 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1854 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
1858 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1860 /* Update vectorial force */
1861 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
1862 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1863 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1865 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1866 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1867 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1869 /**************************
1870 * CALCULATE INTERACTIONS *
1871 **************************/
1873 r11 = _fjsp_mul_v2r8(rsq11,rinv11);
1875 /* EWALD ELECTROSTATICS */
1877 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1878 ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
1879 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1880 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1881 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1883 gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1884 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1885 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
1889 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1891 /* Update vectorial force */
1892 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
1893 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1894 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1896 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1897 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1898 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1900 /**************************
1901 * CALCULATE INTERACTIONS *
1902 **************************/
1904 r12 = _fjsp_mul_v2r8(rsq12,rinv12);
1906 /* EWALD ELECTROSTATICS */
1908 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1909 ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
1910 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1911 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1912 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1914 gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1915 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1916 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
1920 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1922 /* Update vectorial force */
1923 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
1924 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1925 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1927 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1928 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1929 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1931 /**************************
1932 * CALCULATE INTERACTIONS *
1933 **************************/
1935 r20 = _fjsp_mul_v2r8(rsq20,rinv20);
1937 /* EWALD ELECTROSTATICS */
1939 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1940 ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
1941 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1942 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1943 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1945 gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1946 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1947 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
1951 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1953 /* Update vectorial force */
1954 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
1955 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1956 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1958 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1959 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1960 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1962 /**************************
1963 * CALCULATE INTERACTIONS *
1964 **************************/
1966 r21 = _fjsp_mul_v2r8(rsq21,rinv21);
1968 /* EWALD ELECTROSTATICS */
1970 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1971 ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
1972 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1973 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1974 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1976 gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1977 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1978 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
1982 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1984 /* Update vectorial force */
1985 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
1986 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1987 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1989 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1990 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1991 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1993 /**************************
1994 * CALCULATE INTERACTIONS *
1995 **************************/
1997 r22 = _fjsp_mul_v2r8(rsq22,rinv22);
1999 /* EWALD ELECTROSTATICS */
2001 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2002 ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
2003 itab_tmp = _fjsp_dtox_v2r8(ewrt);
2004 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2005 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2007 gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2008 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2009 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
2013 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2015 /* Update vectorial force */
2016 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
2017 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
2018 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
2020 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
2021 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
2022 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
2024 gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
2026 /* Inner loop uses 358 flops */
2029 /* End of innermost loop */
2031 gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
2032 f+i_coord_offset,fshift+i_shift_offset);
2034 /* Increment number of inner iterations */
2035 inneriter += j_index_end - j_index_start;
2037 /* Outer loop uses 18 flops */
2040 /* Increment number of outer iterations */
2043 /* Update outer/inner flops */
2045 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*358);