2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
42 #include "../nb_kernel.h"
43 #include "types/simple.h"
44 #include "gromacs/math/vec.h"
47 #include "kernelutil_sparc64_hpc_ace_double.h"
50 * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
51 * Electrostatics interaction: CubicSplineTable
52 * VdW interaction: None
53 * Geometry: Water3-Water3
54 * Calculate force/pot: PotentialAndForce
57 nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
58 (t_nblist * gmx_restrict nlist,
59 rvec * gmx_restrict xx,
60 rvec * gmx_restrict ff,
61 t_forcerec * gmx_restrict fr,
62 t_mdatoms * gmx_restrict mdatoms,
63 nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
64 t_nrnb * gmx_restrict nrnb)
66 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
67 * just 0 for non-waters.
68 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
69 * jnr indices corresponding to data put in the two positions in the SIMD register.
71 int i_shift_offset,i_coord_offset,outeriter,inneriter;
72 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
74 int j_coord_offsetA,j_coord_offsetB;
75 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
77 real *shiftvec,*fshift,*x,*f;
78 _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
80 _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
82 _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
84 _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
85 int vdwjidx0A,vdwjidx0B;
86 _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
87 int vdwjidx1A,vdwjidx1B;
88 _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
89 int vdwjidx2A,vdwjidx2B;
90 _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
91 _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
92 _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
93 _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
94 _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
95 _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
96 _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
97 _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
98 _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
99 _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
100 _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
102 _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
105 _fjsp_v2r8 dummy_mask,cutoff_mask;
106 _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
107 _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
108 union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
115 jindex = nlist->jindex;
117 shiftidx = nlist->shift;
119 shiftvec = fr->shift_vec[0];
120 fshift = fr->fshift[0];
121 facel = gmx_fjsp_set1_v2r8(fr->epsfac);
122 charge = mdatoms->chargeA;
124 vftab = kernel_data->table_elec->data;
125 vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
127 /* Setup water-specific parameters */
128 inr = nlist->iinr[0];
129 iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
130 iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
131 iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
133 jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
134 jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
135 jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
136 qq00 = _fjsp_mul_v2r8(iq0,jq0);
137 qq01 = _fjsp_mul_v2r8(iq0,jq1);
138 qq02 = _fjsp_mul_v2r8(iq0,jq2);
139 qq10 = _fjsp_mul_v2r8(iq1,jq0);
140 qq11 = _fjsp_mul_v2r8(iq1,jq1);
141 qq12 = _fjsp_mul_v2r8(iq1,jq2);
142 qq20 = _fjsp_mul_v2r8(iq2,jq0);
143 qq21 = _fjsp_mul_v2r8(iq2,jq1);
144 qq22 = _fjsp_mul_v2r8(iq2,jq2);
146 /* Avoid stupid compiler warnings */
154 /* Start outer loop over neighborlists */
155 for(iidx=0; iidx<nri; iidx++)
157 /* Load shift vector for this list */
158 i_shift_offset = DIM*shiftidx[iidx];
160 /* Load limits for loop over neighbors */
161 j_index_start = jindex[iidx];
162 j_index_end = jindex[iidx+1];
164 /* Get outer coordinate index */
166 i_coord_offset = DIM*inr;
168 /* Load i particle coords and add shift vector */
169 gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
170 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
172 fix0 = _fjsp_setzero_v2r8();
173 fiy0 = _fjsp_setzero_v2r8();
174 fiz0 = _fjsp_setzero_v2r8();
175 fix1 = _fjsp_setzero_v2r8();
176 fiy1 = _fjsp_setzero_v2r8();
177 fiz1 = _fjsp_setzero_v2r8();
178 fix2 = _fjsp_setzero_v2r8();
179 fiy2 = _fjsp_setzero_v2r8();
180 fiz2 = _fjsp_setzero_v2r8();
182 /* Reset potential sums */
183 velecsum = _fjsp_setzero_v2r8();
185 /* Start inner kernel loop */
186 for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
189 /* Get j neighbor index, and coordinate index */
192 j_coord_offsetA = DIM*jnrA;
193 j_coord_offsetB = DIM*jnrB;
195 /* load j atom coordinates */
196 gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
197 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
199 /* Calculate displacement vector */
200 dx00 = _fjsp_sub_v2r8(ix0,jx0);
201 dy00 = _fjsp_sub_v2r8(iy0,jy0);
202 dz00 = _fjsp_sub_v2r8(iz0,jz0);
203 dx01 = _fjsp_sub_v2r8(ix0,jx1);
204 dy01 = _fjsp_sub_v2r8(iy0,jy1);
205 dz01 = _fjsp_sub_v2r8(iz0,jz1);
206 dx02 = _fjsp_sub_v2r8(ix0,jx2);
207 dy02 = _fjsp_sub_v2r8(iy0,jy2);
208 dz02 = _fjsp_sub_v2r8(iz0,jz2);
209 dx10 = _fjsp_sub_v2r8(ix1,jx0);
210 dy10 = _fjsp_sub_v2r8(iy1,jy0);
211 dz10 = _fjsp_sub_v2r8(iz1,jz0);
212 dx11 = _fjsp_sub_v2r8(ix1,jx1);
213 dy11 = _fjsp_sub_v2r8(iy1,jy1);
214 dz11 = _fjsp_sub_v2r8(iz1,jz1);
215 dx12 = _fjsp_sub_v2r8(ix1,jx2);
216 dy12 = _fjsp_sub_v2r8(iy1,jy2);
217 dz12 = _fjsp_sub_v2r8(iz1,jz2);
218 dx20 = _fjsp_sub_v2r8(ix2,jx0);
219 dy20 = _fjsp_sub_v2r8(iy2,jy0);
220 dz20 = _fjsp_sub_v2r8(iz2,jz0);
221 dx21 = _fjsp_sub_v2r8(ix2,jx1);
222 dy21 = _fjsp_sub_v2r8(iy2,jy1);
223 dz21 = _fjsp_sub_v2r8(iz2,jz1);
224 dx22 = _fjsp_sub_v2r8(ix2,jx2);
225 dy22 = _fjsp_sub_v2r8(iy2,jy2);
226 dz22 = _fjsp_sub_v2r8(iz2,jz2);
228 /* Calculate squared distance and things based on it */
229 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
230 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
231 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
232 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
233 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
234 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
235 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
236 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
237 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
239 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
240 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
241 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
242 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
243 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
244 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
245 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
246 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
247 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
249 fjx0 = _fjsp_setzero_v2r8();
250 fjy0 = _fjsp_setzero_v2r8();
251 fjz0 = _fjsp_setzero_v2r8();
252 fjx1 = _fjsp_setzero_v2r8();
253 fjy1 = _fjsp_setzero_v2r8();
254 fjz1 = _fjsp_setzero_v2r8();
255 fjx2 = _fjsp_setzero_v2r8();
256 fjy2 = _fjsp_setzero_v2r8();
257 fjz2 = _fjsp_setzero_v2r8();
259 /**************************
260 * CALCULATE INTERACTIONS *
261 **************************/
263 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
265 /* Calculate table index by multiplying r with table scale and truncate to integer */
266 rt = _fjsp_mul_v2r8(r00,vftabscale);
267 itab_tmp = _fjsp_dtox_v2r8(rt);
268 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
269 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
270 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
275 /* CUBIC SPLINE TABLE ELECTROSTATICS */
276 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
277 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
278 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
279 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
280 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
281 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
282 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
283 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
284 velec = _fjsp_mul_v2r8(qq00,VV);
285 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
286 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
288 /* Update potential sum for this i atom from the interaction with this j atom. */
289 velecsum = _fjsp_add_v2r8(velecsum,velec);
293 /* Update vectorial force */
294 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
295 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
296 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
298 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
299 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
300 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
302 /**************************
303 * CALCULATE INTERACTIONS *
304 **************************/
306 r01 = _fjsp_mul_v2r8(rsq01,rinv01);
308 /* Calculate table index by multiplying r with table scale and truncate to integer */
309 rt = _fjsp_mul_v2r8(r01,vftabscale);
310 itab_tmp = _fjsp_dtox_v2r8(rt);
311 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
312 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
313 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
318 /* CUBIC SPLINE TABLE ELECTROSTATICS */
319 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
320 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
321 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
322 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
323 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
324 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
325 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
326 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
327 velec = _fjsp_mul_v2r8(qq01,VV);
328 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
329 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
331 /* Update potential sum for this i atom from the interaction with this j atom. */
332 velecsum = _fjsp_add_v2r8(velecsum,velec);
336 /* Update vectorial force */
337 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
338 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
339 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
341 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
342 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
343 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
345 /**************************
346 * CALCULATE INTERACTIONS *
347 **************************/
349 r02 = _fjsp_mul_v2r8(rsq02,rinv02);
351 /* Calculate table index by multiplying r with table scale and truncate to integer */
352 rt = _fjsp_mul_v2r8(r02,vftabscale);
353 itab_tmp = _fjsp_dtox_v2r8(rt);
354 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
355 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
356 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
361 /* CUBIC SPLINE TABLE ELECTROSTATICS */
362 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
363 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
364 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
365 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
366 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
367 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
368 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
369 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
370 velec = _fjsp_mul_v2r8(qq02,VV);
371 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
372 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
374 /* Update potential sum for this i atom from the interaction with this j atom. */
375 velecsum = _fjsp_add_v2r8(velecsum,velec);
379 /* Update vectorial force */
380 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
381 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
382 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
384 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
385 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
386 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
388 /**************************
389 * CALCULATE INTERACTIONS *
390 **************************/
392 r10 = _fjsp_mul_v2r8(rsq10,rinv10);
394 /* Calculate table index by multiplying r with table scale and truncate to integer */
395 rt = _fjsp_mul_v2r8(r10,vftabscale);
396 itab_tmp = _fjsp_dtox_v2r8(rt);
397 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
398 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
399 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
404 /* CUBIC SPLINE TABLE ELECTROSTATICS */
405 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
406 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
407 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
408 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
409 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
410 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
411 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
412 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
413 velec = _fjsp_mul_v2r8(qq10,VV);
414 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
415 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
417 /* Update potential sum for this i atom from the interaction with this j atom. */
418 velecsum = _fjsp_add_v2r8(velecsum,velec);
422 /* Update vectorial force */
423 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
424 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
425 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
427 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
428 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
429 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
431 /**************************
432 * CALCULATE INTERACTIONS *
433 **************************/
435 r11 = _fjsp_mul_v2r8(rsq11,rinv11);
437 /* Calculate table index by multiplying r with table scale and truncate to integer */
438 rt = _fjsp_mul_v2r8(r11,vftabscale);
439 itab_tmp = _fjsp_dtox_v2r8(rt);
440 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
441 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
442 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
447 /* CUBIC SPLINE TABLE ELECTROSTATICS */
448 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
449 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
450 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
451 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
452 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
453 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
454 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
455 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
456 velec = _fjsp_mul_v2r8(qq11,VV);
457 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
458 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
460 /* Update potential sum for this i atom from the interaction with this j atom. */
461 velecsum = _fjsp_add_v2r8(velecsum,velec);
465 /* Update vectorial force */
466 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
467 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
468 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
470 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
471 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
472 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
474 /**************************
475 * CALCULATE INTERACTIONS *
476 **************************/
478 r12 = _fjsp_mul_v2r8(rsq12,rinv12);
480 /* Calculate table index by multiplying r with table scale and truncate to integer */
481 rt = _fjsp_mul_v2r8(r12,vftabscale);
482 itab_tmp = _fjsp_dtox_v2r8(rt);
483 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
484 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
485 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
490 /* CUBIC SPLINE TABLE ELECTROSTATICS */
491 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
492 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
493 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
494 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
495 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
496 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
497 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
498 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
499 velec = _fjsp_mul_v2r8(qq12,VV);
500 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
501 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
503 /* Update potential sum for this i atom from the interaction with this j atom. */
504 velecsum = _fjsp_add_v2r8(velecsum,velec);
508 /* Update vectorial force */
509 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
510 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
511 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
513 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
514 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
515 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
517 /**************************
518 * CALCULATE INTERACTIONS *
519 **************************/
521 r20 = _fjsp_mul_v2r8(rsq20,rinv20);
523 /* Calculate table index by multiplying r with table scale and truncate to integer */
524 rt = _fjsp_mul_v2r8(r20,vftabscale);
525 itab_tmp = _fjsp_dtox_v2r8(rt);
526 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
527 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
528 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
533 /* CUBIC SPLINE TABLE ELECTROSTATICS */
534 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
535 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
536 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
537 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
538 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
539 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
540 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
541 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
542 velec = _fjsp_mul_v2r8(qq20,VV);
543 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
544 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
546 /* Update potential sum for this i atom from the interaction with this j atom. */
547 velecsum = _fjsp_add_v2r8(velecsum,velec);
551 /* Update vectorial force */
552 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
553 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
554 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
556 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
557 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
558 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
560 /**************************
561 * CALCULATE INTERACTIONS *
562 **************************/
564 r21 = _fjsp_mul_v2r8(rsq21,rinv21);
566 /* Calculate table index by multiplying r with table scale and truncate to integer */
567 rt = _fjsp_mul_v2r8(r21,vftabscale);
568 itab_tmp = _fjsp_dtox_v2r8(rt);
569 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
570 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
571 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
576 /* CUBIC SPLINE TABLE ELECTROSTATICS */
577 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
578 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
579 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
580 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
581 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
582 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
583 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
584 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
585 velec = _fjsp_mul_v2r8(qq21,VV);
586 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
587 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
589 /* Update potential sum for this i atom from the interaction with this j atom. */
590 velecsum = _fjsp_add_v2r8(velecsum,velec);
594 /* Update vectorial force */
595 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
596 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
597 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
599 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
600 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
601 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
603 /**************************
604 * CALCULATE INTERACTIONS *
605 **************************/
607 r22 = _fjsp_mul_v2r8(rsq22,rinv22);
609 /* Calculate table index by multiplying r with table scale and truncate to integer */
610 rt = _fjsp_mul_v2r8(r22,vftabscale);
611 itab_tmp = _fjsp_dtox_v2r8(rt);
612 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
613 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
614 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
619 /* CUBIC SPLINE TABLE ELECTROSTATICS */
620 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
621 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
622 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
623 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
624 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
625 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
626 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
627 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
628 velec = _fjsp_mul_v2r8(qq22,VV);
629 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
630 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
632 /* Update potential sum for this i atom from the interaction with this j atom. */
633 velecsum = _fjsp_add_v2r8(velecsum,velec);
637 /* Update vectorial force */
638 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
639 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
640 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
642 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
643 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
644 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
646 gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
648 /* Inner loop uses 414 flops */
655 j_coord_offsetA = DIM*jnrA;
657 /* load j atom coordinates */
658 gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
659 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
661 /* Calculate displacement vector */
662 dx00 = _fjsp_sub_v2r8(ix0,jx0);
663 dy00 = _fjsp_sub_v2r8(iy0,jy0);
664 dz00 = _fjsp_sub_v2r8(iz0,jz0);
665 dx01 = _fjsp_sub_v2r8(ix0,jx1);
666 dy01 = _fjsp_sub_v2r8(iy0,jy1);
667 dz01 = _fjsp_sub_v2r8(iz0,jz1);
668 dx02 = _fjsp_sub_v2r8(ix0,jx2);
669 dy02 = _fjsp_sub_v2r8(iy0,jy2);
670 dz02 = _fjsp_sub_v2r8(iz0,jz2);
671 dx10 = _fjsp_sub_v2r8(ix1,jx0);
672 dy10 = _fjsp_sub_v2r8(iy1,jy0);
673 dz10 = _fjsp_sub_v2r8(iz1,jz0);
674 dx11 = _fjsp_sub_v2r8(ix1,jx1);
675 dy11 = _fjsp_sub_v2r8(iy1,jy1);
676 dz11 = _fjsp_sub_v2r8(iz1,jz1);
677 dx12 = _fjsp_sub_v2r8(ix1,jx2);
678 dy12 = _fjsp_sub_v2r8(iy1,jy2);
679 dz12 = _fjsp_sub_v2r8(iz1,jz2);
680 dx20 = _fjsp_sub_v2r8(ix2,jx0);
681 dy20 = _fjsp_sub_v2r8(iy2,jy0);
682 dz20 = _fjsp_sub_v2r8(iz2,jz0);
683 dx21 = _fjsp_sub_v2r8(ix2,jx1);
684 dy21 = _fjsp_sub_v2r8(iy2,jy1);
685 dz21 = _fjsp_sub_v2r8(iz2,jz1);
686 dx22 = _fjsp_sub_v2r8(ix2,jx2);
687 dy22 = _fjsp_sub_v2r8(iy2,jy2);
688 dz22 = _fjsp_sub_v2r8(iz2,jz2);
690 /* Calculate squared distance and things based on it */
691 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
692 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
693 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
694 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
695 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
696 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
697 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
698 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
699 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
701 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
702 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
703 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
704 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
705 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
706 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
707 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
708 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
709 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
711 fjx0 = _fjsp_setzero_v2r8();
712 fjy0 = _fjsp_setzero_v2r8();
713 fjz0 = _fjsp_setzero_v2r8();
714 fjx1 = _fjsp_setzero_v2r8();
715 fjy1 = _fjsp_setzero_v2r8();
716 fjz1 = _fjsp_setzero_v2r8();
717 fjx2 = _fjsp_setzero_v2r8();
718 fjy2 = _fjsp_setzero_v2r8();
719 fjz2 = _fjsp_setzero_v2r8();
721 /**************************
722 * CALCULATE INTERACTIONS *
723 **************************/
725 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
727 /* Calculate table index by multiplying r with table scale and truncate to integer */
728 rt = _fjsp_mul_v2r8(r00,vftabscale);
729 itab_tmp = _fjsp_dtox_v2r8(rt);
730 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
731 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
732 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
737 /* CUBIC SPLINE TABLE ELECTROSTATICS */
738 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
739 F = _fjsp_setzero_v2r8();
740 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
741 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
742 H = _fjsp_setzero_v2r8();
743 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
744 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
745 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
746 velec = _fjsp_mul_v2r8(qq00,VV);
747 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
748 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
750 /* Update potential sum for this i atom from the interaction with this j atom. */
751 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
752 velecsum = _fjsp_add_v2r8(velecsum,velec);
756 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
758 /* Update vectorial force */
759 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
760 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
761 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
763 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
764 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
765 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
767 /**************************
768 * CALCULATE INTERACTIONS *
769 **************************/
771 r01 = _fjsp_mul_v2r8(rsq01,rinv01);
773 /* Calculate table index by multiplying r with table scale and truncate to integer */
774 rt = _fjsp_mul_v2r8(r01,vftabscale);
775 itab_tmp = _fjsp_dtox_v2r8(rt);
776 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
777 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
778 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
783 /* CUBIC SPLINE TABLE ELECTROSTATICS */
784 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
785 F = _fjsp_setzero_v2r8();
786 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
787 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
788 H = _fjsp_setzero_v2r8();
789 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
790 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
791 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
792 velec = _fjsp_mul_v2r8(qq01,VV);
793 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
794 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
796 /* Update potential sum for this i atom from the interaction with this j atom. */
797 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
798 velecsum = _fjsp_add_v2r8(velecsum,velec);
802 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
804 /* Update vectorial force */
805 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
806 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
807 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
809 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
810 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
811 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
813 /**************************
814 * CALCULATE INTERACTIONS *
815 **************************/
817 r02 = _fjsp_mul_v2r8(rsq02,rinv02);
819 /* Calculate table index by multiplying r with table scale and truncate to integer */
820 rt = _fjsp_mul_v2r8(r02,vftabscale);
821 itab_tmp = _fjsp_dtox_v2r8(rt);
822 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
823 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
824 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
829 /* CUBIC SPLINE TABLE ELECTROSTATICS */
830 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
831 F = _fjsp_setzero_v2r8();
832 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
833 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
834 H = _fjsp_setzero_v2r8();
835 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
836 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
837 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
838 velec = _fjsp_mul_v2r8(qq02,VV);
839 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
840 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
842 /* Update potential sum for this i atom from the interaction with this j atom. */
843 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
844 velecsum = _fjsp_add_v2r8(velecsum,velec);
848 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
850 /* Update vectorial force */
851 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
852 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
853 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
855 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
856 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
857 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
859 /**************************
860 * CALCULATE INTERACTIONS *
861 **************************/
863 r10 = _fjsp_mul_v2r8(rsq10,rinv10);
865 /* Calculate table index by multiplying r with table scale and truncate to integer */
866 rt = _fjsp_mul_v2r8(r10,vftabscale);
867 itab_tmp = _fjsp_dtox_v2r8(rt);
868 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
869 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
870 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
875 /* CUBIC SPLINE TABLE ELECTROSTATICS */
876 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
877 F = _fjsp_setzero_v2r8();
878 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
879 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
880 H = _fjsp_setzero_v2r8();
881 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
882 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
883 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
884 velec = _fjsp_mul_v2r8(qq10,VV);
885 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
886 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
888 /* Update potential sum for this i atom from the interaction with this j atom. */
889 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
890 velecsum = _fjsp_add_v2r8(velecsum,velec);
894 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
896 /* Update vectorial force */
897 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
898 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
899 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
901 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
902 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
903 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
905 /**************************
906 * CALCULATE INTERACTIONS *
907 **************************/
909 r11 = _fjsp_mul_v2r8(rsq11,rinv11);
911 /* Calculate table index by multiplying r with table scale and truncate to integer */
912 rt = _fjsp_mul_v2r8(r11,vftabscale);
913 itab_tmp = _fjsp_dtox_v2r8(rt);
914 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
915 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
916 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
921 /* CUBIC SPLINE TABLE ELECTROSTATICS */
922 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
923 F = _fjsp_setzero_v2r8();
924 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
925 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
926 H = _fjsp_setzero_v2r8();
927 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
928 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
929 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
930 velec = _fjsp_mul_v2r8(qq11,VV);
931 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
932 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
934 /* Update potential sum for this i atom from the interaction with this j atom. */
935 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
936 velecsum = _fjsp_add_v2r8(velecsum,velec);
940 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
942 /* Update vectorial force */
943 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
944 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
945 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
947 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
948 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
949 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
951 /**************************
952 * CALCULATE INTERACTIONS *
953 **************************/
955 r12 = _fjsp_mul_v2r8(rsq12,rinv12);
957 /* Calculate table index by multiplying r with table scale and truncate to integer */
958 rt = _fjsp_mul_v2r8(r12,vftabscale);
959 itab_tmp = _fjsp_dtox_v2r8(rt);
960 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
961 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
962 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
967 /* CUBIC SPLINE TABLE ELECTROSTATICS */
968 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
969 F = _fjsp_setzero_v2r8();
970 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
971 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
972 H = _fjsp_setzero_v2r8();
973 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
974 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
975 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
976 velec = _fjsp_mul_v2r8(qq12,VV);
977 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
978 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
980 /* Update potential sum for this i atom from the interaction with this j atom. */
981 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
982 velecsum = _fjsp_add_v2r8(velecsum,velec);
986 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
988 /* Update vectorial force */
989 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
990 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
991 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
993 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
994 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
995 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
997 /**************************
998 * CALCULATE INTERACTIONS *
999 **************************/
1001 r20 = _fjsp_mul_v2r8(rsq20,rinv20);
1003 /* Calculate table index by multiplying r with table scale and truncate to integer */
1004 rt = _fjsp_mul_v2r8(r20,vftabscale);
1005 itab_tmp = _fjsp_dtox_v2r8(rt);
1006 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1007 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1008 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1013 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1014 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1015 F = _fjsp_setzero_v2r8();
1016 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1017 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1018 H = _fjsp_setzero_v2r8();
1019 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1020 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1021 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
1022 velec = _fjsp_mul_v2r8(qq20,VV);
1023 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1024 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
1026 /* Update potential sum for this i atom from the interaction with this j atom. */
1027 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1028 velecsum = _fjsp_add_v2r8(velecsum,velec);
1032 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1034 /* Update vectorial force */
1035 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
1036 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1037 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1039 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1040 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1041 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1043 /**************************
1044 * CALCULATE INTERACTIONS *
1045 **************************/
1047 r21 = _fjsp_mul_v2r8(rsq21,rinv21);
1049 /* Calculate table index by multiplying r with table scale and truncate to integer */
1050 rt = _fjsp_mul_v2r8(r21,vftabscale);
1051 itab_tmp = _fjsp_dtox_v2r8(rt);
1052 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1053 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1054 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1059 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1060 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1061 F = _fjsp_setzero_v2r8();
1062 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1063 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1064 H = _fjsp_setzero_v2r8();
1065 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1066 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1067 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
1068 velec = _fjsp_mul_v2r8(qq21,VV);
1069 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1070 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
1072 /* Update potential sum for this i atom from the interaction with this j atom. */
1073 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1074 velecsum = _fjsp_add_v2r8(velecsum,velec);
1078 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1080 /* Update vectorial force */
1081 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
1082 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1083 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1085 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1086 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1087 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1089 /**************************
1090 * CALCULATE INTERACTIONS *
1091 **************************/
1093 r22 = _fjsp_mul_v2r8(rsq22,rinv22);
1095 /* Calculate table index by multiplying r with table scale and truncate to integer */
1096 rt = _fjsp_mul_v2r8(r22,vftabscale);
1097 itab_tmp = _fjsp_dtox_v2r8(rt);
1098 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1099 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1100 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1105 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1106 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1107 F = _fjsp_setzero_v2r8();
1108 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1109 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1110 H = _fjsp_setzero_v2r8();
1111 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1112 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1113 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
1114 velec = _fjsp_mul_v2r8(qq22,VV);
1115 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1116 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
1118 /* Update potential sum for this i atom from the interaction with this j atom. */
1119 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1120 velecsum = _fjsp_add_v2r8(velecsum,velec);
1124 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1126 /* Update vectorial force */
1127 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
1128 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1129 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1131 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1132 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1133 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1135 gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1137 /* Inner loop uses 414 flops */
1140 /* End of innermost loop */
1142 gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
1143 f+i_coord_offset,fshift+i_shift_offset);
1146 /* Update potential energies */
1147 gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
1149 /* Increment number of inner iterations */
1150 inneriter += j_index_end - j_index_start;
1152 /* Outer loop uses 19 flops */
1155 /* Increment number of outer iterations */
1158 /* Update outer/inner flops */
1160 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*19 + inneriter*414);
1163 * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
1164 * Electrostatics interaction: CubicSplineTable
1165 * VdW interaction: None
1166 * Geometry: Water3-Water3
1167 * Calculate force/pot: Force
1170 nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
1171 (t_nblist * gmx_restrict nlist,
1172 rvec * gmx_restrict xx,
1173 rvec * gmx_restrict ff,
1174 t_forcerec * gmx_restrict fr,
1175 t_mdatoms * gmx_restrict mdatoms,
1176 nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
1177 t_nrnb * gmx_restrict nrnb)
1179 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1180 * just 0 for non-waters.
1181 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
1182 * jnr indices corresponding to data put in the two positions in the SIMD register.
1184 int i_shift_offset,i_coord_offset,outeriter,inneriter;
1185 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
1187 int j_coord_offsetA,j_coord_offsetB;
1188 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
1189 real rcutoff_scalar;
1190 real *shiftvec,*fshift,*x,*f;
1191 _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1193 _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
1195 _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1197 _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1198 int vdwjidx0A,vdwjidx0B;
1199 _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
1200 int vdwjidx1A,vdwjidx1B;
1201 _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1202 int vdwjidx2A,vdwjidx2B;
1203 _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1204 _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
1205 _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
1206 _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
1207 _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
1208 _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1209 _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1210 _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
1211 _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1212 _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1213 _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
1215 _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
1217 _fjsp_v2r8 itab_tmp;
1218 _fjsp_v2r8 dummy_mask,cutoff_mask;
1219 _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
1220 _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
1221 union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
1228 jindex = nlist->jindex;
1230 shiftidx = nlist->shift;
1232 shiftvec = fr->shift_vec[0];
1233 fshift = fr->fshift[0];
1234 facel = gmx_fjsp_set1_v2r8(fr->epsfac);
1235 charge = mdatoms->chargeA;
1237 vftab = kernel_data->table_elec->data;
1238 vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
1240 /* Setup water-specific parameters */
1241 inr = nlist->iinr[0];
1242 iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
1243 iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
1244 iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
1246 jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
1247 jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
1248 jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
1249 qq00 = _fjsp_mul_v2r8(iq0,jq0);
1250 qq01 = _fjsp_mul_v2r8(iq0,jq1);
1251 qq02 = _fjsp_mul_v2r8(iq0,jq2);
1252 qq10 = _fjsp_mul_v2r8(iq1,jq0);
1253 qq11 = _fjsp_mul_v2r8(iq1,jq1);
1254 qq12 = _fjsp_mul_v2r8(iq1,jq2);
1255 qq20 = _fjsp_mul_v2r8(iq2,jq0);
1256 qq21 = _fjsp_mul_v2r8(iq2,jq1);
1257 qq22 = _fjsp_mul_v2r8(iq2,jq2);
1259 /* Avoid stupid compiler warnings */
1261 j_coord_offsetA = 0;
1262 j_coord_offsetB = 0;
1267 /* Start outer loop over neighborlists */
1268 for(iidx=0; iidx<nri; iidx++)
1270 /* Load shift vector for this list */
1271 i_shift_offset = DIM*shiftidx[iidx];
1273 /* Load limits for loop over neighbors */
1274 j_index_start = jindex[iidx];
1275 j_index_end = jindex[iidx+1];
1277 /* Get outer coordinate index */
1279 i_coord_offset = DIM*inr;
1281 /* Load i particle coords and add shift vector */
1282 gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
1283 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
1285 fix0 = _fjsp_setzero_v2r8();
1286 fiy0 = _fjsp_setzero_v2r8();
1287 fiz0 = _fjsp_setzero_v2r8();
1288 fix1 = _fjsp_setzero_v2r8();
1289 fiy1 = _fjsp_setzero_v2r8();
1290 fiz1 = _fjsp_setzero_v2r8();
1291 fix2 = _fjsp_setzero_v2r8();
1292 fiy2 = _fjsp_setzero_v2r8();
1293 fiz2 = _fjsp_setzero_v2r8();
1295 /* Start inner kernel loop */
1296 for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1299 /* Get j neighbor index, and coordinate index */
1301 jnrB = jjnr[jidx+1];
1302 j_coord_offsetA = DIM*jnrA;
1303 j_coord_offsetB = DIM*jnrB;
1305 /* load j atom coordinates */
1306 gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
1307 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1309 /* Calculate displacement vector */
1310 dx00 = _fjsp_sub_v2r8(ix0,jx0);
1311 dy00 = _fjsp_sub_v2r8(iy0,jy0);
1312 dz00 = _fjsp_sub_v2r8(iz0,jz0);
1313 dx01 = _fjsp_sub_v2r8(ix0,jx1);
1314 dy01 = _fjsp_sub_v2r8(iy0,jy1);
1315 dz01 = _fjsp_sub_v2r8(iz0,jz1);
1316 dx02 = _fjsp_sub_v2r8(ix0,jx2);
1317 dy02 = _fjsp_sub_v2r8(iy0,jy2);
1318 dz02 = _fjsp_sub_v2r8(iz0,jz2);
1319 dx10 = _fjsp_sub_v2r8(ix1,jx0);
1320 dy10 = _fjsp_sub_v2r8(iy1,jy0);
1321 dz10 = _fjsp_sub_v2r8(iz1,jz0);
1322 dx11 = _fjsp_sub_v2r8(ix1,jx1);
1323 dy11 = _fjsp_sub_v2r8(iy1,jy1);
1324 dz11 = _fjsp_sub_v2r8(iz1,jz1);
1325 dx12 = _fjsp_sub_v2r8(ix1,jx2);
1326 dy12 = _fjsp_sub_v2r8(iy1,jy2);
1327 dz12 = _fjsp_sub_v2r8(iz1,jz2);
1328 dx20 = _fjsp_sub_v2r8(ix2,jx0);
1329 dy20 = _fjsp_sub_v2r8(iy2,jy0);
1330 dz20 = _fjsp_sub_v2r8(iz2,jz0);
1331 dx21 = _fjsp_sub_v2r8(ix2,jx1);
1332 dy21 = _fjsp_sub_v2r8(iy2,jy1);
1333 dz21 = _fjsp_sub_v2r8(iz2,jz1);
1334 dx22 = _fjsp_sub_v2r8(ix2,jx2);
1335 dy22 = _fjsp_sub_v2r8(iy2,jy2);
1336 dz22 = _fjsp_sub_v2r8(iz2,jz2);
1338 /* Calculate squared distance and things based on it */
1339 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1340 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1341 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1342 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1343 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1344 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1345 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1346 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1347 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1349 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
1350 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
1351 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
1352 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
1353 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
1354 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
1355 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
1356 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
1357 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
1359 fjx0 = _fjsp_setzero_v2r8();
1360 fjy0 = _fjsp_setzero_v2r8();
1361 fjz0 = _fjsp_setzero_v2r8();
1362 fjx1 = _fjsp_setzero_v2r8();
1363 fjy1 = _fjsp_setzero_v2r8();
1364 fjz1 = _fjsp_setzero_v2r8();
1365 fjx2 = _fjsp_setzero_v2r8();
1366 fjy2 = _fjsp_setzero_v2r8();
1367 fjz2 = _fjsp_setzero_v2r8();
1369 /**************************
1370 * CALCULATE INTERACTIONS *
1371 **************************/
1373 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
1375 /* Calculate table index by multiplying r with table scale and truncate to integer */
1376 rt = _fjsp_mul_v2r8(r00,vftabscale);
1377 itab_tmp = _fjsp_dtox_v2r8(rt);
1378 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1379 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1380 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1385 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1386 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1387 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1388 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1389 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1390 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1391 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1392 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1393 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1394 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
1398 /* Update vectorial force */
1399 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
1400 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1401 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1403 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1404 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1405 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1407 /**************************
1408 * CALCULATE INTERACTIONS *
1409 **************************/
1411 r01 = _fjsp_mul_v2r8(rsq01,rinv01);
1413 /* Calculate table index by multiplying r with table scale and truncate to integer */
1414 rt = _fjsp_mul_v2r8(r01,vftabscale);
1415 itab_tmp = _fjsp_dtox_v2r8(rt);
1416 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1417 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1418 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1423 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1424 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1425 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1426 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1427 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1428 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1429 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1430 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1431 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1432 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
1436 /* Update vectorial force */
1437 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
1438 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1439 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1441 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1442 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1443 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1445 /**************************
1446 * CALCULATE INTERACTIONS *
1447 **************************/
1449 r02 = _fjsp_mul_v2r8(rsq02,rinv02);
1451 /* Calculate table index by multiplying r with table scale and truncate to integer */
1452 rt = _fjsp_mul_v2r8(r02,vftabscale);
1453 itab_tmp = _fjsp_dtox_v2r8(rt);
1454 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1455 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1456 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1461 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1462 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1463 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1464 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1465 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1466 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1467 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1468 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1469 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1470 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
1474 /* Update vectorial force */
1475 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
1476 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1477 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1479 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1480 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1481 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1483 /**************************
1484 * CALCULATE INTERACTIONS *
1485 **************************/
1487 r10 = _fjsp_mul_v2r8(rsq10,rinv10);
1489 /* Calculate table index by multiplying r with table scale and truncate to integer */
1490 rt = _fjsp_mul_v2r8(r10,vftabscale);
1491 itab_tmp = _fjsp_dtox_v2r8(rt);
1492 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1493 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1494 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1499 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1500 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1501 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1502 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1503 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1504 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1505 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1506 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1507 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1508 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
1512 /* Update vectorial force */
1513 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
1514 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1515 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1517 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1518 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1519 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1521 /**************************
1522 * CALCULATE INTERACTIONS *
1523 **************************/
1525 r11 = _fjsp_mul_v2r8(rsq11,rinv11);
1527 /* Calculate table index by multiplying r with table scale and truncate to integer */
1528 rt = _fjsp_mul_v2r8(r11,vftabscale);
1529 itab_tmp = _fjsp_dtox_v2r8(rt);
1530 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1531 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1532 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1537 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1538 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1539 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1540 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1541 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1542 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1543 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1544 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1545 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1546 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
1550 /* Update vectorial force */
1551 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
1552 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1553 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1555 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1556 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1557 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1559 /**************************
1560 * CALCULATE INTERACTIONS *
1561 **************************/
1563 r12 = _fjsp_mul_v2r8(rsq12,rinv12);
1565 /* Calculate table index by multiplying r with table scale and truncate to integer */
1566 rt = _fjsp_mul_v2r8(r12,vftabscale);
1567 itab_tmp = _fjsp_dtox_v2r8(rt);
1568 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1569 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1570 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1575 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1576 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1577 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1578 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1579 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1580 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1581 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1582 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1583 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1584 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
1588 /* Update vectorial force */
1589 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
1590 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1591 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1593 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1594 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1595 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1597 /**************************
1598 * CALCULATE INTERACTIONS *
1599 **************************/
1601 r20 = _fjsp_mul_v2r8(rsq20,rinv20);
1603 /* Calculate table index by multiplying r with table scale and truncate to integer */
1604 rt = _fjsp_mul_v2r8(r20,vftabscale);
1605 itab_tmp = _fjsp_dtox_v2r8(rt);
1606 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1607 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1608 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1613 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1614 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1615 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1616 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1617 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1618 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1619 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1620 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1621 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1622 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
1626 /* Update vectorial force */
1627 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
1628 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1629 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1631 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1632 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1633 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1635 /**************************
1636 * CALCULATE INTERACTIONS *
1637 **************************/
1639 r21 = _fjsp_mul_v2r8(rsq21,rinv21);
1641 /* Calculate table index by multiplying r with table scale and truncate to integer */
1642 rt = _fjsp_mul_v2r8(r21,vftabscale);
1643 itab_tmp = _fjsp_dtox_v2r8(rt);
1644 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1645 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1646 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1651 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1652 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1653 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1654 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1655 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1656 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1657 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1658 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1659 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1660 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
1664 /* Update vectorial force */
1665 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
1666 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1667 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1669 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1670 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1671 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1673 /**************************
1674 * CALCULATE INTERACTIONS *
1675 **************************/
1677 r22 = _fjsp_mul_v2r8(rsq22,rinv22);
1679 /* Calculate table index by multiplying r with table scale and truncate to integer */
1680 rt = _fjsp_mul_v2r8(r22,vftabscale);
1681 itab_tmp = _fjsp_dtox_v2r8(rt);
1682 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1683 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1684 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1689 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1690 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1691 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1692 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1693 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1694 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1695 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1696 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1697 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1698 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
1702 /* Update vectorial force */
1703 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
1704 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1705 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1707 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1708 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1709 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1711 gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1713 /* Inner loop uses 378 flops */
1716 if(jidx<j_index_end)
1720 j_coord_offsetA = DIM*jnrA;
1722 /* load j atom coordinates */
1723 gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
1724 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1726 /* Calculate displacement vector */
1727 dx00 = _fjsp_sub_v2r8(ix0,jx0);
1728 dy00 = _fjsp_sub_v2r8(iy0,jy0);
1729 dz00 = _fjsp_sub_v2r8(iz0,jz0);
1730 dx01 = _fjsp_sub_v2r8(ix0,jx1);
1731 dy01 = _fjsp_sub_v2r8(iy0,jy1);
1732 dz01 = _fjsp_sub_v2r8(iz0,jz1);
1733 dx02 = _fjsp_sub_v2r8(ix0,jx2);
1734 dy02 = _fjsp_sub_v2r8(iy0,jy2);
1735 dz02 = _fjsp_sub_v2r8(iz0,jz2);
1736 dx10 = _fjsp_sub_v2r8(ix1,jx0);
1737 dy10 = _fjsp_sub_v2r8(iy1,jy0);
1738 dz10 = _fjsp_sub_v2r8(iz1,jz0);
1739 dx11 = _fjsp_sub_v2r8(ix1,jx1);
1740 dy11 = _fjsp_sub_v2r8(iy1,jy1);
1741 dz11 = _fjsp_sub_v2r8(iz1,jz1);
1742 dx12 = _fjsp_sub_v2r8(ix1,jx2);
1743 dy12 = _fjsp_sub_v2r8(iy1,jy2);
1744 dz12 = _fjsp_sub_v2r8(iz1,jz2);
1745 dx20 = _fjsp_sub_v2r8(ix2,jx0);
1746 dy20 = _fjsp_sub_v2r8(iy2,jy0);
1747 dz20 = _fjsp_sub_v2r8(iz2,jz0);
1748 dx21 = _fjsp_sub_v2r8(ix2,jx1);
1749 dy21 = _fjsp_sub_v2r8(iy2,jy1);
1750 dz21 = _fjsp_sub_v2r8(iz2,jz1);
1751 dx22 = _fjsp_sub_v2r8(ix2,jx2);
1752 dy22 = _fjsp_sub_v2r8(iy2,jy2);
1753 dz22 = _fjsp_sub_v2r8(iz2,jz2);
1755 /* Calculate squared distance and things based on it */
1756 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1757 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1758 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1759 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1760 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1761 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1762 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1763 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1764 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1766 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
1767 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
1768 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
1769 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
1770 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
1771 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
1772 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
1773 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
1774 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
1776 fjx0 = _fjsp_setzero_v2r8();
1777 fjy0 = _fjsp_setzero_v2r8();
1778 fjz0 = _fjsp_setzero_v2r8();
1779 fjx1 = _fjsp_setzero_v2r8();
1780 fjy1 = _fjsp_setzero_v2r8();
1781 fjz1 = _fjsp_setzero_v2r8();
1782 fjx2 = _fjsp_setzero_v2r8();
1783 fjy2 = _fjsp_setzero_v2r8();
1784 fjz2 = _fjsp_setzero_v2r8();
1786 /**************************
1787 * CALCULATE INTERACTIONS *
1788 **************************/
1790 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
1792 /* Calculate table index by multiplying r with table scale and truncate to integer */
1793 rt = _fjsp_mul_v2r8(r00,vftabscale);
1794 itab_tmp = _fjsp_dtox_v2r8(rt);
1795 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1796 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1797 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1802 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1803 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1804 F = _fjsp_setzero_v2r8();
1805 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1806 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1807 H = _fjsp_setzero_v2r8();
1808 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1809 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1810 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1811 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
1815 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1817 /* Update vectorial force */
1818 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
1819 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1820 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1822 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1823 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1824 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1826 /**************************
1827 * CALCULATE INTERACTIONS *
1828 **************************/
1830 r01 = _fjsp_mul_v2r8(rsq01,rinv01);
1832 /* Calculate table index by multiplying r with table scale and truncate to integer */
1833 rt = _fjsp_mul_v2r8(r01,vftabscale);
1834 itab_tmp = _fjsp_dtox_v2r8(rt);
1835 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1836 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1837 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1842 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1843 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1844 F = _fjsp_setzero_v2r8();
1845 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1846 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1847 H = _fjsp_setzero_v2r8();
1848 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1849 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1850 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1851 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
1855 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1857 /* Update vectorial force */
1858 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
1859 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1860 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1862 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1863 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1864 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1866 /**************************
1867 * CALCULATE INTERACTIONS *
1868 **************************/
1870 r02 = _fjsp_mul_v2r8(rsq02,rinv02);
1872 /* Calculate table index by multiplying r with table scale and truncate to integer */
1873 rt = _fjsp_mul_v2r8(r02,vftabscale);
1874 itab_tmp = _fjsp_dtox_v2r8(rt);
1875 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1876 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1877 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1882 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1883 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1884 F = _fjsp_setzero_v2r8();
1885 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1886 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1887 H = _fjsp_setzero_v2r8();
1888 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1889 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1890 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1891 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
1895 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1897 /* Update vectorial force */
1898 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
1899 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1900 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1902 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1903 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1904 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1906 /**************************
1907 * CALCULATE INTERACTIONS *
1908 **************************/
1910 r10 = _fjsp_mul_v2r8(rsq10,rinv10);
1912 /* Calculate table index by multiplying r with table scale and truncate to integer */
1913 rt = _fjsp_mul_v2r8(r10,vftabscale);
1914 itab_tmp = _fjsp_dtox_v2r8(rt);
1915 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1916 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1917 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1922 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1923 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1924 F = _fjsp_setzero_v2r8();
1925 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1926 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1927 H = _fjsp_setzero_v2r8();
1928 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1929 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1930 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1931 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
1935 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1937 /* Update vectorial force */
1938 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
1939 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1940 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1942 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1943 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1944 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1946 /**************************
1947 * CALCULATE INTERACTIONS *
1948 **************************/
1950 r11 = _fjsp_mul_v2r8(rsq11,rinv11);
1952 /* Calculate table index by multiplying r with table scale and truncate to integer */
1953 rt = _fjsp_mul_v2r8(r11,vftabscale);
1954 itab_tmp = _fjsp_dtox_v2r8(rt);
1955 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1956 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1957 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1962 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1963 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1964 F = _fjsp_setzero_v2r8();
1965 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1966 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1967 H = _fjsp_setzero_v2r8();
1968 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1969 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1970 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1971 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
1975 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1977 /* Update vectorial force */
1978 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
1979 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1980 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1982 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1983 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1984 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1986 /**************************
1987 * CALCULATE INTERACTIONS *
1988 **************************/
1990 r12 = _fjsp_mul_v2r8(rsq12,rinv12);
1992 /* Calculate table index by multiplying r with table scale and truncate to integer */
1993 rt = _fjsp_mul_v2r8(r12,vftabscale);
1994 itab_tmp = _fjsp_dtox_v2r8(rt);
1995 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1996 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1997 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2002 /* CUBIC SPLINE TABLE ELECTROSTATICS */
2003 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2004 F = _fjsp_setzero_v2r8();
2005 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2006 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2007 H = _fjsp_setzero_v2r8();
2008 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2009 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2010 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2011 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
2015 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2017 /* Update vectorial force */
2018 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
2019 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
2020 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
2022 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
2023 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
2024 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
2026 /**************************
2027 * CALCULATE INTERACTIONS *
2028 **************************/
2030 r20 = _fjsp_mul_v2r8(rsq20,rinv20);
2032 /* Calculate table index by multiplying r with table scale and truncate to integer */
2033 rt = _fjsp_mul_v2r8(r20,vftabscale);
2034 itab_tmp = _fjsp_dtox_v2r8(rt);
2035 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2036 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
2037 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2042 /* CUBIC SPLINE TABLE ELECTROSTATICS */
2043 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2044 F = _fjsp_setzero_v2r8();
2045 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2046 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2047 H = _fjsp_setzero_v2r8();
2048 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2049 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2050 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2051 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
2055 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2057 /* Update vectorial force */
2058 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
2059 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
2060 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
2062 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
2063 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
2064 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
2066 /**************************
2067 * CALCULATE INTERACTIONS *
2068 **************************/
2070 r21 = _fjsp_mul_v2r8(rsq21,rinv21);
2072 /* Calculate table index by multiplying r with table scale and truncate to integer */
2073 rt = _fjsp_mul_v2r8(r21,vftabscale);
2074 itab_tmp = _fjsp_dtox_v2r8(rt);
2075 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2076 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
2077 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2082 /* CUBIC SPLINE TABLE ELECTROSTATICS */
2083 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2084 F = _fjsp_setzero_v2r8();
2085 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2086 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2087 H = _fjsp_setzero_v2r8();
2088 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2089 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2090 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2091 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
2095 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2097 /* Update vectorial force */
2098 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
2099 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
2100 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
2102 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
2103 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
2104 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
2106 /**************************
2107 * CALCULATE INTERACTIONS *
2108 **************************/
2110 r22 = _fjsp_mul_v2r8(rsq22,rinv22);
2112 /* Calculate table index by multiplying r with table scale and truncate to integer */
2113 rt = _fjsp_mul_v2r8(r22,vftabscale);
2114 itab_tmp = _fjsp_dtox_v2r8(rt);
2115 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2116 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
2117 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2122 /* CUBIC SPLINE TABLE ELECTROSTATICS */
2123 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2124 F = _fjsp_setzero_v2r8();
2125 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2126 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2127 H = _fjsp_setzero_v2r8();
2128 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2129 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2130 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2131 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
2135 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2137 /* Update vectorial force */
2138 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
2139 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
2140 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
2142 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
2143 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
2144 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
2146 gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
2148 /* Inner loop uses 378 flops */
2151 /* End of innermost loop */
2153 gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
2154 f+i_coord_offset,fshift+i_shift_offset);
2156 /* Increment number of inner iterations */
2157 inneriter += j_index_end - j_index_start;
2159 /* Outer loop uses 18 flops */
2162 /* Increment number of outer iterations */
2165 /* Update outer/inner flops */
2167 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_F,outeriter*18 + inneriter*378);