2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012, by the GROMACS development team, led by
5 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
6 * others, as listed in the AUTHORS file in the top-level source
7 * directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
44 #include "../nb_kernel.h"
45 #include "types/simple.h"
49 #include "kernelutil_sparc64_hpc_ace_double.h"
52 * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double
53 * Electrostatics interaction: CubicSplineTable
54 * VdW interaction: CubicSplineTable
55 * Geometry: Water4-Water4
56 * Calculate force/pot: PotentialAndForce
59 nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double
60 (t_nblist * gmx_restrict nlist,
61 rvec * gmx_restrict xx,
62 rvec * gmx_restrict ff,
63 t_forcerec * gmx_restrict fr,
64 t_mdatoms * gmx_restrict mdatoms,
65 nb_kernel_data_t * gmx_restrict kernel_data,
66 t_nrnb * gmx_restrict nrnb)
68 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
69 * just 0 for non-waters.
70 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
71 * jnr indices corresponding to data put in the two positions in the SIMD register.
73 int i_shift_offset,i_coord_offset,outeriter,inneriter;
74 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
76 int j_coord_offsetA,j_coord_offsetB;
77 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
79 real *shiftvec,*fshift,*x,*f;
80 _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
82 _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
84 _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
86 _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
88 _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
89 int vdwjidx0A,vdwjidx0B;
90 _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
91 int vdwjidx1A,vdwjidx1B;
92 _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
93 int vdwjidx2A,vdwjidx2B;
94 _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
95 int vdwjidx3A,vdwjidx3B;
96 _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
97 _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
98 _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
99 _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
100 _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
101 _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
102 _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
103 _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
104 _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
105 _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
106 _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
107 _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
110 _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
113 _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
114 _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
115 _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
118 _fjsp_v2r8 dummy_mask,cutoff_mask;
119 _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
120 _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
121 union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
128 jindex = nlist->jindex;
130 shiftidx = nlist->shift;
132 shiftvec = fr->shift_vec[0];
133 fshift = fr->fshift[0];
134 facel = gmx_fjsp_set1_v2r8(fr->epsfac);
135 charge = mdatoms->chargeA;
136 nvdwtype = fr->ntype;
138 vdwtype = mdatoms->typeA;
140 vftab = kernel_data->table_elec_vdw->data;
141 vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale);
143 /* Setup water-specific parameters */
144 inr = nlist->iinr[0];
145 iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
146 iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
147 iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
148 vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
150 jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
151 jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
152 jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
153 vdwjidx0A = 2*vdwtype[inr+0];
154 c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
155 c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
156 qq11 = _fjsp_mul_v2r8(iq1,jq1);
157 qq12 = _fjsp_mul_v2r8(iq1,jq2);
158 qq13 = _fjsp_mul_v2r8(iq1,jq3);
159 qq21 = _fjsp_mul_v2r8(iq2,jq1);
160 qq22 = _fjsp_mul_v2r8(iq2,jq2);
161 qq23 = _fjsp_mul_v2r8(iq2,jq3);
162 qq31 = _fjsp_mul_v2r8(iq3,jq1);
163 qq32 = _fjsp_mul_v2r8(iq3,jq2);
164 qq33 = _fjsp_mul_v2r8(iq3,jq3);
166 /* Avoid stupid compiler warnings */
174 /* Start outer loop over neighborlists */
175 for(iidx=0; iidx<nri; iidx++)
177 /* Load shift vector for this list */
178 i_shift_offset = DIM*shiftidx[iidx];
180 /* Load limits for loop over neighbors */
181 j_index_start = jindex[iidx];
182 j_index_end = jindex[iidx+1];
184 /* Get outer coordinate index */
186 i_coord_offset = DIM*inr;
188 /* Load i particle coords and add shift vector */
189 gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
190 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
192 fix0 = _fjsp_setzero_v2r8();
193 fiy0 = _fjsp_setzero_v2r8();
194 fiz0 = _fjsp_setzero_v2r8();
195 fix1 = _fjsp_setzero_v2r8();
196 fiy1 = _fjsp_setzero_v2r8();
197 fiz1 = _fjsp_setzero_v2r8();
198 fix2 = _fjsp_setzero_v2r8();
199 fiy2 = _fjsp_setzero_v2r8();
200 fiz2 = _fjsp_setzero_v2r8();
201 fix3 = _fjsp_setzero_v2r8();
202 fiy3 = _fjsp_setzero_v2r8();
203 fiz3 = _fjsp_setzero_v2r8();
205 /* Reset potential sums */
206 velecsum = _fjsp_setzero_v2r8();
207 vvdwsum = _fjsp_setzero_v2r8();
209 /* Start inner kernel loop */
210 for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
213 /* Get j neighbor index, and coordinate index */
216 j_coord_offsetA = DIM*jnrA;
217 j_coord_offsetB = DIM*jnrB;
219 /* load j atom coordinates */
220 gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
221 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
222 &jy2,&jz2,&jx3,&jy3,&jz3);
224 /* Calculate displacement vector */
225 dx00 = _fjsp_sub_v2r8(ix0,jx0);
226 dy00 = _fjsp_sub_v2r8(iy0,jy0);
227 dz00 = _fjsp_sub_v2r8(iz0,jz0);
228 dx11 = _fjsp_sub_v2r8(ix1,jx1);
229 dy11 = _fjsp_sub_v2r8(iy1,jy1);
230 dz11 = _fjsp_sub_v2r8(iz1,jz1);
231 dx12 = _fjsp_sub_v2r8(ix1,jx2);
232 dy12 = _fjsp_sub_v2r8(iy1,jy2);
233 dz12 = _fjsp_sub_v2r8(iz1,jz2);
234 dx13 = _fjsp_sub_v2r8(ix1,jx3);
235 dy13 = _fjsp_sub_v2r8(iy1,jy3);
236 dz13 = _fjsp_sub_v2r8(iz1,jz3);
237 dx21 = _fjsp_sub_v2r8(ix2,jx1);
238 dy21 = _fjsp_sub_v2r8(iy2,jy1);
239 dz21 = _fjsp_sub_v2r8(iz2,jz1);
240 dx22 = _fjsp_sub_v2r8(ix2,jx2);
241 dy22 = _fjsp_sub_v2r8(iy2,jy2);
242 dz22 = _fjsp_sub_v2r8(iz2,jz2);
243 dx23 = _fjsp_sub_v2r8(ix2,jx3);
244 dy23 = _fjsp_sub_v2r8(iy2,jy3);
245 dz23 = _fjsp_sub_v2r8(iz2,jz3);
246 dx31 = _fjsp_sub_v2r8(ix3,jx1);
247 dy31 = _fjsp_sub_v2r8(iy3,jy1);
248 dz31 = _fjsp_sub_v2r8(iz3,jz1);
249 dx32 = _fjsp_sub_v2r8(ix3,jx2);
250 dy32 = _fjsp_sub_v2r8(iy3,jy2);
251 dz32 = _fjsp_sub_v2r8(iz3,jz2);
252 dx33 = _fjsp_sub_v2r8(ix3,jx3);
253 dy33 = _fjsp_sub_v2r8(iy3,jy3);
254 dz33 = _fjsp_sub_v2r8(iz3,jz3);
256 /* Calculate squared distance and things based on it */
257 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
258 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
259 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
260 rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
261 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
262 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
263 rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
264 rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
265 rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
266 rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
268 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
269 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
270 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
271 rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
272 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
273 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
274 rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
275 rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
276 rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
277 rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
279 fjx0 = _fjsp_setzero_v2r8();
280 fjy0 = _fjsp_setzero_v2r8();
281 fjz0 = _fjsp_setzero_v2r8();
282 fjx1 = _fjsp_setzero_v2r8();
283 fjy1 = _fjsp_setzero_v2r8();
284 fjz1 = _fjsp_setzero_v2r8();
285 fjx2 = _fjsp_setzero_v2r8();
286 fjy2 = _fjsp_setzero_v2r8();
287 fjz2 = _fjsp_setzero_v2r8();
288 fjx3 = _fjsp_setzero_v2r8();
289 fjy3 = _fjsp_setzero_v2r8();
290 fjz3 = _fjsp_setzero_v2r8();
292 /**************************
293 * CALCULATE INTERACTIONS *
294 **************************/
296 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
298 /* Calculate table index by multiplying r with table scale and truncate to integer */
299 rt = _fjsp_mul_v2r8(r00,vftabscale);
300 itab_tmp = _fjsp_dtox_v2r8(rt);
301 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
302 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
303 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
308 /* CUBIC SPLINE TABLE DISPERSION */
311 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
312 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
313 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
314 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
315 H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
316 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
317 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
318 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
319 vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
320 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
321 fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
323 /* CUBIC SPLINE TABLE REPULSION */
324 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
325 F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
326 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
327 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
328 H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
329 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
330 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
331 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
332 vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
333 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
334 fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
335 vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
336 fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
338 /* Update potential sum for this i atom from the interaction with this j atom. */
339 vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
343 /* Update vectorial force */
344 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
345 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
346 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
348 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
349 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
350 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
352 /**************************
353 * CALCULATE INTERACTIONS *
354 **************************/
356 r11 = _fjsp_mul_v2r8(rsq11,rinv11);
358 /* Calculate table index by multiplying r with table scale and truncate to integer */
359 rt = _fjsp_mul_v2r8(r11,vftabscale);
360 itab_tmp = _fjsp_dtox_v2r8(rt);
361 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
362 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
363 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
368 /* CUBIC SPLINE TABLE ELECTROSTATICS */
369 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
370 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
371 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
372 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
373 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
374 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
375 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
376 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
377 velec = _fjsp_mul_v2r8(qq11,VV);
378 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
379 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
381 /* Update potential sum for this i atom from the interaction with this j atom. */
382 velecsum = _fjsp_add_v2r8(velecsum,velec);
386 /* Update vectorial force */
387 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
388 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
389 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
391 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
392 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
393 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
395 /**************************
396 * CALCULATE INTERACTIONS *
397 **************************/
399 r12 = _fjsp_mul_v2r8(rsq12,rinv12);
401 /* Calculate table index by multiplying r with table scale and truncate to integer */
402 rt = _fjsp_mul_v2r8(r12,vftabscale);
403 itab_tmp = _fjsp_dtox_v2r8(rt);
404 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
405 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
406 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
411 /* CUBIC SPLINE TABLE ELECTROSTATICS */
412 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
413 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
414 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
415 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
416 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
417 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
418 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
419 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
420 velec = _fjsp_mul_v2r8(qq12,VV);
421 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
422 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
424 /* Update potential sum for this i atom from the interaction with this j atom. */
425 velecsum = _fjsp_add_v2r8(velecsum,velec);
429 /* Update vectorial force */
430 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
431 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
432 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
434 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
435 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
436 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
438 /**************************
439 * CALCULATE INTERACTIONS *
440 **************************/
442 r13 = _fjsp_mul_v2r8(rsq13,rinv13);
444 /* Calculate table index by multiplying r with table scale and truncate to integer */
445 rt = _fjsp_mul_v2r8(r13,vftabscale);
446 itab_tmp = _fjsp_dtox_v2r8(rt);
447 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
448 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
449 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
454 /* CUBIC SPLINE TABLE ELECTROSTATICS */
455 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
456 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
457 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
458 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
459 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
460 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
461 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
462 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
463 velec = _fjsp_mul_v2r8(qq13,VV);
464 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
465 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,FF),_fjsp_mul_v2r8(vftabscale,rinv13)));
467 /* Update potential sum for this i atom from the interaction with this j atom. */
468 velecsum = _fjsp_add_v2r8(velecsum,velec);
472 /* Update vectorial force */
473 fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
474 fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
475 fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
477 fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
478 fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
479 fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
481 /**************************
482 * CALCULATE INTERACTIONS *
483 **************************/
485 r21 = _fjsp_mul_v2r8(rsq21,rinv21);
487 /* Calculate table index by multiplying r with table scale and truncate to integer */
488 rt = _fjsp_mul_v2r8(r21,vftabscale);
489 itab_tmp = _fjsp_dtox_v2r8(rt);
490 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
491 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
492 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
497 /* CUBIC SPLINE TABLE ELECTROSTATICS */
498 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
499 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
500 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
501 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
502 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
503 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
504 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
505 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
506 velec = _fjsp_mul_v2r8(qq21,VV);
507 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
508 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
510 /* Update potential sum for this i atom from the interaction with this j atom. */
511 velecsum = _fjsp_add_v2r8(velecsum,velec);
515 /* Update vectorial force */
516 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
517 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
518 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
520 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
521 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
522 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
524 /**************************
525 * CALCULATE INTERACTIONS *
526 **************************/
528 r22 = _fjsp_mul_v2r8(rsq22,rinv22);
530 /* Calculate table index by multiplying r with table scale and truncate to integer */
531 rt = _fjsp_mul_v2r8(r22,vftabscale);
532 itab_tmp = _fjsp_dtox_v2r8(rt);
533 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
534 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
535 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
540 /* CUBIC SPLINE TABLE ELECTROSTATICS */
541 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
542 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
543 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
544 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
545 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
546 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
547 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
548 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
549 velec = _fjsp_mul_v2r8(qq22,VV);
550 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
551 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
553 /* Update potential sum for this i atom from the interaction with this j atom. */
554 velecsum = _fjsp_add_v2r8(velecsum,velec);
558 /* Update vectorial force */
559 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
560 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
561 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
563 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
564 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
565 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
567 /**************************
568 * CALCULATE INTERACTIONS *
569 **************************/
571 r23 = _fjsp_mul_v2r8(rsq23,rinv23);
573 /* Calculate table index by multiplying r with table scale and truncate to integer */
574 rt = _fjsp_mul_v2r8(r23,vftabscale);
575 itab_tmp = _fjsp_dtox_v2r8(rt);
576 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
577 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
578 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
583 /* CUBIC SPLINE TABLE ELECTROSTATICS */
584 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
585 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
586 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
587 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
588 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
589 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
590 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
591 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
592 velec = _fjsp_mul_v2r8(qq23,VV);
593 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
594 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,FF),_fjsp_mul_v2r8(vftabscale,rinv23)));
596 /* Update potential sum for this i atom from the interaction with this j atom. */
597 velecsum = _fjsp_add_v2r8(velecsum,velec);
601 /* Update vectorial force */
602 fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
603 fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
604 fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
606 fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
607 fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
608 fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
610 /**************************
611 * CALCULATE INTERACTIONS *
612 **************************/
614 r31 = _fjsp_mul_v2r8(rsq31,rinv31);
616 /* Calculate table index by multiplying r with table scale and truncate to integer */
617 rt = _fjsp_mul_v2r8(r31,vftabscale);
618 itab_tmp = _fjsp_dtox_v2r8(rt);
619 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
620 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
621 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
626 /* CUBIC SPLINE TABLE ELECTROSTATICS */
627 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
628 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
629 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
630 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
631 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
632 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
633 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
634 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
635 velec = _fjsp_mul_v2r8(qq31,VV);
636 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
637 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,FF),_fjsp_mul_v2r8(vftabscale,rinv31)));
639 /* Update potential sum for this i atom from the interaction with this j atom. */
640 velecsum = _fjsp_add_v2r8(velecsum,velec);
644 /* Update vectorial force */
645 fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
646 fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
647 fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
649 fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
650 fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
651 fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
653 /**************************
654 * CALCULATE INTERACTIONS *
655 **************************/
657 r32 = _fjsp_mul_v2r8(rsq32,rinv32);
659 /* Calculate table index by multiplying r with table scale and truncate to integer */
660 rt = _fjsp_mul_v2r8(r32,vftabscale);
661 itab_tmp = _fjsp_dtox_v2r8(rt);
662 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
663 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
664 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
669 /* CUBIC SPLINE TABLE ELECTROSTATICS */
670 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
671 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
672 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
673 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
674 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
675 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
676 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
677 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
678 velec = _fjsp_mul_v2r8(qq32,VV);
679 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
680 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,FF),_fjsp_mul_v2r8(vftabscale,rinv32)));
682 /* Update potential sum for this i atom from the interaction with this j atom. */
683 velecsum = _fjsp_add_v2r8(velecsum,velec);
687 /* Update vectorial force */
688 fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
689 fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
690 fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
692 fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
693 fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
694 fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
696 /**************************
697 * CALCULATE INTERACTIONS *
698 **************************/
700 r33 = _fjsp_mul_v2r8(rsq33,rinv33);
702 /* Calculate table index by multiplying r with table scale and truncate to integer */
703 rt = _fjsp_mul_v2r8(r33,vftabscale);
704 itab_tmp = _fjsp_dtox_v2r8(rt);
705 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
706 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
707 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
712 /* CUBIC SPLINE TABLE ELECTROSTATICS */
713 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
714 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
715 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
716 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
717 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
718 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
719 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
720 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
721 velec = _fjsp_mul_v2r8(qq33,VV);
722 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
723 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,FF),_fjsp_mul_v2r8(vftabscale,rinv33)));
725 /* Update potential sum for this i atom from the interaction with this j atom. */
726 velecsum = _fjsp_add_v2r8(velecsum,velec);
730 /* Update vectorial force */
731 fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
732 fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
733 fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
735 fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
736 fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
737 fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
739 gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
741 /* Inner loop uses 476 flops */
748 j_coord_offsetA = DIM*jnrA;
750 /* load j atom coordinates */
751 gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
752 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
753 &jy2,&jz2,&jx3,&jy3,&jz3);
755 /* Calculate displacement vector */
756 dx00 = _fjsp_sub_v2r8(ix0,jx0);
757 dy00 = _fjsp_sub_v2r8(iy0,jy0);
758 dz00 = _fjsp_sub_v2r8(iz0,jz0);
759 dx11 = _fjsp_sub_v2r8(ix1,jx1);
760 dy11 = _fjsp_sub_v2r8(iy1,jy1);
761 dz11 = _fjsp_sub_v2r8(iz1,jz1);
762 dx12 = _fjsp_sub_v2r8(ix1,jx2);
763 dy12 = _fjsp_sub_v2r8(iy1,jy2);
764 dz12 = _fjsp_sub_v2r8(iz1,jz2);
765 dx13 = _fjsp_sub_v2r8(ix1,jx3);
766 dy13 = _fjsp_sub_v2r8(iy1,jy3);
767 dz13 = _fjsp_sub_v2r8(iz1,jz3);
768 dx21 = _fjsp_sub_v2r8(ix2,jx1);
769 dy21 = _fjsp_sub_v2r8(iy2,jy1);
770 dz21 = _fjsp_sub_v2r8(iz2,jz1);
771 dx22 = _fjsp_sub_v2r8(ix2,jx2);
772 dy22 = _fjsp_sub_v2r8(iy2,jy2);
773 dz22 = _fjsp_sub_v2r8(iz2,jz2);
774 dx23 = _fjsp_sub_v2r8(ix2,jx3);
775 dy23 = _fjsp_sub_v2r8(iy2,jy3);
776 dz23 = _fjsp_sub_v2r8(iz2,jz3);
777 dx31 = _fjsp_sub_v2r8(ix3,jx1);
778 dy31 = _fjsp_sub_v2r8(iy3,jy1);
779 dz31 = _fjsp_sub_v2r8(iz3,jz1);
780 dx32 = _fjsp_sub_v2r8(ix3,jx2);
781 dy32 = _fjsp_sub_v2r8(iy3,jy2);
782 dz32 = _fjsp_sub_v2r8(iz3,jz2);
783 dx33 = _fjsp_sub_v2r8(ix3,jx3);
784 dy33 = _fjsp_sub_v2r8(iy3,jy3);
785 dz33 = _fjsp_sub_v2r8(iz3,jz3);
787 /* Calculate squared distance and things based on it */
788 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
789 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
790 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
791 rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
792 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
793 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
794 rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
795 rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
796 rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
797 rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
799 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
800 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
801 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
802 rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
803 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
804 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
805 rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
806 rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
807 rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
808 rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
810 fjx0 = _fjsp_setzero_v2r8();
811 fjy0 = _fjsp_setzero_v2r8();
812 fjz0 = _fjsp_setzero_v2r8();
813 fjx1 = _fjsp_setzero_v2r8();
814 fjy1 = _fjsp_setzero_v2r8();
815 fjz1 = _fjsp_setzero_v2r8();
816 fjx2 = _fjsp_setzero_v2r8();
817 fjy2 = _fjsp_setzero_v2r8();
818 fjz2 = _fjsp_setzero_v2r8();
819 fjx3 = _fjsp_setzero_v2r8();
820 fjy3 = _fjsp_setzero_v2r8();
821 fjz3 = _fjsp_setzero_v2r8();
823 /**************************
824 * CALCULATE INTERACTIONS *
825 **************************/
827 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
829 /* Calculate table index by multiplying r with table scale and truncate to integer */
830 rt = _fjsp_mul_v2r8(r00,vftabscale);
831 itab_tmp = _fjsp_dtox_v2r8(rt);
832 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
833 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
834 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
839 /* CUBIC SPLINE TABLE DISPERSION */
842 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
843 F = _fjsp_setzero_v2r8();
844 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
845 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
846 H = _fjsp_setzero_v2r8();
847 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
848 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
849 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
850 vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
851 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
852 fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
854 /* CUBIC SPLINE TABLE REPULSION */
855 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
856 F = _fjsp_setzero_v2r8();
857 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
858 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
859 H = _fjsp_setzero_v2r8();
860 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
861 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
862 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
863 vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
864 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
865 fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
866 vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
867 fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
869 /* Update potential sum for this i atom from the interaction with this j atom. */
870 vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
871 vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
875 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
877 /* Update vectorial force */
878 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
879 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
880 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
882 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
883 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
884 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
886 /**************************
887 * CALCULATE INTERACTIONS *
888 **************************/
890 r11 = _fjsp_mul_v2r8(rsq11,rinv11);
892 /* Calculate table index by multiplying r with table scale and truncate to integer */
893 rt = _fjsp_mul_v2r8(r11,vftabscale);
894 itab_tmp = _fjsp_dtox_v2r8(rt);
895 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
896 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
897 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
902 /* CUBIC SPLINE TABLE ELECTROSTATICS */
903 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
904 F = _fjsp_setzero_v2r8();
905 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
906 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
907 H = _fjsp_setzero_v2r8();
908 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
909 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
910 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
911 velec = _fjsp_mul_v2r8(qq11,VV);
912 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
913 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
915 /* Update potential sum for this i atom from the interaction with this j atom. */
916 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
917 velecsum = _fjsp_add_v2r8(velecsum,velec);
921 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
923 /* Update vectorial force */
924 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
925 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
926 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
928 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
929 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
930 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
932 /**************************
933 * CALCULATE INTERACTIONS *
934 **************************/
936 r12 = _fjsp_mul_v2r8(rsq12,rinv12);
938 /* Calculate table index by multiplying r with table scale and truncate to integer */
939 rt = _fjsp_mul_v2r8(r12,vftabscale);
940 itab_tmp = _fjsp_dtox_v2r8(rt);
941 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
942 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
943 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
948 /* CUBIC SPLINE TABLE ELECTROSTATICS */
949 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
950 F = _fjsp_setzero_v2r8();
951 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
952 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
953 H = _fjsp_setzero_v2r8();
954 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
955 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
956 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
957 velec = _fjsp_mul_v2r8(qq12,VV);
958 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
959 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
961 /* Update potential sum for this i atom from the interaction with this j atom. */
962 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
963 velecsum = _fjsp_add_v2r8(velecsum,velec);
967 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
969 /* Update vectorial force */
970 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
971 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
972 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
974 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
975 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
976 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
978 /**************************
979 * CALCULATE INTERACTIONS *
980 **************************/
982 r13 = _fjsp_mul_v2r8(rsq13,rinv13);
984 /* Calculate table index by multiplying r with table scale and truncate to integer */
985 rt = _fjsp_mul_v2r8(r13,vftabscale);
986 itab_tmp = _fjsp_dtox_v2r8(rt);
987 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
988 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
989 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
994 /* CUBIC SPLINE TABLE ELECTROSTATICS */
995 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
996 F = _fjsp_setzero_v2r8();
997 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
998 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
999 H = _fjsp_setzero_v2r8();
1000 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1001 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1002 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
1003 velec = _fjsp_mul_v2r8(qq13,VV);
1004 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1005 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,FF),_fjsp_mul_v2r8(vftabscale,rinv13)));
1007 /* Update potential sum for this i atom from the interaction with this j atom. */
1008 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1009 velecsum = _fjsp_add_v2r8(velecsum,velec);
1013 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1015 /* Update vectorial force */
1016 fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
1017 fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
1018 fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
1020 fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
1021 fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
1022 fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
1024 /**************************
1025 * CALCULATE INTERACTIONS *
1026 **************************/
1028 r21 = _fjsp_mul_v2r8(rsq21,rinv21);
1030 /* Calculate table index by multiplying r with table scale and truncate to integer */
1031 rt = _fjsp_mul_v2r8(r21,vftabscale);
1032 itab_tmp = _fjsp_dtox_v2r8(rt);
1033 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1034 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1035 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1040 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1041 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1042 F = _fjsp_setzero_v2r8();
1043 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1044 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1045 H = _fjsp_setzero_v2r8();
1046 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1047 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1048 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
1049 velec = _fjsp_mul_v2r8(qq21,VV);
1050 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1051 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
1053 /* Update potential sum for this i atom from the interaction with this j atom. */
1054 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1055 velecsum = _fjsp_add_v2r8(velecsum,velec);
1059 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1061 /* Update vectorial force */
1062 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
1063 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1064 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1066 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1067 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1068 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1070 /**************************
1071 * CALCULATE INTERACTIONS *
1072 **************************/
1074 r22 = _fjsp_mul_v2r8(rsq22,rinv22);
1076 /* Calculate table index by multiplying r with table scale and truncate to integer */
1077 rt = _fjsp_mul_v2r8(r22,vftabscale);
1078 itab_tmp = _fjsp_dtox_v2r8(rt);
1079 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1080 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1081 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1086 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1087 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1088 F = _fjsp_setzero_v2r8();
1089 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1090 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1091 H = _fjsp_setzero_v2r8();
1092 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1093 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1094 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
1095 velec = _fjsp_mul_v2r8(qq22,VV);
1096 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1097 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
1099 /* Update potential sum for this i atom from the interaction with this j atom. */
1100 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1101 velecsum = _fjsp_add_v2r8(velecsum,velec);
1105 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1107 /* Update vectorial force */
1108 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
1109 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1110 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1112 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1113 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1114 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1116 /**************************
1117 * CALCULATE INTERACTIONS *
1118 **************************/
1120 r23 = _fjsp_mul_v2r8(rsq23,rinv23);
1122 /* Calculate table index by multiplying r with table scale and truncate to integer */
1123 rt = _fjsp_mul_v2r8(r23,vftabscale);
1124 itab_tmp = _fjsp_dtox_v2r8(rt);
1125 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1126 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1127 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1132 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1133 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1134 F = _fjsp_setzero_v2r8();
1135 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1136 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1137 H = _fjsp_setzero_v2r8();
1138 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1139 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1140 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
1141 velec = _fjsp_mul_v2r8(qq23,VV);
1142 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1143 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,FF),_fjsp_mul_v2r8(vftabscale,rinv23)));
1145 /* Update potential sum for this i atom from the interaction with this j atom. */
1146 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1147 velecsum = _fjsp_add_v2r8(velecsum,velec);
1151 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1153 /* Update vectorial force */
1154 fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
1155 fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
1156 fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
1158 fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
1159 fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
1160 fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
1162 /**************************
1163 * CALCULATE INTERACTIONS *
1164 **************************/
1166 r31 = _fjsp_mul_v2r8(rsq31,rinv31);
1168 /* Calculate table index by multiplying r with table scale and truncate to integer */
1169 rt = _fjsp_mul_v2r8(r31,vftabscale);
1170 itab_tmp = _fjsp_dtox_v2r8(rt);
1171 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1172 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1173 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1178 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1179 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1180 F = _fjsp_setzero_v2r8();
1181 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1182 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1183 H = _fjsp_setzero_v2r8();
1184 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1185 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1186 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
1187 velec = _fjsp_mul_v2r8(qq31,VV);
1188 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1189 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,FF),_fjsp_mul_v2r8(vftabscale,rinv31)));
1191 /* Update potential sum for this i atom from the interaction with this j atom. */
1192 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1193 velecsum = _fjsp_add_v2r8(velecsum,velec);
1197 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1199 /* Update vectorial force */
1200 fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
1201 fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
1202 fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
1204 fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
1205 fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
1206 fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
1208 /**************************
1209 * CALCULATE INTERACTIONS *
1210 **************************/
1212 r32 = _fjsp_mul_v2r8(rsq32,rinv32);
1214 /* Calculate table index by multiplying r with table scale and truncate to integer */
1215 rt = _fjsp_mul_v2r8(r32,vftabscale);
1216 itab_tmp = _fjsp_dtox_v2r8(rt);
1217 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1218 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1219 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1224 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1225 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1226 F = _fjsp_setzero_v2r8();
1227 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1228 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1229 H = _fjsp_setzero_v2r8();
1230 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1231 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1232 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
1233 velec = _fjsp_mul_v2r8(qq32,VV);
1234 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1235 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,FF),_fjsp_mul_v2r8(vftabscale,rinv32)));
1237 /* Update potential sum for this i atom from the interaction with this j atom. */
1238 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1239 velecsum = _fjsp_add_v2r8(velecsum,velec);
1243 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1245 /* Update vectorial force */
1246 fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
1247 fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
1248 fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
1250 fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
1251 fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
1252 fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
1254 /**************************
1255 * CALCULATE INTERACTIONS *
1256 **************************/
1258 r33 = _fjsp_mul_v2r8(rsq33,rinv33);
1260 /* Calculate table index by multiplying r with table scale and truncate to integer */
1261 rt = _fjsp_mul_v2r8(r33,vftabscale);
1262 itab_tmp = _fjsp_dtox_v2r8(rt);
1263 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1264 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1265 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1270 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1271 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1272 F = _fjsp_setzero_v2r8();
1273 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1274 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1275 H = _fjsp_setzero_v2r8();
1276 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1277 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1278 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
1279 velec = _fjsp_mul_v2r8(qq33,VV);
1280 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1281 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,FF),_fjsp_mul_v2r8(vftabscale,rinv33)));
1283 /* Update potential sum for this i atom from the interaction with this j atom. */
1284 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1285 velecsum = _fjsp_add_v2r8(velecsum,velec);
1289 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1291 /* Update vectorial force */
1292 fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
1293 fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
1294 fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
1296 fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
1297 fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
1298 fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
1300 gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1302 /* Inner loop uses 476 flops */
1305 /* End of innermost loop */
1307 gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
1308 f+i_coord_offset,fshift+i_shift_offset);
1311 /* Update potential energies */
1312 gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
1313 gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
1315 /* Increment number of inner iterations */
1316 inneriter += j_index_end - j_index_start;
1318 /* Outer loop uses 26 flops */
1321 /* Increment number of outer iterations */
1324 /* Update outer/inner flops */
1326 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*476);
1329 * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double
1330 * Electrostatics interaction: CubicSplineTable
1331 * VdW interaction: CubicSplineTable
1332 * Geometry: Water4-Water4
1333 * Calculate force/pot: Force
1336 nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double
1337 (t_nblist * gmx_restrict nlist,
1338 rvec * gmx_restrict xx,
1339 rvec * gmx_restrict ff,
1340 t_forcerec * gmx_restrict fr,
1341 t_mdatoms * gmx_restrict mdatoms,
1342 nb_kernel_data_t * gmx_restrict kernel_data,
1343 t_nrnb * gmx_restrict nrnb)
1345 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1346 * just 0 for non-waters.
1347 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
1348 * jnr indices corresponding to data put in the two positions in the SIMD register.
1350 int i_shift_offset,i_coord_offset,outeriter,inneriter;
1351 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
1353 int j_coord_offsetA,j_coord_offsetB;
1354 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
1355 real rcutoff_scalar;
1356 real *shiftvec,*fshift,*x,*f;
1357 _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1359 _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
1361 _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1363 _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1365 _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
1366 int vdwjidx0A,vdwjidx0B;
1367 _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
1368 int vdwjidx1A,vdwjidx1B;
1369 _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1370 int vdwjidx2A,vdwjidx2B;
1371 _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1372 int vdwjidx3A,vdwjidx3B;
1373 _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
1374 _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
1375 _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1376 _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1377 _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
1378 _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1379 _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1380 _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
1381 _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
1382 _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
1383 _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
1384 _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
1387 _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
1390 _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
1391 _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
1392 _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
1394 _fjsp_v2r8 itab_tmp;
1395 _fjsp_v2r8 dummy_mask,cutoff_mask;
1396 _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
1397 _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
1398 union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
1405 jindex = nlist->jindex;
1407 shiftidx = nlist->shift;
1409 shiftvec = fr->shift_vec[0];
1410 fshift = fr->fshift[0];
1411 facel = gmx_fjsp_set1_v2r8(fr->epsfac);
1412 charge = mdatoms->chargeA;
1413 nvdwtype = fr->ntype;
1414 vdwparam = fr->nbfp;
1415 vdwtype = mdatoms->typeA;
1417 vftab = kernel_data->table_elec_vdw->data;
1418 vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale);
1420 /* Setup water-specific parameters */
1421 inr = nlist->iinr[0];
1422 iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
1423 iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
1424 iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
1425 vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
1427 jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
1428 jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
1429 jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
1430 vdwjidx0A = 2*vdwtype[inr+0];
1431 c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
1432 c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
1433 qq11 = _fjsp_mul_v2r8(iq1,jq1);
1434 qq12 = _fjsp_mul_v2r8(iq1,jq2);
1435 qq13 = _fjsp_mul_v2r8(iq1,jq3);
1436 qq21 = _fjsp_mul_v2r8(iq2,jq1);
1437 qq22 = _fjsp_mul_v2r8(iq2,jq2);
1438 qq23 = _fjsp_mul_v2r8(iq2,jq3);
1439 qq31 = _fjsp_mul_v2r8(iq3,jq1);
1440 qq32 = _fjsp_mul_v2r8(iq3,jq2);
1441 qq33 = _fjsp_mul_v2r8(iq3,jq3);
1443 /* Give these variables initial values to avoid spurious compiler warnings */
1445 j_coord_offsetA = 0;
1446 j_coord_offsetB = 0;
1451 /* Start outer loop over neighborlists */
1452 for(iidx=0; iidx<nri; iidx++)
1454 /* Load shift vector for this list */
1455 i_shift_offset = DIM*shiftidx[iidx];
1457 /* Load limits for loop over neighbors */
1458 j_index_start = jindex[iidx];
1459 j_index_end = jindex[iidx+1];
1461 /* Get outer coordinate index */
1463 i_coord_offset = DIM*inr;
1465 /* Load i particle coords and add shift vector */
1466 gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
1467 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
1469 fix0 = _fjsp_setzero_v2r8();
1470 fiy0 = _fjsp_setzero_v2r8();
1471 fiz0 = _fjsp_setzero_v2r8();
1472 fix1 = _fjsp_setzero_v2r8();
1473 fiy1 = _fjsp_setzero_v2r8();
1474 fiz1 = _fjsp_setzero_v2r8();
1475 fix2 = _fjsp_setzero_v2r8();
1476 fiy2 = _fjsp_setzero_v2r8();
1477 fiz2 = _fjsp_setzero_v2r8();
1478 fix3 = _fjsp_setzero_v2r8();
1479 fiy3 = _fjsp_setzero_v2r8();
1480 fiz3 = _fjsp_setzero_v2r8();
1482 /* Start inner kernel loop */
1483 for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1486 /* Get j neighbor index, and coordinate index */
1488 jnrB = jjnr[jidx+1];
1489 j_coord_offsetA = DIM*jnrA;
1490 j_coord_offsetB = DIM*jnrB;
1492 /* load j atom coordinates */
1493 gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
1494 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
1495 &jy2,&jz2,&jx3,&jy3,&jz3);
1497 /* Calculate displacement vector */
1498 dx00 = _fjsp_sub_v2r8(ix0,jx0);
1499 dy00 = _fjsp_sub_v2r8(iy0,jy0);
1500 dz00 = _fjsp_sub_v2r8(iz0,jz0);
1501 dx11 = _fjsp_sub_v2r8(ix1,jx1);
1502 dy11 = _fjsp_sub_v2r8(iy1,jy1);
1503 dz11 = _fjsp_sub_v2r8(iz1,jz1);
1504 dx12 = _fjsp_sub_v2r8(ix1,jx2);
1505 dy12 = _fjsp_sub_v2r8(iy1,jy2);
1506 dz12 = _fjsp_sub_v2r8(iz1,jz2);
1507 dx13 = _fjsp_sub_v2r8(ix1,jx3);
1508 dy13 = _fjsp_sub_v2r8(iy1,jy3);
1509 dz13 = _fjsp_sub_v2r8(iz1,jz3);
1510 dx21 = _fjsp_sub_v2r8(ix2,jx1);
1511 dy21 = _fjsp_sub_v2r8(iy2,jy1);
1512 dz21 = _fjsp_sub_v2r8(iz2,jz1);
1513 dx22 = _fjsp_sub_v2r8(ix2,jx2);
1514 dy22 = _fjsp_sub_v2r8(iy2,jy2);
1515 dz22 = _fjsp_sub_v2r8(iz2,jz2);
1516 dx23 = _fjsp_sub_v2r8(ix2,jx3);
1517 dy23 = _fjsp_sub_v2r8(iy2,jy3);
1518 dz23 = _fjsp_sub_v2r8(iz2,jz3);
1519 dx31 = _fjsp_sub_v2r8(ix3,jx1);
1520 dy31 = _fjsp_sub_v2r8(iy3,jy1);
1521 dz31 = _fjsp_sub_v2r8(iz3,jz1);
1522 dx32 = _fjsp_sub_v2r8(ix3,jx2);
1523 dy32 = _fjsp_sub_v2r8(iy3,jy2);
1524 dz32 = _fjsp_sub_v2r8(iz3,jz2);
1525 dx33 = _fjsp_sub_v2r8(ix3,jx3);
1526 dy33 = _fjsp_sub_v2r8(iy3,jy3);
1527 dz33 = _fjsp_sub_v2r8(iz3,jz3);
1529 /* Calculate squared distance and things based on it */
1530 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1531 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1532 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1533 rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
1534 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1535 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1536 rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
1537 rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
1538 rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
1539 rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
1541 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
1542 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
1543 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
1544 rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
1545 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
1546 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
1547 rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
1548 rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
1549 rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
1550 rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
1552 fjx0 = _fjsp_setzero_v2r8();
1553 fjy0 = _fjsp_setzero_v2r8();
1554 fjz0 = _fjsp_setzero_v2r8();
1555 fjx1 = _fjsp_setzero_v2r8();
1556 fjy1 = _fjsp_setzero_v2r8();
1557 fjz1 = _fjsp_setzero_v2r8();
1558 fjx2 = _fjsp_setzero_v2r8();
1559 fjy2 = _fjsp_setzero_v2r8();
1560 fjz2 = _fjsp_setzero_v2r8();
1561 fjx3 = _fjsp_setzero_v2r8();
1562 fjy3 = _fjsp_setzero_v2r8();
1563 fjz3 = _fjsp_setzero_v2r8();
1565 /**************************
1566 * CALCULATE INTERACTIONS *
1567 **************************/
1569 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
1571 /* Calculate table index by multiplying r with table scale and truncate to integer */
1572 rt = _fjsp_mul_v2r8(r00,vftabscale);
1573 itab_tmp = _fjsp_dtox_v2r8(rt);
1574 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1575 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1576 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1581 /* CUBIC SPLINE TABLE DISPERSION */
1584 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1585 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1586 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1587 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
1588 H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
1589 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1590 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1591 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1592 fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
1594 /* CUBIC SPLINE TABLE REPULSION */
1595 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
1596 F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
1597 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1598 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
1599 H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
1600 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1601 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1602 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1603 fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
1604 fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
1608 /* Update vectorial force */
1609 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
1610 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1611 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1613 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1614 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1615 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1617 /**************************
1618 * CALCULATE INTERACTIONS *
1619 **************************/
1621 r11 = _fjsp_mul_v2r8(rsq11,rinv11);
1623 /* Calculate table index by multiplying r with table scale and truncate to integer */
1624 rt = _fjsp_mul_v2r8(r11,vftabscale);
1625 itab_tmp = _fjsp_dtox_v2r8(rt);
1626 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1627 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1628 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1633 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1634 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1635 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1636 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1637 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1638 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1639 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1640 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1641 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1642 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
1646 /* Update vectorial force */
1647 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
1648 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1649 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1651 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1652 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1653 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1655 /**************************
1656 * CALCULATE INTERACTIONS *
1657 **************************/
1659 r12 = _fjsp_mul_v2r8(rsq12,rinv12);
1661 /* Calculate table index by multiplying r with table scale and truncate to integer */
1662 rt = _fjsp_mul_v2r8(r12,vftabscale);
1663 itab_tmp = _fjsp_dtox_v2r8(rt);
1664 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1665 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1666 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1671 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1672 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1673 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1674 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1675 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1676 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1677 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1678 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1679 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1680 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
1684 /* Update vectorial force */
1685 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
1686 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1687 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1689 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1690 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1691 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1693 /**************************
1694 * CALCULATE INTERACTIONS *
1695 **************************/
1697 r13 = _fjsp_mul_v2r8(rsq13,rinv13);
1699 /* Calculate table index by multiplying r with table scale and truncate to integer */
1700 rt = _fjsp_mul_v2r8(r13,vftabscale);
1701 itab_tmp = _fjsp_dtox_v2r8(rt);
1702 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1703 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1704 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1709 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1710 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1711 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1712 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1713 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1714 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1715 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1716 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1717 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1718 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,FF),_fjsp_mul_v2r8(vftabscale,rinv13)));
1722 /* Update vectorial force */
1723 fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
1724 fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
1725 fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
1727 fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
1728 fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
1729 fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
1731 /**************************
1732 * CALCULATE INTERACTIONS *
1733 **************************/
1735 r21 = _fjsp_mul_v2r8(rsq21,rinv21);
1737 /* Calculate table index by multiplying r with table scale and truncate to integer */
1738 rt = _fjsp_mul_v2r8(r21,vftabscale);
1739 itab_tmp = _fjsp_dtox_v2r8(rt);
1740 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1741 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1742 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1747 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1748 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1749 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1750 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1751 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1752 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1753 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1754 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1755 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1756 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
1760 /* Update vectorial force */
1761 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
1762 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1763 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1765 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1766 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1767 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1769 /**************************
1770 * CALCULATE INTERACTIONS *
1771 **************************/
1773 r22 = _fjsp_mul_v2r8(rsq22,rinv22);
1775 /* Calculate table index by multiplying r with table scale and truncate to integer */
1776 rt = _fjsp_mul_v2r8(r22,vftabscale);
1777 itab_tmp = _fjsp_dtox_v2r8(rt);
1778 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1779 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1780 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1785 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1786 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1787 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1788 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1789 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1790 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1791 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1792 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1793 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1794 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
1798 /* Update vectorial force */
1799 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
1800 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1801 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1803 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1804 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1805 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1807 /**************************
1808 * CALCULATE INTERACTIONS *
1809 **************************/
1811 r23 = _fjsp_mul_v2r8(rsq23,rinv23);
1813 /* Calculate table index by multiplying r with table scale and truncate to integer */
1814 rt = _fjsp_mul_v2r8(r23,vftabscale);
1815 itab_tmp = _fjsp_dtox_v2r8(rt);
1816 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1817 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1818 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1823 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1824 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1825 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1826 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1827 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1828 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1829 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1830 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1831 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1832 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,FF),_fjsp_mul_v2r8(vftabscale,rinv23)));
1836 /* Update vectorial force */
1837 fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
1838 fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
1839 fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
1841 fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
1842 fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
1843 fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
1845 /**************************
1846 * CALCULATE INTERACTIONS *
1847 **************************/
1849 r31 = _fjsp_mul_v2r8(rsq31,rinv31);
1851 /* Calculate table index by multiplying r with table scale and truncate to integer */
1852 rt = _fjsp_mul_v2r8(r31,vftabscale);
1853 itab_tmp = _fjsp_dtox_v2r8(rt);
1854 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1855 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1856 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1861 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1862 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1863 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1864 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1865 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1866 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1867 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1868 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1869 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1870 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,FF),_fjsp_mul_v2r8(vftabscale,rinv31)));
1874 /* Update vectorial force */
1875 fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
1876 fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
1877 fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
1879 fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
1880 fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
1881 fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
1883 /**************************
1884 * CALCULATE INTERACTIONS *
1885 **************************/
1887 r32 = _fjsp_mul_v2r8(rsq32,rinv32);
1889 /* Calculate table index by multiplying r with table scale and truncate to integer */
1890 rt = _fjsp_mul_v2r8(r32,vftabscale);
1891 itab_tmp = _fjsp_dtox_v2r8(rt);
1892 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1893 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1894 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1899 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1900 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1901 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1902 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1903 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1904 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1905 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1906 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1907 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1908 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,FF),_fjsp_mul_v2r8(vftabscale,rinv32)));
1912 /* Update vectorial force */
1913 fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
1914 fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
1915 fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
1917 fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
1918 fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
1919 fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
1921 /**************************
1922 * CALCULATE INTERACTIONS *
1923 **************************/
1925 r33 = _fjsp_mul_v2r8(rsq33,rinv33);
1927 /* Calculate table index by multiplying r with table scale and truncate to integer */
1928 rt = _fjsp_mul_v2r8(r33,vftabscale);
1929 itab_tmp = _fjsp_dtox_v2r8(rt);
1930 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1931 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1932 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1937 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1938 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1939 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1940 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1941 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1942 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1943 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1944 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1945 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1946 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,FF),_fjsp_mul_v2r8(vftabscale,rinv33)));
1950 /* Update vectorial force */
1951 fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
1952 fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
1953 fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
1955 fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
1956 fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
1957 fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
1959 gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1961 /* Inner loop uses 432 flops */
1964 if(jidx<j_index_end)
1968 j_coord_offsetA = DIM*jnrA;
1970 /* load j atom coordinates */
1971 gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
1972 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
1973 &jy2,&jz2,&jx3,&jy3,&jz3);
1975 /* Calculate displacement vector */
1976 dx00 = _fjsp_sub_v2r8(ix0,jx0);
1977 dy00 = _fjsp_sub_v2r8(iy0,jy0);
1978 dz00 = _fjsp_sub_v2r8(iz0,jz0);
1979 dx11 = _fjsp_sub_v2r8(ix1,jx1);
1980 dy11 = _fjsp_sub_v2r8(iy1,jy1);
1981 dz11 = _fjsp_sub_v2r8(iz1,jz1);
1982 dx12 = _fjsp_sub_v2r8(ix1,jx2);
1983 dy12 = _fjsp_sub_v2r8(iy1,jy2);
1984 dz12 = _fjsp_sub_v2r8(iz1,jz2);
1985 dx13 = _fjsp_sub_v2r8(ix1,jx3);
1986 dy13 = _fjsp_sub_v2r8(iy1,jy3);
1987 dz13 = _fjsp_sub_v2r8(iz1,jz3);
1988 dx21 = _fjsp_sub_v2r8(ix2,jx1);
1989 dy21 = _fjsp_sub_v2r8(iy2,jy1);
1990 dz21 = _fjsp_sub_v2r8(iz2,jz1);
1991 dx22 = _fjsp_sub_v2r8(ix2,jx2);
1992 dy22 = _fjsp_sub_v2r8(iy2,jy2);
1993 dz22 = _fjsp_sub_v2r8(iz2,jz2);
1994 dx23 = _fjsp_sub_v2r8(ix2,jx3);
1995 dy23 = _fjsp_sub_v2r8(iy2,jy3);
1996 dz23 = _fjsp_sub_v2r8(iz2,jz3);
1997 dx31 = _fjsp_sub_v2r8(ix3,jx1);
1998 dy31 = _fjsp_sub_v2r8(iy3,jy1);
1999 dz31 = _fjsp_sub_v2r8(iz3,jz1);
2000 dx32 = _fjsp_sub_v2r8(ix3,jx2);
2001 dy32 = _fjsp_sub_v2r8(iy3,jy2);
2002 dz32 = _fjsp_sub_v2r8(iz3,jz2);
2003 dx33 = _fjsp_sub_v2r8(ix3,jx3);
2004 dy33 = _fjsp_sub_v2r8(iy3,jy3);
2005 dz33 = _fjsp_sub_v2r8(iz3,jz3);
2007 /* Calculate squared distance and things based on it */
2008 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
2009 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
2010 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
2011 rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
2012 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
2013 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
2014 rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
2015 rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
2016 rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
2017 rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
2019 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
2020 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
2021 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
2022 rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
2023 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
2024 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
2025 rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
2026 rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
2027 rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
2028 rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
2030 fjx0 = _fjsp_setzero_v2r8();
2031 fjy0 = _fjsp_setzero_v2r8();
2032 fjz0 = _fjsp_setzero_v2r8();
2033 fjx1 = _fjsp_setzero_v2r8();
2034 fjy1 = _fjsp_setzero_v2r8();
2035 fjz1 = _fjsp_setzero_v2r8();
2036 fjx2 = _fjsp_setzero_v2r8();
2037 fjy2 = _fjsp_setzero_v2r8();
2038 fjz2 = _fjsp_setzero_v2r8();
2039 fjx3 = _fjsp_setzero_v2r8();
2040 fjy3 = _fjsp_setzero_v2r8();
2041 fjz3 = _fjsp_setzero_v2r8();
2043 /**************************
2044 * CALCULATE INTERACTIONS *
2045 **************************/
2047 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
2049 /* Calculate table index by multiplying r with table scale and truncate to integer */
2050 rt = _fjsp_mul_v2r8(r00,vftabscale);
2051 itab_tmp = _fjsp_dtox_v2r8(rt);
2052 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2053 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
2054 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2059 /* CUBIC SPLINE TABLE DISPERSION */
2062 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2063 F = _fjsp_setzero_v2r8();
2064 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2065 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
2066 H = _fjsp_setzero_v2r8();
2067 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2068 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
2069 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
2070 fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
2072 /* CUBIC SPLINE TABLE REPULSION */
2073 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
2074 F = _fjsp_setzero_v2r8();
2075 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2076 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
2077 H = _fjsp_setzero_v2r8();
2078 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2079 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
2080 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
2081 fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
2082 fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
2086 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2088 /* Update vectorial force */
2089 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
2090 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
2091 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
2093 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
2094 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
2095 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
2097 /**************************
2098 * CALCULATE INTERACTIONS *
2099 **************************/
2101 r11 = _fjsp_mul_v2r8(rsq11,rinv11);
2103 /* Calculate table index by multiplying r with table scale and truncate to integer */
2104 rt = _fjsp_mul_v2r8(r11,vftabscale);
2105 itab_tmp = _fjsp_dtox_v2r8(rt);
2106 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2107 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
2108 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2113 /* CUBIC SPLINE TABLE ELECTROSTATICS */
2114 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2115 F = _fjsp_setzero_v2r8();
2116 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2117 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2118 H = _fjsp_setzero_v2r8();
2119 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2120 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2121 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2122 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
2126 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2128 /* Update vectorial force */
2129 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
2130 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
2131 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
2133 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
2134 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
2135 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
2137 /**************************
2138 * CALCULATE INTERACTIONS *
2139 **************************/
2141 r12 = _fjsp_mul_v2r8(rsq12,rinv12);
2143 /* Calculate table index by multiplying r with table scale and truncate to integer */
2144 rt = _fjsp_mul_v2r8(r12,vftabscale);
2145 itab_tmp = _fjsp_dtox_v2r8(rt);
2146 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2147 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
2148 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2153 /* CUBIC SPLINE TABLE ELECTROSTATICS */
2154 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2155 F = _fjsp_setzero_v2r8();
2156 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2157 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2158 H = _fjsp_setzero_v2r8();
2159 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2160 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2161 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2162 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
2166 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2168 /* Update vectorial force */
2169 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
2170 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
2171 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
2173 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
2174 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
2175 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
2177 /**************************
2178 * CALCULATE INTERACTIONS *
2179 **************************/
2181 r13 = _fjsp_mul_v2r8(rsq13,rinv13);
2183 /* Calculate table index by multiplying r with table scale and truncate to integer */
2184 rt = _fjsp_mul_v2r8(r13,vftabscale);
2185 itab_tmp = _fjsp_dtox_v2r8(rt);
2186 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2187 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
2188 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2193 /* CUBIC SPLINE TABLE ELECTROSTATICS */
2194 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2195 F = _fjsp_setzero_v2r8();
2196 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2197 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2198 H = _fjsp_setzero_v2r8();
2199 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2200 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2201 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2202 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,FF),_fjsp_mul_v2r8(vftabscale,rinv13)));
2206 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2208 /* Update vectorial force */
2209 fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
2210 fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
2211 fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
2213 fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
2214 fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
2215 fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
2217 /**************************
2218 * CALCULATE INTERACTIONS *
2219 **************************/
2221 r21 = _fjsp_mul_v2r8(rsq21,rinv21);
2223 /* Calculate table index by multiplying r with table scale and truncate to integer */
2224 rt = _fjsp_mul_v2r8(r21,vftabscale);
2225 itab_tmp = _fjsp_dtox_v2r8(rt);
2226 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2227 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
2228 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2233 /* CUBIC SPLINE TABLE ELECTROSTATICS */
2234 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2235 F = _fjsp_setzero_v2r8();
2236 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2237 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2238 H = _fjsp_setzero_v2r8();
2239 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2240 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2241 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2242 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
2246 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2248 /* Update vectorial force */
2249 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
2250 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
2251 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
2253 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
2254 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
2255 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
2257 /**************************
2258 * CALCULATE INTERACTIONS *
2259 **************************/
2261 r22 = _fjsp_mul_v2r8(rsq22,rinv22);
2263 /* Calculate table index by multiplying r with table scale and truncate to integer */
2264 rt = _fjsp_mul_v2r8(r22,vftabscale);
2265 itab_tmp = _fjsp_dtox_v2r8(rt);
2266 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2267 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
2268 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2273 /* CUBIC SPLINE TABLE ELECTROSTATICS */
2274 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2275 F = _fjsp_setzero_v2r8();
2276 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2277 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2278 H = _fjsp_setzero_v2r8();
2279 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2280 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2281 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2282 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
2286 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2288 /* Update vectorial force */
2289 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
2290 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
2291 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
2293 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
2294 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
2295 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
2297 /**************************
2298 * CALCULATE INTERACTIONS *
2299 **************************/
2301 r23 = _fjsp_mul_v2r8(rsq23,rinv23);
2303 /* Calculate table index by multiplying r with table scale and truncate to integer */
2304 rt = _fjsp_mul_v2r8(r23,vftabscale);
2305 itab_tmp = _fjsp_dtox_v2r8(rt);
2306 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2307 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
2308 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2313 /* CUBIC SPLINE TABLE ELECTROSTATICS */
2314 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2315 F = _fjsp_setzero_v2r8();
2316 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2317 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2318 H = _fjsp_setzero_v2r8();
2319 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2320 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2321 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2322 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,FF),_fjsp_mul_v2r8(vftabscale,rinv23)));
2326 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2328 /* Update vectorial force */
2329 fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
2330 fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
2331 fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
2333 fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
2334 fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
2335 fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
2337 /**************************
2338 * CALCULATE INTERACTIONS *
2339 **************************/
2341 r31 = _fjsp_mul_v2r8(rsq31,rinv31);
2343 /* Calculate table index by multiplying r with table scale and truncate to integer */
2344 rt = _fjsp_mul_v2r8(r31,vftabscale);
2345 itab_tmp = _fjsp_dtox_v2r8(rt);
2346 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2347 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
2348 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2353 /* CUBIC SPLINE TABLE ELECTROSTATICS */
2354 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2355 F = _fjsp_setzero_v2r8();
2356 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2357 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2358 H = _fjsp_setzero_v2r8();
2359 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2360 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2361 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2362 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,FF),_fjsp_mul_v2r8(vftabscale,rinv31)));
2366 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2368 /* Update vectorial force */
2369 fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
2370 fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
2371 fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
2373 fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
2374 fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
2375 fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
2377 /**************************
2378 * CALCULATE INTERACTIONS *
2379 **************************/
2381 r32 = _fjsp_mul_v2r8(rsq32,rinv32);
2383 /* Calculate table index by multiplying r with table scale and truncate to integer */
2384 rt = _fjsp_mul_v2r8(r32,vftabscale);
2385 itab_tmp = _fjsp_dtox_v2r8(rt);
2386 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2387 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
2388 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2393 /* CUBIC SPLINE TABLE ELECTROSTATICS */
2394 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2395 F = _fjsp_setzero_v2r8();
2396 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2397 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2398 H = _fjsp_setzero_v2r8();
2399 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2400 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2401 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2402 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,FF),_fjsp_mul_v2r8(vftabscale,rinv32)));
2406 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2408 /* Update vectorial force */
2409 fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
2410 fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
2411 fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
2413 fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
2414 fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
2415 fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
2417 /**************************
2418 * CALCULATE INTERACTIONS *
2419 **************************/
2421 r33 = _fjsp_mul_v2r8(rsq33,rinv33);
2423 /* Calculate table index by multiplying r with table scale and truncate to integer */
2424 rt = _fjsp_mul_v2r8(r33,vftabscale);
2425 itab_tmp = _fjsp_dtox_v2r8(rt);
2426 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2427 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
2428 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2433 /* CUBIC SPLINE TABLE ELECTROSTATICS */
2434 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2435 F = _fjsp_setzero_v2r8();
2436 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2437 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2438 H = _fjsp_setzero_v2r8();
2439 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2440 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2441 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2442 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,FF),_fjsp_mul_v2r8(vftabscale,rinv33)));
2446 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2448 /* Update vectorial force */
2449 fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
2450 fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
2451 fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
2453 fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
2454 fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
2455 fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
2457 gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
2459 /* Inner loop uses 432 flops */
2462 /* End of innermost loop */
2464 gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
2465 f+i_coord_offset,fshift+i_shift_offset);
2467 /* Increment number of inner iterations */
2468 inneriter += j_index_end - j_index_start;
2470 /* Outer loop uses 24 flops */
2473 /* Increment number of outer iterations */
2476 /* Update outer/inner flops */
2478 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*432);