2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
44 #include "../nb_kernel.h"
45 #include "types/simple.h"
46 #include "gromacs/legacyheaders/vec.h"
49 #include "kernelutil_sparc64_hpc_ace_double.h"
52 * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double
53 * Electrostatics interaction: CubicSplineTable
54 * VdW interaction: LennardJones
55 * Geometry: Water3-Water3
56 * Calculate force/pot: PotentialAndForce
59 nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double
60 (t_nblist * gmx_restrict nlist,
61 rvec * gmx_restrict xx,
62 rvec * gmx_restrict ff,
63 t_forcerec * gmx_restrict fr,
64 t_mdatoms * gmx_restrict mdatoms,
65 nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
66 t_nrnb * gmx_restrict nrnb)
68 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
69 * just 0 for non-waters.
70 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
71 * jnr indices corresponding to data put in the two positions in the SIMD register.
73 int i_shift_offset,i_coord_offset,outeriter,inneriter;
74 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
76 int j_coord_offsetA,j_coord_offsetB;
77 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
79 real *shiftvec,*fshift,*x,*f;
80 _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
82 _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
84 _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
86 _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
87 int vdwjidx0A,vdwjidx0B;
88 _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
89 int vdwjidx1A,vdwjidx1B;
90 _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
91 int vdwjidx2A,vdwjidx2B;
92 _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
93 _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
94 _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
95 _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
96 _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
97 _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
98 _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
99 _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
100 _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
101 _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
102 _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
105 _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
108 _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
109 _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
110 _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
113 _fjsp_v2r8 dummy_mask,cutoff_mask;
114 _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
115 _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
116 union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
123 jindex = nlist->jindex;
125 shiftidx = nlist->shift;
127 shiftvec = fr->shift_vec[0];
128 fshift = fr->fshift[0];
129 facel = gmx_fjsp_set1_v2r8(fr->epsfac);
130 charge = mdatoms->chargeA;
131 nvdwtype = fr->ntype;
133 vdwtype = mdatoms->typeA;
135 vftab = kernel_data->table_elec->data;
136 vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
138 /* Setup water-specific parameters */
139 inr = nlist->iinr[0];
140 iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
141 iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
142 iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
143 vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
145 jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
146 jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
147 jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
148 vdwjidx0A = 2*vdwtype[inr+0];
149 qq00 = _fjsp_mul_v2r8(iq0,jq0);
150 c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
151 c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
152 qq01 = _fjsp_mul_v2r8(iq0,jq1);
153 qq02 = _fjsp_mul_v2r8(iq0,jq2);
154 qq10 = _fjsp_mul_v2r8(iq1,jq0);
155 qq11 = _fjsp_mul_v2r8(iq1,jq1);
156 qq12 = _fjsp_mul_v2r8(iq1,jq2);
157 qq20 = _fjsp_mul_v2r8(iq2,jq0);
158 qq21 = _fjsp_mul_v2r8(iq2,jq1);
159 qq22 = _fjsp_mul_v2r8(iq2,jq2);
161 /* Avoid stupid compiler warnings */
169 /* Start outer loop over neighborlists */
170 for(iidx=0; iidx<nri; iidx++)
172 /* Load shift vector for this list */
173 i_shift_offset = DIM*shiftidx[iidx];
175 /* Load limits for loop over neighbors */
176 j_index_start = jindex[iidx];
177 j_index_end = jindex[iidx+1];
179 /* Get outer coordinate index */
181 i_coord_offset = DIM*inr;
183 /* Load i particle coords and add shift vector */
184 gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
185 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
187 fix0 = _fjsp_setzero_v2r8();
188 fiy0 = _fjsp_setzero_v2r8();
189 fiz0 = _fjsp_setzero_v2r8();
190 fix1 = _fjsp_setzero_v2r8();
191 fiy1 = _fjsp_setzero_v2r8();
192 fiz1 = _fjsp_setzero_v2r8();
193 fix2 = _fjsp_setzero_v2r8();
194 fiy2 = _fjsp_setzero_v2r8();
195 fiz2 = _fjsp_setzero_v2r8();
197 /* Reset potential sums */
198 velecsum = _fjsp_setzero_v2r8();
199 vvdwsum = _fjsp_setzero_v2r8();
201 /* Start inner kernel loop */
202 for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
205 /* Get j neighbor index, and coordinate index */
208 j_coord_offsetA = DIM*jnrA;
209 j_coord_offsetB = DIM*jnrB;
211 /* load j atom coordinates */
212 gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
213 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
215 /* Calculate displacement vector */
216 dx00 = _fjsp_sub_v2r8(ix0,jx0);
217 dy00 = _fjsp_sub_v2r8(iy0,jy0);
218 dz00 = _fjsp_sub_v2r8(iz0,jz0);
219 dx01 = _fjsp_sub_v2r8(ix0,jx1);
220 dy01 = _fjsp_sub_v2r8(iy0,jy1);
221 dz01 = _fjsp_sub_v2r8(iz0,jz1);
222 dx02 = _fjsp_sub_v2r8(ix0,jx2);
223 dy02 = _fjsp_sub_v2r8(iy0,jy2);
224 dz02 = _fjsp_sub_v2r8(iz0,jz2);
225 dx10 = _fjsp_sub_v2r8(ix1,jx0);
226 dy10 = _fjsp_sub_v2r8(iy1,jy0);
227 dz10 = _fjsp_sub_v2r8(iz1,jz0);
228 dx11 = _fjsp_sub_v2r8(ix1,jx1);
229 dy11 = _fjsp_sub_v2r8(iy1,jy1);
230 dz11 = _fjsp_sub_v2r8(iz1,jz1);
231 dx12 = _fjsp_sub_v2r8(ix1,jx2);
232 dy12 = _fjsp_sub_v2r8(iy1,jy2);
233 dz12 = _fjsp_sub_v2r8(iz1,jz2);
234 dx20 = _fjsp_sub_v2r8(ix2,jx0);
235 dy20 = _fjsp_sub_v2r8(iy2,jy0);
236 dz20 = _fjsp_sub_v2r8(iz2,jz0);
237 dx21 = _fjsp_sub_v2r8(ix2,jx1);
238 dy21 = _fjsp_sub_v2r8(iy2,jy1);
239 dz21 = _fjsp_sub_v2r8(iz2,jz1);
240 dx22 = _fjsp_sub_v2r8(ix2,jx2);
241 dy22 = _fjsp_sub_v2r8(iy2,jy2);
242 dz22 = _fjsp_sub_v2r8(iz2,jz2);
244 /* Calculate squared distance and things based on it */
245 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
246 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
247 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
248 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
249 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
250 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
251 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
252 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
253 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
255 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
256 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
257 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
258 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
259 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
260 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
261 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
262 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
263 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
265 rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
267 fjx0 = _fjsp_setzero_v2r8();
268 fjy0 = _fjsp_setzero_v2r8();
269 fjz0 = _fjsp_setzero_v2r8();
270 fjx1 = _fjsp_setzero_v2r8();
271 fjy1 = _fjsp_setzero_v2r8();
272 fjz1 = _fjsp_setzero_v2r8();
273 fjx2 = _fjsp_setzero_v2r8();
274 fjy2 = _fjsp_setzero_v2r8();
275 fjz2 = _fjsp_setzero_v2r8();
277 /**************************
278 * CALCULATE INTERACTIONS *
279 **************************/
281 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
283 /* Calculate table index by multiplying r with table scale and truncate to integer */
284 rt = _fjsp_mul_v2r8(r00,vftabscale);
285 itab_tmp = _fjsp_dtox_v2r8(rt);
286 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
287 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
288 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
293 /* CUBIC SPLINE TABLE ELECTROSTATICS */
294 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
295 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
296 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
297 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
298 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
299 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
300 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
301 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
302 velec = _fjsp_mul_v2r8(qq00,VV);
303 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
304 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
306 /* LENNARD-JONES DISPERSION/REPULSION */
308 rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
309 vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
310 vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
311 vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
312 fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
314 /* Update potential sum for this i atom from the interaction with this j atom. */
315 velecsum = _fjsp_add_v2r8(velecsum,velec);
316 vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
318 fscal = _fjsp_add_v2r8(felec,fvdw);
320 /* Update vectorial force */
321 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
322 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
323 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
325 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
326 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
327 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
329 /**************************
330 * CALCULATE INTERACTIONS *
331 **************************/
333 r01 = _fjsp_mul_v2r8(rsq01,rinv01);
335 /* Calculate table index by multiplying r with table scale and truncate to integer */
336 rt = _fjsp_mul_v2r8(r01,vftabscale);
337 itab_tmp = _fjsp_dtox_v2r8(rt);
338 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
339 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
340 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
345 /* CUBIC SPLINE TABLE ELECTROSTATICS */
346 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
347 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
348 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
349 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
350 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
351 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
352 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
353 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
354 velec = _fjsp_mul_v2r8(qq01,VV);
355 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
356 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
358 /* Update potential sum for this i atom from the interaction with this j atom. */
359 velecsum = _fjsp_add_v2r8(velecsum,velec);
363 /* Update vectorial force */
364 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
365 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
366 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
368 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
369 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
370 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
372 /**************************
373 * CALCULATE INTERACTIONS *
374 **************************/
376 r02 = _fjsp_mul_v2r8(rsq02,rinv02);
378 /* Calculate table index by multiplying r with table scale and truncate to integer */
379 rt = _fjsp_mul_v2r8(r02,vftabscale);
380 itab_tmp = _fjsp_dtox_v2r8(rt);
381 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
382 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
383 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
388 /* CUBIC SPLINE TABLE ELECTROSTATICS */
389 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
390 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
391 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
392 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
393 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
394 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
395 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
396 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
397 velec = _fjsp_mul_v2r8(qq02,VV);
398 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
399 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
401 /* Update potential sum for this i atom from the interaction with this j atom. */
402 velecsum = _fjsp_add_v2r8(velecsum,velec);
406 /* Update vectorial force */
407 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
408 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
409 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
411 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
412 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
413 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
415 /**************************
416 * CALCULATE INTERACTIONS *
417 **************************/
419 r10 = _fjsp_mul_v2r8(rsq10,rinv10);
421 /* Calculate table index by multiplying r with table scale and truncate to integer */
422 rt = _fjsp_mul_v2r8(r10,vftabscale);
423 itab_tmp = _fjsp_dtox_v2r8(rt);
424 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
425 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
426 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
431 /* CUBIC SPLINE TABLE ELECTROSTATICS */
432 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
433 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
434 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
435 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
436 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
437 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
438 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
439 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
440 velec = _fjsp_mul_v2r8(qq10,VV);
441 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
442 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
444 /* Update potential sum for this i atom from the interaction with this j atom. */
445 velecsum = _fjsp_add_v2r8(velecsum,velec);
449 /* Update vectorial force */
450 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
451 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
452 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
454 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
455 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
456 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
458 /**************************
459 * CALCULATE INTERACTIONS *
460 **************************/
462 r11 = _fjsp_mul_v2r8(rsq11,rinv11);
464 /* Calculate table index by multiplying r with table scale and truncate to integer */
465 rt = _fjsp_mul_v2r8(r11,vftabscale);
466 itab_tmp = _fjsp_dtox_v2r8(rt);
467 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
468 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
469 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
474 /* CUBIC SPLINE TABLE ELECTROSTATICS */
475 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
476 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
477 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
478 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
479 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
480 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
481 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
482 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
483 velec = _fjsp_mul_v2r8(qq11,VV);
484 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
485 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
487 /* Update potential sum for this i atom from the interaction with this j atom. */
488 velecsum = _fjsp_add_v2r8(velecsum,velec);
492 /* Update vectorial force */
493 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
494 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
495 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
497 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
498 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
499 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
501 /**************************
502 * CALCULATE INTERACTIONS *
503 **************************/
505 r12 = _fjsp_mul_v2r8(rsq12,rinv12);
507 /* Calculate table index by multiplying r with table scale and truncate to integer */
508 rt = _fjsp_mul_v2r8(r12,vftabscale);
509 itab_tmp = _fjsp_dtox_v2r8(rt);
510 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
511 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
512 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
517 /* CUBIC SPLINE TABLE ELECTROSTATICS */
518 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
519 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
520 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
521 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
522 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
523 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
524 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
525 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
526 velec = _fjsp_mul_v2r8(qq12,VV);
527 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
528 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
530 /* Update potential sum for this i atom from the interaction with this j atom. */
531 velecsum = _fjsp_add_v2r8(velecsum,velec);
535 /* Update vectorial force */
536 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
537 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
538 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
540 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
541 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
542 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
544 /**************************
545 * CALCULATE INTERACTIONS *
546 **************************/
548 r20 = _fjsp_mul_v2r8(rsq20,rinv20);
550 /* Calculate table index by multiplying r with table scale and truncate to integer */
551 rt = _fjsp_mul_v2r8(r20,vftabscale);
552 itab_tmp = _fjsp_dtox_v2r8(rt);
553 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
554 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
555 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
560 /* CUBIC SPLINE TABLE ELECTROSTATICS */
561 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
562 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
563 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
564 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
565 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
566 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
567 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
568 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
569 velec = _fjsp_mul_v2r8(qq20,VV);
570 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
571 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
573 /* Update potential sum for this i atom from the interaction with this j atom. */
574 velecsum = _fjsp_add_v2r8(velecsum,velec);
578 /* Update vectorial force */
579 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
580 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
581 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
583 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
584 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
585 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
587 /**************************
588 * CALCULATE INTERACTIONS *
589 **************************/
591 r21 = _fjsp_mul_v2r8(rsq21,rinv21);
593 /* Calculate table index by multiplying r with table scale and truncate to integer */
594 rt = _fjsp_mul_v2r8(r21,vftabscale);
595 itab_tmp = _fjsp_dtox_v2r8(rt);
596 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
597 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
598 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
603 /* CUBIC SPLINE TABLE ELECTROSTATICS */
604 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
605 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
606 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
607 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
608 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
609 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
610 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
611 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
612 velec = _fjsp_mul_v2r8(qq21,VV);
613 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
614 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
616 /* Update potential sum for this i atom from the interaction with this j atom. */
617 velecsum = _fjsp_add_v2r8(velecsum,velec);
621 /* Update vectorial force */
622 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
623 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
624 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
626 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
627 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
628 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
630 /**************************
631 * CALCULATE INTERACTIONS *
632 **************************/
634 r22 = _fjsp_mul_v2r8(rsq22,rinv22);
636 /* Calculate table index by multiplying r with table scale and truncate to integer */
637 rt = _fjsp_mul_v2r8(r22,vftabscale);
638 itab_tmp = _fjsp_dtox_v2r8(rt);
639 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
640 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
641 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
646 /* CUBIC SPLINE TABLE ELECTROSTATICS */
647 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
648 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
649 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
650 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
651 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
652 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
653 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
654 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
655 velec = _fjsp_mul_v2r8(qq22,VV);
656 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
657 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
659 /* Update potential sum for this i atom from the interaction with this j atom. */
660 velecsum = _fjsp_add_v2r8(velecsum,velec);
664 /* Update vectorial force */
665 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
666 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
667 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
669 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
670 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
671 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
673 gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
675 /* Inner loop uses 427 flops */
682 j_coord_offsetA = DIM*jnrA;
684 /* load j atom coordinates */
685 gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
686 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
688 /* Calculate displacement vector */
689 dx00 = _fjsp_sub_v2r8(ix0,jx0);
690 dy00 = _fjsp_sub_v2r8(iy0,jy0);
691 dz00 = _fjsp_sub_v2r8(iz0,jz0);
692 dx01 = _fjsp_sub_v2r8(ix0,jx1);
693 dy01 = _fjsp_sub_v2r8(iy0,jy1);
694 dz01 = _fjsp_sub_v2r8(iz0,jz1);
695 dx02 = _fjsp_sub_v2r8(ix0,jx2);
696 dy02 = _fjsp_sub_v2r8(iy0,jy2);
697 dz02 = _fjsp_sub_v2r8(iz0,jz2);
698 dx10 = _fjsp_sub_v2r8(ix1,jx0);
699 dy10 = _fjsp_sub_v2r8(iy1,jy0);
700 dz10 = _fjsp_sub_v2r8(iz1,jz0);
701 dx11 = _fjsp_sub_v2r8(ix1,jx1);
702 dy11 = _fjsp_sub_v2r8(iy1,jy1);
703 dz11 = _fjsp_sub_v2r8(iz1,jz1);
704 dx12 = _fjsp_sub_v2r8(ix1,jx2);
705 dy12 = _fjsp_sub_v2r8(iy1,jy2);
706 dz12 = _fjsp_sub_v2r8(iz1,jz2);
707 dx20 = _fjsp_sub_v2r8(ix2,jx0);
708 dy20 = _fjsp_sub_v2r8(iy2,jy0);
709 dz20 = _fjsp_sub_v2r8(iz2,jz0);
710 dx21 = _fjsp_sub_v2r8(ix2,jx1);
711 dy21 = _fjsp_sub_v2r8(iy2,jy1);
712 dz21 = _fjsp_sub_v2r8(iz2,jz1);
713 dx22 = _fjsp_sub_v2r8(ix2,jx2);
714 dy22 = _fjsp_sub_v2r8(iy2,jy2);
715 dz22 = _fjsp_sub_v2r8(iz2,jz2);
717 /* Calculate squared distance and things based on it */
718 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
719 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
720 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
721 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
722 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
723 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
724 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
725 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
726 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
728 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
729 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
730 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
731 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
732 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
733 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
734 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
735 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
736 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
738 rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
740 fjx0 = _fjsp_setzero_v2r8();
741 fjy0 = _fjsp_setzero_v2r8();
742 fjz0 = _fjsp_setzero_v2r8();
743 fjx1 = _fjsp_setzero_v2r8();
744 fjy1 = _fjsp_setzero_v2r8();
745 fjz1 = _fjsp_setzero_v2r8();
746 fjx2 = _fjsp_setzero_v2r8();
747 fjy2 = _fjsp_setzero_v2r8();
748 fjz2 = _fjsp_setzero_v2r8();
750 /**************************
751 * CALCULATE INTERACTIONS *
752 **************************/
754 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
756 /* Calculate table index by multiplying r with table scale and truncate to integer */
757 rt = _fjsp_mul_v2r8(r00,vftabscale);
758 itab_tmp = _fjsp_dtox_v2r8(rt);
759 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
760 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
761 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
766 /* CUBIC SPLINE TABLE ELECTROSTATICS */
767 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
768 F = _fjsp_setzero_v2r8();
769 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
770 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
771 H = _fjsp_setzero_v2r8();
772 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
773 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
774 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
775 velec = _fjsp_mul_v2r8(qq00,VV);
776 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
777 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
779 /* LENNARD-JONES DISPERSION/REPULSION */
781 rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
782 vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
783 vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
784 vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
785 fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
787 /* Update potential sum for this i atom from the interaction with this j atom. */
788 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
789 velecsum = _fjsp_add_v2r8(velecsum,velec);
790 vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
791 vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
793 fscal = _fjsp_add_v2r8(felec,fvdw);
795 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
797 /* Update vectorial force */
798 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
799 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
800 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
802 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
803 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
804 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
806 /**************************
807 * CALCULATE INTERACTIONS *
808 **************************/
810 r01 = _fjsp_mul_v2r8(rsq01,rinv01);
812 /* Calculate table index by multiplying r with table scale and truncate to integer */
813 rt = _fjsp_mul_v2r8(r01,vftabscale);
814 itab_tmp = _fjsp_dtox_v2r8(rt);
815 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
816 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
817 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
822 /* CUBIC SPLINE TABLE ELECTROSTATICS */
823 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
824 F = _fjsp_setzero_v2r8();
825 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
826 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
827 H = _fjsp_setzero_v2r8();
828 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
829 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
830 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
831 velec = _fjsp_mul_v2r8(qq01,VV);
832 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
833 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
835 /* Update potential sum for this i atom from the interaction with this j atom. */
836 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
837 velecsum = _fjsp_add_v2r8(velecsum,velec);
841 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
843 /* Update vectorial force */
844 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
845 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
846 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
848 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
849 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
850 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
852 /**************************
853 * CALCULATE INTERACTIONS *
854 **************************/
856 r02 = _fjsp_mul_v2r8(rsq02,rinv02);
858 /* Calculate table index by multiplying r with table scale and truncate to integer */
859 rt = _fjsp_mul_v2r8(r02,vftabscale);
860 itab_tmp = _fjsp_dtox_v2r8(rt);
861 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
862 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
863 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
868 /* CUBIC SPLINE TABLE ELECTROSTATICS */
869 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
870 F = _fjsp_setzero_v2r8();
871 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
872 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
873 H = _fjsp_setzero_v2r8();
874 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
875 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
876 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
877 velec = _fjsp_mul_v2r8(qq02,VV);
878 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
879 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
881 /* Update potential sum for this i atom from the interaction with this j atom. */
882 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
883 velecsum = _fjsp_add_v2r8(velecsum,velec);
887 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
889 /* Update vectorial force */
890 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
891 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
892 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
894 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
895 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
896 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
898 /**************************
899 * CALCULATE INTERACTIONS *
900 **************************/
902 r10 = _fjsp_mul_v2r8(rsq10,rinv10);
904 /* Calculate table index by multiplying r with table scale and truncate to integer */
905 rt = _fjsp_mul_v2r8(r10,vftabscale);
906 itab_tmp = _fjsp_dtox_v2r8(rt);
907 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
908 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
909 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
914 /* CUBIC SPLINE TABLE ELECTROSTATICS */
915 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
916 F = _fjsp_setzero_v2r8();
917 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
918 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
919 H = _fjsp_setzero_v2r8();
920 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
921 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
922 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
923 velec = _fjsp_mul_v2r8(qq10,VV);
924 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
925 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
927 /* Update potential sum for this i atom from the interaction with this j atom. */
928 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
929 velecsum = _fjsp_add_v2r8(velecsum,velec);
933 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
935 /* Update vectorial force */
936 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
937 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
938 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
940 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
941 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
942 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
944 /**************************
945 * CALCULATE INTERACTIONS *
946 **************************/
948 r11 = _fjsp_mul_v2r8(rsq11,rinv11);
950 /* Calculate table index by multiplying r with table scale and truncate to integer */
951 rt = _fjsp_mul_v2r8(r11,vftabscale);
952 itab_tmp = _fjsp_dtox_v2r8(rt);
953 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
954 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
955 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
960 /* CUBIC SPLINE TABLE ELECTROSTATICS */
961 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
962 F = _fjsp_setzero_v2r8();
963 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
964 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
965 H = _fjsp_setzero_v2r8();
966 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
967 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
968 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
969 velec = _fjsp_mul_v2r8(qq11,VV);
970 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
971 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
973 /* Update potential sum for this i atom from the interaction with this j atom. */
974 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
975 velecsum = _fjsp_add_v2r8(velecsum,velec);
979 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
981 /* Update vectorial force */
982 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
983 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
984 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
986 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
987 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
988 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
990 /**************************
991 * CALCULATE INTERACTIONS *
992 **************************/
994 r12 = _fjsp_mul_v2r8(rsq12,rinv12);
996 /* Calculate table index by multiplying r with table scale and truncate to integer */
997 rt = _fjsp_mul_v2r8(r12,vftabscale);
998 itab_tmp = _fjsp_dtox_v2r8(rt);
999 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1000 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1001 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1006 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1007 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1008 F = _fjsp_setzero_v2r8();
1009 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1010 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1011 H = _fjsp_setzero_v2r8();
1012 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1013 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1014 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
1015 velec = _fjsp_mul_v2r8(qq12,VV);
1016 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1017 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
1019 /* Update potential sum for this i atom from the interaction with this j atom. */
1020 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1021 velecsum = _fjsp_add_v2r8(velecsum,velec);
1025 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1027 /* Update vectorial force */
1028 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
1029 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1030 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1032 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1033 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1034 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1036 /**************************
1037 * CALCULATE INTERACTIONS *
1038 **************************/
1040 r20 = _fjsp_mul_v2r8(rsq20,rinv20);
1042 /* Calculate table index by multiplying r with table scale and truncate to integer */
1043 rt = _fjsp_mul_v2r8(r20,vftabscale);
1044 itab_tmp = _fjsp_dtox_v2r8(rt);
1045 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1046 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1047 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1052 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1053 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1054 F = _fjsp_setzero_v2r8();
1055 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1056 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1057 H = _fjsp_setzero_v2r8();
1058 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1059 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1060 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
1061 velec = _fjsp_mul_v2r8(qq20,VV);
1062 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1063 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
1065 /* Update potential sum for this i atom from the interaction with this j atom. */
1066 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1067 velecsum = _fjsp_add_v2r8(velecsum,velec);
1071 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1073 /* Update vectorial force */
1074 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
1075 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1076 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1078 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1079 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1080 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1082 /**************************
1083 * CALCULATE INTERACTIONS *
1084 **************************/
1086 r21 = _fjsp_mul_v2r8(rsq21,rinv21);
1088 /* Calculate table index by multiplying r with table scale and truncate to integer */
1089 rt = _fjsp_mul_v2r8(r21,vftabscale);
1090 itab_tmp = _fjsp_dtox_v2r8(rt);
1091 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1092 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1093 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1098 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1099 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1100 F = _fjsp_setzero_v2r8();
1101 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1102 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1103 H = _fjsp_setzero_v2r8();
1104 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1105 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1106 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
1107 velec = _fjsp_mul_v2r8(qq21,VV);
1108 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1109 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
1111 /* Update potential sum for this i atom from the interaction with this j atom. */
1112 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1113 velecsum = _fjsp_add_v2r8(velecsum,velec);
1117 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1119 /* Update vectorial force */
1120 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
1121 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1122 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1124 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1125 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1126 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1128 /**************************
1129 * CALCULATE INTERACTIONS *
1130 **************************/
1132 r22 = _fjsp_mul_v2r8(rsq22,rinv22);
1134 /* Calculate table index by multiplying r with table scale and truncate to integer */
1135 rt = _fjsp_mul_v2r8(r22,vftabscale);
1136 itab_tmp = _fjsp_dtox_v2r8(rt);
1137 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1138 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1139 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1144 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1145 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1146 F = _fjsp_setzero_v2r8();
1147 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1148 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1149 H = _fjsp_setzero_v2r8();
1150 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1151 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1152 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
1153 velec = _fjsp_mul_v2r8(qq22,VV);
1154 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1155 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
1157 /* Update potential sum for this i atom from the interaction with this j atom. */
1158 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1159 velecsum = _fjsp_add_v2r8(velecsum,velec);
1163 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1165 /* Update vectorial force */
1166 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
1167 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1168 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1170 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1171 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1172 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1174 gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1176 /* Inner loop uses 427 flops */
1179 /* End of innermost loop */
1181 gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
1182 f+i_coord_offset,fshift+i_shift_offset);
1185 /* Update potential energies */
1186 gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
1187 gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
1189 /* Increment number of inner iterations */
1190 inneriter += j_index_end - j_index_start;
1192 /* Outer loop uses 20 flops */
1195 /* Increment number of outer iterations */
1198 /* Update outer/inner flops */
1200 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*427);
1203 * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double
1204 * Electrostatics interaction: CubicSplineTable
1205 * VdW interaction: LennardJones
1206 * Geometry: Water3-Water3
1207 * Calculate force/pot: Force
1210 nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double
1211 (t_nblist * gmx_restrict nlist,
1212 rvec * gmx_restrict xx,
1213 rvec * gmx_restrict ff,
1214 t_forcerec * gmx_restrict fr,
1215 t_mdatoms * gmx_restrict mdatoms,
1216 nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
1217 t_nrnb * gmx_restrict nrnb)
1219 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1220 * just 0 for non-waters.
1221 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
1222 * jnr indices corresponding to data put in the two positions in the SIMD register.
1224 int i_shift_offset,i_coord_offset,outeriter,inneriter;
1225 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
1227 int j_coord_offsetA,j_coord_offsetB;
1228 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
1229 real rcutoff_scalar;
1230 real *shiftvec,*fshift,*x,*f;
1231 _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1233 _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
1235 _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1237 _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1238 int vdwjidx0A,vdwjidx0B;
1239 _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
1240 int vdwjidx1A,vdwjidx1B;
1241 _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1242 int vdwjidx2A,vdwjidx2B;
1243 _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1244 _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
1245 _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
1246 _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
1247 _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
1248 _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1249 _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1250 _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
1251 _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1252 _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1253 _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
1256 _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
1259 _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
1260 _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
1261 _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
1263 _fjsp_v2r8 itab_tmp;
1264 _fjsp_v2r8 dummy_mask,cutoff_mask;
1265 _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
1266 _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
1267 union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
1274 jindex = nlist->jindex;
1276 shiftidx = nlist->shift;
1278 shiftvec = fr->shift_vec[0];
1279 fshift = fr->fshift[0];
1280 facel = gmx_fjsp_set1_v2r8(fr->epsfac);
1281 charge = mdatoms->chargeA;
1282 nvdwtype = fr->ntype;
1283 vdwparam = fr->nbfp;
1284 vdwtype = mdatoms->typeA;
1286 vftab = kernel_data->table_elec->data;
1287 vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
1289 /* Setup water-specific parameters */
1290 inr = nlist->iinr[0];
1291 iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
1292 iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
1293 iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
1294 vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
1296 jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
1297 jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
1298 jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
1299 vdwjidx0A = 2*vdwtype[inr+0];
1300 qq00 = _fjsp_mul_v2r8(iq0,jq0);
1301 c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
1302 c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
1303 qq01 = _fjsp_mul_v2r8(iq0,jq1);
1304 qq02 = _fjsp_mul_v2r8(iq0,jq2);
1305 qq10 = _fjsp_mul_v2r8(iq1,jq0);
1306 qq11 = _fjsp_mul_v2r8(iq1,jq1);
1307 qq12 = _fjsp_mul_v2r8(iq1,jq2);
1308 qq20 = _fjsp_mul_v2r8(iq2,jq0);
1309 qq21 = _fjsp_mul_v2r8(iq2,jq1);
1310 qq22 = _fjsp_mul_v2r8(iq2,jq2);
1312 /* Avoid stupid compiler warnings */
1314 j_coord_offsetA = 0;
1315 j_coord_offsetB = 0;
1320 /* Start outer loop over neighborlists */
1321 for(iidx=0; iidx<nri; iidx++)
1323 /* Load shift vector for this list */
1324 i_shift_offset = DIM*shiftidx[iidx];
1326 /* Load limits for loop over neighbors */
1327 j_index_start = jindex[iidx];
1328 j_index_end = jindex[iidx+1];
1330 /* Get outer coordinate index */
1332 i_coord_offset = DIM*inr;
1334 /* Load i particle coords and add shift vector */
1335 gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
1336 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
1338 fix0 = _fjsp_setzero_v2r8();
1339 fiy0 = _fjsp_setzero_v2r8();
1340 fiz0 = _fjsp_setzero_v2r8();
1341 fix1 = _fjsp_setzero_v2r8();
1342 fiy1 = _fjsp_setzero_v2r8();
1343 fiz1 = _fjsp_setzero_v2r8();
1344 fix2 = _fjsp_setzero_v2r8();
1345 fiy2 = _fjsp_setzero_v2r8();
1346 fiz2 = _fjsp_setzero_v2r8();
1348 /* Start inner kernel loop */
1349 for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1352 /* Get j neighbor index, and coordinate index */
1354 jnrB = jjnr[jidx+1];
1355 j_coord_offsetA = DIM*jnrA;
1356 j_coord_offsetB = DIM*jnrB;
1358 /* load j atom coordinates */
1359 gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
1360 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1362 /* Calculate displacement vector */
1363 dx00 = _fjsp_sub_v2r8(ix0,jx0);
1364 dy00 = _fjsp_sub_v2r8(iy0,jy0);
1365 dz00 = _fjsp_sub_v2r8(iz0,jz0);
1366 dx01 = _fjsp_sub_v2r8(ix0,jx1);
1367 dy01 = _fjsp_sub_v2r8(iy0,jy1);
1368 dz01 = _fjsp_sub_v2r8(iz0,jz1);
1369 dx02 = _fjsp_sub_v2r8(ix0,jx2);
1370 dy02 = _fjsp_sub_v2r8(iy0,jy2);
1371 dz02 = _fjsp_sub_v2r8(iz0,jz2);
1372 dx10 = _fjsp_sub_v2r8(ix1,jx0);
1373 dy10 = _fjsp_sub_v2r8(iy1,jy0);
1374 dz10 = _fjsp_sub_v2r8(iz1,jz0);
1375 dx11 = _fjsp_sub_v2r8(ix1,jx1);
1376 dy11 = _fjsp_sub_v2r8(iy1,jy1);
1377 dz11 = _fjsp_sub_v2r8(iz1,jz1);
1378 dx12 = _fjsp_sub_v2r8(ix1,jx2);
1379 dy12 = _fjsp_sub_v2r8(iy1,jy2);
1380 dz12 = _fjsp_sub_v2r8(iz1,jz2);
1381 dx20 = _fjsp_sub_v2r8(ix2,jx0);
1382 dy20 = _fjsp_sub_v2r8(iy2,jy0);
1383 dz20 = _fjsp_sub_v2r8(iz2,jz0);
1384 dx21 = _fjsp_sub_v2r8(ix2,jx1);
1385 dy21 = _fjsp_sub_v2r8(iy2,jy1);
1386 dz21 = _fjsp_sub_v2r8(iz2,jz1);
1387 dx22 = _fjsp_sub_v2r8(ix2,jx2);
1388 dy22 = _fjsp_sub_v2r8(iy2,jy2);
1389 dz22 = _fjsp_sub_v2r8(iz2,jz2);
1391 /* Calculate squared distance and things based on it */
1392 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1393 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1394 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1395 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1396 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1397 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1398 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1399 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1400 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1402 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
1403 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
1404 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
1405 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
1406 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
1407 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
1408 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
1409 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
1410 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
1412 rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
1414 fjx0 = _fjsp_setzero_v2r8();
1415 fjy0 = _fjsp_setzero_v2r8();
1416 fjz0 = _fjsp_setzero_v2r8();
1417 fjx1 = _fjsp_setzero_v2r8();
1418 fjy1 = _fjsp_setzero_v2r8();
1419 fjz1 = _fjsp_setzero_v2r8();
1420 fjx2 = _fjsp_setzero_v2r8();
1421 fjy2 = _fjsp_setzero_v2r8();
1422 fjz2 = _fjsp_setzero_v2r8();
1424 /**************************
1425 * CALCULATE INTERACTIONS *
1426 **************************/
1428 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
1430 /* Calculate table index by multiplying r with table scale and truncate to integer */
1431 rt = _fjsp_mul_v2r8(r00,vftabscale);
1432 itab_tmp = _fjsp_dtox_v2r8(rt);
1433 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1434 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1435 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1440 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1441 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1442 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1443 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1444 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1445 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1446 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1447 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1448 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1449 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
1451 /* LENNARD-JONES DISPERSION/REPULSION */
1453 rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
1454 fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
1456 fscal = _fjsp_add_v2r8(felec,fvdw);
1458 /* Update vectorial force */
1459 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
1460 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1461 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1463 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1464 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1465 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1467 /**************************
1468 * CALCULATE INTERACTIONS *
1469 **************************/
1471 r01 = _fjsp_mul_v2r8(rsq01,rinv01);
1473 /* Calculate table index by multiplying r with table scale and truncate to integer */
1474 rt = _fjsp_mul_v2r8(r01,vftabscale);
1475 itab_tmp = _fjsp_dtox_v2r8(rt);
1476 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1477 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1478 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1483 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1484 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1485 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1486 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1487 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1488 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1489 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1490 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1491 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1492 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
1496 /* Update vectorial force */
1497 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
1498 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1499 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1501 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1502 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1503 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1505 /**************************
1506 * CALCULATE INTERACTIONS *
1507 **************************/
1509 r02 = _fjsp_mul_v2r8(rsq02,rinv02);
1511 /* Calculate table index by multiplying r with table scale and truncate to integer */
1512 rt = _fjsp_mul_v2r8(r02,vftabscale);
1513 itab_tmp = _fjsp_dtox_v2r8(rt);
1514 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1515 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1516 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1521 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1522 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1523 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1524 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1525 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1526 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1527 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1528 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1529 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1530 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
1534 /* Update vectorial force */
1535 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
1536 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1537 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1539 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1540 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1541 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1543 /**************************
1544 * CALCULATE INTERACTIONS *
1545 **************************/
1547 r10 = _fjsp_mul_v2r8(rsq10,rinv10);
1549 /* Calculate table index by multiplying r with table scale and truncate to integer */
1550 rt = _fjsp_mul_v2r8(r10,vftabscale);
1551 itab_tmp = _fjsp_dtox_v2r8(rt);
1552 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1553 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1554 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1559 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1560 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1561 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1562 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1563 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1564 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1565 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1566 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1567 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1568 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
1572 /* Update vectorial force */
1573 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
1574 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1575 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1577 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1578 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1579 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1581 /**************************
1582 * CALCULATE INTERACTIONS *
1583 **************************/
1585 r11 = _fjsp_mul_v2r8(rsq11,rinv11);
1587 /* Calculate table index by multiplying r with table scale and truncate to integer */
1588 rt = _fjsp_mul_v2r8(r11,vftabscale);
1589 itab_tmp = _fjsp_dtox_v2r8(rt);
1590 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1591 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1592 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1597 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1598 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1599 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1600 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1601 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1602 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1603 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1604 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1605 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1606 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
1610 /* Update vectorial force */
1611 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
1612 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1613 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1615 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1616 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1617 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1619 /**************************
1620 * CALCULATE INTERACTIONS *
1621 **************************/
1623 r12 = _fjsp_mul_v2r8(rsq12,rinv12);
1625 /* Calculate table index by multiplying r with table scale and truncate to integer */
1626 rt = _fjsp_mul_v2r8(r12,vftabscale);
1627 itab_tmp = _fjsp_dtox_v2r8(rt);
1628 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1629 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1630 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1635 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1636 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1637 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1638 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1639 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1640 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1641 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1642 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1643 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1644 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
1648 /* Update vectorial force */
1649 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
1650 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1651 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1653 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1654 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1655 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1657 /**************************
1658 * CALCULATE INTERACTIONS *
1659 **************************/
1661 r20 = _fjsp_mul_v2r8(rsq20,rinv20);
1663 /* Calculate table index by multiplying r with table scale and truncate to integer */
1664 rt = _fjsp_mul_v2r8(r20,vftabscale);
1665 itab_tmp = _fjsp_dtox_v2r8(rt);
1666 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1667 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1668 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1673 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1674 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1675 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1676 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1677 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1678 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1679 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1680 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1681 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1682 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
1686 /* Update vectorial force */
1687 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
1688 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1689 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1691 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1692 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1693 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1695 /**************************
1696 * CALCULATE INTERACTIONS *
1697 **************************/
1699 r21 = _fjsp_mul_v2r8(rsq21,rinv21);
1701 /* Calculate table index by multiplying r with table scale and truncate to integer */
1702 rt = _fjsp_mul_v2r8(r21,vftabscale);
1703 itab_tmp = _fjsp_dtox_v2r8(rt);
1704 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1705 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1706 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1711 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1712 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1713 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1714 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1715 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1716 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1717 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1718 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1719 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1720 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
1724 /* Update vectorial force */
1725 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
1726 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1727 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1729 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1730 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1731 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1733 /**************************
1734 * CALCULATE INTERACTIONS *
1735 **************************/
1737 r22 = _fjsp_mul_v2r8(rsq22,rinv22);
1739 /* Calculate table index by multiplying r with table scale and truncate to integer */
1740 rt = _fjsp_mul_v2r8(r22,vftabscale);
1741 itab_tmp = _fjsp_dtox_v2r8(rt);
1742 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1743 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1744 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1749 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1750 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1751 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1752 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1753 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1754 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1755 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1756 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1757 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1758 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
1762 /* Update vectorial force */
1763 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
1764 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1765 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1767 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1768 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1769 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1771 gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1773 /* Inner loop uses 386 flops */
1776 if(jidx<j_index_end)
1780 j_coord_offsetA = DIM*jnrA;
1782 /* load j atom coordinates */
1783 gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
1784 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1786 /* Calculate displacement vector */
1787 dx00 = _fjsp_sub_v2r8(ix0,jx0);
1788 dy00 = _fjsp_sub_v2r8(iy0,jy0);
1789 dz00 = _fjsp_sub_v2r8(iz0,jz0);
1790 dx01 = _fjsp_sub_v2r8(ix0,jx1);
1791 dy01 = _fjsp_sub_v2r8(iy0,jy1);
1792 dz01 = _fjsp_sub_v2r8(iz0,jz1);
1793 dx02 = _fjsp_sub_v2r8(ix0,jx2);
1794 dy02 = _fjsp_sub_v2r8(iy0,jy2);
1795 dz02 = _fjsp_sub_v2r8(iz0,jz2);
1796 dx10 = _fjsp_sub_v2r8(ix1,jx0);
1797 dy10 = _fjsp_sub_v2r8(iy1,jy0);
1798 dz10 = _fjsp_sub_v2r8(iz1,jz0);
1799 dx11 = _fjsp_sub_v2r8(ix1,jx1);
1800 dy11 = _fjsp_sub_v2r8(iy1,jy1);
1801 dz11 = _fjsp_sub_v2r8(iz1,jz1);
1802 dx12 = _fjsp_sub_v2r8(ix1,jx2);
1803 dy12 = _fjsp_sub_v2r8(iy1,jy2);
1804 dz12 = _fjsp_sub_v2r8(iz1,jz2);
1805 dx20 = _fjsp_sub_v2r8(ix2,jx0);
1806 dy20 = _fjsp_sub_v2r8(iy2,jy0);
1807 dz20 = _fjsp_sub_v2r8(iz2,jz0);
1808 dx21 = _fjsp_sub_v2r8(ix2,jx1);
1809 dy21 = _fjsp_sub_v2r8(iy2,jy1);
1810 dz21 = _fjsp_sub_v2r8(iz2,jz1);
1811 dx22 = _fjsp_sub_v2r8(ix2,jx2);
1812 dy22 = _fjsp_sub_v2r8(iy2,jy2);
1813 dz22 = _fjsp_sub_v2r8(iz2,jz2);
1815 /* Calculate squared distance and things based on it */
1816 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1817 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1818 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1819 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1820 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1821 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1822 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1823 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1824 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1826 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
1827 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
1828 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
1829 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
1830 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
1831 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
1832 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
1833 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
1834 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
1836 rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
1838 fjx0 = _fjsp_setzero_v2r8();
1839 fjy0 = _fjsp_setzero_v2r8();
1840 fjz0 = _fjsp_setzero_v2r8();
1841 fjx1 = _fjsp_setzero_v2r8();
1842 fjy1 = _fjsp_setzero_v2r8();
1843 fjz1 = _fjsp_setzero_v2r8();
1844 fjx2 = _fjsp_setzero_v2r8();
1845 fjy2 = _fjsp_setzero_v2r8();
1846 fjz2 = _fjsp_setzero_v2r8();
1848 /**************************
1849 * CALCULATE INTERACTIONS *
1850 **************************/
1852 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
1854 /* Calculate table index by multiplying r with table scale and truncate to integer */
1855 rt = _fjsp_mul_v2r8(r00,vftabscale);
1856 itab_tmp = _fjsp_dtox_v2r8(rt);
1857 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1858 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1859 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1864 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1865 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1866 F = _fjsp_setzero_v2r8();
1867 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1868 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1869 H = _fjsp_setzero_v2r8();
1870 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1871 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1872 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1873 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
1875 /* LENNARD-JONES DISPERSION/REPULSION */
1877 rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
1878 fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
1880 fscal = _fjsp_add_v2r8(felec,fvdw);
1882 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1884 /* Update vectorial force */
1885 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
1886 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1887 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1889 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1890 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1891 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1893 /**************************
1894 * CALCULATE INTERACTIONS *
1895 **************************/
1897 r01 = _fjsp_mul_v2r8(rsq01,rinv01);
1899 /* Calculate table index by multiplying r with table scale and truncate to integer */
1900 rt = _fjsp_mul_v2r8(r01,vftabscale);
1901 itab_tmp = _fjsp_dtox_v2r8(rt);
1902 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1903 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1904 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1909 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1910 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1911 F = _fjsp_setzero_v2r8();
1912 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1913 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1914 H = _fjsp_setzero_v2r8();
1915 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1916 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1917 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1918 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
1922 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1924 /* Update vectorial force */
1925 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
1926 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1927 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1929 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1930 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1931 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1933 /**************************
1934 * CALCULATE INTERACTIONS *
1935 **************************/
1937 r02 = _fjsp_mul_v2r8(rsq02,rinv02);
1939 /* Calculate table index by multiplying r with table scale and truncate to integer */
1940 rt = _fjsp_mul_v2r8(r02,vftabscale);
1941 itab_tmp = _fjsp_dtox_v2r8(rt);
1942 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1943 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1944 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1949 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1950 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1951 F = _fjsp_setzero_v2r8();
1952 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1953 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1954 H = _fjsp_setzero_v2r8();
1955 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1956 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1957 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1958 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
1962 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1964 /* Update vectorial force */
1965 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
1966 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1967 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1969 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1970 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1971 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1973 /**************************
1974 * CALCULATE INTERACTIONS *
1975 **************************/
1977 r10 = _fjsp_mul_v2r8(rsq10,rinv10);
1979 /* Calculate table index by multiplying r with table scale and truncate to integer */
1980 rt = _fjsp_mul_v2r8(r10,vftabscale);
1981 itab_tmp = _fjsp_dtox_v2r8(rt);
1982 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1983 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1984 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1989 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1990 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1991 F = _fjsp_setzero_v2r8();
1992 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1993 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1994 H = _fjsp_setzero_v2r8();
1995 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1996 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1997 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1998 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
2002 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2004 /* Update vectorial force */
2005 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
2006 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
2007 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
2009 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
2010 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
2011 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
2013 /**************************
2014 * CALCULATE INTERACTIONS *
2015 **************************/
2017 r11 = _fjsp_mul_v2r8(rsq11,rinv11);
2019 /* Calculate table index by multiplying r with table scale and truncate to integer */
2020 rt = _fjsp_mul_v2r8(r11,vftabscale);
2021 itab_tmp = _fjsp_dtox_v2r8(rt);
2022 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2023 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
2024 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2029 /* CUBIC SPLINE TABLE ELECTROSTATICS */
2030 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2031 F = _fjsp_setzero_v2r8();
2032 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2033 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2034 H = _fjsp_setzero_v2r8();
2035 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2036 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2037 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2038 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
2042 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2044 /* Update vectorial force */
2045 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
2046 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
2047 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
2049 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
2050 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
2051 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
2053 /**************************
2054 * CALCULATE INTERACTIONS *
2055 **************************/
2057 r12 = _fjsp_mul_v2r8(rsq12,rinv12);
2059 /* Calculate table index by multiplying r with table scale and truncate to integer */
2060 rt = _fjsp_mul_v2r8(r12,vftabscale);
2061 itab_tmp = _fjsp_dtox_v2r8(rt);
2062 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2063 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
2064 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2069 /* CUBIC SPLINE TABLE ELECTROSTATICS */
2070 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2071 F = _fjsp_setzero_v2r8();
2072 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2073 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2074 H = _fjsp_setzero_v2r8();
2075 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2076 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2077 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2078 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
2082 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2084 /* Update vectorial force */
2085 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
2086 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
2087 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
2089 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
2090 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
2091 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
2093 /**************************
2094 * CALCULATE INTERACTIONS *
2095 **************************/
2097 r20 = _fjsp_mul_v2r8(rsq20,rinv20);
2099 /* Calculate table index by multiplying r with table scale and truncate to integer */
2100 rt = _fjsp_mul_v2r8(r20,vftabscale);
2101 itab_tmp = _fjsp_dtox_v2r8(rt);
2102 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2103 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
2104 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2109 /* CUBIC SPLINE TABLE ELECTROSTATICS */
2110 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2111 F = _fjsp_setzero_v2r8();
2112 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2113 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2114 H = _fjsp_setzero_v2r8();
2115 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2116 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2117 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2118 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
2122 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2124 /* Update vectorial force */
2125 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
2126 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
2127 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
2129 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
2130 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
2131 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
2133 /**************************
2134 * CALCULATE INTERACTIONS *
2135 **************************/
2137 r21 = _fjsp_mul_v2r8(rsq21,rinv21);
2139 /* Calculate table index by multiplying r with table scale and truncate to integer */
2140 rt = _fjsp_mul_v2r8(r21,vftabscale);
2141 itab_tmp = _fjsp_dtox_v2r8(rt);
2142 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2143 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
2144 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2149 /* CUBIC SPLINE TABLE ELECTROSTATICS */
2150 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2151 F = _fjsp_setzero_v2r8();
2152 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2153 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2154 H = _fjsp_setzero_v2r8();
2155 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2156 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2157 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2158 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
2162 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2164 /* Update vectorial force */
2165 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
2166 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
2167 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
2169 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
2170 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
2171 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
2173 /**************************
2174 * CALCULATE INTERACTIONS *
2175 **************************/
2177 r22 = _fjsp_mul_v2r8(rsq22,rinv22);
2179 /* Calculate table index by multiplying r with table scale and truncate to integer */
2180 rt = _fjsp_mul_v2r8(r22,vftabscale);
2181 itab_tmp = _fjsp_dtox_v2r8(rt);
2182 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2183 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
2184 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2189 /* CUBIC SPLINE TABLE ELECTROSTATICS */
2190 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2191 F = _fjsp_setzero_v2r8();
2192 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2193 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2194 H = _fjsp_setzero_v2r8();
2195 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2196 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2197 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2198 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
2202 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2204 /* Update vectorial force */
2205 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
2206 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
2207 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
2209 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
2210 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
2211 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
2213 gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
2215 /* Inner loop uses 386 flops */
2218 /* End of innermost loop */
2220 gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
2221 f+i_coord_offset,fshift+i_shift_offset);
2223 /* Increment number of inner iterations */
2224 inneriter += j_index_end - j_index_start;
2226 /* Outer loop uses 18 flops */
2229 /* Increment number of outer iterations */
2232 /* Update outer/inner flops */
2234 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*386);