2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
42 #include "../nb_kernel.h"
43 #include "types/simple.h"
44 #include "gromacs/math/vec.h"
47 #include "kernelutil_sparc64_hpc_ace_double.h"
50 * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double
51 * Electrostatics interaction: CubicSplineTable
52 * VdW interaction: CubicSplineTable
53 * Geometry: Water3-Water3
54 * Calculate force/pot: PotentialAndForce
57 nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double
58 (t_nblist * gmx_restrict nlist,
59 rvec * gmx_restrict xx,
60 rvec * gmx_restrict ff,
61 t_forcerec * gmx_restrict fr,
62 t_mdatoms * gmx_restrict mdatoms,
63 nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
64 t_nrnb * gmx_restrict nrnb)
66 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
67 * just 0 for non-waters.
68 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
69 * jnr indices corresponding to data put in the two positions in the SIMD register.
71 int i_shift_offset,i_coord_offset,outeriter,inneriter;
72 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
74 int j_coord_offsetA,j_coord_offsetB;
75 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
77 real *shiftvec,*fshift,*x,*f;
78 _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
80 _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
82 _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
84 _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
85 int vdwjidx0A,vdwjidx0B;
86 _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
87 int vdwjidx1A,vdwjidx1B;
88 _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
89 int vdwjidx2A,vdwjidx2B;
90 _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
91 _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
92 _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
93 _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
94 _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
95 _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
96 _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
97 _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
98 _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
99 _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
100 _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
103 _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
106 _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
107 _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
108 _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
111 _fjsp_v2r8 dummy_mask,cutoff_mask;
112 _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
113 _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
114 union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
121 jindex = nlist->jindex;
123 shiftidx = nlist->shift;
125 shiftvec = fr->shift_vec[0];
126 fshift = fr->fshift[0];
127 facel = gmx_fjsp_set1_v2r8(fr->epsfac);
128 charge = mdatoms->chargeA;
129 nvdwtype = fr->ntype;
131 vdwtype = mdatoms->typeA;
133 vftab = kernel_data->table_elec_vdw->data;
134 vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale);
136 /* Setup water-specific parameters */
137 inr = nlist->iinr[0];
138 iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
139 iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
140 iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
141 vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
143 jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
144 jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
145 jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
146 vdwjidx0A = 2*vdwtype[inr+0];
147 qq00 = _fjsp_mul_v2r8(iq0,jq0);
148 c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
149 c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
150 qq01 = _fjsp_mul_v2r8(iq0,jq1);
151 qq02 = _fjsp_mul_v2r8(iq0,jq2);
152 qq10 = _fjsp_mul_v2r8(iq1,jq0);
153 qq11 = _fjsp_mul_v2r8(iq1,jq1);
154 qq12 = _fjsp_mul_v2r8(iq1,jq2);
155 qq20 = _fjsp_mul_v2r8(iq2,jq0);
156 qq21 = _fjsp_mul_v2r8(iq2,jq1);
157 qq22 = _fjsp_mul_v2r8(iq2,jq2);
159 /* Avoid stupid compiler warnings */
167 /* Start outer loop over neighborlists */
168 for(iidx=0; iidx<nri; iidx++)
170 /* Load shift vector for this list */
171 i_shift_offset = DIM*shiftidx[iidx];
173 /* Load limits for loop over neighbors */
174 j_index_start = jindex[iidx];
175 j_index_end = jindex[iidx+1];
177 /* Get outer coordinate index */
179 i_coord_offset = DIM*inr;
181 /* Load i particle coords and add shift vector */
182 gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
183 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
185 fix0 = _fjsp_setzero_v2r8();
186 fiy0 = _fjsp_setzero_v2r8();
187 fiz0 = _fjsp_setzero_v2r8();
188 fix1 = _fjsp_setzero_v2r8();
189 fiy1 = _fjsp_setzero_v2r8();
190 fiz1 = _fjsp_setzero_v2r8();
191 fix2 = _fjsp_setzero_v2r8();
192 fiy2 = _fjsp_setzero_v2r8();
193 fiz2 = _fjsp_setzero_v2r8();
195 /* Reset potential sums */
196 velecsum = _fjsp_setzero_v2r8();
197 vvdwsum = _fjsp_setzero_v2r8();
199 /* Start inner kernel loop */
200 for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
203 /* Get j neighbor index, and coordinate index */
206 j_coord_offsetA = DIM*jnrA;
207 j_coord_offsetB = DIM*jnrB;
209 /* load j atom coordinates */
210 gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
211 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
213 /* Calculate displacement vector */
214 dx00 = _fjsp_sub_v2r8(ix0,jx0);
215 dy00 = _fjsp_sub_v2r8(iy0,jy0);
216 dz00 = _fjsp_sub_v2r8(iz0,jz0);
217 dx01 = _fjsp_sub_v2r8(ix0,jx1);
218 dy01 = _fjsp_sub_v2r8(iy0,jy1);
219 dz01 = _fjsp_sub_v2r8(iz0,jz1);
220 dx02 = _fjsp_sub_v2r8(ix0,jx2);
221 dy02 = _fjsp_sub_v2r8(iy0,jy2);
222 dz02 = _fjsp_sub_v2r8(iz0,jz2);
223 dx10 = _fjsp_sub_v2r8(ix1,jx0);
224 dy10 = _fjsp_sub_v2r8(iy1,jy0);
225 dz10 = _fjsp_sub_v2r8(iz1,jz0);
226 dx11 = _fjsp_sub_v2r8(ix1,jx1);
227 dy11 = _fjsp_sub_v2r8(iy1,jy1);
228 dz11 = _fjsp_sub_v2r8(iz1,jz1);
229 dx12 = _fjsp_sub_v2r8(ix1,jx2);
230 dy12 = _fjsp_sub_v2r8(iy1,jy2);
231 dz12 = _fjsp_sub_v2r8(iz1,jz2);
232 dx20 = _fjsp_sub_v2r8(ix2,jx0);
233 dy20 = _fjsp_sub_v2r8(iy2,jy0);
234 dz20 = _fjsp_sub_v2r8(iz2,jz0);
235 dx21 = _fjsp_sub_v2r8(ix2,jx1);
236 dy21 = _fjsp_sub_v2r8(iy2,jy1);
237 dz21 = _fjsp_sub_v2r8(iz2,jz1);
238 dx22 = _fjsp_sub_v2r8(ix2,jx2);
239 dy22 = _fjsp_sub_v2r8(iy2,jy2);
240 dz22 = _fjsp_sub_v2r8(iz2,jz2);
242 /* Calculate squared distance and things based on it */
243 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
244 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
245 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
246 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
247 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
248 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
249 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
250 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
251 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
253 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
254 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
255 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
256 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
257 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
258 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
259 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
260 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
261 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
263 fjx0 = _fjsp_setzero_v2r8();
264 fjy0 = _fjsp_setzero_v2r8();
265 fjz0 = _fjsp_setzero_v2r8();
266 fjx1 = _fjsp_setzero_v2r8();
267 fjy1 = _fjsp_setzero_v2r8();
268 fjz1 = _fjsp_setzero_v2r8();
269 fjx2 = _fjsp_setzero_v2r8();
270 fjy2 = _fjsp_setzero_v2r8();
271 fjz2 = _fjsp_setzero_v2r8();
273 /**************************
274 * CALCULATE INTERACTIONS *
275 **************************/
277 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
279 /* Calculate table index by multiplying r with table scale and truncate to integer */
280 rt = _fjsp_mul_v2r8(r00,vftabscale);
281 itab_tmp = _fjsp_dtox_v2r8(rt);
282 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
283 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
284 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
289 /* CUBIC SPLINE TABLE ELECTROSTATICS */
290 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
291 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
292 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
293 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
294 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
295 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
296 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
297 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
298 velec = _fjsp_mul_v2r8(qq00,VV);
299 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
300 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
302 /* CUBIC SPLINE TABLE DISPERSION */
305 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
306 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
307 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
308 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
309 H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
310 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
311 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
312 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
313 vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
314 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
315 fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
317 /* CUBIC SPLINE TABLE REPULSION */
318 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
319 F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
320 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
321 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
322 H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
323 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
324 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
325 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
326 vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
327 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
328 fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
329 vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
330 fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
332 /* Update potential sum for this i atom from the interaction with this j atom. */
333 velecsum = _fjsp_add_v2r8(velecsum,velec);
334 vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
336 fscal = _fjsp_add_v2r8(felec,fvdw);
338 /* Update vectorial force */
339 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
340 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
341 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
343 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
344 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
345 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
347 /**************************
348 * CALCULATE INTERACTIONS *
349 **************************/
351 r01 = _fjsp_mul_v2r8(rsq01,rinv01);
353 /* Calculate table index by multiplying r with table scale and truncate to integer */
354 rt = _fjsp_mul_v2r8(r01,vftabscale);
355 itab_tmp = _fjsp_dtox_v2r8(rt);
356 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
357 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
358 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
363 /* CUBIC SPLINE TABLE ELECTROSTATICS */
364 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
365 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
366 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
367 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
368 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
369 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
370 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
371 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
372 velec = _fjsp_mul_v2r8(qq01,VV);
373 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
374 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
376 /* Update potential sum for this i atom from the interaction with this j atom. */
377 velecsum = _fjsp_add_v2r8(velecsum,velec);
381 /* Update vectorial force */
382 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
383 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
384 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
386 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
387 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
388 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
390 /**************************
391 * CALCULATE INTERACTIONS *
392 **************************/
394 r02 = _fjsp_mul_v2r8(rsq02,rinv02);
396 /* Calculate table index by multiplying r with table scale and truncate to integer */
397 rt = _fjsp_mul_v2r8(r02,vftabscale);
398 itab_tmp = _fjsp_dtox_v2r8(rt);
399 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
400 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
401 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
406 /* CUBIC SPLINE TABLE ELECTROSTATICS */
407 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
408 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
409 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
410 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
411 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
412 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
413 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
414 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
415 velec = _fjsp_mul_v2r8(qq02,VV);
416 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
417 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
419 /* Update potential sum for this i atom from the interaction with this j atom. */
420 velecsum = _fjsp_add_v2r8(velecsum,velec);
424 /* Update vectorial force */
425 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
426 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
427 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
429 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
430 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
431 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
433 /**************************
434 * CALCULATE INTERACTIONS *
435 **************************/
437 r10 = _fjsp_mul_v2r8(rsq10,rinv10);
439 /* Calculate table index by multiplying r with table scale and truncate to integer */
440 rt = _fjsp_mul_v2r8(r10,vftabscale);
441 itab_tmp = _fjsp_dtox_v2r8(rt);
442 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
443 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
444 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
449 /* CUBIC SPLINE TABLE ELECTROSTATICS */
450 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
451 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
452 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
453 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
454 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
455 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
456 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
457 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
458 velec = _fjsp_mul_v2r8(qq10,VV);
459 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
460 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
462 /* Update potential sum for this i atom from the interaction with this j atom. */
463 velecsum = _fjsp_add_v2r8(velecsum,velec);
467 /* Update vectorial force */
468 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
469 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
470 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
472 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
473 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
474 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
476 /**************************
477 * CALCULATE INTERACTIONS *
478 **************************/
480 r11 = _fjsp_mul_v2r8(rsq11,rinv11);
482 /* Calculate table index by multiplying r with table scale and truncate to integer */
483 rt = _fjsp_mul_v2r8(r11,vftabscale);
484 itab_tmp = _fjsp_dtox_v2r8(rt);
485 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
486 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
487 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
492 /* CUBIC SPLINE TABLE ELECTROSTATICS */
493 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
494 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
495 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
496 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
497 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
498 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
499 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
500 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
501 velec = _fjsp_mul_v2r8(qq11,VV);
502 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
503 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
505 /* Update potential sum for this i atom from the interaction with this j atom. */
506 velecsum = _fjsp_add_v2r8(velecsum,velec);
510 /* Update vectorial force */
511 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
512 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
513 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
515 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
516 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
517 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
519 /**************************
520 * CALCULATE INTERACTIONS *
521 **************************/
523 r12 = _fjsp_mul_v2r8(rsq12,rinv12);
525 /* Calculate table index by multiplying r with table scale and truncate to integer */
526 rt = _fjsp_mul_v2r8(r12,vftabscale);
527 itab_tmp = _fjsp_dtox_v2r8(rt);
528 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
529 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
530 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
535 /* CUBIC SPLINE TABLE ELECTROSTATICS */
536 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
537 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
538 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
539 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
540 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
541 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
542 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
543 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
544 velec = _fjsp_mul_v2r8(qq12,VV);
545 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
546 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
548 /* Update potential sum for this i atom from the interaction with this j atom. */
549 velecsum = _fjsp_add_v2r8(velecsum,velec);
553 /* Update vectorial force */
554 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
555 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
556 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
558 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
559 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
560 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
562 /**************************
563 * CALCULATE INTERACTIONS *
564 **************************/
566 r20 = _fjsp_mul_v2r8(rsq20,rinv20);
568 /* Calculate table index by multiplying r with table scale and truncate to integer */
569 rt = _fjsp_mul_v2r8(r20,vftabscale);
570 itab_tmp = _fjsp_dtox_v2r8(rt);
571 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
572 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
573 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
578 /* CUBIC SPLINE TABLE ELECTROSTATICS */
579 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
580 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
581 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
582 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
583 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
584 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
585 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
586 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
587 velec = _fjsp_mul_v2r8(qq20,VV);
588 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
589 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
591 /* Update potential sum for this i atom from the interaction with this j atom. */
592 velecsum = _fjsp_add_v2r8(velecsum,velec);
596 /* Update vectorial force */
597 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
598 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
599 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
601 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
602 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
603 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
605 /**************************
606 * CALCULATE INTERACTIONS *
607 **************************/
609 r21 = _fjsp_mul_v2r8(rsq21,rinv21);
611 /* Calculate table index by multiplying r with table scale and truncate to integer */
612 rt = _fjsp_mul_v2r8(r21,vftabscale);
613 itab_tmp = _fjsp_dtox_v2r8(rt);
614 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
615 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
616 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
621 /* CUBIC SPLINE TABLE ELECTROSTATICS */
622 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
623 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
624 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
625 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
626 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
627 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
628 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
629 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
630 velec = _fjsp_mul_v2r8(qq21,VV);
631 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
632 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
634 /* Update potential sum for this i atom from the interaction with this j atom. */
635 velecsum = _fjsp_add_v2r8(velecsum,velec);
639 /* Update vectorial force */
640 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
641 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
642 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
644 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
645 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
646 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
648 /**************************
649 * CALCULATE INTERACTIONS *
650 **************************/
652 r22 = _fjsp_mul_v2r8(rsq22,rinv22);
654 /* Calculate table index by multiplying r with table scale and truncate to integer */
655 rt = _fjsp_mul_v2r8(r22,vftabscale);
656 itab_tmp = _fjsp_dtox_v2r8(rt);
657 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
658 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
659 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
664 /* CUBIC SPLINE TABLE ELECTROSTATICS */
665 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
666 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
667 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
668 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
669 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
670 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
671 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
672 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
673 velec = _fjsp_mul_v2r8(qq22,VV);
674 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
675 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
677 /* Update potential sum for this i atom from the interaction with this j atom. */
678 velecsum = _fjsp_add_v2r8(velecsum,velec);
682 /* Update vectorial force */
683 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
684 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
685 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
687 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
688 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
689 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
691 gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
693 /* Inner loop uses 444 flops */
700 j_coord_offsetA = DIM*jnrA;
702 /* load j atom coordinates */
703 gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
704 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
706 /* Calculate displacement vector */
707 dx00 = _fjsp_sub_v2r8(ix0,jx0);
708 dy00 = _fjsp_sub_v2r8(iy0,jy0);
709 dz00 = _fjsp_sub_v2r8(iz0,jz0);
710 dx01 = _fjsp_sub_v2r8(ix0,jx1);
711 dy01 = _fjsp_sub_v2r8(iy0,jy1);
712 dz01 = _fjsp_sub_v2r8(iz0,jz1);
713 dx02 = _fjsp_sub_v2r8(ix0,jx2);
714 dy02 = _fjsp_sub_v2r8(iy0,jy2);
715 dz02 = _fjsp_sub_v2r8(iz0,jz2);
716 dx10 = _fjsp_sub_v2r8(ix1,jx0);
717 dy10 = _fjsp_sub_v2r8(iy1,jy0);
718 dz10 = _fjsp_sub_v2r8(iz1,jz0);
719 dx11 = _fjsp_sub_v2r8(ix1,jx1);
720 dy11 = _fjsp_sub_v2r8(iy1,jy1);
721 dz11 = _fjsp_sub_v2r8(iz1,jz1);
722 dx12 = _fjsp_sub_v2r8(ix1,jx2);
723 dy12 = _fjsp_sub_v2r8(iy1,jy2);
724 dz12 = _fjsp_sub_v2r8(iz1,jz2);
725 dx20 = _fjsp_sub_v2r8(ix2,jx0);
726 dy20 = _fjsp_sub_v2r8(iy2,jy0);
727 dz20 = _fjsp_sub_v2r8(iz2,jz0);
728 dx21 = _fjsp_sub_v2r8(ix2,jx1);
729 dy21 = _fjsp_sub_v2r8(iy2,jy1);
730 dz21 = _fjsp_sub_v2r8(iz2,jz1);
731 dx22 = _fjsp_sub_v2r8(ix2,jx2);
732 dy22 = _fjsp_sub_v2r8(iy2,jy2);
733 dz22 = _fjsp_sub_v2r8(iz2,jz2);
735 /* Calculate squared distance and things based on it */
736 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
737 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
738 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
739 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
740 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
741 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
742 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
743 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
744 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
746 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
747 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
748 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
749 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
750 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
751 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
752 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
753 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
754 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
756 fjx0 = _fjsp_setzero_v2r8();
757 fjy0 = _fjsp_setzero_v2r8();
758 fjz0 = _fjsp_setzero_v2r8();
759 fjx1 = _fjsp_setzero_v2r8();
760 fjy1 = _fjsp_setzero_v2r8();
761 fjz1 = _fjsp_setzero_v2r8();
762 fjx2 = _fjsp_setzero_v2r8();
763 fjy2 = _fjsp_setzero_v2r8();
764 fjz2 = _fjsp_setzero_v2r8();
766 /**************************
767 * CALCULATE INTERACTIONS *
768 **************************/
770 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
772 /* Calculate table index by multiplying r with table scale and truncate to integer */
773 rt = _fjsp_mul_v2r8(r00,vftabscale);
774 itab_tmp = _fjsp_dtox_v2r8(rt);
775 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
776 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
777 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
782 /* CUBIC SPLINE TABLE ELECTROSTATICS */
783 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
784 F = _fjsp_setzero_v2r8();
785 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
786 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
787 H = _fjsp_setzero_v2r8();
788 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
789 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
790 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
791 velec = _fjsp_mul_v2r8(qq00,VV);
792 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
793 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
795 /* CUBIC SPLINE TABLE DISPERSION */
798 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
799 F = _fjsp_setzero_v2r8();
800 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
801 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
802 H = _fjsp_setzero_v2r8();
803 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
804 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
805 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
806 vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
807 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
808 fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
810 /* CUBIC SPLINE TABLE REPULSION */
811 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
812 F = _fjsp_setzero_v2r8();
813 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
814 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
815 H = _fjsp_setzero_v2r8();
816 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
817 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
818 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
819 vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
820 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
821 fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
822 vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
823 fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
825 /* Update potential sum for this i atom from the interaction with this j atom. */
826 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
827 velecsum = _fjsp_add_v2r8(velecsum,velec);
828 vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
829 vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
831 fscal = _fjsp_add_v2r8(felec,fvdw);
833 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
835 /* Update vectorial force */
836 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
837 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
838 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
840 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
841 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
842 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
844 /**************************
845 * CALCULATE INTERACTIONS *
846 **************************/
848 r01 = _fjsp_mul_v2r8(rsq01,rinv01);
850 /* Calculate table index by multiplying r with table scale and truncate to integer */
851 rt = _fjsp_mul_v2r8(r01,vftabscale);
852 itab_tmp = _fjsp_dtox_v2r8(rt);
853 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
854 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
855 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
860 /* CUBIC SPLINE TABLE ELECTROSTATICS */
861 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
862 F = _fjsp_setzero_v2r8();
863 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
864 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
865 H = _fjsp_setzero_v2r8();
866 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
867 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
868 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
869 velec = _fjsp_mul_v2r8(qq01,VV);
870 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
871 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
873 /* Update potential sum for this i atom from the interaction with this j atom. */
874 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
875 velecsum = _fjsp_add_v2r8(velecsum,velec);
879 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
881 /* Update vectorial force */
882 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
883 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
884 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
886 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
887 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
888 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
890 /**************************
891 * CALCULATE INTERACTIONS *
892 **************************/
894 r02 = _fjsp_mul_v2r8(rsq02,rinv02);
896 /* Calculate table index by multiplying r with table scale and truncate to integer */
897 rt = _fjsp_mul_v2r8(r02,vftabscale);
898 itab_tmp = _fjsp_dtox_v2r8(rt);
899 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
900 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
901 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
906 /* CUBIC SPLINE TABLE ELECTROSTATICS */
907 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
908 F = _fjsp_setzero_v2r8();
909 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
910 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
911 H = _fjsp_setzero_v2r8();
912 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
913 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
914 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
915 velec = _fjsp_mul_v2r8(qq02,VV);
916 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
917 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
919 /* Update potential sum for this i atom from the interaction with this j atom. */
920 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
921 velecsum = _fjsp_add_v2r8(velecsum,velec);
925 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
927 /* Update vectorial force */
928 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
929 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
930 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
932 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
933 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
934 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
936 /**************************
937 * CALCULATE INTERACTIONS *
938 **************************/
940 r10 = _fjsp_mul_v2r8(rsq10,rinv10);
942 /* Calculate table index by multiplying r with table scale and truncate to integer */
943 rt = _fjsp_mul_v2r8(r10,vftabscale);
944 itab_tmp = _fjsp_dtox_v2r8(rt);
945 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
946 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
947 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
952 /* CUBIC SPLINE TABLE ELECTROSTATICS */
953 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
954 F = _fjsp_setzero_v2r8();
955 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
956 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
957 H = _fjsp_setzero_v2r8();
958 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
959 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
960 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
961 velec = _fjsp_mul_v2r8(qq10,VV);
962 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
963 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
965 /* Update potential sum for this i atom from the interaction with this j atom. */
966 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
967 velecsum = _fjsp_add_v2r8(velecsum,velec);
971 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
973 /* Update vectorial force */
974 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
975 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
976 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
978 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
979 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
980 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
982 /**************************
983 * CALCULATE INTERACTIONS *
984 **************************/
986 r11 = _fjsp_mul_v2r8(rsq11,rinv11);
988 /* Calculate table index by multiplying r with table scale and truncate to integer */
989 rt = _fjsp_mul_v2r8(r11,vftabscale);
990 itab_tmp = _fjsp_dtox_v2r8(rt);
991 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
992 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
993 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
998 /* CUBIC SPLINE TABLE ELECTROSTATICS */
999 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1000 F = _fjsp_setzero_v2r8();
1001 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1002 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1003 H = _fjsp_setzero_v2r8();
1004 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1005 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1006 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
1007 velec = _fjsp_mul_v2r8(qq11,VV);
1008 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1009 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
1011 /* Update potential sum for this i atom from the interaction with this j atom. */
1012 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1013 velecsum = _fjsp_add_v2r8(velecsum,velec);
1017 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1019 /* Update vectorial force */
1020 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
1021 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1022 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1024 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1025 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1026 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1028 /**************************
1029 * CALCULATE INTERACTIONS *
1030 **************************/
1032 r12 = _fjsp_mul_v2r8(rsq12,rinv12);
1034 /* Calculate table index by multiplying r with table scale and truncate to integer */
1035 rt = _fjsp_mul_v2r8(r12,vftabscale);
1036 itab_tmp = _fjsp_dtox_v2r8(rt);
1037 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1038 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1039 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1044 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1045 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1046 F = _fjsp_setzero_v2r8();
1047 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1048 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1049 H = _fjsp_setzero_v2r8();
1050 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1051 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1052 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
1053 velec = _fjsp_mul_v2r8(qq12,VV);
1054 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1055 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
1057 /* Update potential sum for this i atom from the interaction with this j atom. */
1058 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1059 velecsum = _fjsp_add_v2r8(velecsum,velec);
1063 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1065 /* Update vectorial force */
1066 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
1067 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1068 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1070 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1071 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1072 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1074 /**************************
1075 * CALCULATE INTERACTIONS *
1076 **************************/
1078 r20 = _fjsp_mul_v2r8(rsq20,rinv20);
1080 /* Calculate table index by multiplying r with table scale and truncate to integer */
1081 rt = _fjsp_mul_v2r8(r20,vftabscale);
1082 itab_tmp = _fjsp_dtox_v2r8(rt);
1083 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1084 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1085 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1090 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1091 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1092 F = _fjsp_setzero_v2r8();
1093 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1094 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1095 H = _fjsp_setzero_v2r8();
1096 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1097 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1098 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
1099 velec = _fjsp_mul_v2r8(qq20,VV);
1100 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1101 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
1103 /* Update potential sum for this i atom from the interaction with this j atom. */
1104 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1105 velecsum = _fjsp_add_v2r8(velecsum,velec);
1109 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1111 /* Update vectorial force */
1112 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
1113 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1114 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1116 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1117 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1118 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1120 /**************************
1121 * CALCULATE INTERACTIONS *
1122 **************************/
1124 r21 = _fjsp_mul_v2r8(rsq21,rinv21);
1126 /* Calculate table index by multiplying r with table scale and truncate to integer */
1127 rt = _fjsp_mul_v2r8(r21,vftabscale);
1128 itab_tmp = _fjsp_dtox_v2r8(rt);
1129 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1130 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1131 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1136 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1137 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1138 F = _fjsp_setzero_v2r8();
1139 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1140 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1141 H = _fjsp_setzero_v2r8();
1142 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1143 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1144 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
1145 velec = _fjsp_mul_v2r8(qq21,VV);
1146 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1147 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
1149 /* Update potential sum for this i atom from the interaction with this j atom. */
1150 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1151 velecsum = _fjsp_add_v2r8(velecsum,velec);
1155 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1157 /* Update vectorial force */
1158 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
1159 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1160 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1162 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1163 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1164 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1166 /**************************
1167 * CALCULATE INTERACTIONS *
1168 **************************/
1170 r22 = _fjsp_mul_v2r8(rsq22,rinv22);
1172 /* Calculate table index by multiplying r with table scale and truncate to integer */
1173 rt = _fjsp_mul_v2r8(r22,vftabscale);
1174 itab_tmp = _fjsp_dtox_v2r8(rt);
1175 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1176 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1177 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1182 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1183 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1184 F = _fjsp_setzero_v2r8();
1185 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1186 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1187 H = _fjsp_setzero_v2r8();
1188 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1189 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1190 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
1191 velec = _fjsp_mul_v2r8(qq22,VV);
1192 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1193 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
1195 /* Update potential sum for this i atom from the interaction with this j atom. */
1196 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1197 velecsum = _fjsp_add_v2r8(velecsum,velec);
1201 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1203 /* Update vectorial force */
1204 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
1205 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1206 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1208 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1209 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1210 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1212 gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1214 /* Inner loop uses 444 flops */
1217 /* End of innermost loop */
1219 gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
1220 f+i_coord_offset,fshift+i_shift_offset);
1223 /* Update potential energies */
1224 gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
1225 gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
1227 /* Increment number of inner iterations */
1228 inneriter += j_index_end - j_index_start;
1230 /* Outer loop uses 20 flops */
1233 /* Increment number of outer iterations */
1236 /* Update outer/inner flops */
1238 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*444);
1241 * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double
1242 * Electrostatics interaction: CubicSplineTable
1243 * VdW interaction: CubicSplineTable
1244 * Geometry: Water3-Water3
1245 * Calculate force/pot: Force
1248 nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double
1249 (t_nblist * gmx_restrict nlist,
1250 rvec * gmx_restrict xx,
1251 rvec * gmx_restrict ff,
1252 t_forcerec * gmx_restrict fr,
1253 t_mdatoms * gmx_restrict mdatoms,
1254 nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
1255 t_nrnb * gmx_restrict nrnb)
1257 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1258 * just 0 for non-waters.
1259 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
1260 * jnr indices corresponding to data put in the two positions in the SIMD register.
1262 int i_shift_offset,i_coord_offset,outeriter,inneriter;
1263 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
1265 int j_coord_offsetA,j_coord_offsetB;
1266 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
1267 real rcutoff_scalar;
1268 real *shiftvec,*fshift,*x,*f;
1269 _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1271 _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
1273 _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1275 _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1276 int vdwjidx0A,vdwjidx0B;
1277 _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
1278 int vdwjidx1A,vdwjidx1B;
1279 _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1280 int vdwjidx2A,vdwjidx2B;
1281 _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1282 _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
1283 _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
1284 _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
1285 _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
1286 _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1287 _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1288 _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
1289 _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1290 _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1291 _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
1294 _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
1297 _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
1298 _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
1299 _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
1301 _fjsp_v2r8 itab_tmp;
1302 _fjsp_v2r8 dummy_mask,cutoff_mask;
1303 _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
1304 _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
1305 union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
1312 jindex = nlist->jindex;
1314 shiftidx = nlist->shift;
1316 shiftvec = fr->shift_vec[0];
1317 fshift = fr->fshift[0];
1318 facel = gmx_fjsp_set1_v2r8(fr->epsfac);
1319 charge = mdatoms->chargeA;
1320 nvdwtype = fr->ntype;
1321 vdwparam = fr->nbfp;
1322 vdwtype = mdatoms->typeA;
1324 vftab = kernel_data->table_elec_vdw->data;
1325 vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale);
1327 /* Setup water-specific parameters */
1328 inr = nlist->iinr[0];
1329 iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
1330 iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
1331 iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
1332 vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
1334 jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
1335 jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
1336 jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
1337 vdwjidx0A = 2*vdwtype[inr+0];
1338 qq00 = _fjsp_mul_v2r8(iq0,jq0);
1339 c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
1340 c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
1341 qq01 = _fjsp_mul_v2r8(iq0,jq1);
1342 qq02 = _fjsp_mul_v2r8(iq0,jq2);
1343 qq10 = _fjsp_mul_v2r8(iq1,jq0);
1344 qq11 = _fjsp_mul_v2r8(iq1,jq1);
1345 qq12 = _fjsp_mul_v2r8(iq1,jq2);
1346 qq20 = _fjsp_mul_v2r8(iq2,jq0);
1347 qq21 = _fjsp_mul_v2r8(iq2,jq1);
1348 qq22 = _fjsp_mul_v2r8(iq2,jq2);
1350 /* Avoid stupid compiler warnings */
1352 j_coord_offsetA = 0;
1353 j_coord_offsetB = 0;
1358 /* Start outer loop over neighborlists */
1359 for(iidx=0; iidx<nri; iidx++)
1361 /* Load shift vector for this list */
1362 i_shift_offset = DIM*shiftidx[iidx];
1364 /* Load limits for loop over neighbors */
1365 j_index_start = jindex[iidx];
1366 j_index_end = jindex[iidx+1];
1368 /* Get outer coordinate index */
1370 i_coord_offset = DIM*inr;
1372 /* Load i particle coords and add shift vector */
1373 gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
1374 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
1376 fix0 = _fjsp_setzero_v2r8();
1377 fiy0 = _fjsp_setzero_v2r8();
1378 fiz0 = _fjsp_setzero_v2r8();
1379 fix1 = _fjsp_setzero_v2r8();
1380 fiy1 = _fjsp_setzero_v2r8();
1381 fiz1 = _fjsp_setzero_v2r8();
1382 fix2 = _fjsp_setzero_v2r8();
1383 fiy2 = _fjsp_setzero_v2r8();
1384 fiz2 = _fjsp_setzero_v2r8();
1386 /* Start inner kernel loop */
1387 for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1390 /* Get j neighbor index, and coordinate index */
1392 jnrB = jjnr[jidx+1];
1393 j_coord_offsetA = DIM*jnrA;
1394 j_coord_offsetB = DIM*jnrB;
1396 /* load j atom coordinates */
1397 gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
1398 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1400 /* Calculate displacement vector */
1401 dx00 = _fjsp_sub_v2r8(ix0,jx0);
1402 dy00 = _fjsp_sub_v2r8(iy0,jy0);
1403 dz00 = _fjsp_sub_v2r8(iz0,jz0);
1404 dx01 = _fjsp_sub_v2r8(ix0,jx1);
1405 dy01 = _fjsp_sub_v2r8(iy0,jy1);
1406 dz01 = _fjsp_sub_v2r8(iz0,jz1);
1407 dx02 = _fjsp_sub_v2r8(ix0,jx2);
1408 dy02 = _fjsp_sub_v2r8(iy0,jy2);
1409 dz02 = _fjsp_sub_v2r8(iz0,jz2);
1410 dx10 = _fjsp_sub_v2r8(ix1,jx0);
1411 dy10 = _fjsp_sub_v2r8(iy1,jy0);
1412 dz10 = _fjsp_sub_v2r8(iz1,jz0);
1413 dx11 = _fjsp_sub_v2r8(ix1,jx1);
1414 dy11 = _fjsp_sub_v2r8(iy1,jy1);
1415 dz11 = _fjsp_sub_v2r8(iz1,jz1);
1416 dx12 = _fjsp_sub_v2r8(ix1,jx2);
1417 dy12 = _fjsp_sub_v2r8(iy1,jy2);
1418 dz12 = _fjsp_sub_v2r8(iz1,jz2);
1419 dx20 = _fjsp_sub_v2r8(ix2,jx0);
1420 dy20 = _fjsp_sub_v2r8(iy2,jy0);
1421 dz20 = _fjsp_sub_v2r8(iz2,jz0);
1422 dx21 = _fjsp_sub_v2r8(ix2,jx1);
1423 dy21 = _fjsp_sub_v2r8(iy2,jy1);
1424 dz21 = _fjsp_sub_v2r8(iz2,jz1);
1425 dx22 = _fjsp_sub_v2r8(ix2,jx2);
1426 dy22 = _fjsp_sub_v2r8(iy2,jy2);
1427 dz22 = _fjsp_sub_v2r8(iz2,jz2);
1429 /* Calculate squared distance and things based on it */
1430 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1431 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1432 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1433 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1434 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1435 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1436 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1437 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1438 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1440 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
1441 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
1442 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
1443 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
1444 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
1445 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
1446 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
1447 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
1448 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
1450 fjx0 = _fjsp_setzero_v2r8();
1451 fjy0 = _fjsp_setzero_v2r8();
1452 fjz0 = _fjsp_setzero_v2r8();
1453 fjx1 = _fjsp_setzero_v2r8();
1454 fjy1 = _fjsp_setzero_v2r8();
1455 fjz1 = _fjsp_setzero_v2r8();
1456 fjx2 = _fjsp_setzero_v2r8();
1457 fjy2 = _fjsp_setzero_v2r8();
1458 fjz2 = _fjsp_setzero_v2r8();
1460 /**************************
1461 * CALCULATE INTERACTIONS *
1462 **************************/
1464 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
1466 /* Calculate table index by multiplying r with table scale and truncate to integer */
1467 rt = _fjsp_mul_v2r8(r00,vftabscale);
1468 itab_tmp = _fjsp_dtox_v2r8(rt);
1469 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1470 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1471 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1476 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1477 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1478 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1479 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1480 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1481 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1482 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1483 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1484 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1485 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
1487 /* CUBIC SPLINE TABLE DISPERSION */
1490 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1491 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1492 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1493 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
1494 H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
1495 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1496 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1497 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1498 fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
1500 /* CUBIC SPLINE TABLE REPULSION */
1501 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
1502 F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
1503 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1504 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
1505 H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
1506 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1507 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1508 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1509 fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
1510 fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
1512 fscal = _fjsp_add_v2r8(felec,fvdw);
1514 /* Update vectorial force */
1515 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
1516 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1517 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1519 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1520 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1521 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1523 /**************************
1524 * CALCULATE INTERACTIONS *
1525 **************************/
1527 r01 = _fjsp_mul_v2r8(rsq01,rinv01);
1529 /* Calculate table index by multiplying r with table scale and truncate to integer */
1530 rt = _fjsp_mul_v2r8(r01,vftabscale);
1531 itab_tmp = _fjsp_dtox_v2r8(rt);
1532 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1533 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1534 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1539 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1540 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1541 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1542 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1543 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1544 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1545 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1546 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1547 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1548 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
1552 /* Update vectorial force */
1553 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
1554 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1555 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1557 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1558 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1559 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1561 /**************************
1562 * CALCULATE INTERACTIONS *
1563 **************************/
1565 r02 = _fjsp_mul_v2r8(rsq02,rinv02);
1567 /* Calculate table index by multiplying r with table scale and truncate to integer */
1568 rt = _fjsp_mul_v2r8(r02,vftabscale);
1569 itab_tmp = _fjsp_dtox_v2r8(rt);
1570 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1571 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1572 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1577 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1578 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1579 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1580 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1581 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1582 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1583 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1584 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1585 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1586 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
1590 /* Update vectorial force */
1591 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
1592 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1593 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1595 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1596 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1597 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1599 /**************************
1600 * CALCULATE INTERACTIONS *
1601 **************************/
1603 r10 = _fjsp_mul_v2r8(rsq10,rinv10);
1605 /* Calculate table index by multiplying r with table scale and truncate to integer */
1606 rt = _fjsp_mul_v2r8(r10,vftabscale);
1607 itab_tmp = _fjsp_dtox_v2r8(rt);
1608 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1609 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1610 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1615 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1616 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1617 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1618 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1619 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1620 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1621 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1622 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1623 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1624 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
1628 /* Update vectorial force */
1629 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
1630 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1631 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1633 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1634 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1635 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1637 /**************************
1638 * CALCULATE INTERACTIONS *
1639 **************************/
1641 r11 = _fjsp_mul_v2r8(rsq11,rinv11);
1643 /* Calculate table index by multiplying r with table scale and truncate to integer */
1644 rt = _fjsp_mul_v2r8(r11,vftabscale);
1645 itab_tmp = _fjsp_dtox_v2r8(rt);
1646 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1647 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1648 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1653 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1654 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1655 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1656 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1657 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1658 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1659 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1660 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1661 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1662 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
1666 /* Update vectorial force */
1667 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
1668 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1669 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1671 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1672 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1673 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1675 /**************************
1676 * CALCULATE INTERACTIONS *
1677 **************************/
1679 r12 = _fjsp_mul_v2r8(rsq12,rinv12);
1681 /* Calculate table index by multiplying r with table scale and truncate to integer */
1682 rt = _fjsp_mul_v2r8(r12,vftabscale);
1683 itab_tmp = _fjsp_dtox_v2r8(rt);
1684 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1685 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1686 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1691 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1692 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1693 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1694 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1695 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1696 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1697 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1698 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1699 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1700 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
1704 /* Update vectorial force */
1705 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
1706 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1707 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1709 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1710 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1711 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1713 /**************************
1714 * CALCULATE INTERACTIONS *
1715 **************************/
1717 r20 = _fjsp_mul_v2r8(rsq20,rinv20);
1719 /* Calculate table index by multiplying r with table scale and truncate to integer */
1720 rt = _fjsp_mul_v2r8(r20,vftabscale);
1721 itab_tmp = _fjsp_dtox_v2r8(rt);
1722 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1723 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1724 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1729 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1730 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1731 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1732 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1733 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1734 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1735 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1736 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1737 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1738 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
1742 /* Update vectorial force */
1743 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
1744 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1745 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1747 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1748 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1749 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1751 /**************************
1752 * CALCULATE INTERACTIONS *
1753 **************************/
1755 r21 = _fjsp_mul_v2r8(rsq21,rinv21);
1757 /* Calculate table index by multiplying r with table scale and truncate to integer */
1758 rt = _fjsp_mul_v2r8(r21,vftabscale);
1759 itab_tmp = _fjsp_dtox_v2r8(rt);
1760 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1761 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1762 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1767 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1768 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1769 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1770 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1771 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1772 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1773 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1774 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1775 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1776 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
1780 /* Update vectorial force */
1781 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
1782 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1783 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1785 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1786 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1787 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1789 /**************************
1790 * CALCULATE INTERACTIONS *
1791 **************************/
1793 r22 = _fjsp_mul_v2r8(rsq22,rinv22);
1795 /* Calculate table index by multiplying r with table scale and truncate to integer */
1796 rt = _fjsp_mul_v2r8(r22,vftabscale);
1797 itab_tmp = _fjsp_dtox_v2r8(rt);
1798 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1799 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1800 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1805 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1806 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1807 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1808 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1809 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1810 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1811 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1812 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1813 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1814 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
1818 /* Update vectorial force */
1819 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
1820 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1821 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1823 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1824 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1825 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1827 gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1829 /* Inner loop uses 400 flops */
1832 if(jidx<j_index_end)
1836 j_coord_offsetA = DIM*jnrA;
1838 /* load j atom coordinates */
1839 gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
1840 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1842 /* Calculate displacement vector */
1843 dx00 = _fjsp_sub_v2r8(ix0,jx0);
1844 dy00 = _fjsp_sub_v2r8(iy0,jy0);
1845 dz00 = _fjsp_sub_v2r8(iz0,jz0);
1846 dx01 = _fjsp_sub_v2r8(ix0,jx1);
1847 dy01 = _fjsp_sub_v2r8(iy0,jy1);
1848 dz01 = _fjsp_sub_v2r8(iz0,jz1);
1849 dx02 = _fjsp_sub_v2r8(ix0,jx2);
1850 dy02 = _fjsp_sub_v2r8(iy0,jy2);
1851 dz02 = _fjsp_sub_v2r8(iz0,jz2);
1852 dx10 = _fjsp_sub_v2r8(ix1,jx0);
1853 dy10 = _fjsp_sub_v2r8(iy1,jy0);
1854 dz10 = _fjsp_sub_v2r8(iz1,jz0);
1855 dx11 = _fjsp_sub_v2r8(ix1,jx1);
1856 dy11 = _fjsp_sub_v2r8(iy1,jy1);
1857 dz11 = _fjsp_sub_v2r8(iz1,jz1);
1858 dx12 = _fjsp_sub_v2r8(ix1,jx2);
1859 dy12 = _fjsp_sub_v2r8(iy1,jy2);
1860 dz12 = _fjsp_sub_v2r8(iz1,jz2);
1861 dx20 = _fjsp_sub_v2r8(ix2,jx0);
1862 dy20 = _fjsp_sub_v2r8(iy2,jy0);
1863 dz20 = _fjsp_sub_v2r8(iz2,jz0);
1864 dx21 = _fjsp_sub_v2r8(ix2,jx1);
1865 dy21 = _fjsp_sub_v2r8(iy2,jy1);
1866 dz21 = _fjsp_sub_v2r8(iz2,jz1);
1867 dx22 = _fjsp_sub_v2r8(ix2,jx2);
1868 dy22 = _fjsp_sub_v2r8(iy2,jy2);
1869 dz22 = _fjsp_sub_v2r8(iz2,jz2);
1871 /* Calculate squared distance and things based on it */
1872 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1873 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1874 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1875 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1876 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1877 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1878 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1879 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1880 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1882 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
1883 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
1884 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
1885 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
1886 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
1887 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
1888 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
1889 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
1890 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
1892 fjx0 = _fjsp_setzero_v2r8();
1893 fjy0 = _fjsp_setzero_v2r8();
1894 fjz0 = _fjsp_setzero_v2r8();
1895 fjx1 = _fjsp_setzero_v2r8();
1896 fjy1 = _fjsp_setzero_v2r8();
1897 fjz1 = _fjsp_setzero_v2r8();
1898 fjx2 = _fjsp_setzero_v2r8();
1899 fjy2 = _fjsp_setzero_v2r8();
1900 fjz2 = _fjsp_setzero_v2r8();
1902 /**************************
1903 * CALCULATE INTERACTIONS *
1904 **************************/
1906 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
1908 /* Calculate table index by multiplying r with table scale and truncate to integer */
1909 rt = _fjsp_mul_v2r8(r00,vftabscale);
1910 itab_tmp = _fjsp_dtox_v2r8(rt);
1911 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1912 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1913 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1918 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1919 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1920 F = _fjsp_setzero_v2r8();
1921 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1922 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1923 H = _fjsp_setzero_v2r8();
1924 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1925 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1926 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1927 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
1929 /* CUBIC SPLINE TABLE DISPERSION */
1932 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1933 F = _fjsp_setzero_v2r8();
1934 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1935 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
1936 H = _fjsp_setzero_v2r8();
1937 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1938 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1939 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1940 fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
1942 /* CUBIC SPLINE TABLE REPULSION */
1943 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
1944 F = _fjsp_setzero_v2r8();
1945 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1946 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
1947 H = _fjsp_setzero_v2r8();
1948 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1949 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1950 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1951 fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
1952 fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
1954 fscal = _fjsp_add_v2r8(felec,fvdw);
1956 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1958 /* Update vectorial force */
1959 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
1960 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1961 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1963 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1964 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1965 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1967 /**************************
1968 * CALCULATE INTERACTIONS *
1969 **************************/
1971 r01 = _fjsp_mul_v2r8(rsq01,rinv01);
1973 /* Calculate table index by multiplying r with table scale and truncate to integer */
1974 rt = _fjsp_mul_v2r8(r01,vftabscale);
1975 itab_tmp = _fjsp_dtox_v2r8(rt);
1976 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1977 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1978 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1983 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1984 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1985 F = _fjsp_setzero_v2r8();
1986 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1987 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1988 H = _fjsp_setzero_v2r8();
1989 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1990 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1991 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1992 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
1996 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1998 /* Update vectorial force */
1999 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
2000 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
2001 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
2003 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
2004 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
2005 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
2007 /**************************
2008 * CALCULATE INTERACTIONS *
2009 **************************/
2011 r02 = _fjsp_mul_v2r8(rsq02,rinv02);
2013 /* Calculate table index by multiplying r with table scale and truncate to integer */
2014 rt = _fjsp_mul_v2r8(r02,vftabscale);
2015 itab_tmp = _fjsp_dtox_v2r8(rt);
2016 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2017 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
2018 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2023 /* CUBIC SPLINE TABLE ELECTROSTATICS */
2024 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2025 F = _fjsp_setzero_v2r8();
2026 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2027 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2028 H = _fjsp_setzero_v2r8();
2029 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2030 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2031 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2032 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
2036 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2038 /* Update vectorial force */
2039 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
2040 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
2041 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
2043 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
2044 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
2045 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
2047 /**************************
2048 * CALCULATE INTERACTIONS *
2049 **************************/
2051 r10 = _fjsp_mul_v2r8(rsq10,rinv10);
2053 /* Calculate table index by multiplying r with table scale and truncate to integer */
2054 rt = _fjsp_mul_v2r8(r10,vftabscale);
2055 itab_tmp = _fjsp_dtox_v2r8(rt);
2056 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2057 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
2058 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2063 /* CUBIC SPLINE TABLE ELECTROSTATICS */
2064 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2065 F = _fjsp_setzero_v2r8();
2066 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2067 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2068 H = _fjsp_setzero_v2r8();
2069 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2070 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2071 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2072 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
2076 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2078 /* Update vectorial force */
2079 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
2080 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
2081 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
2083 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
2084 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
2085 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
2087 /**************************
2088 * CALCULATE INTERACTIONS *
2089 **************************/
2091 r11 = _fjsp_mul_v2r8(rsq11,rinv11);
2093 /* Calculate table index by multiplying r with table scale and truncate to integer */
2094 rt = _fjsp_mul_v2r8(r11,vftabscale);
2095 itab_tmp = _fjsp_dtox_v2r8(rt);
2096 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2097 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
2098 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2103 /* CUBIC SPLINE TABLE ELECTROSTATICS */
2104 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2105 F = _fjsp_setzero_v2r8();
2106 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2107 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2108 H = _fjsp_setzero_v2r8();
2109 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2110 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2111 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2112 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
2116 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2118 /* Update vectorial force */
2119 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
2120 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
2121 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
2123 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
2124 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
2125 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
2127 /**************************
2128 * CALCULATE INTERACTIONS *
2129 **************************/
2131 r12 = _fjsp_mul_v2r8(rsq12,rinv12);
2133 /* Calculate table index by multiplying r with table scale and truncate to integer */
2134 rt = _fjsp_mul_v2r8(r12,vftabscale);
2135 itab_tmp = _fjsp_dtox_v2r8(rt);
2136 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2137 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
2138 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2143 /* CUBIC SPLINE TABLE ELECTROSTATICS */
2144 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2145 F = _fjsp_setzero_v2r8();
2146 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2147 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2148 H = _fjsp_setzero_v2r8();
2149 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2150 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2151 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2152 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
2156 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2158 /* Update vectorial force */
2159 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
2160 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
2161 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
2163 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
2164 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
2165 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
2167 /**************************
2168 * CALCULATE INTERACTIONS *
2169 **************************/
2171 r20 = _fjsp_mul_v2r8(rsq20,rinv20);
2173 /* Calculate table index by multiplying r with table scale and truncate to integer */
2174 rt = _fjsp_mul_v2r8(r20,vftabscale);
2175 itab_tmp = _fjsp_dtox_v2r8(rt);
2176 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2177 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
2178 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2183 /* CUBIC SPLINE TABLE ELECTROSTATICS */
2184 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2185 F = _fjsp_setzero_v2r8();
2186 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2187 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2188 H = _fjsp_setzero_v2r8();
2189 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2190 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2191 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2192 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
2196 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2198 /* Update vectorial force */
2199 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
2200 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
2201 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
2203 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
2204 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
2205 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
2207 /**************************
2208 * CALCULATE INTERACTIONS *
2209 **************************/
2211 r21 = _fjsp_mul_v2r8(rsq21,rinv21);
2213 /* Calculate table index by multiplying r with table scale and truncate to integer */
2214 rt = _fjsp_mul_v2r8(r21,vftabscale);
2215 itab_tmp = _fjsp_dtox_v2r8(rt);
2216 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2217 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
2218 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2223 /* CUBIC SPLINE TABLE ELECTROSTATICS */
2224 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2225 F = _fjsp_setzero_v2r8();
2226 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2227 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2228 H = _fjsp_setzero_v2r8();
2229 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2230 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2231 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2232 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
2236 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2238 /* Update vectorial force */
2239 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
2240 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
2241 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
2243 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
2244 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
2245 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
2247 /**************************
2248 * CALCULATE INTERACTIONS *
2249 **************************/
2251 r22 = _fjsp_mul_v2r8(rsq22,rinv22);
2253 /* Calculate table index by multiplying r with table scale and truncate to integer */
2254 rt = _fjsp_mul_v2r8(r22,vftabscale);
2255 itab_tmp = _fjsp_dtox_v2r8(rt);
2256 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2257 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
2258 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2263 /* CUBIC SPLINE TABLE ELECTROSTATICS */
2264 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2265 F = _fjsp_setzero_v2r8();
2266 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2267 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2268 H = _fjsp_setzero_v2r8();
2269 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2270 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2271 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2272 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
2276 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2278 /* Update vectorial force */
2279 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
2280 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
2281 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
2283 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
2284 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
2285 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
2287 gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
2289 /* Inner loop uses 400 flops */
2292 /* End of innermost loop */
2294 gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
2295 f+i_coord_offset,fshift+i_shift_offset);
2297 /* Increment number of inner iterations */
2298 inneriter += j_index_end - j_index_start;
2300 /* Outer loop uses 18 flops */
2303 /* Increment number of outer iterations */
2306 /* Update outer/inner flops */
2308 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*400);