2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
44 #include "../nb_kernel.h"
45 #include "gromacs/legacyheaders/types/simple.h"
46 #include "gromacs/math/vec.h"
47 #include "gromacs/legacyheaders/nrnb.h"
49 #include "kernelutil_sparc64_hpc_ace_double.h"
52 * GROMACS nonbonded kernel: nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double
53 * Electrostatics interaction: CubicSplineTable
54 * VdW interaction: CubicSplineTable
55 * Geometry: Water3-Water3
56 * Calculate force/pot: PotentialAndForce
59 nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double
60 (t_nblist * gmx_restrict nlist,
61 rvec * gmx_restrict xx,
62 rvec * gmx_restrict ff,
63 t_forcerec * gmx_restrict fr,
64 t_mdatoms * gmx_restrict mdatoms,
65 nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
66 t_nrnb * gmx_restrict nrnb)
68 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
69 * just 0 for non-waters.
70 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, i.e. the two different
71 * jnr indices whose data are put in the two positions of the SIMD register.
73 int i_shift_offset,i_coord_offset,outeriter,inneriter;
74 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
76 int j_coord_offsetA,j_coord_offsetB;
77 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
79 real *shiftvec,*fshift,*x,*f;
80 _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
82 _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
84 _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
86 _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
87 int vdwjidx0A,vdwjidx0B;
88 _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
89 int vdwjidx1A,vdwjidx1B;
90 _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
91 int vdwjidx2A,vdwjidx2B;
92 _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
93 _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
94 _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
95 _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
96 _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
97 _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
98 _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
99 _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
100 _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
101 _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
102 _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
105 _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
108 _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
109 _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
110 _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
113 _fjsp_v2r8 dummy_mask,cutoff_mask;
114 _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
115 _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
116 union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
123 jindex = nlist->jindex;
125 shiftidx = nlist->shift;
127 shiftvec = fr->shift_vec[0];
128 fshift = fr->fshift[0];
129 facel = gmx_fjsp_set1_v2r8(fr->epsfac);
130 charge = mdatoms->chargeA;
131 nvdwtype = fr->ntype;
133 vdwtype = mdatoms->typeA;
135 vftab = kernel_data->table_elec_vdw->data;
136 vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale);
138 /* Setup water-specific parameters */
139 inr = nlist->iinr[0];
140 iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
141 iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
142 iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
143 vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
145 jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
146 jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
147 jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
148 vdwjidx0A = 2*vdwtype[inr+0];
149 qq00 = _fjsp_mul_v2r8(iq0,jq0);
150 c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
151 c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
152 qq01 = _fjsp_mul_v2r8(iq0,jq1);
153 qq02 = _fjsp_mul_v2r8(iq0,jq2);
154 qq10 = _fjsp_mul_v2r8(iq1,jq0);
155 qq11 = _fjsp_mul_v2r8(iq1,jq1);
156 qq12 = _fjsp_mul_v2r8(iq1,jq2);
157 qq20 = _fjsp_mul_v2r8(iq2,jq0);
158 qq21 = _fjsp_mul_v2r8(iq2,jq1);
159 qq22 = _fjsp_mul_v2r8(iq2,jq2);
161 /* Avoid compiler warnings */
169 /* Start outer loop over neighborlists */
170 for(iidx=0; iidx<nri; iidx++)
172 /* Load shift vector for this list */
173 i_shift_offset = DIM*shiftidx[iidx];
175 /* Load limits for loop over neighbors */
176 j_index_start = jindex[iidx];
177 j_index_end = jindex[iidx+1];
179 /* Get outer coordinate index */
181 i_coord_offset = DIM*inr;
183 /* Load i particle coords and add shift vector */
184 gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
185 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
187 fix0 = _fjsp_setzero_v2r8();
188 fiy0 = _fjsp_setzero_v2r8();
189 fiz0 = _fjsp_setzero_v2r8();
190 fix1 = _fjsp_setzero_v2r8();
191 fiy1 = _fjsp_setzero_v2r8();
192 fiz1 = _fjsp_setzero_v2r8();
193 fix2 = _fjsp_setzero_v2r8();
194 fiy2 = _fjsp_setzero_v2r8();
195 fiz2 = _fjsp_setzero_v2r8();
197 /* Reset potential sums */
198 velecsum = _fjsp_setzero_v2r8();
199 vvdwsum = _fjsp_setzero_v2r8();
201 /* Start inner kernel loop */
202 for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
205 /* Get j neighbor index, and coordinate index */
208 j_coord_offsetA = DIM*jnrA;
209 j_coord_offsetB = DIM*jnrB;
211 /* load j atom coordinates */
212 gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
213 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
215 /* Calculate displacement vector */
216 dx00 = _fjsp_sub_v2r8(ix0,jx0);
217 dy00 = _fjsp_sub_v2r8(iy0,jy0);
218 dz00 = _fjsp_sub_v2r8(iz0,jz0);
219 dx01 = _fjsp_sub_v2r8(ix0,jx1);
220 dy01 = _fjsp_sub_v2r8(iy0,jy1);
221 dz01 = _fjsp_sub_v2r8(iz0,jz1);
222 dx02 = _fjsp_sub_v2r8(ix0,jx2);
223 dy02 = _fjsp_sub_v2r8(iy0,jy2);
224 dz02 = _fjsp_sub_v2r8(iz0,jz2);
225 dx10 = _fjsp_sub_v2r8(ix1,jx0);
226 dy10 = _fjsp_sub_v2r8(iy1,jy0);
227 dz10 = _fjsp_sub_v2r8(iz1,jz0);
228 dx11 = _fjsp_sub_v2r8(ix1,jx1);
229 dy11 = _fjsp_sub_v2r8(iy1,jy1);
230 dz11 = _fjsp_sub_v2r8(iz1,jz1);
231 dx12 = _fjsp_sub_v2r8(ix1,jx2);
232 dy12 = _fjsp_sub_v2r8(iy1,jy2);
233 dz12 = _fjsp_sub_v2r8(iz1,jz2);
234 dx20 = _fjsp_sub_v2r8(ix2,jx0);
235 dy20 = _fjsp_sub_v2r8(iy2,jy0);
236 dz20 = _fjsp_sub_v2r8(iz2,jz0);
237 dx21 = _fjsp_sub_v2r8(ix2,jx1);
238 dy21 = _fjsp_sub_v2r8(iy2,jy1);
239 dz21 = _fjsp_sub_v2r8(iz2,jz1);
240 dx22 = _fjsp_sub_v2r8(ix2,jx2);
241 dy22 = _fjsp_sub_v2r8(iy2,jy2);
242 dz22 = _fjsp_sub_v2r8(iz2,jz2);
244 /* Calculate squared distance and things based on it */
245 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
246 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
247 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
248 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
249 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
250 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
251 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
252 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
253 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
255 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
256 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
257 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
258 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
259 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
260 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
261 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
262 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
263 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
265 fjx0 = _fjsp_setzero_v2r8();
266 fjy0 = _fjsp_setzero_v2r8();
267 fjz0 = _fjsp_setzero_v2r8();
268 fjx1 = _fjsp_setzero_v2r8();
269 fjy1 = _fjsp_setzero_v2r8();
270 fjz1 = _fjsp_setzero_v2r8();
271 fjx2 = _fjsp_setzero_v2r8();
272 fjy2 = _fjsp_setzero_v2r8();
273 fjz2 = _fjsp_setzero_v2r8();
275 /**************************
276 * CALCULATE INTERACTIONS *
277 **************************/
279 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
281 /* Calculate table index by multiplying r with table scale and truncate to integer */
282 rt = _fjsp_mul_v2r8(r00,vftabscale);
283 itab_tmp = _fjsp_dtox_v2r8(rt);
284 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
285 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
286 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
291 /* CUBIC SPLINE TABLE ELECTROSTATICS */
292 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
293 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
294 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
295 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
296 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
297 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
298 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
299 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
300 velec = _fjsp_mul_v2r8(qq00,VV);
301 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
302 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
304 /* CUBIC SPLINE TABLE DISPERSION */
307 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
308 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
309 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
310 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
311 H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
312 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
313 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
314 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
315 vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
316 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
317 fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
319 /* CUBIC SPLINE TABLE REPULSION */
320 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
321 F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
322 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
323 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
324 H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
325 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
326 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
327 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
328 vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
329 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
330 fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
331 vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
332 fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
334 /* Update potential sum for this i atom from the interaction with this j atom. */
335 velecsum = _fjsp_add_v2r8(velecsum,velec);
336 vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
338 fscal = _fjsp_add_v2r8(felec,fvdw);
340 /* Update vectorial force */
341 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
342 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
343 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
345 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
346 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
347 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
349 /**************************
350 * CALCULATE INTERACTIONS *
351 **************************/
353 r01 = _fjsp_mul_v2r8(rsq01,rinv01);
355 /* Calculate table index by multiplying r with table scale and truncate to integer */
356 rt = _fjsp_mul_v2r8(r01,vftabscale);
357 itab_tmp = _fjsp_dtox_v2r8(rt);
358 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
359 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
360 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
365 /* CUBIC SPLINE TABLE ELECTROSTATICS */
366 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
367 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
368 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
369 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
370 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
371 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
372 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
373 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
374 velec = _fjsp_mul_v2r8(qq01,VV);
375 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
376 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
378 /* Update potential sum for this i atom from the interaction with this j atom. */
379 velecsum = _fjsp_add_v2r8(velecsum,velec);
383 /* Update vectorial force */
384 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
385 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
386 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
388 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
389 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
390 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
392 /**************************
393 * CALCULATE INTERACTIONS *
394 **************************/
396 r02 = _fjsp_mul_v2r8(rsq02,rinv02);
398 /* Calculate table index by multiplying r with table scale and truncate to integer */
399 rt = _fjsp_mul_v2r8(r02,vftabscale);
400 itab_tmp = _fjsp_dtox_v2r8(rt);
401 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
402 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
403 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
408 /* CUBIC SPLINE TABLE ELECTROSTATICS */
409 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
410 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
411 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
412 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
413 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
414 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
415 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
416 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
417 velec = _fjsp_mul_v2r8(qq02,VV);
418 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
419 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
421 /* Update potential sum for this i atom from the interaction with this j atom. */
422 velecsum = _fjsp_add_v2r8(velecsum,velec);
426 /* Update vectorial force */
427 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
428 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
429 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
431 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
432 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
433 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
435 /**************************
436 * CALCULATE INTERACTIONS *
437 **************************/
439 r10 = _fjsp_mul_v2r8(rsq10,rinv10);
441 /* Calculate table index by multiplying r with table scale and truncate to integer */
442 rt = _fjsp_mul_v2r8(r10,vftabscale);
443 itab_tmp = _fjsp_dtox_v2r8(rt);
444 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
445 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
446 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
451 /* CUBIC SPLINE TABLE ELECTROSTATICS */
452 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
453 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
454 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
455 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
456 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
457 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
458 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
459 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
460 velec = _fjsp_mul_v2r8(qq10,VV);
461 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
462 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
464 /* Update potential sum for this i atom from the interaction with this j atom. */
465 velecsum = _fjsp_add_v2r8(velecsum,velec);
469 /* Update vectorial force */
470 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
471 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
472 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
474 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
475 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
476 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
478 /**************************
479 * CALCULATE INTERACTIONS *
480 **************************/
482 r11 = _fjsp_mul_v2r8(rsq11,rinv11);
484 /* Calculate table index by multiplying r with table scale and truncate to integer */
485 rt = _fjsp_mul_v2r8(r11,vftabscale);
486 itab_tmp = _fjsp_dtox_v2r8(rt);
487 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
488 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
489 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
494 /* CUBIC SPLINE TABLE ELECTROSTATICS */
495 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
496 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
497 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
498 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
499 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
500 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
501 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
502 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
503 velec = _fjsp_mul_v2r8(qq11,VV);
504 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
505 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
507 /* Update potential sum for this i atom from the interaction with this j atom. */
508 velecsum = _fjsp_add_v2r8(velecsum,velec);
512 /* Update vectorial force */
513 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
514 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
515 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
517 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
518 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
519 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
521 /**************************
522 * CALCULATE INTERACTIONS *
523 **************************/
525 r12 = _fjsp_mul_v2r8(rsq12,rinv12);
527 /* Calculate table index by multiplying r with table scale and truncate to integer */
528 rt = _fjsp_mul_v2r8(r12,vftabscale);
529 itab_tmp = _fjsp_dtox_v2r8(rt);
530 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
531 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
532 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
537 /* CUBIC SPLINE TABLE ELECTROSTATICS */
538 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
539 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
540 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
541 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
542 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
543 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
544 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
545 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
546 velec = _fjsp_mul_v2r8(qq12,VV);
547 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
548 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
550 /* Update potential sum for this i atom from the interaction with this j atom. */
551 velecsum = _fjsp_add_v2r8(velecsum,velec);
555 /* Update vectorial force */
556 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
557 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
558 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
560 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
561 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
562 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
564 /**************************
565 * CALCULATE INTERACTIONS *
566 **************************/
568 r20 = _fjsp_mul_v2r8(rsq20,rinv20);
570 /* Calculate table index by multiplying r with table scale and truncate to integer */
571 rt = _fjsp_mul_v2r8(r20,vftabscale);
572 itab_tmp = _fjsp_dtox_v2r8(rt);
573 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
574 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
575 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
580 /* CUBIC SPLINE TABLE ELECTROSTATICS */
581 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
582 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
583 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
584 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
585 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
586 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
587 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
588 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
589 velec = _fjsp_mul_v2r8(qq20,VV);
590 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
591 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
593 /* Update potential sum for this i atom from the interaction with this j atom. */
594 velecsum = _fjsp_add_v2r8(velecsum,velec);
598 /* Update vectorial force */
599 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
600 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
601 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
603 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
604 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
605 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
607 /**************************
608 * CALCULATE INTERACTIONS *
609 **************************/
611 r21 = _fjsp_mul_v2r8(rsq21,rinv21);
613 /* Calculate table index by multiplying r with table scale and truncate to integer */
614 rt = _fjsp_mul_v2r8(r21,vftabscale);
615 itab_tmp = _fjsp_dtox_v2r8(rt);
616 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
617 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
618 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
623 /* CUBIC SPLINE TABLE ELECTROSTATICS */
624 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
625 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
626 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
627 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
628 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
629 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
630 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
631 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
632 velec = _fjsp_mul_v2r8(qq21,VV);
633 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
634 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
636 /* Update potential sum for this i atom from the interaction with this j atom. */
637 velecsum = _fjsp_add_v2r8(velecsum,velec);
641 /* Update vectorial force */
642 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
643 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
644 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
646 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
647 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
648 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
650 /**************************
651 * CALCULATE INTERACTIONS *
652 **************************/
654 r22 = _fjsp_mul_v2r8(rsq22,rinv22);
656 /* Calculate table index by multiplying r with table scale and truncate to integer */
657 rt = _fjsp_mul_v2r8(r22,vftabscale);
658 itab_tmp = _fjsp_dtox_v2r8(rt);
659 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
660 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
661 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
666 /* CUBIC SPLINE TABLE ELECTROSTATICS */
667 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
668 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
669 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
670 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
671 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
672 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
673 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
674 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
675 velec = _fjsp_mul_v2r8(qq22,VV);
676 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
677 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
679 /* Update potential sum for this i atom from the interaction with this j atom. */
680 velecsum = _fjsp_add_v2r8(velecsum,velec);
684 /* Update vectorial force */
685 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
686 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
687 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
689 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
690 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
691 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
693 gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
695 /* Inner loop uses 444 flops */
702 j_coord_offsetA = DIM*jnrA;
704 /* load j atom coordinates */
705 gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
706 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
708 /* Calculate displacement vector */
709 dx00 = _fjsp_sub_v2r8(ix0,jx0);
710 dy00 = _fjsp_sub_v2r8(iy0,jy0);
711 dz00 = _fjsp_sub_v2r8(iz0,jz0);
712 dx01 = _fjsp_sub_v2r8(ix0,jx1);
713 dy01 = _fjsp_sub_v2r8(iy0,jy1);
714 dz01 = _fjsp_sub_v2r8(iz0,jz1);
715 dx02 = _fjsp_sub_v2r8(ix0,jx2);
716 dy02 = _fjsp_sub_v2r8(iy0,jy2);
717 dz02 = _fjsp_sub_v2r8(iz0,jz2);
718 dx10 = _fjsp_sub_v2r8(ix1,jx0);
719 dy10 = _fjsp_sub_v2r8(iy1,jy0);
720 dz10 = _fjsp_sub_v2r8(iz1,jz0);
721 dx11 = _fjsp_sub_v2r8(ix1,jx1);
722 dy11 = _fjsp_sub_v2r8(iy1,jy1);
723 dz11 = _fjsp_sub_v2r8(iz1,jz1);
724 dx12 = _fjsp_sub_v2r8(ix1,jx2);
725 dy12 = _fjsp_sub_v2r8(iy1,jy2);
726 dz12 = _fjsp_sub_v2r8(iz1,jz2);
727 dx20 = _fjsp_sub_v2r8(ix2,jx0);
728 dy20 = _fjsp_sub_v2r8(iy2,jy0);
729 dz20 = _fjsp_sub_v2r8(iz2,jz0);
730 dx21 = _fjsp_sub_v2r8(ix2,jx1);
731 dy21 = _fjsp_sub_v2r8(iy2,jy1);
732 dz21 = _fjsp_sub_v2r8(iz2,jz1);
733 dx22 = _fjsp_sub_v2r8(ix2,jx2);
734 dy22 = _fjsp_sub_v2r8(iy2,jy2);
735 dz22 = _fjsp_sub_v2r8(iz2,jz2);
737 /* Calculate squared distance and things based on it */
738 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
739 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
740 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
741 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
742 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
743 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
744 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
745 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
746 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
748 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
749 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
750 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
751 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
752 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
753 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
754 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
755 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
756 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
758 fjx0 = _fjsp_setzero_v2r8();
759 fjy0 = _fjsp_setzero_v2r8();
760 fjz0 = _fjsp_setzero_v2r8();
761 fjx1 = _fjsp_setzero_v2r8();
762 fjy1 = _fjsp_setzero_v2r8();
763 fjz1 = _fjsp_setzero_v2r8();
764 fjx2 = _fjsp_setzero_v2r8();
765 fjy2 = _fjsp_setzero_v2r8();
766 fjz2 = _fjsp_setzero_v2r8();
768 /**************************
769 * CALCULATE INTERACTIONS *
770 **************************/
772 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
774 /* Calculate table index by multiplying r with table scale and truncate to integer */
775 rt = _fjsp_mul_v2r8(r00,vftabscale);
776 itab_tmp = _fjsp_dtox_v2r8(rt);
777 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
778 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
779 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
784 /* CUBIC SPLINE TABLE ELECTROSTATICS */
785 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
786 F = _fjsp_setzero_v2r8();
787 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
788 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
789 H = _fjsp_setzero_v2r8();
790 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
791 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
792 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
793 velec = _fjsp_mul_v2r8(qq00,VV);
794 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
795 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
797 /* CUBIC SPLINE TABLE DISPERSION */
800 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
801 F = _fjsp_setzero_v2r8();
802 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
803 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
804 H = _fjsp_setzero_v2r8();
805 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
806 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
807 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
808 vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
809 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
810 fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
812 /* CUBIC SPLINE TABLE REPULSION */
813 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
814 F = _fjsp_setzero_v2r8();
815 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
816 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
817 H = _fjsp_setzero_v2r8();
818 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
819 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
820 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
821 vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
822 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
823 fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
824 vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
825 fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
827 /* Update potential sum for this i atom from the interaction with this j atom. */
828 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
829 velecsum = _fjsp_add_v2r8(velecsum,velec);
830 vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
831 vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
833 fscal = _fjsp_add_v2r8(felec,fvdw);
835 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
837 /* Update vectorial force */
838 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
839 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
840 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
842 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
843 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
844 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
846 /**************************
847 * CALCULATE INTERACTIONS *
848 **************************/
850 r01 = _fjsp_mul_v2r8(rsq01,rinv01);
852 /* Calculate table index by multiplying r with table scale and truncate to integer */
853 rt = _fjsp_mul_v2r8(r01,vftabscale);
854 itab_tmp = _fjsp_dtox_v2r8(rt);
855 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
856 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
857 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
862 /* CUBIC SPLINE TABLE ELECTROSTATICS */
863 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
864 F = _fjsp_setzero_v2r8();
865 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
866 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
867 H = _fjsp_setzero_v2r8();
868 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
869 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
870 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
871 velec = _fjsp_mul_v2r8(qq01,VV);
872 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
873 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
875 /* Update potential sum for this i atom from the interaction with this j atom. */
876 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
877 velecsum = _fjsp_add_v2r8(velecsum,velec);
881 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
883 /* Update vectorial force */
884 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
885 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
886 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
888 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
889 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
890 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
892 /**************************
893 * CALCULATE INTERACTIONS *
894 **************************/
896 r02 = _fjsp_mul_v2r8(rsq02,rinv02);
898 /* Calculate table index by multiplying r with table scale and truncate to integer */
899 rt = _fjsp_mul_v2r8(r02,vftabscale);
900 itab_tmp = _fjsp_dtox_v2r8(rt);
901 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
902 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
903 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
908 /* CUBIC SPLINE TABLE ELECTROSTATICS */
909 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
910 F = _fjsp_setzero_v2r8();
911 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
912 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
913 H = _fjsp_setzero_v2r8();
914 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
915 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
916 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
917 velec = _fjsp_mul_v2r8(qq02,VV);
918 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
919 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
921 /* Update potential sum for this i atom from the interaction with this j atom. */
922 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
923 velecsum = _fjsp_add_v2r8(velecsum,velec);
927 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
929 /* Update vectorial force */
930 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
931 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
932 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
934 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
935 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
936 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
938 /**************************
939 * CALCULATE INTERACTIONS *
940 **************************/
942 r10 = _fjsp_mul_v2r8(rsq10,rinv10);
944 /* Calculate table index by multiplying r with table scale and truncate to integer */
945 rt = _fjsp_mul_v2r8(r10,vftabscale);
946 itab_tmp = _fjsp_dtox_v2r8(rt);
947 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
948 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
949 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
954 /* CUBIC SPLINE TABLE ELECTROSTATICS */
955 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
956 F = _fjsp_setzero_v2r8();
957 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
958 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
959 H = _fjsp_setzero_v2r8();
960 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
961 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
962 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
963 velec = _fjsp_mul_v2r8(qq10,VV);
964 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
965 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
967 /* Update potential sum for this i atom from the interaction with this j atom. */
968 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
969 velecsum = _fjsp_add_v2r8(velecsum,velec);
973 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
975 /* Update vectorial force */
976 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
977 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
978 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
980 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
981 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
982 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
984 /**************************
985 * CALCULATE INTERACTIONS *
986 **************************/
988 r11 = _fjsp_mul_v2r8(rsq11,rinv11);
990 /* Calculate table index by multiplying r with table scale and truncate to integer */
991 rt = _fjsp_mul_v2r8(r11,vftabscale);
992 itab_tmp = _fjsp_dtox_v2r8(rt);
993 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
994 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
995 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1000 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1001 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1002 F = _fjsp_setzero_v2r8();
1003 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1004 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1005 H = _fjsp_setzero_v2r8();
1006 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1007 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1008 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
1009 velec = _fjsp_mul_v2r8(qq11,VV);
1010 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1011 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
1013 /* Update potential sum for this i atom from the interaction with this j atom. */
1014 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1015 velecsum = _fjsp_add_v2r8(velecsum,velec);
1019 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1021 /* Update vectorial force */
1022 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
1023 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1024 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1026 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1027 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1028 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1030 /**************************
1031 * CALCULATE INTERACTIONS *
1032 **************************/
1034 r12 = _fjsp_mul_v2r8(rsq12,rinv12);
1036 /* Calculate table index by multiplying r with table scale and truncate to integer */
1037 rt = _fjsp_mul_v2r8(r12,vftabscale);
1038 itab_tmp = _fjsp_dtox_v2r8(rt);
1039 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1040 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1041 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1046 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1047 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1048 F = _fjsp_setzero_v2r8();
1049 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1050 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1051 H = _fjsp_setzero_v2r8();
1052 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1053 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1054 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
1055 velec = _fjsp_mul_v2r8(qq12,VV);
1056 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1057 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
1059 /* Update potential sum for this i atom from the interaction with this j atom. */
1060 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1061 velecsum = _fjsp_add_v2r8(velecsum,velec);
1065 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1067 /* Update vectorial force */
1068 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
1069 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1070 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1072 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1073 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1074 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1076 /**************************
1077 * CALCULATE INTERACTIONS *
1078 **************************/
1080 r20 = _fjsp_mul_v2r8(rsq20,rinv20);
1082 /* Calculate table index by multiplying r with table scale and truncate to integer */
1083 rt = _fjsp_mul_v2r8(r20,vftabscale);
1084 itab_tmp = _fjsp_dtox_v2r8(rt);
1085 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1086 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1087 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1092 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1093 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1094 F = _fjsp_setzero_v2r8();
1095 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1096 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1097 H = _fjsp_setzero_v2r8();
1098 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1099 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1100 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
1101 velec = _fjsp_mul_v2r8(qq20,VV);
1102 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1103 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
1105 /* Update potential sum for this i atom from the interaction with this j atom. */
1106 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1107 velecsum = _fjsp_add_v2r8(velecsum,velec);
1111 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1113 /* Update vectorial force */
1114 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
1115 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1116 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1118 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1119 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1120 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1122 /**************************
1123 * CALCULATE INTERACTIONS *
1124 **************************/
1126 r21 = _fjsp_mul_v2r8(rsq21,rinv21);
1128 /* Calculate table index by multiplying r with table scale and truncate to integer */
1129 rt = _fjsp_mul_v2r8(r21,vftabscale);
1130 itab_tmp = _fjsp_dtox_v2r8(rt);
1131 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1132 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1133 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1138 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1139 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1140 F = _fjsp_setzero_v2r8();
1141 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1142 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1143 H = _fjsp_setzero_v2r8();
1144 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1145 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1146 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
1147 velec = _fjsp_mul_v2r8(qq21,VV);
1148 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1149 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
1151 /* Update potential sum for this i atom from the interaction with this j atom. */
1152 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1153 velecsum = _fjsp_add_v2r8(velecsum,velec);
1157 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1159 /* Update vectorial force */
1160 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
1161 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1162 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1164 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1165 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1166 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1168 /**************************
1169 * CALCULATE INTERACTIONS *
1170 **************************/
1172 r22 = _fjsp_mul_v2r8(rsq22,rinv22);
1174 /* Calculate table index by multiplying r with table scale and truncate to integer */
1175 rt = _fjsp_mul_v2r8(r22,vftabscale);
1176 itab_tmp = _fjsp_dtox_v2r8(rt);
1177 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1178 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1179 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1184 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1185 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1186 F = _fjsp_setzero_v2r8();
1187 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1188 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1189 H = _fjsp_setzero_v2r8();
1190 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1191 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1192 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
1193 velec = _fjsp_mul_v2r8(qq22,VV);
1194 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1195 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
1197 /* Update potential sum for this i atom from the interaction with this j atom. */
1198 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1199 velecsum = _fjsp_add_v2r8(velecsum,velec);
1203 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1205 /* Update vectorial force */
1206 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
1207 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1208 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1210 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1211 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1212 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1214 gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1216 /* Inner loop uses 444 flops */
1219 /* End of innermost loop */
1221 gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
1222 f+i_coord_offset,fshift+i_shift_offset);
1225 /* Update potential energies */
1226 gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
1227 gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
1229 /* Increment number of inner iterations */
1230 inneriter += j_index_end - j_index_start;
1232 /* Outer loop uses 20 flops */
1235 /* Increment number of outer iterations */
1238 /* Update outer/inner flops */
1240 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*444);
1243 * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double
1244 * Electrostatics interaction: CubicSplineTable
1245 * VdW interaction: CubicSplineTable
1246 * Geometry: Water3-Water3
1247 * Calculate force/pot: Force
1250 nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double
1251 (t_nblist * gmx_restrict nlist,
1252 rvec * gmx_restrict xx,
1253 rvec * gmx_restrict ff,
1254 t_forcerec * gmx_restrict fr,
1255 t_mdatoms * gmx_restrict mdatoms,
1256 nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
1257 t_nrnb * gmx_restrict nrnb)
1259 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1260 * just 0 for non-waters.
1261 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
1262 * jnr indices corresponding to data put in the two positions in the SIMD register.
1264 int i_shift_offset,i_coord_offset,outeriter,inneriter;
1265 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
1267 int j_coord_offsetA,j_coord_offsetB;
1268 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
1269 real rcutoff_scalar;
1270 real *shiftvec,*fshift,*x,*f;
1271 _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1273 _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
1275 _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1277 _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1278 int vdwjidx0A,vdwjidx0B;
1279 _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
1280 int vdwjidx1A,vdwjidx1B;
1281 _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1282 int vdwjidx2A,vdwjidx2B;
1283 _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1284 _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
1285 _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
1286 _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
1287 _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
1288 _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1289 _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1290 _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
1291 _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1292 _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1293 _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
1296 _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
1299 _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
1300 _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
1301 _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
1303 _fjsp_v2r8 itab_tmp;
1304 _fjsp_v2r8 dummy_mask,cutoff_mask;
1305 _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
1306 _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
1307 union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
1314 jindex = nlist->jindex;
1316 shiftidx = nlist->shift;
1318 shiftvec = fr->shift_vec[0];
1319 fshift = fr->fshift[0];
1320 facel = gmx_fjsp_set1_v2r8(fr->epsfac);
1321 charge = mdatoms->chargeA;
1322 nvdwtype = fr->ntype;
1323 vdwparam = fr->nbfp;
1324 vdwtype = mdatoms->typeA;
1326 vftab = kernel_data->table_elec_vdw->data;
1327 vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale);
1329 /* Setup water-specific parameters */
1330 inr = nlist->iinr[0];
1331 iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
1332 iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
1333 iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
1334 vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
1336 jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
1337 jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
1338 jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
1339 vdwjidx0A = 2*vdwtype[inr+0];
1340 qq00 = _fjsp_mul_v2r8(iq0,jq0);
1341 c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
1342 c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
1343 qq01 = _fjsp_mul_v2r8(iq0,jq1);
1344 qq02 = _fjsp_mul_v2r8(iq0,jq2);
1345 qq10 = _fjsp_mul_v2r8(iq1,jq0);
1346 qq11 = _fjsp_mul_v2r8(iq1,jq1);
1347 qq12 = _fjsp_mul_v2r8(iq1,jq2);
1348 qq20 = _fjsp_mul_v2r8(iq2,jq0);
1349 qq21 = _fjsp_mul_v2r8(iq2,jq1);
1350 qq22 = _fjsp_mul_v2r8(iq2,jq2);
1352 /* Avoid stupid compiler warnings */
1354 j_coord_offsetA = 0;
1355 j_coord_offsetB = 0;
1360 /* Start outer loop over neighborlists */
1361 for(iidx=0; iidx<nri; iidx++)
1363 /* Load shift vector for this list */
1364 i_shift_offset = DIM*shiftidx[iidx];
1366 /* Load limits for loop over neighbors */
1367 j_index_start = jindex[iidx];
1368 j_index_end = jindex[iidx+1];
1370 /* Get outer coordinate index */
1372 i_coord_offset = DIM*inr;
1374 /* Load i particle coords and add shift vector */
1375 gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
1376 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
1378 fix0 = _fjsp_setzero_v2r8();
1379 fiy0 = _fjsp_setzero_v2r8();
1380 fiz0 = _fjsp_setzero_v2r8();
1381 fix1 = _fjsp_setzero_v2r8();
1382 fiy1 = _fjsp_setzero_v2r8();
1383 fiz1 = _fjsp_setzero_v2r8();
1384 fix2 = _fjsp_setzero_v2r8();
1385 fiy2 = _fjsp_setzero_v2r8();
1386 fiz2 = _fjsp_setzero_v2r8();
1388 /* Start inner kernel loop */
1389 for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1392 /* Get j neighbor index, and coordinate index */
1394 jnrB = jjnr[jidx+1];
1395 j_coord_offsetA = DIM*jnrA;
1396 j_coord_offsetB = DIM*jnrB;
1398 /* load j atom coordinates */
1399 gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
1400 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1402 /* Calculate displacement vector */
1403 dx00 = _fjsp_sub_v2r8(ix0,jx0);
1404 dy00 = _fjsp_sub_v2r8(iy0,jy0);
1405 dz00 = _fjsp_sub_v2r8(iz0,jz0);
1406 dx01 = _fjsp_sub_v2r8(ix0,jx1);
1407 dy01 = _fjsp_sub_v2r8(iy0,jy1);
1408 dz01 = _fjsp_sub_v2r8(iz0,jz1);
1409 dx02 = _fjsp_sub_v2r8(ix0,jx2);
1410 dy02 = _fjsp_sub_v2r8(iy0,jy2);
1411 dz02 = _fjsp_sub_v2r8(iz0,jz2);
1412 dx10 = _fjsp_sub_v2r8(ix1,jx0);
1413 dy10 = _fjsp_sub_v2r8(iy1,jy0);
1414 dz10 = _fjsp_sub_v2r8(iz1,jz0);
1415 dx11 = _fjsp_sub_v2r8(ix1,jx1);
1416 dy11 = _fjsp_sub_v2r8(iy1,jy1);
1417 dz11 = _fjsp_sub_v2r8(iz1,jz1);
1418 dx12 = _fjsp_sub_v2r8(ix1,jx2);
1419 dy12 = _fjsp_sub_v2r8(iy1,jy2);
1420 dz12 = _fjsp_sub_v2r8(iz1,jz2);
1421 dx20 = _fjsp_sub_v2r8(ix2,jx0);
1422 dy20 = _fjsp_sub_v2r8(iy2,jy0);
1423 dz20 = _fjsp_sub_v2r8(iz2,jz0);
1424 dx21 = _fjsp_sub_v2r8(ix2,jx1);
1425 dy21 = _fjsp_sub_v2r8(iy2,jy1);
1426 dz21 = _fjsp_sub_v2r8(iz2,jz1);
1427 dx22 = _fjsp_sub_v2r8(ix2,jx2);
1428 dy22 = _fjsp_sub_v2r8(iy2,jy2);
1429 dz22 = _fjsp_sub_v2r8(iz2,jz2);
1431 /* Calculate squared distance and things based on it */
1432 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1433 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1434 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1435 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1436 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1437 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1438 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1439 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1440 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1442 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
1443 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
1444 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
1445 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
1446 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
1447 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
1448 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
1449 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
1450 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
1452 fjx0 = _fjsp_setzero_v2r8();
1453 fjy0 = _fjsp_setzero_v2r8();
1454 fjz0 = _fjsp_setzero_v2r8();
1455 fjx1 = _fjsp_setzero_v2r8();
1456 fjy1 = _fjsp_setzero_v2r8();
1457 fjz1 = _fjsp_setzero_v2r8();
1458 fjx2 = _fjsp_setzero_v2r8();
1459 fjy2 = _fjsp_setzero_v2r8();
1460 fjz2 = _fjsp_setzero_v2r8();
1462 /**************************
1463 * CALCULATE INTERACTIONS *
1464 **************************/
1466 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
1468 /* Calculate table index by multiplying r with table scale and truncate to integer */
1469 rt = _fjsp_mul_v2r8(r00,vftabscale);
1470 itab_tmp = _fjsp_dtox_v2r8(rt);
1471 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1472 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1473 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1478 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1479 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1480 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1481 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1482 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1483 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1484 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1485 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1486 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1487 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
1489 /* CUBIC SPLINE TABLE DISPERSION */
1492 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1493 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1494 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1495 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
1496 H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
1497 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1498 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1499 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1500 fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
1502 /* CUBIC SPLINE TABLE REPULSION */
1503 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
1504 F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
1505 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1506 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
1507 H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
1508 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1509 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1510 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1511 fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
1512 fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
1514 fscal = _fjsp_add_v2r8(felec,fvdw);
1516 /* Update vectorial force */
1517 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
1518 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1519 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1521 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1522 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1523 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1525 /**************************
1526 * CALCULATE INTERACTIONS *
1527 **************************/
1529 r01 = _fjsp_mul_v2r8(rsq01,rinv01);
1531 /* Calculate table index by multiplying r with table scale and truncate to integer */
1532 rt = _fjsp_mul_v2r8(r01,vftabscale);
1533 itab_tmp = _fjsp_dtox_v2r8(rt);
1534 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1535 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1536 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1541 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1542 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1543 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1544 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1545 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1546 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1547 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1548 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1549 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1550 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
1554 /* Update vectorial force */
1555 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
1556 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1557 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1559 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1560 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1561 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1563 /**************************
1564 * CALCULATE INTERACTIONS *
1565 **************************/
1567 r02 = _fjsp_mul_v2r8(rsq02,rinv02);
1569 /* Calculate table index by multiplying r with table scale and truncate to integer */
1570 rt = _fjsp_mul_v2r8(r02,vftabscale);
1571 itab_tmp = _fjsp_dtox_v2r8(rt);
1572 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1573 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1574 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1579 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1580 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1581 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1582 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1583 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1584 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1585 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1586 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1587 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1588 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
1592 /* Update vectorial force */
1593 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
1594 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1595 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1597 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1598 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1599 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1601 /**************************
1602 * CALCULATE INTERACTIONS *
1603 **************************/
1605 r10 = _fjsp_mul_v2r8(rsq10,rinv10);
1607 /* Calculate table index by multiplying r with table scale and truncate to integer */
1608 rt = _fjsp_mul_v2r8(r10,vftabscale);
1609 itab_tmp = _fjsp_dtox_v2r8(rt);
1610 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1611 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1612 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1617 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1618 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1619 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1620 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1621 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1622 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1623 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1624 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1625 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1626 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
1630 /* Update vectorial force */
1631 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
1632 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1633 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1635 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1636 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1637 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1639 /**************************
1640 * CALCULATE INTERACTIONS *
1641 **************************/
1643 r11 = _fjsp_mul_v2r8(rsq11,rinv11);
1645 /* Calculate table index by multiplying r with table scale and truncate to integer */
1646 rt = _fjsp_mul_v2r8(r11,vftabscale);
1647 itab_tmp = _fjsp_dtox_v2r8(rt);
1648 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1649 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1650 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1655 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1656 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1657 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1658 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1659 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1660 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1661 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1662 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1663 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1664 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
1668 /* Update vectorial force */
1669 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
1670 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1671 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1673 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1674 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1675 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1677 /**************************
1678 * CALCULATE INTERACTIONS *
1679 **************************/
1681 r12 = _fjsp_mul_v2r8(rsq12,rinv12);
1683 /* Calculate table index by multiplying r with table scale and truncate to integer */
1684 rt = _fjsp_mul_v2r8(r12,vftabscale);
1685 itab_tmp = _fjsp_dtox_v2r8(rt);
1686 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1687 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1688 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1693 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1694 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1695 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1696 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1697 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1698 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1699 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1700 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1701 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1702 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
1706 /* Update vectorial force */
1707 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
1708 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1709 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1711 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1712 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1713 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1715 /**************************
1716 * CALCULATE INTERACTIONS *
1717 **************************/
1719 r20 = _fjsp_mul_v2r8(rsq20,rinv20);
1721 /* Calculate table index by multiplying r with table scale and truncate to integer */
1722 rt = _fjsp_mul_v2r8(r20,vftabscale);
1723 itab_tmp = _fjsp_dtox_v2r8(rt);
1724 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1725 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1726 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1731 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1732 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1733 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1734 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1735 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1736 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1737 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1738 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1739 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1740 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
1744 /* Update vectorial force */
1745 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
1746 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1747 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1749 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1750 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1751 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1753 /**************************
1754 * CALCULATE INTERACTIONS *
1755 **************************/
1757 r21 = _fjsp_mul_v2r8(rsq21,rinv21);
1759 /* Calculate table index by multiplying r with table scale and truncate to integer */
1760 rt = _fjsp_mul_v2r8(r21,vftabscale);
1761 itab_tmp = _fjsp_dtox_v2r8(rt);
1762 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1763 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1764 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1769 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1770 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1771 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1772 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1773 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1774 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1775 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1776 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1777 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1778 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
1782 /* Update vectorial force */
1783 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
1784 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1785 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1787 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1788 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1789 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1791 /**************************
1792 * CALCULATE INTERACTIONS *
1793 **************************/
1795 r22 = _fjsp_mul_v2r8(rsq22,rinv22);
1797 /* Calculate table index by multiplying r with table scale and truncate to integer */
1798 rt = _fjsp_mul_v2r8(r22,vftabscale);
1799 itab_tmp = _fjsp_dtox_v2r8(rt);
1800 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1801 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1802 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1807 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1808 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1809 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1810 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1811 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1812 H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1813 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1814 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1815 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1816 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
1820 /* Update vectorial force */
1821 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
1822 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1823 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1825 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1826 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1827 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1829 gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1831 /* Inner loop uses 400 flops */
1834 if(jidx<j_index_end)
1838 j_coord_offsetA = DIM*jnrA;
1840 /* load j atom coordinates */
1841 gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
1842 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1844 /* Calculate displacement vector */
1845 dx00 = _fjsp_sub_v2r8(ix0,jx0);
1846 dy00 = _fjsp_sub_v2r8(iy0,jy0);
1847 dz00 = _fjsp_sub_v2r8(iz0,jz0);
1848 dx01 = _fjsp_sub_v2r8(ix0,jx1);
1849 dy01 = _fjsp_sub_v2r8(iy0,jy1);
1850 dz01 = _fjsp_sub_v2r8(iz0,jz1);
1851 dx02 = _fjsp_sub_v2r8(ix0,jx2);
1852 dy02 = _fjsp_sub_v2r8(iy0,jy2);
1853 dz02 = _fjsp_sub_v2r8(iz0,jz2);
1854 dx10 = _fjsp_sub_v2r8(ix1,jx0);
1855 dy10 = _fjsp_sub_v2r8(iy1,jy0);
1856 dz10 = _fjsp_sub_v2r8(iz1,jz0);
1857 dx11 = _fjsp_sub_v2r8(ix1,jx1);
1858 dy11 = _fjsp_sub_v2r8(iy1,jy1);
1859 dz11 = _fjsp_sub_v2r8(iz1,jz1);
1860 dx12 = _fjsp_sub_v2r8(ix1,jx2);
1861 dy12 = _fjsp_sub_v2r8(iy1,jy2);
1862 dz12 = _fjsp_sub_v2r8(iz1,jz2);
1863 dx20 = _fjsp_sub_v2r8(ix2,jx0);
1864 dy20 = _fjsp_sub_v2r8(iy2,jy0);
1865 dz20 = _fjsp_sub_v2r8(iz2,jz0);
1866 dx21 = _fjsp_sub_v2r8(ix2,jx1);
1867 dy21 = _fjsp_sub_v2r8(iy2,jy1);
1868 dz21 = _fjsp_sub_v2r8(iz2,jz1);
1869 dx22 = _fjsp_sub_v2r8(ix2,jx2);
1870 dy22 = _fjsp_sub_v2r8(iy2,jy2);
1871 dz22 = _fjsp_sub_v2r8(iz2,jz2);
1873 /* Calculate squared distance and things based on it */
1874 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1875 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1876 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1877 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1878 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1879 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1880 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1881 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1882 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1884 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
1885 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
1886 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
1887 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
1888 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
1889 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
1890 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
1891 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
1892 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
1894 fjx0 = _fjsp_setzero_v2r8();
1895 fjy0 = _fjsp_setzero_v2r8();
1896 fjz0 = _fjsp_setzero_v2r8();
1897 fjx1 = _fjsp_setzero_v2r8();
1898 fjy1 = _fjsp_setzero_v2r8();
1899 fjz1 = _fjsp_setzero_v2r8();
1900 fjx2 = _fjsp_setzero_v2r8();
1901 fjy2 = _fjsp_setzero_v2r8();
1902 fjz2 = _fjsp_setzero_v2r8();
1904 /**************************
1905 * CALCULATE INTERACTIONS *
1906 **************************/
1908 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
1910 /* Calculate table index by multiplying r with table scale and truncate to integer */
1911 rt = _fjsp_mul_v2r8(r00,vftabscale);
1912 itab_tmp = _fjsp_dtox_v2r8(rt);
1913 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1914 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1915 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1920 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1921 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1922 F = _fjsp_setzero_v2r8();
1923 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1924 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1925 H = _fjsp_setzero_v2r8();
1926 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1927 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1928 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1929 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
1931 /* CUBIC SPLINE TABLE DISPERSION */
1934 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1935 F = _fjsp_setzero_v2r8();
1936 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1937 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
1938 H = _fjsp_setzero_v2r8();
1939 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1940 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1941 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1942 fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
1944 /* CUBIC SPLINE TABLE REPULSION */
1945 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
1946 F = _fjsp_setzero_v2r8();
1947 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1948 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
1949 H = _fjsp_setzero_v2r8();
1950 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1951 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1952 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1953 fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
1954 fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
1956 fscal = _fjsp_add_v2r8(felec,fvdw);
1958 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1960 /* Update vectorial force */
1961 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
1962 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1963 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1965 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1966 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1967 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1969 /**************************
1970 * CALCULATE INTERACTIONS *
1971 **************************/
1973 r01 = _fjsp_mul_v2r8(rsq01,rinv01);
1975 /* Calculate table index by multiplying r with table scale and truncate to integer */
1976 rt = _fjsp_mul_v2r8(r01,vftabscale);
1977 itab_tmp = _fjsp_dtox_v2r8(rt);
1978 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1979 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1980 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1985 /* CUBIC SPLINE TABLE ELECTROSTATICS */
1986 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1987 F = _fjsp_setzero_v2r8();
1988 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1989 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1990 H = _fjsp_setzero_v2r8();
1991 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1992 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1993 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1994 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
1998 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2000 /* Update vectorial force */
2001 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
2002 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
2003 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
2005 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
2006 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
2007 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
2009 /**************************
2010 * CALCULATE INTERACTIONS *
2011 **************************/
2013 r02 = _fjsp_mul_v2r8(rsq02,rinv02);
2015 /* Calculate table index by multiplying r with table scale and truncate to integer */
2016 rt = _fjsp_mul_v2r8(r02,vftabscale);
2017 itab_tmp = _fjsp_dtox_v2r8(rt);
2018 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2019 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
2020 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2025 /* CUBIC SPLINE TABLE ELECTROSTATICS */
2026 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2027 F = _fjsp_setzero_v2r8();
2028 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2029 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2030 H = _fjsp_setzero_v2r8();
2031 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2032 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2033 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2034 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
2038 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2040 /* Update vectorial force */
2041 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
2042 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
2043 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
2045 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
2046 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
2047 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
2049 /**************************
2050 * CALCULATE INTERACTIONS *
2051 **************************/
2053 r10 = _fjsp_mul_v2r8(rsq10,rinv10);
2055 /* Calculate table index by multiplying r with table scale and truncate to integer */
2056 rt = _fjsp_mul_v2r8(r10,vftabscale);
2057 itab_tmp = _fjsp_dtox_v2r8(rt);
2058 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2059 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
2060 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2065 /* CUBIC SPLINE TABLE ELECTROSTATICS */
2066 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2067 F = _fjsp_setzero_v2r8();
2068 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2069 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2070 H = _fjsp_setzero_v2r8();
2071 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2072 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2073 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2074 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
2078 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2080 /* Update vectorial force */
2081 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
2082 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
2083 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
2085 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
2086 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
2087 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
2089 /**************************
2090 * CALCULATE INTERACTIONS *
2091 **************************/
2093 r11 = _fjsp_mul_v2r8(rsq11,rinv11);
2095 /* Calculate table index by multiplying r with table scale and truncate to integer */
2096 rt = _fjsp_mul_v2r8(r11,vftabscale);
2097 itab_tmp = _fjsp_dtox_v2r8(rt);
2098 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2099 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
2100 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2105 /* CUBIC SPLINE TABLE ELECTROSTATICS */
2106 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2107 F = _fjsp_setzero_v2r8();
2108 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2109 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2110 H = _fjsp_setzero_v2r8();
2111 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2112 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2113 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2114 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
2118 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2120 /* Update vectorial force */
2121 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
2122 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
2123 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
2125 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
2126 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
2127 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
2129 /**************************
2130 * CALCULATE INTERACTIONS *
2131 **************************/
2133 r12 = _fjsp_mul_v2r8(rsq12,rinv12);
2135 /* Calculate table index by multiplying r with table scale and truncate to integer */
2136 rt = _fjsp_mul_v2r8(r12,vftabscale);
2137 itab_tmp = _fjsp_dtox_v2r8(rt);
2138 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2139 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
2140 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2145 /* CUBIC SPLINE TABLE ELECTROSTATICS */
2146 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2147 F = _fjsp_setzero_v2r8();
2148 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2149 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2150 H = _fjsp_setzero_v2r8();
2151 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2152 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2153 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2154 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
2158 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2160 /* Update vectorial force */
2161 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
2162 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
2163 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
2165 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
2166 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
2167 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
2169 /**************************
2170 * CALCULATE INTERACTIONS *
2171 **************************/
2173 r20 = _fjsp_mul_v2r8(rsq20,rinv20);
2175 /* Calculate table index by multiplying r with table scale and truncate to integer */
2176 rt = _fjsp_mul_v2r8(r20,vftabscale);
2177 itab_tmp = _fjsp_dtox_v2r8(rt);
2178 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2179 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
2180 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2185 /* CUBIC SPLINE TABLE ELECTROSTATICS */
2186 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2187 F = _fjsp_setzero_v2r8();
2188 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2189 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2190 H = _fjsp_setzero_v2r8();
2191 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2192 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2193 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2194 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
2198 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2200 /* Update vectorial force */
2201 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
2202 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
2203 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
2205 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
2206 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
2207 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
2209 /**************************
2210 * CALCULATE INTERACTIONS *
2211 **************************/
2213 r21 = _fjsp_mul_v2r8(rsq21,rinv21);
2215 /* Calculate table index by multiplying r with table scale and truncate to integer */
2216 rt = _fjsp_mul_v2r8(r21,vftabscale);
2217 itab_tmp = _fjsp_dtox_v2r8(rt);
2218 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2219 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
2220 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2225 /* CUBIC SPLINE TABLE ELECTROSTATICS */
2226 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2227 F = _fjsp_setzero_v2r8();
2228 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2229 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2230 H = _fjsp_setzero_v2r8();
2231 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2232 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2233 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2234 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
2238 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2240 /* Update vectorial force */
2241 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
2242 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
2243 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
2245 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
2246 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
2247 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
2249 /**************************
2250 * CALCULATE INTERACTIONS *
2251 **************************/
2253 r22 = _fjsp_mul_v2r8(rsq22,rinv22);
2255 /* Calculate table index by multiplying r with table scale and truncate to integer */
2256 rt = _fjsp_mul_v2r8(r22,vftabscale);
2257 itab_tmp = _fjsp_dtox_v2r8(rt);
2258 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2259 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
2260 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2265 /* CUBIC SPLINE TABLE ELECTROSTATICS */
2266 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2267 F = _fjsp_setzero_v2r8();
2268 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2269 G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2270 H = _fjsp_setzero_v2r8();
2271 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2272 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2273 FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2274 felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
2278 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2280 /* Update vectorial force */
2281 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
2282 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
2283 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
2285 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
2286 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
2287 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
2289 gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
2291 /* Inner loop uses 400 flops */
2294 /* End of innermost loop */
2296 gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
2297 f+i_coord_offset,fshift+i_shift_offset);
2299 /* Increment number of inner iterations */
2300 inneriter += j_index_end - j_index_start;
2302 /* Outer loop uses 18 flops */
2305 /* Increment number of outer iterations */
2308 /* Update outer/inner flops */
2310 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*400);