2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
42 #include "../nb_kernel.h"
43 #include "types/simple.h"
44 #include "gromacs/math/vec.h"
47 #include "kernelutil_sparc64_hpc_ace_double.h"
50 * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double
51 * Electrostatics interaction: Coulomb
52 * VdW interaction: LennardJones
53 * Geometry: Water4-Water4
54 * Calculate force/pot: PotentialAndForce
/*
 * Water4-Water4 kernel with plain Coulomb electrostatics and Lennard-Jones
 * VdW that accumulates both potentials and forces, using the two-wide double
 * precision SIMD type _fjsp_v2r8.
 *
 * Site 0 of each water takes part only in the Lennard-Jones pair 00;
 * sites 1-3 take part only in the nine Coulomb pairs 11..33.
 *
 * NOTE(review): in this listing several names are read without a visible
 * assignment (e.g. fscal, nri, x, f, vdwparam, jnrA, jnrB, ggid) - confirm
 * against the kernel generator output before relying on this copy.
 */
57 nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double
58 (t_nblist * gmx_restrict nlist,
59 rvec * gmx_restrict xx,
60 rvec * gmx_restrict ff,
61 t_forcerec * gmx_restrict fr,
62 t_mdatoms * gmx_restrict mdatoms,
63 nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
64 t_nrnb * gmx_restrict nrnb)
66 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
67 * just 0 for non-waters.
68 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
69 * jnr indices corresponding to data put in the two positions in the SIMD register.
71 int i_shift_offset,i_coord_offset,outeriter,inneriter;
72 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
74 int j_coord_offsetA,j_coord_offsetB;
75 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
77 real *shiftvec,*fshift,*x,*f;
78 _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
80 _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
82 _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
84 _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
86 _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
87 int vdwjidx0A,vdwjidx0B;
88 _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
89 int vdwjidx1A,vdwjidx1B;
90 _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
91 int vdwjidx2A,vdwjidx2B;
92 _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
93 int vdwjidx3A,vdwjidx3B;
94 _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
95 _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
96 _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
97 _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
98 _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
99 _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
100 _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
101 _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
102 _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
103 _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
104 _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
105 _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
108 _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
111 _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
112 _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
114 _fjsp_v2r8 dummy_mask,cutoff_mask;
115 _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
116 _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
117 union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
124 jindex = nlist->jindex;
126 shiftidx = nlist->shift;
128 shiftvec = fr->shift_vec[0];
129 fshift = fr->fshift[0];
130 facel = gmx_fjsp_set1_v2r8(fr->epsfac);
131 charge = mdatoms->chargeA;
132 nvdwtype = fr->ntype;
134 vdwtype = mdatoms->typeA;
136 /* Setup water-specific parameters */
/* Parameters are read once from the first i water (inr = iinr[0]) and reused for every pair below; presumably all waters in the list share charges and LJ types - TODO confirm. */
137 inr = nlist->iinr[0];
138 iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
139 iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
140 iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
141 vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
/* The j-side charges and LJ type index are taken from the same water (index inr). */
143 jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
144 jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
145 jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
146 vdwjidx0A = 2*vdwtype[inr+0];
147 c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
148 c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
/* Charge products for the nine charged site pairs; facel is already folded into iq1..iq3. */
149 qq11 = _fjsp_mul_v2r8(iq1,jq1);
150 qq12 = _fjsp_mul_v2r8(iq1,jq2);
151 qq13 = _fjsp_mul_v2r8(iq1,jq3);
152 qq21 = _fjsp_mul_v2r8(iq2,jq1);
153 qq22 = _fjsp_mul_v2r8(iq2,jq2);
154 qq23 = _fjsp_mul_v2r8(iq2,jq3);
155 qq31 = _fjsp_mul_v2r8(iq3,jq1);
156 qq32 = _fjsp_mul_v2r8(iq3,jq2);
157 qq33 = _fjsp_mul_v2r8(iq3,jq3);
159 /* Avoid stupid compiler warnings */
167 /* Start outer loop over neighborlists */
168 for(iidx=0; iidx<nri; iidx++)
170 /* Load shift vector for this list */
171 i_shift_offset = DIM*shiftidx[iidx];
173 /* Load limits for loop over neighbors */
174 j_index_start = jindex[iidx];
175 j_index_end = jindex[iidx+1];
177 /* Get outer coordinate index */
179 i_coord_offset = DIM*inr;
181 /* Load i particle coords and add shift vector */
182 gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
183 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
/* Clear the per-i force accumulators for all four sites. */
185 fix0 = _fjsp_setzero_v2r8();
186 fiy0 = _fjsp_setzero_v2r8();
187 fiz0 = _fjsp_setzero_v2r8();
188 fix1 = _fjsp_setzero_v2r8();
189 fiy1 = _fjsp_setzero_v2r8();
190 fiz1 = _fjsp_setzero_v2r8();
191 fix2 = _fjsp_setzero_v2r8();
192 fiy2 = _fjsp_setzero_v2r8();
193 fiz2 = _fjsp_setzero_v2r8();
194 fix3 = _fjsp_setzero_v2r8();
195 fiy3 = _fjsp_setzero_v2r8();
196 fiz3 = _fjsp_setzero_v2r8();
198 /* Reset potential sums */
199 velecsum = _fjsp_setzero_v2r8();
200 vvdwsum = _fjsp_setzero_v2r8();
202 /* Start inner kernel loop */
/* Two j entries (A and B) are consumed per iteration; presumably one per SIMD lane - TODO confirm against the load helper. */
203 for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
206 /* Get j neighbor index, and coordinate index */
209 j_coord_offsetA = DIM*jnrA;
210 j_coord_offsetB = DIM*jnrB;
212 /* load j atom coordinates */
213 gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
214 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
215 &jy2,&jz2,&jx3,&jy3,&jz3);
217 /* Calculate displacement vector */
218 dx00 = _fjsp_sub_v2r8(ix0,jx0);
219 dy00 = _fjsp_sub_v2r8(iy0,jy0);
220 dz00 = _fjsp_sub_v2r8(iz0,jz0);
221 dx11 = _fjsp_sub_v2r8(ix1,jx1);
222 dy11 = _fjsp_sub_v2r8(iy1,jy1);
223 dz11 = _fjsp_sub_v2r8(iz1,jz1);
224 dx12 = _fjsp_sub_v2r8(ix1,jx2);
225 dy12 = _fjsp_sub_v2r8(iy1,jy2);
226 dz12 = _fjsp_sub_v2r8(iz1,jz2);
227 dx13 = _fjsp_sub_v2r8(ix1,jx3);
228 dy13 = _fjsp_sub_v2r8(iy1,jy3);
229 dz13 = _fjsp_sub_v2r8(iz1,jz3);
230 dx21 = _fjsp_sub_v2r8(ix2,jx1);
231 dy21 = _fjsp_sub_v2r8(iy2,jy1);
232 dz21 = _fjsp_sub_v2r8(iz2,jz1);
233 dx22 = _fjsp_sub_v2r8(ix2,jx2);
234 dy22 = _fjsp_sub_v2r8(iy2,jy2);
235 dz22 = _fjsp_sub_v2r8(iz2,jz2);
236 dx23 = _fjsp_sub_v2r8(ix2,jx3);
237 dy23 = _fjsp_sub_v2r8(iy2,jy3);
238 dz23 = _fjsp_sub_v2r8(iz2,jz3);
239 dx31 = _fjsp_sub_v2r8(ix3,jx1);
240 dy31 = _fjsp_sub_v2r8(iy3,jy1);
241 dz31 = _fjsp_sub_v2r8(iz3,jz1);
242 dx32 = _fjsp_sub_v2r8(ix3,jx2);
243 dy32 = _fjsp_sub_v2r8(iy3,jy2);
244 dz32 = _fjsp_sub_v2r8(iz3,jz2);
245 dx33 = _fjsp_sub_v2r8(ix3,jx3);
246 dy33 = _fjsp_sub_v2r8(iy3,jy3);
247 dz33 = _fjsp_sub_v2r8(iz3,jz3);
249 /* Calculate squared distance and things based on it */
250 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
251 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
252 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
253 rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
254 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
255 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
256 rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
257 rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
258 rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
259 rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
/* Going by the helper names: 1/r is computed only for the nine charged pairs, while the LJ-only pair 00 gets 1/r^2 directly. */
261 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
262 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
263 rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
264 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
265 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
266 rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
267 rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
268 rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
269 rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
271 rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
272 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
273 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
274 rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
275 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
276 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
277 rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
278 rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
279 rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
280 rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
/* Clear the j force accumulators for this pair of j waters. */
282 fjx0 = _fjsp_setzero_v2r8();
283 fjy0 = _fjsp_setzero_v2r8();
284 fjz0 = _fjsp_setzero_v2r8();
285 fjx1 = _fjsp_setzero_v2r8();
286 fjy1 = _fjsp_setzero_v2r8();
287 fjz1 = _fjsp_setzero_v2r8();
288 fjx2 = _fjsp_setzero_v2r8();
289 fjy2 = _fjsp_setzero_v2r8();
290 fjz2 = _fjsp_setzero_v2r8();
291 fjx3 = _fjsp_setzero_v2r8();
292 fjy3 = _fjsp_setzero_v2r8();
293 fjz3 = _fjsp_setzero_v2r8();
295 /**************************
296 * CALCULATE INTERACTIONS *
297 **************************/
299 /* LENNARD-JONES DISPERSION/REPULSION */
/* rinvsix = (1/r^2)^3; vvdw6 = c6*r^-6; vvdw12 = c12*r^-12; fvdw = (vvdw12 - vvdw6)/r^2. */
/* The 1/12 and 1/6 factors in vvdw suggest c6/c12 are stored pre-multiplied by 6 and 12 - TODO confirm. */
301 rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
302 vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
303 vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
304 vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
305 fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
307 /* Update potential sum for this i atom from the interaction with this j atom. */
308 vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
/* NOTE(review): fscal is not assigned anywhere in this listing before use; presumably it should hold fvdw here and felec in the Coulomb sections - confirm. */
312 /* Update vectorial force */
313 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
314 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
315 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
317 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
318 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
319 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
321 /**************************
322 * CALCULATE INTERACTIONS *
323 **************************/
325 /* COULOMB ELECTROSTATICS */
/* velec = qq/r; felec = velec/r^2. The same pattern repeats for each of the nine charged pairs. */
326 velec = _fjsp_mul_v2r8(qq11,rinv11);
327 felec = _fjsp_mul_v2r8(velec,rinvsq11);
329 /* Update potential sum for this i atom from the interaction with this j atom. */
330 velecsum = _fjsp_add_v2r8(velecsum,velec);
334 /* Update vectorial force */
335 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
336 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
337 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
339 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
340 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
341 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
343 /**************************
344 * CALCULATE INTERACTIONS *
345 **************************/
347 /* COULOMB ELECTROSTATICS */
348 velec = _fjsp_mul_v2r8(qq12,rinv12);
349 felec = _fjsp_mul_v2r8(velec,rinvsq12);
351 /* Update potential sum for this i atom from the interaction with this j atom. */
352 velecsum = _fjsp_add_v2r8(velecsum,velec);
356 /* Update vectorial force */
357 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
358 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
359 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
361 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
362 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
363 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
365 /**************************
366 * CALCULATE INTERACTIONS *
367 **************************/
369 /* COULOMB ELECTROSTATICS */
370 velec = _fjsp_mul_v2r8(qq13,rinv13);
371 felec = _fjsp_mul_v2r8(velec,rinvsq13);
373 /* Update potential sum for this i atom from the interaction with this j atom. */
374 velecsum = _fjsp_add_v2r8(velecsum,velec);
378 /* Update vectorial force */
379 fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
380 fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
381 fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
383 fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
384 fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
385 fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
387 /**************************
388 * CALCULATE INTERACTIONS *
389 **************************/
391 /* COULOMB ELECTROSTATICS */
392 velec = _fjsp_mul_v2r8(qq21,rinv21);
393 felec = _fjsp_mul_v2r8(velec,rinvsq21);
395 /* Update potential sum for this i atom from the interaction with this j atom. */
396 velecsum = _fjsp_add_v2r8(velecsum,velec);
400 /* Update vectorial force */
401 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
402 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
403 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
405 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
406 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
407 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
409 /**************************
410 * CALCULATE INTERACTIONS *
411 **************************/
413 /* COULOMB ELECTROSTATICS */
414 velec = _fjsp_mul_v2r8(qq22,rinv22);
415 felec = _fjsp_mul_v2r8(velec,rinvsq22);
417 /* Update potential sum for this i atom from the interaction with this j atom. */
418 velecsum = _fjsp_add_v2r8(velecsum,velec);
422 /* Update vectorial force */
423 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
424 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
425 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
427 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
428 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
429 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
431 /**************************
432 * CALCULATE INTERACTIONS *
433 **************************/
435 /* COULOMB ELECTROSTATICS */
436 velec = _fjsp_mul_v2r8(qq23,rinv23);
437 felec = _fjsp_mul_v2r8(velec,rinvsq23);
439 /* Update potential sum for this i atom from the interaction with this j atom. */
440 velecsum = _fjsp_add_v2r8(velecsum,velec);
444 /* Update vectorial force */
445 fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
446 fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
447 fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
449 fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
450 fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
451 fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
453 /**************************
454 * CALCULATE INTERACTIONS *
455 **************************/
457 /* COULOMB ELECTROSTATICS */
458 velec = _fjsp_mul_v2r8(qq31,rinv31);
459 felec = _fjsp_mul_v2r8(velec,rinvsq31);
461 /* Update potential sum for this i atom from the interaction with this j atom. */
462 velecsum = _fjsp_add_v2r8(velecsum,velec);
466 /* Update vectorial force */
467 fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
468 fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
469 fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
471 fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
472 fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
473 fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
475 /**************************
476 * CALCULATE INTERACTIONS *
477 **************************/
479 /* COULOMB ELECTROSTATICS */
480 velec = _fjsp_mul_v2r8(qq32,rinv32);
481 felec = _fjsp_mul_v2r8(velec,rinvsq32);
483 /* Update potential sum for this i atom from the interaction with this j atom. */
484 velecsum = _fjsp_add_v2r8(velecsum,velec);
488 /* Update vectorial force */
489 fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
490 fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
491 fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
493 fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
494 fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
495 fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
497 /**************************
498 * CALCULATE INTERACTIONS *
499 **************************/
501 /* COULOMB ELECTROSTATICS */
502 velec = _fjsp_mul_v2r8(qq33,rinv33);
503 felec = _fjsp_mul_v2r8(velec,rinvsq33);
505 /* Update potential sum for this i atom from the interaction with this j atom. */
506 velecsum = _fjsp_add_v2r8(velecsum,velec);
510 /* Update vectorial force */
511 fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
512 fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
513 fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
515 fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
516 fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
517 fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
/* Going by the helper name, this subtracts the accumulated j forces from f for both j waters (A and B). */
519 gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
521 /* Inner loop uses 317 flops */
/* Single-j variant of the computation above: only j water A is loaded (1ptr helpers), and each potential and fscal is combined with zero via _fjsp_unpacklo_v2r8, presumably to clear the unused SIMD lane. */
/* NOTE(review): the condition guarding this path is not visible in this listing - confirm. */
528 j_coord_offsetA = DIM*jnrA;
530 /* load j atom coordinates */
531 gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
532 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
533 &jy2,&jz2,&jx3,&jy3,&jz3);
535 /* Calculate displacement vector */
536 dx00 = _fjsp_sub_v2r8(ix0,jx0);
537 dy00 = _fjsp_sub_v2r8(iy0,jy0);
538 dz00 = _fjsp_sub_v2r8(iz0,jz0);
539 dx11 = _fjsp_sub_v2r8(ix1,jx1);
540 dy11 = _fjsp_sub_v2r8(iy1,jy1);
541 dz11 = _fjsp_sub_v2r8(iz1,jz1);
542 dx12 = _fjsp_sub_v2r8(ix1,jx2);
543 dy12 = _fjsp_sub_v2r8(iy1,jy2);
544 dz12 = _fjsp_sub_v2r8(iz1,jz2);
545 dx13 = _fjsp_sub_v2r8(ix1,jx3);
546 dy13 = _fjsp_sub_v2r8(iy1,jy3);
547 dz13 = _fjsp_sub_v2r8(iz1,jz3);
548 dx21 = _fjsp_sub_v2r8(ix2,jx1);
549 dy21 = _fjsp_sub_v2r8(iy2,jy1);
550 dz21 = _fjsp_sub_v2r8(iz2,jz1);
551 dx22 = _fjsp_sub_v2r8(ix2,jx2);
552 dy22 = _fjsp_sub_v2r8(iy2,jy2);
553 dz22 = _fjsp_sub_v2r8(iz2,jz2);
554 dx23 = _fjsp_sub_v2r8(ix2,jx3);
555 dy23 = _fjsp_sub_v2r8(iy2,jy3);
556 dz23 = _fjsp_sub_v2r8(iz2,jz3);
557 dx31 = _fjsp_sub_v2r8(ix3,jx1);
558 dy31 = _fjsp_sub_v2r8(iy3,jy1);
559 dz31 = _fjsp_sub_v2r8(iz3,jz1);
560 dx32 = _fjsp_sub_v2r8(ix3,jx2);
561 dy32 = _fjsp_sub_v2r8(iy3,jy2);
562 dz32 = _fjsp_sub_v2r8(iz3,jz2);
563 dx33 = _fjsp_sub_v2r8(ix3,jx3);
564 dy33 = _fjsp_sub_v2r8(iy3,jy3);
565 dz33 = _fjsp_sub_v2r8(iz3,jz3);
567 /* Calculate squared distance and things based on it */
568 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
569 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
570 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
571 rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
572 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
573 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
574 rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
575 rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
576 rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
577 rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
579 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
580 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
581 rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
582 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
583 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
584 rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
585 rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
586 rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
587 rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
589 rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
590 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
591 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
592 rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
593 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
594 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
595 rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
596 rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
597 rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
598 rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
600 fjx0 = _fjsp_setzero_v2r8();
601 fjy0 = _fjsp_setzero_v2r8();
602 fjz0 = _fjsp_setzero_v2r8();
603 fjx1 = _fjsp_setzero_v2r8();
604 fjy1 = _fjsp_setzero_v2r8();
605 fjz1 = _fjsp_setzero_v2r8();
606 fjx2 = _fjsp_setzero_v2r8();
607 fjy2 = _fjsp_setzero_v2r8();
608 fjz2 = _fjsp_setzero_v2r8();
609 fjx3 = _fjsp_setzero_v2r8();
610 fjy3 = _fjsp_setzero_v2r8();
611 fjz3 = _fjsp_setzero_v2r8();
613 /**************************
614 * CALCULATE INTERACTIONS *
615 **************************/
617 /* LENNARD-JONES DISPERSION/REPULSION */
619 rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
620 vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
621 vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
622 vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
623 fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
625 /* Update potential sum for this i atom from the interaction with this j atom. */
626 vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
627 vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
631 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
633 /* Update vectorial force */
634 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
635 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
636 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
638 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
639 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
640 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
642 /**************************
643 * CALCULATE INTERACTIONS *
644 **************************/
646 /* COULOMB ELECTROSTATICS */
647 velec = _fjsp_mul_v2r8(qq11,rinv11);
648 felec = _fjsp_mul_v2r8(velec,rinvsq11);
650 /* Update potential sum for this i atom from the interaction with this j atom. */
651 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
652 velecsum = _fjsp_add_v2r8(velecsum,velec);
656 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
658 /* Update vectorial force */
659 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
660 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
661 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
663 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
664 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
665 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
667 /**************************
668 * CALCULATE INTERACTIONS *
669 **************************/
671 /* COULOMB ELECTROSTATICS */
672 velec = _fjsp_mul_v2r8(qq12,rinv12);
673 felec = _fjsp_mul_v2r8(velec,rinvsq12);
675 /* Update potential sum for this i atom from the interaction with this j atom. */
676 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
677 velecsum = _fjsp_add_v2r8(velecsum,velec);
681 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
683 /* Update vectorial force */
684 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
685 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
686 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
688 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
689 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
690 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
692 /**************************
693 * CALCULATE INTERACTIONS *
694 **************************/
696 /* COULOMB ELECTROSTATICS */
697 velec = _fjsp_mul_v2r8(qq13,rinv13);
698 felec = _fjsp_mul_v2r8(velec,rinvsq13);
700 /* Update potential sum for this i atom from the interaction with this j atom. */
701 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
702 velecsum = _fjsp_add_v2r8(velecsum,velec);
706 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
708 /* Update vectorial force */
709 fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
710 fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
711 fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
713 fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
714 fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
715 fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
717 /**************************
718 * CALCULATE INTERACTIONS *
719 **************************/
721 /* COULOMB ELECTROSTATICS */
722 velec = _fjsp_mul_v2r8(qq21,rinv21);
723 felec = _fjsp_mul_v2r8(velec,rinvsq21);
725 /* Update potential sum for this i atom from the interaction with this j atom. */
726 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
727 velecsum = _fjsp_add_v2r8(velecsum,velec);
731 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
733 /* Update vectorial force */
734 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
735 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
736 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
738 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
739 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
740 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
742 /**************************
743 * CALCULATE INTERACTIONS *
744 **************************/
746 /* COULOMB ELECTROSTATICS */
747 velec = _fjsp_mul_v2r8(qq22,rinv22);
748 felec = _fjsp_mul_v2r8(velec,rinvsq22);
750 /* Update potential sum for this i atom from the interaction with this j atom. */
751 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
752 velecsum = _fjsp_add_v2r8(velecsum,velec);
756 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
758 /* Update vectorial force */
759 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
760 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
761 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
763 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
764 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
765 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
767 /**************************
768 * CALCULATE INTERACTIONS *
769 **************************/
771 /* COULOMB ELECTROSTATICS */
772 velec = _fjsp_mul_v2r8(qq23,rinv23);
773 felec = _fjsp_mul_v2r8(velec,rinvsq23);
775 /* Update potential sum for this i atom from the interaction with this j atom. */
776 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
777 velecsum = _fjsp_add_v2r8(velecsum,velec);
781 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
783 /* Update vectorial force */
784 fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
785 fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
786 fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
788 fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
789 fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
790 fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
792 /**************************
793 * CALCULATE INTERACTIONS *
794 **************************/
796 /* COULOMB ELECTROSTATICS */
797 velec = _fjsp_mul_v2r8(qq31,rinv31);
798 felec = _fjsp_mul_v2r8(velec,rinvsq31);
800 /* Update potential sum for this i atom from the interaction with this j atom. */
801 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
802 velecsum = _fjsp_add_v2r8(velecsum,velec);
806 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
808 /* Update vectorial force */
809 fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
810 fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
811 fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
813 fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
814 fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
815 fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
817 /**************************
818 * CALCULATE INTERACTIONS *
819 **************************/
821 /* COULOMB ELECTROSTATICS */
822 velec = _fjsp_mul_v2r8(qq32,rinv32);
823 felec = _fjsp_mul_v2r8(velec,rinvsq32);
825 /* Update potential sum for this i atom from the interaction with this j atom. */
826 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
827 velecsum = _fjsp_add_v2r8(velecsum,velec);
831 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
833 /* Update vectorial force */
834 fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
835 fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
836 fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
838 fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
839 fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
840 fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
842 /**************************
843 * CALCULATE INTERACTIONS *
844 **************************/
846 /* COULOMB ELECTROSTATICS */
847 velec = _fjsp_mul_v2r8(qq33,rinv33);
848 felec = _fjsp_mul_v2r8(velec,rinvsq33);
850 /* Update potential sum for this i atom from the interaction with this j atom. */
851 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
852 velecsum = _fjsp_add_v2r8(velecsum,velec);
856 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
858 /* Update vectorial force */
859 fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
860 fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
861 fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
863 fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
864 fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
865 fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
867 gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
869 /* Inner loop uses 317 flops */
872 /* End of innermost loop */
/* Going by the helper name, this folds the accumulated i forces into f and into the shift-force array fshift. */
874 gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
875 f+i_coord_offset,fshift+i_shift_offset);
878 /* Update potential energies */
/* kernel_data is dereferenced here even though the parameter is tagged gmx_unused. */
879 gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
880 gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
882 /* Increment number of inner iterations */
883 inneriter += j_index_end - j_index_start;
885 /* Outer loop uses 26 flops */
888 /* Increment number of outer iterations */
891 /* Update outer/inner flops */
/* Reported cost: 26 flops per outer iteration plus 317 per j entry counted in inneriter. */
893 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*317);
896 * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double
897 * Electrostatics interaction: Coulomb
898 * VdW interaction: LennardJones
899 * Geometry: Water4-Water4
900 * Calculate force/pot: Force
903 nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double
904 (t_nblist * gmx_restrict nlist,
905 rvec * gmx_restrict xx,
906 rvec * gmx_restrict ff,
907 t_forcerec * gmx_restrict fr,
908 t_mdatoms * gmx_restrict mdatoms,
909 nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
910 t_nrnb * gmx_restrict nrnb)
912 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
913 * just 0 for non-waters.
914 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
915 * jnr indices corresponding to data put in the two positions in the SIMD register.
917 int i_shift_offset,i_coord_offset,outeriter,inneriter;
918 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
920 int j_coord_offsetA,j_coord_offsetB;
921 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
923 real *shiftvec,*fshift,*x,*f;
924 _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
926 _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
928 _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
930 _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
932 _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
933 int vdwjidx0A,vdwjidx0B;
934 _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
935 int vdwjidx1A,vdwjidx1B;
936 _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
937 int vdwjidx2A,vdwjidx2B;
938 _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
939 int vdwjidx3A,vdwjidx3B;
940 _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
941 _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
942 _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
943 _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
944 _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
945 _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
946 _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
947 _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
948 _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
949 _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
950 _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
951 _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
954 _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
957 _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
958 _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
960 _fjsp_v2r8 dummy_mask,cutoff_mask;
961 _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
962 _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
963 union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
970 jindex = nlist->jindex;
972 shiftidx = nlist->shift;
974 shiftvec = fr->shift_vec[0];
975 fshift = fr->fshift[0];
976 facel = gmx_fjsp_set1_v2r8(fr->epsfac);
977 charge = mdatoms->chargeA;
978 nvdwtype = fr->ntype;
980 vdwtype = mdatoms->typeA;
982 /* Setup water-specific parameters */
983 inr = nlist->iinr[0];
984 iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
985 iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
986 iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
987 vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
989 jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
990 jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
991 jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
992 vdwjidx0A = 2*vdwtype[inr+0];
993 c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
994 c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
995 qq11 = _fjsp_mul_v2r8(iq1,jq1);
996 qq12 = _fjsp_mul_v2r8(iq1,jq2);
997 qq13 = _fjsp_mul_v2r8(iq1,jq3);
998 qq21 = _fjsp_mul_v2r8(iq2,jq1);
999 qq22 = _fjsp_mul_v2r8(iq2,jq2);
1000 qq23 = _fjsp_mul_v2r8(iq2,jq3);
1001 qq31 = _fjsp_mul_v2r8(iq3,jq1);
1002 qq32 = _fjsp_mul_v2r8(iq3,jq2);
1003 qq33 = _fjsp_mul_v2r8(iq3,jq3);
1005 /* Avoid stupid compiler warnings */
1007 j_coord_offsetA = 0;
1008 j_coord_offsetB = 0;
1013 /* Start outer loop over neighborlists */
1014 for(iidx=0; iidx<nri; iidx++)
1016 /* Load shift vector for this list */
1017 i_shift_offset = DIM*shiftidx[iidx];
1019 /* Load limits for loop over neighbors */
1020 j_index_start = jindex[iidx];
1021 j_index_end = jindex[iidx+1];
1023 /* Get outer coordinate index */
1025 i_coord_offset = DIM*inr;
1027 /* Load i particle coords and add shift vector */
1028 gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
1029 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
1031 fix0 = _fjsp_setzero_v2r8();
1032 fiy0 = _fjsp_setzero_v2r8();
1033 fiz0 = _fjsp_setzero_v2r8();
1034 fix1 = _fjsp_setzero_v2r8();
1035 fiy1 = _fjsp_setzero_v2r8();
1036 fiz1 = _fjsp_setzero_v2r8();
1037 fix2 = _fjsp_setzero_v2r8();
1038 fiy2 = _fjsp_setzero_v2r8();
1039 fiz2 = _fjsp_setzero_v2r8();
1040 fix3 = _fjsp_setzero_v2r8();
1041 fiy3 = _fjsp_setzero_v2r8();
1042 fiz3 = _fjsp_setzero_v2r8();
1044 /* Start inner kernel loop */
1045 for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1048 /* Get j neighbor index, and coordinate index */
1050 jnrB = jjnr[jidx+1];
1051 j_coord_offsetA = DIM*jnrA;
1052 j_coord_offsetB = DIM*jnrB;
1054 /* load j atom coordinates */
1055 gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
1056 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
1057 &jy2,&jz2,&jx3,&jy3,&jz3);
1059 /* Calculate displacement vector */
1060 dx00 = _fjsp_sub_v2r8(ix0,jx0);
1061 dy00 = _fjsp_sub_v2r8(iy0,jy0);
1062 dz00 = _fjsp_sub_v2r8(iz0,jz0);
1063 dx11 = _fjsp_sub_v2r8(ix1,jx1);
1064 dy11 = _fjsp_sub_v2r8(iy1,jy1);
1065 dz11 = _fjsp_sub_v2r8(iz1,jz1);
1066 dx12 = _fjsp_sub_v2r8(ix1,jx2);
1067 dy12 = _fjsp_sub_v2r8(iy1,jy2);
1068 dz12 = _fjsp_sub_v2r8(iz1,jz2);
1069 dx13 = _fjsp_sub_v2r8(ix1,jx3);
1070 dy13 = _fjsp_sub_v2r8(iy1,jy3);
1071 dz13 = _fjsp_sub_v2r8(iz1,jz3);
1072 dx21 = _fjsp_sub_v2r8(ix2,jx1);
1073 dy21 = _fjsp_sub_v2r8(iy2,jy1);
1074 dz21 = _fjsp_sub_v2r8(iz2,jz1);
1075 dx22 = _fjsp_sub_v2r8(ix2,jx2);
1076 dy22 = _fjsp_sub_v2r8(iy2,jy2);
1077 dz22 = _fjsp_sub_v2r8(iz2,jz2);
1078 dx23 = _fjsp_sub_v2r8(ix2,jx3);
1079 dy23 = _fjsp_sub_v2r8(iy2,jy3);
1080 dz23 = _fjsp_sub_v2r8(iz2,jz3);
1081 dx31 = _fjsp_sub_v2r8(ix3,jx1);
1082 dy31 = _fjsp_sub_v2r8(iy3,jy1);
1083 dz31 = _fjsp_sub_v2r8(iz3,jz1);
1084 dx32 = _fjsp_sub_v2r8(ix3,jx2);
1085 dy32 = _fjsp_sub_v2r8(iy3,jy2);
1086 dz32 = _fjsp_sub_v2r8(iz3,jz2);
1087 dx33 = _fjsp_sub_v2r8(ix3,jx3);
1088 dy33 = _fjsp_sub_v2r8(iy3,jy3);
1089 dz33 = _fjsp_sub_v2r8(iz3,jz3);
1091 /* Calculate squared distance and things based on it */
1092 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1093 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1094 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1095 rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
1096 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1097 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1098 rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
1099 rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
1100 rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
1101 rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
1103 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
1104 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
1105 rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
1106 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
1107 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
1108 rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
1109 rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
1110 rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
1111 rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
1113 rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
1114 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
1115 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
1116 rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
1117 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
1118 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
1119 rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
1120 rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
1121 rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
1122 rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
1124 fjx0 = _fjsp_setzero_v2r8();
1125 fjy0 = _fjsp_setzero_v2r8();
1126 fjz0 = _fjsp_setzero_v2r8();
1127 fjx1 = _fjsp_setzero_v2r8();
1128 fjy1 = _fjsp_setzero_v2r8();
1129 fjz1 = _fjsp_setzero_v2r8();
1130 fjx2 = _fjsp_setzero_v2r8();
1131 fjy2 = _fjsp_setzero_v2r8();
1132 fjz2 = _fjsp_setzero_v2r8();
1133 fjx3 = _fjsp_setzero_v2r8();
1134 fjy3 = _fjsp_setzero_v2r8();
1135 fjz3 = _fjsp_setzero_v2r8();
1137 /**************************
1138 * CALCULATE INTERACTIONS *
1139 **************************/
1141 /* LENNARD-JONES DISPERSION/REPULSION */
1143 rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
1144 fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
1148 /* Update vectorial force */
1149 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
1150 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1151 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1153 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1154 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1155 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1157 /**************************
1158 * CALCULATE INTERACTIONS *
1159 **************************/
1161 /* COULOMB ELECTROSTATICS */
1162 velec = _fjsp_mul_v2r8(qq11,rinv11);
1163 felec = _fjsp_mul_v2r8(velec,rinvsq11);
1167 /* Update vectorial force */
1168 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
1169 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1170 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1172 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1173 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1174 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1176 /**************************
1177 * CALCULATE INTERACTIONS *
1178 **************************/
1180 /* COULOMB ELECTROSTATICS */
1181 velec = _fjsp_mul_v2r8(qq12,rinv12);
1182 felec = _fjsp_mul_v2r8(velec,rinvsq12);
1186 /* Update vectorial force */
1187 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
1188 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1189 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1191 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1192 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1193 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1195 /**************************
1196 * CALCULATE INTERACTIONS *
1197 **************************/
1199 /* COULOMB ELECTROSTATICS */
1200 velec = _fjsp_mul_v2r8(qq13,rinv13);
1201 felec = _fjsp_mul_v2r8(velec,rinvsq13);
1205 /* Update vectorial force */
1206 fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
1207 fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
1208 fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
1210 fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
1211 fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
1212 fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
1214 /**************************
1215 * CALCULATE INTERACTIONS *
1216 **************************/
1218 /* COULOMB ELECTROSTATICS */
1219 velec = _fjsp_mul_v2r8(qq21,rinv21);
1220 felec = _fjsp_mul_v2r8(velec,rinvsq21);
1224 /* Update vectorial force */
1225 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
1226 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1227 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1229 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1230 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1231 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1233 /**************************
1234 * CALCULATE INTERACTIONS *
1235 **************************/
1237 /* COULOMB ELECTROSTATICS */
1238 velec = _fjsp_mul_v2r8(qq22,rinv22);
1239 felec = _fjsp_mul_v2r8(velec,rinvsq22);
1243 /* Update vectorial force */
1244 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
1245 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1246 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1248 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1249 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1250 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1252 /**************************
1253 * CALCULATE INTERACTIONS *
1254 **************************/
1256 /* COULOMB ELECTROSTATICS */
1257 velec = _fjsp_mul_v2r8(qq23,rinv23);
1258 felec = _fjsp_mul_v2r8(velec,rinvsq23);
1262 /* Update vectorial force */
1263 fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
1264 fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
1265 fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
1267 fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
1268 fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
1269 fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
1271 /**************************
1272 * CALCULATE INTERACTIONS *
1273 **************************/
1275 /* COULOMB ELECTROSTATICS */
1276 velec = _fjsp_mul_v2r8(qq31,rinv31);
1277 felec = _fjsp_mul_v2r8(velec,rinvsq31);
1281 /* Update vectorial force */
1282 fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
1283 fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
1284 fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
1286 fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
1287 fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
1288 fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
1290 /**************************
1291 * CALCULATE INTERACTIONS *
1292 **************************/
1294 /* COULOMB ELECTROSTATICS */
1295 velec = _fjsp_mul_v2r8(qq32,rinv32);
1296 felec = _fjsp_mul_v2r8(velec,rinvsq32);
1300 /* Update vectorial force */
1301 fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
1302 fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
1303 fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
1305 fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
1306 fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
1307 fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
1309 /**************************
1310 * CALCULATE INTERACTIONS *
1311 **************************/
1313 /* COULOMB ELECTROSTATICS */
1314 velec = _fjsp_mul_v2r8(qq33,rinv33);
1315 felec = _fjsp_mul_v2r8(velec,rinvsq33);
1319 /* Update vectorial force */
1320 fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
1321 fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
1322 fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
1324 fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
1325 fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
1326 fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
1328 gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1330 /* Inner loop uses 303 flops */
1333 if(jidx<j_index_end)
1337 j_coord_offsetA = DIM*jnrA;
1339 /* load j atom coordinates */
1340 gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
1341 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
1342 &jy2,&jz2,&jx3,&jy3,&jz3);
1344 /* Calculate displacement vector */
1345 dx00 = _fjsp_sub_v2r8(ix0,jx0);
1346 dy00 = _fjsp_sub_v2r8(iy0,jy0);
1347 dz00 = _fjsp_sub_v2r8(iz0,jz0);
1348 dx11 = _fjsp_sub_v2r8(ix1,jx1);
1349 dy11 = _fjsp_sub_v2r8(iy1,jy1);
1350 dz11 = _fjsp_sub_v2r8(iz1,jz1);
1351 dx12 = _fjsp_sub_v2r8(ix1,jx2);
1352 dy12 = _fjsp_sub_v2r8(iy1,jy2);
1353 dz12 = _fjsp_sub_v2r8(iz1,jz2);
1354 dx13 = _fjsp_sub_v2r8(ix1,jx3);
1355 dy13 = _fjsp_sub_v2r8(iy1,jy3);
1356 dz13 = _fjsp_sub_v2r8(iz1,jz3);
1357 dx21 = _fjsp_sub_v2r8(ix2,jx1);
1358 dy21 = _fjsp_sub_v2r8(iy2,jy1);
1359 dz21 = _fjsp_sub_v2r8(iz2,jz1);
1360 dx22 = _fjsp_sub_v2r8(ix2,jx2);
1361 dy22 = _fjsp_sub_v2r8(iy2,jy2);
1362 dz22 = _fjsp_sub_v2r8(iz2,jz2);
1363 dx23 = _fjsp_sub_v2r8(ix2,jx3);
1364 dy23 = _fjsp_sub_v2r8(iy2,jy3);
1365 dz23 = _fjsp_sub_v2r8(iz2,jz3);
1366 dx31 = _fjsp_sub_v2r8(ix3,jx1);
1367 dy31 = _fjsp_sub_v2r8(iy3,jy1);
1368 dz31 = _fjsp_sub_v2r8(iz3,jz1);
1369 dx32 = _fjsp_sub_v2r8(ix3,jx2);
1370 dy32 = _fjsp_sub_v2r8(iy3,jy2);
1371 dz32 = _fjsp_sub_v2r8(iz3,jz2);
1372 dx33 = _fjsp_sub_v2r8(ix3,jx3);
1373 dy33 = _fjsp_sub_v2r8(iy3,jy3);
1374 dz33 = _fjsp_sub_v2r8(iz3,jz3);
1376 /* Calculate squared distance and things based on it */
1377 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1378 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1379 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1380 rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
1381 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1382 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1383 rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
1384 rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
1385 rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
1386 rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
1388 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
1389 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
1390 rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
1391 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
1392 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
1393 rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
1394 rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
1395 rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
1396 rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
1398 rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
1399 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
1400 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
1401 rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
1402 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
1403 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
1404 rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
1405 rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
1406 rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
1407 rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
1409 fjx0 = _fjsp_setzero_v2r8();
1410 fjy0 = _fjsp_setzero_v2r8();
1411 fjz0 = _fjsp_setzero_v2r8();
1412 fjx1 = _fjsp_setzero_v2r8();
1413 fjy1 = _fjsp_setzero_v2r8();
1414 fjz1 = _fjsp_setzero_v2r8();
1415 fjx2 = _fjsp_setzero_v2r8();
1416 fjy2 = _fjsp_setzero_v2r8();
1417 fjz2 = _fjsp_setzero_v2r8();
1418 fjx3 = _fjsp_setzero_v2r8();
1419 fjy3 = _fjsp_setzero_v2r8();
1420 fjz3 = _fjsp_setzero_v2r8();
1422 /**************************
1423 * CALCULATE INTERACTIONS *
1424 **************************/
1426 /* LENNARD-JONES DISPERSION/REPULSION */
1428 rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
1429 fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
1433 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1435 /* Update vectorial force */
1436 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
1437 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1438 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1440 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1441 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1442 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1444 /**************************
1445 * CALCULATE INTERACTIONS *
1446 **************************/
1448 /* COULOMB ELECTROSTATICS */
1449 velec = _fjsp_mul_v2r8(qq11,rinv11);
1450 felec = _fjsp_mul_v2r8(velec,rinvsq11);
1454 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1456 /* Update vectorial force */
1457 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
1458 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1459 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1461 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1462 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1463 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1465 /**************************
1466 * CALCULATE INTERACTIONS *
1467 **************************/
1469 /* COULOMB ELECTROSTATICS */
1470 velec = _fjsp_mul_v2r8(qq12,rinv12);
1471 felec = _fjsp_mul_v2r8(velec,rinvsq12);
1475 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1477 /* Update vectorial force */
1478 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
1479 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1480 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1482 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1483 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1484 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1486 /**************************
1487 * CALCULATE INTERACTIONS *
1488 **************************/
1490 /* COULOMB ELECTROSTATICS */
1491 velec = _fjsp_mul_v2r8(qq13,rinv13);
1492 felec = _fjsp_mul_v2r8(velec,rinvsq13);
1496 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1498 /* Update vectorial force */
1499 fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
1500 fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
1501 fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
1503 fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
1504 fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
1505 fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
1507 /**************************
1508 * CALCULATE INTERACTIONS *
1509 **************************/
1511 /* COULOMB ELECTROSTATICS */
1512 velec = _fjsp_mul_v2r8(qq21,rinv21);
1513 felec = _fjsp_mul_v2r8(velec,rinvsq21);
1517 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1519 /* Update vectorial force */
1520 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
1521 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1522 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1524 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1525 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1526 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1528 /**************************
1529 * CALCULATE INTERACTIONS *
1530 **************************/
1532 /* COULOMB ELECTROSTATICS */
1533 velec = _fjsp_mul_v2r8(qq22,rinv22);
1534 felec = _fjsp_mul_v2r8(velec,rinvsq22);
1538 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1540 /* Update vectorial force */
1541 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
1542 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1543 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1545 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1546 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1547 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1549 /**************************
1550 * CALCULATE INTERACTIONS *
1551 **************************/
1553 /* COULOMB ELECTROSTATICS */
1554 velec = _fjsp_mul_v2r8(qq23,rinv23);
1555 felec = _fjsp_mul_v2r8(velec,rinvsq23);
1559 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1561 /* Update vectorial force */
1562 fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
1563 fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
1564 fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
1566 fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
1567 fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
1568 fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
1570 /**************************
1571 * CALCULATE INTERACTIONS *
1572 **************************/
1574 /* COULOMB ELECTROSTATICS */
1575 velec = _fjsp_mul_v2r8(qq31,rinv31);
1576 felec = _fjsp_mul_v2r8(velec,rinvsq31);
1580 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1582 /* Update vectorial force */
1583 fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
1584 fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
1585 fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
1587 fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
1588 fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
1589 fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
1591 /**************************
1592 * CALCULATE INTERACTIONS *
1593 **************************/
1595 /* COULOMB ELECTROSTATICS */
1596 velec = _fjsp_mul_v2r8(qq32,rinv32);
1597 felec = _fjsp_mul_v2r8(velec,rinvsq32);
1601 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1603 /* Update vectorial force */
1604 fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
1605 fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
1606 fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
1608 fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
1609 fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
1610 fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
1612 /**************************
1613 * CALCULATE INTERACTIONS *
1614 **************************/
1616 /* COULOMB ELECTROSTATICS */
1617 velec = _fjsp_mul_v2r8(qq33,rinv33);
1618 felec = _fjsp_mul_v2r8(velec,rinvsq33);
1622 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1624 /* Update vectorial force */
1625 fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
1626 fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
1627 fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
1629 fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
1630 fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
1631 fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
1633 gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1635 /* Inner loop uses 303 flops */
1638 /* End of innermost loop */
1640 gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
1641 f+i_coord_offset,fshift+i_shift_offset);
1643 /* Increment number of inner iterations */
1644 inneriter += j_index_end - j_index_start;
1646 /* Outer loop uses 24 flops */
1649 /* Increment number of outer iterations */
1652 /* Update outer/inner flops */
1654 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*303);