2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
42 #include "../nb_kernel.h"
43 #include "types/simple.h"
44 #include "gromacs/math/vec.h"
47 #include "kernelutil_sparc64_hpc_ace_double.h"
50 * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJEw_GeomW3W3_VF_sparc64_hpc_ace_double
51 * Electrostatics interaction: Ewald
52 * VdW interaction: LJEwald
53 * Geometry: Water3-Water3
54 * Calculate force/pot: PotentialAndForce
57 nb_kernel_ElecEw_VdwLJEw_GeomW3W3_VF_sparc64_hpc_ace_double
58 (t_nblist * gmx_restrict nlist,
59 rvec * gmx_restrict xx,
60 rvec * gmx_restrict ff,
61 t_forcerec * gmx_restrict fr,
62 t_mdatoms * gmx_restrict mdatoms,
63 nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
64 t_nrnb * gmx_restrict nrnb)
66 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
67 * just 0 for non-waters.
68 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
69 * jnr indices corresponding to data put in the two positions in the SIMD register.
71 int i_shift_offset,i_coord_offset,outeriter,inneriter;
72 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
74 int j_coord_offsetA,j_coord_offsetB;
75 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
77 real *shiftvec,*fshift,*x,*f;
78 _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
80 _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
82 _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
84 _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
85 int vdwjidx0A,vdwjidx0B;
86 _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
87 int vdwjidx1A,vdwjidx1B;
88 _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
89 int vdwjidx2A,vdwjidx2B;
90 _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
91 _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
92 _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
93 _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
94 _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
95 _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
96 _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
97 _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
98 _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
99 _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
100 _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
103 _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
106 _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
107 _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
108 _fjsp_v2r8 c6grid_00;
109 _fjsp_v2r8 c6grid_01;
110 _fjsp_v2r8 c6grid_02;
111 _fjsp_v2r8 c6grid_10;
112 _fjsp_v2r8 c6grid_11;
113 _fjsp_v2r8 c6grid_12;
114 _fjsp_v2r8 c6grid_20;
115 _fjsp_v2r8 c6grid_21;
116 _fjsp_v2r8 c6grid_22;
118 _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
119 _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5);
120 _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0);
121 _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
124 _fjsp_v2r8 dummy_mask,cutoff_mask;
125 _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
126 _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
127 union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
134 jindex = nlist->jindex;
136 shiftidx = nlist->shift;
138 shiftvec = fr->shift_vec[0];
139 fshift = fr->fshift[0];
140 facel = gmx_fjsp_set1_v2r8(fr->epsfac);
141 charge = mdatoms->chargeA;
142 nvdwtype = fr->ntype;
144 vdwtype = mdatoms->typeA;
145 vdwgridparam = fr->ljpme_c6grid;
146 sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
147 ewclj = gmx_fjsp_set1_v2r8(fr->ewaldcoeff_lj);
148 ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
150 sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
151 ewtab = fr->ic->tabq_coul_FDV0;
152 ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
153 ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
155 /* Setup water-specific parameters */
156 inr = nlist->iinr[0];
157 iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
158 iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
159 iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
160 vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
162 jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
163 jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
164 jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
165 vdwjidx0A = 2*vdwtype[inr+0];
166 qq00 = _fjsp_mul_v2r8(iq0,jq0);
167 c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
168 c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
169 c6grid_00 = gmx_fjsp_set1_v2r8(vdwgridparam[vdwioffset0+vdwjidx0A]);
170 qq01 = _fjsp_mul_v2r8(iq0,jq1);
171 qq02 = _fjsp_mul_v2r8(iq0,jq2);
172 qq10 = _fjsp_mul_v2r8(iq1,jq0);
173 qq11 = _fjsp_mul_v2r8(iq1,jq1);
174 qq12 = _fjsp_mul_v2r8(iq1,jq2);
175 qq20 = _fjsp_mul_v2r8(iq2,jq0);
176 qq21 = _fjsp_mul_v2r8(iq2,jq1);
177 qq22 = _fjsp_mul_v2r8(iq2,jq2);
179 /* Avoid stupid compiler warnings */
187 /* Start outer loop over neighborlists */
188 for(iidx=0; iidx<nri; iidx++)
190 /* Load shift vector for this list */
191 i_shift_offset = DIM*shiftidx[iidx];
193 /* Load limits for loop over neighbors */
194 j_index_start = jindex[iidx];
195 j_index_end = jindex[iidx+1];
197 /* Get outer coordinate index */
199 i_coord_offset = DIM*inr;
201 /* Load i particle coords and add shift vector */
202 gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
203 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
205 fix0 = _fjsp_setzero_v2r8();
206 fiy0 = _fjsp_setzero_v2r8();
207 fiz0 = _fjsp_setzero_v2r8();
208 fix1 = _fjsp_setzero_v2r8();
209 fiy1 = _fjsp_setzero_v2r8();
210 fiz1 = _fjsp_setzero_v2r8();
211 fix2 = _fjsp_setzero_v2r8();
212 fiy2 = _fjsp_setzero_v2r8();
213 fiz2 = _fjsp_setzero_v2r8();
215 /* Reset potential sums */
216 velecsum = _fjsp_setzero_v2r8();
217 vvdwsum = _fjsp_setzero_v2r8();
219 /* Start inner kernel loop */
220 for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
223 /* Get j neighbor index, and coordinate index */
226 j_coord_offsetA = DIM*jnrA;
227 j_coord_offsetB = DIM*jnrB;
229 /* load j atom coordinates */
230 gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
231 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
233 /* Calculate displacement vector */
234 dx00 = _fjsp_sub_v2r8(ix0,jx0);
235 dy00 = _fjsp_sub_v2r8(iy0,jy0);
236 dz00 = _fjsp_sub_v2r8(iz0,jz0);
237 dx01 = _fjsp_sub_v2r8(ix0,jx1);
238 dy01 = _fjsp_sub_v2r8(iy0,jy1);
239 dz01 = _fjsp_sub_v2r8(iz0,jz1);
240 dx02 = _fjsp_sub_v2r8(ix0,jx2);
241 dy02 = _fjsp_sub_v2r8(iy0,jy2);
242 dz02 = _fjsp_sub_v2r8(iz0,jz2);
243 dx10 = _fjsp_sub_v2r8(ix1,jx0);
244 dy10 = _fjsp_sub_v2r8(iy1,jy0);
245 dz10 = _fjsp_sub_v2r8(iz1,jz0);
246 dx11 = _fjsp_sub_v2r8(ix1,jx1);
247 dy11 = _fjsp_sub_v2r8(iy1,jy1);
248 dz11 = _fjsp_sub_v2r8(iz1,jz1);
249 dx12 = _fjsp_sub_v2r8(ix1,jx2);
250 dy12 = _fjsp_sub_v2r8(iy1,jy2);
251 dz12 = _fjsp_sub_v2r8(iz1,jz2);
252 dx20 = _fjsp_sub_v2r8(ix2,jx0);
253 dy20 = _fjsp_sub_v2r8(iy2,jy0);
254 dz20 = _fjsp_sub_v2r8(iz2,jz0);
255 dx21 = _fjsp_sub_v2r8(ix2,jx1);
256 dy21 = _fjsp_sub_v2r8(iy2,jy1);
257 dz21 = _fjsp_sub_v2r8(iz2,jz1);
258 dx22 = _fjsp_sub_v2r8(ix2,jx2);
259 dy22 = _fjsp_sub_v2r8(iy2,jy2);
260 dz22 = _fjsp_sub_v2r8(iz2,jz2);
262 /* Calculate squared distance and things based on it */
263 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
264 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
265 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
266 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
267 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
268 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
269 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
270 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
271 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
273 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
274 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
275 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
276 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
277 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
278 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
279 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
280 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
281 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
283 rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
284 rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
285 rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
286 rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
287 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
288 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
289 rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
290 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
291 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
293 fjx0 = _fjsp_setzero_v2r8();
294 fjy0 = _fjsp_setzero_v2r8();
295 fjz0 = _fjsp_setzero_v2r8();
296 fjx1 = _fjsp_setzero_v2r8();
297 fjy1 = _fjsp_setzero_v2r8();
298 fjz1 = _fjsp_setzero_v2r8();
299 fjx2 = _fjsp_setzero_v2r8();
300 fjy2 = _fjsp_setzero_v2r8();
301 fjz2 = _fjsp_setzero_v2r8();
303 /**************************
304 * CALCULATE INTERACTIONS *
305 **************************/
307 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
309 /* EWALD ELECTROSTATICS */
311 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
312 ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
313 itab_tmp = _fjsp_dtox_v2r8(ewrt);
314 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
315 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
317 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
318 ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
319 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
320 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
321 ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
322 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
323 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
324 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
325 velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
326 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
328 /* Analytical LJ-PME */
329 rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
330 ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
331 ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
332 exponent = gmx_simd_exp_d(-ewcljrsq);
333 /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
334 poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
335 /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
336 vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
337 vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
338 vvdw = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));
339 /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
340 fvdw = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
342 /* Update potential sum for this i atom from the interaction with this j atom. */
343 velecsum = _fjsp_add_v2r8(velecsum,velec);
344 vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
346 fscal = _fjsp_add_v2r8(felec,fvdw);
348 /* Update vectorial force */
349 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
350 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
351 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
353 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
354 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
355 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
357 /**************************
358 * CALCULATE INTERACTIONS *
359 **************************/
361 r01 = _fjsp_mul_v2r8(rsq01,rinv01);
363 /* EWALD ELECTROSTATICS */
365 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
366 ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
367 itab_tmp = _fjsp_dtox_v2r8(ewrt);
368 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
369 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
371 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
372 ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
373 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
374 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
375 ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
376 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
377 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
378 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
379 velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
380 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
382 /* Update potential sum for this i atom from the interaction with this j atom. */
383 velecsum = _fjsp_add_v2r8(velecsum,velec);
387 /* Update vectorial force */
388 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
389 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
390 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
392 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
393 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
394 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
396 /**************************
397 * CALCULATE INTERACTIONS *
398 **************************/
400 r02 = _fjsp_mul_v2r8(rsq02,rinv02);
402 /* EWALD ELECTROSTATICS */
404 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
405 ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
406 itab_tmp = _fjsp_dtox_v2r8(ewrt);
407 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
408 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
410 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
411 ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
412 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
413 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
414 ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
415 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
416 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
417 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
418 velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
419 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
421 /* Update potential sum for this i atom from the interaction with this j atom. */
422 velecsum = _fjsp_add_v2r8(velecsum,velec);
426 /* Update vectorial force */
427 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
428 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
429 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
431 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
432 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
433 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
435 /**************************
436 * CALCULATE INTERACTIONS *
437 **************************/
439 r10 = _fjsp_mul_v2r8(rsq10,rinv10);
441 /* EWALD ELECTROSTATICS */
443 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
444 ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
445 itab_tmp = _fjsp_dtox_v2r8(ewrt);
446 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
447 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
449 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
450 ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
451 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
452 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
453 ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
454 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
455 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
456 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
457 velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
458 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
460 /* Update potential sum for this i atom from the interaction with this j atom. */
461 velecsum = _fjsp_add_v2r8(velecsum,velec);
465 /* Update vectorial force */
466 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
467 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
468 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
470 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
471 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
472 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
474 /**************************
475 * CALCULATE INTERACTIONS *
476 **************************/
478 r11 = _fjsp_mul_v2r8(rsq11,rinv11);
480 /* EWALD ELECTROSTATICS */
482 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
483 ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
484 itab_tmp = _fjsp_dtox_v2r8(ewrt);
485 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
486 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
488 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
489 ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
490 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
491 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
492 ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
493 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
494 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
495 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
496 velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
497 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
499 /* Update potential sum for this i atom from the interaction with this j atom. */
500 velecsum = _fjsp_add_v2r8(velecsum,velec);
504 /* Update vectorial force */
505 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
506 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
507 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
509 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
510 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
511 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
513 /**************************
514 * CALCULATE INTERACTIONS *
515 **************************/
517 r12 = _fjsp_mul_v2r8(rsq12,rinv12);
519 /* EWALD ELECTROSTATICS */
521 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
522 ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
523 itab_tmp = _fjsp_dtox_v2r8(ewrt);
524 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
525 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
527 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
528 ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
529 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
530 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
531 ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
532 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
533 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
534 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
535 velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
536 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
538 /* Update potential sum for this i atom from the interaction with this j atom. */
539 velecsum = _fjsp_add_v2r8(velecsum,velec);
543 /* Update vectorial force */
544 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
545 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
546 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
548 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
549 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
550 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
552 /**************************
553 * CALCULATE INTERACTIONS *
554 **************************/
556 r20 = _fjsp_mul_v2r8(rsq20,rinv20);
558 /* EWALD ELECTROSTATICS */
560 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
561 ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
562 itab_tmp = _fjsp_dtox_v2r8(ewrt);
563 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
564 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
566 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
567 ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
568 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
569 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
570 ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
571 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
572 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
573 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
574 velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
575 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
577 /* Update potential sum for this i atom from the interaction with this j atom. */
578 velecsum = _fjsp_add_v2r8(velecsum,velec);
582 /* Update vectorial force */
583 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
584 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
585 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
587 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
588 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
589 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
591 /**************************
592 * CALCULATE INTERACTIONS *
593 **************************/
595 r21 = _fjsp_mul_v2r8(rsq21,rinv21);
597 /* EWALD ELECTROSTATICS */
599 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
600 ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
601 itab_tmp = _fjsp_dtox_v2r8(ewrt);
602 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
603 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
605 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
606 ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
607 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
608 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
609 ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
610 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
611 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
612 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
613 velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
614 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
616 /* Update potential sum for this i atom from the interaction with this j atom. */
617 velecsum = _fjsp_add_v2r8(velecsum,velec);
621 /* Update vectorial force */
622 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
623 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
624 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
626 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
627 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
628 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
630 /**************************
631 * CALCULATE INTERACTIONS *
632 **************************/
634 r22 = _fjsp_mul_v2r8(rsq22,rinv22);
636 /* EWALD ELECTROSTATICS */
638 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
639 ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
640 itab_tmp = _fjsp_dtox_v2r8(ewrt);
641 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
642 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
644 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
645 ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
646 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
647 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
648 ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
649 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
650 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
651 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
652 velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
653 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
655 /* Update potential sum for this i atom from the interaction with this j atom. */
656 velecsum = _fjsp_add_v2r8(velecsum,velec);
660 /* Update vectorial force */
661 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
662 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
663 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
665 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
666 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
667 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
669 gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
671 /* Inner loop uses 420 flops */
678 j_coord_offsetA = DIM*jnrA;
680 /* load j atom coordinates */
681 gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
682 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
684 /* Calculate displacement vector */
685 dx00 = _fjsp_sub_v2r8(ix0,jx0);
686 dy00 = _fjsp_sub_v2r8(iy0,jy0);
687 dz00 = _fjsp_sub_v2r8(iz0,jz0);
688 dx01 = _fjsp_sub_v2r8(ix0,jx1);
689 dy01 = _fjsp_sub_v2r8(iy0,jy1);
690 dz01 = _fjsp_sub_v2r8(iz0,jz1);
691 dx02 = _fjsp_sub_v2r8(ix0,jx2);
692 dy02 = _fjsp_sub_v2r8(iy0,jy2);
693 dz02 = _fjsp_sub_v2r8(iz0,jz2);
694 dx10 = _fjsp_sub_v2r8(ix1,jx0);
695 dy10 = _fjsp_sub_v2r8(iy1,jy0);
696 dz10 = _fjsp_sub_v2r8(iz1,jz0);
697 dx11 = _fjsp_sub_v2r8(ix1,jx1);
698 dy11 = _fjsp_sub_v2r8(iy1,jy1);
699 dz11 = _fjsp_sub_v2r8(iz1,jz1);
700 dx12 = _fjsp_sub_v2r8(ix1,jx2);
701 dy12 = _fjsp_sub_v2r8(iy1,jy2);
702 dz12 = _fjsp_sub_v2r8(iz1,jz2);
703 dx20 = _fjsp_sub_v2r8(ix2,jx0);
704 dy20 = _fjsp_sub_v2r8(iy2,jy0);
705 dz20 = _fjsp_sub_v2r8(iz2,jz0);
706 dx21 = _fjsp_sub_v2r8(ix2,jx1);
707 dy21 = _fjsp_sub_v2r8(iy2,jy1);
708 dz21 = _fjsp_sub_v2r8(iz2,jz1);
709 dx22 = _fjsp_sub_v2r8(ix2,jx2);
710 dy22 = _fjsp_sub_v2r8(iy2,jy2);
711 dz22 = _fjsp_sub_v2r8(iz2,jz2);
713 /* Calculate squared distance and things based on it */
714 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
715 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
716 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
717 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
718 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
719 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
720 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
721 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
722 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
724 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
725 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
726 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
727 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
728 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
729 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
730 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
731 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
732 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
734 rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
735 rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
736 rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
737 rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
738 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
739 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
740 rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
741 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
742 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
744 fjx0 = _fjsp_setzero_v2r8();
745 fjy0 = _fjsp_setzero_v2r8();
746 fjz0 = _fjsp_setzero_v2r8();
747 fjx1 = _fjsp_setzero_v2r8();
748 fjy1 = _fjsp_setzero_v2r8();
749 fjz1 = _fjsp_setzero_v2r8();
750 fjx2 = _fjsp_setzero_v2r8();
751 fjy2 = _fjsp_setzero_v2r8();
752 fjz2 = _fjsp_setzero_v2r8();
754 /**************************
755 * CALCULATE INTERACTIONS *
756 **************************/
758 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
760 /* EWALD ELECTROSTATICS */
762 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
763 ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
764 itab_tmp = _fjsp_dtox_v2r8(ewrt);
765 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
766 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
768 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
769 ewtabD = _fjsp_setzero_v2r8();
770 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
771 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
772 ewtabFn = _fjsp_setzero_v2r8();
773 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
774 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
775 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
776 velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
777 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
779 /* Analytical LJ-PME */
780 rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
781 ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
782 ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
783 exponent = gmx_simd_exp_d(-ewcljrsq);
784 /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
785 poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
786 /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
787 vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
788 vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
789 vvdw = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));
790 /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
791 fvdw = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
793 /* Update potential sum for this i atom from the interaction with this j atom. */
794 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
795 velecsum = _fjsp_add_v2r8(velecsum,velec);
796 vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
797 vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
799 fscal = _fjsp_add_v2r8(felec,fvdw);
801 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
803 /* Update vectorial force */
804 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
805 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
806 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
808 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
809 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
810 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
812 /**************************
813 * CALCULATE INTERACTIONS *
814 **************************/
816 r01 = _fjsp_mul_v2r8(rsq01,rinv01);
818 /* EWALD ELECTROSTATICS */
820 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
821 ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
822 itab_tmp = _fjsp_dtox_v2r8(ewrt);
823 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
824 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
826 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
827 ewtabD = _fjsp_setzero_v2r8();
828 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
829 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
830 ewtabFn = _fjsp_setzero_v2r8();
831 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
832 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
833 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
834 velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
835 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
837 /* Update potential sum for this i atom from the interaction with this j atom. */
838 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
839 velecsum = _fjsp_add_v2r8(velecsum,velec);
843 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
845 /* Update vectorial force */
846 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
847 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
848 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
850 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
851 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
852 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
854 /**************************
855 * CALCULATE INTERACTIONS *
856 **************************/
858 r02 = _fjsp_mul_v2r8(rsq02,rinv02);
860 /* EWALD ELECTROSTATICS */
862 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
863 ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
864 itab_tmp = _fjsp_dtox_v2r8(ewrt);
865 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
866 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
868 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
869 ewtabD = _fjsp_setzero_v2r8();
870 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
871 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
872 ewtabFn = _fjsp_setzero_v2r8();
873 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
874 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
875 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
876 velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
877 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
879 /* Update potential sum for this i atom from the interaction with this j atom. */
880 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
881 velecsum = _fjsp_add_v2r8(velecsum,velec);
885 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
887 /* Update vectorial force */
888 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
889 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
890 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
892 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
893 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
894 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
896 /**************************
897 * CALCULATE INTERACTIONS *
898 **************************/
900 r10 = _fjsp_mul_v2r8(rsq10,rinv10);
902 /* EWALD ELECTROSTATICS */
904 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
905 ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
906 itab_tmp = _fjsp_dtox_v2r8(ewrt);
907 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
908 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
910 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
911 ewtabD = _fjsp_setzero_v2r8();
912 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
913 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
914 ewtabFn = _fjsp_setzero_v2r8();
915 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
916 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
917 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
918 velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
919 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
921 /* Update potential sum for this i atom from the interaction with this j atom. */
922 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
923 velecsum = _fjsp_add_v2r8(velecsum,velec);
927 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
929 /* Update vectorial force */
930 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
931 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
932 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
934 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
935 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
936 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
938 /**************************
939 * CALCULATE INTERACTIONS *
940 **************************/
942 r11 = _fjsp_mul_v2r8(rsq11,rinv11);
944 /* EWALD ELECTROSTATICS */
946 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
947 ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
948 itab_tmp = _fjsp_dtox_v2r8(ewrt);
949 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
950 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
952 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
953 ewtabD = _fjsp_setzero_v2r8();
954 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
955 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
956 ewtabFn = _fjsp_setzero_v2r8();
957 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
958 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
959 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
960 velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
961 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
963 /* Update potential sum for this i atom from the interaction with this j atom. */
964 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
965 velecsum = _fjsp_add_v2r8(velecsum,velec);
969 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
971 /* Update vectorial force */
972 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
973 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
974 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
976 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
977 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
978 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
980 /**************************
981 * CALCULATE INTERACTIONS *
982 **************************/
984 r12 = _fjsp_mul_v2r8(rsq12,rinv12);
986 /* EWALD ELECTROSTATICS */
988 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
989 ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
990 itab_tmp = _fjsp_dtox_v2r8(ewrt);
991 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
992 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
994 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
995 ewtabD = _fjsp_setzero_v2r8();
996 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
997 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
998 ewtabFn = _fjsp_setzero_v2r8();
999 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1000 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1001 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1002 velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
1003 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
1005 /* Update potential sum for this i atom from the interaction with this j atom. */
1006 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1007 velecsum = _fjsp_add_v2r8(velecsum,velec);
1011 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1013 /* Update vectorial force */
1014 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
1015 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1016 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1018 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1019 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1020 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1022 /**************************
1023 * CALCULATE INTERACTIONS *
1024 **************************/
1026 r20 = _fjsp_mul_v2r8(rsq20,rinv20);
1028 /* EWALD ELECTROSTATICS */
1030 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1031 ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
1032 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1033 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1034 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1036 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1037 ewtabD = _fjsp_setzero_v2r8();
1038 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1039 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1040 ewtabFn = _fjsp_setzero_v2r8();
1041 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1042 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1043 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1044 velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
1045 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
1047 /* Update potential sum for this i atom from the interaction with this j atom. */
1048 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1049 velecsum = _fjsp_add_v2r8(velecsum,velec);
1053 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1055 /* Update vectorial force */
1056 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
1057 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1058 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1060 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1061 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1062 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1064 /**************************
1065 * CALCULATE INTERACTIONS *
1066 **************************/
1068 r21 = _fjsp_mul_v2r8(rsq21,rinv21);
1070 /* EWALD ELECTROSTATICS */
1072 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1073 ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
1074 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1075 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1076 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1078 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1079 ewtabD = _fjsp_setzero_v2r8();
1080 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1081 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1082 ewtabFn = _fjsp_setzero_v2r8();
1083 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1084 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1085 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1086 velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
1087 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
1089 /* Update potential sum for this i atom from the interaction with this j atom. */
1090 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1091 velecsum = _fjsp_add_v2r8(velecsum,velec);
1095 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1097 /* Update vectorial force */
1098 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
1099 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1100 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1102 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1103 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1104 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1106 /**************************
1107 * CALCULATE INTERACTIONS *
1108 **************************/
1110 r22 = _fjsp_mul_v2r8(rsq22,rinv22);
1112 /* EWALD ELECTROSTATICS */
1114 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1115 ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
1116 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1117 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1118 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1120 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1121 ewtabD = _fjsp_setzero_v2r8();
1122 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1123 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1124 ewtabFn = _fjsp_setzero_v2r8();
1125 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1126 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1127 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1128 velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
1129 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
1131 /* Update potential sum for this i atom from the interaction with this j atom. */
1132 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1133 velecsum = _fjsp_add_v2r8(velecsum,velec);
1137 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1139 /* Update vectorial force */
1140 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
1141 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1142 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1144 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1145 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1146 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1148 gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1150 /* Inner loop uses 420 flops */
1153 /* End of innermost loop */
1155 gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
1156 f+i_coord_offset,fshift+i_shift_offset);
1159 /* Update potential energies */
1160 gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
1161 gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
1163 /* Increment number of inner iterations */
1164 inneriter += j_index_end - j_index_start;
1166 /* Outer loop uses 20 flops */
1169 /* Increment number of outer iterations */
1172 /* Update outer/inner flops */
1174 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*420);
1177 * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJEw_GeomW3W3_F_sparc64_hpc_ace_double
1178 * Electrostatics interaction: Ewald
1179 * VdW interaction: LJEwald
1180 * Geometry: Water3-Water3
1181 * Calculate force/pot: Force
1184 nb_kernel_ElecEw_VdwLJEw_GeomW3W3_F_sparc64_hpc_ace_double
1185 (t_nblist * gmx_restrict nlist,
1186 rvec * gmx_restrict xx,
1187 rvec * gmx_restrict ff,
1188 t_forcerec * gmx_restrict fr,
1189 t_mdatoms * gmx_restrict mdatoms,
1190 nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
1191 t_nrnb * gmx_restrict nrnb)
1193 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1194 * just 0 for non-waters.
1195 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
1196 * jnr indices corresponding to data put in the two positions in the SIMD register.
1198 int i_shift_offset,i_coord_offset,outeriter,inneriter;
1199 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
1201 int j_coord_offsetA,j_coord_offsetB;
1202 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
1203 real rcutoff_scalar;
1204 real *shiftvec,*fshift,*x,*f;
1205 _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1207 _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
1209 _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1211 _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1212 int vdwjidx0A,vdwjidx0B;
1213 _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
1214 int vdwjidx1A,vdwjidx1B;
1215 _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1216 int vdwjidx2A,vdwjidx2B;
1217 _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1218 _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
1219 _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
1220 _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
1221 _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
1222 _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1223 _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1224 _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
1225 _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1226 _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1227 _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
1230 _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
1233 _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
1234 _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
1235 _fjsp_v2r8 c6grid_00;
1236 _fjsp_v2r8 c6grid_01;
1237 _fjsp_v2r8 c6grid_02;
1238 _fjsp_v2r8 c6grid_10;
1239 _fjsp_v2r8 c6grid_11;
1240 _fjsp_v2r8 c6grid_12;
1241 _fjsp_v2r8 c6grid_20;
1242 _fjsp_v2r8 c6grid_21;
1243 _fjsp_v2r8 c6grid_22;
1245 _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
1246 _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5);
1247 _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0);
1248 _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
1250 _fjsp_v2r8 itab_tmp;
1251 _fjsp_v2r8 dummy_mask,cutoff_mask;
1252 _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
1253 _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
1254 union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
1261 jindex = nlist->jindex;
1263 shiftidx = nlist->shift;
1265 shiftvec = fr->shift_vec[0];
1266 fshift = fr->fshift[0];
1267 facel = gmx_fjsp_set1_v2r8(fr->epsfac);
1268 charge = mdatoms->chargeA;
1269 nvdwtype = fr->ntype;
1270 vdwparam = fr->nbfp;
1271 vdwtype = mdatoms->typeA;
1272 vdwgridparam = fr->ljpme_c6grid;
1273 sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
1274 ewclj = gmx_fjsp_set1_v2r8(fr->ewaldcoeff_lj);
1275 ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
1277 sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
1278 ewtab = fr->ic->tabq_coul_F;
1279 ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
1280 ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
1282 /* Setup water-specific parameters */
1283 inr = nlist->iinr[0];
1284 iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
1285 iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
1286 iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
1287 vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
1289 jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
1290 jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
1291 jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
1292 vdwjidx0A = 2*vdwtype[inr+0];
1293 qq00 = _fjsp_mul_v2r8(iq0,jq0);
1294 c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
1295 c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
1296 c6grid_00 = gmx_fjsp_set1_v2r8(vdwgridparam[vdwioffset0+vdwjidx0A]);
1297 qq01 = _fjsp_mul_v2r8(iq0,jq1);
1298 qq02 = _fjsp_mul_v2r8(iq0,jq2);
1299 qq10 = _fjsp_mul_v2r8(iq1,jq0);
1300 qq11 = _fjsp_mul_v2r8(iq1,jq1);
1301 qq12 = _fjsp_mul_v2r8(iq1,jq2);
1302 qq20 = _fjsp_mul_v2r8(iq2,jq0);
1303 qq21 = _fjsp_mul_v2r8(iq2,jq1);
1304 qq22 = _fjsp_mul_v2r8(iq2,jq2);
1306 /* Avoid stupid compiler warnings */
1308 j_coord_offsetA = 0;
1309 j_coord_offsetB = 0;
1314 /* Start outer loop over neighborlists */
1315 for(iidx=0; iidx<nri; iidx++)
1317 /* Load shift vector for this list */
1318 i_shift_offset = DIM*shiftidx[iidx];
1320 /* Load limits for loop over neighbors */
1321 j_index_start = jindex[iidx];
1322 j_index_end = jindex[iidx+1];
1324 /* Get outer coordinate index */
1326 i_coord_offset = DIM*inr;
1328 /* Load i particle coords and add shift vector */
1329 gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
1330 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
1332 fix0 = _fjsp_setzero_v2r8();
1333 fiy0 = _fjsp_setzero_v2r8();
1334 fiz0 = _fjsp_setzero_v2r8();
1335 fix1 = _fjsp_setzero_v2r8();
1336 fiy1 = _fjsp_setzero_v2r8();
1337 fiz1 = _fjsp_setzero_v2r8();
1338 fix2 = _fjsp_setzero_v2r8();
1339 fiy2 = _fjsp_setzero_v2r8();
1340 fiz2 = _fjsp_setzero_v2r8();
1342 /* Start inner kernel loop */
1343 for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1346 /* Get j neighbor index, and coordinate index */
1348 jnrB = jjnr[jidx+1];
1349 j_coord_offsetA = DIM*jnrA;
1350 j_coord_offsetB = DIM*jnrB;
1352 /* load j atom coordinates */
1353 gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
1354 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1356 /* Calculate displacement vector */
1357 dx00 = _fjsp_sub_v2r8(ix0,jx0);
1358 dy00 = _fjsp_sub_v2r8(iy0,jy0);
1359 dz00 = _fjsp_sub_v2r8(iz0,jz0);
1360 dx01 = _fjsp_sub_v2r8(ix0,jx1);
1361 dy01 = _fjsp_sub_v2r8(iy0,jy1);
1362 dz01 = _fjsp_sub_v2r8(iz0,jz1);
1363 dx02 = _fjsp_sub_v2r8(ix0,jx2);
1364 dy02 = _fjsp_sub_v2r8(iy0,jy2);
1365 dz02 = _fjsp_sub_v2r8(iz0,jz2);
1366 dx10 = _fjsp_sub_v2r8(ix1,jx0);
1367 dy10 = _fjsp_sub_v2r8(iy1,jy0);
1368 dz10 = _fjsp_sub_v2r8(iz1,jz0);
1369 dx11 = _fjsp_sub_v2r8(ix1,jx1);
1370 dy11 = _fjsp_sub_v2r8(iy1,jy1);
1371 dz11 = _fjsp_sub_v2r8(iz1,jz1);
1372 dx12 = _fjsp_sub_v2r8(ix1,jx2);
1373 dy12 = _fjsp_sub_v2r8(iy1,jy2);
1374 dz12 = _fjsp_sub_v2r8(iz1,jz2);
1375 dx20 = _fjsp_sub_v2r8(ix2,jx0);
1376 dy20 = _fjsp_sub_v2r8(iy2,jy0);
1377 dz20 = _fjsp_sub_v2r8(iz2,jz0);
1378 dx21 = _fjsp_sub_v2r8(ix2,jx1);
1379 dy21 = _fjsp_sub_v2r8(iy2,jy1);
1380 dz21 = _fjsp_sub_v2r8(iz2,jz1);
1381 dx22 = _fjsp_sub_v2r8(ix2,jx2);
1382 dy22 = _fjsp_sub_v2r8(iy2,jy2);
1383 dz22 = _fjsp_sub_v2r8(iz2,jz2);
1385 /* Calculate squared distance and things based on it */
1386 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1387 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1388 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1389 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1390 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1391 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1392 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1393 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1394 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1396 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
1397 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
1398 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
1399 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
1400 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
1401 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
1402 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
1403 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
1404 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
1406 rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
1407 rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
1408 rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
1409 rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
1410 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
1411 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
1412 rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
1413 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
1414 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
1416 fjx0 = _fjsp_setzero_v2r8();
1417 fjy0 = _fjsp_setzero_v2r8();
1418 fjz0 = _fjsp_setzero_v2r8();
1419 fjx1 = _fjsp_setzero_v2r8();
1420 fjy1 = _fjsp_setzero_v2r8();
1421 fjz1 = _fjsp_setzero_v2r8();
1422 fjx2 = _fjsp_setzero_v2r8();
1423 fjy2 = _fjsp_setzero_v2r8();
1424 fjz2 = _fjsp_setzero_v2r8();
1426 /**************************
1427 * CALCULATE INTERACTIONS *
1428 **************************/
1430 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
1432 /* EWALD ELECTROSTATICS */
1434 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1435 ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
1436 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1437 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1438 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1440 gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1442 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1443 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
1445 /* Analytical LJ-PME */
1446 rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
1447 ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
1448 ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
1449 exponent = gmx_simd_exp_d(-ewcljrsq);
1450 /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
1451 poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
1452 /* f6A = 6 * C6grid * (1 - poly) */
1453 f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
1454 /* f6B = C6grid * exponent * beta^6 */
1455 f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
1456 /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
1457 fvdw = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
1459 fscal = _fjsp_add_v2r8(felec,fvdw);
1461 /* Update vectorial force */
1462 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
1463 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1464 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1466 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1467 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1468 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1470 /**************************
1471 * CALCULATE INTERACTIONS *
1472 **************************/
1474 r01 = _fjsp_mul_v2r8(rsq01,rinv01);
1476 /* EWALD ELECTROSTATICS */
1478 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1479 ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
1480 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1481 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1482 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1484 gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1486 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1487 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
1491 /* Update vectorial force */
1492 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
1493 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1494 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1496 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1497 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1498 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1500 /**************************
1501 * CALCULATE INTERACTIONS *
1502 **************************/
1504 r02 = _fjsp_mul_v2r8(rsq02,rinv02);
1506 /* EWALD ELECTROSTATICS */
1508 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1509 ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
1510 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1511 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1512 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1514 gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1516 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1517 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
1521 /* Update vectorial force */
1522 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
1523 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1524 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1526 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1527 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1528 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1530 /**************************
1531 * CALCULATE INTERACTIONS *
1532 **************************/
1534 r10 = _fjsp_mul_v2r8(rsq10,rinv10);
1536 /* EWALD ELECTROSTATICS */
1538 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1539 ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
1540 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1541 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1542 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1544 gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1546 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1547 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
1551 /* Update vectorial force */
1552 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
1553 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1554 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1556 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1557 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1558 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1560 /**************************
1561 * CALCULATE INTERACTIONS *
1562 **************************/
1564 r11 = _fjsp_mul_v2r8(rsq11,rinv11);
1566 /* EWALD ELECTROSTATICS */
1568 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1569 ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
1570 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1571 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1572 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1574 gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1576 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1577 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
1581 /* Update vectorial force */
1582 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
1583 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1584 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1586 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1587 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1588 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1590 /**************************
1591 * CALCULATE INTERACTIONS *
1592 **************************/
1594 r12 = _fjsp_mul_v2r8(rsq12,rinv12);
1596 /* EWALD ELECTROSTATICS */
1598 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1599 ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
1600 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1601 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1602 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1604 gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1606 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1607 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
1611 /* Update vectorial force */
1612 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
1613 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1614 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1616 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1617 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1618 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1620 /**************************
1621 * CALCULATE INTERACTIONS *
1622 **************************/
1624 r20 = _fjsp_mul_v2r8(rsq20,rinv20);
1626 /* EWALD ELECTROSTATICS */
1628 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1629 ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
1630 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1631 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1632 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1634 gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1636 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1637 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
1641 /* Update vectorial force */
1642 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
1643 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1644 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1646 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1647 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1648 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1650 /**************************
1651 * CALCULATE INTERACTIONS *
1652 **************************/
1654 r21 = _fjsp_mul_v2r8(rsq21,rinv21);
1656 /* EWALD ELECTROSTATICS */
1658 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1659 ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
1660 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1661 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1662 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1664 gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1666 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1667 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
1671 /* Update vectorial force */
1672 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
1673 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1674 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1676 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1677 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1678 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1680 /**************************
1681 * CALCULATE INTERACTIONS *
1682 **************************/
1684 r22 = _fjsp_mul_v2r8(rsq22,rinv22);
1686 /* EWALD ELECTROSTATICS */
1688 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1689 ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
1690 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1691 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1692 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1694 gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1696 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1697 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
1701 /* Update vectorial force */
1702 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
1703 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1704 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1706 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1707 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1708 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1710 gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1712 /* Inner loop uses 373 flops */
1715 if(jidx<j_index_end)
1719 j_coord_offsetA = DIM*jnrA;
1721 /* load j atom coordinates */
1722 gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
1723 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1725 /* Calculate displacement vector */
1726 dx00 = _fjsp_sub_v2r8(ix0,jx0);
1727 dy00 = _fjsp_sub_v2r8(iy0,jy0);
1728 dz00 = _fjsp_sub_v2r8(iz0,jz0);
1729 dx01 = _fjsp_sub_v2r8(ix0,jx1);
1730 dy01 = _fjsp_sub_v2r8(iy0,jy1);
1731 dz01 = _fjsp_sub_v2r8(iz0,jz1);
1732 dx02 = _fjsp_sub_v2r8(ix0,jx2);
1733 dy02 = _fjsp_sub_v2r8(iy0,jy2);
1734 dz02 = _fjsp_sub_v2r8(iz0,jz2);
1735 dx10 = _fjsp_sub_v2r8(ix1,jx0);
1736 dy10 = _fjsp_sub_v2r8(iy1,jy0);
1737 dz10 = _fjsp_sub_v2r8(iz1,jz0);
1738 dx11 = _fjsp_sub_v2r8(ix1,jx1);
1739 dy11 = _fjsp_sub_v2r8(iy1,jy1);
1740 dz11 = _fjsp_sub_v2r8(iz1,jz1);
1741 dx12 = _fjsp_sub_v2r8(ix1,jx2);
1742 dy12 = _fjsp_sub_v2r8(iy1,jy2);
1743 dz12 = _fjsp_sub_v2r8(iz1,jz2);
1744 dx20 = _fjsp_sub_v2r8(ix2,jx0);
1745 dy20 = _fjsp_sub_v2r8(iy2,jy0);
1746 dz20 = _fjsp_sub_v2r8(iz2,jz0);
1747 dx21 = _fjsp_sub_v2r8(ix2,jx1);
1748 dy21 = _fjsp_sub_v2r8(iy2,jy1);
1749 dz21 = _fjsp_sub_v2r8(iz2,jz1);
1750 dx22 = _fjsp_sub_v2r8(ix2,jx2);
1751 dy22 = _fjsp_sub_v2r8(iy2,jy2);
1752 dz22 = _fjsp_sub_v2r8(iz2,jz2);
1754 /* Calculate squared distance and things based on it */
1755 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1756 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1757 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1758 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1759 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1760 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1761 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1762 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1763 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1765 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
1766 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
1767 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
1768 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
1769 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
1770 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
1771 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
1772 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
1773 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
1775 rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
1776 rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
1777 rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
1778 rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
1779 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
1780 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
1781 rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
1782 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
1783 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
1785 fjx0 = _fjsp_setzero_v2r8();
1786 fjy0 = _fjsp_setzero_v2r8();
1787 fjz0 = _fjsp_setzero_v2r8();
1788 fjx1 = _fjsp_setzero_v2r8();
1789 fjy1 = _fjsp_setzero_v2r8();
1790 fjz1 = _fjsp_setzero_v2r8();
1791 fjx2 = _fjsp_setzero_v2r8();
1792 fjy2 = _fjsp_setzero_v2r8();
1793 fjz2 = _fjsp_setzero_v2r8();
1795 /**************************
1796 * CALCULATE INTERACTIONS *
1797 **************************/
1799 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
1801 /* EWALD ELECTROSTATICS */
1803 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1804 ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
1805 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1806 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1807 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1809 gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1810 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1811 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
1813 /* Analytical LJ-PME */
1814 rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
1815 ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
1816 ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
1817 exponent = gmx_simd_exp_d(-ewcljrsq);
1818 /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
1819 poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
1820 /* f6A = 6 * C6grid * (1 - poly) */
1821 f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
1822 /* f6B = C6grid * exponent * beta^6 */
1823 f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
1824 /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
1825 fvdw = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
1827 fscal = _fjsp_add_v2r8(felec,fvdw);
1829 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1831 /* Update vectorial force */
1832 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
1833 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1834 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1836 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1837 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1838 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1840 /**************************
1841 * CALCULATE INTERACTIONS *
1842 **************************/
1844 r01 = _fjsp_mul_v2r8(rsq01,rinv01);
1846 /* EWALD ELECTROSTATICS */
1848 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1849 ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
1850 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1851 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1852 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1854 gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1855 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1856 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
1860 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1862 /* Update vectorial force */
1863 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
1864 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1865 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1867 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1868 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1869 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1871 /**************************
1872 * CALCULATE INTERACTIONS *
1873 **************************/
1875 r02 = _fjsp_mul_v2r8(rsq02,rinv02);
1877 /* EWALD ELECTROSTATICS */
1879 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1880 ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
1881 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1882 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1883 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1885 gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1886 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1887 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
1891 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1893 /* Update vectorial force */
1894 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
1895 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1896 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1898 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1899 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1900 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1902 /**************************
1903 * CALCULATE INTERACTIONS *
1904 **************************/
1906 r10 = _fjsp_mul_v2r8(rsq10,rinv10);
1908 /* EWALD ELECTROSTATICS */
1910 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1911 ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
1912 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1913 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1914 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1916 gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1917 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1918 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
1922 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1924 /* Update vectorial force */
1925 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
1926 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1927 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1929 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1930 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1931 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1933 /**************************
1934 * CALCULATE INTERACTIONS *
1935 **************************/
1937 r11 = _fjsp_mul_v2r8(rsq11,rinv11);
1939 /* EWALD ELECTROSTATICS */
1941 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1942 ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
1943 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1944 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1945 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1947 gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1948 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1949 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
1953 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1955 /* Update vectorial force */
1956 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
1957 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1958 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1960 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1961 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1962 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1964 /**************************
1965 * CALCULATE INTERACTIONS *
1966 **************************/
1968 r12 = _fjsp_mul_v2r8(rsq12,rinv12);
1970 /* EWALD ELECTROSTATICS */
1972 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1973 ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
1974 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1975 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1976 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1978 gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1979 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1980 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
1984 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1986 /* Update vectorial force */
1987 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
1988 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1989 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1991 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1992 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1993 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1995 /**************************
1996 * CALCULATE INTERACTIONS *
1997 **************************/
1999 r20 = _fjsp_mul_v2r8(rsq20,rinv20);
2001 /* EWALD ELECTROSTATICS */
2003 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2004 ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
2005 itab_tmp = _fjsp_dtox_v2r8(ewrt);
2006 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2007 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2009 gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2010 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2011 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
2015 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2017 /* Update vectorial force */
2018 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
2019 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
2020 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
2022 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
2023 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
2024 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
2026 /**************************
2027 * CALCULATE INTERACTIONS *
2028 **************************/
2030 r21 = _fjsp_mul_v2r8(rsq21,rinv21);
2032 /* EWALD ELECTROSTATICS */
2034 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2035 ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
2036 itab_tmp = _fjsp_dtox_v2r8(ewrt);
2037 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2038 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2040 gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2041 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2042 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
2046 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2048 /* Update vectorial force */
2049 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
2050 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
2051 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
2053 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
2054 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
2055 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
2057 /**************************
2058 * CALCULATE INTERACTIONS *
2059 **************************/
2061 r22 = _fjsp_mul_v2r8(rsq22,rinv22);
2063 /* EWALD ELECTROSTATICS */
2065 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2066 ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
2067 itab_tmp = _fjsp_dtox_v2r8(ewrt);
2068 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2069 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2071 gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2072 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2073 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
2077 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2079 /* Update vectorial force */
2080 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
2081 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
2082 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
2084 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
2085 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
2086 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
2088 gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
2090 /* Inner loop uses 373 flops */
2093 /* End of innermost loop */
2095 gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
2096 f+i_coord_offset,fshift+i_shift_offset);
2098 /* Increment number of inner iterations */
2099 inneriter += j_index_end - j_index_start;
2101 /* Outer loop uses 18 flops */
2104 /* Increment number of outer iterations */
2107 /* Update outer/inner flops */
2109 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*373);